path: root/fs
Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs.h | 1
-rw-r--r--  fs/9p/v9fs_vfs.h | 4
-rw-r--r--  fs/9p/vfs_addr.c | 87
-rw-r--r--  fs/9p/vfs_dir.c | 15
-rw-r--r--  fs/9p/vfs_file.c | 326
-rw-r--r--  fs/9p/xattr.c | 80
-rw-r--r--  fs/Kconfig | 2
-rw-r--r--  fs/Kconfig.binfmt | 3
-rw-r--r--  fs/Makefile | 1
-rw-r--r--  fs/adfs/dir_fplus.c | 1
-rw-r--r--  fs/adfs/file.c | 2
-rw-r--r--  fs/adfs/super.c | 20
-rw-r--r--  fs/affs/affs.h | 28
-rw-r--r--  fs/affs/amigaffs.c | 3
-rw-r--r--  fs/affs/file.c | 34
-rw-r--r--  fs/affs/inode.c | 32
-rw-r--r--  fs/affs/namei.c | 6
-rw-r--r--  fs/affs/super.c | 43
-rw-r--r--  fs/afs/file.c | 2
-rw-r--r--  fs/afs/misc.c | 16
-rw-r--r--  fs/afs/rxrpc.c | 5
-rw-r--r--  fs/afs/write.c | 1
-rw-r--r--  fs/aio.c | 278
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/waitq.c | 2
-rw-r--r--  fs/befs/befs.h | 22
-rw-r--r--  fs/befs/datastream.c | 4
-rw-r--r--  fs/befs/io.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 16
-rw-r--r--  fs/befs/super.c | 4
-rw-r--r--  fs/bfs/dir.c | 4
-rw-r--r--  fs/bfs/file.c | 2
-rw-r--r--  fs/bfs/inode.c | 1
-rw-r--r--  fs/binfmt_elf.c | 31
-rw-r--r--  fs/binfmt_misc.c | 30
-rw-r--r--  fs/block_dev.c | 24
-rw-r--r--  fs/btrfs/async-thread.c | 4
-rw-r--r--  fs/btrfs/async-thread.h | 2
-rw-r--r--  fs/btrfs/backref.c | 4
-rw-r--r--  fs/btrfs/btrfs_inode.h | 14
-rw-r--r--  fs/btrfs/check-integrity.c | 9
-rw-r--r--  fs/btrfs/compression.c | 4
-rw-r--r--  fs/btrfs/compression.h | 4
-rw-r--r--  fs/btrfs/ctree.c | 70
-rw-r--r--  fs/btrfs/ctree.h | 51
-rw-r--r--  fs/btrfs/delayed-inode.c | 9
-rw-r--r--  fs/btrfs/delayed-ref.c | 22
-rw-r--r--  fs/btrfs/delayed-ref.h | 10
-rw-r--r--  fs/btrfs/dev-replace.c | 6
-rw-r--r--  fs/btrfs/disk-io.c | 572
-rw-r--r--  fs/btrfs/disk-io.h | 4
-rw-r--r--  fs/btrfs/extent-tree.c | 527
-rw-r--r--  fs/btrfs/extent_io.c | 11
-rw-r--r--  fs/btrfs/extent_io.h | 2
-rw-r--r--  fs/btrfs/file-item.c | 6
-rw-r--r--  fs/btrfs/file.c | 182
-rw-r--r--  fs/btrfs/free-space-cache.c | 301
-rw-r--r--  fs/btrfs/free-space-cache.h | 9
-rw-r--r--  fs/btrfs/inode-map.c | 2
-rw-r--r--  fs/btrfs/inode.c | 283
-rw-r--r--  fs/btrfs/ioctl.c | 33
-rw-r--r--  fs/btrfs/lzo.c | 2
-rw-r--r--  fs/btrfs/math.h | 6
-rw-r--r--  fs/btrfs/ordered-data.c | 7
-rw-r--r--  fs/btrfs/props.c | 2
-rw-r--r--  fs/btrfs/qgroup.c | 348
-rw-r--r--  fs/btrfs/qgroup.h | 3
-rw-r--r--  fs/btrfs/raid56.c | 16
-rw-r--r--  fs/btrfs/relocation.c | 11
-rw-r--r--  fs/btrfs/scrub.c | 25
-rw-r--r--  fs/btrfs/send.c | 250
-rw-r--r--  fs/btrfs/super.c | 23
-rw-r--r--  fs/btrfs/sysfs.c | 2
-rw-r--r--  fs/btrfs/sysfs.h | 22
-rw-r--r--  fs/btrfs/tests/inode-tests.c | 197
-rw-r--r--  fs/btrfs/tests/qgroup-tests.c | 4
-rw-r--r--  fs/btrfs/transaction.c | 94
-rw-r--r--  fs/btrfs/transaction.h | 12
-rw-r--r--  fs/btrfs/tree-log.c | 384
-rw-r--r--  fs/btrfs/tree-log.h | 2
-rw-r--r--  fs/btrfs/volumes.c | 149
-rw-r--r--  fs/btrfs/volumes.h | 3
-rw-r--r--  fs/btrfs/xattr.c | 61
-rw-r--r--  fs/btrfs/zlib.c | 2
-rw-r--r--  fs/buffer.c | 4
-rw-r--r--  fs/ceph/addr.c | 41
-rw-r--r--  fs/ceph/caps.c | 51
-rw-r--r--  fs/ceph/dir.c | 48
-rw-r--r--  fs/ceph/file.c | 27
-rw-r--r--  fs/ceph/mds_client.c | 61
-rw-r--r--  fs/ceph/strings.c | 1
-rw-r--r--  fs/ceph/super.c | 56
-rw-r--r--  fs/ceph/super.h | 4
-rw-r--r--  fs/ceph/xattr.c | 23
-rw-r--r--  fs/cifs/cifsencrypt.c | 6
-rw-r--r--  fs/cifs/cifsfs.c | 12
-rw-r--r--  fs/cifs/connect.c | 19
-rw-r--r--  fs/cifs/file.c | 94
-rw-r--r--  fs/cifs/inode.c | 2
-rw-r--r--  fs/cifs/smb2misc.c | 2
-rw-r--r--  fs/cifs/smb2ops.c | 3
-rw-r--r--  fs/cifs/smb2pdu.c | 17
-rw-r--r--  fs/coda/file.c | 38
-rw-r--r--  fs/compat_ioctl.c | 2
-rw-r--r--  fs/configfs/dir.c | 2
-rw-r--r--  fs/coredump.c | 2
-rw-r--r--  fs/dax.c | 44
-rw-r--r--  fs/dcache.c | 49
-rw-r--r--  fs/debugfs/inode.c | 5
-rw-r--r--  fs/direct-io.c | 44
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 4
-rw-r--r--  fs/ecryptfs/file.c | 43
-rw-r--r--  fs/ecryptfs/keystore.c | 2
-rw-r--r--  fs/ecryptfs/main.c | 2
-rw-r--r--  fs/exec.c | 88
-rw-r--r--  fs/exofs/file.c | 2
-rw-r--r--  fs/exofs/inode.c | 4
-rw-r--r--  fs/ext2/ext2.h | 1
-rw-r--r--  fs/ext2/file.c | 21
-rw-r--r--  fs/ext2/inode.c | 18
-rw-r--r--  fs/ext2/namei.c | 10
-rw-r--r--  fs/ext3/file.c | 2
-rw-r--r--  fs/ext3/inode.c | 16
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext3/xattr.c | 3
-rw-r--r--  fs/ext4/Kconfig | 17
-rw-r--r--  fs/ext4/Makefile | 4
-rw-r--r--  fs/ext4/acl.c | 5
-rw-r--r--  fs/ext4/balloc.c | 3
-rw-r--r--  fs/ext4/bitmap.c | 1
-rw-r--r--  fs/ext4/block_validity.c | 1
-rw-r--r--  fs/ext4/crypto.c | 558
-rw-r--r--  fs/ext4/crypto_fname.c | 709
-rw-r--r--  fs/ext4/crypto_key.c | 165
-rw-r--r--  fs/ext4/crypto_policy.c | 194
-rw-r--r--  fs/ext4/dir.c | 81
-rw-r--r--  fs/ext4/ext4.h | 174
-rw-r--r--  fs/ext4/ext4_crypto.h | 147
-rw-r--r--  fs/ext4/extents.c | 81
-rw-r--r--  fs/ext4/extents_status.c | 2
-rw-r--r--  fs/ext4/file.c | 77
-rw-r--r--  fs/ext4/fsync.c | 1
-rw-r--r--  fs/ext4/hash.c | 1
-rw-r--r--  fs/ext4/ialloc.c | 28
-rw-r--r--  fs/ext4/indirect.c | 27
-rw-r--r--  fs/ext4/inline.c | 16
-rw-r--r--  fs/ext4/inode.c | 164
-rw-r--r--  fs/ext4/ioctl.c | 86
-rw-r--r--  fs/ext4/namei.c | 647
-rw-r--r--  fs/ext4/page-io.c | 48
-rw-r--r--  fs/ext4/readpage.c | 328
-rw-r--r--  fs/ext4/super.c | 58
-rw-r--r--  fs/ext4/symlink.c | 97
-rw-r--r--  fs/ext4/xattr.c | 4
-rw-r--r--  fs/ext4/xattr.h | 3
-rw-r--r--  fs/f2fs/Kconfig | 2
-rw-r--r--  fs/f2fs/acl.c | 14
-rw-r--r--  fs/f2fs/checkpoint.c | 38
-rw-r--r--  fs/f2fs/data.c | 766
-rw-r--r--  fs/f2fs/debug.c | 22
-rw-r--r--  fs/f2fs/dir.c | 93
-rw-r--r--  fs/f2fs/f2fs.h | 174
-rw-r--r--  fs/f2fs/file.c | 66
-rw-r--r--  fs/f2fs/gc.c | 6
-rw-r--r--  fs/f2fs/inline.c | 69
-rw-r--r--  fs/f2fs/inode.c | 25
-rw-r--r--  fs/f2fs/namei.c | 81
-rw-r--r--  fs/f2fs/node.c | 18
-rw-r--r--  fs/f2fs/node.h | 1
-rw-r--r--  fs/f2fs/recovery.c | 76
-rw-r--r--  fs/f2fs/segment.c | 17
-rw-r--r--  fs/f2fs/segment.h | 3
-rw-r--r--  fs/f2fs/super.c | 40
-rw-r--r--  fs/f2fs/xattr.c | 4
-rw-r--r--  fs/fat/cache.c | 2
-rw-r--r--  fs/fat/dir.c | 4
-rw-r--r--  fs/fat/fat.h | 5
-rw-r--r--  fs/fat/fatent.c | 3
-rw-r--r--  fs/fat/file.c | 6
-rw-r--r--  fs/fat/inode.c | 23
-rw-r--r--  fs/fat/misc.c | 4
-rw-r--r--  fs/fat/namei_msdos.c | 2
-rw-r--r--  fs/fat/namei_vfat.c | 2
-rw-r--r--  fs/file.c | 3
-rw-r--r--  fs/file_table.c | 4
-rw-r--r--  fs/fs-writeback.c | 93
-rw-r--r--  fs/fs_pin.c | 4
-rw-r--r--  fs/fuse/cuse.c | 27
-rw-r--r--  fs/fuse/dev.c | 83
-rw-r--r--  fs/fuse/file.c | 151
-rw-r--r--  fs/fuse/fuse_i.h | 1
-rw-r--r--  fs/gfs2/acl.c | 6
-rw-r--r--  fs/gfs2/aops.c | 24
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/file.c | 108
-rw-r--r--  fs/gfs2/glock.c | 47
-rw-r--r--  fs/gfs2/incore.h | 4
-rw-r--r--  fs/gfs2/inode.c | 18
-rw-r--r--  fs/gfs2/quota.c | 90
-rw-r--r--  fs/gfs2/quota.h | 8
-rw-r--r--  fs/gfs2/rgrp.c | 20
-rw-r--r--  fs/gfs2/rgrp.h | 3
-rw-r--r--  fs/gfs2/xattr.c | 2
-rw-r--r--  fs/hfs/dir.c | 4
-rw-r--r--  fs/hfs/inode.c | 12
-rw-r--r--  fs/hfsplus/bfind.c | 4
-rw-r--r--  fs/hfsplus/brec.c | 20
-rw-r--r--  fs/hfsplus/catalog.c | 3
-rw-r--r--  fs/hfsplus/dir.c | 4
-rw-r--r--  fs/hfsplus/inode.c | 19
-rw-r--r--  fs/hfsplus/ioctl.c | 12
-rw-r--r--  fs/hfsplus/xattr.c | 86
-rw-r--r--  fs/hfsplus/xattr.h | 22
-rw-r--r--  fs/hfsplus/xattr_security.c | 38
-rw-r--r--  fs/hfsplus/xattr_trusted.c | 37
-rw-r--r--  fs/hfsplus/xattr_user.c | 35
-rw-r--r--  fs/hostfs/hostfs.h | 6
-rw-r--r--  fs/hostfs/hostfs_kern.c | 114
-rw-r--r--  fs/hostfs/hostfs_user.c | 29
-rw-r--r--  fs/hpfs/file.c | 2
-rw-r--r--  fs/hugetlbfs/inode.c | 183
-rw-r--r--  fs/jffs2/file.c | 2
-rw-r--r--  fs/jffs2/xattr.c | 3
-rw-r--r--  fs/jfs/file.c | 2
-rw-r--r--  fs/jfs/inode.c | 10
-rw-r--r--  fs/jfs/jfs_metapage.c | 31
-rw-r--r--  fs/jfs/jfs_metapage.h | 1
-rw-r--r--  fs/jfs/super.c | 2
-rw-r--r--  fs/kernfs/file.c | 1
-rw-r--r--  fs/lockd/svcsubs.c | 2
-rw-r--r--  fs/locks.c | 104
-rw-r--r--  fs/logfs/file.c | 2
-rw-r--r--  fs/minix/file.c | 2
-rw-r--r--  fs/namei.c | 176
-rw-r--r--  fs/namespace.c | 142
-rw-r--r--  fs/ncpfs/file.c | 88
-rw-r--r--  fs/ncpfs/ncplib_kernel.c | 6
-rw-r--r--  fs/ncpfs/ncplib_kernel.h | 2
-rw-r--r--  fs/nfs/Kconfig | 2
-rw-r--r--  fs/nfs/client.c | 2
-rw-r--r--  fs/nfs/delegation.c | 45
-rw-r--r--  fs/nfs/dir.c | 22
-rw-r--r--  fs/nfs/direct.c | 40
-rw-r--r--  fs/nfs/file.c | 29
-rw-r--r--  fs/nfs/inode.c | 111
-rw-r--r--  fs/nfs/internal.h | 1
-rw-r--r--  fs/nfs/nfs3proc.c | 4
-rw-r--r--  fs/nfs/nfs3xdr.c | 5
-rw-r--r--  fs/nfs/nfs4client.c | 9
-rw-r--r--  fs/nfs/nfs4file.c | 2
-rw-r--r--  fs/nfs/nfs4proc.c | 31
-rw-r--r--  fs/nfs/nfs4session.h | 1
-rw-r--r--  fs/nfs/nfs4state.c | 18
-rw-r--r--  fs/nfs/proc.c | 6
-rw-r--r--  fs/nfs/read.c | 8
-rw-r--r--  fs/nfs/write.c | 35
-rw-r--r--  fs/nfsd/Kconfig | 3
-rw-r--r--  fs/nfsd/blocklayout.c | 2
-rw-r--r--  fs/nfsd/blocklayoutxdr.c | 6
-rw-r--r--  fs/nfsd/export.c | 4
-rw-r--r--  fs/nfsd/nfs4acl.c | 50
-rw-r--r--  fs/nfsd/nfs4layouts.c | 12
-rw-r--r--  fs/nfsd/nfs4proc.c | 14
-rw-r--r--  fs/nfsd/nfs4state.c | 29
-rw-r--r--  fs/nfsd/nfs4xdr.c | 36
-rw-r--r--  fs/nfsd/nfscache.c | 6
-rw-r--r--  fs/nfsd/nfsctl.c | 16
-rw-r--r--  fs/nfsd/nfsd.h | 2
-rw-r--r--  fs/nfsd/xdr4.h | 3
-rw-r--r--  fs/nilfs2/alloc.c | 5
-rw-r--r--  fs/nilfs2/bmap.c | 48
-rw-r--r--  fs/nilfs2/bmap.h | 13
-rw-r--r--  fs/nilfs2/btree.c | 110
-rw-r--r--  fs/nilfs2/cpfile.c | 58
-rw-r--r--  fs/nilfs2/direct.c | 17
-rw-r--r--  fs/nilfs2/file.c | 2
-rw-r--r--  fs/nilfs2/inode.c | 37
-rw-r--r--  fs/nilfs2/mdt.c | 54
-rw-r--r--  fs/nilfs2/mdt.h | 10
-rw-r--r--  fs/nilfs2/page.c | 24
-rw-r--r--  fs/nilfs2/segment.c | 24
-rw-r--r--  fs/nilfs2/super.c | 2
-rw-r--r--  fs/notify/fanotify/fanotify.c | 3
-rw-r--r--  fs/ntfs/Makefile | 2
-rw-r--r--  fs/ntfs/file.c | 778
-rw-r--r--  fs/ntfs/inode.c | 1
-rw-r--r--  fs/ocfs2/alloc.c | 48
-rw-r--r--  fs/ocfs2/aops.c | 178
-rw-r--r--  fs/ocfs2/aops.h | 2
-rw-r--r--  fs/ocfs2/cluster/masklog.h | 5
-rw-r--r--  fs/ocfs2/dir.c | 15
-rw-r--r--  fs/ocfs2/dlmglue.c | 5
-rw-r--r--  fs/ocfs2/export.c | 2
-rw-r--r--  fs/ocfs2/file.c | 147
-rw-r--r--  fs/ocfs2/inode.c | 4
-rw-r--r--  fs/ocfs2/localalloc.c | 4
-rw-r--r--  fs/ocfs2/namei.c | 6
-rw-r--r--  fs/ocfs2/ocfs2.h | 2
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 15
-rw-r--r--  fs/ocfs2/refcounttree.c | 2
-rw-r--r--  fs/ocfs2/slot_map.c | 4
-rw-r--r--  fs/ocfs2/stack_o2cb.c | 2
-rw-r--r--  fs/ocfs2/stack_user.c | 8
-rw-r--r--  fs/ocfs2/suballoc.c | 2
-rw-r--r--  fs/ocfs2/super.c | 37
-rw-r--r--  fs/ocfs2/xattr.c | 8
-rw-r--r--  fs/omfs/file.c | 2
-rw-r--r--  fs/open.c | 17
-rw-r--r--  fs/overlayfs/super.c | 33
-rw-r--r--  fs/pipe.c | 3
-rw-r--r--  fs/pnode.c | 60
-rw-r--r--  fs/pnode.h | 7
-rw-r--r--  fs/proc/array.c | 26
-rw-r--r--  fs/proc/base.c | 82
-rw-r--r--  fs/proc/fd.c | 27
-rw-r--r--  fs/proc/task_mmu.c | 3
-rw-r--r--  fs/pstore/inode.c | 3
-rw-r--r--  fs/pstore/ram.c | 3
-rw-r--r--  fs/quota/dquot.c | 151
-rw-r--r--  fs/quota/quota.c | 217
-rw-r--r--  fs/quota/quota_tree.c | 7
-rw-r--r--  fs/quota/quota_v2.c | 12
-rw-r--r--  fs/quota/quotaio_v2.h | 6
-rw-r--r--  fs/ramfs/file-mmu.c | 2
-rw-r--r--  fs/ramfs/file-nommu.c | 2
-rw-r--r--  fs/read_write.c | 213
-rw-r--r--  fs/reiserfs/file.c | 2
-rw-r--r--  fs/reiserfs/inode.c | 10
-rw-r--r--  fs/reiserfs/reiserfs.h | 1
-rw-r--r--  fs/reiserfs/super.c | 2
-rw-r--r--  fs/romfs/mmap-nommu.c | 1
-rw-r--r--  fs/splice.c | 31
-rw-r--r--  fs/stat.c | 2
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/sysfs/group.c | 11
-rw-r--r--  fs/sysv/file.c | 2
-rw-r--r--  fs/tracefs/Makefile | 4
-rw-r--r--  fs/tracefs/inode.c | 650
-rw-r--r--  fs/ubifs/budget.c | 2
-rw-r--r--  fs/ubifs/commit.c | 12
-rw-r--r--  fs/ubifs/compress.c | 22
-rw-r--r--  fs/ubifs/debug.c | 186
-rw-r--r--  fs/ubifs/dir.c | 23
-rw-r--r--  fs/ubifs/file.c | 20
-rw-r--r--  fs/ubifs/io.c | 40
-rw-r--r--  fs/ubifs/ioctl.c | 2
-rw-r--r--  fs/ubifs/journal.c | 17
-rw-r--r--  fs/ubifs/log.c | 4
-rw-r--r--  fs/ubifs/lprops.c | 62
-rw-r--r--  fs/ubifs/lpt.c | 59
-rw-r--r--  fs/ubifs/lpt_commit.c | 34
-rw-r--r--  fs/ubifs/master.c | 6
-rw-r--r--  fs/ubifs/orphan.c | 26
-rw-r--r--  fs/ubifs/recovery.c | 44
-rw-r--r--  fs/ubifs/replay.c | 34
-rw-r--r--  fs/ubifs/sb.c | 30
-rw-r--r--  fs/ubifs/scan.c | 24
-rw-r--r--  fs/ubifs/super.c | 107
-rw-r--r--  fs/ubifs/tnc.c | 20
-rw-r--r--  fs/ubifs/tnc_commit.c | 12
-rw-r--r--  fs/ubifs/tnc_misc.c | 24
-rw-r--r--  fs/ubifs/ubifs.h | 40
-rw-r--r--  fs/ubifs/xattr.c | 18
-rw-r--r--  fs/udf/balloc.c | 20
-rw-r--r--  fs/udf/dir.c | 1
-rw-r--r--  fs/udf/directory.c | 1
-rw-r--r--  fs/udf/file.c | 30
-rw-r--r--  fs/udf/inode.c | 12
-rw-r--r--  fs/udf/misc.c | 1
-rw-r--r--  fs/udf/namei.c | 10
-rw-r--r--  fs/udf/partition.c | 1
-rw-r--r--  fs/udf/super.c | 1
-rw-r--r--  fs/udf/symlink.c | 1
-rw-r--r--  fs/udf/truncate.c | 1
-rw-r--r--  fs/ufs/file.c | 2
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c | 104
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c | 150
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.h | 6
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 554
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.h | 13
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c | 24
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c | 8
-rw-r--r--  fs/xfs/libxfs/xfs_da_format.h | 14
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_data.c | 39
-rw-r--r--  fs/xfs/libxfs/xfs_format.h | 62
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c | 48
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c | 20
-rw-r--r--  fs/xfs/xfs_aops.c | 283
-rw-r--r--  fs/xfs/xfs_attr_inactive.c | 3
-rw-r--r--  fs/xfs/xfs_attr_list.c | 9
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 164
-rw-r--r--  fs/xfs/xfs_bmap_util.h | 2
-rw-r--r--  fs/xfs/xfs_buf_item.c | 4
-rw-r--r--  fs/xfs/xfs_discard.c | 2
-rw-r--r--  fs/xfs/xfs_error.c | 2
-rw-r--r--  fs/xfs/xfs_error.h | 8
-rw-r--r--  fs/xfs/xfs_file.c | 215
-rw-r--r--  fs/xfs/xfs_filestream.c | 2
-rw-r--r--  fs/xfs/xfs_fsops.c | 20
-rw-r--r--  fs/xfs/xfs_icache.c | 4
-rw-r--r--  fs/xfs/xfs_inode.c | 554
-rw-r--r--  fs/xfs/xfs_inode.h | 58
-rw-r--r--  fs/xfs/xfs_ioctl.c | 7
-rw-r--r--  fs/xfs/xfs_iomap.c | 3
-rw-r--r--  fs/xfs/xfs_iops.c | 127
-rw-r--r--  fs/xfs/xfs_iops.h | 2
-rw-r--r--  fs/xfs/xfs_itable.c | 2
-rw-r--r--  fs/xfs/xfs_linux.h | 9
-rw-r--r--  fs/xfs/xfs_log_recover.c | 4
-rw-r--r--  fs/xfs/xfs_mount.c | 918
-rw-r--r--  fs/xfs/xfs_mount.h | 95
-rw-r--r--  fs/xfs/xfs_mru_cache.c | 2
-rw-r--r--  fs/xfs/xfs_pnfs.c | 11
-rw-r--r--  fs/xfs/xfs_pnfs.h | 5
-rw-r--r--  fs/xfs/xfs_qm.c | 18
-rw-r--r--  fs/xfs/xfs_qm.h | 4
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c | 176
-rw-r--r--  fs/xfs/xfs_quotaops.c | 117
-rw-r--r--  fs/xfs/xfs_super.c | 132
-rw-r--r--  fs/xfs/xfs_super.h | 2
-rw-r--r--  fs/xfs/xfs_symlink.c | 58
-rw-r--r--  fs/xfs/xfs_trace.h | 29
-rw-r--r--  fs/xfs/xfs_trans.c | 234
423 files changed, 14482 insertions, 7598 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 099c7712631c..fb9ffcb43277 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -78,7 +78,6 @@ enum p9_cache_modes {
  * @cache: cache mode of type &p9_cache_modes
  * @cachetag: the tag of the cache associated with this session
  * @fscache: session cookie associated with FS-Cache
- * @options: copy of options string given by user
  * @uname: string user name to mount hierarchy as
  * @aname: mount specifier for remote hierarchy
  * @maxdata: maximum data to be sent/recvd per protocol message
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b83ebfbf3fdc..5a0db6dec8d1 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -68,14 +68,10 @@ int v9fs_file_open(struct inode *inode, struct file *file);
 void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
 int v9fs_uflags2omode(int uflags, int extended);
 
-ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
-ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
 void v9fs_blank_wstat(struct p9_wstat *wstat);
 int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
 int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
			 int datasync);
-ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
-				 const char __user *, size_t, loff_t *, int);
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
 int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
 static inline void v9fs_invalidate_inode_attr(struct inode *inode)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..e9e04376c52c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -51,12 +51,11 @@
  */
 static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
 {
-	int retval;
-	loff_t offset;
-	char *buffer;
-	struct inode *inode;
+	struct inode *inode = page->mapping->host;
+	struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
+	struct iov_iter to;
+	int retval, err;
 
-	inode = page->mapping->host;
 	p9_debug(P9_DEBUG_VFS, "\n");
 
 	BUG_ON(!PageLocked(page));
@@ -65,16 +64,16 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
 	if (retval == 0)
 		return retval;
 
-	buffer = kmap(page);
-	offset = page_offset(page);
+	iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
 
-	retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
-	if (retval < 0) {
+	retval = p9_client_read(fid, page_offset(page), &to, &err);
+	if (err) {
 		v9fs_uncache_page(inode, page);
+		retval = err;
 		goto done;
 	}
 
-	memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
+	zero_user(page, retval, PAGE_SIZE - retval);
 	flush_dcache_page(page);
 	SetPageUptodate(page);
 
@@ -82,7 +81,6 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
 	retval = 0;
 
 done:
-	kunmap(page);
 	unlock_page(page);
 	return retval;
 }
@@ -161,41 +159,32 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset,
 
 static int v9fs_vfs_writepage_locked(struct page *page)
 {
-	char *buffer;
-	int retval, len;
-	loff_t offset, size;
-	mm_segment_t old_fs;
-	struct v9fs_inode *v9inode;
 	struct inode *inode = page->mapping->host;
+	struct v9fs_inode *v9inode = V9FS_I(inode);
+	loff_t size = i_size_read(inode);
+	struct iov_iter from;
+	struct bio_vec bvec;
+	int err, len;
 
-	v9inode = V9FS_I(inode);
-	size = i_size_read(inode);
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
 	else
 		len = PAGE_CACHE_SIZE;
 
-	set_page_writeback(page);
-
-	buffer = kmap(page);
-	offset = page_offset(page);
+	bvec.bv_page = page;
+	bvec.bv_offset = 0;
+	bvec.bv_len = len;
+	iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
 
-	old_fs = get_fs();
-	set_fs(get_ds());
 	/* We should have writeback_fid always set */
 	BUG_ON(!v9inode->writeback_fid);
 
-	retval = v9fs_file_write_internal(inode,
-					  v9inode->writeback_fid,
-					  (__force const char __user *)buffer,
-					  len, &offset, 0);
-	if (retval > 0)
-		retval = 0;
+	set_page_writeback(page);
+
+	p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err);
 
-	set_fs(old_fs);
-	kunmap(page);
 	end_page_writeback(page);
-	return retval;
+	return err;
 }
 
 static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -241,11 +230,8 @@ static int v9fs_launder_page(struct page *page)
 
 /**
  * v9fs_direct_IO - 9P address space operation for direct I/O
- * @rw: direction (read or write)
  * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
  * @pos: offset in file to begin the operation
- * @nr_segs: size of iovec array
  *
  * The presence of v9fs_direct_IO() in the address space ops vector
  * allowes open() O_DIRECT flags which would have failed otherwise.
@@ -259,18 +245,23 @@ static int v9fs_launder_page(struct page *page)
  *
  */
 static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
+v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
-	/*
-	 * FIXME
-	 * Now that we do caching with cache mode enabled, We need
-	 * to support direct IO
-	 */
-	p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n",
-		 iocb->ki_filp,
-		 (long long)pos, iter->nr_segs);
-
-	return -EINVAL;
+	struct file *file = iocb->ki_filp;
+	ssize_t n;
+	int err = 0;
+	if (iov_iter_rw(iter) == WRITE) {
+		n = p9_client_write(file->private_data, pos, iter, &err);
+		if (n) {
+			struct inode *inode = file_inode(file);
+			loff_t i_size = i_size_read(inode);
+			if (pos + n > i_size)
+				inode_add_bytes(inode, pos + n - i_size);
+		}
+	} else {
+		n = p9_client_read(file->private_data, pos, iter, &err);
+	}
+	return n ? n : err;
 }
 
 static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
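
The conversion pattern above recurs throughout this series: instead of kmap()ing a page and pushing raw buffer pointers (plus set_fs() games) through the client, the caller wraps the page in a bio_vec, binds it to an iov_iter, and hands the iterator to the transport. A minimal sketch of that shape, modelled on the patched v9fs_fid_readpage() (kernel-internal APIs of this era; illustrative only, error handling trimmed):

	#include <linux/uio.h>
	#include <linux/pagemap.h>
	#include <net/9p/client.h>

	/* Read one page through a 9p fid via a bio_vec-backed iov_iter. */
	static int sketch_readpage(struct p9_fid *fid, struct page *page)
	{
		struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
		struct iov_iter to;
		int err, n;

		/* Bind the page to the iterator; the transport never sees a
		 * bare kernel pointer, so no kmap()/set_fs() is needed. */
		iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);

		n = p9_client_read(fid, page_offset(page), &to, &err);
		if (err)
			return err;

		zero_user(page, n, PAGE_SIZE - n);	/* zero the unread tail */
		return 0;
	}
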
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 4f1151088ebe..76c3b1ab6361 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -33,6 +33,7 @@
 #include <linux/inet.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -115,6 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
 	int buflen;
 	int reclen = 0;
 	struct p9_rdir *rdir;
+	struct kvec kvec;
 
 	p9_debug(P9_DEBUG_VFS, "name %pD\n", file);
 	fid = file->private_data;
@@ -124,16 +126,21 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
 	rdir = v9fs_alloc_rdir_buf(file, buflen);
 	if (!rdir)
 		return -ENOMEM;
+	kvec.iov_base = rdir->buf;
+	kvec.iov_len = buflen;
 
 	while (1) {
 		if (rdir->tail == rdir->head) {
-			err = v9fs_file_readn(file, rdir->buf, NULL,
-					      buflen, ctx->pos);
-			if (err <= 0)
+			struct iov_iter to;
+			int n;
+			iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+			n = p9_client_read(file->private_data, ctx->pos, &to,
+					   &err);
+			if (err)
 				return err;
 
 			rdir->head = 0;
-			rdir->tail = err;
+			rdir->tail = n;
 		}
 		while (rdir->head < rdir->tail) {
 			p9stat_init(&st);
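
For kernel-allocated buffers the same conversion uses a kvec rather than a bio_vec, exactly as v9fs_dir_readdir() now does; the short-read looping that v9fs_file_readn() used to open-code happens inside p9_client_read(). A sketch of the pattern (illustrative; sketch_read_buf is a hypothetical helper, not part of this patch):

	#include <linux/uio.h>
	#include <net/9p/client.h>

	/* Read up to buflen bytes at 'pos' into a kernel buffer. */
	static ssize_t sketch_read_buf(struct p9_fid *fid, char *buf,
				       size_t buflen, loff_t pos)
	{
		struct kvec kvec = {.iov_base = buf, .iov_len = buflen};
		struct iov_iter to;
		int err = 0, n;

		iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
		n = p9_client_read(fid, pos, &to, &err);
		return err ? err : n;	/* bytes read, or negative errno */
	}
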
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b40133796b87..1ef16bd8280b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -36,6 +36,8 @@
 #include <linux/utsname.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
+#include <linux/uio.h>
+#include <linux/slab.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -149,7 +151,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
	struct p9_flock flock;
	struct p9_fid *fid;
-	uint8_t status;
+	uint8_t status = P9_LOCK_ERROR;
	int res = 0;
	unsigned char fl_type;
 
@@ -194,7 +196,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
	for (;;) {
		res = p9_client_lock_dotl(fid, &flock, &status);
		if (res < 0)
-			break;
+			goto out_unlock;
 
		if (status != P9_LOCK_BLOCKED)
			break;
@@ -212,14 +214,16 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
	case P9_LOCK_BLOCKED:
		res = -EAGAIN;
		break;
+	default:
+		WARN_ONCE(1, "unknown lock status code: %d\n", status);
+		/* fallthough */
	case P9_LOCK_ERROR:
	case P9_LOCK_GRACE:
		res = -ENOLCK;
		break;
-	default:
-		BUG();
	}
 
+out_unlock:
	/*
	 * incase server returned error for lock request, revert
	 * it locally
@@ -285,6 +289,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
		fl->fl_end = glock.start + glock.length - 1;
		fl->fl_pid = glock.proc_id;
	}
+	kfree(glock.client_id);
	return res;
 }
 
@@ -364,63 +369,6 @@ out_err:
 }
 
 /**
- * v9fs_fid_readn - read from a fid
- * @fid: fid to read
- * @data: data buffer to read data into
- * @udata: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-ssize_t
-v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
-	       u64 offset)
-{
-	int n, total, size;
-
-	p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n",
-		 fid->fid, (long long unsigned)offset, count);
-	n = 0;
-	total = 0;
-	size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
-	do {
-		n = p9_client_read(fid, data, udata, offset, count);
-		if (n <= 0)
-			break;
-
-		if (data)
-			data += n;
-		if (udata)
-			udata += n;
-
-		offset += n;
-		count -= n;
-		total += n;
-	} while (count > 0 && n == size);
-
-	if (n < 0)
-		total = n;
-
-	return total;
-}
-
-/**
- * v9fs_file_readn - read from a file
- * @filp: file pointer to read
- * @data: data buffer to read data into
- * @udata: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-ssize_t
-v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
-		u64 offset)
-{
-	return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
-}
-
-/**
  * v9fs_file_read - read from a file
  * @filp: file pointer to read
  * @udata: user data buffer to read data into
@@ -430,69 +378,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
  */
 
 static ssize_t
-v9fs_file_read(struct file *filp, char __user *udata, size_t count,
-	       loff_t * offset)
+v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	int ret;
-	struct p9_fid *fid;
-	size_t size;
-
-	p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
-	fid = filp->private_data;
+	struct p9_fid *fid = iocb->ki_filp->private_data;
+	int ret, err;
 
-	size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
-	if (count > size)
-		ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
-	else
-		ret = p9_client_read(fid, NULL, udata, *offset, count);
+	p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
		 iov_iter_count(to), iocb->ki_pos);
 
-	if (ret > 0)
-		*offset += ret;
+	ret = p9_client_read(fid, iocb->ki_pos, to, &err);
+	if (!ret)
+		return err;
 
+	iocb->ki_pos += ret;
	return ret;
 }
 
-ssize_t
-v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
-			 const char __user *data, size_t count,
-			 loff_t *offset, int invalidate)
-{
-	int n;
-	loff_t i_size;
-	size_t total = 0;
-	loff_t origin = *offset;
-	unsigned long pg_start, pg_end;
-
-	p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
-		 data, (int)count, (int)*offset);
-
-	do {
-		n = p9_client_write(fid, NULL, data+total, origin+total, count);
-		if (n <= 0)
-			break;
-		count -= n;
-		total += n;
-	} while (count > 0);
-
-	if (invalidate && (total > 0)) {
-		pg_start = origin >> PAGE_CACHE_SHIFT;
-		pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
-		if (inode->i_mapping && inode->i_mapping->nrpages)
-			invalidate_inode_pages2_range(inode->i_mapping,
-						      pg_start, pg_end);
-		*offset += total;
-		i_size = i_size_read(inode);
-		if (*offset > i_size) {
-			inode_add_bytes(inode, *offset - i_size);
-			i_size_write(inode, *offset);
-		}
-	}
-	if (n < 0)
-		return n;
-
-	return total;
-}
-
 /**
  * v9fs_file_write - write to a file
  * @filp: file pointer to write
@@ -502,35 +403,39 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
  *
  */
 static ssize_t
-v9fs_file_write(struct file *filp, const char __user * data,
-		size_t count, loff_t *offset)
+v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	ssize_t retval = 0;
-	loff_t origin = *offset;
-
-
-	retval = generic_write_checks(filp, &origin, &count, 0);
-	if (retval)
-		goto out;
+	struct file *file = iocb->ki_filp;
+	ssize_t retval;
+	loff_t origin;
+	int err = 0;
 
-	retval = -EINVAL;
-	if ((ssize_t) count < 0)
-		goto out;
-	retval = 0;
-	if (!count)
-		goto out;
+	retval = generic_write_checks(iocb, from);
+	if (retval <= 0)
+		return retval;
 
-	retval = v9fs_file_write_internal(file_inode(filp),
-					  filp->private_data,
-					  data, count, &origin, 1);
-	/* update offset on successful write */
-	if (retval > 0)
-		*offset = origin;
-out:
-	return retval;
+	origin = iocb->ki_pos;
+	retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err);
+	if (retval > 0) {
+		struct inode *inode = file_inode(file);
+		loff_t i_size;
+		unsigned long pg_start, pg_end;
+		pg_start = origin >> PAGE_CACHE_SHIFT;
+		pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
+		if (inode->i_mapping && inode->i_mapping->nrpages)
+			invalidate_inode_pages2_range(inode->i_mapping,
+						      pg_start, pg_end);
+		iocb->ki_pos += retval;
+		i_size = i_size_read(inode);
+		if (iocb->ki_pos > i_size) {
+			inode_add_bytes(inode, iocb->ki_pos - i_size);
+			i_size_write(inode, iocb->ki_pos);
+		}
+		return retval;
+	}
+	return err;
 }
 
-
 static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
			   int datasync)
 {
@@ -657,44 +562,6 @@ out_unlock:
	return VM_FAULT_NOPAGE;
 }
 
-static ssize_t
-v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
-		 loff_t *offsetp)
-{
-	loff_t size, offset;
-	struct inode *inode;
-	struct address_space *mapping;
-
-	offset = *offsetp;
-	mapping = filp->f_mapping;
-	inode = mapping->host;
-	if (!count)
-		return 0;
-	size = i_size_read(inode);
-	if (offset < size)
-		filemap_write_and_wait_range(mapping, offset,
-					     offset + count - 1);
-
-	return v9fs_file_read(filp, udata, count, offsetp);
-}
-
-/**
- * v9fs_cached_file_read - read from a file
- * @filp: file pointer to read
- * @data: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-static ssize_t
-v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
-		      loff_t *offset)
-{
-	if (filp->f_flags & O_DIRECT)
-		return v9fs_direct_read(filp, data, count, offset);
-	return new_sync_read(filp, data, count, offset);
-}
-
 /**
  * v9fs_mmap_file_read - read from a file
  * @filp: file pointer to read
@@ -704,84 +571,12 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
  *
  */
 static ssize_t
-v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count,
-		    loff_t *offset)
+v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
	/* TODO: Check if there are dirty pages */
-	return v9fs_file_read(filp, data, count, offset);
-}
-
-static ssize_t
-v9fs_direct_write(struct file *filp, const char __user * data,
-		  size_t count, loff_t *offsetp)
-{
-	loff_t offset;
-	ssize_t retval;
-	struct inode *inode;
-	struct address_space *mapping;
-
-	offset = *offsetp;
-	mapping = filp->f_mapping;
-	inode = mapping->host;
-	if (!count)
-		return 0;
-
-	mutex_lock(&inode->i_mutex);
-	retval = filemap_write_and_wait_range(mapping, offset,
-					      offset + count - 1);
-	if (retval)
-		goto err_out;
-	/*
-	 * After a write we want buffered reads to be sure to go to disk to get
-	 * the new data. We invalidate clean cached page from the region we're
-	 * about to write. We do this *before* the write so that if we fail
-	 * here we fall back to buffered write
-	 */
-	if (mapping->nrpages) {
-		pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
-		pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
-
-		retval = invalidate_inode_pages2_range(mapping,
-							pg_start, pg_end);
-		/*
-		 * If a page can not be invalidated, fall back
-		 * to buffered write.
-		 */
-		if (retval) {
-			if (retval == -EBUSY)
-				goto buff_write;
-			goto err_out;
-		}
-	}
-	retval = v9fs_file_write(filp, data, count, offsetp);
-err_out:
-	mutex_unlock(&inode->i_mutex);
-	return retval;
-
-buff_write:
-	mutex_unlock(&inode->i_mutex);
-	return new_sync_write(filp, data, count, offsetp);
-}
-
-/**
- * v9fs_cached_file_write - write to a file
- * @filp: file pointer to write
- * @data: data buffer to write data from
- * @count: size of buffer
- * @offset: offset at which to write data
- *
- */
-static ssize_t
-v9fs_cached_file_write(struct file *filp, const char __user * data,
-		       size_t count, loff_t *offset)
-{
-
-	if (filp->f_flags & O_DIRECT)
-		return v9fs_direct_write(filp, data, count, offset);
-	return new_sync_write(filp, data, count, offset);
+	return v9fs_file_read_iter(iocb, to);
 }
 
-
 /**
  * v9fs_mmap_file_write - write to a file
  * @filp: file pointer to write
@@ -791,14 +586,13 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
  *
  */
 static ssize_t
-v9fs_mmap_file_write(struct file *filp, const char __user *data,
-		     size_t count, loff_t *offset)
+v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
	/*
	 * TODO: invalidate mmaps on filp's inode between
	 * offset and offset+count
	 */
-	return v9fs_file_write(filp, data, count, offset);
+	return v9fs_file_write_iter(iocb, from);
 }
 
 static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
@@ -843,8 +637,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
 
 const struct file_operations v9fs_cached_file_operations = {
	.llseek = generic_file_llseek,
-	.read = v9fs_cached_file_read,
-	.write = v9fs_cached_file_write,
	.read_iter = generic_file_read_iter,
	.write_iter = generic_file_write_iter,
	.open = v9fs_file_open,
@@ -856,8 +648,6 @@ const struct file_operations v9fs_cached_file_operations = {
 
 const struct file_operations v9fs_cached_file_operations_dotl = {
	.llseek = generic_file_llseek,
-	.read = v9fs_cached_file_read,
-	.write = v9fs_cached_file_write,
	.read_iter = generic_file_read_iter,
	.write_iter = generic_file_write_iter,
	.open = v9fs_file_open,
@@ -870,8 +660,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
 
 const struct file_operations v9fs_file_operations = {
	.llseek = generic_file_llseek,
-	.read = v9fs_file_read,
-	.write = v9fs_file_write,
+	.read_iter = v9fs_file_read_iter,
+	.write_iter = v9fs_file_write_iter,
	.open = v9fs_file_open,
	.release = v9fs_dir_release,
	.lock = v9fs_file_lock,
@@ -881,8 +671,8 @@ const struct file_operations v9fs_file_operations = {
 
 const struct file_operations v9fs_file_operations_dotl = {
	.llseek = generic_file_llseek,
-	.read = v9fs_file_read,
-	.write = v9fs_file_write,
+	.read_iter = v9fs_file_read_iter,
+	.write_iter = v9fs_file_write_iter,
	.open = v9fs_file_open,
	.release = v9fs_dir_release,
	.lock = v9fs_file_lock_dotl,
@@ -893,8 +683,8 @@ const struct file_operations v9fs_file_operations_dotl = {
 
 const struct file_operations v9fs_mmap_file_operations = {
	.llseek = generic_file_llseek,
-	.read = v9fs_mmap_file_read,
-	.write = v9fs_mmap_file_write,
+	.read_iter = v9fs_mmap_file_read_iter,
+	.write_iter = v9fs_mmap_file_write_iter,
	.open = v9fs_file_open,
	.release = v9fs_dir_release,
	.lock = v9fs_file_lock,
@@ -904,8 +694,8 @@ const struct file_operations v9fs_mmap_file_operations = {
 
 const struct file_operations v9fs_mmap_file_operations_dotl = {
	.llseek = generic_file_llseek,
-	.read = v9fs_mmap_file_read,
-	.write = v9fs_mmap_file_write,
+	.read_iter = v9fs_mmap_file_read_iter,
+	.write_iter = v9fs_mmap_file_write_iter,
	.open = v9fs_file_open,
	.release = v9fs_dir_release,
	.lock = v9fs_file_lock_dotl,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f95e01e058e4..0cf44b6cccd6 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -25,50 +26,34 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
			   void *buffer, size_t buffer_size)
 {
	ssize_t retval;
-	int msize, read_count;
-	u64 offset = 0, attr_size;
+	u64 attr_size;
	struct p9_fid *attr_fid;
+	struct kvec kvec = {.iov_base = buffer, .iov_len = buffer_size};
+	struct iov_iter to;
+	int err;
+
+	iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
 
	attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
	if (IS_ERR(attr_fid)) {
		retval = PTR_ERR(attr_fid);
		p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n",
			 retval);
-		attr_fid = NULL;
-		goto error;
-	}
-	if (!buffer_size) {
-		/* request to get the attr_size */
-		retval = attr_size;
-		goto error;
+		return retval;
	}
	if (attr_size > buffer_size) {
-		retval = -ERANGE;
-		goto error;
-	}
-	msize = attr_fid->clnt->msize;
-	while (attr_size) {
-		if (attr_size > (msize - P9_IOHDRSZ))
-			read_count = msize - P9_IOHDRSZ;
+		if (!buffer_size) /* request to get the attr_size */
+			retval = attr_size;
		else
-			read_count = attr_size;
-		read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
-					NULL, offset, read_count);
-		if (read_count < 0) {
-			/* error in xattr read */
-			retval = read_count;
-			goto error;
-		}
-		offset += read_count;
-		attr_size -= read_count;
+			retval = -ERANGE;
+	} else {
+		iov_iter_truncate(&to, attr_size);
+		retval = p9_client_read(attr_fid, 0, &to, &err);
+		if (err)
+			retval = err;
	}
-	/* Total read xattr bytes */
-	retval = offset;
-error:
-	if (attr_fid)
-		p9_client_clunk(attr_fid);
+	p9_client_clunk(attr_fid);
	return retval;
-
 }
 
 
@@ -120,8 +105,11 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
 int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
		       const void *value, size_t value_len, int flags)
 {
-	u64 offset = 0;
-	int retval, msize, write_count;
+	struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len};
+	struct iov_iter from;
+	int retval;
+
+	iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
 
	p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
		 name, value_len, flags);
@@ -135,29 +123,11 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
	 * On success fid points to xattr
	 */
	retval = p9_client_xattrcreate(fid, name, value_len, flags);
-	if (retval < 0) {
+	if (retval < 0)
		p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
			 retval);
-		goto err;
-	}
-	msize = fid->clnt->msize;
-	while (value_len) {
-		if (value_len > (msize - P9_IOHDRSZ))
-			write_count = msize - P9_IOHDRSZ;
-		else
-			write_count = value_len;
-		write_count = p9_client_write(fid, ((char *)value)+offset,
-					NULL, offset, write_count);
-		if (write_count < 0) {
-			/* error in xattr write */
-			retval = write_count;
-			goto err;
-		}
-		offset += write_count;
-		value_len -= write_count;
-	}
-	retval = 0;
-err:
+	else
+		p9_client_write(fid, 0, &from, &retval);
	p9_client_clunk(fid);
	return retval;
 }
diff --git a/fs/Kconfig b/fs/Kconfig
index ec35851e5b71..011f43365d7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig"
 source "fs/ocfs2/Kconfig"
 source "fs/btrfs/Kconfig"
 source "fs/nilfs2/Kconfig"
+source "fs/f2fs/Kconfig"
 
 config FS_DAX
	bool "Direct Access (DAX) support"
@@ -217,7 +218,6 @@ source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
-source "fs/f2fs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
	bool
	depends on COMPAT && BINFMT_ELF
 
-config ARCH_BINFMT_ELF_RANDOMIZE_PIE
-	bool
-
 config ARCH_BINFMT_ELF_STATE
	bool
 
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
 obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_CACHEFILES)	+= cachefiles/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
+obj-$(CONFIG_TRACING)		+= tracefs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_BTRFS_FS)		+= btrfs/
 obj-$(CONFIG_GFS2_FS)		+= gfs2/
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index f2ba88ab4aed..82d14cdf70f9 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -61,6 +61,7 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
			kcalloc(size, sizeof(struct buffer_head *),
				GFP_KERNEL);
		if (!bh_fplus) {
+			ret = -ENOMEM;
			adfs_error(sb, "not enough memory for"
					" dir object %X (%d blocks)", id, size);
			goto out;
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 07c9edce5aa7..46c0d5671cd5 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -23,11 +23,9 @@
 
 const struct file_operations adfs_file_operations = {
	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_mmap,
	.fsync		= generic_file_fsync,
-	.write		= new_sync_write,
	.write_iter	= generic_file_write_iter,
	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9852bdf34d76..a19c31d3f369 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -316,7 +316,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
	dm = kmalloc(nzones * sizeof(*dm), GFP_KERNEL);
	if (dm == NULL) {
		adfs_error(sb, "not enough memory");
-		return NULL;
+		return ERR_PTR(-ENOMEM);
	}
 
	for (zone = 0; zone < nzones; zone++, map_addr++) {
@@ -349,7 +349,7 @@ error_free:
		brelse(dm[zone].dm_bh);
 
	kfree(dm);
-	return NULL;
+	return ERR_PTR(-EIO);
 }
 
 static inline unsigned long adfs_discsize(struct adfs_discrecord *dr, int block_bits)
@@ -370,6 +370,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
	unsigned char *b_data;
	struct adfs_sb_info *asb;
	struct inode *root;
+	int ret = -EINVAL;
 
	sb->s_flags |= MS_NODIRATIME;
 
@@ -391,6 +392,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
	sb_set_blocksize(sb, BLOCK_SIZE);
	if (!(bh = sb_bread(sb, ADFS_DISCRECORD / BLOCK_SIZE))) {
		adfs_error(sb, "unable to read superblock");
+		ret = -EIO;
		goto error;
	}
 
@@ -400,6 +402,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
		if (!silent)
			printk("VFS: Can't find an adfs filesystem on dev "
				"%s.\n", sb->s_id);
+		ret = -EINVAL;
		goto error_free_bh;
	}
 
@@ -412,6 +415,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
		if (!silent)
			printk("VPS: Can't find an adfs filesystem on dev "
				"%s.\n", sb->s_id);
+		ret = -EINVAL;
		goto error_free_bh;
	}
 
@@ -421,11 +425,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
		if (!bh) {
			adfs_error(sb, "couldn't read superblock on "
				"2nd try.");
+			ret = -EIO;
			goto error;
		}
		b_data = bh->b_data + (ADFS_DISCRECORD % sb->s_blocksize);
		if (adfs_checkbblk(b_data)) {
			adfs_error(sb, "disc record mismatch, very weird!");
+			ret = -EINVAL;
			goto error_free_bh;
		}
		dr = (struct adfs_discrecord *)(b_data + ADFS_DR_OFFSET);
@@ -433,6 +439,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
		if (!silent)
			printk(KERN_ERR "VFS: Unsupported blocksize on dev "
				"%s.\n", sb->s_id);
+		ret = -EINVAL;
		goto error;
	}
 
@@ -447,10 +454,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
	asb->s_size = adfs_discsize(dr, sb->s_blocksize_bits);
	asb->s_version = dr->format_version;
	asb->s_log2sharesize = dr->log2sharesize;
 
	asb->s_map = adfs_read_map(sb, dr);
-	if (!asb->s_map)
+	if (IS_ERR(asb->s_map)) {
+		ret = PTR_ERR(asb->s_map);
		goto error_free_bh;
+	}
 
	brelse(bh);
 
@@ -499,6 +508,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
			brelse(asb->s_map[i].dm_bh);
		kfree(asb->s_map);
		adfs_error(sb, "get root inode failed\n");
+		ret = -EIO;
		goto error;
	}
	return 0;
@@ -508,7 +518,7 @@ error_free_bh:
 error:
	sb->s_fs_info = NULL;
	kfree(asb);
-	return -EINVAL;
+	return ret;
 }
 
 static struct dentry *adfs_mount(struct file_system_type *fs_type,
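
The adfs changes above are an instance of the standard ERR_PTR idiom: a helper that used to return NULL on every failure now encodes the cause in the pointer itself, so adfs_fill_super() can report -EIO, -ENOMEM or -EINVAL instead of a blanket -EINVAL. In miniature (generic idiom; make_table/use_table are hypothetical names):

	#include <linux/err.h>
	#include <linux/slab.h>

	static u32 *make_table(size_t n)
	{
		u32 *t = kmalloc(n * sizeof(*t), GFP_KERNEL);

		if (!t)
			return ERR_PTR(-ENOMEM);	/* errno travels in the pointer */
		return t;
	}

	static int use_table(void)
	{
		u32 *t = make_table(16);

		if (IS_ERR(t))
			return PTR_ERR(t);	/* recover and propagate the errno */
		kfree(t);
		return 0;
	}
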
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c8764bd7497d..cffe8370fb44 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,18 +106,22 @@ struct affs_sb_info {
	spinlock_t work_lock;		/* protects sb_work and work_queued */
 };
 
-#define SF_INTL		0x0001		/* International filesystem. */
-#define SF_BM_VALID	0x0002		/* Bitmap is valid. */
-#define SF_IMMUTABLE	0x0004		/* Protection bits cannot be changed */
-#define SF_QUIET	0x0008		/* chmod errors will be not reported */
-#define SF_SETUID	0x0010		/* Ignore Amiga uid */
-#define SF_SETGID	0x0020		/* Ignore Amiga gid */
-#define SF_SETMODE	0x0040		/* Ignore Amiga protection bits */
-#define SF_MUFS		0x0100		/* Use MUFS uid/gid mapping */
-#define SF_OFS		0x0200		/* Old filesystem */
-#define SF_PREFIX	0x0400		/* Buffer for prefix is allocated */
-#define SF_VERBOSE	0x0800		/* Talk about fs when mounting */
-#define SF_NO_TRUNCATE	0x1000		/* Don't truncate filenames */
+#define AFFS_MOUNT_SF_INTL		0x0001	/* International filesystem. */
+#define AFFS_MOUNT_SF_BM_VALID		0x0002	/* Bitmap is valid. */
+#define AFFS_MOUNT_SF_IMMUTABLE	0x0004	/* Protection bits cannot be changed */
+#define AFFS_MOUNT_SF_QUIET		0x0008	/* chmod errors will be not reported */
+#define AFFS_MOUNT_SF_SETUID		0x0010	/* Ignore Amiga uid */
+#define AFFS_MOUNT_SF_SETGID		0x0020	/* Ignore Amiga gid */
+#define AFFS_MOUNT_SF_SETMODE		0x0040	/* Ignore Amiga protection bits */
+#define AFFS_MOUNT_SF_MUFS		0x0100	/* Use MUFS uid/gid mapping */
+#define AFFS_MOUNT_SF_OFS		0x0200	/* Old filesystem */
+#define AFFS_MOUNT_SF_PREFIX		0x0400	/* Buffer for prefix is allocated */
+#define AFFS_MOUNT_SF_VERBOSE		0x0800	/* Talk about fs when mounting */
+#define AFFS_MOUNT_SF_NO_TRUNCATE	0x1000	/* Don't truncate filenames */
+
+#define affs_clear_opt(o, opt)    (o &= ~AFFS_MOUNT_##opt)
+#define affs_set_opt(o, opt)      (o |= AFFS_MOUNT_##opt)
+#define affs_test_opt(o, opt)     ((o) & AFFS_MOUNT_##opt)
 
 /* short cut to get to the affs specific sb data */
 static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
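
These helpers exist so call sites stop open-coding bit tests against the newly namespaced flags, while the token pasting keeps the short SF_* spelling at the call site. A hypothetical call site using only the macros defined above (the amigaffs.c and file.c hunks below switch to exactly this pattern):

	/* sbi->s_flags is the mount-option word in struct affs_sb_info */
	static void sketch_set_quiet(struct affs_sb_info *sbi, int quiet)
	{
		if (quiet)
			affs_set_opt(sbi->s_flags, SF_QUIET);	/* AFFS_MOUNT_SF_QUIET */
		else
			affs_clear_opt(sbi->s_flags, SF_QUIET);

		if (affs_test_opt(sbi->s_flags, SF_OFS))
			pr_debug("old (OFS) filesystem\n");
	}
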
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 388da1ea815d..5022ac96aa40 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -472,7 +472,8 @@ bool
 affs_nofilenametruncate(const struct dentry *dentry)
 {
	struct inode *inode = dentry->d_inode;
-	return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
+
+	return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
 
 }
 
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..659c579c4588 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
12 * affs regular file handling primitives 12 * affs regular file handling primitives
13 */ 13 */
14 14
15#include <linux/aio.h> 15#include <linux/uio.h>
16#include "affs.h" 16#include "affs.h"
17 17
18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); 18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
389} 389}
390 390
391static ssize_t 391static ssize_t
392affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 392affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
393 loff_t offset)
394{ 393{
395 struct file *file = iocb->ki_filp; 394 struct file *file = iocb->ki_filp;
396 struct address_space *mapping = file->f_mapping; 395 struct address_space *mapping = file->f_mapping;
@@ -398,15 +397,15 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
398 size_t count = iov_iter_count(iter); 397 size_t count = iov_iter_count(iter);
399 ssize_t ret; 398 ssize_t ret;
400 399
401 if (rw == WRITE) { 400 if (iov_iter_rw(iter) == WRITE) {
402 loff_t size = offset + count; 401 loff_t size = offset + count;
403 402
404 if (AFFS_I(inode)->mmu_private < size) 403 if (AFFS_I(inode)->mmu_private < size)
405 return 0; 404 return 0;
406 } 405 }
407 406
408 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); 407 ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block);
409 if (ret < 0 && (rw & WRITE)) 408 if (ret < 0 && iov_iter_rw(iter) == WRITE)
410 affs_write_failed(mapping, offset + count); 409 affs_write_failed(mapping, offset + count);
411 return ret; 410 return ret;
412} 411}
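With the rw argument gone from ->direct_IO, the transfer direction travels with the iov_iter itself and is queried via iov_iter_rw(). A user-space sketch of that direction test; the struct and helper here are simplified stand-ins, not the kernel's real definitions:

	#include <stdio.h>

	#define READ  0
	#define WRITE 1

	struct iov_iter { unsigned int type; };	/* direction kept in bit 0 */

	/* Mirrors the intent of the kernel helper: mask out the direction. */
	static inline int iov_iter_rw(const struct iov_iter *i)
	{
		return i->type & 1;
	}

	int main(void)
	{
		struct iov_iter iter = { .type = WRITE };

		if (iov_iter_rw(&iter) == WRITE)
			puts("write path: check mmu_private before going direct");
		return 0;
	}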
@@ -699,8 +698,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
699 boff = tmp % bsize; 698 boff = tmp % bsize;
700 if (boff) { 699 if (boff) {
701 bh = affs_bread_ino(inode, bidx, 0); 700 bh = affs_bread_ino(inode, bidx, 0);
702 if (IS_ERR(bh)) 701 if (IS_ERR(bh)) {
703 return PTR_ERR(bh); 702 written = PTR_ERR(bh);
703 goto err_first_bh;
704 }
704 tmp = min(bsize - boff, to - from); 705 tmp = min(bsize - boff, to - from);
705 BUG_ON(boff + tmp > bsize || tmp > bsize); 706 BUG_ON(boff + tmp > bsize || tmp > bsize);
706 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 707 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +713,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
712 bidx++; 713 bidx++;
713 } else if (bidx) { 714 } else if (bidx) {
714 bh = affs_bread_ino(inode, bidx - 1, 0); 715 bh = affs_bread_ino(inode, bidx - 1, 0);
715 if (IS_ERR(bh)) 716 if (IS_ERR(bh)) {
716 return PTR_ERR(bh); 717 written = PTR_ERR(bh);
718 goto err_first_bh;
719 }
717 } 720 }
718 while (from + bsize <= to) { 721 while (from + bsize <= to) {
719 prev_bh = bh; 722 prev_bh = bh;
720 bh = affs_getemptyblk_ino(inode, bidx); 723 bh = affs_getemptyblk_ino(inode, bidx);
721 if (IS_ERR(bh)) 724 if (IS_ERR(bh))
722 goto out; 725 goto err_bh;
723 memcpy(AFFS_DATA(bh), data + from, bsize); 726 memcpy(AFFS_DATA(bh), data + from, bsize);
724 if (buffer_new(bh)) { 727 if (buffer_new(bh)) {
725 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 728 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +754,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
751 prev_bh = bh; 754 prev_bh = bh;
752 bh = affs_bread_ino(inode, bidx, 1); 755 bh = affs_bread_ino(inode, bidx, 1);
753 if (IS_ERR(bh)) 756 if (IS_ERR(bh))
754 goto out; 757 goto err_bh;
755 tmp = min(bsize, to - from); 758 tmp = min(bsize, to - from);
756 BUG_ON(tmp > bsize); 759 BUG_ON(tmp > bsize);
757 memcpy(AFFS_DATA(bh), data + from, tmp); 760 memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +793,13 @@ done:
790 if (tmp > inode->i_size) 793 if (tmp > inode->i_size)
791 inode->i_size = AFFS_I(inode)->mmu_private = tmp; 794 inode->i_size = AFFS_I(inode)->mmu_private = tmp;
792 795
796err_first_bh:
793 unlock_page(page); 797 unlock_page(page);
794 page_cache_release(page); 798 page_cache_release(page);
795 799
796 return written; 800 return written;
797 801
798out: 802err_bh:
799 bh = prev_bh; 803 bh = prev_bh;
800 if (!written) 804 if (!written)
801 written = PTR_ERR(bh); 805 written = PTR_ERR(bh);
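The hunks above replace early returns with goto-based unwinding so the page locked at the top of affs_write_end_ofs() is always unlocked and released. A minimal user-space illustration of the idiom, with malloc/free standing in for the page and buffer_head lifetimes:

	#include <stdio.h>
	#include <stdlib.h>

	static int do_write(void)
	{
		int written = 0;
		char *page = malloc(64);	/* stands in for the locked page */
		char *bh;			/* stands in for a buffer_head */

		if (!page)
			return -12;		/* -ENOMEM */

		bh = malloc(16);
		if (!bh) {
			written = -12;
			goto err_first_bh;	/* page still needs releasing */
		}

		written = 42;			/* pretend we copied data */
		free(bh);
	err_first_bh:
		free(page);			/* mirrors unlock_page + release */
		return written;
	}

	int main(void)
	{
		printf("written=%d\n", do_write());
		return 0;
	}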
@@ -910,7 +914,7 @@ affs_truncate(struct inode *inode)
910 if (inode->i_size) { 914 if (inode->i_size) {
911 AFFS_I(inode)->i_blkcnt = last_blk + 1; 915 AFFS_I(inode)->i_blkcnt = last_blk + 1;
912 AFFS_I(inode)->i_extcnt = ext + 1; 916 AFFS_I(inode)->i_extcnt = ext + 1;
913 if (AFFS_SB(sb)->s_flags & SF_OFS) { 917 if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS)) {
914 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0); 918 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
915 u32 tmp; 919 u32 tmp;
916 if (IS_ERR(bh)) { 920 if (IS_ERR(bh)) {
@@ -964,9 +968,7 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
964} 968}
965const struct file_operations affs_file_operations = { 969const struct file_operations affs_file_operations = {
966 .llseek = generic_file_llseek, 970 .llseek = generic_file_llseek,
967 .read = new_sync_read,
968 .read_iter = generic_file_read_iter, 971 .read_iter = generic_file_read_iter,
969 .write = new_sync_write,
970 .write_iter = generic_file_write_iter, 972 .write_iter = generic_file_write_iter,
971 .mmap = generic_file_mmap, 973 .mmap = generic_file_mmap,
972 .open = affs_file_open, 974 .open = affs_file_open,
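Dropping .read/.write leaves the iterator methods as the only entry points; plain read(2) is serviced by wrapping the flat buffer in a one-segment iovec, which is what new_sync_read() did. A user-space analogue of that wrapping (all *_sketch names are hypothetical):

	#include <stdio.h>
	#include <string.h>
	#include <sys/uio.h>

	/* Toy "read_iter": copy into each segment of the iovec in turn. */
	static ssize_t read_iter_sketch(const struct iovec *iov, int n,
					const char *src)
	{
		ssize_t done = 0;
		for (int i = 0; i < n; i++) {
			memcpy(iov[i].iov_base, src + done, iov[i].iov_len);
			done += iov[i].iov_len;
		}
		return done;
	}

	/* new_sync_read()-style wrapper: one-segment iovec over the buffer. */
	static ssize_t sync_read_sketch(char *buf, size_t len, const char *src)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = len };
		return read_iter_sketch(&iov, 1, src);
	}

	int main(void)
	{
		char buf[8];
		ssize_t n = sync_read_sketch(buf, 5, "hello world");
		printf("%zd bytes: %.5s\n", n, buf);
		return 0;
	}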
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 6f34510449e8..9628003ccd2f 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -66,23 +66,23 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
66 AFFS_I(inode)->i_lastalloc = 0; 66 AFFS_I(inode)->i_lastalloc = 0;
67 AFFS_I(inode)->i_pa_cnt = 0; 67 AFFS_I(inode)->i_pa_cnt = 0;
68 68
69 if (sbi->s_flags & SF_SETMODE) 69 if (affs_test_opt(sbi->s_flags, SF_SETMODE))
70 inode->i_mode = sbi->s_mode; 70 inode->i_mode = sbi->s_mode;
71 else 71 else
72 inode->i_mode = prot_to_mode(prot); 72 inode->i_mode = prot_to_mode(prot);
73 73
74 id = be16_to_cpu(tail->uid); 74 id = be16_to_cpu(tail->uid);
75 if (id == 0 || sbi->s_flags & SF_SETUID) 75 if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
76 inode->i_uid = sbi->s_uid; 76 inode->i_uid = sbi->s_uid;
77 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) 77 else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
78 i_uid_write(inode, 0); 78 i_uid_write(inode, 0);
79 else 79 else
80 i_uid_write(inode, id); 80 i_uid_write(inode, id);
81 81
82 id = be16_to_cpu(tail->gid); 82 id = be16_to_cpu(tail->gid);
83 if (id == 0 || sbi->s_flags & SF_SETGID) 83 if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETGID))
84 inode->i_gid = sbi->s_gid; 84 inode->i_gid = sbi->s_gid;
85 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) 85 else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
86 i_gid_write(inode, 0); 86 i_gid_write(inode, 0);
87 else 87 else
88 i_gid_write(inode, id); 88 i_gid_write(inode, id);
@@ -94,7 +94,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
94 /* fall through */ 94 /* fall through */
95 case ST_USERDIR: 95 case ST_USERDIR:
96 if (be32_to_cpu(tail->stype) == ST_USERDIR || 96 if (be32_to_cpu(tail->stype) == ST_USERDIR ||
97 sbi->s_flags & SF_SETMODE) { 97 affs_test_opt(sbi->s_flags, SF_SETMODE)) {
98 if (inode->i_mode & S_IRUSR) 98 if (inode->i_mode & S_IRUSR)
99 inode->i_mode |= S_IXUSR; 99 inode->i_mode |= S_IXUSR;
100 if (inode->i_mode & S_IRGRP) 100 if (inode->i_mode & S_IRGRP)
@@ -133,7 +133,8 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
133 } 133 }
134 if (tail->link_chain) 134 if (tail->link_chain)
135 set_nlink(inode, 2); 135 set_nlink(inode, 2);
136 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 136 inode->i_mapping->a_ops = affs_test_opt(sbi->s_flags, SF_OFS) ?
137 &affs_aops_ofs : &affs_aops;
137 inode->i_op = &affs_file_inode_operations; 138 inode->i_op = &affs_file_inode_operations;
138 inode->i_fop = &affs_file_operations; 139 inode->i_fop = &affs_file_operations;
139 break; 140 break;
@@ -190,15 +191,15 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
190 if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { 191 if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
191 uid = i_uid_read(inode); 192 uid = i_uid_read(inode);
192 gid = i_gid_read(inode); 193 gid = i_gid_read(inode);
193 if (AFFS_SB(sb)->s_flags & SF_MUFS) { 194 if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_MUFS)) {
194 if (uid == 0 || uid == 0xFFFF) 195 if (uid == 0 || uid == 0xFFFF)
195 uid = uid ^ ~0; 196 uid = uid ^ ~0;
196 if (gid == 0 || gid == 0xFFFF) 197 if (gid == 0 || gid == 0xFFFF)
197 gid = gid ^ ~0; 198 gid = gid ^ ~0;
198 } 199 }
199 if (!(AFFS_SB(sb)->s_flags & SF_SETUID)) 200 if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETUID))
200 tail->uid = cpu_to_be16(uid); 201 tail->uid = cpu_to_be16(uid);
201 if (!(AFFS_SB(sb)->s_flags & SF_SETGID)) 202 if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETGID))
202 tail->gid = cpu_to_be16(gid); 203 tail->gid = cpu_to_be16(gid);
203 } 204 }
204 } 205 }
@@ -221,11 +222,14 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
221 if (error) 222 if (error)
222 goto out; 223 goto out;
223 224
224 if (((attr->ia_valid & ATTR_UID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETUID)) || 225 if (((attr->ia_valid & ATTR_UID) &&
225 ((attr->ia_valid & ATTR_GID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETGID)) || 226 affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETUID)) ||
227 ((attr->ia_valid & ATTR_GID) &&
228 affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETGID)) ||
226 ((attr->ia_valid & ATTR_MODE) && 229 ((attr->ia_valid & ATTR_MODE) &&
227 (AFFS_SB(inode->i_sb)->s_flags & (SF_SETMODE | SF_IMMUTABLE)))) { 230 (AFFS_SB(inode->i_sb)->s_flags &
228 if (!(AFFS_SB(inode->i_sb)->s_flags & SF_QUIET)) 231 (AFFS_MOUNT_SF_SETMODE | AFFS_MOUNT_SF_IMMUTABLE)))) {
232 if (!affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_QUIET))
229 error = -EPERM; 233 error = -EPERM;
230 goto out; 234 goto out;
231 } 235 }
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index ffb7bd82c2a5..ec8ca0efb960 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -53,7 +53,8 @@ affs_intl_toupper(int ch)
53static inline toupper_t 53static inline toupper_t
54affs_get_toupper(struct super_block *sb) 54affs_get_toupper(struct super_block *sb)
55{ 55{
56 return AFFS_SB(sb)->s_flags & SF_INTL ? affs_intl_toupper : affs_toupper; 56 return affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL) ?
57 affs_intl_toupper : affs_toupper;
57} 58}
58 59
59/* 60/*
@@ -275,7 +276,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
275 276
276 inode->i_op = &affs_file_inode_operations; 277 inode->i_op = &affs_file_inode_operations;
277 inode->i_fop = &affs_file_operations; 278 inode->i_fop = &affs_file_operations;
278 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 279 inode->i_mapping->a_ops = affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS) ?
280 &affs_aops_ofs : &affs_aops;
279 error = affs_add_entry(dir, inode, dentry, ST_FILE); 281 error = affs_add_entry(dir, inode, dentry, ST_FILE);
280 if (error) { 282 if (error) {
281 clear_nlink(inode); 283 clear_nlink(inode);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4cf0e9113fb6..3f89c9e05b40 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -227,22 +227,22 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
227 if (match_octal(&args[0], &option)) 227 if (match_octal(&args[0], &option))
228 return 0; 228 return 0;
229 *mode = option & 0777; 229 *mode = option & 0777;
230 *mount_opts |= SF_SETMODE; 230 affs_set_opt(*mount_opts, SF_SETMODE);
231 break; 231 break;
232 case Opt_mufs: 232 case Opt_mufs:
233 *mount_opts |= SF_MUFS; 233 affs_set_opt(*mount_opts, SF_MUFS);
234 break; 234 break;
235 case Opt_notruncate: 235 case Opt_notruncate:
236 *mount_opts |= SF_NO_TRUNCATE; 236 affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
237 break; 237 break;
238 case Opt_prefix: 238 case Opt_prefix:
239 *prefix = match_strdup(&args[0]); 239 *prefix = match_strdup(&args[0]);
240 if (!*prefix) 240 if (!*prefix)
241 return 0; 241 return 0;
242 *mount_opts |= SF_PREFIX; 242 affs_set_opt(*mount_opts, SF_PREFIX);
243 break; 243 break;
244 case Opt_protect: 244 case Opt_protect:
245 *mount_opts |= SF_IMMUTABLE; 245 affs_set_opt(*mount_opts, SF_IMMUTABLE);
246 break; 246 break;
247 case Opt_reserved: 247 case Opt_reserved:
248 if (match_int(&args[0], reserved)) 248 if (match_int(&args[0], reserved))
@@ -258,7 +258,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
258 *gid = make_kgid(current_user_ns(), option); 258 *gid = make_kgid(current_user_ns(), option);
259 if (!gid_valid(*gid)) 259 if (!gid_valid(*gid))
260 return 0; 260 return 0;
261 *mount_opts |= SF_SETGID; 261 affs_set_opt(*mount_opts, SF_SETGID);
262 break; 262 break;
263 case Opt_setuid: 263 case Opt_setuid:
264 if (match_int(&args[0], &option)) 264 if (match_int(&args[0], &option))
@@ -266,10 +266,10 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
266 *uid = make_kuid(current_user_ns(), option); 266 *uid = make_kuid(current_user_ns(), option);
267 if (!uid_valid(*uid)) 267 if (!uid_valid(*uid))
268 return 0; 268 return 0;
269 *mount_opts |= SF_SETUID; 269 affs_set_opt(*mount_opts, SF_SETUID);
270 break; 270 break;
271 case Opt_verbose: 271 case Opt_verbose:
272 *mount_opts |= SF_VERBOSE; 272 affs_set_opt(*mount_opts, SF_VERBOSE);
273 break; 273 break;
274 case Opt_volume: { 274 case Opt_volume: {
275 char *vol = match_strdup(&args[0]); 275 char *vol = match_strdup(&args[0]);
@@ -435,30 +435,31 @@ got_root:
435 case MUFS_FS: 435 case MUFS_FS:
436 case MUFS_INTLFFS: 436 case MUFS_INTLFFS:
437 case MUFS_DCFFS: 437 case MUFS_DCFFS:
438 sbi->s_flags |= SF_MUFS; 438 affs_set_opt(sbi->s_flags, SF_MUFS);
439 /* fall thru */ 439 /* fall thru */
440 case FS_INTLFFS: 440 case FS_INTLFFS:
441 case FS_DCFFS: 441 case FS_DCFFS:
442 sbi->s_flags |= SF_INTL; 442 affs_set_opt(sbi->s_flags, SF_INTL);
443 break; 443 break;
444 case MUFS_FFS: 444 case MUFS_FFS:
445 sbi->s_flags |= SF_MUFS; 445 affs_set_opt(sbi->s_flags, SF_MUFS);
446 break; 446 break;
447 case FS_FFS: 447 case FS_FFS:
448 break; 448 break;
449 case MUFS_OFS: 449 case MUFS_OFS:
450 sbi->s_flags |= SF_MUFS; 450 affs_set_opt(sbi->s_flags, SF_MUFS);
451 /* fall thru */ 451 /* fall thru */
452 case FS_OFS: 452 case FS_OFS:
453 sbi->s_flags |= SF_OFS; 453 affs_set_opt(sbi->s_flags, SF_OFS);
454 sb->s_flags |= MS_NOEXEC; 454 sb->s_flags |= MS_NOEXEC;
455 break; 455 break;
456 case MUFS_DCOFS: 456 case MUFS_DCOFS:
457 case MUFS_INTLOFS: 457 case MUFS_INTLOFS:
458 sbi->s_flags |= SF_MUFS; 458 affs_set_opt(sbi->s_flags, SF_MUFS);
459 case FS_DCOFS: 459 case FS_DCOFS:
460 case FS_INTLOFS: 460 case FS_INTLOFS:
461 sbi->s_flags |= SF_INTL | SF_OFS; 461 affs_set_opt(sbi->s_flags, SF_INTL);
462 affs_set_opt(sbi->s_flags, SF_OFS);
462 sb->s_flags |= MS_NOEXEC; 463 sb->s_flags |= MS_NOEXEC;
463 break; 464 break;
464 default: 465 default:
@@ -467,7 +468,7 @@ got_root:
467 return -EINVAL; 468 return -EINVAL;
468 } 469 }
469 470
470 if (mount_flags & SF_VERBOSE) { 471 if (affs_test_opt(mount_flags, SF_VERBOSE)) {
471 u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0]; 472 u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
472 pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n", 473 pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
473 len > 31 ? 31 : len, 474 len > 31 ? 31 : len,
@@ -478,7 +479,7 @@ got_root:
478 sb->s_flags |= MS_NODEV | MS_NOSUID; 479 sb->s_flags |= MS_NODEV | MS_NOSUID;
479 480
480 sbi->s_data_blksize = sb->s_blocksize; 481 sbi->s_data_blksize = sb->s_blocksize;
481 if (sbi->s_flags & SF_OFS) 482 if (affs_test_opt(sbi->s_flags, SF_OFS))
482 sbi->s_data_blksize -= 24; 483 sbi->s_data_blksize -= 24;
483 484
484 tmp_flags = sb->s_flags; 485 tmp_flags = sb->s_flags;
@@ -493,7 +494,7 @@ got_root:
493 if (IS_ERR(root_inode)) 494 if (IS_ERR(root_inode))
494 return PTR_ERR(root_inode); 495 return PTR_ERR(root_inode);
495 496
496 if (AFFS_SB(sb)->s_flags & SF_INTL) 497 if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL))
497 sb->s_d_op = &affs_intl_dentry_operations; 498 sb->s_d_op = &affs_intl_dentry_operations;
498 else 499 else
499 sb->s_d_op = &affs_dentry_operations; 500 sb->s_d_op = &affs_dentry_operations;
@@ -520,10 +521,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
520 int root_block; 521 int root_block;
521 unsigned long mount_flags; 522 unsigned long mount_flags;
522 int res = 0; 523 int res = 0;
523 char *new_opts = kstrdup(data, GFP_KERNEL); 524 char *new_opts;
524 char volume[32]; 525 char volume[32];
525 char *prefix = NULL; 526 char *prefix = NULL;
526 527
528 new_opts = kstrdup(data, GFP_KERNEL);
529 if (!new_opts)
530 return -ENOMEM;
531
527 pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); 532 pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
528 533
529 sync_filesystem(sb); 534 sync_filesystem(sb);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 932ce07948b3..999bc3caec92 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -31,8 +31,6 @@ const struct file_operations afs_file_operations = {
31 .open = afs_open, 31 .open = afs_open,
32 .release = afs_release, 32 .release = afs_release,
33 .llseek = generic_file_llseek, 33 .llseek = generic_file_llseek,
34 .read = new_sync_read,
35 .write = new_sync_write,
36 .read_iter = generic_file_read_iter, 34 .read_iter = generic_file_read_iter,
37 .write_iter = afs_file_write, 35 .write_iter = afs_file_write,
38 .mmap = generic_file_readonly_mmap, 36 .mmap = generic_file_readonly_mmap,
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 0dd4dafee10b..91ea1aa0d8b3 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,9 +22,12 @@
22int afs_abort_to_error(u32 abort_code) 22int afs_abort_to_error(u32 abort_code)
23{ 23{
24 switch (abort_code) { 24 switch (abort_code) {
25 /* low errno codes inserted into abort namespace */
25 case 13: return -EACCES; 26 case 13: return -EACCES;
26 case 27: return -EFBIG; 27 case 27: return -EFBIG;
27 case 30: return -EROFS; 28 case 30: return -EROFS;
29
30 /* VICE "special error" codes; 101 - 111 */
28 case VSALVAGE: return -EIO; 31 case VSALVAGE: return -EIO;
29 case VNOVNODE: return -ENOENT; 32 case VNOVNODE: return -ENOENT;
30 case VNOVOL: return -ENOMEDIUM; 33 case VNOVOL: return -ENOMEDIUM;
@@ -36,11 +39,18 @@ int afs_abort_to_error(u32 abort_code)
36 case VOVERQUOTA: return -EDQUOT; 39 case VOVERQUOTA: return -EDQUOT;
37 case VBUSY: return -EBUSY; 40 case VBUSY: return -EBUSY;
38 case VMOVED: return -ENXIO; 41 case VMOVED: return -ENXIO;
39 case 0x2f6df0a: return -EWOULDBLOCK; 42
43 /* Unified AFS error table; ET "uae" == 0x2f6df00 */
44 case 0x2f6df00: return -EPERM;
45 case 0x2f6df01: return -ENOENT;
46 case 0x2f6df04: return -EIO;
47 case 0x2f6df0a: return -EAGAIN;
48 case 0x2f6df0b: return -ENOMEM;
40 case 0x2f6df0c: return -EACCES; 49 case 0x2f6df0c: return -EACCES;
41 case 0x2f6df0f: return -EBUSY; 50 case 0x2f6df0f: return -EBUSY;
42 case 0x2f6df10: return -EEXIST; 51 case 0x2f6df10: return -EEXIST;
43 case 0x2f6df11: return -EXDEV; 52 case 0x2f6df11: return -EXDEV;
53 case 0x2f6df12: return -ENODEV;
44 case 0x2f6df13: return -ENOTDIR; 54 case 0x2f6df13: return -ENOTDIR;
45 case 0x2f6df14: return -EISDIR; 55 case 0x2f6df14: return -EISDIR;
46 case 0x2f6df15: return -EINVAL; 56 case 0x2f6df15: return -EINVAL;
@@ -54,8 +64,12 @@ int afs_abort_to_error(u32 abort_code)
54 case 0x2f6df23: return -ENAMETOOLONG; 64 case 0x2f6df23: return -ENAMETOOLONG;
55 case 0x2f6df24: return -ENOLCK; 65 case 0x2f6df24: return -ENOLCK;
56 case 0x2f6df26: return -ENOTEMPTY; 66 case 0x2f6df26: return -ENOTEMPTY;
67 case 0x2f6df28: return -EWOULDBLOCK;
68 case 0x2f6df69: return -ENOTCONN;
69 case 0x2f6df6c: return -ETIMEDOUT;
57 case 0x2f6df78: return -EDQUOT; 70 case 0x2f6df78: return -EDQUOT;
58 71
72 /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
59 case RXKADINCONSISTENCY: return -EPROTO; 73 case RXKADINCONSISTENCY: return -EPROTO;
60 case RXKADPACKETSHORT: return -EPROTO; 74 case RXKADPACKETSHORT: return -EPROTO;
61 case RXKADLEVELFAIL: return -EKEYREJECTED; 75 case RXKADLEVELFAIL: return -EKEYREJECTED;
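For the low-numbered entries, the Unified AFS block is simply base 0x2f6df00 plus (errno - 1), which is why 0x2f6df0a reads better as -EAGAIN than -EWOULDBLOCK; the higher codes do not follow the formula, hence the explicit switch. A quick user-space check of that arithmetic against Linux errno values (uae_to_errno() is a hypothetical helper, not kernel API):

	#include <errno.h>
	#include <stdio.h>

	#define UAE_BASE 0x2f6df00u

	/* Only valid for the low classical block of the table above. */
	static int uae_to_errno(unsigned int abort_code)
	{
		return -(int)(abort_code - UAE_BASE + 1);
	}

	int main(void)
	{
		/* Matches the cases above: EPERM, EAGAIN, EEXIST, ENOTDIR. */
		printf("%d %d %d %d\n",
		       uae_to_errno(0x2f6df00) == -EPERM,
		       uae_to_errno(0x2f6df0a) == -EAGAIN,
		       uae_to_errno(0x2f6df10) == -EEXIST,
		       uae_to_errno(0x2f6df13) == -ENOTDIR);
		return 0;
	}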
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index dbc732e9a5c0..3a57a1b0fb51 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -770,15 +770,12 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
770void afs_send_empty_reply(struct afs_call *call) 770void afs_send_empty_reply(struct afs_call *call)
771{ 771{
772 struct msghdr msg; 772 struct msghdr msg;
773 struct kvec iov[1];
774 773
775 _enter(""); 774 _enter("");
776 775
777 iov[0].iov_base = NULL;
778 iov[0].iov_len = 0;
779 msg.msg_name = NULL; 776 msg.msg_name = NULL;
780 msg.msg_namelen = 0; 777 msg.msg_namelen = 0;
781 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */ 778 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
782 msg.msg_control = NULL; 779 msg.msg_control = NULL;
783 msg.msg_controllen = 0; 780 msg.msg_controllen = 0;
784 msg.msg_flags = 0; 781 msg.msg_flags = 0;
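Since the reply carries no payload, the dummy kvec can go: an iterator over a NULL array with zero segments and zero bytes is valid. A sketch of that initialization; the types here are simplified stand-ins for the kernel's kvec/iov_iter:

	#include <stddef.h>
	#include <stdio.h>

	struct kvec { void *iov_base; size_t iov_len; };
	struct iter_sketch {
		const struct kvec *kvec;
		unsigned long nr_segs;
		size_t count;
	};

	static void iter_kvec_init(struct iter_sketch *it, const struct kvec *kv,
				   unsigned long nr_segs, size_t count)
	{
		it->kvec = kv;		/* NULL is fine when nr_segs == 0 */
		it->nr_segs = nr_segs;
		it->count = count;
	}

	int main(void)
	{
		struct iter_sketch it;

		iter_kvec_init(&it, NULL, 0, 0);	/* what the patch does */
		printf("bytes to send: %zu\n", it.count);
		return 0;
	}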
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/aio.h>
18#include "internal.h" 17#include "internal.h"
19 18
20static int afs_write_back_from_locked_page(struct afs_writeback *wb, 19static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1854c1..480440f4701f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
77 unsigned reqs_available; 77 unsigned reqs_available;
78}; 78};
79 79
80struct ctx_rq_wait {
81 struct completion comp;
82 atomic_t count;
83};
84
80struct kioctx { 85struct kioctx {
81 struct percpu_ref users; 86 struct percpu_ref users;
82 atomic_t dead; 87 atomic_t dead;
@@ -115,7 +120,7 @@ struct kioctx {
115 /* 120 /*
116 * signals when all in-flight requests are done 121 * signals when all in-flight requests are done
117 */ 122 */
118 struct completion *requests_done; 123 struct ctx_rq_wait *rq_wait;
119 124
120 struct { 125 struct {
121 /* 126 /*
@@ -151,6 +156,38 @@ struct kioctx {
151 unsigned id; 156 unsigned id;
152}; 157};
153 158
159/*
160 * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
161 * cancelled or completed (this makes a certain amount of sense because
162 * successful cancellation - io_cancel() - does deliver the completion to
163 * userspace).
164 *
165 * And since most things don't implement kiocb cancellation and we'd really like
166 * kiocb completion to be lockless when possible, we use ki_cancel to
167 * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
168 * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
169 */
170#define KIOCB_CANCELLED ((void *) (~0ULL))
171
172struct aio_kiocb {
173 struct kiocb common;
174
175 struct kioctx *ki_ctx;
176 kiocb_cancel_fn *ki_cancel;
177
178 struct iocb __user *ki_user_iocb; /* user's aiocb */
179 __u64 ki_user_data; /* user's data for completion */
180
181 struct list_head ki_list; /* the aio core uses this
182 * for cancellation */
183
184 /*
185 * If the aio_resfd field of the userspace iocb is not zero,
186 * this is the underlying eventfd context to deliver events to.
187 */
188 struct eventfd_ctx *ki_eventfd;
189};
190
154/*------ sysctl variables----*/ 191/*------ sysctl variables----*/
155static DEFINE_SPINLOCK(aio_nr_lock); 192static DEFINE_SPINLOCK(aio_nr_lock);
156unsigned long aio_nr; /* current system wide number of aio requests */ 193unsigned long aio_nr; /* current system wide number of aio requests */
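struct kiocb stays the generic I/O handle while the aio-only state moves into the wrapping struct aio_kiocb; because the kiocb is embedded as member "common", aio code recovers its wrapper with container_of(). The pattern reduced to user space:

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct kiocb { long ki_pos; };

	struct aio_kiocb {
		struct kiocb common;			/* what generic code sees */
		unsigned long long ki_user_data;	/* aio-private state */
	};

	static void complete_sketch(struct kiocb *kiocb)
	{
		/* Generic code hands back the inner kiocb; get the wrapper. */
		struct aio_kiocb *req =
			container_of(kiocb, struct aio_kiocb, common);
		printf("user data: %llu\n", req->ki_user_data);
	}

	int main(void)
	{
		struct aio_kiocb req = { .common = { .ki_pos = 0 },
					 .ki_user_data = 7 };
		complete_sketch(&req.common);
		return 0;
	}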
@@ -220,7 +257,7 @@ static int __init aio_setup(void)
220 if (IS_ERR(aio_mnt)) 257 if (IS_ERR(aio_mnt))
221 panic("Failed to create aio fs mount."); 258 panic("Failed to create aio fs mount.");
222 259
223 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 260 kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
224 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 261 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
225 262
226 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); 263 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -278,11 +315,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
278 return 0; 315 return 0;
279} 316}
280 317
281static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) 318static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
282{ 319{
283 struct mm_struct *mm = vma->vm_mm; 320 struct mm_struct *mm = vma->vm_mm;
284 struct kioctx_table *table; 321 struct kioctx_table *table;
285 int i; 322 int i, res = -EINVAL;
286 323
287 spin_lock(&mm->ioctx_lock); 324 spin_lock(&mm->ioctx_lock);
288 rcu_read_lock(); 325 rcu_read_lock();
@@ -292,13 +329,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
292 329
293 ctx = table->table[i]; 330 ctx = table->table[i];
294 if (ctx && ctx->aio_ring_file == file) { 331 if (ctx && ctx->aio_ring_file == file) {
295 ctx->user_id = ctx->mmap_base = vma->vm_start; 332 if (!atomic_read(&ctx->dead)) {
333 ctx->user_id = ctx->mmap_base = vma->vm_start;
334 res = 0;
335 }
296 break; 336 break;
297 } 337 }
298 } 338 }
299 339
300 rcu_read_unlock(); 340 rcu_read_unlock();
301 spin_unlock(&mm->ioctx_lock); 341 spin_unlock(&mm->ioctx_lock);
342 return res;
302} 343}
303 344
304static const struct file_operations aio_ring_fops = { 345static const struct file_operations aio_ring_fops = {
@@ -480,8 +521,9 @@ static int aio_setup_ring(struct kioctx *ctx)
480#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) 521#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
481#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) 522#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
482 523
483void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) 524void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
484{ 525{
526 struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
485 struct kioctx *ctx = req->ki_ctx; 527 struct kioctx *ctx = req->ki_ctx;
486 unsigned long flags; 528 unsigned long flags;
487 529
@@ -496,7 +538,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
496} 538}
497EXPORT_SYMBOL(kiocb_set_cancel_fn); 539EXPORT_SYMBOL(kiocb_set_cancel_fn);
498 540
499static int kiocb_cancel(struct kiocb *kiocb) 541static int kiocb_cancel(struct aio_kiocb *kiocb)
500{ 542{
501 kiocb_cancel_fn *old, *cancel; 543 kiocb_cancel_fn *old, *cancel;
502 544
@@ -514,7 +556,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
514 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); 556 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
515 } while (cancel != old); 557 } while (cancel != old);
516 558
517 return cancel(kiocb); 559 return cancel(&kiocb->common);
518} 560}
519 561
520static void free_ioctx(struct work_struct *work) 562static void free_ioctx(struct work_struct *work)
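kiocb_cancel() claims the cancel slot by swinging ki_cancel to the KIOCB_CANCELLED sentinel with a cmpxchg() loop, so exactly one of the racing canceller and completer runs the callback. A user-space sketch using the GCC/Clang __sync builtin in place of cmpxchg() (the void-pointer/function-pointer casts are a POSIX-ism, fine for a sketch):

	#include <stdio.h>

	#define KIOCB_CANCELLED ((void *)(~0UL))

	typedef int (*cancel_fn)(void);

	static int my_cancel(void) { puts("cancel callback ran"); return 0; }

	static void *ki_cancel = (void *)my_cancel;

	static int cancel_sketch(void)
	{
		void *old, *cur = ki_cancel;

		do {
			old = cur;
			if (!old || old == KIOCB_CANCELLED)
				return -22;	/* -EINVAL: already done */
			/* atomically swing the slot to the sentinel */
			cur = __sync_val_compare_and_swap(&ki_cancel, old,
							  KIOCB_CANCELLED);
		} while (cur != old);

		return ((cancel_fn)old)();	/* the winner runs it once */
	}

	int main(void)
	{
		int a = cancel_sketch();
		int b = cancel_sketch();
		printf("first: %d, second: %d\n", a, b);
		return 0;
	}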
@@ -535,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
535 struct kioctx *ctx = container_of(ref, struct kioctx, reqs); 577 struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
536 578
537 /* At this point we know that there are no in-flight requests */ 579
538 if (ctx->requests_done) 580 if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
539 complete(ctx->requests_done); 581 complete(&ctx->rq_wait->comp);
540 582
541 INIT_WORK(&ctx->free_work, free_ioctx); 583 INIT_WORK(&ctx->free_work, free_ioctx);
542 schedule_work(&ctx->free_work); 584 schedule_work(&ctx->free_work);
@@ -550,13 +592,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
550static void free_ioctx_users(struct percpu_ref *ref) 592static void free_ioctx_users(struct percpu_ref *ref)
551{ 593{
552 struct kioctx *ctx = container_of(ref, struct kioctx, users); 594 struct kioctx *ctx = container_of(ref, struct kioctx, users);
553 struct kiocb *req; 595 struct aio_kiocb *req;
554 596
555 spin_lock_irq(&ctx->ctx_lock); 597 spin_lock_irq(&ctx->ctx_lock);
556 598
557 while (!list_empty(&ctx->active_reqs)) { 599 while (!list_empty(&ctx->active_reqs)) {
558 req = list_first_entry(&ctx->active_reqs, 600 req = list_first_entry(&ctx->active_reqs,
559 struct kiocb, ki_list); 601 struct aio_kiocb, ki_list);
560 602
561 list_del_init(&req->ki_list); 603 list_del_init(&req->ki_list);
562 kiocb_cancel(req); 604 kiocb_cancel(req);
@@ -655,8 +697,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
655 nr_events *= 2; 697 nr_events *= 2;
656 698
657 /* Prevent overflows */ 699 /* Prevent overflows */
658 if ((nr_events > (0x10000000U / sizeof(struct io_event))) || 700 if (nr_events > (0x10000000U / sizeof(struct io_event))) {
659 (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
660 pr_debug("ENOMEM: nr_events too high\n"); 701 pr_debug("ENOMEM: nr_events too high\n");
661 return ERR_PTR(-EINVAL); 702 return ERR_PTR(-EINVAL);
662 } 703 }
@@ -727,6 +768,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
727err_cleanup: 768err_cleanup:
728 aio_nr_sub(ctx->max_reqs); 769 aio_nr_sub(ctx->max_reqs);
729err_ctx: 770err_ctx:
771 atomic_set(&ctx->dead, 1);
772 if (ctx->mmap_size)
773 vm_munmap(ctx->mmap_base, ctx->mmap_size);
730 aio_free_ring(ctx); 774 aio_free_ring(ctx);
731err: 775err:
732 mutex_unlock(&ctx->ring_lock); 776 mutex_unlock(&ctx->ring_lock);
@@ -744,15 +788,16 @@ err:
744 * the rapid destruction of the kioctx. 788 * the rapid destruction of the kioctx.
745 */ 789 */
746static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, 790static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
747 struct completion *requests_done) 791 struct ctx_rq_wait *wait)
748{ 792{
749 struct kioctx_table *table; 793 struct kioctx_table *table;
750 794
751 if (atomic_xchg(&ctx->dead, 1)) 795 spin_lock(&mm->ioctx_lock);
796 if (atomic_xchg(&ctx->dead, 1)) {
797 spin_unlock(&mm->ioctx_lock);
752 return -EINVAL; 798 return -EINVAL;
799 }
753 800
754
755 spin_lock(&mm->ioctx_lock);
756 table = rcu_dereference_raw(mm->ioctx_table); 801 table = rcu_dereference_raw(mm->ioctx_table);
757 WARN_ON(ctx != table->table[ctx->id]); 802 WARN_ON(ctx != table->table[ctx->id]);
758 table->table[ctx->id] = NULL; 803 table->table[ctx->id] = NULL;
@@ -773,27 +818,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
773 if (ctx->mmap_size) 818 if (ctx->mmap_size)
774 vm_munmap(ctx->mmap_base, ctx->mmap_size); 819 vm_munmap(ctx->mmap_base, ctx->mmap_size);
775 820
776 ctx->requests_done = requests_done; 821 ctx->rq_wait = wait;
777 percpu_ref_kill(&ctx->users); 822 percpu_ref_kill(&ctx->users);
778 return 0; 823 return 0;
779} 824}
780 825
781/* wait_on_sync_kiocb:
782 * Waits on the given sync kiocb to complete.
783 */
784ssize_t wait_on_sync_kiocb(struct kiocb *req)
785{
786 while (!req->ki_ctx) {
787 set_current_state(TASK_UNINTERRUPTIBLE);
788 if (req->ki_ctx)
789 break;
790 io_schedule();
791 }
792 __set_current_state(TASK_RUNNING);
793 return req->ki_user_data;
794}
795EXPORT_SYMBOL(wait_on_sync_kiocb);
796
797/* 826/*
798 * exit_aio: called when the last user of mm goes away. At this point, there is 827 * exit_aio: called when the last user of mm goes away. At this point, there is
799 * no way for any new requests to be submitted or any of the io_* syscalls to be 828
@@ -805,18 +834,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
805void exit_aio(struct mm_struct *mm) 834void exit_aio(struct mm_struct *mm)
806{ 835{
807 struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); 836 struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
808 int i; 837 struct ctx_rq_wait wait;
838 int i, skipped;
809 839
810 if (!table) 840 if (!table)
811 return; 841 return;
812 842
843 atomic_set(&wait.count, table->nr);
844 init_completion(&wait.comp);
845
846 skipped = 0;
813 for (i = 0; i < table->nr; ++i) { 847 for (i = 0; i < table->nr; ++i) {
814 struct kioctx *ctx = table->table[i]; 848 struct kioctx *ctx = table->table[i];
815 struct completion requests_done =
816 COMPLETION_INITIALIZER_ONSTACK(requests_done);
817 849
818 if (!ctx) 850 if (!ctx) {
851 skipped++;
819 continue; 852 continue;
853 }
854
820 /* 855 /*
821 * We don't need to bother with munmap() here - exit_mmap(mm) 856 * We don't need to bother with munmap() here - exit_mmap(mm)
822 * is coming and it'll unmap everything. And we simply can't, 857 * is coming and it'll unmap everything. And we simply can't,
@@ -825,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
825 * that it needs to unmap the area, just set it to 0. 860 * that it needs to unmap the area, just set it to 0.
826 */ 861 */
827 ctx->mmap_size = 0; 862 ctx->mmap_size = 0;
828 kill_ioctx(mm, ctx, &requests_done); 863 kill_ioctx(mm, ctx, &wait);
864 }
829 865
866 if (!atomic_sub_and_test(skipped, &wait.count)) {
830 /* Wait until all IO for the context are done. */ 867 /* Wait until all IO for the context are done. */
831 wait_for_completion(&requests_done); 868 wait_for_completion(&wait.comp);
832 } 869 }
833 870
834 RCU_INIT_POINTER(mm->ioctx_table, NULL); 871 RCU_INIT_POINTER(mm->ioctx_table, NULL);
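Instead of one on-stack completion per context, exit_aio() now shares a single ctx_rq_wait: an atomic countdown plus one completion, fired by whichever teardown decrements it to zero, with NULL table slots subtracted in one go. A pthread analogue of that countdown (build with -lpthread; names loosely follow the patch):

	#include <pthread.h>
	#include <stdio.h>

	struct ctx_rq_wait {
		pthread_mutex_t lock;
		pthread_cond_t comp;
		int count;
	};

	static struct ctx_rq_wait wait_all = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
	};

	static void rq_wait_sub(struct ctx_rq_wait *w, int n)
	{
		pthread_mutex_lock(&w->lock);
		w->count -= n;
		if (w->count == 0)
			pthread_cond_signal(&w->comp);	/* last one fires */
		pthread_mutex_unlock(&w->lock);
	}

	static void *ctx_teardown(void *arg)
	{
		rq_wait_sub(&wait_all, 1);	/* free_ioctx_reqs() analogue */
		return NULL;
	}

	int main(void)
	{
		pthread_t t[3];
		int nr = 4, skipped = 1;	/* one NULL table slot */

		wait_all.count = nr;
		for (int i = 0; i < nr - skipped; i++)
			pthread_create(&t[i], NULL, ctx_teardown, NULL);

		rq_wait_sub(&wait_all, skipped); /* atomic_sub_and_test(skipped) */
		pthread_mutex_lock(&wait_all.lock);
		while (wait_all.count)
			pthread_cond_wait(&wait_all.comp, &wait_all.lock);
		pthread_mutex_unlock(&wait_all.lock);
		puts("all contexts drained");

		for (int i = 0; i < nr - skipped; i++)
			pthread_join(t[i], NULL);
		return 0;
	}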
@@ -948,9 +985,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
948 * Allocate a slot for an aio request. 985 * Allocate a slot for an aio request.
949 * Returns NULL if no requests are free. 986 * Returns NULL if no requests are free.
950 */ 987 */
951static inline struct kiocb *aio_get_req(struct kioctx *ctx) 988static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
952{ 989{
953 struct kiocb *req; 990 struct aio_kiocb *req;
954 991
955 if (!get_reqs_available(ctx)) { 992 if (!get_reqs_available(ctx)) {
956 user_refill_reqs_available(ctx); 993 user_refill_reqs_available(ctx);
@@ -971,10 +1008,10 @@ out_put:
971 return NULL; 1008 return NULL;
972} 1009}
973 1010
974static void kiocb_free(struct kiocb *req) 1011static void kiocb_free(struct aio_kiocb *req)
975{ 1012{
976 if (req->ki_filp) 1013 if (req->common.ki_filp)
977 fput(req->ki_filp); 1014 fput(req->common.ki_filp);
978 if (req->ki_eventfd != NULL) 1015 if (req->ki_eventfd != NULL)
979 eventfd_ctx_put(req->ki_eventfd); 1016 eventfd_ctx_put(req->ki_eventfd);
980 kmem_cache_free(kiocb_cachep, req); 1017 kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1047,9 @@ out:
1010/* aio_complete 1047/* aio_complete
1011 * Called when the io request on the given iocb is complete. 1048 * Called when the io request on the given iocb is complete.
1012 */ 1049 */
1013void aio_complete(struct kiocb *iocb, long res, long res2) 1050static void aio_complete(struct kiocb *kiocb, long res, long res2)
1014{ 1051{
1052 struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
1015 struct kioctx *ctx = iocb->ki_ctx; 1053 struct kioctx *ctx = iocb->ki_ctx;
1016 struct aio_ring *ring; 1054 struct aio_ring *ring;
1017 struct io_event *ev_page, *event; 1055 struct io_event *ev_page, *event;
@@ -1025,13 +1063,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1025 * ref, no other paths have a way to get another ref 1063 * ref, no other paths have a way to get another ref
1026 * - the sync task helpfully left a reference to itself in the iocb 1064 * - the sync task helpfully left a reference to itself in the iocb
1027 */ 1065 */
1028 if (is_sync_kiocb(iocb)) { 1066 BUG_ON(is_sync_kiocb(kiocb));
1029 iocb->ki_user_data = res;
1030 smp_wmb();
1031 iocb->ki_ctx = ERR_PTR(-EXDEV);
1032 wake_up_process(iocb->ki_obj.tsk);
1033 return;
1034 }
1035 1067
1036 if (iocb->ki_list.next) { 1068 if (iocb->ki_list.next) {
1037 unsigned long flags; 1069 unsigned long flags;
@@ -1057,7 +1089,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1057 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1089 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1058 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 1090 event = ev_page + pos % AIO_EVENTS_PER_PAGE;
1059 1091
1060 event->obj = (u64)(unsigned long)iocb->ki_obj.user; 1092 event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
1061 event->data = iocb->ki_user_data; 1093 event->data = iocb->ki_user_data;
1062 event->res = res; 1094 event->res = res;
1063 event->res2 = res2; 1095 event->res2 = res2;
@@ -1066,7 +1098,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1066 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1098 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1067 1099
1068 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", 1100 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
1069 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, 1101 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
1070 res, res2); 1102 res, res2);
1071 1103
1072 /* after flagging the request as done, we 1104 /* after flagging the request as done, we
@@ -1113,7 +1145,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1113 1145
1114 percpu_ref_put(&ctx->reqs); 1146 percpu_ref_put(&ctx->reqs);
1115} 1147}
1116EXPORT_SYMBOL(aio_complete);
1117 1148
1118/* aio_read_events_ring 1149/* aio_read_events_ring
1119 * Pull an event off of the ioctx's event ring. Returns the number of 1150 * Pull an event off of the ioctx's event ring. Returns the number of
@@ -1313,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1313{ 1344{
1314 struct kioctx *ioctx = lookup_ioctx(ctx); 1345 struct kioctx *ioctx = lookup_ioctx(ctx);
1315 if (likely(NULL != ioctx)) { 1346 if (likely(NULL != ioctx)) {
1316 struct completion requests_done = 1347 struct ctx_rq_wait wait;
1317 COMPLETION_INITIALIZER_ONSTACK(requests_done);
1318 int ret; 1348 int ret;
1319 1349
1350 init_completion(&wait.comp);
1351 atomic_set(&wait.count, 1);
1352
1320 /* Pass requests_done to kill_ioctx() where it can be set 1353 /* Pass requests_done to kill_ioctx() where it can be set
1321 * in a thread-safe way. If we try to set it here then we have 1354 * in a thread-safe way. If we try to set it here then we have
1322 * a race condition if two io_destroy() calls happen simultaneously. 1355
1323 */ 1356 */
1324 ret = kill_ioctx(current->mm, ioctx, &requests_done); 1357 ret = kill_ioctx(current->mm, ioctx, &wait);
1325 percpu_ref_put(&ioctx->users); 1358 percpu_ref_put(&ioctx->users);
1326 1359
1327 /* Wait until all IO for the context are done. Otherwise kernel 1360 /* Wait until all IO for the context are done. Otherwise kernel
@@ -1329,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1329 * is destroyed. 1362 * is destroyed.
1330 */ 1363 */
1331 if (!ret) 1364 if (!ret)
1332 wait_for_completion(&requests_done); 1365 wait_for_completion(&wait.comp);
1333 1366
1334 return ret; 1367 return ret;
1335 } 1368 }
@@ -1337,50 +1370,21 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1337 return -EINVAL; 1370 return -EINVAL;
1338} 1371}
1339 1372
1340typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
1341 unsigned long, loff_t);
1342typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); 1373typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
1343 1374
1344static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, 1375static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
1345 int rw, char __user *buf, 1376 struct iovec **iovec,
1346 unsigned long *nr_segs, 1377 bool compat,
1347 struct iovec **iovec, 1378 struct iov_iter *iter)
1348 bool compat)
1349{ 1379{
1350 ssize_t ret;
1351
1352 *nr_segs = kiocb->ki_nbytes;
1353
1354#ifdef CONFIG_COMPAT 1380#ifdef CONFIG_COMPAT
1355 if (compat) 1381 if (compat)
1356 ret = compat_rw_copy_check_uvector(rw, 1382 return compat_import_iovec(rw,
1357 (struct compat_iovec __user *)buf, 1383 (struct compat_iovec __user *)buf,
1358 *nr_segs, UIO_FASTIOV, *iovec, iovec); 1384 len, UIO_FASTIOV, iovec, iter);
1359 else
1360#endif 1385#endif
1361 ret = rw_copy_check_uvector(rw, 1386 return import_iovec(rw, (struct iovec __user *)buf,
1362 (struct iovec __user *)buf, 1387 len, UIO_FASTIOV, iovec, iter);
1363 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1364 if (ret < 0)
1365 return ret;
1366
1367 /* ki_nbytes now reflect bytes instead of segs */
1368 kiocb->ki_nbytes = ret;
1369 return 0;
1370}
1371
1372static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1373 int rw, char __user *buf,
1374 unsigned long *nr_segs,
1375 struct iovec *iovec)
1376{
1377 if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
1378 return -EFAULT;
1379
1380 iovec->iov_base = buf;
1381 iovec->iov_len = kiocb->ki_nbytes;
1382 *nr_segs = 1;
1383 return 0;
1384} 1388}
1385 1389
1386/* 1390/*
@@ -1388,14 +1392,12 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1388 * Performs the initial checks and io submission. 1392 * Performs the initial checks and io submission.
1389 */ 1393 */
1390static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, 1394static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1391 char __user *buf, bool compat) 1395 char __user *buf, size_t len, bool compat)
1392{ 1396{
1393 struct file *file = req->ki_filp; 1397 struct file *file = req->ki_filp;
1394 ssize_t ret; 1398 ssize_t ret;
1395 unsigned long nr_segs;
1396 int rw; 1399 int rw;
1397 fmode_t mode; 1400 fmode_t mode;
1398 aio_rw_op *rw_op;
1399 rw_iter_op *iter_op; 1401 rw_iter_op *iter_op;
1400 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 1402 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
1401 struct iov_iter iter; 1403 struct iov_iter iter;
@@ -1405,7 +1407,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1405 case IOCB_CMD_PREADV: 1407 case IOCB_CMD_PREADV:
1406 mode = FMODE_READ; 1408 mode = FMODE_READ;
1407 rw = READ; 1409 rw = READ;
1408 rw_op = file->f_op->aio_read;
1409 iter_op = file->f_op->read_iter; 1410 iter_op = file->f_op->read_iter;
1410 goto rw_common; 1411 goto rw_common;
1411 1412
@@ -1413,51 +1414,40 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1413 case IOCB_CMD_PWRITEV: 1414 case IOCB_CMD_PWRITEV:
1414 mode = FMODE_WRITE; 1415 mode = FMODE_WRITE;
1415 rw = WRITE; 1416 rw = WRITE;
1416 rw_op = file->f_op->aio_write;
1417 iter_op = file->f_op->write_iter; 1417 iter_op = file->f_op->write_iter;
1418 goto rw_common; 1418 goto rw_common;
1419rw_common: 1419rw_common:
1420 if (unlikely(!(file->f_mode & mode))) 1420 if (unlikely(!(file->f_mode & mode)))
1421 return -EBADF; 1421 return -EBADF;
1422 1422
1423 if (!rw_op && !iter_op) 1423 if (!iter_op)
1424 return -EINVAL; 1424 return -EINVAL;
1425 1425
1426 ret = (opcode == IOCB_CMD_PREADV || 1426 if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
1427 opcode == IOCB_CMD_PWRITEV) 1427 ret = aio_setup_vectored_rw(rw, buf, len,
1428 ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, 1428 &iovec, compat, &iter);
1429 &iovec, compat) 1429 else {
1430 : aio_setup_single_vector(req, rw, buf, &nr_segs, 1430 ret = import_single_range(rw, buf, len, iovec, &iter);
1431 iovec); 1431 iovec = NULL;
1432 }
1432 if (!ret) 1433 if (!ret)
1433 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1434 ret = rw_verify_area(rw, file, &req->ki_pos,
1435 iov_iter_count(&iter));
1434 if (ret < 0) { 1436 if (ret < 0) {
1435 if (iovec != inline_vecs) 1437 kfree(iovec);
1436 kfree(iovec);
1437 return ret; 1438 return ret;
1438 } 1439 }
1439 1440
1440 req->ki_nbytes = ret; 1441 len = ret;
1441
1442 /* XXX: move/kill - rw_verify_area()? */
1443 /* This matches the pread()/pwrite() logic */
1444 if (req->ki_pos < 0) {
1445 ret = -EINVAL;
1446 break;
1447 }
1448 1442
1449 if (rw == WRITE) 1443 if (rw == WRITE)
1450 file_start_write(file); 1444 file_start_write(file);
1451 1445
1452 if (iter_op) { 1446 ret = iter_op(req, &iter);
1453 iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
1454 ret = iter_op(req, &iter);
1455 } else {
1456 ret = rw_op(req, iovec, nr_segs, req->ki_pos);
1457 }
1458 1447
1459 if (rw == WRITE) 1448 if (rw == WRITE)
1460 file_end_write(file); 1449 file_end_write(file);
1450 kfree(iovec);
1461 break; 1451 break;
1462 1452
1463 case IOCB_CMD_FDSYNC: 1453 case IOCB_CMD_FDSYNC:
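The submission path now builds its iov_iter via import_iovec()/import_single_range(). The contract that makes the unconditional kfree(iovec) safe: the helper either keeps the caller's inline array and returns NULL through the iovec pointer, or hands back a heap allocation. A user-space sketch of that contract, with a deliberately simplified signature:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/uio.h>

	#define UIO_FASTIOV 8

	static int import_iovec_sketch(const struct iovec *uvec, unsigned long nr,
				       struct iovec *fast, struct iovec **iovp)
	{
		struct iovec *v = fast;

		if (nr > UIO_FASTIOV) {
			v = malloc(nr * sizeof(*v));	/* slow path */
			if (!v)
				return -12;		/* -ENOMEM */
		}
		memcpy(v, uvec, nr * sizeof(*v));
		*iovp = (v == fast) ? NULL : v;	/* caller frees heap only */
		return 0;
	}

	int main(void)
	{
		struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
		struct iovec user[2] = { { 0 }, { 0 } };

		if (import_iovec_sketch(user, 2, inline_vecs, &iovec) == 0) {
			/* ... do the I/O ... */
			free(iovec);	/* free(NULL) is a no-op */
			puts("done");
		}
		return 0;
	}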
@@ -1479,9 +1469,6 @@ rw_common:
1479 return -EINVAL; 1469 return -EINVAL;
1480 } 1470 }
1481 1471
1482 if (iovec != inline_vecs)
1483 kfree(iovec);
1484
1485 if (ret != -EIOCBQUEUED) { 1472 if (ret != -EIOCBQUEUED) {
1486 /* 1473 /*
1487 * There's no easy way to restart the syscall since other AIO's 1474 * There's no easy way to restart the syscall since other AIO's
@@ -1500,7 +1487,7 @@ rw_common:
1500static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1487static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1501 struct iocb *iocb, bool compat) 1488 struct iocb *iocb, bool compat)
1502{ 1489{
1503 struct kiocb *req; 1490 struct aio_kiocb *req;
1504 ssize_t ret; 1491 ssize_t ret;
1505 1492
1506 /* enforce forwards compatibility on users */ 1493 /* enforce forwards compatibility on users */
@@ -1523,11 +1510,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1523 if (unlikely(!req)) 1510 if (unlikely(!req))
1524 return -EAGAIN; 1511 return -EAGAIN;
1525 1512
1526 req->ki_filp = fget(iocb->aio_fildes); 1513 req->common.ki_filp = fget(iocb->aio_fildes);
1527 if (unlikely(!req->ki_filp)) { 1514 if (unlikely(!req->common.ki_filp)) {
1528 ret = -EBADF; 1515 ret = -EBADF;
1529 goto out_put_req; 1516 goto out_put_req;
1530 } 1517 }
1518 req->common.ki_pos = iocb->aio_offset;
1519 req->common.ki_complete = aio_complete;
1520 req->common.ki_flags = iocb_flags(req->common.ki_filp);
1531 1521
1532 if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1522 if (iocb->aio_flags & IOCB_FLAG_RESFD) {
1533 /* 1523 /*
@@ -1542,6 +1532,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1542 req->ki_eventfd = NULL; 1532 req->ki_eventfd = NULL;
1543 goto out_put_req; 1533 goto out_put_req;
1544 } 1534 }
1535
1536 req->common.ki_flags |= IOCB_EVENTFD;
1545 } 1537 }
1546 1538
1547 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); 1539 ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1550,13 +1542,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1550 goto out_put_req; 1542 goto out_put_req;
1551 } 1543 }
1552 1544
1553 req->ki_obj.user = user_iocb; 1545 req->ki_user_iocb = user_iocb;
1554 req->ki_user_data = iocb->aio_data; 1546 req->ki_user_data = iocb->aio_data;
1555 req->ki_pos = iocb->aio_offset;
1556 req->ki_nbytes = iocb->aio_nbytes;
1557 1547
1558 ret = aio_run_iocb(req, iocb->aio_lio_opcode, 1548 ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
1559 (char __user *)(unsigned long)iocb->aio_buf, 1549 (char __user *)(unsigned long)iocb->aio_buf,
1550 iocb->aio_nbytes,
1560 compat); 1551 compat);
1561 if (ret) 1552 if (ret)
1562 goto out_put_req; 1553 goto out_put_req;
@@ -1643,10 +1634,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1643/* lookup_kiocb 1634/* lookup_kiocb
1644 * Finds a given iocb for cancellation. 1635 * Finds a given iocb for cancellation.
1645 */ 1636 */
1646static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, 1637static struct aio_kiocb *
1647 u32 key) 1638lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
1648{ 1639{
1649 struct list_head *pos; 1640 struct aio_kiocb *kiocb;
1650 1641
1651 assert_spin_locked(&ctx->ctx_lock); 1642 assert_spin_locked(&ctx->ctx_lock);
1652 1643
@@ -1654,9 +1645,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
1654 return NULL; 1645 return NULL;
1655 1646
1656 /* TODO: use a hash or array, this sucks. */ 1647 /* TODO: use a hash or array, this sucks. */
1657 list_for_each(pos, &ctx->active_reqs) { 1648 list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
1658 struct kiocb *kiocb = list_kiocb(pos); 1649 if (kiocb->ki_user_iocb == iocb)
1659 if (kiocb->ki_obj.user == iocb)
1660 return kiocb; 1650 return kiocb;
1661 } 1651 }
1662 return NULL; 1652 return NULL;
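list_for_each_entry() folds the list_head-to-container conversion into the loop header, removing the raw cursor and the list_kiocb() helper. The macro reduced to user space (GNU C typeof, as in the kernel):

	#include <stddef.h>
	#include <stdio.h>

	struct list_head { struct list_head *next; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	#define list_for_each_entry(pos, head, member)			\
		for (pos = container_of((head)->next, typeof(*pos), member); \
		     &pos->member != (head);				\
		     pos = container_of(pos->member.next, typeof(*pos), member))

	struct aio_kiocb {
		int id;
		struct list_head ki_list;
	};

	int main(void)
	{
		struct aio_kiocb a = { .id = 1 }, b = { .id = 2 };
		struct list_head head = { &a.ki_list };
		struct aio_kiocb *kiocb;

		a.ki_list.next = &b.ki_list;
		b.ki_list.next = &head;	/* circular, like the kernel's */

		list_for_each_entry(kiocb, &head, ki_list)
			printf("kiocb %d\n", kiocb->id);
		return 0;
	}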
@@ -1676,7 +1666,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1676 struct io_event __user *, result) 1666 struct io_event __user *, result)
1677{ 1667{
1678 struct kioctx *ctx; 1668 struct kioctx *ctx;
1679 struct kiocb *kiocb; 1669 struct aio_kiocb *kiocb;
1680 u32 key; 1670 u32 key;
1681 int ret; 1671 int ret;
1682 1672
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8e98cf954bab..d10e619632ab 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -213,7 +213,7 @@ void autofs4_clean_ino(struct autofs_info *);
213 213
214static inline int autofs_prepare_pipe(struct file *pipe) 214static inline int autofs_prepare_pipe(struct file *pipe)
215{ 215{
216 if (!pipe->f_op->write) 216 if (!(pipe->f_mode & FMODE_CAN_WRITE))
217 return -EINVAL; 217 return -EINVAL;
218 if (!S_ISFIFO(file_inode(pipe)->i_mode)) 218 if (!S_ISFIFO(file_inode(pipe)->i_mode))
219 return -EINVAL; 219 return -EINVAL;
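With ->write disappearing from file_operations, "can the kernel write to this file" is answered by the FMODE_CAN_WRITE mode bit, set at open time when a write method exists, rather than by poking at method pointers. A sketch of the test; the flag value below is an assumption for illustration, not necessarily the kernel's:

	#include <stdio.h>

	#define FMODE_CAN_WRITE 0x40000	/* assumed value, for the sketch */

	struct file_sketch { unsigned int f_mode; };

	static int prepare_pipe(const struct file_sketch *pipe)
	{
		if (!(pipe->f_mode & FMODE_CAN_WRITE))
			return -22;	/* -EINVAL */
		return 0;
	}

	int main(void)
	{
		struct file_sketch w = { .f_mode = FMODE_CAN_WRITE };
		struct file_sketch r = { 0 };

		printf("%d %d\n", prepare_pipe(&w), prepare_pipe(&r));
		return 0;
	}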
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 116fd38ee472..2ad05ab93db8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -70,7 +70,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
70 70
71 mutex_lock(&sbi->pipe_mutex); 71 mutex_lock(&sbi->pipe_mutex);
72 while (bytes && 72 while (bytes &&
73 (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { 73 (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) {
74 data += wr; 74 data += wr;
75 bytes -= wr; 75 bytes -= wr;
76 } 76 }
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index 3a7813ab8c95..1fead8d56a98 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -19,16 +19,16 @@ typedef u64 befs_blocknr_t;
19 * BeFS in memory structures 19 * BeFS in memory structures
20 */ 20 */
21 21
22typedef struct befs_mount_options { 22struct befs_mount_options {
23 kgid_t gid; 23 kgid_t gid;
24 kuid_t uid; 24 kuid_t uid;
25 int use_gid; 25 int use_gid;
26 int use_uid; 26 int use_uid;
27 int debug; 27 int debug;
28 char *iocharset; 28 char *iocharset;
29} befs_mount_options; 29};
30 30
31typedef struct befs_sb_info { 31struct befs_sb_info {
32 u32 magic1; 32 u32 magic1;
33 u32 block_size; 33 u32 block_size;
34 u32 block_shift; 34 u32 block_shift;
@@ -52,12 +52,11 @@ typedef struct befs_sb_info {
52 befs_inode_addr indices; 52 befs_inode_addr indices;
53 u32 magic3; 53 u32 magic3;
54 54
55 befs_mount_options mount_opts; 55 struct befs_mount_options mount_opts;
56 struct nls_table *nls; 56 struct nls_table *nls;
57};
57 58
58} befs_sb_info; 59struct befs_inode_info {
59
60typedef struct befs_inode_info {
61 u32 i_flags; 60 u32 i_flags;
62 u32 i_type; 61 u32 i_type;
63 62
@@ -71,8 +70,7 @@ typedef struct befs_inode_info {
71 } i_data; 70 } i_data;
72 71
73 struct inode vfs_inode; 72 struct inode vfs_inode;
74 73};
75} befs_inode_info;
76 74
77enum befs_err { 75enum befs_err {
78 BEFS_OK, 76 BEFS_OK,
@@ -105,13 +103,13 @@ void befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *);
105/* Gets a pointer to the private portion of the super_block 103/* Gets a pointer to the private portion of the super_block
106 * structure from the public part 104 * structure from the public part
107 */ 105 */
108static inline befs_sb_info * 106static inline struct befs_sb_info *
109BEFS_SB(const struct super_block *super) 107BEFS_SB(const struct super_block *super)
110{ 108{
111 return (befs_sb_info *) super->s_fs_info; 109 return (struct befs_sb_info *) super->s_fs_info;
112} 110}
113 111
114static inline befs_inode_info * 112static inline struct befs_inode_info *
115BEFS_I(const struct inode *inode) 113BEFS_I(const struct inode *inode)
116{ 114{
117 return list_entry(inode, struct befs_inode_info, vfs_inode); 115 return list_entry(inode, struct befs_inode_info, vfs_inode);
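The befs hunks drop the struct typedefs per kernel style, leaving the usual pattern of a cast accessor from the generic object's private pointer to the fs-specific info. That accessor shape in user space (all *_sketch names are hypothetical):

	#include <stdio.h>

	struct super_block_sketch { void *s_fs_info; };

	struct befs_sb_info_sketch { unsigned int block_size; };

	/* Fetch the fs-private data hanging off the generic superblock. */
	static inline struct befs_sb_info_sketch *
	BEFS_SB_sketch(const struct super_block_sketch *super)
	{
		return (struct befs_sb_info_sketch *)super->s_fs_info;
	}

	int main(void)
	{
		struct befs_sb_info_sketch info = { .block_size = 2048 };
		struct super_block_sketch sb = { .s_fs_info = &info };

		printf("block size %u\n", BEFS_SB_sketch(&sb)->block_size);
		return 0;
	}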
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 1e8e0b8d8836..ebd50718659f 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -168,7 +168,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
168 befs_blocknr_t blocks; 168 befs_blocknr_t blocks;
169 befs_blocknr_t datablocks; /* File data blocks */ 169 befs_blocknr_t datablocks; /* File data blocks */
170 befs_blocknr_t metablocks; /* FS metadata blocks */ 170 befs_blocknr_t metablocks; /* FS metadata blocks */
171 befs_sb_info *befs_sb = BEFS_SB(sb); 171 struct befs_sb_info *befs_sb = BEFS_SB(sb);
172 172
173 befs_debug(sb, "---> %s", __func__); 173 befs_debug(sb, "---> %s", __func__);
174 174
@@ -428,7 +428,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
428 struct buffer_head *indir_block; 428 struct buffer_head *indir_block;
429 befs_block_run indir_run; 429 befs_block_run indir_run;
430 befs_disk_inode_addr *iaddr_array = NULL; 430 befs_disk_inode_addr *iaddr_array = NULL;
431 befs_sb_info *befs_sb = BEFS_SB(sb); 431 struct befs_sb_info *befs_sb = BEFS_SB(sb);
432 432
433 befs_blocknr_t indir_start_blk = 433 befs_blocknr_t indir_start_blk =
434 data->max_indirect_range >> befs_sb->block_shift; 434 data->max_indirect_range >> befs_sb->block_shift;
diff --git a/fs/befs/io.c b/fs/befs/io.c
index 0408a3d601d0..7a5b4ec21c56 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -28,7 +28,7 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
28{ 28{
29 struct buffer_head *bh = NULL; 29 struct buffer_head *bh = NULL;
30 befs_blocknr_t block = 0; 30 befs_blocknr_t block = 0;
31 befs_sb_info *befs_sb = BEFS_SB(sb); 31 struct befs_sb_info *befs_sb = BEFS_SB(sb);
32 32
33 befs_debug(sb, "---> Enter %s " 33 befs_debug(sb, "---> Enter %s "
34 "[%u, %hu, %hu]", __func__, iaddr.allocation_group, 34 "[%u, %hu, %hu]", __func__, iaddr.allocation_group,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e089f1985fca..16e0a48bfccd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -51,7 +51,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
51static void befs_put_super(struct super_block *); 51static void befs_put_super(struct super_block *);
52static int befs_remount(struct super_block *, int *, char *); 52static int befs_remount(struct super_block *, int *, char *);
53static int befs_statfs(struct dentry *, struct kstatfs *); 53static int befs_statfs(struct dentry *, struct kstatfs *);
54static int parse_options(char *, befs_mount_options *); 54static int parse_options(char *, struct befs_mount_options *);
55 55
56static const struct super_operations befs_sops = { 56static const struct super_operations befs_sops = {
57 .alloc_inode = befs_alloc_inode, /* allocate a new inode */ 57 .alloc_inode = befs_alloc_inode, /* allocate a new inode */
@@ -304,9 +304,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
304{ 304{
305 struct buffer_head *bh = NULL; 305 struct buffer_head *bh = NULL;
306 befs_inode *raw_inode = NULL; 306 befs_inode *raw_inode = NULL;
307 307 struct befs_sb_info *befs_sb = BEFS_SB(sb);
308 befs_sb_info *befs_sb = BEFS_SB(sb); 308 struct befs_inode_info *befs_ino = NULL;
309 befs_inode_info *befs_ino = NULL;
310 struct inode *inode; 309 struct inode *inode;
311 long ret = -EIO; 310 long ret = -EIO;
312 311
@@ -472,7 +471,7 @@ static void *
472befs_follow_link(struct dentry *dentry, struct nameidata *nd) 471befs_follow_link(struct dentry *dentry, struct nameidata *nd)
473{ 472{
474 struct super_block *sb = dentry->d_sb; 473 struct super_block *sb = dentry->d_sb;
475 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 474 struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
476 befs_data_stream *data = &befs_ino->i_data.ds; 475 befs_data_stream *data = &befs_ino->i_data.ds;
477 befs_off_t len = data->size; 476 befs_off_t len = data->size;
478 char *link; 477 char *link;
@@ -502,7 +501,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
502static void * 501static void *
503befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) 502befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
504{ 503{
505 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 504 struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
505
506 nd_set_link(nd, befs_ino->i_data.symlink); 506 nd_set_link(nd, befs_ino->i_data.symlink);
507 return NULL; 507 return NULL;
508} 508}
@@ -669,7 +669,7 @@ static const match_table_t befs_tokens = {
669}; 669};
670 670
671static int 671static int
672parse_options(char *options, befs_mount_options * opts) 672parse_options(char *options, struct befs_mount_options *opts)
673{ 673{
674 char *p; 674 char *p;
675 substring_t args[MAX_OPT_ARGS]; 675 substring_t args[MAX_OPT_ARGS];
@@ -769,7 +769,7 @@ static int
769befs_fill_super(struct super_block *sb, void *data, int silent) 769befs_fill_super(struct super_block *sb, void *data, int silent)
770{ 770{
771 struct buffer_head *bh; 771 struct buffer_head *bh;
772 befs_sb_info *befs_sb; 772 struct befs_sb_info *befs_sb;
773 befs_super_block *disk_sb; 773 befs_super_block *disk_sb;
774 struct inode *root; 774 struct inode *root;
775 long ret = -EINVAL; 775 long ret = -EINVAL;
diff --git a/fs/befs/super.c b/fs/befs/super.c
index ca40f828f64d..aeafc4d84278 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -24,7 +24,7 @@
24int 24int
25befs_load_sb(struct super_block *sb, befs_super_block * disk_sb) 25befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
26{ 26{
27 befs_sb_info *befs_sb = BEFS_SB(sb); 27 struct befs_sb_info *befs_sb = BEFS_SB(sb);
28 28
29 /* Check the byte order of the filesystem */ 29 /* Check the byte order of the filesystem */
30 if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE) 30 if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
@@ -59,7 +59,7 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
59int 59int
60befs_check_sb(struct super_block *sb) 60befs_check_sb(struct super_block *sb)
61{ 61{
62 befs_sb_info *befs_sb = BEFS_SB(sb); 62 struct befs_sb_info *befs_sb = BEFS_SB(sb);
63 63
64 /* Check magic headers of super block */ 64 /* Check magic headers of super block */
65 if ((befs_sb->magic1 != BEFS_SUPER_MAGIC1) 65 if ((befs_sb->magic1 != BEFS_SUPER_MAGIC1)
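
Note: the befs hunks above and below drop the old struct typedefs (befs_sb_info, befs_inode_info, befs_mount_options) in favor of explicit struct types, per kernel coding style, which discourages typedefs for plain structs. A minimal sketch of the pattern, with illustrative members:

	/* before: typedef struct { u32 magic1; } befs_sb_info; */
	struct befs_sb_info {
		u32 magic1;
	};

	static int check_magic(struct befs_sb_info *sbi)
	{
		/* the struct type is now spelled out at every use site */
		return sbi->magic1 == BEFS_SUPER_MAGIC1;
	}
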
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 08063ae0a17c..7a8182770649 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -86,7 +86,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
86 86
87 inode = new_inode(s); 87 inode = new_inode(s);
88 if (!inode) 88 if (!inode)
89 return -ENOSPC; 89 return -ENOMEM;
90 mutex_lock(&info->bfs_lock); 90 mutex_lock(&info->bfs_lock);
91 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1); 91 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
92 if (ino > info->si_lasti) { 92 if (ino > info->si_lasti) {
@@ -293,7 +293,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
293 for (block = sblock; block <= eblock; block++) { 293 for (block = sblock; block <= eblock; block++) {
294 bh = sb_bread(dir->i_sb, block); 294 bh = sb_bread(dir->i_sb, block);
295 if (!bh) 295 if (!bh)
296 return -ENOSPC; 296 return -EIO;
297 for (off = 0; off < BFS_BSIZE; off += BFS_DIRENT_SIZE) { 297 for (off = 0; off < BFS_BSIZE; off += BFS_DIRENT_SIZE) {
298 de = (struct bfs_dirent *)(bh->b_data + off); 298 de = (struct bfs_dirent *)(bh->b_data + off);
299 if (!de->ino) { 299 if (!de->ino) {
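
Note: the two bfs fixes above correct error codes, not behavior: a failed new_inode() means the kernel could not allocate an in-core inode (-ENOMEM), and a failed sb_bread() is a device read error (-EIO); -ENOSPC is reserved for genuinely running out of on-disk resources, as in the si_imap check that follows. The convention, sketched:

	inode = new_inode(s);
	if (!inode)
		return -ENOMEM;	/* allocation failure, not lack of disk space */

	bh = sb_bread(dir->i_sb, block);
	if (!bh)
		return -EIO;	/* the block could not be read from the device */
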
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index e7f88ace1a25..97f1b5160155 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -23,9 +23,7 @@
23 23
24const struct file_operations bfs_file_operations = { 24const struct file_operations bfs_file_operations = {
25 .llseek = generic_file_llseek, 25 .llseek = generic_file_llseek,
26 .read = new_sync_read,
27 .read_iter = generic_file_read_iter, 26 .read_iter = generic_file_read_iter,
28 .write = new_sync_write,
29 .write_iter = generic_file_write_iter, 27 .write_iter = generic_file_write_iter,
30 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
31 .splice_read = generic_file_splice_read, 29 .splice_read = generic_file_splice_read,
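
Note: with ->read_iter and ->write_iter wired up, the generic VFS read/write paths fall back to the iter-based methods themselves (calling new_sync_read()/new_sync_write() internally when .read/.write are absent), so the explicit entries removed above are redundant; this series drops them across many filesystems. A minimal iter-only file_operations now looks roughly like:

	const struct file_operations example_file_operations = {
		.llseek		= generic_file_llseek,
		.read_iter	= generic_file_read_iter,
		.write_iter	= generic_file_write_iter,
		.mmap		= generic_file_mmap,
		.splice_read	= generic_file_splice_read,
	};
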
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/vfs.h> 16#include <linux/vfs.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/uio.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "bfs.h" 20#include "bfs.h"
20 21
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/random.h> 32#include <linux/random.h>
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/elf-randomize.h>
34#include <linux/utsname.h> 35#include <linux/utsname.h>
35#include <linux/coredump.h> 36#include <linux/coredump.h>
36#include <linux/sched.h> 37#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 863 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 int elf_prot = 0, elf_flags; 864 int elf_prot = 0, elf_flags;
864 unsigned long k, vaddr; 865 unsigned long k, vaddr;
866 unsigned long total_size = 0;
865 867
866 if (elf_ppnt->p_type != PT_LOAD) 868 if (elf_ppnt->p_type != PT_LOAD)
867 continue; 869 continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
909 * default mmap base, as well as whatever program they 911 * default mmap base, as well as whatever program they
910 * might try to exec. This is because the brk will 912 * might try to exec. This is because the brk will
911 * follow the loader, and is not movable. */ 913 * follow the loader, and is not movable. */
912#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE 914 load_bias = ELF_ET_DYN_BASE - vaddr;
913 /* Memory randomization might have been switched off
914 * in runtime via sysctl or explicit setting of
915 * personality flags.
916 * If that is the case, retain the original non-zero
917 * load_bias value in order to establish proper
918 * non-randomized mappings.
919 */
920 if (current->flags & PF_RANDOMIZE) 915 if (current->flags & PF_RANDOMIZE)
921 load_bias = 0; 916 load_bias += arch_mmap_rnd();
922 else 917 load_bias = ELF_PAGESTART(load_bias);
923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 918 total_size = total_mapping_size(elf_phdata,
924#else 919 loc->elf_ex.e_phnum);
925 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 920 if (!total_size) {
926#endif 921 error = -EINVAL;
922 goto out_free_dentry;
923 }
927 } 924 }
928 925
929 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, 926 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
930 elf_prot, elf_flags, 0); 927 elf_prot, elf_flags, total_size);
931 if (BAD_ADDR(error)) { 928 if (BAD_ADDR(error)) {
932 retval = IS_ERR((void *)error) ? 929 retval = IS_ERR((void *)error) ?
933 PTR_ERR((void*)error) : -EINVAL; 930 PTR_ERR((void*)error) : -EINVAL;
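
Note: the hunk above reworks ET_DYN placement: the load address now always starts from ELF_ET_DYN_BASE, gets the architecture's mmap randomization offset added when PF_RANDOMIZE is set, and is page-aligned; total_mapping_size() then lets the first elf_map() call reserve the whole image so later PT_LOAD segments cannot collide with unrelated mappings. A hedged sketch of what total_mapping_size() computes (the real helper lives in fs/binfmt_elf.c; this mirrors its intent):

	static unsigned long example_total_mapping_size(const struct elf_phdr *phdr, int nr)
	{
		int i, first = -1, last = -1;

		for (i = 0; i < nr; i++) {
			if (phdr[i].p_type == PT_LOAD) {
				if (first == -1)
					first = i;
				last = i;
			}
		}
		if (first == -1)
			return 0;	/* no loadable segments */

		/* span from the page-aligned start of the first PT_LOAD
		 * to the end of the last one */
		return phdr[last].p_vaddr + phdr[last].p_memsz -
		       ELF_PAGESTART(phdr[first].p_vaddr);
	}
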
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
1053 current->mm->end_data = end_data; 1050 current->mm->end_data = end_data;
1054 current->mm->start_stack = bprm->p; 1051 current->mm->start_stack = bprm->p;
1055 1052
1056#ifdef arch_randomize_brk
1057 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { 1053 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1058 current->mm->brk = current->mm->start_brk = 1054 current->mm->brk = current->mm->start_brk =
1059 arch_randomize_brk(current->mm); 1055 arch_randomize_brk(current->mm);
1060#ifdef CONFIG_COMPAT_BRK 1056#ifdef compat_brk_randomized
1061 current->brk_randomized = 1; 1057 current->brk_randomized = 1;
1062#endif 1058#endif
1063 } 1059 }
1064#endif
1065 1060
1066 if (current->personality & MMAP_PAGE_ZERO) { 1061 if (current->personality & MMAP_PAGE_ZERO) {
1067 /* Why this, you ask??? Well SVr4 maps page 0 as read-only, 1062 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 97aff2879cda..9dcb05409ba7 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -9,6 +9,7 @@
9 9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 11
12#include <linux/kernel.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/init.h> 14#include <linux/init.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
@@ -521,9 +522,8 @@ static int parse_command(const char __user *buffer, size_t count)
521 522
522static void entry_status(Node *e, char *page) 523static void entry_status(Node *e, char *page)
523{ 524{
524 char *dp; 525 char *dp = page;
525 char *status = "disabled"; 526 const char *status = "disabled";
526 const char *flags = "flags: ";
527 527
528 if (test_bit(Enabled, &e->flags)) 528 if (test_bit(Enabled, &e->flags))
529 status = "enabled"; 529 status = "enabled";
@@ -533,12 +533,10 @@ static void entry_status(Node *e, char *page)
533 return; 533 return;
534 } 534 }
535 535
536 sprintf(page, "%s\ninterpreter %s\n", status, e->interpreter); 536 dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
537 dp = page + strlen(page);
538 537
539 /* print the special flags */ 538 /* print the special flags */
540 sprintf(dp, "%s", flags); 539 dp += sprintf(dp, "flags: ");
541 dp += strlen(flags);
542 if (e->flags & MISC_FMT_PRESERVE_ARGV0) 540 if (e->flags & MISC_FMT_PRESERVE_ARGV0)
543 *dp++ = 'P'; 541 *dp++ = 'P';
544 if (e->flags & MISC_FMT_OPEN_BINARY) 542 if (e->flags & MISC_FMT_OPEN_BINARY)
@@ -550,21 +548,11 @@ static void entry_status(Node *e, char *page)
550 if (!test_bit(Magic, &e->flags)) { 548 if (!test_bit(Magic, &e->flags)) {
551 sprintf(dp, "extension .%s\n", e->magic); 549 sprintf(dp, "extension .%s\n", e->magic);
552 } else { 550 } else {
553 int i; 551 dp += sprintf(dp, "offset %i\nmagic ", e->offset);
554 552 dp = bin2hex(dp, e->magic, e->size);
555 sprintf(dp, "offset %i\nmagic ", e->offset);
556 dp = page + strlen(page);
557 for (i = 0; i < e->size; i++) {
558 sprintf(dp, "%02x", 0xff & (int) (e->magic[i]));
559 dp += 2;
560 }
561 if (e->mask) { 553 if (e->mask) {
562 sprintf(dp, "\nmask "); 554 dp += sprintf(dp, "\nmask ");
563 dp += 6; 555 dp = bin2hex(dp, e->mask, e->size);
564 for (i = 0; i < e->size; i++) {
565 sprintf(dp, "%02x", 0xff & (int) (e->mask[i]));
566 dp += 2;
567 }
568 } 556 }
569 *dp++ = '\n'; 557 *dp++ = '\n';
570 *dp = '\0'; 558 *dp = '\0';
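
Note: the rewrite above replaces two open-coded "%02x" loops with bin2hex() from lib/hexdump.c, which emits two lowercase hex digits per input byte and returns the advanced destination pointer; combined with the dp += sprintf(...) idiom, entry_status() no longer needs any strlen() bookkeeping. A small usage sketch:

	char buf[2 * 4 + 1];
	static const u8 magic[4] = { 0x7f, 'E', 'L', 'F' };
	char *p = bin2hex(buf, magic, sizeof(magic));

	*p = '\0';	/* buf now holds "7f454c46" */
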
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..897ee0503932 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/cleancache.h> 29#include <linux/cleancache.h>
30#include <linux/aio.h>
31#include <asm/uaccess.h> 30#include <asm/uaccess.h>
32#include "internal.h" 31#include "internal.h"
33 32
@@ -147,15 +146,13 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
147} 146}
148 147
149static ssize_t 148static ssize_t
150blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 149blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
151 loff_t offset)
152{ 150{
153 struct file *file = iocb->ki_filp; 151 struct file *file = iocb->ki_filp;
154 struct inode *inode = file->f_mapping->host; 152 struct inode *inode = file->f_mapping->host;
155 153
156 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, 154 return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
157 offset, blkdev_get_block, 155 blkdev_get_block, NULL, NULL, 0);
158 NULL, NULL, 0);
159} 156}
160 157
161int __sync_blockdev(struct block_device *bdev, int wait) 158int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1598,9 +1595,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1598ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) 1595ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1599{ 1596{
1600 struct file *file = iocb->ki_filp; 1597 struct file *file = iocb->ki_filp;
1598 struct inode *bd_inode = file->f_mapping->host;
1599 loff_t size = i_size_read(bd_inode);
1601 struct blk_plug plug; 1600 struct blk_plug plug;
1602 ssize_t ret; 1601 ssize_t ret;
1603 1602
1603 if (bdev_read_only(I_BDEV(bd_inode)))
1604 return -EPERM;
1605
1606 if (!iov_iter_count(from))
1607 return 0;
1608
1609 if (iocb->ki_pos >= size)
1610 return -ENOSPC;
1611
1612 iov_iter_truncate(from, size - iocb->ki_pos);
1613
1604 blk_start_plug(&plug); 1614 blk_start_plug(&plug);
1605 ret = __generic_file_write_iter(iocb, from); 1615 ret = __generic_file_write_iter(iocb, from);
1606 if (ret > 0) { 1616 if (ret > 0) {
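
Note: the checks added to blkdev_write_iter() bound every write by the device size before plugging: read-only devices get -EPERM, an empty iovec returns 0, a write starting at or past the end of the device returns -ENOSPC, and iov_iter_truncate() shrinks the iterator so a straddling write stops exactly at the last byte. The clamping logic, sketched with a hypothetical helper:

	static ssize_t bounded_write(struct kiocb *iocb, struct iov_iter *from,
				     loff_t dev_size)
	{
		if (!iov_iter_count(from))
			return 0;			/* nothing to do */
		if (iocb->ki_pos >= dev_size)
			return -ENOSPC;			/* starts past end of device */

		/* iov_iter_truncate() only ever shrinks the count */
		iov_iter_truncate(from, dev_size - iocb->ki_pos);
		return do_bounded_io(iocb, from);	/* hypothetical helper */
	}
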
@@ -1660,8 +1670,6 @@ const struct file_operations def_blk_fops = {
1660 .open = blkdev_open, 1670 .open = blkdev_open,
1661 .release = blkdev_close, 1671 .release = blkdev_close,
1662 .llseek = block_llseek, 1672 .llseek = block_llseek,
1663 .read = new_sync_read,
1664 .write = new_sync_write,
1665 .read_iter = blkdev_read_iter, 1673 .read_iter = blkdev_read_iter,
1666 .write_iter = blkdev_write_iter, 1674 .write_iter = blkdev_write_iter,
1667 .mmap = generic_file_mmap, 1675 .mmap = generic_file_mmap,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 4dabeb893b7c..df9932b00d08 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -87,7 +87,7 @@ BTRFS_WORK_HELPER(scrubwrc_helper);
87BTRFS_WORK_HELPER(scrubnc_helper); 87BTRFS_WORK_HELPER(scrubnc_helper);
88 88
89static struct __btrfs_workqueue * 89static struct __btrfs_workqueue *
90__btrfs_alloc_workqueue(const char *name, int flags, int max_active, 90__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
91 int thresh) 91 int thresh)
92{ 92{
93 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 93 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -132,7 +132,7 @@ static inline void
132__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); 132__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
133 133
134struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 134struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
135 int flags, 135 unsigned int flags,
136 int max_active, 136 int max_active,
137 int thresh) 137 int thresh)
138{ 138{
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index e386c29ef1f6..ec2ee477f8ba 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -66,7 +66,7 @@ BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
66BTRFS_WORK_HELPER_PROTO(scrubnc_helper); 66BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
67 67
68struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 68struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
69 int flags, 69 unsigned int flags,
70 int max_active, 70 int max_active,
71 int thresh); 71 int thresh);
72void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, 72void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f55721ff9385..9de772ee0031 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
1206 struct ulist *roots = NULL; 1206 struct ulist *roots = NULL;
1207 struct ulist_iterator uiter; 1207 struct ulist_iterator uiter;
1208 struct ulist_node *node; 1208 struct ulist_node *node;
1209 struct seq_list elem = {}; 1209 struct seq_list elem = SEQ_LIST_INIT(elem);
1210 int ret = 0; 1210 int ret = 0;
1211 1211
1212 tmp = ulist_alloc(GFP_NOFS); 1212 tmp = ulist_alloc(GFP_NOFS);
@@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1610 struct ulist *roots = NULL; 1610 struct ulist *roots = NULL;
1611 struct ulist_node *ref_node = NULL; 1611 struct ulist_node *ref_node = NULL;
1612 struct ulist_node *root_node = NULL; 1612 struct ulist_node *root_node = NULL;
1613 struct seq_list tree_mod_seq_elem = {}; 1613 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
1614 struct ulist_iterator ref_uiter; 1614 struct ulist_iterator ref_uiter;
1615 struct ulist_iterator root_uiter; 1615 struct ulist_iterator root_uiter;
1616 1616
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index de5e4f2adfea..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,7 +66,11 @@ struct btrfs_inode {
66 */ 66 */
67 struct btrfs_key location; 67 struct btrfs_key location;
68 68
69 /* Lock for counters */ 69 /*
70 * Lock for counters and all fields used to determine if the inode is in
71 * the log or not (last_trans, last_sub_trans, last_log_commit,
72 * logged_trans).
73 */
70 spinlock_t lock; 74 spinlock_t lock;
71 75
72 /* the extent_tree has caches of all the extent mappings to disk */ 76 /* the extent_tree has caches of all the extent mappings to disk */
@@ -250,6 +254,9 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
250 254
251static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) 255static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
252{ 256{
257 int ret = 0;
258
259 spin_lock(&BTRFS_I(inode)->lock);
253 if (BTRFS_I(inode)->logged_trans == generation && 260 if (BTRFS_I(inode)->logged_trans == generation &&
254 BTRFS_I(inode)->last_sub_trans <= 261 BTRFS_I(inode)->last_sub_trans <=
255 BTRFS_I(inode)->last_log_commit && 262 BTRFS_I(inode)->last_log_commit &&
@@ -263,9 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
263 */ 270 */
264 smp_mb(); 271 smp_mb();
265 if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents)) 272 if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
266 return 1; 273 ret = 1;
267 } 274 }
268 return 0; 275 spin_unlock(&BTRFS_I(inode)->lock);
276 return ret;
269} 277}
270 278
271#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1 279#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
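
Note: btrfs_inode_in_log() previously read logged_trans, last_sub_trans and last_log_commit without any lock, so a concurrent fsync could observe the fields mid-update; the change samples them under BTRFS_I(inode)->lock (whose expanded comment above now documents exactly this) and returns the decision computed inside the critical section. The pattern, sketched with a hypothetical predicate:

	spin_lock(&BTRFS_I(inode)->lock);
	/* all log-related counters are sampled atomically w.r.t. writers */
	ret = inode_logged_in_generation(inode, generation);	/* hypothetical */
	spin_unlock(&BTRFS_I(inode)->lock);
	return ret;
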
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d897ef803b3b..ce7dec88f4b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
2990 (unsigned long long)bio->bi_iter.bi_sector, 2990 (unsigned long long)bio->bi_iter.bi_sector,
2991 dev_bytenr, bio->bi_bdev); 2991 dev_bytenr, bio->bi_bdev);
2992 2992
2993 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 2993 mapped_datav = kmalloc_array(bio->bi_vcnt,
2994 GFP_NOFS); 2994 sizeof(*mapped_datav), GFP_NOFS);
2995 if (!mapped_datav) 2995 if (!mapped_datav)
2996 goto leave; 2996 goto leave;
2997 cur_bytenr = dev_bytenr; 2997 cur_bytenr = dev_bytenr;
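
Note: kmalloc_array(n, size, flags) is the overflow-safe spelling of kmalloc(n * size, flags): if n * size would overflow, it returns NULL instead of silently allocating a short buffer. kcalloc(), used in the compression.c and ctree.c conversions below, is the same thing with zeroing. Sketch:

	void **vec;

	vec = kmalloc_array(nr, sizeof(*vec), GFP_NOFS);	/* NULL on overflow */
	if (!vec)
		return -ENOMEM;
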
@@ -3241,8 +3241,5 @@ void btrfsic_unmount(struct btrfs_root *root,
3241 3241
3242 mutex_unlock(&btrfsic_mutex); 3242 mutex_unlock(&btrfsic_mutex);
3243 3243
3244 if (is_vmalloc_addr(state)) 3244 kvfree(state);
3245 vfree(state);
3246 else
3247 kfree(state);
3248} 3245}
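
Note: kvfree() frees memory obtained from either kmalloc() or vmalloc() by inspecting the address itself, so the open-coded branch above collapses to a single call:

	/* before */
	if (is_vmalloc_addr(state))
		vfree(state);
	else
		kfree(state);

	/* after */
	kvfree(state);
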
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index e9df8862012c..ce62324c78e7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
622 cb->orig_bio = bio; 622 cb->orig_bio = bio;
623 623
624 nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE); 624 nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
625 cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, 625 cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
626 GFP_NOFS); 626 GFP_NOFS);
627 if (!cb->compressed_pages) 627 if (!cb->compressed_pages)
628 goto fail1; 628 goto fail1;
@@ -750,7 +750,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
750static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; 750static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
751static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; 751static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
752 752
753static struct btrfs_compress_op *btrfs_compress_op[] = { 753static const struct btrfs_compress_op * const btrfs_compress_op[] = {
754 &btrfs_zlib_compress, 754 &btrfs_zlib_compress,
755 &btrfs_lzo_compress, 755 &btrfs_lzo_compress,
756}; 756};
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d181f70caae0..13a4dc0436c9 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -77,7 +77,7 @@ struct btrfs_compress_op {
77 size_t srclen, size_t destlen); 77 size_t srclen, size_t destlen);
78}; 78};
79 79
80extern struct btrfs_compress_op btrfs_zlib_compress; 80extern const struct btrfs_compress_op btrfs_zlib_compress;
81extern struct btrfs_compress_op btrfs_lzo_compress; 81extern const struct btrfs_compress_op btrfs_lzo_compress;
82 82
83#endif 83#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 993642199326..0f11ebc92f02 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
578 if (!tree_mod_need_log(fs_info, eb)) 578 if (!tree_mod_need_log(fs_info, eb))
579 return 0; 579 return 0;
580 580
581 tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags); 581 tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
582 if (!tm_list) 582 if (!tm_list)
583 return -ENOMEM; 583 return -ENOMEM;
584 584
@@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
677 677
678 if (log_removal && btrfs_header_level(old_root) > 0) { 678 if (log_removal && btrfs_header_level(old_root) > 0) {
679 nritems = btrfs_header_nritems(old_root); 679 nritems = btrfs_header_nritems(old_root);
680 tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *), 680 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
681 flags); 681 flags);
682 if (!tm_list) { 682 if (!tm_list) {
683 ret = -ENOMEM; 683 ret = -ENOMEM;
@@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
814 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) 814 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
815 return 0; 815 return 0;
816 816
817 tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *), 817 tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
818 GFP_NOFS); 818 GFP_NOFS);
819 if (!tm_list) 819 if (!tm_list)
820 return -ENOMEM; 820 return -ENOMEM;
@@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
905 return 0; 905 return 0;
906 906
907 nritems = btrfs_header_nritems(eb); 907 nritems = btrfs_header_nritems(eb);
908 tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *), 908 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
909 GFP_NOFS);
910 if (!tm_list) 909 if (!tm_list)
911 return -ENOMEM; 910 return -ENOMEM;
912 911
@@ -1073,7 +1072,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1073 ret = btrfs_dec_ref(trans, root, buf, 1); 1072 ret = btrfs_dec_ref(trans, root, buf, 1);
1074 BUG_ON(ret); /* -ENOMEM */ 1073 BUG_ON(ret); /* -ENOMEM */
1075 } 1074 }
1076 clean_tree_block(trans, root, buf); 1075 clean_tree_block(trans, root->fs_info, buf);
1077 *last_ref = 1; 1076 *last_ref = 1;
1078 } 1077 }
1079 return 0; 1078 return 0;
@@ -1645,14 +1644,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1645 1644
1646 parent_nritems = btrfs_header_nritems(parent); 1645 parent_nritems = btrfs_header_nritems(parent);
1647 blocksize = root->nodesize; 1646 blocksize = root->nodesize;
1648 end_slot = parent_nritems; 1647 end_slot = parent_nritems - 1;
1649 1648
1650 if (parent_nritems == 1) 1649 if (parent_nritems <= 1)
1651 return 0; 1650 return 0;
1652 1651
1653 btrfs_set_lock_blocking(parent); 1652 btrfs_set_lock_blocking(parent);
1654 1653
1655 for (i = start_slot; i < end_slot; i++) { 1654 for (i = start_slot; i <= end_slot; i++) {
1656 int close = 1; 1655 int close = 1;
1657 1656
1658 btrfs_node_key(parent, &disk_key, i); 1657 btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1668,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1669 other = btrfs_node_blockptr(parent, i - 1); 1668 other = btrfs_node_blockptr(parent, i - 1);
1670 close = close_blocks(blocknr, other, blocksize); 1669 close = close_blocks(blocknr, other, blocksize);
1671 } 1670 }
1672 if (!close && i < end_slot - 2) { 1671 if (!close && i < end_slot) {
1673 other = btrfs_node_blockptr(parent, i + 1); 1672 other = btrfs_node_blockptr(parent, i + 1);
1674 close = close_blocks(blocknr, other, blocksize); 1673 close = close_blocks(blocknr, other, blocksize);
1675 } 1674 }
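
Note: the two btrfs_realloc_node() hunks fix an off-by-one: end_slot is now the index of the last valid slot (parent_nritems - 1) rather than the item count, the loop runs with i <= end_slot, and the look-ahead guard becomes i < end_slot, i.e. "slot i + 1 exists"; the old i < end_slot - 2 test silently skipped the forward close_blocks() check near the tail. The bounds, sketched with a hypothetical accessor:

	end_slot = nritems - 1;			/* last valid index */
	for (i = start_slot; i <= end_slot; i++) {
		if (i > start_slot)
			probe(i - 1);		/* previous slot always exists here */
		if (i < end_slot)
			probe(i + 1);		/* next slot exists only below end_slot */
	}
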
@@ -1678,7 +1677,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1678 continue; 1677 continue;
1679 } 1678 }
1680 1679
1681 cur = btrfs_find_tree_block(root, blocknr); 1680 cur = btrfs_find_tree_block(root->fs_info, blocknr);
1682 if (cur) 1681 if (cur)
1683 uptodate = btrfs_buffer_uptodate(cur, gen, 0); 1682 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
1684 else 1683 else
@@ -1943,7 +1942,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1943 1942
1944 path->locks[level] = 0; 1943 path->locks[level] = 0;
1945 path->nodes[level] = NULL; 1944 path->nodes[level] = NULL;
1946 clean_tree_block(trans, root, mid); 1945 clean_tree_block(trans, root->fs_info, mid);
1947 btrfs_tree_unlock(mid); 1946 btrfs_tree_unlock(mid);
1948 /* once for the path */ 1947 /* once for the path */
1949 free_extent_buffer(mid); 1948 free_extent_buffer(mid);
@@ -1997,7 +1996,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1997 if (wret < 0 && wret != -ENOSPC) 1996 if (wret < 0 && wret != -ENOSPC)
1998 ret = wret; 1997 ret = wret;
1999 if (btrfs_header_nritems(right) == 0) { 1998 if (btrfs_header_nritems(right) == 0) {
2000 clean_tree_block(trans, root, right); 1999 clean_tree_block(trans, root->fs_info, right);
2001 btrfs_tree_unlock(right); 2000 btrfs_tree_unlock(right);
2002 del_ptr(root, path, level + 1, pslot + 1); 2001 del_ptr(root, path, level + 1, pslot + 1);
2003 root_sub_used(root, right->len); 2002 root_sub_used(root, right->len);
@@ -2041,7 +2040,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
2041 BUG_ON(wret == 1); 2040 BUG_ON(wret == 1);
2042 } 2041 }
2043 if (btrfs_header_nritems(mid) == 0) { 2042 if (btrfs_header_nritems(mid) == 0) {
2044 clean_tree_block(trans, root, mid); 2043 clean_tree_block(trans, root->fs_info, mid);
2045 btrfs_tree_unlock(mid); 2044 btrfs_tree_unlock(mid);
2046 del_ptr(root, path, level + 1, pslot); 2045 del_ptr(root, path, level + 1, pslot);
2047 root_sub_used(root, mid->len); 2046 root_sub_used(root, mid->len);
@@ -2259,7 +2258,7 @@ static void reada_for_search(struct btrfs_root *root,
2259 2258
2260 search = btrfs_node_blockptr(node, slot); 2259 search = btrfs_node_blockptr(node, slot);
2261 blocksize = root->nodesize; 2260 blocksize = root->nodesize;
2262 eb = btrfs_find_tree_block(root, search); 2261 eb = btrfs_find_tree_block(root->fs_info, search);
2263 if (eb) { 2262 if (eb) {
2264 free_extent_buffer(eb); 2263 free_extent_buffer(eb);
2265 return; 2264 return;
@@ -2319,7 +2318,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
2319 if (slot > 0) { 2318 if (slot > 0) {
2320 block1 = btrfs_node_blockptr(parent, slot - 1); 2319 block1 = btrfs_node_blockptr(parent, slot - 1);
2321 gen = btrfs_node_ptr_generation(parent, slot - 1); 2320 gen = btrfs_node_ptr_generation(parent, slot - 1);
2322 eb = btrfs_find_tree_block(root, block1); 2321 eb = btrfs_find_tree_block(root->fs_info, block1);
2323 /* 2322 /*
 2324 * if we get -EAGAIN from btrfs_buffer_uptodate, we 2323 * if we get -EAGAIN from btrfs_buffer_uptodate, we
 2325 * don't want to return EAGAIN here. That will loop 2324 * don't want to return EAGAIN here. That will loop

@@ -2332,7 +2331,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
2332 if (slot + 1 < nritems) { 2331 if (slot + 1 < nritems) {
2333 block2 = btrfs_node_blockptr(parent, slot + 1); 2332 block2 = btrfs_node_blockptr(parent, slot + 1);
2334 gen = btrfs_node_ptr_generation(parent, slot + 1); 2333 gen = btrfs_node_ptr_generation(parent, slot + 1);
2335 eb = btrfs_find_tree_block(root, block2); 2334 eb = btrfs_find_tree_block(root->fs_info, block2);
2336 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) 2335 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
2337 block2 = 0; 2336 block2 = 0;
2338 free_extent_buffer(eb); 2337 free_extent_buffer(eb);
@@ -2450,7 +2449,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
2450 blocknr = btrfs_node_blockptr(b, slot); 2449 blocknr = btrfs_node_blockptr(b, slot);
2451 gen = btrfs_node_ptr_generation(b, slot); 2450 gen = btrfs_node_ptr_generation(b, slot);
2452 2451
2453 tmp = btrfs_find_tree_block(root, blocknr); 2452 tmp = btrfs_find_tree_block(root->fs_info, blocknr);
2454 if (tmp) { 2453 if (tmp) {
2455 /* first we do an atomic uptodate check */ 2454 /* first we do an atomic uptodate check */
2456 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { 2455 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
@@ -3126,7 +3125,8 @@ again:
3126 * higher levels 3125 * higher levels
3127 * 3126 *
3128 */ 3127 */
3129static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path, 3128static void fixup_low_keys(struct btrfs_fs_info *fs_info,
3129 struct btrfs_path *path,
3130 struct btrfs_disk_key *key, int level) 3130 struct btrfs_disk_key *key, int level)
3131{ 3131{
3132 int i; 3132 int i;
@@ -3137,7 +3137,7 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
3137 if (!path->nodes[i]) 3137 if (!path->nodes[i])
3138 break; 3138 break;
3139 t = path->nodes[i]; 3139 t = path->nodes[i];
3140 tree_mod_log_set_node_key(root->fs_info, t, tslot, 1); 3140 tree_mod_log_set_node_key(fs_info, t, tslot, 1);
3141 btrfs_set_node_key(t, key, tslot); 3141 btrfs_set_node_key(t, key, tslot);
3142 btrfs_mark_buffer_dirty(path->nodes[i]); 3142 btrfs_mark_buffer_dirty(path->nodes[i]);
3143 if (tslot != 0) 3143 if (tslot != 0)
@@ -3151,7 +3151,8 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
3151 * This function isn't completely safe. It's the caller's responsibility 3151 * This function isn't completely safe. It's the caller's responsibility
3152 * that the new key won't break the order 3152 * that the new key won't break the order
3153 */ 3153 */
3154void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, 3154void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3155 struct btrfs_path *path,
3155 struct btrfs_key *new_key) 3156 struct btrfs_key *new_key)
3156{ 3157{
3157 struct btrfs_disk_key disk_key; 3158 struct btrfs_disk_key disk_key;
@@ -3173,7 +3174,7 @@ void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
3173 btrfs_set_item_key(eb, &disk_key, slot); 3174 btrfs_set_item_key(eb, &disk_key, slot);
3174 btrfs_mark_buffer_dirty(eb); 3175 btrfs_mark_buffer_dirty(eb);
3175 if (slot == 0) 3176 if (slot == 0)
3176 fixup_low_keys(root, path, &disk_key, 1); 3177 fixup_low_keys(fs_info, path, &disk_key, 1);
3177} 3178}
3178 3179
3179/* 3180/*
@@ -3692,7 +3693,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3692 if (left_nritems) 3693 if (left_nritems)
3693 btrfs_mark_buffer_dirty(left); 3694 btrfs_mark_buffer_dirty(left);
3694 else 3695 else
3695 clean_tree_block(trans, root, left); 3696 clean_tree_block(trans, root->fs_info, left);
3696 3697
3697 btrfs_mark_buffer_dirty(right); 3698 btrfs_mark_buffer_dirty(right);
3698 3699
@@ -3704,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3704 if (path->slots[0] >= left_nritems) { 3705 if (path->slots[0] >= left_nritems) {
3705 path->slots[0] -= left_nritems; 3706 path->slots[0] -= left_nritems;
3706 if (btrfs_header_nritems(path->nodes[0]) == 0) 3707 if (btrfs_header_nritems(path->nodes[0]) == 0)
3707 clean_tree_block(trans, root, path->nodes[0]); 3708 clean_tree_block(trans, root->fs_info, path->nodes[0]);
3708 btrfs_tree_unlock(path->nodes[0]); 3709 btrfs_tree_unlock(path->nodes[0]);
3709 free_extent_buffer(path->nodes[0]); 3710 free_extent_buffer(path->nodes[0]);
3710 path->nodes[0] = right; 3711 path->nodes[0] = right;
@@ -3928,10 +3929,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3928 if (right_nritems) 3929 if (right_nritems)
3929 btrfs_mark_buffer_dirty(right); 3930 btrfs_mark_buffer_dirty(right);
3930 else 3931 else
3931 clean_tree_block(trans, root, right); 3932 clean_tree_block(trans, root->fs_info, right);
3932 3933
3933 btrfs_item_key(right, &disk_key, 0); 3934 btrfs_item_key(right, &disk_key, 0);
3934 fixup_low_keys(root, path, &disk_key, 1); 3935 fixup_low_keys(root->fs_info, path, &disk_key, 1);
3935 3936
3936 /* then fixup the leaf pointer in the path */ 3937 /* then fixup the leaf pointer in the path */
3937 if (path->slots[0] < push_items) { 3938 if (path->slots[0] < push_items) {
@@ -4168,6 +4169,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
4168 int mid; 4169 int mid;
4169 int slot; 4170 int slot;
4170 struct extent_buffer *right; 4171 struct extent_buffer *right;
4172 struct btrfs_fs_info *fs_info = root->fs_info;
4171 int ret = 0; 4173 int ret = 0;
4172 int wret; 4174 int wret;
4173 int split; 4175 int split;
@@ -4271,10 +4273,10 @@ again:
4271 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 4273 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
4272 btrfs_set_header_owner(right, root->root_key.objectid); 4274 btrfs_set_header_owner(right, root->root_key.objectid);
4273 btrfs_set_header_level(right, 0); 4275 btrfs_set_header_level(right, 0);
4274 write_extent_buffer(right, root->fs_info->fsid, 4276 write_extent_buffer(right, fs_info->fsid,
4275 btrfs_header_fsid(), BTRFS_FSID_SIZE); 4277 btrfs_header_fsid(), BTRFS_FSID_SIZE);
4276 4278
4277 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 4279 write_extent_buffer(right, fs_info->chunk_tree_uuid,
4278 btrfs_header_chunk_tree_uuid(right), 4280 btrfs_header_chunk_tree_uuid(right),
4279 BTRFS_UUID_SIZE); 4281 BTRFS_UUID_SIZE);
4280 4282
@@ -4297,7 +4299,7 @@ again:
4297 path->nodes[0] = right; 4299 path->nodes[0] = right;
4298 path->slots[0] = 0; 4300 path->slots[0] = 0;
4299 if (path->slots[1] == 0) 4301 if (path->slots[1] == 0)
4300 fixup_low_keys(root, path, &disk_key, 1); 4302 fixup_low_keys(fs_info, path, &disk_key, 1);
4301 } 4303 }
4302 btrfs_mark_buffer_dirty(right); 4304 btrfs_mark_buffer_dirty(right);
4303 return ret; 4305 return ret;
@@ -4615,7 +4617,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
4615 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 4617 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4616 btrfs_set_item_key(leaf, &disk_key, slot); 4618 btrfs_set_item_key(leaf, &disk_key, slot);
4617 if (slot == 0) 4619 if (slot == 0)
4618 fixup_low_keys(root, path, &disk_key, 1); 4620 fixup_low_keys(root->fs_info, path, &disk_key, 1);
4619 } 4621 }
4620 4622
4621 item = btrfs_item_nr(slot); 4623 item = btrfs_item_nr(slot);
@@ -4716,7 +4718,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4716 4718
4717 if (path->slots[0] == 0) { 4719 if (path->slots[0] == 0) {
4718 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 4720 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4719 fixup_low_keys(root, path, &disk_key, 1); 4721 fixup_low_keys(root->fs_info, path, &disk_key, 1);
4720 } 4722 }
4721 btrfs_unlock_up_safe(path, 1); 4723 btrfs_unlock_up_safe(path, 1);
4722 4724
@@ -4888,7 +4890,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4888 struct btrfs_disk_key disk_key; 4890 struct btrfs_disk_key disk_key;
4889 4891
4890 btrfs_node_key(parent, &disk_key, 0); 4892 btrfs_node_key(parent, &disk_key, 0);
4891 fixup_low_keys(root, path, &disk_key, level + 1); 4893 fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
4892 } 4894 }
4893 btrfs_mark_buffer_dirty(parent); 4895 btrfs_mark_buffer_dirty(parent);
4894} 4896}
@@ -4981,7 +4983,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4981 btrfs_set_header_level(leaf, 0); 4983 btrfs_set_header_level(leaf, 0);
4982 } else { 4984 } else {
4983 btrfs_set_path_blocking(path); 4985 btrfs_set_path_blocking(path);
4984 clean_tree_block(trans, root, leaf); 4986 clean_tree_block(trans, root->fs_info, leaf);
4985 btrfs_del_leaf(trans, root, path, leaf); 4987 btrfs_del_leaf(trans, root, path, leaf);
4986 } 4988 }
4987 } else { 4989 } else {
@@ -4990,7 +4992,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4990 struct btrfs_disk_key disk_key; 4992 struct btrfs_disk_key disk_key;
4991 4993
4992 btrfs_item_key(leaf, &disk_key, 0); 4994 btrfs_item_key(leaf, &disk_key, 0);
4993 fixup_low_keys(root, path, &disk_key, 1); 4995 fixup_low_keys(root->fs_info, path, &disk_key, 1);
4994 } 4996 }
4995 4997
4996 /* delete the leaf if it is mostly empty */ 4998 /* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..6f364e1d8d3d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1061,6 +1061,12 @@ struct btrfs_block_group_item {
1061 __le64 flags; 1061 __le64 flags;
1062} __attribute__ ((__packed__)); 1062} __attribute__ ((__packed__));
1063 1063
1064#define BTRFS_QGROUP_LEVEL_SHIFT 48
1065static inline u64 btrfs_qgroup_level(u64 qgroupid)
1066{
1067 return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
1068}
1069
1064/* 1070/*
1065 * is subvolume quota turned on? 1071 * is subvolume quota turned on?
1066 */ 1072 */
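
Note: a btrfs qgroupid packs the qgroup level into its top 16 bits, which is what the "level/id" notation in btrfs-progs (e.g. 1/100) refers to; btrfs_qgroup_level() extracts it, and the is_fstree() change below uses it to reject ids above level 0. Sketch:

	u64 qgroupid = (1ULL << BTRFS_QGROUP_LEVEL_SHIFT) | 100;	/* "1/100" */

	btrfs_qgroup_level(qgroupid);	/* == 1 */
	btrfs_qgroup_level(257);	/* == 0: a plain subvolume id */
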
@@ -1256,6 +1262,20 @@ struct btrfs_caching_control {
1256 atomic_t count; 1262 atomic_t count;
1257}; 1263};
1258 1264
1265struct btrfs_io_ctl {
1266 void *cur, *orig;
1267 struct page *page;
1268 struct page **pages;
1269 struct btrfs_root *root;
1270 struct inode *inode;
1271 unsigned long size;
1272 int index;
1273 int num_pages;
1274 int entries;
1275 int bitmaps;
1276 unsigned check_crcs:1;
1277};
1278
1259struct btrfs_block_group_cache { 1279struct btrfs_block_group_cache {
1260 struct btrfs_key key; 1280 struct btrfs_key key;
1261 struct btrfs_block_group_item item; 1281 struct btrfs_block_group_item item;
@@ -1321,6 +1341,9 @@ struct btrfs_block_group_cache {
1321 1341
1322 /* For dirty block groups */ 1342 /* For dirty block groups */
1323 struct list_head dirty_list; 1343 struct list_head dirty_list;
1344 struct list_head io_list;
1345
1346 struct btrfs_io_ctl io_ctl;
1324}; 1347};
1325 1348
1326/* delayed seq elem */ 1349/* delayed seq elem */
@@ -1329,6 +1352,8 @@ struct seq_list {
1329 u64 seq; 1352 u64 seq;
1330}; 1353};
1331 1354
1355#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
1356
1332enum btrfs_orphan_cleanup_state { 1357enum btrfs_orphan_cleanup_state {
1333 ORPHAN_CLEANUP_STARTED = 1, 1358 ORPHAN_CLEANUP_STARTED = 1,
1334 ORPHAN_CLEANUP_DONE = 2, 1359 ORPHAN_CLEANUP_DONE = 2,
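
Note: SEQ_LIST_INIT replaces the bare `= {}` initializers seen in the backref.c hunks above: a zeroed struct seq_list leaves the embedded list_head with NULL pointers, whereas LIST_HEAD_INIT makes it a valid self-pointing empty list, so list primitives are safe on it even before it is linked anywhere. Usage:

	struct seq_list elem = SEQ_LIST_INIT(elem);

	/* elem.list.next == elem.list.prev == &elem.list */
	WARN_ON(!list_empty(&elem.list));
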
@@ -1472,6 +1497,12 @@ struct btrfs_fs_info {
1472 struct mutex chunk_mutex; 1497 struct mutex chunk_mutex;
1473 struct mutex volume_mutex; 1498 struct mutex volume_mutex;
1474 1499
1500 /*
1501 * this is taken to make sure we don't set block groups ro after
1502 * the free space cache has been allocated on them
1503 */
1504 struct mutex ro_block_group_mutex;
1505
1475 /* this is used during read/modify/write to make sure 1506 /* this is used during read/modify/write to make sure
1476 * no two ios are trying to mod the same stripe at the same 1507 * no two ios are trying to mod the same stripe at the same
1477 * time 1508 * time
@@ -1513,6 +1544,7 @@ struct btrfs_fs_info {
1513 1544
1514 spinlock_t delayed_iput_lock; 1545 spinlock_t delayed_iput_lock;
1515 struct list_head delayed_iputs; 1546 struct list_head delayed_iputs;
1547 struct rw_semaphore delayed_iput_sem;
1516 1548
1517 /* this protects tree_mod_seq_list */ 1549 /* this protects tree_mod_seq_list */
1518 spinlock_t tree_mod_seq_lock; 1550 spinlock_t tree_mod_seq_lock;
@@ -3295,6 +3327,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
3295} 3327}
3296 3328
3297/* extent-tree.c */ 3329/* extent-tree.c */
3330
3331u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
3332
3298static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, 3333static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3299 unsigned num_items) 3334 unsigned num_items)
3300{ 3335{
@@ -3385,8 +3420,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3385 u64 bytenr, u64 num_bytes, u64 parent, 3420 u64 bytenr, u64 num_bytes, u64 parent,
3386 u64 root_objectid, u64 owner, u64 offset, int no_quota); 3421 u64 root_objectid, u64 owner, u64 offset, int no_quota);
3387 3422
3423int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3424 struct btrfs_root *root);
3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3425int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3389 struct btrfs_root *root); 3426 struct btrfs_root *root);
3427int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3428 struct btrfs_root *root);
3390int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 3429int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
3391int btrfs_free_block_groups(struct btrfs_fs_info *info); 3430int btrfs_free_block_groups(struct btrfs_fs_info *info);
3392int btrfs_read_block_groups(struct btrfs_root *root); 3431int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3415,7 +3454,7 @@ enum btrfs_reserve_flush_enum {
3415 BTRFS_RESERVE_FLUSH_ALL, 3454 BTRFS_RESERVE_FLUSH_ALL,
3416}; 3455};
3417 3456
3418int btrfs_check_data_free_space(struct inode *inode, u64 bytes); 3457int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
3419void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 3458void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
3420void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3459void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
3421 struct btrfs_root *root); 3460 struct btrfs_root *root);
@@ -3438,6 +3477,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
3438 unsigned short type); 3477 unsigned short type);
3439void btrfs_free_block_rsv(struct btrfs_root *root, 3478void btrfs_free_block_rsv(struct btrfs_root *root,
3440 struct btrfs_block_rsv *rsv); 3479 struct btrfs_block_rsv *rsv);
3480void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
3441int btrfs_block_rsv_add(struct btrfs_root *root, 3481int btrfs_block_rsv_add(struct btrfs_root *root,
3442 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 3482 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
3443 enum btrfs_reserve_flush_enum flush); 3483 enum btrfs_reserve_flush_enum flush);
@@ -3484,7 +3524,8 @@ int btrfs_previous_item(struct btrfs_root *root,
3484 int type); 3524 int type);
3485int btrfs_previous_extent_item(struct btrfs_root *root, 3525int btrfs_previous_extent_item(struct btrfs_root *root,
3486 struct btrfs_path *path, u64 min_objectid); 3526 struct btrfs_path *path, u64 min_objectid);
3487void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, 3527void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3528 struct btrfs_path *path,
3488 struct btrfs_key *new_key); 3529 struct btrfs_key *new_key);
3489struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 3530struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
3490struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 3531struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -3909,6 +3950,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
3909 loff_t actual_len, u64 *alloc_hint); 3950 loff_t actual_len, u64 *alloc_hint);
3910int btrfs_inode_check_errors(struct inode *inode); 3951int btrfs_inode_check_errors(struct inode *inode);
3911extern const struct dentry_operations btrfs_dentry_operations; 3952extern const struct dentry_operations btrfs_dentry_operations;
3953#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3954void btrfs_test_inode_set_ops(struct inode *inode);
3955#endif
3912 3956
3913/* ioctl.c */ 3957/* ioctl.c */
3914long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3958long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -4175,7 +4219,8 @@ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
4175static inline int is_fstree(u64 rootid) 4219static inline int is_fstree(u64 rootid)
4176{ 4220{
4177 if (rootid == BTRFS_FS_TREE_OBJECTID || 4221 if (rootid == BTRFS_FS_TREE_OBJECTID ||
4178 (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) 4222 ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
4223 !btrfs_qgroup_level(rootid)))
4179 return 1; 4224 return 1;
4180 return 0; 4225 return 0;
4181} 4226}
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 82f0c7c95474..cde698a07d21 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1383,7 +1383,7 @@ out:
1383 1383
1384 1384
1385static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, 1385static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1386 struct btrfs_root *root, int nr) 1386 struct btrfs_fs_info *fs_info, int nr)
1387{ 1387{
1388 struct btrfs_async_delayed_work *async_work; 1388 struct btrfs_async_delayed_work *async_work;
1389 1389
@@ -1399,7 +1399,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1399 btrfs_async_run_delayed_root, NULL, NULL); 1399 btrfs_async_run_delayed_root, NULL, NULL);
1400 async_work->nr = nr; 1400 async_work->nr = nr;
1401 1401
1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); 1402 btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1403 return 0; 1403 return 0;
1404} 1404}
1405 1405
@@ -1426,6 +1426,7 @@ static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1426void btrfs_balance_delayed_items(struct btrfs_root *root) 1426void btrfs_balance_delayed_items(struct btrfs_root *root)
1427{ 1427{
1428 struct btrfs_delayed_root *delayed_root; 1428 struct btrfs_delayed_root *delayed_root;
1429 struct btrfs_fs_info *fs_info = root->fs_info;
1429 1430
1430 delayed_root = btrfs_get_delayed_root(root); 1431 delayed_root = btrfs_get_delayed_root(root);
1431 1432
@@ -1438,7 +1439,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
1438 1439
1439 seq = atomic_read(&delayed_root->items_seq); 1440 seq = atomic_read(&delayed_root->items_seq);
1440 1441
1441 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0); 1442 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1442 if (ret) 1443 if (ret)
1443 return; 1444 return;
1444 1445
@@ -1447,7 +1448,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
1447 return; 1448 return;
1448 } 1449 }
1449 1450
1450 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH); 1451 btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1451} 1452}
1452 1453
1453/* Will return 0 or -ENOMEM */ 1454/* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6d16bea94e1c..8f8ed7d20bac 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
489 * existing and update must have the same bytenr 489 * existing and update must have the same bytenr
490 */ 490 */
491static noinline void 491static noinline void
492update_existing_head_ref(struct btrfs_delayed_ref_node *existing, 492update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
493 struct btrfs_delayed_ref_node *existing,
493 struct btrfs_delayed_ref_node *update) 494 struct btrfs_delayed_ref_node *update)
494{ 495{
495 struct btrfs_delayed_ref_head *existing_ref; 496 struct btrfs_delayed_ref_head *existing_ref;
496 struct btrfs_delayed_ref_head *ref; 497 struct btrfs_delayed_ref_head *ref;
498 int old_ref_mod;
497 499
498 existing_ref = btrfs_delayed_node_to_head(existing); 500 existing_ref = btrfs_delayed_node_to_head(existing);
499 ref = btrfs_delayed_node_to_head(update); 501 ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
541 * only need the lock for this case cause we could be processing it 543 * only need the lock for this case cause we could be processing it
542 * currently, for refs we just added we know we're a-ok. 544 * currently, for refs we just added we know we're a-ok.
543 */ 545 */
546 old_ref_mod = existing_ref->total_ref_mod;
544 existing->ref_mod += update->ref_mod; 547 existing->ref_mod += update->ref_mod;
548 existing_ref->total_ref_mod += update->ref_mod;
549
550 /*
 551 * If we are going from a positive ref mod to a negative or vice
552 * versa we need to make sure to adjust pending_csums accordingly.
553 */
554 if (existing_ref->is_data) {
555 if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
556 delayed_refs->pending_csums -= existing->num_bytes;
557 if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
558 delayed_refs->pending_csums += existing->num_bytes;
559 }
545 spin_unlock(&existing_ref->lock); 560 spin_unlock(&existing_ref->lock);
546} 561}
547 562
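
Note: pending_csums accumulates the byte count of data extents whose checksum items will have to be deleted when their delayed refs run, so the transaction reservation logic can account for that work; an extent contributes only while its accumulated total_ref_mod is negative (a net free). The sign-transition bookkeeping above, restated:

	old = head->total_ref_mod;
	head->total_ref_mod += update->ref_mod;
	if (head->is_data) {
		if (old < 0 && head->total_ref_mod >= 0)
			refs->pending_csums -= head->num_bytes;	/* no longer a net free */
		else if (old >= 0 && head->total_ref_mod < 0)
			refs->pending_csums += head->num_bytes;	/* became a net free */
	}
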
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
605 head_ref->is_data = is_data; 620 head_ref->is_data = is_data;
606 head_ref->ref_root = RB_ROOT; 621 head_ref->ref_root = RB_ROOT;
607 head_ref->processing = 0; 622 head_ref->processing = 0;
623 head_ref->total_ref_mod = count_mod;
608 624
609 spin_lock_init(&head_ref->lock); 625 spin_lock_init(&head_ref->lock);
610 mutex_init(&head_ref->mutex); 626 mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
614 existing = htree_insert(&delayed_refs->href_root, 630 existing = htree_insert(&delayed_refs->href_root,
615 &head_ref->href_node); 631 &head_ref->href_node);
616 if (existing) { 632 if (existing) {
617 update_existing_head_ref(&existing->node, ref); 633 update_existing_head_ref(delayed_refs, &existing->node, ref);
618 /* 634 /*
619 * we've updated the existing ref, free the newly 635 * we've updated the existing ref, free the newly
620 * allocated ref 636 * allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
622 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 638 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
623 head_ref = existing; 639 head_ref = existing;
624 } else { 640 } else {
641 if (is_data && count_mod < 0)
642 delayed_refs->pending_csums += num_bytes;
625 delayed_refs->num_heads++; 643 delayed_refs->num_heads++;
626 delayed_refs->num_heads_ready++; 644 delayed_refs->num_heads_ready++;
627 atomic_inc(&delayed_refs->num_entries); 645 atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a764e2340d48..5eb0892396d0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
88 struct rb_node href_node; 88 struct rb_node href_node;
89 89
90 struct btrfs_delayed_extent_op *extent_op; 90 struct btrfs_delayed_extent_op *extent_op;
91
92 /*
93 * This is used to track the final ref_mod from all the refs associated
 94 * with this head ref; it is not adjusted as delayed refs are run,
 95 * and is meant to track whether we need to do the csum accounting or not.
96 */
97 int total_ref_mod;
98
91 /* 99 /*
92 * when a new extent is allocated, it is just reserved in memory 100 * when a new extent is allocated, it is just reserved in memory
93 * The actual extent isn't inserted into the extent allocation tree 101 * The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
138 /* total number of head nodes ready for processing */ 146 /* total number of head nodes ready for processing */
139 unsigned long num_heads_ready; 147 unsigned long num_heads_ready;
140 148
149 u64 pending_csums;
150
141 /* 151 /*
142 * set when the tree is flushing before a transaction commit, 152 * set when the tree is flushing before a transaction commit,
143 * used by the throttling code to decide if new updates need 153 * used by the throttling code to decide if new updates need
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5ec03d999c37..0573848c7333 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
670 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 670 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
671 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 671 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
672 srcdev = dev_replace->srcdev; 672 srcdev = dev_replace->srcdev;
673 args->status.progress_1000 = div64_u64(dev_replace->cursor_left, 673 args->status.progress_1000 = div_u64(dev_replace->cursor_left,
674 div64_u64(btrfs_device_get_total_bytes(srcdev), 1000)); 674 div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
675 break; 675 break;
676 } 676 }
677 btrfs_dev_replace_unlock(dev_replace); 677 btrfs_dev_replace_unlock(dev_replace);
@@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data)
806 btrfs_dev_replace_status(fs_info, status_args); 806 btrfs_dev_replace_status(fs_info, status_args);
807 progress = status_args->status.progress_1000; 807 progress = status_args->status.progress_1000;
808 kfree(status_args); 808 kfree(status_args);
809 do_div(progress, 10); 809 progress = div_u64(progress, 10);
810 printk_in_rcu(KERN_INFO 810 printk_in_rcu(KERN_INFO
811 "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", 811 "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
812 dev_replace->srcdev->missing ? "<missing disk>" : 812 dev_replace->srcdev->missing ? "<missing disk>" :
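
Note: the dev-replace conversions swap the 64-by-64 helpers for the cheaper 32-bit-divisor forms: div_u64(dividend, divisor) returns the quotient directly, while do_div(x, base) is a macro that divides x in place and evaluates to the remainder, which is why `progress = div_u64(progress, 10)` is the clearer spelling. Sketch:

	u64 progress = 12345;
	u32 rem;

	progress = div_u64(progress, 10);	/* quotient: 1234 */
	rem = do_div(progress, 10);		/* progress = 123, rem = 4 */
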
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..2ef9a4b72d06 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -54,7 +54,7 @@
54#include <asm/cpufeature.h> 54#include <asm/cpufeature.h>
55#endif 55#endif
56 56
57static struct extent_io_ops btree_extent_io_ops; 57static const struct extent_io_ops btree_extent_io_ops;
58static void end_workqueue_fn(struct btrfs_work *work); 58static void end_workqueue_fn(struct btrfs_work *work);
59static void free_fs_root(struct btrfs_root *root); 59static void free_fs_root(struct btrfs_root *root);
60static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 60static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result)
274 * compute the csum for a btree block, and either verify it or write it 274 * compute the csum for a btree block, and either verify it or write it
275 * into the csum field of the block. 275 * into the csum field of the block.
276 */ 276 */
277static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, 277static int csum_tree_block(struct btrfs_fs_info *fs_info,
278 struct extent_buffer *buf,
278 int verify) 279 int verify)
279{ 280{
280 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 281 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
281 char *result = NULL; 282 char *result = NULL;
282 unsigned long len; 283 unsigned long len;
283 unsigned long cur_len; 284 unsigned long cur_len;
@@ -302,7 +303,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
302 offset += cur_len; 303 offset += cur_len;
303 } 304 }
304 if (csum_size > sizeof(inline_result)) { 305 if (csum_size > sizeof(inline_result)) {
305 result = kzalloc(csum_size * sizeof(char), GFP_NOFS); 306 result = kzalloc(csum_size, GFP_NOFS);
306 if (!result) 307 if (!result)
307 return 1; 308 return 1;
308 } else { 309 } else {
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
321 printk_ratelimited(KERN_WARNING 322 printk_ratelimited(KERN_WARNING
322 "BTRFS: %s checksum verify failed on %llu wanted %X found %X " 323 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
323 "level %d\n", 324 "level %d\n",
324 root->fs_info->sb->s_id, buf->start, 325 fs_info->sb->s_id, buf->start,
325 val, found, btrfs_header_level(buf)); 326 val, found, btrfs_header_level(buf));
326 if (result != (char *)&inline_result) 327 if (result != (char *)&inline_result)
327 kfree(result); 328 kfree(result);
@@ -418,12 +419,6 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
418 419
419 if (memcmp(raw_disk_sb, result, csum_size)) 420 if (memcmp(raw_disk_sb, result, csum_size))
420 ret = 1; 421 ret = 1;
421
422 if (ret && btrfs_super_generation(disk_sb) < 10) {
423 printk(KERN_WARNING
424 "BTRFS: super block crcs don't match, older mkfs detected\n");
425 ret = 0;
426 }
427 } 422 }
428 423
429 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { 424 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
@@ -501,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
501 * we only fill in the checksum field in the first page of a multi-page block 496 * we only fill in the checksum field in the first page of a multi-page block
502 */ 497 */
503 498
504static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 499static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
505{ 500{
506 u64 start = page_offset(page); 501 u64 start = page_offset(page);
507 u64 found_start; 502 u64 found_start;
@@ -513,14 +508,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
508 found_start = btrfs_header_bytenr(eb);
509 if (WARN_ON(found_start != start || !PageUptodate(page)))
510 return 0;
516 csum_tree_block(root, eb, 0);
511 csum_tree_block(fs_info, eb, 0);
512 return 0;
513}
514
520static int check_tree_block_fsid(struct btrfs_root *root,
515static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
516 struct extent_buffer *eb)
517{
523 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
518 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
519 u8 fsid[BTRFS_UUID_SIZE];
520 int ret = 1;
521
@@ -640,7 +635,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
635 ret = -EIO;
636 goto err;
637 }
643 if (check_tree_block_fsid(root, eb)) {
638 if (check_tree_block_fsid(root->fs_info, eb)) {
639 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
640 eb->fs_info->sb->s_id, eb->start);
641 ret = -EIO;
@@ -657,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
652 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
653 eb, found_level);
654
660 ret = csum_tree_block(root, eb, 1);
655 ret = csum_tree_block(root->fs_info, eb, 1);
656 if (ret) {
657 ret = -EIO;
658 goto err;
@@ -882,7 +877,7 @@ static int btree_csum_one_bio(struct bio *bio)
877
878 bio_for_each_segment_all(bvec, bio, i) {
879 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
885 ret = csum_dirty_buffer(root, bvec->bv_page);
880 ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
881 if (ret)
882 break;
883 }
@@ -1119,10 +1114,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
1114 return 0;
1115}
1116
1122struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
1117struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
1118 u64 bytenr)
1119{
1125 return find_extent_buffer(root->fs_info, bytenr);
1120 return find_extent_buffer(fs_info, bytenr);
1121}
1122
1123struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
@@ -1165,11 +1160,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1160
1161}
1162
1168void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1163void clean_tree_block(struct btrfs_trans_handle *trans,
1164 struct btrfs_fs_info *fs_info,
1165 struct extent_buffer *buf)
1166{
1171 struct btrfs_fs_info *fs_info = root->fs_info;
1172
1167 if (btrfs_header_generation(buf) ==
1168 fs_info->running_transaction->transid) {
1169 btrfs_assert_tree_locked(buf);
@@ -2146,6 +2140,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
2140 }
2141}
2142
2143static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
2144{
2145 mutex_init(&fs_info->scrub_lock);
2146 atomic_set(&fs_info->scrubs_running, 0);
2147 atomic_set(&fs_info->scrub_pause_req, 0);
2148 atomic_set(&fs_info->scrubs_paused, 0);
2149 atomic_set(&fs_info->scrub_cancel_req, 0);
2150 init_waitqueue_head(&fs_info->scrub_pause_wait);
2151 fs_info->scrub_workers_refcnt = 0;
2152}
2153
2154static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
2155{
2156 spin_lock_init(&fs_info->balance_lock);
2157 mutex_init(&fs_info->balance_mutex);
2158 atomic_set(&fs_info->balance_running, 0);
2159 atomic_set(&fs_info->balance_pause_req, 0);
2160 atomic_set(&fs_info->balance_cancel_req, 0);
2161 fs_info->balance_ctl = NULL;
2162 init_waitqueue_head(&fs_info->balance_wait_q);
2163}
2164
2165static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info,
2166 struct btrfs_root *tree_root)
2167{
2168 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
2169 set_nlink(fs_info->btree_inode, 1);
2170 /*
2171 * we set the i_size on the btree inode to the max possible int.
2172 * the real end of the address space is determined by all of
2173 * the devices in the system
2174 */
2175 fs_info->btree_inode->i_size = OFFSET_MAX;
2176 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2177
2178 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2179 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2180 fs_info->btree_inode->i_mapping);
2181 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2182 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2183
2184 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
2185
2186 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2187 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2188 sizeof(struct btrfs_key));
2189 set_bit(BTRFS_INODE_DUMMY,
2190 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2191 btrfs_insert_inode_hash(fs_info->btree_inode);
2192}
2193
2194static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
2195{
2196 fs_info->dev_replace.lock_owner = 0;
2197 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2198 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2199 mutex_init(&fs_info->dev_replace.lock_management_lock);
2200 mutex_init(&fs_info->dev_replace.lock);
2201 init_waitqueue_head(&fs_info->replace_wait);
2202}
2203
2204static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
2205{
2206 spin_lock_init(&fs_info->qgroup_lock);
2207 mutex_init(&fs_info->qgroup_ioctl_lock);
2208 fs_info->qgroup_tree = RB_ROOT;
2209 fs_info->qgroup_op_tree = RB_ROOT;
2210 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2211 fs_info->qgroup_seq = 1;
2212 fs_info->quota_enabled = 0;
2213 fs_info->pending_quota_state = 0;
2214 fs_info->qgroup_ulist = NULL;
2215 mutex_init(&fs_info->qgroup_rescan_lock);
2216}
2217
2218static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
2219 struct btrfs_fs_devices *fs_devices)
2220{
2221 int max_active = fs_info->thread_pool_size;
2222 unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
2223
2224 fs_info->workers =
2225 btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
2226 max_active, 16);
2227
2228 fs_info->delalloc_workers =
2229 btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
2230
2231 fs_info->flush_workers =
2232 btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
2233
2234 fs_info->caching_workers =
2235 btrfs_alloc_workqueue("cache", flags, max_active, 0);
2236
2237 /*
2238 * a higher idle thresh on the submit workers makes it much more
2239 * likely that bios will be sent down in a sane order to the
2240 * devices
2241 */
2242 fs_info->submit_workers =
2243 btrfs_alloc_workqueue("submit", flags,
2244 min_t(u64, fs_devices->num_devices,
2245 max_active), 64);
2246
2247 fs_info->fixup_workers =
2248 btrfs_alloc_workqueue("fixup", flags, 1, 0);
2249
2250 /*
2251 * endios are largely parallel and should have a very
2252 * low idle thresh
2253 */
2254 fs_info->endio_workers =
2255 btrfs_alloc_workqueue("endio", flags, max_active, 4);
2256 fs_info->endio_meta_workers =
2257 btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
2258 fs_info->endio_meta_write_workers =
2259 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2260 fs_info->endio_raid56_workers =
2261 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2262 fs_info->endio_repair_workers =
2263 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2264 fs_info->rmw_workers =
2265 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2266 fs_info->endio_write_workers =
2267 btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
2268 fs_info->endio_freespace_worker =
2269 btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
2270 fs_info->delayed_workers =
2271 btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
2272 fs_info->readahead_workers =
2273 btrfs_alloc_workqueue("readahead", flags, max_active, 2);
2274 fs_info->qgroup_rescan_workers =
2275 btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
2276 fs_info->extent_workers =
2277 btrfs_alloc_workqueue("extent-refs", flags,
2278 min_t(u64, fs_devices->num_devices,
2279 max_active), 8);
2280
2281 if (!(fs_info->workers && fs_info->delalloc_workers &&
2282 fs_info->submit_workers && fs_info->flush_workers &&
2283 fs_info->endio_workers && fs_info->endio_meta_workers &&
2284 fs_info->endio_meta_write_workers &&
2285 fs_info->endio_repair_workers &&
2286 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2287 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2288 fs_info->caching_workers && fs_info->readahead_workers &&
2289 fs_info->fixup_workers && fs_info->delayed_workers &&
2290 fs_info->extent_workers &&
2291 fs_info->qgroup_rescan_workers)) {
2292 return -ENOMEM;
2293 }
2294
2295 return 0;
2296}
2297
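The consolidated helper makes the allocation policy easy to see: every queue is created first, then one combined check fails the whole set, so open_ctree() has a single error path to unwind. A minimal userspace sketch of that shape, with a hypothetical pool_create() standing in for btrfs_alloc_workqueue():

#include <stdlib.h>

struct pool { int max_active; };

static struct pool *pool_create(int max_active)
{
	struct pool *p = malloc(sizeof(*p));	/* may return NULL, like the kernel helper */
	if (p)
		p->max_active = max_active;
	return p;
}

struct queues { struct pool *workers, *delalloc, *endio; };

static int init_queues(struct queues *q, int max_active)
{
	q->workers  = pool_create(max_active);
	q->delalloc = pool_create(max_active);
	q->endio    = pool_create(max_active);
	/* allocate everything, then decide success exactly once */
	if (!(q->workers && q->delalloc && q->endio))
		return -1;	/* caller unwinds, mirroring fail_sb_buffer */
	return 0;
}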
2298static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2299 struct btrfs_fs_devices *fs_devices)
2300{
2301 int ret;
2302 struct btrfs_root *tree_root = fs_info->tree_root;
2303 struct btrfs_root *log_tree_root;
2304 struct btrfs_super_block *disk_super = fs_info->super_copy;
2305 u64 bytenr = btrfs_super_log_root(disk_super);
2306
2307 if (fs_devices->rw_devices == 0) {
2308 printk(KERN_WARNING "BTRFS: log replay required "
2309 "on RO media\n");
2310 return -EIO;
2311 }
2312
2313 log_tree_root = btrfs_alloc_root(fs_info);
2314 if (!log_tree_root)
2315 return -ENOMEM;
2316
2317 __setup_root(tree_root->nodesize, tree_root->sectorsize,
2318 tree_root->stripesize, log_tree_root, fs_info,
2319 BTRFS_TREE_LOG_OBJECTID);
2320
2321 log_tree_root->node = read_tree_block(tree_root, bytenr,
2322 fs_info->generation + 1);
2323 if (!log_tree_root->node ||
2324 !extent_buffer_uptodate(log_tree_root->node)) {
2325 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2326 free_extent_buffer(log_tree_root->node);
2327 kfree(log_tree_root);
2328 return -EIO;
2329 }
2330 /* returns with log_tree_root freed on success */
2331 ret = btrfs_recover_log_trees(log_tree_root);
2332 if (ret) {
2333 btrfs_error(tree_root->fs_info, ret,
2334 "Failed to recover log tree");
2335 free_extent_buffer(log_tree_root->node);
2336 kfree(log_tree_root);
2337 return ret;
2338 }
2339
2340 if (fs_info->sb->s_flags & MS_RDONLY) {
2341 ret = btrfs_commit_super(tree_root);
2342 if (ret)
2343 return ret;
2344 }
2345
2346 return 0;
2347}
2348
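Pulled out this way, log replay reads as four ordered steps: refuse read-only media, read the log root, recover, and on a read-only mount commit immediately so the replayed state reaches disk. A compile-checkable outline with invented stub names (not the kernel calls):

static int read_log_root(void)     { return 0; }
static int recover_log_trees(void) { return 0; }
static int commit_super(void)      { return 0; }

static int replay_log(int rw_devices, int readonly)
{
	if (rw_devices == 0)
		return -5;		/* -EIO: replay needs a writable device */
	if (read_log_root() || recover_log_trees())
		return -5;
	if (readonly)
		return commit_super();	/* persist the replay right away */
	return 0;
}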
2349static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
2350 struct btrfs_root *tree_root)
2351{
2352 struct btrfs_root *root;
2353 struct btrfs_key location;
2354 int ret;
2355
2356 location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
2357 location.type = BTRFS_ROOT_ITEM_KEY;
2358 location.offset = 0;
2359
2360 root = btrfs_read_tree_root(tree_root, &location);
2361 if (IS_ERR(root))
2362 return PTR_ERR(root);
2363 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2364 fs_info->extent_root = root;
2365
2366 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2367 root = btrfs_read_tree_root(tree_root, &location);
2368 if (IS_ERR(root))
2369 return PTR_ERR(root);
2370 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2371 fs_info->dev_root = root;
2372 btrfs_init_devices_late(fs_info);
2373
2374 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2375 root = btrfs_read_tree_root(tree_root, &location);
2376 if (IS_ERR(root))
2377 return PTR_ERR(root);
2378 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2379 fs_info->csum_root = root;
2380
2381 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2382 root = btrfs_read_tree_root(tree_root, &location);
2383 if (!IS_ERR(root)) {
2384 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2385 fs_info->quota_enabled = 1;
2386 fs_info->pending_quota_state = 1;
2387 fs_info->quota_root = root;
2388 }
2389
2390 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2391 root = btrfs_read_tree_root(tree_root, &location);
2392 if (IS_ERR(root)) {
2393 ret = PTR_ERR(root);
2394 if (ret != -ENOENT)
2395 return ret;
2396 } else {
2397 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2398 fs_info->uuid_root = root;
2399 }
2400
2401 return 0;
2402}
2403
2404int open_ctree(struct super_block *sb,
2405 struct btrfs_fs_devices *fs_devices,
2406 char *options)
@@ -2160,21 +2415,12 @@ int open_ctree(struct super_block *sb,
2415 struct btrfs_super_block *disk_super;
2416 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2417 struct btrfs_root *tree_root;
2163 struct btrfs_root *extent_root;
2164 struct btrfs_root *csum_root;
2418 struct btrfs_root *chunk_root;
2166 struct btrfs_root *dev_root;
2167 struct btrfs_root *quota_root;
2168 struct btrfs_root *uuid_root;
2169 struct btrfs_root *log_tree_root;
2419 int ret;
2420 int err = -EINVAL;
2421 int num_backups_tried = 0;
2422 int backup_index = 0;
2423 int max_active;
2175 int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
2176 bool create_uuid_tree;
2177 bool check_uuid_tree;
2424
2425 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
2426 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2241,11 +2487,12 @@ int open_ctree(struct super_block *sb,
2487 spin_lock_init(&fs_info->qgroup_op_lock);
2488 spin_lock_init(&fs_info->buffer_lock);
2489 spin_lock_init(&fs_info->unused_bgs_lock);
2244 mutex_init(&fs_info->unused_bg_unpin_mutex);
2490 rwlock_init(&fs_info->tree_mod_log_lock);
2491 mutex_init(&fs_info->unused_bg_unpin_mutex);
2492 mutex_init(&fs_info->reloc_mutex);
2493 mutex_init(&fs_info->delalloc_root_mutex);
2494 seqlock_init(&fs_info->profiles_lock);
2495 init_rwsem(&fs_info->delayed_iput_sem);
2496
2497 init_completion(&fs_info->kobj_unregister);
2498 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2276,7 +2523,7 @@ int open_ctree(struct super_block *sb,
2523 fs_info->free_chunk_space = 0;
2524 fs_info->tree_mod_log = RB_ROOT;
2525 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2279 fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
2526 fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
2527 /* readahead state */
2528 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
2529 spin_lock_init(&fs_info->reada_lock);
@@ -2294,55 +2541,18 @@ int open_ctree(struct super_block *sb,
2541 }
2542 btrfs_init_delayed_root(fs_info->delayed_root);
2543
2297 mutex_init(&fs_info->scrub_lock);
2544 btrfs_init_scrub(fs_info);
2298 atomic_set(&fs_info->scrubs_running, 0);
2299 atomic_set(&fs_info->scrub_pause_req, 0);
2300 atomic_set(&fs_info->scrubs_paused, 0);
2301 atomic_set(&fs_info->scrub_cancel_req, 0);
2302 init_waitqueue_head(&fs_info->replace_wait);
2303 init_waitqueue_head(&fs_info->scrub_pause_wait);
2304 fs_info->scrub_workers_refcnt = 0;
2545#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2546 fs_info->check_integrity_print_mask = 0;
2547#endif
2308
2548 btrfs_init_balance(fs_info);
2309 spin_lock_init(&fs_info->balance_lock);
2310 mutex_init(&fs_info->balance_mutex);
2311 atomic_set(&fs_info->balance_running, 0);
2312 atomic_set(&fs_info->balance_pause_req, 0);
2313 atomic_set(&fs_info->balance_cancel_req, 0);
2314 fs_info->balance_ctl = NULL;
2315 init_waitqueue_head(&fs_info->balance_wait_q);
2549 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
2550
2551 sb->s_blocksize = 4096;
2552 sb->s_blocksize_bits = blksize_bits(4096);
2553 sb->s_bdi = &fs_info->bdi;
2554
2322 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
2555 btrfs_init_btree_inode(fs_info, tree_root);
2323 set_nlink(fs_info->btree_inode, 1);
2324 /*
2325 * we set the i_size on the btree inode to the max possible int.
2326 * the real end of the address space is determined by all of
2327 * the devices in the system
2328 */
2329 fs_info->btree_inode->i_size = OFFSET_MAX;
2330 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2331
2332 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2333 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2334 fs_info->btree_inode->i_mapping);
2335 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2336 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2337
2338 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
2339
2340 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2341 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2342 sizeof(struct btrfs_key));
2343 set_bit(BTRFS_INODE_DUMMY,
2344 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2345 btrfs_insert_inode_hash(fs_info->btree_inode);
2556
2557 spin_lock_init(&fs_info->block_group_cache_lock);
2558 fs_info->block_group_cache_tree = RB_ROOT;
@@ -2363,26 +2573,14 @@ int open_ctree(struct super_block *sb,
2573 mutex_init(&fs_info->transaction_kthread_mutex);
2574 mutex_init(&fs_info->cleaner_mutex);
2575 mutex_init(&fs_info->volume_mutex);
2576 mutex_init(&fs_info->ro_block_group_mutex);
2577 init_rwsem(&fs_info->commit_root_sem);
2578 init_rwsem(&fs_info->cleanup_work_sem);
2579 init_rwsem(&fs_info->subvol_sem);
2580 sema_init(&fs_info->uuid_tree_rescan_sem, 1);
2370 fs_info->dev_replace.lock_owner = 0;
2371 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2372 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2373 mutex_init(&fs_info->dev_replace.lock_management_lock);
2374 mutex_init(&fs_info->dev_replace.lock);
2581
2376 spin_lock_init(&fs_info->qgroup_lock);
2582 btrfs_init_dev_replace_locks(fs_info);
2377 mutex_init(&fs_info->qgroup_ioctl_lock);
2583 btrfs_init_qgroup(fs_info);
2378 fs_info->qgroup_tree = RB_ROOT;
2379 fs_info->qgroup_op_tree = RB_ROOT;
2380 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2381 fs_info->qgroup_seq = 1;
2382 fs_info->quota_enabled = 0;
2383 fs_info->pending_quota_state = 0;
2384 fs_info->qgroup_ulist = NULL;
2385 mutex_init(&fs_info->qgroup_rescan_lock);
2584
2585 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
2586 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2554,75 +2752,9 @@ int open_ctree(struct super_block *sb,
2752
2753 max_active = fs_info->thread_pool_size;
2754
2557 fs_info->workers =
2755 ret = btrfs_init_workqueues(fs_info, fs_devices);
2558 btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
2756 if (ret) {
2559 max_active, 16);
2757 err = ret;
2560
2561 fs_info->delalloc_workers =
2562 btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
2563
2564 fs_info->flush_workers =
2565 btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
2566
2567 fs_info->caching_workers =
2568 btrfs_alloc_workqueue("cache", flags, max_active, 0);
2569
2570 /*
2571 * a higher idle thresh on the submit workers makes it much more
2572 * likely that bios will be sent down in a sane order to the
2573 * devices
2574 */
2575 fs_info->submit_workers =
2576 btrfs_alloc_workqueue("submit", flags,
2577 min_t(u64, fs_devices->num_devices,
2578 max_active), 64);
2579
2580 fs_info->fixup_workers =
2581 btrfs_alloc_workqueue("fixup", flags, 1, 0);
2582
2583 /*
2584 * endios are largely parallel and should have a very
2585 * low idle thresh
2586 */
2587 fs_info->endio_workers =
2588 btrfs_alloc_workqueue("endio", flags, max_active, 4);
2589 fs_info->endio_meta_workers =
2590 btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
2591 fs_info->endio_meta_write_workers =
2592 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2593 fs_info->endio_raid56_workers =
2594 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2595 fs_info->endio_repair_workers =
2596 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2597 fs_info->rmw_workers =
2598 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2599 fs_info->endio_write_workers =
2600 btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
2601 fs_info->endio_freespace_worker =
2602 btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
2603 fs_info->delayed_workers =
2604 btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
2605 fs_info->readahead_workers =
2606 btrfs_alloc_workqueue("readahead", flags, max_active, 2);
2607 fs_info->qgroup_rescan_workers =
2608 btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
2609 fs_info->extent_workers =
2610 btrfs_alloc_workqueue("extent-refs", flags,
2611 min_t(u64, fs_devices->num_devices,
2612 max_active), 8);
2613
2614 if (!(fs_info->workers && fs_info->delalloc_workers &&
2615 fs_info->submit_workers && fs_info->flush_workers &&
2616 fs_info->endio_workers && fs_info->endio_meta_workers &&
2617 fs_info->endio_meta_write_workers &&
2618 fs_info->endio_repair_workers &&
2619 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2620 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2621 fs_info->caching_workers && fs_info->readahead_workers &&
2622 fs_info->fixup_workers && fs_info->delayed_workers &&
2623 fs_info->extent_workers &&
2624 fs_info->qgroup_rescan_workers)) {
2625 err = -ENOMEM;
2758 goto fail_sb_buffer;
2759 }
2760
@@ -2688,7 +2820,7 @@ int open_ctree(struct super_block *sb,
2820 * keep the device that is marked to be the target device for the
2821 * dev_replace procedure
2822 */
2691 btrfs_close_extra_devices(fs_info, fs_devices, 0);
2823 btrfs_close_extra_devices(fs_devices, 0);
2824
2825 if (!fs_devices->latest_bdev) {
2826 printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
@@ -2714,61 +2846,9 @@ retry_root_backup:
2846 tree_root->commit_root = btrfs_root_node(tree_root);
2847 btrfs_set_root_refs(&tree_root->root_item, 1);
2848
2717 location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
2849 ret = btrfs_read_roots(fs_info, tree_root);
2718 location.type = BTRFS_ROOT_ITEM_KEY;
2850 if (ret)
2719 location.offset = 0;
2720
2721 extent_root = btrfs_read_tree_root(tree_root, &location);
2722 if (IS_ERR(extent_root)) {
2723 ret = PTR_ERR(extent_root);
2724 goto recovery_tree_root;
2725 }
2726 set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
2727 fs_info->extent_root = extent_root;
2728
2729 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2730 dev_root = btrfs_read_tree_root(tree_root, &location);
2731 if (IS_ERR(dev_root)) {
2732 ret = PTR_ERR(dev_root);
2733 goto recovery_tree_root;
2734 }
2735 set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
2736 fs_info->dev_root = dev_root;
2737 btrfs_init_devices_late(fs_info);
2738
2739 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2740 csum_root = btrfs_read_tree_root(tree_root, &location);
2741 if (IS_ERR(csum_root)) {
2742 ret = PTR_ERR(csum_root);
2743 goto recovery_tree_root;
2851 goto recovery_tree_root;
2744 }
2745 set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
2746 fs_info->csum_root = csum_root;
2747
2748 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2749 quota_root = btrfs_read_tree_root(tree_root, &location);
2750 if (!IS_ERR(quota_root)) {
2751 set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
2752 fs_info->quota_enabled = 1;
2753 fs_info->pending_quota_state = 1;
2754 fs_info->quota_root = quota_root;
2755 }
2756
2757 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2758 uuid_root = btrfs_read_tree_root(tree_root, &location);
2759 if (IS_ERR(uuid_root)) {
2760 ret = PTR_ERR(uuid_root);
2761 if (ret != -ENOENT)
2762 goto recovery_tree_root;
2763 create_uuid_tree = true;
2764 check_uuid_tree = false;
2765 } else {
2766 set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
2767 fs_info->uuid_root = uuid_root;
2768 create_uuid_tree = false;
2769 check_uuid_tree =
2770 generation != btrfs_super_uuid_tree_generation(disk_super);
2771 }
2852
2853 fs_info->generation = generation;
2854 fs_info->last_trans_committed = generation;
@@ -2792,7 +2872,7 @@ retry_root_backup:
2872 goto fail_block_groups;
2873 }
2874
2795 btrfs_close_extra_devices(fs_info, fs_devices, 1);
2875 btrfs_close_extra_devices(fs_devices, 1);
2876
2877 ret = btrfs_sysfs_add_one(fs_info);
2878 if (ret) {
@@ -2806,7 +2886,7 @@ retry_root_backup:
2886 goto fail_sysfs;
2887 }
2888
2809 ret = btrfs_read_block_groups(extent_root);
2889 ret = btrfs_read_block_groups(fs_info->extent_root);
2890 if (ret) {
2891 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
2892 goto fail_sysfs;
@@ -2864,48 +2944,11 @@ retry_root_backup:
2944
2945 /* do not make disk changes in broken FS */
2946 if (btrfs_super_log_root(disk_super) != 0) {
2867 u64 bytenr = btrfs_super_log_root(disk_super);
2947 ret = btrfs_replay_log(fs_info, fs_devices);
2868
2869 if (fs_devices->rw_devices == 0) {
2870 printk(KERN_WARNING "BTRFS: log replay required "
2871 "on RO media\n");
2872 err = -EIO;
2873 goto fail_qgroup;
2874 }
2875
2876 log_tree_root = btrfs_alloc_root(fs_info);
2877 if (!log_tree_root) {
2878 err = -ENOMEM;
2879 goto fail_qgroup;
2880 }
2881
2882 __setup_root(nodesize, sectorsize, stripesize,
2883 log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
2884
2885 log_tree_root->node = read_tree_block(tree_root, bytenr,
2886 generation + 1);
2887 if (!log_tree_root->node ||
2888 !extent_buffer_uptodate(log_tree_root->node)) {
2889 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2890 free_extent_buffer(log_tree_root->node);
2891 kfree(log_tree_root);
2892 goto fail_qgroup;
2893 }
2894 /* returns with log_tree_root freed on success */
2895 ret = btrfs_recover_log_trees(log_tree_root);
2948 if (ret) {
2897 btrfs_error(tree_root->fs_info, ret,
2949 err = ret;
2898 "Failed to recover log tree");
2899 free_extent_buffer(log_tree_root->node);
2900 kfree(log_tree_root);
2950 goto fail_qgroup;
2951 }
2903
2904 if (sb->s_flags & MS_RDONLY) {
2905 ret = btrfs_commit_super(tree_root);
2906 if (ret)
2907 goto fail_qgroup;
2908 }
2952 }
2953
2954 ret = btrfs_find_orphan_roots(tree_root);
@@ -2966,7 +3009,7 @@ retry_root_backup:
3009
3010 btrfs_qgroup_rescan_resume(fs_info);
3011
2969 if (create_uuid_tree) {
3012 if (!fs_info->uuid_root) {
3013 pr_info("BTRFS: creating UUID tree\n");
3014 ret = btrfs_create_uuid_tree(fs_info);
3015 if (ret) {
@@ -2975,8 +3018,9 @@ retry_root_backup:
3018 close_ctree(tree_root);
3019 return ret;
3020 }
2978 } else if (check_uuid_tree ||
3021 } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
2979 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
3022 fs_info->generation !=
3023 btrfs_super_uuid_tree_generation(disk_super)) {
3024 pr_info("BTRFS: checking UUID tree\n");
3025 ret = btrfs_check_uuid_tree(fs_info);
3026 if (ret) {
@@ -3668,7 +3712,7 @@ void close_ctree(struct btrfs_root *root)
3712 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3713 ret = btrfs_commit_super(root);
3714 if (ret)
3671 btrfs_err(root->fs_info, "commit super ret %d", ret);
3715 btrfs_err(fs_info, "commit super ret %d", ret);
3716 }
3717
3718 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
@@ -3680,10 +3724,10 @@ void close_ctree(struct btrfs_root *root)
3724 fs_info->closing = 2;
3725 smp_mb();
3726
3683 btrfs_free_qgroup_config(root->fs_info);
3727 btrfs_free_qgroup_config(fs_info);
3728
3729 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3686 btrfs_info(root->fs_info, "at unmount delalloc count %lld",
3730 btrfs_info(fs_info, "at unmount delalloc count %lld",
3731 percpu_counter_sum(&fs_info->delalloc_bytes));
3732 }
3733
@@ -3723,7 +3767,7 @@ void close_ctree(struct btrfs_root *root)
3767
3768 btrfs_free_stripe_hash_table(fs_info);
3769
3726 btrfs_free_block_rsv(root, root->orphan_block_rsv);
3770 __btrfs_free_block_rsv(root->orphan_block_rsv);
3771 root->orphan_block_rsv = NULL;
3772
3773 lock_chunks(root);
@@ -3921,7 +3965,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3965 }
3966 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
3967 + sizeof(struct btrfs_chunk)) {
3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
3968 printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
3969 btrfs_super_sys_array_size(sb),
3970 sizeof(struct btrfs_disk_key)
3971 + sizeof(struct btrfs_chunk));
@@ -4134,7 +4178,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
4178
4179 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
4180 while (start <= end) {
4137 eb = btrfs_find_tree_block(root, start);
4181 eb = btrfs_find_tree_block(root->fs_info, start);
4182 start += root->nodesize;
4183 if (!eb)
4184 continue;
@@ -4285,7 +4329,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
4329 return 0;
4330}
4331
4288static struct extent_io_ops btree_extent_io_ops = {
4332static const struct extent_io_ops btree_extent_io_ops = {
4333 .readpage_end_io_hook = btree_readpage_end_io_hook,
4334 .readpage_io_failed_hook = btree_io_failed_hook,
4335 .submit_bio_hook = btree_submit_bio_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 27d44c0fd236..d4cbfeeeedd4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,7 +52,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
52struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
53 u64 bytenr);
54void clean_tree_block(struct btrfs_trans_handle *trans,
55 struct btrfs_root *root, struct extent_buffer *buf);
55 struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
56int open_ctree(struct super_block *sb,
57 struct btrfs_fs_devices *fs_devices,
58 char *options);
@@ -61,7 +61,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, int max_mirrors);
62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
63int btrfs_commit_super(struct btrfs_root *root);
64struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
64struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
65 u64 bytenr);
66struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
67 struct btrfs_key *location);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 571f402d3fc4..1eef4ee01d1a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2538 * list before we release it.
2539 */
2540 if (btrfs_delayed_ref_is_head(ref)) {
2541 if (locked_ref->is_data &&
2542 locked_ref->total_ref_mod < 0) {
2543 spin_lock(&delayed_refs->lock);
2544 delayed_refs->pending_csums -= ref->num_bytes;
2545 spin_unlock(&delayed_refs->lock);
2546 }
2547 btrfs_delayed_ref_unlock(locked_ref);
2548 locked_ref = NULL;
2549 }
@@ -2561,8 +2567,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2567 */
2568 spin_lock(&delayed_refs->lock);
2569 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2564 avg = div64_u64(avg, 4);
2570 fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */
2565 fs_info->avg_delayed_ref_runtime = avg;
2571 spin_unlock(&delayed_refs->lock);
2572 }
2573 return 0;
@@ -2624,7 +2629,26 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2629 * We don't ever fill up leaves all the way so multiply by 2 just to be
2630 * closer to what we're really going to want to use.
2631 */
2627 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2632 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2633}
2634
2635/*
2636 * Takes the number of bytes to be checksummed and figures out how many leaves it
2637 * would require to store the csums for that many bytes.
2638 */
2639u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
2640{
2641 u64 csum_size;
2642 u64 num_csums_per_leaf;
2643 u64 num_csums;
2644
2645 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2646 num_csums_per_leaf = div64_u64(csum_size,
2647 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2648 num_csums = div64_u64(csum_bytes, root->sectorsize);
2649 num_csums += num_csums_per_leaf - 1;
2650 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2651 return num_csums;
2652}
2653
2654int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
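The new helper's arithmetic, worked through with illustrative constants rather than the kernel's real sizes (assume a 16000-byte leaf payload, 25 bytes of item overhead, 4-byte crc32c csums, 4 KiB sectors): 1 GiB of data produces 262144 csums, 3993 fit per leaf, so 66 leaves are needed.

#include <stdint.h>

static uint64_t csum_bytes_to_leaves(uint64_t csum_bytes)
{
	const uint64_t per_leaf = (16000 - 25) / 4;	/* 3993 csums per leaf */
	uint64_t num_csums = csum_bytes / 4096;

	return (num_csums + per_leaf - 1) / per_leaf;	/* round up */
}
/* csum_bytes_to_leaves(1ULL << 30) == 66 with these assumed sizes */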
@@ -2632,7 +2656,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2656{
2657 struct btrfs_block_rsv *global_rsv;
2658 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2635 u64 num_bytes;
2659 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2660 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2661 u64 num_bytes, num_dirty_bgs_bytes;
2662 int ret = 0;
2663
2664 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
@@ -2640,17 +2666,22 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2666 if (num_heads > 1)
2667 num_bytes += (num_heads - 1) * root->nodesize;
2668 num_bytes <<= 1;
2669 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
2670 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2671 num_dirty_bgs);
2672 global_rsv = &root->fs_info->global_block_rsv;
2673
2674 /*
2675 * If we can't allocate any more chunks let's make sure we have _lots_ of
2676 * wiggle room since running delayed refs can create more delayed refs.
2677 */
2649 if (global_rsv->space_info->full)
2678 if (global_rsv->space_info->full) {
2679 num_dirty_bgs_bytes <<= 1;
2680 num_bytes <<= 1;
2681 }
2682
2683 spin_lock(&global_rsv->lock);
2653 if (global_rsv->reserved <= num_bytes)
2684 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2685 ret = 1;
2686 spin_unlock(&global_rsv->lock);
2687 return ret;
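Spelled out, the enlarged estimate is: double the per-head metadata cost, add one node per pending csum leaf, add the dirty-block-group updates, and double again when no new chunks can be allocated. A standalone restatement (nodesize assumed 16 KiB; per_bg stands in for btrfs_calc_trans_metadata_size()):

#include <stdint.h>

static int need_more_space(uint64_t reserved, uint64_t num_heads,
			   uint64_t csum_leaves, uint64_t per_bg,
			   uint64_t dirty_bgs, int space_full)
{
	const uint64_t nodesize = 16384;	/* assumed */
	uint64_t num_bytes = nodesize;		/* one head */
	uint64_t bg_bytes = per_bg * dirty_bgs;

	if (num_heads > 1)
		num_bytes += (num_heads - 1) * nodesize;
	num_bytes <<= 1;
	num_bytes += csum_leaves * nodesize;	/* new: pending csum items */
	if (space_full) {			/* extra wiggle room */
		bg_bytes <<= 1;
		num_bytes <<= 1;
	}
	return reserved <= num_bytes + bg_bytes;
}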
@@ -3193,7 +3224,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3224 struct inode *inode = NULL;
3225 u64 alloc_hint = 0;
3226 int dcs = BTRFS_DC_ERROR;
3196 int num_pages = 0;
3227 u64 num_pages = 0;
3228 int retries = 0;
3229 int ret = 0;
3230
@@ -3208,6 +3239,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3239 return 0;
3240 }
3241
3242 if (trans->aborted)
3243 return 0;
3244again:
3245 inode = lookup_free_space_inode(root, block_group, path);
3246 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3276,20 @@ again:
3276 */
3277 BTRFS_I(inode)->generation = 0;
3278 ret = btrfs_update_inode(trans, root, inode);
3279 if (ret) {
3280 /*
3281 * So theoretically we could recover from this, simply set the
3282 * super cache generation to 0 so we know to invalidate the
3283 * cache, but then we'd have to keep track of the block groups
3284 * that fail this way so we know we _have_ to reset this cache
3285 * before the next commit or risk reading stale cache. So to
3286 * limit our exposure to horrible edge cases lets just abort the
3287 * transaction, this only happens in really bad situations
3288 * anyway.
3289 */
3290 btrfs_abort_transaction(trans, root, ret);
3291 goto out_put;
3292 }
3293 WARN_ON(ret);
3294
3295 if (i_size_read(inode) > 0) {
@@ -3251,7 +3298,7 @@ again:
3298 if (ret)
3299 goto out_put;
3300
3254 ret = btrfs_truncate_free_space_cache(root, trans, inode);
3301 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
3302 if (ret)
3303 goto out_put;
3304 }
@@ -3277,14 +3324,14 @@ again:
3324 * taking up quite a bit since it's not folded into the other space
3325 * cache.
3326 */
3280 num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
3327 num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
3328 if (!num_pages)
3329 num_pages = 1;
3330
3331 num_pages *= 16;
3332 num_pages *= PAGE_CACHE_SIZE;
3333
3287 ret = btrfs_check_data_free_space(inode, num_pages);
3334 ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
3335 if (ret)
3336 goto out_put;
3337
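With the switch to u64 math the sizing still works out the same way: a 1 GiB block group divides into 4 chunks of 256 MiB, times 16 gives 64 pages, times 4 KiB reserves 256 KiB for the cache. As a check:

#include <stdint.h>

static uint64_t cache_bytes(uint64_t bg_size)
{
	uint64_t num_pages = bg_size / (256ULL << 20);

	if (!num_pages)
		num_pages = 1;
	return num_pages * 16 * 4096;	/* pages to bytes, 4 KiB pages assumed */
}
/* cache_bytes(1ULL << 30) == 262144 */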
@@ -3309,16 +3356,182 @@ out:
3356 return ret;
3357}
3358
3312int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3359int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3360 struct btrfs_root *root)
3361{
3362 struct btrfs_block_group_cache *cache, *tmp;
3363 struct btrfs_transaction *cur_trans = trans->transaction;
3364 struct btrfs_path *path;
3365
3366 if (list_empty(&cur_trans->dirty_bgs) ||
3367 !btrfs_test_opt(root, SPACE_CACHE))
3368 return 0;
3369
3370 path = btrfs_alloc_path();
3371 if (!path)
3372 return -ENOMEM;
3373
3374 /* Could add new block groups, use _safe just in case */
3375 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3376 dirty_list) {
3377 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3378 cache_save_setup(cache, trans, path);
3379 }
3380
3381 btrfs_free_path(path);
3382 return 0;
3383}
3384
3385/*
3386 * transaction commit does final block group cache writeback during a
3387 * critical section where nothing is allowed to change the FS. This is
3388 * required in order for the cache to actually match the block group,
3389 * but can introduce a lot of latency into the commit.
3390 *
3391 * So, btrfs_start_dirty_block_groups is here to kick off block group
3392 * cache IO. There's a chance we'll have to redo some of it if the
3393 * block group changes again during the commit, but it greatly reduces
3394 * the commit latency by getting rid of the easy block groups while
3395 * we're still allowing others to join the commit.
3396 */
3397int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3398 struct btrfs_root *root)
3399{
3400 struct btrfs_block_group_cache *cache;
3401 struct btrfs_transaction *cur_trans = trans->transaction;
3402 int ret = 0;
3318 struct btrfs_path *path;
3403 int should_put;
3404 struct btrfs_path *path = NULL;
3405 LIST_HEAD(dirty);
3406 struct list_head *io = &cur_trans->io_bgs;
3407 int num_started = 0;
3408 int loops = 0;
3409
3320 if (list_empty(&cur_trans->dirty_bgs))
3410 spin_lock(&cur_trans->dirty_bgs_lock);
3411 if (!list_empty(&cur_trans->dirty_bgs)) {
3412 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3413 }
3414 spin_unlock(&cur_trans->dirty_bgs_lock);
3415
3416again:
3417 if (list_empty(&dirty)) {
3418 btrfs_free_path(path);
3419 return 0;
3420 }
3421
3422 /*
3423 * make sure all the block groups on our dirty list actually
3424 * exist
3425 */
3426 btrfs_create_pending_block_groups(trans, root);
3427
3428 if (!path) {
3429 path = btrfs_alloc_path();
3430 if (!path)
3431 return -ENOMEM;
3432 }
3433
3434 while (!list_empty(&dirty)) {
3435 cache = list_first_entry(&dirty,
3436 struct btrfs_block_group_cache,
3437 dirty_list);
3438
3439 /*
3440 * cache_write_mutex is here only to save us from balance
3441 * deleting this block group while we are writing out the
3442 * cache
3443 */
3444 mutex_lock(&trans->transaction->cache_write_mutex);
3445
3446 /*
3447 * this can happen if something re-dirties a block
3448 * group that is already under IO. Just wait for it to
3449 * finish and then do it all again
3450 */
3451 if (!list_empty(&cache->io_list)) {
3452 list_del_init(&cache->io_list);
3453 btrfs_wait_cache_io(root, trans, cache,
3454 &cache->io_ctl, path,
3455 cache->key.objectid);
3456 btrfs_put_block_group(cache);
3457 }
3458
3459
3460 /*
3461 * btrfs_wait_cache_io uses the cache->dirty_list to decide
3462 * if it should update the cache_state. Don't delete
3463 * until after we wait.
3464 *
3465 * Since we're not running in the commit critical section
3466 * we need the dirty_bgs_lock to protect from update_block_group
3467 */
3468 spin_lock(&cur_trans->dirty_bgs_lock);
3469 list_del_init(&cache->dirty_list);
3470 spin_unlock(&cur_trans->dirty_bgs_lock);
3471
3472 should_put = 1;
3473
3474 cache_save_setup(cache, trans, path);
3475
3476 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3477 cache->io_ctl.inode = NULL;
3478 ret = btrfs_write_out_cache(root, trans, cache, path);
3479 if (ret == 0 && cache->io_ctl.inode) {
3480 num_started++;
3481 should_put = 0;
3482
3483 /*
3484 * the cache_write_mutex is protecting
3485 * the io_list
3486 */
3487 list_add_tail(&cache->io_list, io);
3488 } else {
3489 /*
3490 * if we failed to write the cache, the
3491 * generation will be bad and life goes on
3492 */
3493 ret = 0;
3494 }
3495 }
3496 if (!ret)
3497 ret = write_one_cache_group(trans, root, path, cache);
3498 mutex_unlock(&trans->transaction->cache_write_mutex);
3499
3500 /* if it's not on the io list, we need to put the block group */
3501 if (should_put)
3502 btrfs_put_block_group(cache);
3503
3504 if (ret)
3505 break;
3506 }
3507
3508 /*
3509 * go through delayed refs for all the stuff we've just kicked off
3510 * and then loop back (just once)
3511 */
3512 ret = btrfs_run_delayed_refs(trans, root, 0);
3513 if (!ret && loops == 0) {
3514 loops++;
3515 spin_lock(&cur_trans->dirty_bgs_lock);
3516 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3517 spin_unlock(&cur_trans->dirty_bgs_lock);
3518 goto again;
3519 }
3520
3521 btrfs_free_path(path);
3522 return ret;
3523}
3524
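The control flow above boils down to: splice the shared dirty list into a private one under the lock, write it out unlocked, and loop exactly once more for anything re-dirtied meanwhile. A skeleton of that pattern (no locking or IO shown):

static void write_batch(int *batch, int n) { (void)batch; (void)n; }

static void flush_dirty(int *shared, int *n_shared)
{
	int batch[64];
	int loops = 0;
	int n;

again:
	n = *n_shared;			/* "splice": take everything */
	for (int i = 0; i < n; i++)
		batch[i] = shared[i];
	*n_shared = 0;			/* leave the shared list empty */

	write_batch(batch, n);

	if (*n_shared && loops++ == 0)	/* one extra pass for stragglers */
		goto again;
}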
3525int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3526 struct btrfs_root *root)
3527{
3528 struct btrfs_block_group_cache *cache;
3529 struct btrfs_transaction *cur_trans = trans->transaction;
3530 int ret = 0;
3531 int should_put;
3532 struct btrfs_path *path;
3533 struct list_head *io = &cur_trans->io_bgs;
3534 int num_started = 0;
3535
3536 path = btrfs_alloc_path();
3537 if (!path)
@@ -3334,16 +3547,61 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3547 cache = list_first_entry(&cur_trans->dirty_bgs,
3548 struct btrfs_block_group_cache,
3549 dirty_list);
3550
3551 /*
3552 * this can happen if cache_save_setup re-dirties a block
3553 * group that is already under IO. Just wait for it to
3554 * finish and then do it all again
3555 */
3556 if (!list_empty(&cache->io_list)) {
3557 list_del_init(&cache->io_list);
3558 btrfs_wait_cache_io(root, trans, cache,
3559 &cache->io_ctl, path,
3560 cache->key.objectid);
3561 btrfs_put_block_group(cache);
3562 }
3563
3564 /*
3565 * don't remove from the dirty list until after we've waited
3566 * on any pending IO
3567 */
3568 list_del_init(&cache->dirty_list);
3338 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3569 should_put = 1;
3339 cache_save_setup(cache, trans, path);
3570
3571 cache_save_setup(cache, trans, path);
3572
3573 if (!ret)
3341 ret = btrfs_run_delayed_refs(trans, root,
3574 ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
3342 (unsigned long) -1);
3575
3343 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
3576 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3344 btrfs_write_out_cache(root, trans, cache, path);
3577 cache->io_ctl.inode = NULL;
3578 ret = btrfs_write_out_cache(root, trans, cache, path);
3579 if (ret == 0 && cache->io_ctl.inode) {
3580 num_started++;
3581 should_put = 0;
3582 list_add_tail(&cache->io_list, io);
3583 } else {
3584 /*
3585 * if we failed to write the cache, the
3586 * generation will be bad and life goes on
3587 */
3588 ret = 0;
3589 }
3590 }
3591 if (!ret)
3592 ret = write_one_cache_group(trans, root, path, cache);
3593
3594 /* if it's not on the io list, we need to put the block group */
3595 if (should_put)
3596 btrfs_put_block_group(cache);
3597 }
3598
3599 while (!list_empty(io)) {
3600 cache = list_first_entry(io, struct btrfs_block_group_cache,
3601 io_list);
3602 list_del_init(&cache->io_list);
3603 btrfs_wait_cache_io(root, trans, cache,
3604 &cache->io_ctl, path, cache->key.objectid);
3605 btrfs_put_block_group(cache);
3606 }
3607
@@ -3593,19 +3851,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3851 * This will check the space that the inode allocates from to make sure we have
3852 * enough space for bytes.
3853 */
3596int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3854int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
3855{
3856 struct btrfs_space_info *data_sinfo;
3857 struct btrfs_root *root = BTRFS_I(inode)->root;
3858 struct btrfs_fs_info *fs_info = root->fs_info;
3859 u64 used;
3602 int ret = 0, committed = 0, alloc_chunk = 1;
3860 int ret = 0;
3861 int need_commit = 2;
3862 int have_pinned_space;
3863
3864 /* make sure bytes are sectorsize aligned */
3865 bytes = ALIGN(bytes, root->sectorsize);
3866
3867 if (btrfs_is_free_space_inode(inode)) {
3608 committed = 1;
3868 need_commit = 0;
3869 ASSERT(current->journal_info);
3870 }
3871
@@ -3627,7 +3887,7 @@ again:
3887 * if we don't have enough free bytes in this space then we need
3888 * to alloc a new chunk.
3889 */
3630 if (!data_sinfo->full && alloc_chunk) {
3890 if (!data_sinfo->full) {
3891 u64 alloc_target;
3892
3893 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
@@ -3655,8 +3915,10 @@ alloc:
3915 if (ret < 0) {
3916 if (ret != -ENOSPC)
3917 return ret;
3658 else
3918 else {
3919 have_pinned_space = 1;
3920 goto commit_trans;
3921 }
3922 }
3923
3924 if (!data_sinfo)
@@ -3667,26 +3929,39 @@ alloc:
3929
3930 /*
3931 * If we don't have enough pinned space to deal with this
3670 * allocation don't bother committing the transaction.
3932 * allocation, and no removed chunk in current transaction,
3933 * don't bother committing the transaction.
3934 */
3672 if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
3935 have_pinned_space = percpu_counter_compare(
3673 bytes) < 0)
3936 &data_sinfo->total_bytes_pinned,
3674 committed = 1;
3937 used + bytes - data_sinfo->total_bytes);
3938 spin_unlock(&data_sinfo->lock);
3939
3940 /* commit the current transaction and try again */
3941commit_trans:
3679 if (!committed &&
3942 if (need_commit &&
3943 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3681 committed = 1;
3944 need_commit--;
3945
3946 trans = btrfs_join_transaction(root);
3947 if (IS_ERR(trans))
3948 return PTR_ERR(trans);
3686 ret = btrfs_commit_transaction(trans, root);
3949 if (have_pinned_space >= 0 ||
3687 if (ret)
3950 trans->transaction->have_free_bgs ||
3688 return ret;
3951 need_commit > 0) {
3689 goto again;
3952 ret = btrfs_commit_transaction(trans, root);
3953 if (ret)
3954 return ret;
3955 /*
3956 * make sure that all running delayed iput are
3957 * done
3958 */
3959 down_write(&root->fs_info->delayed_iput_sem);
3960 up_write(&root->fs_info->delayed_iput_sem);
3961 goto again;
3962 } else {
3963 btrfs_end_transaction(trans, root);
3964 }
3965 }
3966
3967 trace_btrfs_space_reservation(root->fs_info,
@@ -3694,12 +3969,16 @@ commit_trans:
3969 data_sinfo->flags, bytes, 1);
3970 return -ENOSPC;
3971 }
3972 ret = btrfs_qgroup_reserve(root, write_bytes);
3973 if (ret)
3974 goto out;
3975 data_sinfo->bytes_may_use += bytes;
3976 trace_btrfs_space_reservation(root->fs_info, "space_info",
3977 data_sinfo->flags, bytes, 1);
3978out:
3979 spin_unlock(&data_sinfo->lock);
3980
3702 return 0;
3981 return ret;
3982}
3983
3984/*
@@ -4256,8 +4535,13 @@ out:
4535static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4536 struct btrfs_fs_info *fs_info, u64 used)
4537{
4259 return (used >= div_factor_fine(space_info->total_bytes, 98) &&
4538 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4260 !btrfs_fs_closing(fs_info) &&
4539
4540 /* If we're just plain full then async reclaim just slows us down. */
4541 if (space_info->bytes_used >= thresh)
4542 return 0;
4543
4544 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4545 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4546}
4547
@@ -4312,10 +4596,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4596 if (!btrfs_need_do_async_reclaim(space_info, fs_info,
4597 flush_state))
4598 return;
4315 } while (flush_state <= COMMIT_TRANS);
4599 } while (flush_state < COMMIT_TRANS);
4316
4317 if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state))
4318 queue_work(system_unbound_wq, work);
4600}
4601
4602void btrfs_init_async_reclaim_work(struct work_struct *work)
@@ -4658,6 +4939,11 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
4939 kfree(rsv);
4940}
4941
4942void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
4943{
4944 kfree(rsv);
4945}
4946
4947int btrfs_block_rsv_add(struct btrfs_root *root,
4948 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
4949 enum btrfs_reserve_flush_enum flush)
@@ -4770,10 +5056,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
4770 5056
4771 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * 5057 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
4772 csum_size * 2; 5058 csum_size * 2;
4773 num_bytes += div64_u64(data_used + meta_used, 50); 5059 num_bytes += div_u64(data_used + meta_used, 50);
4774 5060
4775 if (num_bytes * 3 > meta_used) 5061 if (num_bytes * 3 > meta_used)
4776 num_bytes = div64_u64(meta_used, 3); 5062 num_bytes = div_u64(meta_used, 3);
4777 5063
4778 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); 5064 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
4779} 5065}
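The div64_u64() → div_u64() swap is about divisor width, not behaviour: both divisors here (50 and 3) fit in 32 bits, so the cheaper 64-by-32 helper suffices. Paraphrased from include/linux/math64.h:

u64 div_u64(u64 dividend, u32 divisor);		/* 64-by-32 divide, cheaper on 32-bit */
u64 div64_u64(u64 dividend, u64 divisor);	/* full 64-by-64 divide */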
@@ -4956,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
4956 u64 qgroup_reserved) 5242 u64 qgroup_reserved)
4957{ 5243{
4958 btrfs_block_rsv_release(root, rsv, (u64)-1); 5244 btrfs_block_rsv_release(root, rsv, (u64)-1);
4959 if (qgroup_reserved)
4960 btrfs_qgroup_free(root, qgroup_reserved);
4961} 5245}
4962 5246
4963/** 5247/**
@@ -5024,30 +5308,18 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
5024 int reserve) 5308 int reserve)
5025{ 5309{
5026 struct btrfs_root *root = BTRFS_I(inode)->root; 5310 struct btrfs_root *root = BTRFS_I(inode)->root;
5027 u64 csum_size; 5311 u64 old_csums, num_csums;
5028 int num_csums_per_leaf;
5029 int num_csums;
5030 int old_csums;
5031 5312
5032 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && 5313 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
5033 BTRFS_I(inode)->csum_bytes == 0) 5314 BTRFS_I(inode)->csum_bytes == 0)
5034 return 0; 5315 return 0;
5035 5316
5036 old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); 5317 old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5037 if (reserve) 5318 if (reserve)
5038 BTRFS_I(inode)->csum_bytes += num_bytes; 5319 BTRFS_I(inode)->csum_bytes += num_bytes;
5039 else 5320 else
5040 BTRFS_I(inode)->csum_bytes -= num_bytes; 5321 BTRFS_I(inode)->csum_bytes -= num_bytes;
5041 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); 5322 num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5042 num_csums_per_leaf = (int)div64_u64(csum_size,
5043 sizeof(struct btrfs_csum_item) +
5044 sizeof(struct btrfs_disk_key));
5045 num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
5046 num_csums = num_csums + num_csums_per_leaf - 1;
5047 num_csums = num_csums / num_csums_per_leaf;
5048
5049 old_csums = old_csums + num_csums_per_leaf - 1;
5050 old_csums = old_csums / num_csums_per_leaf;
5051 5323
5052 /* No change, no need to reserve more */ 5324 /* No change, no need to reserve more */
5053 if (old_csums == num_csums) 5325 if (old_csums == num_csums)
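The deleted arithmetic is what btrfs_csum_bytes_to_leaves() now encapsulates. Reconstructed from the removed lines as a sketch (the authoritative definition lives in ctree.h; the _sketch name is illustrative):

static u64 csum_bytes_to_leaves_sketch(struct btrfs_root *root, u64 csum_bytes)
{
	u64 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
	u64 per_leaf = div64_u64(csum_size, sizeof(struct btrfs_csum_item) +
					    sizeof(struct btrfs_disk_key));
	u64 csums = div64_u64(csum_bytes, root->sectorsize);

	return div64_u64(csums + per_leaf - 1, per_leaf);	/* round up */
}

Switching old_csums/num_csums to u64 also drops the int casts that could truncate for very large csum_bytes.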
@@ -5094,7 +5366,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5094 num_bytes = ALIGN(num_bytes, root->sectorsize); 5366 num_bytes = ALIGN(num_bytes, root->sectorsize);
5095 5367
5096 spin_lock(&BTRFS_I(inode)->lock); 5368 spin_lock(&BTRFS_I(inode)->lock);
5097 BTRFS_I(inode)->outstanding_extents++; 5369 nr_extents = (unsigned)div64_u64(num_bytes +
5370 BTRFS_MAX_EXTENT_SIZE - 1,
5371 BTRFS_MAX_EXTENT_SIZE);
5372 BTRFS_I(inode)->outstanding_extents += nr_extents;
5373 nr_extents = 0;
5098 5374
5099 if (BTRFS_I(inode)->outstanding_extents > 5375 if (BTRFS_I(inode)->outstanding_extents >
5100 BTRFS_I(inode)->reserved_extents) 5376 BTRFS_I(inode)->reserved_extents)
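Outstanding extents are now accounted per BTRFS_MAX_EXTENT_SIZE chunk rather than once per reservation, since a large delalloc range can later split into that many extents. The expression is a plain ceiling division; for example, assuming the 128 MiB constant from extent_io.h:

/* ceil(num_bytes / BTRFS_MAX_EXTENT_SIZE), as in the hunk above */
nr_extents = (unsigned)div64_u64(num_bytes + BTRFS_MAX_EXTENT_SIZE - 1,
				 BTRFS_MAX_EXTENT_SIZE);
/* a 384 MiB buffered write therefore accounts 3 outstanding extents */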
@@ -5117,8 +5393,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5117 spin_unlock(&BTRFS_I(inode)->lock); 5393 spin_unlock(&BTRFS_I(inode)->lock);
5118 5394
5119 if (root->fs_info->quota_enabled) { 5395 if (root->fs_info->quota_enabled) {
5120 ret = btrfs_qgroup_reserve(root, num_bytes + 5396 ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
5121 nr_extents * root->nodesize);
5122 if (ret) 5397 if (ret)
5123 goto out_fail; 5398 goto out_fail;
5124 } 5399 }
@@ -5126,8 +5401,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5126 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 5401 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
5127 if (unlikely(ret)) { 5402 if (unlikely(ret)) {
5128 if (root->fs_info->quota_enabled) 5403 if (root->fs_info->quota_enabled)
5129 btrfs_qgroup_free(root, num_bytes + 5404 btrfs_qgroup_free(root, nr_extents * root->nodesize);
5130 nr_extents * root->nodesize);
5131 goto out_fail; 5405 goto out_fail;
5132 } 5406 }
5133 5407
@@ -5239,12 +5513,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5239 if (dropped > 0) 5513 if (dropped > 0)
5240 to_free += btrfs_calc_trans_metadata_size(root, dropped); 5514 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5241 5515
5516 if (btrfs_test_is_dummy_root(root))
5517 return;
5518
5242 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5519 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5243 btrfs_ino(inode), to_free, 0); 5520 btrfs_ino(inode), to_free, 0);
5244 if (root->fs_info->quota_enabled) {
5245 btrfs_qgroup_free(root, num_bytes +
5246 dropped * root->nodesize);
5247 }
5248 5521
5249 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 5522 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
5250 to_free); 5523 to_free);
@@ -5269,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
5269{ 5542{
5270 int ret; 5543 int ret;
5271 5544
5272 ret = btrfs_check_data_free_space(inode, num_bytes); 5545 ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
5273 if (ret) 5546 if (ret)
5274 return ret; 5547 return ret;
5275 5548
@@ -5341,14 +5614,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
5341 if (!alloc && cache->cached == BTRFS_CACHE_NO) 5614 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5342 cache_block_group(cache, 1); 5615 cache_block_group(cache, 1);
5343 5616
5344 spin_lock(&trans->transaction->dirty_bgs_lock);
5345 if (list_empty(&cache->dirty_list)) {
5346 list_add_tail(&cache->dirty_list,
5347 &trans->transaction->dirty_bgs);
5348 btrfs_get_block_group(cache);
5349 }
5350 spin_unlock(&trans->transaction->dirty_bgs_lock);
5351
5352 byte_in_group = bytenr - cache->key.objectid; 5617 byte_in_group = bytenr - cache->key.objectid;
5353 WARN_ON(byte_in_group > cache->key.offset); 5618 WARN_ON(byte_in_group > cache->key.offset);
5354 5619
@@ -5397,6 +5662,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
5397 spin_unlock(&info->unused_bgs_lock); 5662 spin_unlock(&info->unused_bgs_lock);
5398 } 5663 }
5399 } 5664 }
5665
5666 spin_lock(&trans->transaction->dirty_bgs_lock);
5667 if (list_empty(&cache->dirty_list)) {
5668 list_add_tail(&cache->dirty_list,
5669 &trans->transaction->dirty_bgs);
5670 trans->transaction->num_dirty_bgs++;
5671 btrfs_get_block_group(cache);
5672 }
5673 spin_unlock(&trans->transaction->dirty_bgs_lock);
5674
5400 btrfs_put_block_group(cache); 5675 btrfs_put_block_group(cache);
5401 total -= num_bytes; 5676 total -= num_bytes;
5402 bytenr += num_bytes; 5677 bytenr += num_bytes;
@@ -6907,15 +7182,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6907 return -ENOSPC; 7182 return -ENOSPC;
6908 } 7183 }
6909 7184
6910 if (btrfs_test_opt(root, DISCARD))
6911 ret = btrfs_discard_extent(root, start, len, NULL);
6912
6913 if (pin) 7185 if (pin)
6914 pin_down_extent(root, cache, start, len, 1); 7186 pin_down_extent(root, cache, start, len, 1);
6915 else { 7187 else {
7188 if (btrfs_test_opt(root, DISCARD))
7189 ret = btrfs_discard_extent(root, start, len, NULL);
6916 btrfs_add_free_space(cache, start, len); 7190 btrfs_add_free_space(cache, start, len);
6917 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 7191 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
6918 } 7192 }
7193
6919 btrfs_put_block_group(cache); 7194 btrfs_put_block_group(cache);
6920 7195
6921 trace_btrfs_reserved_extent_free(root, start, len); 7196 trace_btrfs_reserved_extent_free(root, start, len);
@@ -7046,9 +7321,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7046 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 7321 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7047 ins, size); 7322 ins, size);
7048 if (ret) { 7323 if (ret) {
7324 btrfs_free_path(path);
7049 btrfs_free_and_pin_reserved_extent(root, ins->objectid, 7325 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7050 root->nodesize); 7326 root->nodesize);
7051 btrfs_free_path(path);
7052 return ret; 7327 return ret;
7053 } 7328 }
7054 7329
@@ -7168,7 +7443,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7168 btrfs_set_header_generation(buf, trans->transid); 7443 btrfs_set_header_generation(buf, trans->transid);
7169 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 7444 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
7170 btrfs_tree_lock(buf); 7445 btrfs_tree_lock(buf);
7171 clean_tree_block(trans, root, buf); 7446 clean_tree_block(trans, root->fs_info, buf);
7172 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); 7447 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
7173 7448
7174 btrfs_set_lock_blocking(buf); 7449 btrfs_set_lock_blocking(buf);
@@ -7766,7 +8041,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7766 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 8041 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
7767 blocksize = root->nodesize; 8042 blocksize = root->nodesize;
7768 8043
7769 next = btrfs_find_tree_block(root, bytenr); 8044 next = btrfs_find_tree_block(root->fs_info, bytenr);
7770 if (!next) { 8045 if (!next) {
7771 next = btrfs_find_create_tree_block(root, bytenr); 8046 next = btrfs_find_create_tree_block(root, bytenr);
7772 if (!next) 8047 if (!next)
@@ -7967,7 +8242,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7967 btrfs_set_lock_blocking(eb); 8242 btrfs_set_lock_blocking(eb);
7968 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 8243 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7969 } 8244 }
7970 clean_tree_block(trans, root, eb); 8245 clean_tree_block(trans, root->fs_info, eb);
7971 } 8246 }
7972 8247
7973 if (eb == root->node) { 8248 if (eb == root->node) {
@@ -8484,10 +8759,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8484 8759
8485 BUG_ON(cache->ro); 8760 BUG_ON(cache->ro);
8486 8761
8762again:
8487 trans = btrfs_join_transaction(root); 8763 trans = btrfs_join_transaction(root);
8488 if (IS_ERR(trans)) 8764 if (IS_ERR(trans))
8489 return PTR_ERR(trans); 8765 return PTR_ERR(trans);
8490 8766
8767 /*
8768 * we're not allowed to set block groups readonly after the dirty
8769 * block groups cache has started writing. If it already started,
8770 * back off and let this transaction commit
8771 */
8772 mutex_lock(&root->fs_info->ro_block_group_mutex);
8773 if (trans->transaction->dirty_bg_run) {
8774 u64 transid = trans->transid;
8775
8776 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8777 btrfs_end_transaction(trans, root);
8778
8779 ret = btrfs_wait_for_commit(root, transid);
8780 if (ret)
8781 return ret;
8782 goto again;
8783 }
8784
8785
8491 ret = set_block_group_ro(cache, 0); 8786 ret = set_block_group_ro(cache, 0);
8492 if (!ret) 8787 if (!ret)
8493 goto out; 8788 goto out;
@@ -8502,6 +8797,7 @@ out:
8502 alloc_flags = update_block_group_flags(root, cache->flags); 8797 alloc_flags = update_block_group_flags(root, cache->flags);
8503 check_system_chunk(trans, root, alloc_flags); 8798 check_system_chunk(trans, root, alloc_flags);
8504 } 8799 }
8800 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8505 8801
8506 btrfs_end_transaction(trans, root); 8802 btrfs_end_transaction(trans, root);
8507 return ret; 8803 return ret;
@@ -8671,7 +8967,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8671 min_free <<= 1; 8967 min_free <<= 1;
8672 } else if (index == BTRFS_RAID_RAID0) { 8968 } else if (index == BTRFS_RAID_RAID0) {
8673 dev_min = fs_devices->rw_devices; 8969 dev_min = fs_devices->rw_devices;
8674 do_div(min_free, dev_min); 8970 min_free = div64_u64(min_free, dev_min);
8675 } 8971 }
8676 8972
8677 /* We need to do this so that we can look at pending chunks */ 8973 /* We need to do this so that we can look at pending chunks */
@@ -8943,6 +9239,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8943 INIT_LIST_HEAD(&cache->bg_list); 9239 INIT_LIST_HEAD(&cache->bg_list);
8944 INIT_LIST_HEAD(&cache->ro_list); 9240 INIT_LIST_HEAD(&cache->ro_list);
8945 INIT_LIST_HEAD(&cache->dirty_list); 9241 INIT_LIST_HEAD(&cache->dirty_list);
9242 INIT_LIST_HEAD(&cache->io_list);
8946 btrfs_init_free_space_ctl(cache); 9243 btrfs_init_free_space_ctl(cache);
8947 atomic_set(&cache->trimming, 0); 9244 atomic_set(&cache->trimming, 0);
8948 9245
@@ -9306,7 +9603,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9306 goto out; 9603 goto out;
9307 } 9604 }
9308 9605
9606 /*
9607 * get the inode first so any iput calls done for the io_list
9608 * aren't the final iput (no unlinks allowed now)
9609 */
9309 inode = lookup_free_space_inode(tree_root, block_group, path); 9610 inode = lookup_free_space_inode(tree_root, block_group, path);
9611
9612 mutex_lock(&trans->transaction->cache_write_mutex);
9613 /*
9614 * make sure our free spache cache IO is done before remove the
9615 * free space inode
9616 */
9617 spin_lock(&trans->transaction->dirty_bgs_lock);
9618 if (!list_empty(&block_group->io_list)) {
9619 list_del_init(&block_group->io_list);
9620
9621 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
9622
9623 spin_unlock(&trans->transaction->dirty_bgs_lock);
9624 btrfs_wait_cache_io(root, trans, block_group,
9625 &block_group->io_ctl, path,
9626 block_group->key.objectid);
9627 btrfs_put_block_group(block_group);
9628 spin_lock(&trans->transaction->dirty_bgs_lock);
9629 }
9630
9631 if (!list_empty(&block_group->dirty_list)) {
9632 list_del_init(&block_group->dirty_list);
9633 btrfs_put_block_group(block_group);
9634 }
9635 spin_unlock(&trans->transaction->dirty_bgs_lock);
9636 mutex_unlock(&trans->transaction->cache_write_mutex);
9637
9310 if (!IS_ERR(inode)) { 9638 if (!IS_ERR(inode)) {
9311 ret = btrfs_orphan_add(trans, inode); 9639 ret = btrfs_orphan_add(trans, inode);
9312 if (ret) { 9640 if (ret) {
@@ -9399,18 +9727,29 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9399 9727
9400 spin_lock(&trans->transaction->dirty_bgs_lock); 9728 spin_lock(&trans->transaction->dirty_bgs_lock);
9401 if (!list_empty(&block_group->dirty_list)) { 9729 if (!list_empty(&block_group->dirty_list)) {
9402 list_del_init(&block_group->dirty_list); 9730 WARN_ON(1);
9403 btrfs_put_block_group(block_group); 9731 }
9732 if (!list_empty(&block_group->io_list)) {
9733 WARN_ON(1);
9404 } 9734 }
9405 spin_unlock(&trans->transaction->dirty_bgs_lock); 9735 spin_unlock(&trans->transaction->dirty_bgs_lock);
9406
9407 btrfs_remove_free_space_cache(block_group); 9736 btrfs_remove_free_space_cache(block_group);
9408 9737
9409 spin_lock(&block_group->space_info->lock); 9738 spin_lock(&block_group->space_info->lock);
9410 list_del_init(&block_group->ro_list); 9739 list_del_init(&block_group->ro_list);
9740
9741 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
9742 WARN_ON(block_group->space_info->total_bytes
9743 < block_group->key.offset);
9744 WARN_ON(block_group->space_info->bytes_readonly
9745 < block_group->key.offset);
9746 WARN_ON(block_group->space_info->disk_total
9747 < block_group->key.offset * factor);
9748 }
9411 block_group->space_info->total_bytes -= block_group->key.offset; 9749 block_group->space_info->total_bytes -= block_group->key.offset;
9412 block_group->space_info->bytes_readonly -= block_group->key.offset; 9750 block_group->space_info->bytes_readonly -= block_group->key.offset;
9413 block_group->space_info->disk_total -= block_group->key.offset * factor; 9751 block_group->space_info->disk_total -= block_group->key.offset * factor;
9752
9414 spin_unlock(&block_group->space_info->lock); 9753 spin_unlock(&block_group->space_info->lock);
9415 9754
9416 memcpy(&key, &block_group->key, sizeof(key)); 9755 memcpy(&key, &block_group->key, sizeof(key));
@@ -9598,8 +9937,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9598 mutex_unlock(&fs_info->unused_bg_unpin_mutex); 9937 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9599 9938
9600 /* Reset pinned so btrfs_put_block_group doesn't complain */ 9939 /* Reset pinned so btrfs_put_block_group doesn't complain */
9940 spin_lock(&space_info->lock);
9941 spin_lock(&block_group->lock);
9942
9943 space_info->bytes_pinned -= block_group->pinned;
9944 space_info->bytes_readonly += block_group->pinned;
9945 percpu_counter_add(&space_info->total_bytes_pinned,
9946 -block_group->pinned);
9601 block_group->pinned = 0; 9947 block_group->pinned = 0;
9602 9948
9949 spin_unlock(&block_group->lock);
9950 spin_unlock(&space_info->lock);
9951
9603 /* 9952 /*
9604 * Btrfs_remove_chunk will abort the transaction if things go 9953 * Btrfs_remove_chunk will abort the transaction if things go
9605 * horribly wrong. 9954 * horribly wrong.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..782f3bc4651d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4514,8 +4514,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4514 } 4514 }
4515 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 4515 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
4516 em_len, flags); 4516 em_len, flags);
4517 if (ret) 4517 if (ret) {
4518 if (ret == 1)
4519 ret = 0;
4518 goto out_free; 4520 goto out_free;
4521 }
4519 } 4522 }
4520out_free: 4523out_free:
4521 free_extent_map(em); 4524 free_extent_map(em);
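By the fiemap_fill_next_extent() contract, a return value of 1 means the user-supplied extent array (fi_extents_max) is full and enumeration should stop; only negative values are errors, which is exactly what the hunk encodes. Generic caller shape, with the iteration itself left hypothetical:

while (next_extent(&em)) {	/* hypothetical iterator */
	ret = fiemap_fill_next_extent(fieinfo, logical, phys, len, flags);
	if (ret < 0)
		return ret;	/* genuine failure */
	if (ret == 1)
		break;		/* array full: stop and report success */
}
return 0;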
@@ -4968,6 +4971,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
4968 4971
4969 /* Should be safe to release our pages at this point */ 4972 /* Should be safe to release our pages at this point */
4970 btrfs_release_extent_buffer_page(eb); 4973 btrfs_release_extent_buffer_page(eb);
4974#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4975 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4976 __free_extent_buffer(eb);
4977 return 1;
4978 }
4979#endif
4971 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4980 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4972 return 1; 4981 return 1;
4973 } 4982 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 695b0ccfb755..c668f36898d3 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -97,7 +97,7 @@ struct extent_io_tree {
97 u64 dirty_bytes; 97 u64 dirty_bytes;
98 int track_uptodate; 98 int track_uptodate;
99 spinlock_t lock; 99 spinlock_t lock;
100 struct extent_io_ops *ops; 100 const struct extent_io_ops *ops;
101}; 101};
102 102
103struct extent_state { 103struct extent_state {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 84a2d1868271..58ece6558430 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
185 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; 185 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) { 186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, 188 btrfs_bio->csum_allocated = kmalloc_array(nblocks,
189 GFP_NOFS); 189 csum_size, GFP_NOFS);
190 if (!btrfs_bio->csum_allocated) { 190 if (!btrfs_bio->csum_allocated) {
191 btrfs_free_path(path); 191 btrfs_free_path(path);
192 return -ENOMEM; 192 return -ENOMEM;
@@ -553,7 +553,7 @@ static noinline void truncate_one_csum(struct btrfs_root *root,
553 btrfs_truncate_item(root, path, new_size, 0); 553 btrfs_truncate_item(root, path, new_size, 0);
554 554
555 key->offset = end_byte; 555 key->offset = end_byte;
556 btrfs_set_item_key_safe(root, path, key); 556 btrfs_set_item_key_safe(root->fs_info, path, key);
557 } else { 557 } else {
558 BUG(); 558 BUG();
559 } 559 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbbac900d..467620a3b1f9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/aio.h>
28#include <linux/falloc.h> 27#include <linux/falloc.h>
29#include <linux/swap.h> 28#include <linux/swap.h>
30#include <linux/writeback.h> 29#include <linux/writeback.h>
@@ -32,6 +31,7 @@
32#include <linux/compat.h> 31#include <linux/compat.h>
33#include <linux/slab.h> 32#include <linux/slab.h>
34#include <linux/btrfs.h> 33#include <linux/btrfs.h>
34#include <linux/uio.h>
35#include "ctree.h" 35#include "ctree.h"
36#include "disk-io.h" 36#include "disk-io.h"
37#include "transaction.h" 37#include "transaction.h"
@@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
273 defrag = rb_entry(node, struct inode_defrag, rb_node); 273 defrag = rb_entry(node, struct inode_defrag, rb_node);
274 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 274 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
275 275
276 if (need_resched()) { 276 cond_resched_lock(&fs_info->defrag_inodes_lock);
277 spin_unlock(&fs_info->defrag_inodes_lock);
278 cond_resched();
279 spin_lock(&fs_info->defrag_inodes_lock);
280 }
281 277
282 node = rb_first(&fs_info->defrag_inodes); 278 node = rb_first(&fs_info->defrag_inodes);
283 } 279 }
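cond_resched_lock() is the stock helper that folds the removed open-coded sequence into one call; its effect is roughly the following (a paraphrase, not the exact scheduler implementation):

/* approximate expansion of cond_resched_lock(&fs_info->defrag_inodes_lock) */
if (need_resched() || spin_needbreak(&fs_info->defrag_inodes_lock)) {
	spin_unlock(&fs_info->defrag_inodes_lock);
	cond_resched();
	spin_lock(&fs_info->defrag_inodes_lock);
}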
@@ -868,7 +864,7 @@ next_slot:
868 864
869 memcpy(&new_key, &key, sizeof(new_key)); 865 memcpy(&new_key, &key, sizeof(new_key));
870 new_key.offset = end; 866 new_key.offset = end;
871 btrfs_set_item_key_safe(root, path, &new_key); 867 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
872 868
873 extent_offset += end - key.offset; 869 extent_offset += end - key.offset;
874 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 870 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1126,7 +1122,7 @@ again:
1126 ino, bytenr, orig_offset, 1122 ino, bytenr, orig_offset,
1127 &other_start, &other_end)) { 1123 &other_start, &other_end)) {
1128 new_key.offset = end; 1124 new_key.offset = end;
1129 btrfs_set_item_key_safe(root, path, &new_key); 1125 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
1130 fi = btrfs_item_ptr(leaf, path->slots[0], 1126 fi = btrfs_item_ptr(leaf, path->slots[0],
1131 struct btrfs_file_extent_item); 1127 struct btrfs_file_extent_item);
1132 btrfs_set_file_extent_generation(leaf, fi, 1128 btrfs_set_file_extent_generation(leaf, fi,
@@ -1160,7 +1156,7 @@ again:
1160 trans->transid); 1156 trans->transid);
1161 path->slots[0]++; 1157 path->slots[0]++;
1162 new_key.offset = start; 1158 new_key.offset = start;
1163 btrfs_set_item_key_safe(root, path, &new_key); 1159 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
1164 1160
1165 fi = btrfs_item_ptr(leaf, path->slots[0], 1161 fi = btrfs_item_ptr(leaf, path->slots[0],
1166 struct btrfs_file_extent_item); 1162 struct btrfs_file_extent_item);
@@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1485 PAGE_CACHE_SIZE / (sizeof(struct page *))); 1481 PAGE_CACHE_SIZE / (sizeof(struct page *)));
1486 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); 1482 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1487 nrptrs = max(nrptrs, 8); 1483 nrptrs = max(nrptrs, 8);
1488 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 1484 pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
1489 if (!pages) 1485 if (!pages)
1490 return -ENOMEM; 1486 return -ENOMEM;
1491 1487
@@ -1514,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1514 } 1510 }
1515 1511
1516 reserve_bytes = num_pages << PAGE_CACHE_SHIFT; 1512 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1517 ret = btrfs_check_data_free_space(inode, reserve_bytes); 1513 ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
1518 if (ret == -ENOSPC && 1514 if (ret == -ENOSPC &&
1519 (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | 1515 (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1520 BTRFS_INODE_PREALLOC))) { 1516 BTRFS_INODE_PREALLOC))) {
@@ -1635,8 +1631,8 @@ again:
1635 btrfs_end_write_no_snapshoting(root); 1631 btrfs_end_write_no_snapshoting(root);
1636 1632
1637 if (only_release_metadata && copied > 0) { 1633 if (only_release_metadata && copied > 0) {
1638 u64 lockstart = round_down(pos, root->sectorsize); 1634 lockstart = round_down(pos, root->sectorsize);
1639 u64 lockend = lockstart + 1635 lockend = lockstart +
1640 (dirty_pages << PAGE_CACHE_SHIFT) - 1; 1636 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
1641 1637
1642 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 1638 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -1739,27 +1735,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1739 u64 start_pos; 1735 u64 start_pos;
1740 u64 end_pos; 1736 u64 end_pos;
1741 ssize_t num_written = 0; 1737 ssize_t num_written = 0;
1742 ssize_t err = 0;
1743 size_t count = iov_iter_count(from);
1744 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1738 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1745 loff_t pos = iocb->ki_pos; 1739 ssize_t err;
1740 loff_t pos;
1741 size_t count;
1746 1742
1747 mutex_lock(&inode->i_mutex); 1743 mutex_lock(&inode->i_mutex);
1748 1744 err = generic_write_checks(iocb, from);
1749 current->backing_dev_info = inode_to_bdi(inode); 1745 if (err <= 0) {
1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1751 if (err) {
1752 mutex_unlock(&inode->i_mutex); 1746 mutex_unlock(&inode->i_mutex);
1753 goto out; 1747 return err;
1754 }
1755
1756 if (count == 0) {
1757 mutex_unlock(&inode->i_mutex);
1758 goto out;
1759 } 1748 }
1760 1749
1761 iov_iter_truncate(from, count); 1750 current->backing_dev_info = inode_to_bdi(inode);
1762
1763 err = file_remove_suid(file); 1751 err = file_remove_suid(file);
1764 if (err) { 1752 if (err) {
1765 mutex_unlock(&inode->i_mutex); 1753 mutex_unlock(&inode->i_mutex);
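This hunk follows the VFS change that moved generic_write_checks() from the (file, &pos, &count, isblk) signature to taking the kiocb and iov_iter directly; it now returns the possibly-truncated byte count, zero, or a negative error, and trims the iterator itself. In sketch form:

err = generic_write_checks(iocb, from);	/* checks limits, trims *from */
if (err <= 0)
	return err;			/* nothing to write, or an error */
pos = iocb->ki_pos;			/* position is read back from the kiocb */
count = iov_iter_count(from);		/* already truncated by the check */

which is why the explicit iov_iter_truncate() call and the count == 0 test disappear.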
@@ -1786,6 +1774,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1786 */ 1774 */
1787 update_time_for_write(inode); 1775 update_time_for_write(inode);
1788 1776
1777 pos = iocb->ki_pos;
1778 count = iov_iter_count(from);
1789 start_pos = round_down(pos, root->sectorsize); 1779 start_pos = round_down(pos, root->sectorsize);
1790 if (start_pos > i_size_read(inode)) { 1780 if (start_pos > i_size_read(inode)) {
1791 /* Expand hole size to cover write data, preventing empty gap */ 1781 /* Expand hole size to cover write data, preventing empty gap */
@@ -1800,7 +1790,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1800 if (sync) 1790 if (sync)
1801 atomic_inc(&BTRFS_I(inode)->sync_writers); 1791 atomic_inc(&BTRFS_I(inode)->sync_writers);
1802 1792
1803 if (file->f_flags & O_DIRECT) { 1793 if (iocb->ki_flags & IOCB_DIRECT) {
1804 num_written = __btrfs_direct_write(iocb, from, pos); 1794 num_written = __btrfs_direct_write(iocb, from, pos);
1805 } else { 1795 } else {
1806 num_written = __btrfs_buffered_write(file, from, pos); 1796 num_written = __btrfs_buffered_write(file, from, pos);
@@ -1811,23 +1801,13 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1811 mutex_unlock(&inode->i_mutex); 1801 mutex_unlock(&inode->i_mutex);
1812 1802
1813 /* 1803 /*
1814 * we want to make sure fsync finds this change
1815 * but we haven't joined a transaction running right now.
1816 *
1817 * Later on, someone is sure to update the inode and get the
1818 * real transid recorded.
1819 *
1820 * We set last_trans now to the fs_info generation + 1,
1821 * this will either be one more than the running transaction
1822 * or the generation used for the next transaction if there isn't
1823 * one running right now.
1824 *
1825 * We also have to set last_sub_trans to the current log transid, 1804 * We also have to set last_sub_trans to the current log transid,
1826 * otherwise subsequent syncs to a file that's been synced in this 1805 * otherwise subsequent syncs to a file that's been synced in this
1827 * transaction will appear to have already occurred. 1806 * transaction will appear to have already occurred.
1828 */ 1807 */
1829 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 1808 spin_lock(&BTRFS_I(inode)->lock);
1830 BTRFS_I(inode)->last_sub_trans = root->log_transid; 1809 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1810 spin_unlock(&BTRFS_I(inode)->lock);
1831 if (num_written > 0) { 1811 if (num_written > 0) {
1832 err = generic_write_sync(file, pos, num_written); 1812 err = generic_write_sync(file, pos, num_written);
1833 if (err < 0) 1813 if (err < 0)
@@ -1959,25 +1939,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1959 atomic_inc(&root->log_batch); 1939 atomic_inc(&root->log_batch);
1960 1940
1961 /* 1941 /*
1962 * check the transaction that last modified this inode 1942 * If the last transaction that changed this file was before the current
1963 * and see if it's already been committed 1943 * transaction and we have the full sync flag set in our inode, we can
1964 */ 1944 * bail out now without any syncing.
1965 if (!BTRFS_I(inode)->last_trans) { 1945 *
1966 mutex_unlock(&inode->i_mutex); 1946 * Note that we can't bail out if the full sync flag isn't set. This is
1967 goto out; 1947 * because when the full sync flag is set we start all ordered extents
1968 } 1948 * and wait for them to fully complete - when they complete they update
1969 1949 * the inode's last_trans field through:
1970 /* 1950 *
1971 * if the last transaction that changed this file was before 1951 * btrfs_finish_ordered_io() ->
1972 * the current transaction, we can bail out now without any 1952 * btrfs_update_inode_fallback() ->
1973 * syncing 1953 * btrfs_update_inode() ->
1954 * btrfs_set_inode_last_trans()
1955 *
1956 * So we are sure that last_trans is up to date and can do this check to
1957 * bail out safely. For the fast path, when the full sync flag is not
1958 * set in our inode, we cannot do it because we start only our ordered
1959 * extents and don't wait for them to complete (that is when
1960 * btrfs_finish_ordered_io runs), so here at this point their last_trans
1961 * value might be less than or equal to fs_info->last_trans_committed,
1962 * and setting a speculative last_trans for an inode when a buffered
1963 * write is made (such as fs_info->generation + 1 for example) would not
1964 * be reliable since after setting the value and before fsync is called
1965 * any number of transactions can start and commit (transaction kthread
1966 * commits the current transaction periodically), and a transaction
1967 * commit does not start nor wait for ordered extents to complete.
1974 */ 1968 */
1975 smp_mb(); 1969 smp_mb();
1976 if (btrfs_inode_in_log(inode, root->fs_info->generation) || 1970 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1977 BTRFS_I(inode)->last_trans <= 1971 (full_sync && BTRFS_I(inode)->last_trans <=
1978 root->fs_info->last_trans_committed) { 1972 root->fs_info->last_trans_committed)) {
1979 BTRFS_I(inode)->last_trans = 0;
1980
1981 /* 1973 /*
1982 * We've had everything committed since the last time we were 1974 * We've had everything committed since the last time we were
1983 * modified so clear this flag in case it was set for whatever 1975 * modified so clear this flag in case it was set for whatever
@@ -2168,7 +2160,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2168 u64 num_bytes; 2160 u64 num_bytes;
2169 2161
2170 key.offset = offset; 2162 key.offset = offset;
2171 btrfs_set_item_key_safe(root, path, &key); 2163 btrfs_set_item_key_safe(root->fs_info, path, &key);
2172 fi = btrfs_item_ptr(leaf, path->slots[0], 2164 fi = btrfs_item_ptr(leaf, path->slots[0],
2173 struct btrfs_file_extent_item); 2165 struct btrfs_file_extent_item);
2174 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - 2166 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -2275,6 +2267,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2275 bool same_page; 2267 bool same_page;
2276 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2268 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2277 u64 ino_size; 2269 u64 ino_size;
2270 bool truncated_page = false;
2271 bool updated_inode = false;
2278 2272
2279 ret = btrfs_wait_ordered_range(inode, offset, len); 2273 ret = btrfs_wait_ordered_range(inode, offset, len);
2280 if (ret) 2274 if (ret)
@@ -2306,13 +2300,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2306 * entire page. 2300 * entire page.
2307 */ 2301 */
2308 if (same_page && len < PAGE_CACHE_SIZE) { 2302 if (same_page && len < PAGE_CACHE_SIZE) {
2309 if (offset < ino_size) 2303 if (offset < ino_size) {
2304 truncated_page = true;
2310 ret = btrfs_truncate_page(inode, offset, len, 0); 2305 ret = btrfs_truncate_page(inode, offset, len, 0);
2306 } else {
2307 ret = 0;
2308 }
2311 goto out_only_mutex; 2309 goto out_only_mutex;
2312 } 2310 }
2313 2311
2314 /* zero back part of the first page */ 2312 /* zero back part of the first page */
2315 if (offset < ino_size) { 2313 if (offset < ino_size) {
2314 truncated_page = true;
2316 ret = btrfs_truncate_page(inode, offset, 0, 0); 2315 ret = btrfs_truncate_page(inode, offset, 0, 0);
2317 if (ret) { 2316 if (ret) {
2318 mutex_unlock(&inode->i_mutex); 2317 mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2347,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2348 if (!ret) { 2347 if (!ret) {
2349 /* zero the front end of the last page */ 2348 /* zero the front end of the last page */
2350 if (tail_start + tail_len < ino_size) { 2349 if (tail_start + tail_len < ino_size) {
2350 truncated_page = true;
2351 ret = btrfs_truncate_page(inode, 2351 ret = btrfs_truncate_page(inode,
2352 tail_start + tail_len, 0, 1); 2352 tail_start + tail_len, 0, 1);
2353 if (ret) 2353 if (ret)
@@ -2357,8 +2357,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2357 } 2357 }
2358 2358
2359 if (lockend < lockstart) { 2359 if (lockend < lockstart) {
2360 mutex_unlock(&inode->i_mutex); 2360 ret = 0;
2361 return 0; 2361 goto out_only_mutex;
2362 } 2362 }
2363 2363
2364 while (1) { 2364 while (1) {
@@ -2506,6 +2506,7 @@ out_trans:
2506 2506
2507 trans->block_rsv = &root->fs_info->trans_block_rsv; 2507 trans->block_rsv = &root->fs_info->trans_block_rsv;
2508 ret = btrfs_update_inode(trans, root, inode); 2508 ret = btrfs_update_inode(trans, root, inode);
2509 updated_inode = true;
2509 btrfs_end_transaction(trans, root); 2510 btrfs_end_transaction(trans, root);
2510 btrfs_btree_balance_dirty(root); 2511 btrfs_btree_balance_dirty(root);
2511out_free: 2512out_free:
@@ -2515,6 +2516,22 @@ out:
2515 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2516 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2516 &cached_state, GFP_NOFS); 2517 &cached_state, GFP_NOFS);
2517out_only_mutex: 2518out_only_mutex:
2519 if (!updated_inode && truncated_page && !ret && !err) {
2520 /*
2521 * If we only end up zeroing part of a page, we still need to
2522 * update the inode item, so that all the time fields are
2523 * updated as well as the necessary btrfs inode in memory fields
2524 * for detecting, at fsync time, if the inode isn't yet in the
2525 * log tree or it's there but not up to date.
2526 */
2527 trans = btrfs_start_transaction(root, 1);
2528 if (IS_ERR(trans)) {
2529 err = PTR_ERR(trans);
2530 } else {
2531 err = btrfs_update_inode(trans, root, inode);
2532 ret = btrfs_end_transaction(trans, root);
2533 }
2534 }
2518 mutex_unlock(&inode->i_mutex); 2535 mutex_unlock(&inode->i_mutex);
2519 if (ret && !err) 2536 if (ret && !err)
2520 err = ret; 2537 err = ret;
@@ -2526,7 +2543,6 @@ static long btrfs_fallocate(struct file *file, int mode,
2526{ 2543{
2527 struct inode *inode = file_inode(file); 2544 struct inode *inode = file_inode(file);
2528 struct extent_state *cached_state = NULL; 2545 struct extent_state *cached_state = NULL;
2529 struct btrfs_root *root = BTRFS_I(inode)->root;
2530 u64 cur_offset; 2546 u64 cur_offset;
2531 u64 last_byte; 2547 u64 last_byte;
2532 u64 alloc_start; 2548 u64 alloc_start;
@@ -2551,14 +2567,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2551 * Make sure we have enough space before we do the 2567 * Make sure we have enough space before we do the
2552 * allocation. 2568 * allocation.
2553 */ 2569 */
2554 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); 2570 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
2555 if (ret) 2571 if (ret)
2556 return ret; 2572 return ret;
2557 if (root->fs_info->quota_enabled) {
2558 ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
2559 if (ret)
2560 goto out_reserve_fail;
2561 }
2562 2573
2563 mutex_lock(&inode->i_mutex); 2574 mutex_lock(&inode->i_mutex);
2564 ret = inode_newsize_ok(inode, alloc_end); 2575 ret = inode_newsize_ok(inode, alloc_end);
@@ -2648,23 +2659,35 @@ static long btrfs_fallocate(struct file *file, int mode,
2648 1 << inode->i_blkbits, 2659 1 << inode->i_blkbits,
2649 offset + len, 2660 offset + len,
2650 &alloc_hint); 2661 &alloc_hint);
2651
2652 if (ret < 0) {
2653 free_extent_map(em);
2654 break;
2655 }
2656 } else if (actual_end > inode->i_size && 2662 } else if (actual_end > inode->i_size &&
2657 !(mode & FALLOC_FL_KEEP_SIZE)) { 2663 !(mode & FALLOC_FL_KEEP_SIZE)) {
2664 struct btrfs_trans_handle *trans;
2665 struct btrfs_root *root = BTRFS_I(inode)->root;
2666
2658 /* 2667 /*
2659 * We didn't need to allocate any more space, but we 2668 * We didn't need to allocate any more space, but we
2660 * still extended the size of the file so we need to 2669 * still extended the size of the file so we need to
2661 * update i_size. 2670 * update i_size and the inode item.
2662 */ 2671 */
2663 inode->i_ctime = CURRENT_TIME; 2672 trans = btrfs_start_transaction(root, 1);
2664 i_size_write(inode, actual_end); 2673 if (IS_ERR(trans)) {
2665 btrfs_ordered_update_i_size(inode, actual_end, NULL); 2674 ret = PTR_ERR(trans);
2675 } else {
2676 inode->i_ctime = CURRENT_TIME;
2677 i_size_write(inode, actual_end);
2678 btrfs_ordered_update_i_size(inode, actual_end,
2679 NULL);
2680 ret = btrfs_update_inode(trans, root, inode);
2681 if (ret)
2682 btrfs_end_transaction(trans, root);
2683 else
2684 ret = btrfs_end_transaction(trans,
2685 root);
2686 }
2666 } 2687 }
2667 free_extent_map(em); 2688 free_extent_map(em);
2689 if (ret < 0)
2690 break;
2668 2691
2669 cur_offset = last_byte; 2692 cur_offset = last_byte;
2670 if (cur_offset >= alloc_end) { 2693 if (cur_offset >= alloc_end) {
@@ -2676,9 +2699,6 @@ static long btrfs_fallocate(struct file *file, int mode,
2676 &cached_state, GFP_NOFS); 2699 &cached_state, GFP_NOFS);
2677out: 2700out:
2678 mutex_unlock(&inode->i_mutex); 2701 mutex_unlock(&inode->i_mutex);
2679 if (root->fs_info->quota_enabled)
2680 btrfs_qgroup_free(root, alloc_end - alloc_start);
2681out_reserve_fail:
2682 /* Let go of our reservation. */ 2702 /* Let go of our reservation. */
2683 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); 2703 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
2684 return ret; 2704 return ret;
@@ -2781,8 +2801,6 @@ out:
2781 2801
2782const struct file_operations btrfs_file_operations = { 2802const struct file_operations btrfs_file_operations = {
2783 .llseek = btrfs_file_llseek, 2803 .llseek = btrfs_file_llseek,
2784 .read = new_sync_read,
2785 .write = new_sync_write,
2786 .read_iter = generic_file_read_iter, 2804 .read_iter = generic_file_read_iter,
2787 .splice_read = generic_file_splice_read, 2805 .splice_read = generic_file_splice_read,
2788 .write_iter = btrfs_file_write_iter, 2806 .write_iter = btrfs_file_write_iter,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a71978578fa7..253cb74b0e27 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,7 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
85 } 85 }
86 86
87 mapping_set_gfp_mask(inode->i_mapping, 87 mapping_set_gfp_mask(inode->i_mapping,
88 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 88 mapping_gfp_mask(inode->i_mapping) &
89 ~(__GFP_FS | __GFP_HIGHMEM));
89 90
90 return inode; 91 return inode;
91} 92}
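Clearing __GFP_HIGHMEM for this mapping is what licenses the kmap() → page_address() conversions later in this file: every page cache page of the free space inode is then a lowmem page with a permanent kernel mapping. A sketch of the consequence:

/* with __GFP_HIGHMEM masked off the mapping, no kmap()/kunmap() is needed */
struct page *page = find_or_create_page(inode->i_mapping, index,
					mapping_gfp_mask(inode->i_mapping));
void *kaddr = page_address(page);	/* always valid for lowmem pages */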
@@ -170,13 +171,13 @@ static int __create_free_space_inode(struct btrfs_root *root,
170 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 171 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
171 key.offset = offset; 172 key.offset = offset;
172 key.type = 0; 173 key.type = 0;
173
174 ret = btrfs_insert_empty_item(trans, root, path, &key, 174 ret = btrfs_insert_empty_item(trans, root, path, &key,
175 sizeof(struct btrfs_free_space_header)); 175 sizeof(struct btrfs_free_space_header));
176 if (ret < 0) { 176 if (ret < 0) {
177 btrfs_release_path(path); 177 btrfs_release_path(path);
178 return ret; 178 return ret;
179 } 179 }
180
180 leaf = path->nodes[0]; 181 leaf = path->nodes[0];
181 header = btrfs_item_ptr(leaf, path->slots[0], 182 header = btrfs_item_ptr(leaf, path->slots[0],
182 struct btrfs_free_space_header); 183 struct btrfs_free_space_header);
@@ -225,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
225 226
226int btrfs_truncate_free_space_cache(struct btrfs_root *root, 227int btrfs_truncate_free_space_cache(struct btrfs_root *root,
227 struct btrfs_trans_handle *trans, 228 struct btrfs_trans_handle *trans,
229 struct btrfs_block_group_cache *block_group,
228 struct inode *inode) 230 struct inode *inode)
229{ 231{
230 int ret = 0; 232 int ret = 0;
233 struct btrfs_path *path = btrfs_alloc_path();
234
235 if (!path) {
236 ret = -ENOMEM;
237 goto fail;
238 }
239
240 if (block_group) {
241 mutex_lock(&trans->transaction->cache_write_mutex);
242 if (!list_empty(&block_group->io_list)) {
243 list_del_init(&block_group->io_list);
244
245 btrfs_wait_cache_io(root, trans, block_group,
246 &block_group->io_ctl, path,
247 block_group->key.objectid);
248 btrfs_put_block_group(block_group);
249 }
250
251 /*
252 * now that we've truncated the cache away, it's no longer
253 * set up or written
254 */
255 spin_lock(&block_group->lock);
256 block_group->disk_cache_state = BTRFS_DC_CLEAR;
257 spin_unlock(&block_group->lock);
258 }
259 btrfs_free_path(path);
231 260
232 btrfs_i_size_write(inode, 0); 261 btrfs_i_size_write(inode, 0);
233 truncate_pagecache(inode, 0); 262 truncate_pagecache(inode, 0);
@@ -235,15 +264,23 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
235 /* 264 /*
236 * We don't need an orphan item because truncating the free space cache 265 * We don't need an orphan item because truncating the free space cache
237 * will never be split across transactions. 266 * will never be split across transactions.
267 * We don't need to check for -EAGAIN because we're a free space
268 * cache inode
238 */ 269 */
239 ret = btrfs_truncate_inode_items(trans, root, inode, 270 ret = btrfs_truncate_inode_items(trans, root, inode,
240 0, BTRFS_EXTENT_DATA_KEY); 271 0, BTRFS_EXTENT_DATA_KEY);
241 if (ret) { 272 if (ret) {
273 mutex_unlock(&trans->transaction->cache_write_mutex);
242 btrfs_abort_transaction(trans, root, ret); 274 btrfs_abort_transaction(trans, root, ret);
243 return ret; 275 return ret;
244 } 276 }
245 277
246 ret = btrfs_update_inode(trans, root, inode); 278 ret = btrfs_update_inode(trans, root, inode);
279
280 if (block_group)
281 mutex_unlock(&trans->transaction->cache_write_mutex);
282
283fail:
247 if (ret) 284 if (ret)
248 btrfs_abort_transaction(trans, root, ret); 285 btrfs_abort_transaction(trans, root, ret);
249 286
@@ -269,18 +306,7 @@ static int readahead_cache(struct inode *inode)
269 return 0; 306 return 0;
270} 307}
271 308
272struct io_ctl { 309static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
273 void *cur, *orig;
274 struct page *page;
275 struct page **pages;
276 struct btrfs_root *root;
277 unsigned long size;
278 int index;
279 int num_pages;
280 unsigned check_crcs:1;
281};
282
283static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
284 struct btrfs_root *root, int write) 310 struct btrfs_root *root, int write)
285{ 311{
286 int num_pages; 312 int num_pages;
@@ -296,45 +322,46 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
296 (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 322 (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
297 return -ENOSPC; 323 return -ENOSPC;
298 324
299 memset(io_ctl, 0, sizeof(struct io_ctl)); 325 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
300 326
301 io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 327 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
302 if (!io_ctl->pages) 328 if (!io_ctl->pages)
303 return -ENOMEM; 329 return -ENOMEM;
304 330
305 io_ctl->num_pages = num_pages; 331 io_ctl->num_pages = num_pages;
306 io_ctl->root = root; 332 io_ctl->root = root;
307 io_ctl->check_crcs = check_crcs; 333 io_ctl->check_crcs = check_crcs;
334 io_ctl->inode = inode;
308 335
309 return 0; 336 return 0;
310} 337}
311 338
312static void io_ctl_free(struct io_ctl *io_ctl) 339static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
313{ 340{
314 kfree(io_ctl->pages); 341 kfree(io_ctl->pages);
342 io_ctl->pages = NULL;
315} 343}
316 344
317static void io_ctl_unmap_page(struct io_ctl *io_ctl) 345static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
318{ 346{
319 if (io_ctl->cur) { 347 if (io_ctl->cur) {
320 kunmap(io_ctl->page);
321 io_ctl->cur = NULL; 348 io_ctl->cur = NULL;
322 io_ctl->orig = NULL; 349 io_ctl->orig = NULL;
323 } 350 }
324} 351}
325 352
326static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 353static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
327{ 354{
328 ASSERT(io_ctl->index < io_ctl->num_pages); 355 ASSERT(io_ctl->index < io_ctl->num_pages);
329 io_ctl->page = io_ctl->pages[io_ctl->index++]; 356 io_ctl->page = io_ctl->pages[io_ctl->index++];
330 io_ctl->cur = kmap(io_ctl->page); 357 io_ctl->cur = page_address(io_ctl->page);
331 io_ctl->orig = io_ctl->cur; 358 io_ctl->orig = io_ctl->cur;
332 io_ctl->size = PAGE_CACHE_SIZE; 359 io_ctl->size = PAGE_CACHE_SIZE;
333 if (clear) 360 if (clear)
334 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); 361 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
335} 362}
336 363
337static void io_ctl_drop_pages(struct io_ctl *io_ctl) 364static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
338{ 365{
339 int i; 366 int i;
340 367
@@ -349,7 +376,7 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
349 } 376 }
350} 377}
351 378
352static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, 379static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
353 int uptodate) 380 int uptodate)
354{ 381{
355 struct page *page; 382 struct page *page;
@@ -383,7 +410,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
383 return 0; 410 return 0;
384} 411}
385 412
386static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) 413static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
387{ 414{
388 __le64 *val; 415 __le64 *val;
389 416
@@ -406,7 +433,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
406 io_ctl->cur += sizeof(u64); 433 io_ctl->cur += sizeof(u64);
407} 434}
408 435
409static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) 436static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
410{ 437{
411 __le64 *gen; 438 __le64 *gen;
412 439
@@ -435,7 +462,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
435 return 0; 462 return 0;
436} 463}
437 464
438static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) 465static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
439{ 466{
440 u32 *tmp; 467 u32 *tmp;
441 u32 crc = ~(u32)0; 468 u32 crc = ~(u32)0;
@@ -453,13 +480,12 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
453 PAGE_CACHE_SIZE - offset); 480 PAGE_CACHE_SIZE - offset);
454 btrfs_csum_final(crc, (char *)&crc); 481 btrfs_csum_final(crc, (char *)&crc);
455 io_ctl_unmap_page(io_ctl); 482 io_ctl_unmap_page(io_ctl);
456 tmp = kmap(io_ctl->pages[0]); 483 tmp = page_address(io_ctl->pages[0]);
457 tmp += index; 484 tmp += index;
458 *tmp = crc; 485 *tmp = crc;
459 kunmap(io_ctl->pages[0]);
460} 486}
461 487
462static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) 488static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
463{ 489{
464 u32 *tmp, val; 490 u32 *tmp, val;
465 u32 crc = ~(u32)0; 491 u32 crc = ~(u32)0;
@@ -473,10 +499,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
473 if (index == 0) 499 if (index == 0)
474 offset = sizeof(u32) * io_ctl->num_pages; 500 offset = sizeof(u32) * io_ctl->num_pages;
475 501
476 tmp = kmap(io_ctl->pages[0]); 502 tmp = page_address(io_ctl->pages[0]);
477 tmp += index; 503 tmp += index;
478 val = *tmp; 504 val = *tmp;
479 kunmap(io_ctl->pages[0]);
480 505
481 io_ctl_map_page(io_ctl, 0); 506 io_ctl_map_page(io_ctl, 0);
482 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 507 crc = btrfs_csum_data(io_ctl->orig + offset, crc,
@@ -492,7 +517,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
492 return 0; 517 return 0;
493} 518}
494 519
495static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, 520static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
496 void *bitmap) 521 void *bitmap)
497{ 522{
498 struct btrfs_free_space_entry *entry; 523 struct btrfs_free_space_entry *entry;
@@ -522,7 +547,7 @@ static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
522 return 0; 547 return 0;
523} 548}
524 549
525static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) 550static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
526{ 551{
527 if (!io_ctl->cur) 552 if (!io_ctl->cur)
528 return -ENOSPC; 553 return -ENOSPC;
@@ -545,7 +570,7 @@ static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
545 return 0; 570 return 0;
546} 571}
547 572
548static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) 573static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
549{ 574{
550 /* 575 /*
551 * If we're not on the boundary we know we've modified the page and we 576 * If we're not on the boundary we know we've modified the page and we
@@ -562,7 +587,7 @@ static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
562 } 587 }
563} 588}
564 589
565static int io_ctl_read_entry(struct io_ctl *io_ctl, 590static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
566 struct btrfs_free_space *entry, u8 *type) 591 struct btrfs_free_space *entry, u8 *type)
567{ 592{
568 struct btrfs_free_space_entry *e; 593 struct btrfs_free_space_entry *e;
@@ -589,7 +614,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
589 return 0; 614 return 0;
590} 615}
591 616
592static int io_ctl_read_bitmap(struct io_ctl *io_ctl, 617static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
593 struct btrfs_free_space *entry) 618 struct btrfs_free_space *entry)
594{ 619{
595 int ret; 620 int ret;
@@ -648,7 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
648{ 673{
649 struct btrfs_free_space_header *header; 674 struct btrfs_free_space_header *header;
650 struct extent_buffer *leaf; 675 struct extent_buffer *leaf;
651 struct io_ctl io_ctl; 676 struct btrfs_io_ctl io_ctl;
652 struct btrfs_key key; 677 struct btrfs_key key;
653 struct btrfs_free_space *e, *n; 678 struct btrfs_free_space *e, *n;
654 LIST_HEAD(bitmaps); 679 LIST_HEAD(bitmaps);
@@ -877,7 +902,7 @@ out:
877} 902}
878 903
879static noinline_for_stack 904static noinline_for_stack
880int write_cache_extent_entries(struct io_ctl *io_ctl, 905int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
881 struct btrfs_free_space_ctl *ctl, 906 struct btrfs_free_space_ctl *ctl,
882 struct btrfs_block_group_cache *block_group, 907 struct btrfs_block_group_cache *block_group,
883 int *entries, int *bitmaps, 908 int *entries, int *bitmaps,
@@ -885,6 +910,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
885{ 910{
886 int ret; 911 int ret;
887 struct btrfs_free_cluster *cluster = NULL; 912 struct btrfs_free_cluster *cluster = NULL;
913 struct btrfs_free_cluster *cluster_locked = NULL;
888 struct rb_node *node = rb_first(&ctl->free_space_offset); 914 struct rb_node *node = rb_first(&ctl->free_space_offset);
889 struct btrfs_trim_range *trim_entry; 915 struct btrfs_trim_range *trim_entry;
890 916
@@ -896,6 +922,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
896 } 922 }
897 923
898 if (!node && cluster) { 924 if (!node && cluster) {
925 cluster_locked = cluster;
926 spin_lock(&cluster_locked->lock);
899 node = rb_first(&cluster->root); 927 node = rb_first(&cluster->root);
900 cluster = NULL; 928 cluster = NULL;
901 } 929 }
@@ -919,9 +947,15 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
919 node = rb_next(node); 947 node = rb_next(node);
920 if (!node && cluster) { 948 if (!node && cluster) {
921 node = rb_first(&cluster->root); 949 node = rb_first(&cluster->root);
950 cluster_locked = cluster;
951 spin_lock(&cluster_locked->lock);
922 cluster = NULL; 952 cluster = NULL;
923 } 953 }
924 } 954 }
955 if (cluster_locked) {
956 spin_unlock(&cluster_locked->lock);
957 cluster_locked = NULL;
958 }
925 959
926 /* 960 /*
927 * Make sure we don't miss any range that was removed from our rbtree 961 * Make sure we don't miss any range that was removed from our rbtree
@@ -939,6 +973,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
939 973
940 return 0; 974 return 0;
941fail: 975fail:
976 if (cluster_locked)
977 spin_unlock(&cluster_locked->lock);
942 return -ENOSPC; 978 return -ENOSPC;
943} 979}
944 980
@@ -1000,7 +1036,7 @@ fail:
1000static noinline_for_stack int 1036static noinline_for_stack int
1001write_pinned_extent_entries(struct btrfs_root *root, 1037write_pinned_extent_entries(struct btrfs_root *root,
1002 struct btrfs_block_group_cache *block_group, 1038 struct btrfs_block_group_cache *block_group,
1003 struct io_ctl *io_ctl, 1039 struct btrfs_io_ctl *io_ctl,
1004 int *entries) 1040 int *entries)
1005{ 1041{
1006 u64 start, extent_start, extent_end, len; 1042 u64 start, extent_start, extent_end, len;
@@ -1050,7 +1086,7 @@ write_pinned_extent_entries(struct btrfs_root *root,
1050} 1086}
1051 1087
1052static noinline_for_stack int 1088static noinline_for_stack int
1053write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) 1089write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
1054{ 1090{
1055 struct list_head *pos, *n; 1091 struct list_head *pos, *n;
1056 int ret; 1092 int ret;
@@ -1084,7 +1120,7 @@ static int flush_dirty_cache(struct inode *inode)
1084 1120
1085static void noinline_for_stack 1121static void noinline_for_stack
1086cleanup_write_cache_enospc(struct inode *inode, 1122cleanup_write_cache_enospc(struct inode *inode,
1087 struct io_ctl *io_ctl, 1123 struct btrfs_io_ctl *io_ctl,
1088 struct extent_state **cached_state, 1124 struct extent_state **cached_state,
1089 struct list_head *bitmap_list) 1125 struct list_head *bitmap_list)
1090{ 1126{
@@ -1101,6 +1137,70 @@ cleanup_write_cache_enospc(struct inode *inode,
1101 GFP_NOFS); 1137 GFP_NOFS);
1102} 1138}
1103 1139
1140int btrfs_wait_cache_io(struct btrfs_root *root,
1141 struct btrfs_trans_handle *trans,
1142 struct btrfs_block_group_cache *block_group,
1143 struct btrfs_io_ctl *io_ctl,
1144 struct btrfs_path *path, u64 offset)
1145{
1146 int ret;
1147 struct inode *inode = io_ctl->inode;
1148
1149 if (!inode)
1150 return 0;
1151
1152 root = root->fs_info->tree_root;
1153
1154 /* Flush the dirty pages in the cache file. */
1155 ret = flush_dirty_cache(inode);
1156 if (ret)
1157 goto out;
1158
1159 /* Update the cache item to tell everyone this cache file is valid. */
1160 ret = update_cache_item(trans, root, inode, path, offset,
1161 io_ctl->entries, io_ctl->bitmaps);
1162out:
1163 io_ctl_free(io_ctl);
1164 if (ret) {
1165 invalidate_inode_pages2(inode->i_mapping);
1166 BTRFS_I(inode)->generation = 0;
1167 if (block_group) {
1168#ifdef DEBUG
1169 btrfs_err(root->fs_info,
1170 "failed to write free space cache for block group %llu",
1171 block_group->key.objectid);
1172#endif
1173 }
1174 }
1175 btrfs_update_inode(trans, root, inode);
1176
1177 if (block_group) {
1178 /* the dirty list is protected by the dirty_bgs_lock */
1179 spin_lock(&trans->transaction->dirty_bgs_lock);
1180
1181 /* the disk_cache_state is protected by the block group lock */
1182 spin_lock(&block_group->lock);
1183
1184 /*
1185 * only mark this as written if we didn't get put back on
1186 * the dirty list while waiting for IO. Otherwise our
1187 * cache state won't be right, and we won't get written again
1188 */
1189 if (!ret && list_empty(&block_group->dirty_list))
1190 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1191 else if (ret)
1192 block_group->disk_cache_state = BTRFS_DC_ERROR;
1193
1194 spin_unlock(&block_group->lock);
1195 spin_unlock(&trans->transaction->dirty_bgs_lock);
1196 io_ctl->inode = NULL;
1197 iput(inode);
1198 }
1199
1200 return ret;
 1201}
1203
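The helper above completes one half of a two-phase protocol: __btrfs_write_out_cache() below now only dirties the pages and kicks off writeback, stashing the entry/bitmap counts in the io_ctl, while btrfs_wait_cache_io() flushes, updates the cache item and drops the pinned inode. A minimal sketch of a caller driving both phases, using only the signatures this patch introduces (hypothetical wrapper, error handling trimmed; not compilable outside the kernel tree):

static int write_and_wait_one_cache(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_block_group_cache *cache,
                                    struct btrfs_path *path)
{
        int ret;

        /* phase 1: dirty the cache pages and start the IO */
        ret = btrfs_write_out_cache(root, trans, cache, path);
        if (ret)
                return ret;     /* write phase already marked DC_ERROR */

        /* phase 2: wait for the IO, update the cache item, iput() */
        return btrfs_wait_cache_io(root, trans, cache, &cache->io_ctl,
                                   path, cache->key.objectid);
}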
1104/** 1204/**
1105 * __btrfs_write_out_cache - write out cached info to an inode 1205 * __btrfs_write_out_cache - write out cached info to an inode
1106 * @root - the root the inode belongs to 1206 * @root - the root the inode belongs to
@@ -1117,20 +1217,22 @@ cleanup_write_cache_enospc(struct inode *inode,
1117static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1217static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1118 struct btrfs_free_space_ctl *ctl, 1218 struct btrfs_free_space_ctl *ctl,
1119 struct btrfs_block_group_cache *block_group, 1219 struct btrfs_block_group_cache *block_group,
1220 struct btrfs_io_ctl *io_ctl,
1120 struct btrfs_trans_handle *trans, 1221 struct btrfs_trans_handle *trans,
1121 struct btrfs_path *path, u64 offset) 1222 struct btrfs_path *path, u64 offset)
1122{ 1223{
1123 struct extent_state *cached_state = NULL; 1224 struct extent_state *cached_state = NULL;
1124 struct io_ctl io_ctl;
1125 LIST_HEAD(bitmap_list); 1225 LIST_HEAD(bitmap_list);
1126 int entries = 0; 1226 int entries = 0;
1127 int bitmaps = 0; 1227 int bitmaps = 0;
1128 int ret; 1228 int ret;
1229 int must_iput = 0;
1129 1230
1130 if (!i_size_read(inode)) 1231 if (!i_size_read(inode))
1131 return -1; 1232 return -1;
1132 1233
1133 ret = io_ctl_init(&io_ctl, inode, root, 1); 1234 WARN_ON(io_ctl->pages);
1235 ret = io_ctl_init(io_ctl, inode, root, 1);
1134 if (ret) 1236 if (ret)
1135 return -1; 1237 return -1;
1136 1238
@@ -1143,24 +1245,27 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1143 up_write(&block_group->data_rwsem); 1245 up_write(&block_group->data_rwsem);
1144 BTRFS_I(inode)->generation = 0; 1246 BTRFS_I(inode)->generation = 0;
1145 ret = 0; 1247 ret = 0;
1248 must_iput = 1;
1146 goto out; 1249 goto out;
1147 } 1250 }
1148 spin_unlock(&block_group->lock); 1251 spin_unlock(&block_group->lock);
1149 } 1252 }
1150 1253
1151 /* Lock all pages first so we can lock the extent safely. */ 1254 /* Lock all pages first so we can lock the extent safely. */
1152 io_ctl_prepare_pages(&io_ctl, inode, 0); 1255 io_ctl_prepare_pages(io_ctl, inode, 0);
1153 1256
1154 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1257 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1155 0, &cached_state); 1258 0, &cached_state);
1156 1259
1157 io_ctl_set_generation(&io_ctl, trans->transid); 1260 io_ctl_set_generation(io_ctl, trans->transid);
1158 1261
1159 mutex_lock(&ctl->cache_writeout_mutex); 1262 mutex_lock(&ctl->cache_writeout_mutex);
1160 /* Write out the extent entries in the free space cache */ 1263 /* Write out the extent entries in the free space cache */
1161 ret = write_cache_extent_entries(&io_ctl, ctl, 1264 spin_lock(&ctl->tree_lock);
1265 ret = write_cache_extent_entries(io_ctl, ctl,
1162 block_group, &entries, &bitmaps, 1266 block_group, &entries, &bitmaps,
1163 &bitmap_list); 1267 &bitmap_list);
1268 spin_unlock(&ctl->tree_lock);
1164 if (ret) { 1269 if (ret) {
1165 mutex_unlock(&ctl->cache_writeout_mutex); 1270 mutex_unlock(&ctl->cache_writeout_mutex);
1166 goto out_nospc; 1271 goto out_nospc;
@@ -1170,8 +1275,11 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1170 * Some spaces that are freed in the current transaction are pinned, 1275 * Some spaces that are freed in the current transaction are pinned,
1171 * they will be added into free space cache after the transaction is 1276 * they will be added into free space cache after the transaction is
1172 * committed, we shouldn't lose them. 1277 * committed, we shouldn't lose them.
1278 *
 1279 * If this changes while we are working, we'll get added back to
 1280 * the dirty list and redo it. No locking is needed.
1173 */ 1281 */
1174 ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); 1282 ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
1175 if (ret) { 1283 if (ret) {
1176 mutex_unlock(&ctl->cache_writeout_mutex); 1284 mutex_unlock(&ctl->cache_writeout_mutex);
1177 goto out_nospc; 1285 goto out_nospc;
@@ -1182,16 +1290,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1182 * locked while doing it because a concurrent trim can be manipulating 1290 * locked while doing it because a concurrent trim can be manipulating
1183 * or freeing the bitmap. 1291 * or freeing the bitmap.
1184 */ 1292 */
1185 ret = write_bitmap_entries(&io_ctl, &bitmap_list); 1293 spin_lock(&ctl->tree_lock);
1294 ret = write_bitmap_entries(io_ctl, &bitmap_list);
1295 spin_unlock(&ctl->tree_lock);
1186 mutex_unlock(&ctl->cache_writeout_mutex); 1296 mutex_unlock(&ctl->cache_writeout_mutex);
1187 if (ret) 1297 if (ret)
1188 goto out_nospc; 1298 goto out_nospc;
1189 1299
1190 /* Zero out the rest of the pages just to make sure */ 1300 /* Zero out the rest of the pages just to make sure */
1191 io_ctl_zero_remaining_pages(&io_ctl); 1301 io_ctl_zero_remaining_pages(io_ctl);
1192 1302
1193 /* Everything is written out, now we dirty the pages in the file. */ 1303 /* Everything is written out, now we dirty the pages in the file. */
1194 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, 1304 ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
1195 0, i_size_read(inode), &cached_state); 1305 0, i_size_read(inode), &cached_state);
1196 if (ret) 1306 if (ret)
1197 goto out_nospc; 1307 goto out_nospc;
@@ -1202,30 +1312,39 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1202 * Release the pages and unlock the extent, we will flush 1312 * Release the pages and unlock the extent, we will flush
1203 * them out later 1313 * them out later
1204 */ 1314 */
1205 io_ctl_drop_pages(&io_ctl); 1315 io_ctl_drop_pages(io_ctl);
1206 1316
1207 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1317 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1208 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1318 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1209 1319
1210 /* Flush the dirty pages in the cache file. */ 1320 /*
1211 ret = flush_dirty_cache(inode); 1321 * At this point the pages are under IO and we're happy;
 1322 * the caller is responsible for waiting on them and updating
 1323 * the cache and the inode
1324 */
1325 io_ctl->entries = entries;
1326 io_ctl->bitmaps = bitmaps;
1327
1328 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1212 if (ret) 1329 if (ret)
1213 goto out; 1330 goto out;
1214 1331
1215 /* Update the cache item to tell everyone this cache file is valid. */ 1332 return 0;
1216 ret = update_cache_item(trans, root, inode, path, offset, 1333
1217 entries, bitmaps);
1218out: 1334out:
1219 io_ctl_free(&io_ctl); 1335 io_ctl->inode = NULL;
1336 io_ctl_free(io_ctl);
1220 if (ret) { 1337 if (ret) {
1221 invalidate_inode_pages2(inode->i_mapping); 1338 invalidate_inode_pages2(inode->i_mapping);
1222 BTRFS_I(inode)->generation = 0; 1339 BTRFS_I(inode)->generation = 0;
1223 } 1340 }
1224 btrfs_update_inode(trans, root, inode); 1341 btrfs_update_inode(trans, root, inode);
1342 if (must_iput)
1343 iput(inode);
1225 return ret; 1344 return ret;
1226 1345
1227out_nospc: 1346out_nospc:
1228 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1347 cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
1229 1348
1230 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1349 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1231 up_write(&block_group->data_rwsem); 1350 up_write(&block_group->data_rwsem);
@@ -1241,7 +1360,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1241 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1360 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1242 struct inode *inode; 1361 struct inode *inode;
1243 int ret = 0; 1362 int ret = 0;
1244 enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
1245 1363
1246 root = root->fs_info->tree_root; 1364 root = root->fs_info->tree_root;
1247 1365
@@ -1250,34 +1368,34 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1250 spin_unlock(&block_group->lock); 1368 spin_unlock(&block_group->lock);
1251 return 0; 1369 return 0;
1252 } 1370 }
1253
1254 if (block_group->delalloc_bytes) {
1255 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1256 spin_unlock(&block_group->lock);
1257 return 0;
1258 }
1259 spin_unlock(&block_group->lock); 1371 spin_unlock(&block_group->lock);
1260 1372
1261 inode = lookup_free_space_inode(root, block_group, path); 1373 inode = lookup_free_space_inode(root, block_group, path);
1262 if (IS_ERR(inode)) 1374 if (IS_ERR(inode))
1263 return 0; 1375 return 0;
1264 1376
1265 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1377 ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
1378 &block_group->io_ctl, trans,
1266 path, block_group->key.objectid); 1379 path, block_group->key.objectid);
1267 if (ret) { 1380 if (ret) {
1268 dcs = BTRFS_DC_ERROR;
1269 ret = 0;
1270#ifdef DEBUG 1381#ifdef DEBUG
1271 btrfs_err(root->fs_info, 1382 btrfs_err(root->fs_info,
1272 "failed to write free space cache for block group %llu", 1383 "failed to write free space cache for block group %llu",
1273 block_group->key.objectid); 1384 block_group->key.objectid);
1274#endif 1385#endif
1386 spin_lock(&block_group->lock);
1387 block_group->disk_cache_state = BTRFS_DC_ERROR;
1388 spin_unlock(&block_group->lock);
1389
1390 block_group->io_ctl.inode = NULL;
1391 iput(inode);
1275 } 1392 }
1276 1393
1277 spin_lock(&block_group->lock); 1394 /*
1278 block_group->disk_cache_state = dcs; 1395 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1279 spin_unlock(&block_group->lock); 1396 * to wait for IO and put the inode
1280 iput(inode); 1397 */
1398
1281 return ret; 1399 return ret;
1282} 1400}
1283 1401
@@ -1298,11 +1416,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1298 u64 offset) 1416 u64 offset)
1299{ 1417{
1300 u64 bitmap_start; 1418 u64 bitmap_start;
1301 u64 bytes_per_bitmap; 1419 u32 bytes_per_bitmap;
1302 1420
1303 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1421 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1304 bitmap_start = offset - ctl->start; 1422 bitmap_start = offset - ctl->start;
1305 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1423 bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
1306 bitmap_start *= bytes_per_bitmap; 1424 bitmap_start *= bytes_per_bitmap;
1307 bitmap_start += ctl->start; 1425 bitmap_start += ctl->start;
1308 1426
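The div64_u64() -> div_u64() switch is safe because bytes_per_bitmap is now a u32: BITS_PER_BITMAP * ctl->unit stays well under 4GiB. The rounding itself is unchanged, flooring the offset to a bitmap-sized boundary relative to ctl->start. A standalone sketch of the arithmetic, with illustrative values standing in for the kernel constants:

#include <stdint.h>
#include <stdio.h>

#define UNIT            4096ULL         /* illustrative sector size */
#define BITS_PER_BITMAP (4096ULL * 8)   /* illustrative: one page of bits */

static uint64_t offset_to_bitmap(uint64_t start, uint64_t offset)
{
        uint32_t bytes_per_bitmap = BITS_PER_BITMAP * UNIT; /* 128MiB */
        uint64_t bitmap_start = (offset - start) / bytes_per_bitmap;

        return bitmap_start * bytes_per_bitmap + start;
}

int main(void)
{
        uint64_t start = 1ULL << 30;    /* group starts at 1GiB */

        /* 200MiB into the group floors to its second bitmap: start + 128MiB */
        printf("%llu\n", (unsigned long long)
               offset_to_bitmap(start, start + 200ULL * 1024 * 1024));
        return 0;
}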
@@ -1521,10 +1639,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1521 u64 bitmap_bytes; 1639 u64 bitmap_bytes;
1522 u64 extent_bytes; 1640 u64 extent_bytes;
1523 u64 size = block_group->key.offset; 1641 u64 size = block_group->key.offset;
1524 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1642 u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1525 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1643 u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
1526 1644
1527 max_bitmaps = max(max_bitmaps, 1); 1645 max_bitmaps = max_t(u32, max_bitmaps, 1);
1528 1646
1529 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1647 ASSERT(ctl->total_bitmaps <= max_bitmaps);
1530 1648
@@ -1537,7 +1655,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1537 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1655 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1538 else 1656 else
1539 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1657 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1540 div64_u64(size, 1024 * 1024 * 1024); 1658 div_u64(size, 1024 * 1024 * 1024);
1541 1659
1542 /* 1660 /*
1543 * we want to account for 1 more bitmap than what we have so we can make 1661 * we want to account for 1 more bitmap than what we have so we can make
@@ -1552,14 +1670,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1552 } 1670 }
1553 1671
1554 /* 1672 /*
1555 * we want the extent entry threshold to always be at most 1/2 the maxw 1673 * we want the extent entry threshold to always be at most 1/2 the max
1556 * bytes we can have, or whatever is less than that. 1674 * bytes we can have, or whatever is less than that.
1557 */ 1675 */
1558 extent_bytes = max_bytes - bitmap_bytes; 1676 extent_bytes = max_bytes - bitmap_bytes;
1559 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1677 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
1560 1678
1561 ctl->extents_thresh = 1679 ctl->extents_thresh =
1562 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1680 div_u64(extent_bytes, sizeof(struct btrfs_free_space));
1563} 1681}
1564 1682
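The same narrowing happens in recalculate_thresholds(): bytes_per_bg and max_bitmaps comfortably fit in 32 bits, and max_bytes >> 1 replaces a 64-bit division by two. The threshold derivation, reduced to plain arithmetic (the two #defines are stand-ins for MAX_CACHE_BYTES_PER_GIG and sizeof(struct btrfs_free_space), not the authoritative values):

#include <stdint.h>
#include <stdio.h>

#define CACHE_BYTES_PER_GIG     (32ULL * 1024)  /* stand-in */
#define FREE_SPACE_ENTRY_SIZE   48ULL           /* stand-in */

int main(void)
{
        uint64_t size = 4ULL << 30;             /* 4GiB block group */
        uint64_t bitmap_bytes = 16ULL * 1024;   /* pretend bitmap cost */
        uint64_t max_bytes, extent_bytes;

        max_bytes = CACHE_BYTES_PER_GIG * (size >> 30);
        /* extent entries get at most half the budget; max_bytes >> 1 is
         * the same division div64_u64(max_bytes, 2) used to perform */
        extent_bytes = max_bytes - bitmap_bytes;
        if (extent_bytes > max_bytes >> 1)
                extent_bytes = max_bytes >> 1;
        printf("extents_thresh = %llu\n",
               (unsigned long long)(extent_bytes / FREE_SPACE_ENTRY_SIZE));
        return 0;
}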
1565static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1683static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1673,7 +1791,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1673 */ 1791 */
1674 if (*bytes >= align) { 1792 if (*bytes >= align) {
1675 tmp = entry->offset - ctl->start + align - 1; 1793 tmp = entry->offset - ctl->start + align - 1;
1676 do_div(tmp, align); 1794 tmp = div64_u64(tmp, align);
1677 tmp = tmp * align + ctl->start; 1795 tmp = tmp * align + ctl->start;
1678 align_off = tmp - entry->offset; 1796 align_off = tmp - entry->offset;
1679 } else { 1797 } else {
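The do_div() -> div64_u64() change matters on 32-bit builds: do_div() takes a 32-bit divisor, while the allocation alignment here is a u64. The computation is the usual round-up-to-alignment relative to ctl->start, standalone:

#include <stdint.h>
#include <stdio.h>

/* round offset up to the next multiple of align, measured from start */
static uint64_t align_up(uint64_t start, uint64_t offset, uint64_t align)
{
        uint64_t tmp = (offset - start + align - 1) / align;

        return tmp * align + start;
}

int main(void)
{
        /* 4K alignment: 1MiB + 100 rounds up to 1MiB + 4KiB */
        printf("%llu\n", (unsigned long long)
               align_up(0, (1ULL << 20) + 100, 4096));
        return 0;
}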
@@ -2402,11 +2520,8 @@ static void __btrfs_remove_free_space_cache_locked(
2402 } else { 2520 } else {
2403 free_bitmap(ctl, info); 2521 free_bitmap(ctl, info);
2404 } 2522 }
2405 if (need_resched()) { 2523
2406 spin_unlock(&ctl->tree_lock); 2524 cond_resched_lock(&ctl->tree_lock);
2407 cond_resched();
2408 spin_lock(&ctl->tree_lock);
2409 }
2410 } 2525 }
2411} 2526}
2412 2527
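cond_resched_lock() folds the open-coded unlock/resched/relock dance into one helper. Its shape, with userspace stubs so the fragment stands alone (the real helper also checks spin_needbreak() and returns nonzero when it dropped the lock):

#include <stdio.h>

typedef struct { int held; } spinlock_t;        /* stub */
static void spin_lock(spinlock_t *l)   { l->held = 1; }
static void spin_unlock(spinlock_t *l) { l->held = 0; }
static int  need_resched(void)         { return 1; }    /* pretend */
static void cond_resched(void)         { }

static int cond_resched_lock(spinlock_t *lock)
{
        if (need_resched()) {
                spin_unlock(lock);
                cond_resched();
                spin_lock(lock);
                return 1;
        }
        return 0;
}

int main(void)
{
        spinlock_t l;

        spin_lock(&l);
        printf("rescheduled=%d\n", cond_resched_lock(&l));
        spin_unlock(&l);
        return 0;
}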
@@ -2431,11 +2546,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2431 2546
2432 WARN_ON(cluster->block_group != block_group); 2547 WARN_ON(cluster->block_group != block_group);
2433 __btrfs_return_cluster_to_free_space(block_group, cluster); 2548 __btrfs_return_cluster_to_free_space(block_group, cluster);
2434 if (need_resched()) { 2549
2435 spin_unlock(&ctl->tree_lock); 2550 cond_resched_lock(&ctl->tree_lock);
2436 cond_resched();
2437 spin_lock(&ctl->tree_lock);
2438 }
2439 } 2551 }
2440 __btrfs_remove_free_space_cache_locked(ctl); 2552 __btrfs_remove_free_space_cache_locked(ctl);
2441 spin_unlock(&ctl->tree_lock); 2553 spin_unlock(&ctl->tree_lock);
@@ -3346,11 +3458,14 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3346{ 3458{
3347 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3459 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3348 int ret; 3460 int ret;
3461 struct btrfs_io_ctl io_ctl;
3349 3462
3350 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3463 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
3351 return 0; 3464 return 0;
3352 3465
3353 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); 3466 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
3467 trans, path, 0) ||
3468 btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
3354 if (ret) { 3469 if (ret) {
3355 btrfs_delalloc_release_metadata(inode, inode->i_size); 3470 btrfs_delalloc_release_metadata(inode, inode->i_size);
3356#ifdef DEBUG 3471#ifdef DEBUG
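Note the || chaining the two calls in btrfs_write_out_ino_cache(): it short-circuits, so btrfs_wait_cache_io() only runs when the write phase returned 0, and any failure collapses to ret == 1 rather than a specific errno, which is all the cleanup below checks. An equivalent spelled-out form (shown for clarity only, not the committed code):

        ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
                                      trans, path, 0);
        if (!ret)
                ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
        if (ret)
                ret = 1;        /* || collapses errno values to 1 */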
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 88b2238a0aed..a16a029ad3b1 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -48,6 +48,8 @@ struct btrfs_free_space_op {
48 struct btrfs_free_space *info); 48 struct btrfs_free_space *info);
49}; 49};
50 50
51struct btrfs_io_ctl;
52
51struct inode *lookup_free_space_inode(struct btrfs_root *root, 53struct inode *lookup_free_space_inode(struct btrfs_root *root,
52 struct btrfs_block_group_cache 54 struct btrfs_block_group_cache
53 *block_group, struct btrfs_path *path); 55 *block_group, struct btrfs_path *path);
@@ -60,14 +62,19 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
60 struct btrfs_block_rsv *rsv); 62 struct btrfs_block_rsv *rsv);
61int btrfs_truncate_free_space_cache(struct btrfs_root *root, 63int btrfs_truncate_free_space_cache(struct btrfs_root *root,
62 struct btrfs_trans_handle *trans, 64 struct btrfs_trans_handle *trans,
65 struct btrfs_block_group_cache *block_group,
63 struct inode *inode); 66 struct inode *inode);
64int load_free_space_cache(struct btrfs_fs_info *fs_info, 67int load_free_space_cache(struct btrfs_fs_info *fs_info,
65 struct btrfs_block_group_cache *block_group); 68 struct btrfs_block_group_cache *block_group);
69int btrfs_wait_cache_io(struct btrfs_root *root,
70 struct btrfs_trans_handle *trans,
71 struct btrfs_block_group_cache *block_group,
72 struct btrfs_io_ctl *io_ctl,
73 struct btrfs_path *path, u64 offset);
66int btrfs_write_out_cache(struct btrfs_root *root, 74int btrfs_write_out_cache(struct btrfs_root *root,
67 struct btrfs_trans_handle *trans, 75 struct btrfs_trans_handle *trans,
68 struct btrfs_block_group_cache *block_group, 76 struct btrfs_block_group_cache *block_group,
69 struct btrfs_path *path); 77 struct btrfs_path *path);
70
71struct inode *lookup_free_ino_inode(struct btrfs_root *root, 78struct inode *lookup_free_ino_inode(struct btrfs_root *root,
72 struct btrfs_path *path); 79 struct btrfs_path *path);
73int create_free_ino_inode(struct btrfs_root *root, 80int create_free_ino_inode(struct btrfs_root *root,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 74faea3a516e..f6a596d5a637 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -456,7 +456,7 @@ again:
456 } 456 }
457 457
458 if (i_size_read(inode) > 0) { 458 if (i_size_read(inode) > 0) {
459 ret = btrfs_truncate_free_space_cache(root, trans, inode); 459 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
460 if (ret) { 460 if (ret) {
461 if (ret != -ENOSPC) 461 if (ret != -ENOSPC)
462 btrfs_abort_transaction(trans, root, ret); 462 btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23dfcddb..56f00a25c003 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/statfs.h> 33#include <linux/statfs.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include <linux/bit_spinlock.h> 35#include <linux/bit_spinlock.h>
37#include <linux/xattr.h> 36#include <linux/xattr.h>
38#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
43#include <linux/btrfs.h> 42#include <linux/btrfs.h>
44#include <linux/blkdev.h> 43#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h> 44#include <linux/posix_acl_xattr.h>
45#include <linux/uio.h>
46#include "ctree.h" 46#include "ctree.h"
47#include "disk-io.h" 47#include "disk-io.h"
48#include "transaction.h" 48#include "transaction.h"
@@ -59,6 +59,7 @@
59#include "backref.h" 59#include "backref.h"
60#include "hash.h" 60#include "hash.h"
61#include "props.h" 61#include "props.h"
62#include "qgroup.h"
62 63
63struct btrfs_iget_args { 64struct btrfs_iget_args {
64 struct btrfs_key *location; 65 struct btrfs_key *location;
@@ -108,6 +109,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
108 109
109static int btrfs_dirty_inode(struct inode *inode); 110static int btrfs_dirty_inode(struct inode *inode);
110 111
112#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
113void btrfs_test_inode_set_ops(struct inode *inode)
114{
115 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
116}
117#endif
118
111static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 119static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 struct inode *inode, struct inode *dir, 120 struct inode *inode, struct inode *dir,
113 const struct qstr *qstr) 121 const struct qstr *qstr)
@@ -463,7 +471,7 @@ again:
463 */ 471 */
464 if (inode_need_compress(inode)) { 472 if (inode_need_compress(inode)) {
465 WARN_ON(pages); 473 WARN_ON(pages);
466 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 474 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
467 if (!pages) { 475 if (!pages) {
468 /* just bail out to the uncompressed code */ 476 /* just bail out to the uncompressed code */
469 goto cont; 477 goto cont;
@@ -745,7 +753,6 @@ retry:
745 } 753 }
746 goto out_free; 754 goto out_free;
747 } 755 }
748
749 /* 756 /*
750 * here we're doing allocation and writeback of the 757 * here we're doing allocation and writeback of the
751 * compressed pages 758 * compressed pages
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
1542 u64 new_size; 1549 u64 new_size;
1543 1550
1544 /* 1551 /*
1545 * We need the largest size of the remaining extent to see if we 1552 * See the explanation in btrfs_merge_extent_hook, the same
1546 * need to add a new outstanding extent. Think of the following 1553 * applies here, just in reverse.
1547 * case
1548 *
1549 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
1550 *
1551 * The new_size would just be 4k and we'd think we had enough
1552 * outstanding extents for this if we only took one side of the
1553 * split, same goes for the other direction. We need to see if
1554 * the larger size still is the same amount of extents as the
1555 * original size, because if it is we need to add a new
1556 * outstanding extent. But if we split up and the larger size
1557 * is less than the original then we are good to go since we've
1558 * already accounted for the extra extent in our original
1559 * accounting.
1560 */ 1554 */
1561 new_size = orig->end - split + 1; 1555 new_size = orig->end - split + 1;
1562 if ((split - orig->start) > new_size) 1556 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1563 new_size = split - orig->start; 1557 BTRFS_MAX_EXTENT_SIZE);
1564 1558 new_size = split - orig->start;
1565 num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, 1559 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1566 BTRFS_MAX_EXTENT_SIZE); 1560 BTRFS_MAX_EXTENT_SIZE);
1567 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1561 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1568 BTRFS_MAX_EXTENT_SIZE) < num_extents) 1562 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1569 return; 1563 return;
1570 } 1564 }
1571 1565
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1591 if (!(other->state & EXTENT_DELALLOC)) 1585 if (!(other->state & EXTENT_DELALLOC))
1592 return; 1586 return;
1593 1587
1594 old_size = other->end - other->start + 1; 1588 if (new->start > other->start)
1595 new_size = old_size + (new->end - new->start + 1); 1589 new_size = new->end - other->start + 1;
1590 else
1591 new_size = other->end - new->start + 1;
1596 1592
1597 /* we're not bigger than the max, unreserve the space and go */ 1593 /* we're not bigger than the max, unreserve the space and go */
1598 if (new_size <= BTRFS_MAX_EXTENT_SIZE) { 1594 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1603 } 1599 }
1604 1600
1605 /* 1601 /*
1606 * If we grew by another max_extent, just return, we want to keep that 1602 * We have to add up either side to figure out how many extents were
1607 * reserved amount. 1603 * accounted for before we merged into one big extent. If the number of
1604 * extents we accounted for is <= the amount we need for the new range
1605 * then we can return, otherwise drop. Think of it like this
1606 *
1607 * [ 4k][MAX_SIZE]
1608 *
 1609 * So we've grown the extent by a MAX_SIZE extent; this would mean we
 1610 * need 2 outstanding extents. On one side we have 1 and on the other
 1611 * side we have 1, so they are == and we can return. But in this case
1612 *
1613 * [MAX_SIZE+4k][MAX_SIZE+4k]
1614 *
 1615 * Each range on its own accounts for 2 extents, but merged together
1616 * they are only 3 extents worth of accounting, so we need to drop in
1617 * this case.
1608 */ 1618 */
1619 old_size = other->end - other->start + 1;
1609 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, 1620 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1610 BTRFS_MAX_EXTENT_SIZE); 1621 BTRFS_MAX_EXTENT_SIZE);
1622 old_size = new->end - new->start + 1;
1623 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1624 BTRFS_MAX_EXTENT_SIZE);
1625
1611 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1626 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1612 BTRFS_MAX_EXTENT_SIZE) > num_extents) 1627 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1613 return; 1628 return;
1614 1629
1615 spin_lock(&BTRFS_I(inode)->lock); 1630 spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
1686 spin_unlock(&BTRFS_I(inode)->lock); 1701 spin_unlock(&BTRFS_I(inode)->lock);
1687 } 1702 }
1688 1703
1704 /* For sanity tests */
1705 if (btrfs_test_is_dummy_root(root))
1706 return;
1707
1689 __percpu_counter_add(&root->fs_info->delalloc_bytes, len, 1708 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1690 root->fs_info->delalloc_batch); 1709 root->fs_info->delalloc_batch);
1691 spin_lock(&BTRFS_I(inode)->lock); 1710 spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1741 root != root->fs_info->tree_root) 1760 root != root->fs_info->tree_root)
1742 btrfs_delalloc_release_metadata(inode, len); 1761 btrfs_delalloc_release_metadata(inode, len);
1743 1762
1763 /* For sanity tests. */
1764 if (btrfs_test_is_dummy_root(root))
1765 return;
1766
1744 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1767 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1745 && do_list && !(state->state & EXTENT_NORESERVE)) 1768 && do_list && !(state->state & EXTENT_NORESERVE))
1746 btrfs_free_reserved_data_space(inode, len); 1769 btrfs_free_reserved_data_space(inode, len);
@@ -3087,6 +3110,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
3087 if (empty) 3110 if (empty)
3088 return; 3111 return;
3089 3112
3113 down_read(&fs_info->delayed_iput_sem);
3114
3090 spin_lock(&fs_info->delayed_iput_lock); 3115 spin_lock(&fs_info->delayed_iput_lock);
3091 list_splice_init(&fs_info->delayed_iputs, &list); 3116 list_splice_init(&fs_info->delayed_iputs, &list);
3092 spin_unlock(&fs_info->delayed_iput_lock); 3117 spin_unlock(&fs_info->delayed_iput_lock);
@@ -3097,6 +3122,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
3097 iput(delayed->inode); 3122 iput(delayed->inode);
3098 kfree(delayed); 3123 kfree(delayed);
3099 } 3124 }
3125
3126 up_read(&root->fs_info->delayed_iput_sem);
3100} 3127}
3101 3128
3102/* 3129/*
@@ -4139,6 +4166,21 @@ out:
4139 return err; 4166 return err;
4140} 4167}
4141 4168
4169static int truncate_space_check(struct btrfs_trans_handle *trans,
4170 struct btrfs_root *root,
4171 u64 bytes_deleted)
4172{
4173 int ret;
4174
4175 bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
4176 ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
4177 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
4178 if (!ret)
4179 trans->bytes_reserved += bytes_deleted;
4180 return ret;
4181
4182}
4183
4142/* 4184/*
4143 * this can truncate away extent items, csum items and directory items. 4185 * this can truncate away extent items, csum items and directory items.
4144 * It starts at a high offset and removes keys until it can't find 4186 * It starts at a high offset and removes keys until it can't find
@@ -4174,9 +4216,21 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4174 int ret; 4216 int ret;
4175 int err = 0; 4217 int err = 0;
4176 u64 ino = btrfs_ino(inode); 4218 u64 ino = btrfs_ino(inode);
4219 u64 bytes_deleted = 0;
4220 bool be_nice = 0;
4221 bool should_throttle = 0;
4222 bool should_end = 0;
4177 4223
4178 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 4224 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
4179 4225
4226 /*
4227 * for non-free space inodes and ref cows, we want to back off from
4228 * time to time
4229 */
4230 if (!btrfs_is_free_space_inode(inode) &&
4231 test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4232 be_nice = 1;
4233
4180 path = btrfs_alloc_path(); 4234 path = btrfs_alloc_path();
4181 if (!path) 4235 if (!path)
4182 return -ENOMEM; 4236 return -ENOMEM;
@@ -4206,6 +4260,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4206 key.type = (u8)-1; 4260 key.type = (u8)-1;
4207 4261
4208search_again: 4262search_again:
4263 /*
4264 * with a 16K leaf size and 128MB extents, you can actually queue
 4265 * up a huge file in a single leaf. When bytes_deleted is > 0 at
 4266 * all, it will usually be huge by the time we get here
4267 */
4268 if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
4269 if (btrfs_should_end_transaction(trans, root)) {
4270 err = -EAGAIN;
4271 goto error;
4272 }
4273 }
 4274
4209 path->leave_spinning = 1; 4276 path->leave_spinning = 1;
4210 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 4277 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4211 if (ret < 0) { 4278 if (ret < 0) {
@@ -4348,22 +4415,39 @@ delete:
4348 } else { 4415 } else {
4349 break; 4416 break;
4350 } 4417 }
4418 should_throttle = 0;
4419
4351 if (found_extent && 4420 if (found_extent &&
4352 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || 4421 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4353 root == root->fs_info->tree_root)) { 4422 root == root->fs_info->tree_root)) {
4354 btrfs_set_path_blocking(path); 4423 btrfs_set_path_blocking(path);
4424 bytes_deleted += extent_num_bytes;
4355 ret = btrfs_free_extent(trans, root, extent_start, 4425 ret = btrfs_free_extent(trans, root, extent_start,
4356 extent_num_bytes, 0, 4426 extent_num_bytes, 0,
4357 btrfs_header_owner(leaf), 4427 btrfs_header_owner(leaf),
4358 ino, extent_offset, 0); 4428 ino, extent_offset, 0);
4359 BUG_ON(ret); 4429 BUG_ON(ret);
4430 if (btrfs_should_throttle_delayed_refs(trans, root))
4431 btrfs_async_run_delayed_refs(root,
4432 trans->delayed_ref_updates * 2, 0);
4433 if (be_nice) {
4434 if (truncate_space_check(trans, root,
4435 extent_num_bytes)) {
4436 should_end = 1;
4437 }
4438 if (btrfs_should_throttle_delayed_refs(trans,
4439 root)) {
4440 should_throttle = 1;
4441 }
4442 }
4360 } 4443 }
4361 4444
4362 if (found_type == BTRFS_INODE_ITEM_KEY) 4445 if (found_type == BTRFS_INODE_ITEM_KEY)
4363 break; 4446 break;
4364 4447
4365 if (path->slots[0] == 0 || 4448 if (path->slots[0] == 0 ||
4366 path->slots[0] != pending_del_slot) { 4449 path->slots[0] != pending_del_slot ||
4450 should_throttle || should_end) {
4367 if (pending_del_nr) { 4451 if (pending_del_nr) {
4368 ret = btrfs_del_items(trans, root, path, 4452 ret = btrfs_del_items(trans, root, path,
4369 pending_del_slot, 4453 pending_del_slot,
@@ -4376,6 +4460,23 @@ delete:
4376 pending_del_nr = 0; 4460 pending_del_nr = 0;
4377 } 4461 }
4378 btrfs_release_path(path); 4462 btrfs_release_path(path);
4463 if (should_throttle) {
4464 unsigned long updates = trans->delayed_ref_updates;
4465 if (updates) {
4466 trans->delayed_ref_updates = 0;
4467 ret = btrfs_run_delayed_refs(trans, root, updates * 2);
4468 if (ret && !err)
4469 err = ret;
4470 }
4471 }
4472 /*
4473 * if we failed to refill our space rsv, bail out
4474 * and let the transaction restart
4475 */
4476 if (should_end) {
4477 err = -EAGAIN;
4478 goto error;
4479 }
4379 goto search_again; 4480 goto search_again;
4380 } else { 4481 } else {
4381 path->slots[0]--; 4482 path->slots[0]--;
@@ -4392,7 +4493,18 @@ error:
4392 if (last_size != (u64)-1 && 4493 if (last_size != (u64)-1 &&
4393 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 4494 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4394 btrfs_ordered_update_i_size(inode, last_size, NULL); 4495 btrfs_ordered_update_i_size(inode, last_size, NULL);
4496
4395 btrfs_free_path(path); 4497 btrfs_free_path(path);
4498
4499 if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
4500 unsigned long updates = trans->delayed_ref_updates;
4501 if (updates) {
4502 trans->delayed_ref_updates = 0;
4503 ret = btrfs_run_delayed_refs(trans, root, updates * 2);
4504 if (ret && !err)
4505 err = ret;
4506 }
4507 }
4396 return err; 4508 return err;
4397} 4509}
4398 4510
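The drain-the-backlog pattern above appears at two sites in this function: snapshot trans->delayed_ref_updates, zero it, then run twice that many delayed refs. Factored out it would look like this (hypothetical helper; the patch deliberately open-codes it):

static int flush_pending_delayed_refs(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root)
{
        unsigned long updates = trans->delayed_ref_updates;

        if (!updates)
                return 0;
        trans->delayed_ref_updates = 0;
        /* run twice the recorded count, matching the in-tree heuristic */
        return btrfs_run_delayed_refs(trans, root, updates * 2);
}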
@@ -4901,6 +5013,7 @@ void btrfs_evict_inode(struct inode *inode)
4901 struct btrfs_trans_handle *trans; 5013 struct btrfs_trans_handle *trans;
4902 struct btrfs_root *root = BTRFS_I(inode)->root; 5014 struct btrfs_root *root = BTRFS_I(inode)->root;
4903 struct btrfs_block_rsv *rsv, *global_rsv; 5015 struct btrfs_block_rsv *rsv, *global_rsv;
5016 int steal_from_global = 0;
4904 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 5017 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
4905 int ret; 5018 int ret;
4906 5019
@@ -4968,9 +5081,20 @@ void btrfs_evict_inode(struct inode *inode)
4968 * hard as possible to get this to work. 5081 * hard as possible to get this to work.
4969 */ 5082 */
4970 if (ret) 5083 if (ret)
4971 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); 5084 steal_from_global++;
5085 else
5086 steal_from_global = 0;
5087 ret = 0;
4972 5088
4973 if (ret) { 5089 /*
5090 * steal_from_global == 0: we reserved stuff, hooray!
5091 * steal_from_global == 1: we didn't reserve stuff, boo!
5092 * steal_from_global == 2: we've committed, still not a lot of
5093 * room but maybe we'll have room in the global reserve this
5094 * time.
5095 * steal_from_global == 3: abandon all hope!
5096 */
5097 if (steal_from_global > 2) {
4974 btrfs_warn(root->fs_info, 5098 btrfs_warn(root->fs_info,
4975 "Could not get space for a delete, will truncate on mount %d", 5099 "Could not get space for a delete, will truncate on mount %d",
4976 ret); 5100 ret);
@@ -4986,10 +5110,40 @@ void btrfs_evict_inode(struct inode *inode)
4986 goto no_delete; 5110 goto no_delete;
4987 } 5111 }
4988 5112
5113 /*
 5114 * We can't just steal from the global reserve; we need to make
 5115 * sure there is room to do it. If not, we need to commit and try
5116 * again.
5117 */
5118 if (steal_from_global) {
5119 if (!btrfs_check_space_for_delayed_refs(trans, root))
5120 ret = btrfs_block_rsv_migrate(global_rsv, rsv,
5121 min_size);
5122 else
5123 ret = -ENOSPC;
5124 }
5125
5126 /*
5127 * Couldn't steal from the global reserve, we have too much
5128 * pending stuff built up, commit the transaction and try it
5129 * again.
5130 */
5131 if (ret) {
5132 ret = btrfs_commit_transaction(trans, root);
5133 if (ret) {
5134 btrfs_orphan_del(NULL, inode);
5135 btrfs_free_block_rsv(root, rsv);
5136 goto no_delete;
5137 }
5138 continue;
5139 } else {
5140 steal_from_global = 0;
5141 }
5142
4989 trans->block_rsv = rsv; 5143 trans->block_rsv = rsv;
4990 5144
4991 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); 5145 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
4992 if (ret != -ENOSPC) 5146 if (ret != -ENOSPC && ret != -EAGAIN)
4993 break; 5147 break;
4994 5148
4995 trans->block_rsv = &root->fs_info->trans_block_rsv; 5149 trans->block_rsv = &root->fs_info->trans_block_rsv;
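steal_from_global is an escalation counter rather than a flag: 0 means the plain reservation worked, 1 means try migrating from the global reserve, 2 means a commit has already been tried and this is the last attempt, and anything higher gives up (leaving an orphan to be truncated on the next mount). One loop iteration condensed into a sketch (reserve_rsv() is a placeholder for the refill call above this hunk):

        ret = reserve_rsv(root, rsv, min_size);  /* placeholder */
        if (ret)
                steal_from_global++;    /* escalate one step */
        else
                steal_from_global = 0;
        if (steal_from_global > 2)
                goto give_up;           /* warn, orphan_del, no_delete */
        if (steal_from_global) {
                /* only steal if the delayed-ref backlog leaves room */
                if (!btrfs_check_space_for_delayed_refs(trans, root))
                        ret = btrfs_block_rsv_migrate(global_rsv, rsv,
                                                      min_size);
                else
                        ret = -ENOSPC;
                if (ret) {              /* commit, then retry the loop */
                        btrfs_commit_transaction(trans, root);
                        continue;
                }
        }
        /* reserved: truncate under rsv */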
@@ -7213,7 +7367,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7213 u64 start = iblock << inode->i_blkbits; 7367 u64 start = iblock << inode->i_blkbits;
7214 u64 lockstart, lockend; 7368 u64 lockstart, lockend;
7215 u64 len = bh_result->b_size; 7369 u64 len = bh_result->b_size;
7216 u64 orig_len = len; 7370 u64 *outstanding_extents = NULL;
7217 int unlock_bits = EXTENT_LOCKED; 7371 int unlock_bits = EXTENT_LOCKED;
7218 int ret = 0; 7372 int ret = 0;
7219 7373
@@ -7225,6 +7379,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7225 lockstart = start; 7379 lockstart = start;
7226 lockend = start + len - 1; 7380 lockend = start + len - 1;
7227 7381
7382 if (current->journal_info) {
7383 /*
7384 * Need to pull our outstanding extents and set journal_info to NULL so
 7385 * that anything that needs to check if there's a transaction doesn't get
7386 * confused.
7387 */
7388 outstanding_extents = current->journal_info;
7389 current->journal_info = NULL;
7390 }
7391
7228 /* 7392 /*
7229 * If this errors out it's because we couldn't invalidate pagecache for 7393 * If this errors out it's because we couldn't invalidate pagecache for
7230 * this range and we need to fallback to buffered. 7394 * this range and we need to fallback to buffered.
@@ -7285,7 +7449,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7285 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && 7449 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7286 em->block_start != EXTENT_MAP_HOLE)) { 7450 em->block_start != EXTENT_MAP_HOLE)) {
7287 int type; 7451 int type;
7288 int ret;
7289 u64 block_start, orig_start, orig_block_len, ram_bytes; 7452 u64 block_start, orig_start, orig_block_len, ram_bytes;
7290 7453
7291 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 7454 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7349,11 +7512,20 @@ unlock:
7349 if (start + len > i_size_read(inode)) 7512 if (start + len > i_size_read(inode))
7350 i_size_write(inode, start + len); 7513 i_size_write(inode, start + len);
7351 7514
7352 if (len < orig_len) { 7515 /*
7516 * If we have an outstanding_extents count still set then we're
7517 * within our reservation, otherwise we need to adjust our inode
7518 * counter appropriately.
7519 */
7520 if (*outstanding_extents) {
7521 (*outstanding_extents)--;
7522 } else {
7353 spin_lock(&BTRFS_I(inode)->lock); 7523 spin_lock(&BTRFS_I(inode)->lock);
7354 BTRFS_I(inode)->outstanding_extents++; 7524 BTRFS_I(inode)->outstanding_extents++;
7355 spin_unlock(&BTRFS_I(inode)->lock); 7525 spin_unlock(&BTRFS_I(inode)->lock);
7356 } 7526 }
7527
7528 current->journal_info = outstanding_extents;
7357 btrfs_free_reserved_data_space(inode, len); 7529 btrfs_free_reserved_data_space(inode, len);
7358 } 7530 }
7359 7531
@@ -7377,6 +7549,8 @@ unlock:
7377unlock_err: 7549unlock_err:
7378 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7550 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7379 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7551 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7552 if (outstanding_extents)
7553 current->journal_info = outstanding_extents;
7380 return ret; 7554 return ret;
7381} 7555}
7382 7556
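current->journal_info is (ab)used as a side channel here: btrfs_direct_IO() points it at its reserved-extent count before calling __blockdev_direct_IO(), and btrfs_get_blocks_direct() decrements that count for every chunk it maps, only bumping the inode's outstanding_extents once the reservation is used up. The handshake in miniature (standalone, with the kernel plumbing reduced to a direct call):

#include <stdint.h>
#include <stdio.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)  /* BTRFS_MAX_EXTENT_SIZE */

static uint64_t *journal_info;  /* stand-in for current->journal_info */
static uint64_t inode_outstanding_extents;

static void map_one_chunk(void)         /* btrfs_get_blocks_direct() side */
{
        if (*journal_info)
                (*journal_info)--;      /* still inside the reservation */
        else
                inode_outstanding_extents++;    /* over the reservation */
}

int main(void)
{
        uint64_t count = 3 * MAX_EXTENT_SIZE;   /* DIO write size */
        uint64_t reserved = (count + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
        int i;

        journal_info = &reserved;
        for (i = 0; i < 4; i++)         /* mapped in 4 chunks, not 3 */
                map_one_chunk();
        journal_info = NULL;

        printf("extra extents accounted: %llu\n",
               (unsigned long long)inode_outstanding_extents);
        return 0;
}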
@@ -8038,7 +8212,7 @@ free_ordered:
8038 bio_endio(dio_bio, ret); 8212 bio_endio(dio_bio, ret);
8039} 8213}
8040 8214
8041static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, 8215static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
8042 const struct iov_iter *iter, loff_t offset) 8216 const struct iov_iter *iter, loff_t offset)
8043{ 8217{
8044 int seg; 8218 int seg;
@@ -8053,7 +8227,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
8053 goto out; 8227 goto out;
8054 8228
8055 /* If this is a write we don't need to check anymore */ 8229 /* If this is a write we don't need to check anymore */
8056 if (rw & WRITE) 8230 if (iov_iter_rw(iter) == WRITE)
8057 return 0; 8231 return 0;
8058 /* 8232 /*
8059 * Check to make sure we don't have duplicate iov_base's in this 8233 * Check to make sure we don't have duplicate iov_base's in this
@@ -8071,18 +8245,19 @@ out:
8071 return retval; 8245 return retval;
8072} 8246}
8073 8247
8074static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 8248static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
8075 struct iov_iter *iter, loff_t offset) 8249 loff_t offset)
8076{ 8250{
8077 struct file *file = iocb->ki_filp; 8251 struct file *file = iocb->ki_filp;
8078 struct inode *inode = file->f_mapping->host; 8252 struct inode *inode = file->f_mapping->host;
8253 u64 outstanding_extents = 0;
8079 size_t count = 0; 8254 size_t count = 0;
8080 int flags = 0; 8255 int flags = 0;
8081 bool wakeup = true; 8256 bool wakeup = true;
8082 bool relock = false; 8257 bool relock = false;
8083 ssize_t ret; 8258 ssize_t ret;
8084 8259
8085 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) 8260 if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
8086 return 0; 8261 return 0;
8087 8262
8088 atomic_inc(&inode->i_dio_count); 8263 atomic_inc(&inode->i_dio_count);
@@ -8100,7 +8275,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8100 filemap_fdatawrite_range(inode->i_mapping, offset, 8275 filemap_fdatawrite_range(inode->i_mapping, offset,
8101 offset + count - 1); 8276 offset + count - 1);
8102 8277
8103 if (rw & WRITE) { 8278 if (iov_iter_rw(iter) == WRITE) {
8104 /* 8279 /*
8105 * If the write DIO is beyond the EOF, we need update 8280 * If the write DIO is beyond the EOF, we need update
8106 * the isize, but it is protected by i_mutex. So we can 8281 * the isize, but it is protected by i_mutex. So we can
@@ -8113,6 +8288,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8113 ret = btrfs_delalloc_reserve_space(inode, count); 8288 ret = btrfs_delalloc_reserve_space(inode, count);
8114 if (ret) 8289 if (ret)
8115 goto out; 8290 goto out;
8291 outstanding_extents = div64_u64(count +
8292 BTRFS_MAX_EXTENT_SIZE - 1,
8293 BTRFS_MAX_EXTENT_SIZE);
8294
8295 /*
8296 * We need to know how many extents we reserved so that we can
8297 * do the accounting properly if we go over the number we
8298 * originally calculated. Abuse current->journal_info for this.
8299 */
8300 current->journal_info = &outstanding_extents;
8116 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8301 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8117 &BTRFS_I(inode)->runtime_flags)) { 8302 &BTRFS_I(inode)->runtime_flags)) {
8118 inode_dio_done(inode); 8303 inode_dio_done(inode);
@@ -8120,11 +8305,12 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8120 wakeup = false; 8305 wakeup = false;
8121 } 8306 }
8122 8307
8123 ret = __blockdev_direct_IO(rw, iocb, inode, 8308 ret = __blockdev_direct_IO(iocb, inode,
8124 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 8309 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
8125 iter, offset, btrfs_get_blocks_direct, NULL, 8310 iter, offset, btrfs_get_blocks_direct, NULL,
8126 btrfs_submit_direct, flags); 8311 btrfs_submit_direct, flags);
8127 if (rw & WRITE) { 8312 if (iov_iter_rw(iter) == WRITE) {
8313 current->journal_info = NULL;
8128 if (ret < 0 && ret != -EIOCBQUEUED) 8314 if (ret < 0 && ret != -EIOCBQUEUED)
8129 btrfs_delalloc_release_space(inode, count); 8315 btrfs_delalloc_release_space(inode, count);
8130 else if (ret >= 0 && (size_t)ret < count) 8316 else if (ret >= 0 && (size_t)ret < count)
@@ -8526,7 +8712,7 @@ static int btrfs_truncate(struct inode *inode)
8526 ret = btrfs_truncate_inode_items(trans, root, inode, 8712 ret = btrfs_truncate_inode_items(trans, root, inode,
8527 inode->i_size, 8713 inode->i_size,
8528 BTRFS_EXTENT_DATA_KEY); 8714 BTRFS_EXTENT_DATA_KEY);
8529 if (ret != -ENOSPC) { 8715 if (ret != -ENOSPC && ret != -EAGAIN) {
8530 err = ret; 8716 err = ret;
8531 break; 8717 break;
8532 } 8718 }
@@ -9396,6 +9582,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
9396 btrfs_end_transaction(trans, root); 9582 btrfs_end_transaction(trans, root);
9397 break; 9583 break;
9398 } 9584 }
9585
9399 btrfs_drop_extent_cache(inode, cur_offset, 9586 btrfs_drop_extent_cache(inode, cur_offset,
9400 cur_offset + ins.offset -1, 0); 9587 cur_offset + ins.offset -1, 0);
9401 9588
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 74609b931ba5..ca5d968f4c37 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -456,6 +456,13 @@ static noinline int create_subvol(struct inode *dir,
456 if (ret) 456 if (ret)
457 return ret; 457 return ret;
458 458
459 /*
 460 * Don't create a subvolume whose level is not zero, or qgroups will be
 461 * screwed up, since the qgroup code assumes a subvolume qgroup's level is 0.
462 */
463 if (btrfs_qgroup_level(objectid))
464 return -ENOSPC;
465
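btrfs_qgroup_level() reads the level out of the top bits of a qgroup ID; subvolume qgroups sit at level 0 and parent qgroups above them, so any objectid with level bits set would collide with the qgroup hierarchy. Assuming the 48-bit split this series defines (BTRFS_QGROUP_LEVEL_SHIFT):

#include <stdint.h>
#include <stdio.h>

#define BTRFS_QGROUP_LEVEL_SHIFT 48     /* assumed, per this series */

static uint64_t btrfs_qgroup_level(uint64_t qgroupid)
{
        return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
}

int main(void)
{
        uint64_t subvol = 257;                  /* "0/257", level 0 */
        uint64_t parent = (1ULL << 48) | 5;     /* "1/5",   level 1 */

        printf("%llu %llu\n",
               (unsigned long long)btrfs_qgroup_level(subvol),
               (unsigned long long)btrfs_qgroup_level(parent));
        return 0;
}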
459 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 466 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
460 /* 467 /*
461 * The same as the snapshot creation, please see the comment 468 * The same as the snapshot creation, please see the comment
@@ -1564,7 +1571,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1564 goto out_free; 1571 goto out_free;
1565 } 1572 }
1566 1573
1567 do_div(new_size, root->sectorsize); 1574 new_size = div_u64(new_size, root->sectorsize);
1568 new_size *= root->sectorsize; 1575 new_size *= root->sectorsize;
1569 1576
1570 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", 1577 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
@@ -2897,6 +2904,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
2897 if (src == dst) 2904 if (src == dst)
2898 return -EINVAL; 2905 return -EINVAL;
2899 2906
2907 if (len == 0)
2908 return 0;
2909
2900 btrfs_double_lock(src, loff, dst, dst_loff, len); 2910 btrfs_double_lock(src, loff, dst, dst_loff, len);
2901 2911
2902 ret = extent_same_check_offsets(src, loff, len); 2912 ret = extent_same_check_offsets(src, loff, len);
@@ -3039,7 +3049,7 @@ out:
3039static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3049static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3040 u64 disko) 3050 u64 disko)
3041{ 3051{
3042 struct seq_list tree_mod_seq_elem = {}; 3052 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
3043 struct ulist *roots; 3053 struct ulist *roots;
3044 struct ulist_iterator uiter; 3054 struct ulist_iterator uiter;
3045 struct ulist_node *root_node = NULL; 3055 struct ulist_node *root_node = NULL;
@@ -3202,6 +3212,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
3202 key.offset = off; 3212 key.offset = off;
3203 3213
3204 while (1) { 3214 while (1) {
3215 u64 next_key_min_offset = key.offset + 1;
3216
3205 /* 3217 /*
3206 * note the key will change type as we walk through the 3218 * note the key will change type as we walk through the
3207 * tree. 3219 * tree.
@@ -3282,7 +3294,7 @@ process_slot:
3282 } else if (key.offset >= off + len) { 3294 } else if (key.offset >= off + len) {
3283 break; 3295 break;
3284 } 3296 }
3285 3297 next_key_min_offset = key.offset + datal;
3286 size = btrfs_item_size_nr(leaf, slot); 3298 size = btrfs_item_size_nr(leaf, slot);
3287 read_extent_buffer(leaf, buf, 3299 read_extent_buffer(leaf, buf,
3288 btrfs_item_ptr_offset(leaf, slot), 3300 btrfs_item_ptr_offset(leaf, slot),
@@ -3497,7 +3509,7 @@ process_slot:
3497 break; 3509 break;
3498 } 3510 }
3499 btrfs_release_path(path); 3511 btrfs_release_path(path);
3500 key.offset++; 3512 key.offset = next_key_min_offset;
3501 } 3513 }
3502 ret = 0; 3514 ret = 0;
3503 3515
@@ -3626,6 +3638,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3626 if (off + len == src->i_size) 3638 if (off + len == src->i_size)
3627 len = ALIGN(src->i_size, bs) - off; 3639 len = ALIGN(src->i_size, bs) - off;
3628 3640
3641 if (len == 0) {
3642 ret = 0;
3643 goto out_unlock;
3644 }
3645
3629 /* verify the end result is block aligned */ 3646 /* verify the end result is block aligned */
3630 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 3647 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
3631 !IS_ALIGNED(destoff, bs)) 3648 !IS_ALIGNED(destoff, bs))
@@ -4624,6 +4641,11 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
4624 sa->src, sa->dst); 4641 sa->src, sa->dst);
4625 } 4642 }
4626 4643
4644 /* update qgroup status and info */
4645 err = btrfs_run_qgroups(trans, root->fs_info);
4646 if (err < 0)
 4647 btrfs_error(root->fs_info, err,
4648 "failed to update qgroup status and info\n");
4627 err = btrfs_end_transaction(trans, root); 4649 err = btrfs_end_transaction(trans, root);
4628 if (err && !ret) 4650 if (err && !ret)
4629 ret = err; 4651 ret = err;
@@ -4669,8 +4691,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
4669 4691
4670 /* FIXME: check if the IDs really exist */ 4692 /* FIXME: check if the IDs really exist */
4671 if (sa->create) { 4693 if (sa->create) {
4672 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, 4694 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid);
4673 NULL);
4674 } else { 4695 } else {
4675 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); 4696 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
4676 } 4697 }
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 617553cdb7d3..a2f051347731 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -434,7 +434,7 @@ out:
434 return ret; 434 return ret;
435} 435}
436 436
437struct btrfs_compress_op btrfs_lzo_compress = { 437const struct btrfs_compress_op btrfs_lzo_compress = {
438 .alloc_workspace = lzo_alloc_workspace, 438 .alloc_workspace = lzo_alloc_workspace,
439 .free_workspace = lzo_free_workspace, 439 .free_workspace = lzo_free_workspace,
440 .compress_pages = lzo_compress_pages, 440 .compress_pages = lzo_compress_pages,
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h
index b7816cefbd13..1b10a3cd1195 100644
--- a/fs/btrfs/math.h
+++ b/fs/btrfs/math.h
@@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor)
28 if (factor == 10) 28 if (factor == 10)
29 return num; 29 return num;
30 num *= factor; 30 num *= factor;
31 do_div(num, 10); 31 return div_u64(num, 10);
32 return num;
33} 32}
34 33
35static inline u64 div_factor_fine(u64 num, int factor) 34static inline u64 div_factor_fine(u64 num, int factor)
@@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor)
37 if (factor == 100) 36 if (factor == 100)
38 return num; 37 return num;
39 num *= factor; 38 num *= factor;
40 do_div(num, 100); 39 return div_u64(num, 100);
41 return num;
42} 40}
43 41
44#endif 42#endif
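div_factor() and div_factor_fine() compute a fraction in tenths and hundredths respectively; the change simply returns the div_u64() quotient instead of going through do_div()'s in-place remainder convention. Standalone check of the tenths version:

#include <stdint.h>
#include <stdio.h>

static uint64_t div_factor(uint64_t num, int factor)
{
        if (factor == 10)
                return num;
        return num * factor / 10;
}

int main(void)
{
        /* 7 tenths (70%) of 1000 */
        printf("%llu\n", (unsigned long long)div_factor(1000, 7));
        return 0;
}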
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
452 continue; 452 continue;
453 if (entry_end(ordered) <= start) 453 if (entry_end(ordered) <= start)
454 break; 454 break;
455 if (!list_empty(&ordered->log_list)) 455 if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
456 continue;
457 if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
458 continue; 456 continue;
459 list_add(&ordered->log_list, logged_list); 457 list_add(&ordered->log_list, logged_list);
460 atomic_inc(&ordered->refs); 458 atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
511 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 509 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
512 &ordered->flags)); 510 &ordered->flags));
513 511
514 if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 512 list_add_tail(&ordered->trans_list, &trans->ordered);
515 list_add_tail(&ordered->trans_list, &trans->ordered);
516 spin_lock_irq(&log->log_extents_lock[index]); 513 spin_lock_irq(&log->log_extents_lock[index]);
517 } 514 }
518 spin_unlock_irq(&log->log_extents_lock[index]); 515 spin_unlock_irq(&log->log_extents_lock[index]);
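Folding list_empty() + test_bit() into test_and_set_bit() makes the check and the claim a single atomic step, so two racing loggers can no longer both observe the bit clear and queue the same ordered extent; that is also why the wait side can now add to trans->ordered unconditionally. The helper's return convention in miniature (a non-atomic userspace stand-in):

#include <stdio.h>

/* stand-in: returns the previous value of the bit, then sets it */
static int test_and_set_bit(int nr, unsigned long *addr)
{
        unsigned long mask = 1UL << nr;
        int old = !!(*addr & mask);

        *addr |= mask;
        return old;
}

int main(void)
{
        unsigned long flags = 0;

        /* first caller claims the bit, the second sees it already set */
        printf("%d %d\n", test_and_set_bit(3, &flags),
               test_and_set_bit(3, &flags));
        return 0;
}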
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 129b1dd28527..dca137b04095 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -425,3 +425,5 @@ static const char *prop_compression_extract(struct inode *inode)
425 425
426 return NULL; 426 return NULL;
427} 427}
428
429
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..3d6546581bb9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -644,9 +644,8 @@ out:
644} 644}
645 645
646static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 646static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
647 struct btrfs_root *root, u64 qgroupid, 647 struct btrfs_root *root,
648 u64 flags, u64 max_rfer, u64 max_excl, 648 struct btrfs_qgroup *qgroup)
649 u64 rsv_rfer, u64 rsv_excl)
650{ 649{
651 struct btrfs_path *path; 650 struct btrfs_path *path;
652 struct btrfs_key key; 651 struct btrfs_key key;
@@ -657,7 +656,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
657 656
658 key.objectid = 0; 657 key.objectid = 0;
659 key.type = BTRFS_QGROUP_LIMIT_KEY; 658 key.type = BTRFS_QGROUP_LIMIT_KEY;
660 key.offset = qgroupid; 659 key.offset = qgroup->qgroupid;
661 660
662 path = btrfs_alloc_path(); 661 path = btrfs_alloc_path();
663 if (!path) 662 if (!path)
@@ -673,11 +672,11 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
673 l = path->nodes[0]; 672 l = path->nodes[0];
674 slot = path->slots[0]; 673 slot = path->slots[0];
675 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 674 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
676 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); 675 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
677 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); 676 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
678 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); 677 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
679 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); 678 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
680 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); 679 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
681 680
682 btrfs_mark_buffer_dirty(l); 681 btrfs_mark_buffer_dirty(l);
683 682
@@ -967,6 +966,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
967 fs_info->pending_quota_state = 0; 966 fs_info->pending_quota_state = 0;
968 quota_root = fs_info->quota_root; 967 quota_root = fs_info->quota_root;
969 fs_info->quota_root = NULL; 968 fs_info->quota_root = NULL;
969 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
970 spin_unlock(&fs_info->qgroup_lock); 970 spin_unlock(&fs_info->qgroup_lock);
971 971
972 btrfs_free_qgroup_config(fs_info); 972 btrfs_free_qgroup_config(fs_info);
@@ -982,7 +982,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
982 list_del(&quota_root->dirty_list); 982 list_del(&quota_root->dirty_list);
983 983
984 btrfs_tree_lock(quota_root->node); 984 btrfs_tree_lock(quota_root->node);
985 clean_tree_block(trans, tree_root, quota_root->node); 985 clean_tree_block(trans, tree_root->fs_info, quota_root->node);
986 btrfs_tree_unlock(quota_root->node); 986 btrfs_tree_unlock(quota_root->node);
987 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 987 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
988 988
@@ -1001,6 +1001,110 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1001 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1001 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1002} 1002}
1003 1003
1004/*
1005 * The easy accounting, if we are adding/removing the only ref for an extent
 1006 * then this qgroup and all of the parent qgroups get their reference and
1007 * exclusive counts adjusted.
1008 *
1009 * Caller should hold fs_info->qgroup_lock.
1010 */
1011static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1012 struct ulist *tmp, u64 ref_root,
1013 u64 num_bytes, int sign)
1014{
1015 struct btrfs_qgroup *qgroup;
1016 struct btrfs_qgroup_list *glist;
1017 struct ulist_node *unode;
1018 struct ulist_iterator uiter;
1019 int ret = 0;
1020
1021 qgroup = find_qgroup_rb(fs_info, ref_root);
1022 if (!qgroup)
1023 goto out;
1024
1025 qgroup->rfer += sign * num_bytes;
1026 qgroup->rfer_cmpr += sign * num_bytes;
1027
1028 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1029 qgroup->excl += sign * num_bytes;
1030 qgroup->excl_cmpr += sign * num_bytes;
1031 if (sign > 0)
1032 qgroup->reserved -= num_bytes;
1033
1034 qgroup_dirty(fs_info, qgroup);
1035
1036 /* Get all of the parent groups that contain this qgroup */
1037 list_for_each_entry(glist, &qgroup->groups, next_group) {
1038 ret = ulist_add(tmp, glist->group->qgroupid,
1039 ptr_to_u64(glist->group), GFP_ATOMIC);
1040 if (ret < 0)
1041 goto out;
1042 }
1043
1044 /* Iterate all of the parents and adjust their reference counts */
1045 ULIST_ITER_INIT(&uiter);
1046 while ((unode = ulist_next(tmp, &uiter))) {
1047 qgroup = u64_to_ptr(unode->aux);
1048 qgroup->rfer += sign * num_bytes;
1049 qgroup->rfer_cmpr += sign * num_bytes;
1050 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1051 qgroup->excl += sign * num_bytes;
1052 if (sign > 0)
1053 qgroup->reserved -= num_bytes;
1054 qgroup->excl_cmpr += sign * num_bytes;
1055 qgroup_dirty(fs_info, qgroup);
1056
1057 /* Add any parents of the parents */
1058 list_for_each_entry(glist, &qgroup->groups, next_group) {
1059 ret = ulist_add(tmp, glist->group->qgroupid,
1060 ptr_to_u64(glist->group), GFP_ATOMIC);
1061 if (ret < 0)
1062 goto out;
1063 }
1064 }
1065 ret = 0;
1066out:
1067 return ret;
1068}
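
The helper factored out above is reused by both the relation quick path below and the existing qgroup_excl_accounting() further down. As a rough userspace illustration of the walk it performs — a minimal sketch with single-parent chains and illustrative names, whereas the kernel deduplicates arbitrary parent graphs through the tmp ulist under qgroup_lock:

	#include <stdio.h>

	struct qgroup {
		long long rfer;		/* referenced bytes */
		long long excl;		/* exclusive bytes */
		struct qgroup *parent;	/* at most one parent in this sketch */
	};

	/* Adjust a qgroup and every ancestor by the same signed delta. */
	static void excl_account(struct qgroup *qg, long long bytes, int sign)
	{
		for (; qg; qg = qg->parent) {
			qg->rfer += sign * bytes;
			qg->excl += sign * bytes;
		}
	}

	int main(void)
	{
		struct qgroup top = { 0, 0, NULL };
		struct qgroup sub = { 0, 0, &top };

		excl_account(&sub, 4096, 1);	/* add the only ref to an extent */
		printf("top: rfer=%lld excl=%lld\n", top.rfer, top.excl);
		excl_account(&sub, 4096, -1);	/* drop it again */
		printf("top: rfer=%lld excl=%lld\n", top.rfer, top.excl);
		return 0;
	}
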
1069
1070
1071/*
 1072 * Quick path for updating a qgroup whose refs are all exclusive.
 1073 *
 1074 * In that case, just updating all of its parents is enough;
 1075 * otherwise we need to do a full rescan.
1076 * Caller should also hold fs_info->qgroup_lock.
1077 *
 1078 * Return 0 if the quick update succeeded, >0 if a full rescan is
 1079 * needed (the INCONSISTENT flag gets set in that case).
 1080 * Return <0 on other errors.
1081 */
1082static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1083 struct ulist *tmp, u64 src, u64 dst,
1084 int sign)
1085{
1086 struct btrfs_qgroup *qgroup;
1087 int ret = 1;
1088 int err = 0;
1089
1090 qgroup = find_qgroup_rb(fs_info, src);
1091 if (!qgroup)
1092 goto out;
1093 if (qgroup->excl == qgroup->rfer) {
1094 ret = 0;
1095 err = __qgroup_excl_accounting(fs_info, tmp, dst,
1096 qgroup->excl, sign);
1097 if (err < 0) {
1098 ret = err;
1099 goto out;
1100 }
1101 }
1102out:
1103 if (ret)
1104 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1105 return ret;
1106}
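
The precondition for the shortcut is that qgroup->excl == qgroup->rfer: every byte the source qgroup references is exclusive to it, so attaching or detaching it moves exactly excl bytes in each parent. A hedged sketch of the contract, reusing the illustrative types from the sketch above (not kernel code):

	/* Returns 0 when counts were updated in place, 1 when shared
	 * extents make the shortcut impossible and a rescan is needed. */
	static int try_quick_update(struct qgroup *src, struct qgroup *dst,
				    int sign)
	{
		if (src->excl != src->rfer)
			return 1;	/* shared extents: full rescan needed */
		excl_account(dst, src->excl, sign);
		return 0;
	}
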
1107
1004int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1108int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1005 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1109 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1006{ 1110{
@@ -1008,8 +1112,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1008 struct btrfs_qgroup *parent; 1112 struct btrfs_qgroup *parent;
1009 struct btrfs_qgroup *member; 1113 struct btrfs_qgroup *member;
1010 struct btrfs_qgroup_list *list; 1114 struct btrfs_qgroup_list *list;
1115 struct ulist *tmp;
1011 int ret = 0; 1116 int ret = 0;
1012 1117
 1118 /* Check the level of src and dst first */
 1119 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
 1120 return -EINVAL;
 1121
 1122 tmp = ulist_alloc(GFP_NOFS);
 1123 if (!tmp)
 1124 return -ENOMEM;
1125
1013 mutex_lock(&fs_info->qgroup_ioctl_lock); 1126 mutex_lock(&fs_info->qgroup_ioctl_lock);
1014 quota_root = fs_info->quota_root; 1127 quota_root = fs_info->quota_root;
1015 if (!quota_root) { 1128 if (!quota_root) {
@@ -1043,23 +1156,33 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1043 1156
1044 spin_lock(&fs_info->qgroup_lock); 1157 spin_lock(&fs_info->qgroup_lock);
1045 ret = add_relation_rb(quota_root->fs_info, src, dst); 1158 ret = add_relation_rb(quota_root->fs_info, src, dst);
1159 if (ret < 0) {
1160 spin_unlock(&fs_info->qgroup_lock);
1161 goto out;
1162 }
1163 ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
1046 spin_unlock(&fs_info->qgroup_lock); 1164 spin_unlock(&fs_info->qgroup_lock);
1047out: 1165out:
1048 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1166 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1167 ulist_free(tmp);
1049 return ret; 1168 return ret;
1050} 1169}
1051 1170
1052int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1171int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1053 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1172 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1054{ 1173{
1055 struct btrfs_root *quota_root; 1174 struct btrfs_root *quota_root;
1056 struct btrfs_qgroup *parent; 1175 struct btrfs_qgroup *parent;
1057 struct btrfs_qgroup *member; 1176 struct btrfs_qgroup *member;
1058 struct btrfs_qgroup_list *list; 1177 struct btrfs_qgroup_list *list;
1178 struct ulist *tmp;
1059 int ret = 0; 1179 int ret = 0;
1060 int err; 1180 int err;
1061 1181
1062 mutex_lock(&fs_info->qgroup_ioctl_lock); 1182 tmp = ulist_alloc(GFP_NOFS);
1183 if (!tmp)
1184 return -ENOMEM;
1185
1063 quota_root = fs_info->quota_root; 1186 quota_root = fs_info->quota_root;
1064 if (!quota_root) { 1187 if (!quota_root) {
1065 ret = -EINVAL; 1188 ret = -EINVAL;
@@ -1088,14 +1211,27 @@ exist:
1088 1211
1089 spin_lock(&fs_info->qgroup_lock); 1212 spin_lock(&fs_info->qgroup_lock);
1090 del_relation_rb(fs_info, src, dst); 1213 del_relation_rb(fs_info, src, dst);
1214 ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
1091 spin_unlock(&fs_info->qgroup_lock); 1215 spin_unlock(&fs_info->qgroup_lock);
1092out: 1216out:
1217 ulist_free(tmp);
1218 return ret;
1219}
1220
1221int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1222 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1223{
1224 int ret = 0;
1225
1226 mutex_lock(&fs_info->qgroup_ioctl_lock);
1227 ret = __del_qgroup_relation(trans, fs_info, src, dst);
1093 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1228 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1229
1094 return ret; 1230 return ret;
1095} 1231}
1096 1232
1097int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1233int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1098 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) 1234 struct btrfs_fs_info *fs_info, u64 qgroupid)
1099{ 1235{
1100 struct btrfs_root *quota_root; 1236 struct btrfs_root *quota_root;
1101 struct btrfs_qgroup *qgroup; 1237 struct btrfs_qgroup *qgroup;
@@ -1133,6 +1269,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1133{ 1269{
1134 struct btrfs_root *quota_root; 1270 struct btrfs_root *quota_root;
1135 struct btrfs_qgroup *qgroup; 1271 struct btrfs_qgroup *qgroup;
1272 struct btrfs_qgroup_list *list;
1136 int ret = 0; 1273 int ret = 0;
1137 1274
1138 mutex_lock(&fs_info->qgroup_ioctl_lock); 1275 mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -1147,15 +1284,24 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1147 ret = -ENOENT; 1284 ret = -ENOENT;
1148 goto out; 1285 goto out;
1149 } else { 1286 } else {
1150 /* check if there are no relations to this qgroup */ 1287 /* check if there are no children of this qgroup */
1151 if (!list_empty(&qgroup->groups) || 1288 if (!list_empty(&qgroup->members)) {
1152 !list_empty(&qgroup->members)) {
1153 ret = -EBUSY; 1289 ret = -EBUSY;
1154 goto out; 1290 goto out;
1155 } 1291 }
1156 } 1292 }
1157 ret = del_qgroup_item(trans, quota_root, qgroupid); 1293 ret = del_qgroup_item(trans, quota_root, qgroupid);
1158 1294
1295 while (!list_empty(&qgroup->groups)) {
1296 list = list_first_entry(&qgroup->groups,
1297 struct btrfs_qgroup_list, next_group);
1298 ret = __del_qgroup_relation(trans, fs_info,
1299 qgroupid,
1300 list->group->qgroupid);
1301 if (ret)
1302 goto out;
1303 }
1304
1159 spin_lock(&fs_info->qgroup_lock); 1305 spin_lock(&fs_info->qgroup_lock);
1160 del_qgroup_rb(quota_root->fs_info, qgroupid); 1306 del_qgroup_rb(quota_root->fs_info, qgroupid);
1161 spin_unlock(&fs_info->qgroup_lock); 1307 spin_unlock(&fs_info->qgroup_lock);
@@ -1184,23 +1330,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1184 ret = -ENOENT; 1330 ret = -ENOENT;
1185 goto out; 1331 goto out;
1186 } 1332 }
1187 ret = update_qgroup_limit_item(trans, quota_root, qgroupid, 1333
1188 limit->flags, limit->max_rfer, 1334 spin_lock(&fs_info->qgroup_lock);
1189 limit->max_excl, limit->rsv_rfer, 1335 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
1190 limit->rsv_excl); 1336 qgroup->max_rfer = limit->max_rfer;
1337 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
1338 qgroup->max_excl = limit->max_excl;
1339 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
1340 qgroup->rsv_rfer = limit->rsv_rfer;
1341 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
1342 qgroup->rsv_excl = limit->rsv_excl;
1343 qgroup->lim_flags |= limit->flags;
1344
1345 spin_unlock(&fs_info->qgroup_lock);
1346
1347 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1191 if (ret) { 1348 if (ret) {
1192 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1349 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1193 btrfs_info(fs_info, "unable to update quota limit for %llu", 1350 btrfs_info(fs_info, "unable to update quota limit for %llu",
1194 qgroupid); 1351 qgroupid);
1195 } 1352 }
1196 1353
1197 spin_lock(&fs_info->qgroup_lock);
1198 qgroup->lim_flags = limit->flags;
1199 qgroup->max_rfer = limit->max_rfer;
1200 qgroup->max_excl = limit->max_excl;
1201 qgroup->rsv_rfer = limit->rsv_rfer;
1202 qgroup->rsv_excl = limit->rsv_excl;
1203 spin_unlock(&fs_info->qgroup_lock);
1204out: 1354out:
1205 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1355 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1206 return ret; 1356 return ret;
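
The rework above replaces the all-or-nothing copy that used to sit at the bottom of the function with a flag-gated update, so an ioctl that sets only one limit no longer clobbers the others. A minimal userspace sketch of that pattern (flag and struct names are illustrative, not the BTRFS_QGROUP_LIMIT_* UAPI values):

	#define LIM_MAX_RFER (1ULL << 0)
	#define LIM_MAX_EXCL (1ULL << 1)

	struct lim { unsigned long long flags, max_rfer, max_excl; };

	/* Only fields whose bit is set in the request are touched;
	 * the applied bits accumulate in the stored flags. */
	static void apply_limits(struct lim *qg, const struct lim *req)
	{
		if (req->flags & LIM_MAX_RFER)
			qg->max_rfer = req->max_rfer;
		if (req->flags & LIM_MAX_EXCL)
			qg->max_excl = req->max_excl;
		qg->flags |= req->flags;
	}
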
@@ -1256,14 +1406,14 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1256 return -1; 1406 return -1;
1257 if (oper1->bytenr > oper2->bytenr) 1407 if (oper1->bytenr > oper2->bytenr)
1258 return 1; 1408 return 1;
1259 if (oper1->seq < oper2->seq)
1260 return -1;
1261 if (oper1->seq > oper2->seq)
1262 return -1;
1263 if (oper1->ref_root < oper2->ref_root) 1409 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1410 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1411 if (oper1->ref_root > oper2->ref_root)
1266 return 1; 1412 return 1;
1413 if (oper1->seq < oper2->seq)
1414 return -1;
1415 if (oper1->seq > oper2->seq)
1416 return 1;
1267 if (oper1->type < oper2->type) 1417 if (oper1->type < oper2->type)
1268 return -1; 1418 return -1;
1269 if (oper1->type > oper2->type) 1419 if (oper1->type > oper2->type)
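
Besides moving the seq comparison after ref_root, this hunk fixes a bug visible in the removed lines: both seq branches returned -1, so the comparator was not antisymmetric when sequence numbers differed. Each step is the usual three-way compare, sketched here with an illustrative helper:

	/* Three-way compare on one u64 field; comp_oper() is a chain of
	 * these over bytenr, ref_root, seq and type. Sketch only. */
	static int cmp_u64(unsigned long long a, unsigned long long b)
	{
		if (a < b)
			return -1;
		if (a > b)
			return 1;
		return 0;
	}
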
@@ -1372,19 +1522,10 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1372 return 0; 1522 return 0;
1373} 1523}
1374 1524
1375/*
 1376 * The easy accounting: if we are adding/removing the only ref for an extent
 1377 * then this qgroup and all of the parent qgroups get their reference and
1378 * exclusive counts adjusted.
1379 */
1380static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1525static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1381 struct btrfs_qgroup_operation *oper) 1526 struct btrfs_qgroup_operation *oper)
1382{ 1527{
1383 struct btrfs_qgroup *qgroup;
1384 struct ulist *tmp; 1528 struct ulist *tmp;
1385 struct btrfs_qgroup_list *glist;
1386 struct ulist_node *unode;
1387 struct ulist_iterator uiter;
1388 int sign = 0; 1529 int sign = 0;
1389 int ret = 0; 1530 int ret = 0;
1390 1531
@@ -1395,9 +1536,7 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1395 spin_lock(&fs_info->qgroup_lock); 1536 spin_lock(&fs_info->qgroup_lock);
1396 if (!fs_info->quota_root) 1537 if (!fs_info->quota_root)
1397 goto out; 1538 goto out;
1398 qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1539
1399 if (!qgroup)
1400 goto out;
1401 switch (oper->type) { 1540 switch (oper->type) {
1402 case BTRFS_QGROUP_OPER_ADD_EXCL: 1541 case BTRFS_QGROUP_OPER_ADD_EXCL:
1403 sign = 1; 1542 sign = 1;
@@ -1408,43 +1547,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1408 default: 1547 default:
1409 ASSERT(0); 1548 ASSERT(0);
1410 } 1549 }
1411 qgroup->rfer += sign * oper->num_bytes; 1550 ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
1412 qgroup->rfer_cmpr += sign * oper->num_bytes; 1551 oper->num_bytes, sign);
1413
1414 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1415 qgroup->excl += sign * oper->num_bytes;
1416 qgroup->excl_cmpr += sign * oper->num_bytes;
1417
1418 qgroup_dirty(fs_info, qgroup);
1419
1420 /* Get all of the parent groups that contain this qgroup */
1421 list_for_each_entry(glist, &qgroup->groups, next_group) {
1422 ret = ulist_add(tmp, glist->group->qgroupid,
1423 ptr_to_u64(glist->group), GFP_ATOMIC);
1424 if (ret < 0)
1425 goto out;
1426 }
1427
1428 /* Iterate all of the parents and adjust their reference counts */
1429 ULIST_ITER_INIT(&uiter);
1430 while ((unode = ulist_next(tmp, &uiter))) {
1431 qgroup = u64_to_ptr(unode->aux);
1432 qgroup->rfer += sign * oper->num_bytes;
1433 qgroup->rfer_cmpr += sign * oper->num_bytes;
1434 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1435 qgroup->excl += sign * oper->num_bytes;
1436 qgroup->excl_cmpr += sign * oper->num_bytes;
1437 qgroup_dirty(fs_info, qgroup);
1438
1439 /* Add any parents of the parents */
1440 list_for_each_entry(glist, &qgroup->groups, next_group) {
1441 ret = ulist_add(tmp, glist->group->qgroupid,
1442 ptr_to_u64(glist->group), GFP_ATOMIC);
1443 if (ret < 0)
1444 goto out;
1445 }
1446 }
1447 ret = 0;
1448out: 1552out:
1449 spin_unlock(&fs_info->qgroup_lock); 1553 spin_unlock(&fs_info->qgroup_lock);
1450 ulist_free(tmp); 1554 ulist_free(tmp);
@@ -1845,7 +1949,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1845 struct ulist *roots = NULL; 1949 struct ulist *roots = NULL;
1846 struct ulist *qgroups, *tmp; 1950 struct ulist *qgroups, *tmp;
1847 struct btrfs_qgroup *qgroup; 1951 struct btrfs_qgroup *qgroup;
1848 struct seq_list elem = {}; 1952 struct seq_list elem = SEQ_LIST_INIT(elem);
1849 u64 seq; 1953 u64 seq;
1850 int old_roots = 0; 1954 int old_roots = 0;
1851 int new_roots = 0; 1955 int new_roots = 0;
@@ -1967,7 +2071,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1967 int err; 2071 int err;
1968 struct btrfs_qgroup *qg; 2072 struct btrfs_qgroup *qg;
1969 u64 root_obj = 0; 2073 u64 root_obj = 0;
1970 struct seq_list elem = {}; 2074 struct seq_list elem = SEQ_LIST_INIT(elem);
1971 2075
1972 parents = ulist_alloc(GFP_NOFS); 2076 parents = ulist_alloc(GFP_NOFS);
1973 if (!parents) 2077 if (!parents)
@@ -2156,6 +2260,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2156 if (ret) 2260 if (ret)
2157 fs_info->qgroup_flags |= 2261 fs_info->qgroup_flags |=
2158 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2262 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2263 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2264 if (ret)
2265 fs_info->qgroup_flags |=
2266 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2159 spin_lock(&fs_info->qgroup_lock); 2267 spin_lock(&fs_info->qgroup_lock);
2160 } 2268 }
2161 if (fs_info->quota_enabled) 2269 if (fs_info->quota_enabled)
@@ -2219,6 +2327,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2219 ret = -EINVAL; 2327 ret = -EINVAL;
2220 goto out; 2328 goto out;
2221 } 2329 }
2330
2331 if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
2332 ret = -EINVAL;
2333 goto out;
2334 }
2222 ++i_qgroups; 2335 ++i_qgroups;
2223 } 2336 }
2224 } 2337 }
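
The new check relies on how qgroupids are encoded: the top 16 bits carry the qgroup level and the low 48 bits the id, which is what the 48-bit shift (and btrfs_qgroup_level() in the earlier hunks) extracts. A one-line sketch of that encoding, with an illustrative accessor name:

	/* Level lives in the top 16 bits of a qgroupid; e.g. qgroup 1/5
	 * has qgroupid (1ULL << 48) | 5 and level 1. */
	static inline unsigned long long qgroup_level(unsigned long long qgroupid)
	{
		return qgroupid >> 48;
	}
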
@@ -2230,17 +2343,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2230 if (ret) 2343 if (ret)
2231 goto out; 2344 goto out;
2232 2345
2233 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2234 ret = update_qgroup_limit_item(trans, quota_root, objectid,
2235 inherit->lim.flags,
2236 inherit->lim.max_rfer,
2237 inherit->lim.max_excl,
2238 inherit->lim.rsv_rfer,
2239 inherit->lim.rsv_excl);
2240 if (ret)
2241 goto out;
2242 }
2243
2244 if (srcid) { 2346 if (srcid) {
2245 struct btrfs_root *srcroot; 2347 struct btrfs_root *srcroot;
2246 struct btrfs_key srckey; 2348 struct btrfs_key srckey;
@@ -2286,6 +2388,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2286 goto unlock; 2388 goto unlock;
2287 } 2389 }
2288 2390
2391 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2392 dstgroup->lim_flags = inherit->lim.flags;
2393 dstgroup->max_rfer = inherit->lim.max_rfer;
2394 dstgroup->max_excl = inherit->lim.max_excl;
2395 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2396 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2397
2398 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2399 if (ret) {
2400 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2401 btrfs_info(fs_info, "unable to update quota limit for %llu",
2402 dstgroup->qgroupid);
2403 goto unlock;
2404 }
2405 }
2406
2289 if (srcid) { 2407 if (srcid) {
2290 srcgroup = find_qgroup_rb(fs_info, srcid); 2408 srcgroup = find_qgroup_rb(fs_info, srcid);
2291 if (!srcgroup) 2409 if (!srcgroup)
@@ -2302,6 +2420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2302 dstgroup->excl_cmpr = level_size; 2420 dstgroup->excl_cmpr = level_size;
2303 srcgroup->excl = level_size; 2421 srcgroup->excl = level_size;
2304 srcgroup->excl_cmpr = level_size; 2422 srcgroup->excl_cmpr = level_size;
2423
2424 /* inherit the limit info */
2425 dstgroup->lim_flags = srcgroup->lim_flags;
2426 dstgroup->max_rfer = srcgroup->max_rfer;
2427 dstgroup->max_excl = srcgroup->max_excl;
2428 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2429 dstgroup->rsv_excl = srcgroup->rsv_excl;
2430
2305 qgroup_dirty(fs_info, dstgroup); 2431 qgroup_dirty(fs_info, dstgroup);
2306 qgroup_dirty(fs_info, srcgroup); 2432 qgroup_dirty(fs_info, srcgroup);
2307 } 2433 }
@@ -2358,12 +2484,6 @@ out:
2358 return ret; 2484 return ret;
2359} 2485}
2360 2486
2361/*
2362 * reserve some space for a qgroup and all its parents. The reservation takes
2363 * place with start_transaction or dealloc_reserve, similar to ENOSPC
2364 * accounting. If not enough space is available, EDQUOT is returned.
2365 * We assume that the requested space is new for all qgroups.
2366 */
2367int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2487int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2368{ 2488{
2369 struct btrfs_root *quota_root; 2489 struct btrfs_root *quota_root;
@@ -2513,7 +2633,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2513 2633
2514/* 2634/*
2515 * returns < 0 on error, 0 when more leafs are to be scanned. 2635 * returns < 0 on error, 0 when more leafs are to be scanned.
2516 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. 2636 * returns 1 when done.
2517 */ 2637 */
2518static int 2638static int
2519qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2639qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
@@ -2522,7 +2642,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2522{ 2642{
2523 struct btrfs_key found; 2643 struct btrfs_key found;
2524 struct ulist *roots = NULL; 2644 struct ulist *roots = NULL;
2525 struct seq_list tree_mod_seq_elem = {}; 2645 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2526 u64 num_bytes; 2646 u64 num_bytes;
2527 u64 seq; 2647 u64 seq;
2528 int new_roots; 2648 int new_roots;
@@ -2618,6 +2738,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2618 struct ulist *tmp = NULL, *qgroups = NULL; 2738 struct ulist *tmp = NULL, *qgroups = NULL;
2619 struct extent_buffer *scratch_leaf = NULL; 2739 struct extent_buffer *scratch_leaf = NULL;
2620 int err = -ENOMEM; 2740 int err = -ENOMEM;
2741 int ret = 0;
2621 2742
2622 path = btrfs_alloc_path(); 2743 path = btrfs_alloc_path();
2623 if (!path) 2744 if (!path)
@@ -2660,7 +2781,7 @@ out:
2660 mutex_lock(&fs_info->qgroup_rescan_lock); 2781 mutex_lock(&fs_info->qgroup_rescan_lock);
2661 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2782 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2662 2783
2663 if (err == 2 && 2784 if (err > 0 &&
2664 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2785 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2665 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2786 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2666 } else if (err < 0) { 2787 } else if (err < 0) {
@@ -2668,13 +2789,33 @@ out:
2668 } 2789 }
2669 mutex_unlock(&fs_info->qgroup_rescan_lock); 2790 mutex_unlock(&fs_info->qgroup_rescan_lock);
2670 2791
2792 /*
 2793 * Only update the status item here, since the previous part has already
 2794 * updated the qgroup info.
2795 */
2796 trans = btrfs_start_transaction(fs_info->quota_root, 1);
2797 if (IS_ERR(trans)) {
2798 err = PTR_ERR(trans);
2799 btrfs_err(fs_info,
2800 "fail to start transaction for status update: %d\n",
2801 err);
2802 goto done;
2803 }
2804 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2805 if (ret < 0) {
2806 err = ret;
2807 btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
2808 }
2809 btrfs_end_transaction(trans, fs_info->quota_root);
2810
2671 if (err >= 0) { 2811 if (err >= 0) {
2672 btrfs_info(fs_info, "qgroup scan completed%s", 2812 btrfs_info(fs_info, "qgroup scan completed%s",
2673 err == 2 ? " (inconsistency flag cleared)" : ""); 2813 err > 0 ? " (inconsistency flag cleared)" : "");
2674 } else { 2814 } else {
2675 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2815 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2676 } 2816 }
2677 2817
2818done:
2678 complete_all(&fs_info->qgroup_rescan_completion); 2819 complete_all(&fs_info->qgroup_rescan_completion);
2679} 2820}
2680 2821
@@ -2709,7 +2850,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2709 mutex_unlock(&fs_info->qgroup_rescan_lock); 2850 mutex_unlock(&fs_info->qgroup_rescan_lock);
2710 goto err; 2851 goto err;
2711 } 2852 }
2712
2713 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2853 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2714 } 2854 }
2715 2855
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 18cc68ca3090..c5242aa9a4b2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -70,8 +70,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
70int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 70int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
71 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 71 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
72int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 72int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
73 struct btrfs_fs_info *fs_info, u64 qgroupid, 73 struct btrfs_fs_info *fs_info, u64 qgroupid);
74 char *name);
75int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 74int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
76 struct btrfs_fs_info *fs_info, u64 qgroupid); 75 struct btrfs_fs_info *fs_info, u64 qgroupid);
77int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 76int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5264858ed768..fa72068bd256 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -237,12 +237,8 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
237 } 237 }
238 238
239 x = cmpxchg(&info->stripe_hash_table, NULL, table); 239 x = cmpxchg(&info->stripe_hash_table, NULL, table);
240 if (x) { 240 if (x)
241 if (is_vmalloc_addr(x)) 241 kvfree(x);
242 vfree(x);
243 else
244 kfree(x);
245 }
246 return 0; 242 return 0;
247} 243}
248 244
@@ -453,10 +449,7 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
453 if (!info->stripe_hash_table) 449 if (!info->stripe_hash_table)
454 return; 450 return;
455 btrfs_clear_rbio_cache(info); 451 btrfs_clear_rbio_cache(info);
456 if (is_vmalloc_addr(info->stripe_hash_table)) 452 kvfree(info->stripe_hash_table);
457 vfree(info->stripe_hash_table);
458 else
459 kfree(info->stripe_hash_table);
460 info->stripe_hash_table = NULL; 453 info->stripe_hash_table = NULL;
461} 454}
462 455
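
kvfree() makes the is_vmalloc_addr() dance unnecessary because it inspects the address itself. A kernel-style sketch of the allocation pattern it pairs with — mirroring what btrfs_alloc_stripe_hash_table() does for the table a few lines up; this builds only in a kernel tree:

	#include <linux/slab.h>
	#include <linux/vmalloc.h>
	#include <linux/mm.h>	/* kvfree() */

	/* Try the slab allocator first, fall back to vmalloc for large
	 * tables; kvfree() picks the matching release path either way. */
	static void *alloc_table(size_t size)
	{
		void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

		if (!p)
			p = vzalloc(size);
		return p;
	}

	static void free_table(void *p)
	{
		kvfree(p);
	}
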
@@ -1807,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1807 int err; 1800 int err;
1808 int i; 1801 int i;
1809 1802
1810 pointers = kzalloc(rbio->real_stripes * sizeof(void *), 1803 pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
1811 GFP_NOFS);
1812 if (!pointers) { 1804 if (!pointers) {
1813 err = -ENOMEM; 1805 err = -ENOMEM;
1814 goto cleanup_io; 1806 goto cleanup_io;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d83085381bcc..74b24b01d574 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
3027 mutex_lock(&inode->i_mutex); 3027 mutex_lock(&inode->i_mutex);
3028 3028
3029 ret = btrfs_check_data_free_space(inode, cluster->end + 3029 ret = btrfs_check_data_free_space(inode, cluster->end +
3030 1 - cluster->start); 3030 1 - cluster->start, 0);
3031 if (ret) 3031 if (ret)
3032 goto out; 3032 goto out;
3033 3033
@@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc,
3430} 3430}
3431 3431
3432static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3432static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3433 struct inode *inode, u64 ino) 3433 struct btrfs_block_group_cache *block_group,
3434 struct inode *inode,
3435 u64 ino)
3434{ 3436{
3435 struct btrfs_key key; 3437 struct btrfs_key key;
3436 struct btrfs_root *root = fs_info->tree_root; 3438 struct btrfs_root *root = fs_info->tree_root;
@@ -3463,7 +3465,7 @@ truncate:
3463 goto out; 3465 goto out;
3464 } 3466 }
3465 3467
3466 ret = btrfs_truncate_free_space_cache(root, trans, inode); 3468 ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
3467 3469
3468 btrfs_end_transaction(trans, root); 3470 btrfs_end_transaction(trans, root);
3469 btrfs_btree_balance_dirty(root); 3471 btrfs_btree_balance_dirty(root);
@@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc,
3509 */ 3511 */
3510 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3512 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3511 ret = delete_block_group_cache(rc->extent_root->fs_info, 3513 ret = delete_block_group_cache(rc->extent_root->fs_info,
3514 rc->block_group,
3512 NULL, ref_objectid); 3515 NULL, ref_objectid);
3513 if (ret != -ENOENT) 3516 if (ret != -ENOENT)
3514 return ret; 3517 return ret;
@@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4223 btrfs_free_path(path); 4226 btrfs_free_path(path);
4224 4227
4225 if (!IS_ERR(inode)) 4228 if (!IS_ERR(inode))
4226 ret = delete_block_group_cache(fs_info, inode, 0); 4229 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
4227 else 4230 else
4228 ret = PTR_ERR(inode); 4231 ret = PTR_ERR(inode);
4229 4232
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ec57687c9a4d..ab5811545a98 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
964 * the statistics. 964 * the statistics.
965 */ 965 */
966 966
967 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS * 967 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
968 sizeof(*sblocks_for_recheck), 968 sizeof(*sblocks_for_recheck), GFP_NOFS);
969 GFP_NOFS);
970 if (!sblocks_for_recheck) { 969 if (!sblocks_for_recheck) {
971 spin_lock(&sctx->stat_lock); 970 spin_lock(&sctx->stat_lock);
972 sctx->stat.malloc_errors++; 971 sctx->stat.malloc_errors++;
@@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2319 unsigned long *bitmap, 2318 unsigned long *bitmap,
2320 u64 start, u64 len) 2319 u64 start, u64 len)
2321{ 2320{
2322 int offset; 2321 u32 offset;
2323 int nsectors; 2322 int nsectors;
2324 int sectorsize = sparity->sctx->dev_root->sectorsize; 2323 int sectorsize = sparity->sctx->dev_root->sectorsize;
2325 2324
@@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2329 } 2328 }
2330 2329
2331 start -= sparity->logic_start; 2330 start -= sparity->logic_start;
2332 offset = (int)do_div(start, sparity->stripe_len); 2331 start = div_u64_rem(start, sparity->stripe_len, &offset);
2333 offset /= sectorsize; 2332 offset /= sectorsize;
2334 nsectors = (int)len / sectorsize; 2333 nsectors = (int)len / sectorsize;
2335 2334
@@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num,
2612 int j = 0; 2611 int j = 0;
2613 u64 stripe_nr; 2612 u64 stripe_nr;
2614 u64 last_offset; 2613 u64 last_offset;
2615 int stripe_index; 2614 u32 stripe_index;
2616 int rot; 2615 u32 rot;
2617 2616
2618 last_offset = (physical - map->stripes[num].physical) * 2617 last_offset = (physical - map->stripes[num].physical) *
2619 nr_data_stripes(map); 2618 nr_data_stripes(map);
@@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num,
2624 for (i = 0; i < nr_data_stripes(map); i++) { 2623 for (i = 0; i < nr_data_stripes(map); i++) {
2625 *offset = last_offset + i * map->stripe_len; 2624 *offset = last_offset + i * map->stripe_len;
2626 2625
2627 stripe_nr = *offset; 2626 stripe_nr = div_u64(*offset, map->stripe_len);
2628 do_div(stripe_nr, map->stripe_len); 2627 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
2629 do_div(stripe_nr, nr_data_stripes(map));
2630 2628
2631 /* Work out the disk rotation on this stripe-set */ 2629 /* Work out the disk rotation on this stripe-set */
2632 rot = do_div(stripe_nr, map->num_stripes); 2630 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
2633 /* calculate which stripe this data locates */ 2631 /* calculate which stripe this data locates */
2634 rot += i; 2632 rot += i;
2635 stripe_index = rot % map->num_stripes; 2633 stripe_index = rot % map->num_stripes;
@@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2995 int extent_mirror_num; 2993 int extent_mirror_num;
2996 int stop_loop = 0; 2994 int stop_loop = 0;
2997 2995
2998 nstripes = length;
2999 physical = map->stripes[num].physical; 2996 physical = map->stripes[num].physical;
3000 offset = 0; 2997 offset = 0;
3001 do_div(nstripes, map->stripe_len); 2998 nstripes = div_u64(length, map->stripe_len);
3002 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 2999 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3003 offset = map->stripe_len * num; 3000 offset = map->stripe_len * num;
3004 increment = map->stripe_len * map->num_stripes; 3001 increment = map->stripe_len * map->num_stripes;
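
The do_div() conversions above trade an in-place macro for explicit quotient/remainder calls, which is also what motivated the int-to-u32 changes for offset, stripe_index and rot. A kernel-style sketch of the two idioms — both leave quotient == 7 and rem == 3; builds only in a kernel tree:

	#include <linux/math64.h>

	static void div_example(void)
	{
		u64 quotient;
		u32 rem;

		/* new style: inputs and outputs are explicit */
		quotient = div_u64_rem(73, 10, &rem);

		/* old style: the macro rewrites its dividend in place
		 * and returns the remainder */
		quotient = 73;
		rem = do_div(quotient, 10);
	}
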
@@ -3563,7 +3560,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3563 int is_dev_replace) 3560 int is_dev_replace)
3564{ 3561{
3565 int ret = 0; 3562 int ret = 0;
3566 int flags = WQ_FREEZABLE | WQ_UNBOUND; 3563 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3567 int max_active = fs_info->thread_pool_size; 3564 int max_active = fs_info->thread_pool_size;
3568 3565
3569 if (fs_info->scrub_workers_refcnt == 0) { 3566 if (fs_info->scrub_workers_refcnt == 0) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe5857223515..a1216f9b4917 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
230 u64 parent_ino; 230 u64 parent_ino;
231 u64 ino; 231 u64 ino;
232 u64 gen; 232 u64 gen;
233 bool is_orphan;
233 struct list_head update_refs; 234 struct list_head update_refs;
234}; 235};
235 236
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2984 u64 ino_gen, 2985 u64 ino_gen,
2985 u64 parent_ino, 2986 u64 parent_ino,
2986 struct list_head *new_refs, 2987 struct list_head *new_refs,
2987 struct list_head *deleted_refs) 2988 struct list_head *deleted_refs,
2989 const bool is_orphan)
2988{ 2990{
2989 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2991 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2990 struct rb_node *parent = NULL; 2992 struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2999 pm->parent_ino = parent_ino; 3001 pm->parent_ino = parent_ino;
3000 pm->ino = ino; 3002 pm->ino = ino;
3001 pm->gen = ino_gen; 3003 pm->gen = ino_gen;
3004 pm->is_orphan = is_orphan;
3002 INIT_LIST_HEAD(&pm->list); 3005 INIT_LIST_HEAD(&pm->list);
3003 INIT_LIST_HEAD(&pm->update_refs); 3006 INIT_LIST_HEAD(&pm->update_refs);
3004 RB_CLEAR_NODE(&pm->node); 3007 RB_CLEAR_NODE(&pm->node);
@@ -3064,48 +3067,6 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3064 return NULL; 3067 return NULL;
3065} 3068}
3066 3069
3067static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3068 u64 ino, u64 gen, u64 *ancestor_ino)
3069{
3070 int ret = 0;
3071 u64 parent_inode = 0;
3072 u64 parent_gen = 0;
3073 u64 start_ino = ino;
3074
3075 *ancestor_ino = 0;
3076 while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3077 fs_path_reset(name);
3078
3079 if (is_waiting_for_rm(sctx, ino))
3080 break;
3081 if (is_waiting_for_move(sctx, ino)) {
3082 if (*ancestor_ino == 0)
3083 *ancestor_ino = ino;
3084 ret = get_first_ref(sctx->parent_root, ino,
3085 &parent_inode, &parent_gen, name);
3086 } else {
3087 ret = __get_cur_name_and_parent(sctx, ino, gen,
3088 &parent_inode,
3089 &parent_gen, name);
3090 if (ret > 0) {
3091 ret = 0;
3092 break;
3093 }
3094 }
3095 if (ret < 0)
3096 break;
3097 if (parent_inode == start_ino) {
3098 ret = 1;
3099 if (*ancestor_ino == 0)
3100 *ancestor_ino = ino;
3101 break;
3102 }
3103 ino = parent_inode;
3104 gen = parent_gen;
3105 }
3106 return ret;
3107}
3108
3109static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3070static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3110{ 3071{
3111 struct fs_path *from_path = NULL; 3072 struct fs_path *from_path = NULL;
@@ -3117,7 +3078,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3117 struct waiting_dir_move *dm = NULL; 3078 struct waiting_dir_move *dm = NULL;
3118 u64 rmdir_ino = 0; 3079 u64 rmdir_ino = 0;
3119 int ret; 3080 int ret;
3120 u64 ancestor = 0;
3121 3081
3122 name = fs_path_alloc(); 3082 name = fs_path_alloc();
3123 from_path = fs_path_alloc(); 3083 from_path = fs_path_alloc();
@@ -3131,35 +3091,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3131 rmdir_ino = dm->rmdir_ino; 3091 rmdir_ino = dm->rmdir_ino;
3132 free_waiting_dir_move(sctx, dm); 3092 free_waiting_dir_move(sctx, dm);
3133 3093
3134 ret = get_first_ref(sctx->parent_root, pm->ino, 3094 if (pm->is_orphan) {
3135 &parent_ino, &parent_gen, name); 3095 ret = gen_unique_name(sctx, pm->ino,
3136 if (ret < 0) 3096 pm->gen, from_path);
3137 goto out; 3097 } else {
3138 3098 ret = get_first_ref(sctx->parent_root, pm->ino,
3139 ret = get_cur_path(sctx, parent_ino, parent_gen, 3099 &parent_ino, &parent_gen, name);
3140 from_path); 3100 if (ret < 0)
3141 if (ret < 0) 3101 goto out;
3142 goto out; 3102 ret = get_cur_path(sctx, parent_ino, parent_gen,
3143 ret = fs_path_add_path(from_path, name); 3103 from_path);
3104 if (ret < 0)
3105 goto out;
3106 ret = fs_path_add_path(from_path, name);
3107 }
3144 if (ret < 0) 3108 if (ret < 0)
3145 goto out; 3109 goto out;
3146 3110
3147 sctx->send_progress = sctx->cur_ino + 1; 3111 sctx->send_progress = sctx->cur_ino + 1;
3148 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
3149 if (ret) {
3150 LIST_HEAD(deleted_refs);
3151 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3152 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3153 &pm->update_refs, &deleted_refs);
3154 if (ret < 0)
3155 goto out;
3156 if (rmdir_ino) {
3157 dm = get_waiting_dir_move(sctx, pm->ino);
3158 ASSERT(dm);
3159 dm->rmdir_ino = rmdir_ino;
3160 }
3161 goto out;
3162 }
3163 fs_path_reset(name); 3112 fs_path_reset(name);
3164 to_path = name; 3113 to_path = name;
3165 name = NULL; 3114 name = NULL;
@@ -3283,6 +3232,127 @@ out:
3283 return ret; 3232 return ret;
3284} 3233}
3285 3234
3235/*
3236 * We might need to delay a directory rename even when no ancestor directory
3237 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3238 * renamed. This happens when we rename a directory to the old name (the name
3239 * in the parent root) of some other unrelated directory that got its rename
3240 * delayed due to some ancestor with higher number that got renamed.
3241 *
3242 * Example:
3243 *
3244 * Parent snapshot:
3245 * . (ino 256)
3246 * |---- a/ (ino 257)
3247 * | |---- file (ino 260)
3248 * |
3249 * |---- b/ (ino 258)
3250 * |---- c/ (ino 259)
3251 *
3252 * Send snapshot:
3253 * . (ino 256)
3254 * |---- a/ (ino 258)
3255 * |---- x/ (ino 259)
3256 * |---- y/ (ino 257)
3257 * |----- file (ino 260)
3258 *
 3259 * Here we cannot rename 258 from 'b' to 'a' without the rename of inode 257
3260 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3261 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3262 * must issue is:
3263 *
3264 * 1 - rename 259 from 'c' to 'x'
3265 * 2 - rename 257 from 'a' to 'x/y'
3266 * 3 - rename 258 from 'b' to 'a'
3267 *
3268 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3269 * be done right away and < 0 on error.
3270 */
3271static int wait_for_dest_dir_move(struct send_ctx *sctx,
3272 struct recorded_ref *parent_ref,
3273 const bool is_orphan)
3274{
3275 struct btrfs_path *path;
3276 struct btrfs_key key;
3277 struct btrfs_key di_key;
3278 struct btrfs_dir_item *di;
3279 u64 left_gen;
3280 u64 right_gen;
3281 int ret = 0;
3282
3283 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3284 return 0;
3285
3286 path = alloc_path_for_send();
3287 if (!path)
3288 return -ENOMEM;
3289
3290 key.objectid = parent_ref->dir;
3291 key.type = BTRFS_DIR_ITEM_KEY;
3292 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
3293
3294 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
3295 if (ret < 0) {
3296 goto out;
3297 } else if (ret > 0) {
3298 ret = 0;
3299 goto out;
3300 }
3301
3302 di = btrfs_match_dir_item_name(sctx->parent_root, path,
3303 parent_ref->name, parent_ref->name_len);
3304 if (!di) {
3305 ret = 0;
3306 goto out;
3307 }
3308 /*
3309 * di_key.objectid has the number of the inode that has a dentry in the
3310 * parent directory with the same name that sctx->cur_ino is being
3311 * renamed to. We need to check if that inode is in the send root as
 3313 * well and if it is currently marked as an inode with a pending rename;
3313 * if it is, we need to delay the rename of sctx->cur_ino as well, so
3314 * that it happens after that other inode is renamed.
3315 */
3316 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
3317 if (di_key.type != BTRFS_INODE_ITEM_KEY) {
3318 ret = 0;
3319 goto out;
3320 }
3321
3322 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
3323 &left_gen, NULL, NULL, NULL, NULL);
3324 if (ret < 0)
3325 goto out;
3326 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
3327 &right_gen, NULL, NULL, NULL, NULL);
3328 if (ret < 0) {
3329 if (ret == -ENOENT)
3330 ret = 0;
3331 goto out;
3332 }
3333
3334 /* Different inode, no need to delay the rename of sctx->cur_ino */
3335 if (right_gen != left_gen) {
3336 ret = 0;
3337 goto out;
3338 }
3339
3340 if (is_waiting_for_move(sctx, di_key.objectid)) {
3341 ret = add_pending_dir_move(sctx,
3342 sctx->cur_ino,
3343 sctx->cur_inode_gen,
3344 di_key.objectid,
3345 &sctx->new_refs,
3346 &sctx->deleted_refs,
3347 is_orphan);
3348 if (!ret)
3349 ret = 1;
3350 }
3351out:
3352 btrfs_free_path(path);
3353 return ret;
3354}
3355
3286static int wait_for_parent_move(struct send_ctx *sctx, 3356static int wait_for_parent_move(struct send_ctx *sctx,
3287 struct recorded_ref *parent_ref) 3357 struct recorded_ref *parent_ref)
3288{ 3358{
@@ -3349,7 +3419,8 @@ out:
3349 sctx->cur_inode_gen, 3419 sctx->cur_inode_gen,
3350 ino, 3420 ino,
3351 &sctx->new_refs, 3421 &sctx->new_refs,
3352 &sctx->deleted_refs); 3422 &sctx->deleted_refs,
3423 false);
3353 if (!ret) 3424 if (!ret)
3354 ret = 1; 3425 ret = 1;
3355 } 3426 }
@@ -3372,6 +3443,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3372 int did_overwrite = 0; 3443 int did_overwrite = 0;
3373 int is_orphan = 0; 3444 int is_orphan = 0;
3374 u64 last_dir_ino_rm = 0; 3445 u64 last_dir_ino_rm = 0;
3446 bool can_rename = true;
3375 3447
3376verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3448verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3377 3449
@@ -3479,10 +3551,27 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3479 if (ret < 0) 3551 if (ret < 0)
3480 goto out; 3552 goto out;
3481 if (ret) { 3553 if (ret) {
3554 struct name_cache_entry *nce;
3555
3482 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3556 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3483 cur->full_path); 3557 cur->full_path);
3484 if (ret < 0) 3558 if (ret < 0)
3485 goto out; 3559 goto out;
3560 /*
3561 * Make sure we clear our orphanized inode's
3562 * name from the name cache. This is because the
3563 * inode ow_inode might be an ancestor of some
3564 * other inode that will be orphanized as well
3565 * later and has an inode number greater than
3566 * sctx->send_progress. We need to prevent
3567 * future name lookups from using the old name
 3568 * and make them resolve to the orphan name instead.
3569 */
3570 nce = name_cache_search(sctx, ow_inode, ow_gen);
3571 if (nce) {
3572 name_cache_delete(sctx, nce);
3573 kfree(nce);
3574 }
3486 } else { 3575 } else {
3487 ret = send_unlink(sctx, cur->full_path); 3576 ret = send_unlink(sctx, cur->full_path);
3488 if (ret < 0) 3577 if (ret < 0)
@@ -3490,12 +3579,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3490 } 3579 }
3491 } 3580 }
3492 3581
3582 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3583 ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3584 if (ret < 0)
3585 goto out;
3586 if (ret == 1) {
3587 can_rename = false;
3588 *pending_move = 1;
3589 }
3590 }
3591
3493 /* 3592 /*
3494 * link/move the ref to the new place. If we have an orphan 3593 * link/move the ref to the new place. If we have an orphan
3495 * inode, move it and update valid_path. If not, link or move 3594 * inode, move it and update valid_path. If not, link or move
3496 * it depending on the inode mode. 3595 * it depending on the inode mode.
3497 */ 3596 */
3498 if (is_orphan) { 3597 if (is_orphan && can_rename) {
3499 ret = send_rename(sctx, valid_path, cur->full_path); 3598 ret = send_rename(sctx, valid_path, cur->full_path);
3500 if (ret < 0) 3599 if (ret < 0)
3501 goto out; 3600 goto out;
@@ -3503,7 +3602,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3503 ret = fs_path_copy(valid_path, cur->full_path); 3602 ret = fs_path_copy(valid_path, cur->full_path);
3504 if (ret < 0) 3603 if (ret < 0)
3505 goto out; 3604 goto out;
3506 } else { 3605 } else if (can_rename) {
3507 if (S_ISDIR(sctx->cur_inode_mode)) { 3606 if (S_ISDIR(sctx->cur_inode_mode)) {
3508 /* 3607 /*
3509 * Dirs can't be linked, so move it. For moved 3608 * Dirs can't be linked, so move it. For moved
@@ -5711,19 +5810,20 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5711 ret = PTR_ERR(clone_root); 5810 ret = PTR_ERR(clone_root);
5712 goto out; 5811 goto out;
5713 } 5812 }
5714 clone_sources_to_rollback = i + 1;
5715 spin_lock(&clone_root->root_item_lock); 5813 spin_lock(&clone_root->root_item_lock);
5716 clone_root->send_in_progress++; 5814 if (!btrfs_root_readonly(clone_root) ||
5717 if (!btrfs_root_readonly(clone_root)) { 5815 btrfs_root_dead(clone_root)) {
5718 spin_unlock(&clone_root->root_item_lock); 5816 spin_unlock(&clone_root->root_item_lock);
5719 srcu_read_unlock(&fs_info->subvol_srcu, index); 5817 srcu_read_unlock(&fs_info->subvol_srcu, index);
5720 ret = -EPERM; 5818 ret = -EPERM;
5721 goto out; 5819 goto out;
5722 } 5820 }
5821 clone_root->send_in_progress++;
5723 spin_unlock(&clone_root->root_item_lock); 5822 spin_unlock(&clone_root->root_item_lock);
5724 srcu_read_unlock(&fs_info->subvol_srcu, index); 5823 srcu_read_unlock(&fs_info->subvol_srcu, index);
5725 5824
5726 sctx->clone_roots[i].root = clone_root; 5825 sctx->clone_roots[i].root = clone_root;
5826 clone_sources_to_rollback = i + 1;
5727 } 5827 }
5728 vfree(clone_sources_tmp); 5828 vfree(clone_sources_tmp);
5729 clone_sources_tmp = NULL; 5829 clone_sources_tmp = NULL;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 05fef198ff94..f2c9f9db3b19 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -901,6 +901,15 @@ find_root:
901 if (IS_ERR(new_root)) 901 if (IS_ERR(new_root))
902 return ERR_CAST(new_root); 902 return ERR_CAST(new_root);
903 903
904 if (!(sb->s_flags & MS_RDONLY)) {
905 int ret;
906 down_read(&fs_info->cleanup_work_sem);
907 ret = btrfs_orphan_cleanup(new_root);
908 up_read(&fs_info->cleanup_work_sem);
909 if (ret)
910 return ERR_PTR(ret);
911 }
912
904 dir_id = btrfs_root_dirid(&new_root->root_item); 913 dir_id = btrfs_root_dirid(&new_root->root_item);
905setup_root: 914setup_root:
906 location.objectid = dir_id; 915 location.objectid = dir_id;
@@ -1714,7 +1723,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1714 avail_space = device->total_bytes - device->bytes_used; 1723 avail_space = device->total_bytes - device->bytes_used;
1715 1724
1716 /* align with stripe_len */ 1725 /* align with stripe_len */
1717 do_div(avail_space, BTRFS_STRIPE_LEN); 1726 avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
1718 avail_space *= BTRFS_STRIPE_LEN; 1727 avail_space *= BTRFS_STRIPE_LEN;
1719 1728
1720 /* 1729 /*
@@ -1908,6 +1917,17 @@ static struct file_system_type btrfs_fs_type = {
1908}; 1917};
1909MODULE_ALIAS_FS("btrfs"); 1918MODULE_ALIAS_FS("btrfs");
1910 1919
1920static int btrfs_control_open(struct inode *inode, struct file *file)
1921{
1922 /*
1923 * The control file's private_data is used to hold the
1924 * transaction when it is started and is used to keep
1925 * track of whether a transaction is already in progress.
1926 */
1927 file->private_data = NULL;
1928 return 0;
1929}
1930
1911/* 1931/*
1912 * used by btrfsctl to scan devices when no FS is mounted 1932 * used by btrfsctl to scan devices when no FS is mounted
1913 */ 1933 */
@@ -2009,6 +2029,7 @@ static const struct super_operations btrfs_super_ops = {
2009}; 2029};
2010 2030
2011static const struct file_operations btrfs_ctl_fops = { 2031static const struct file_operations btrfs_ctl_fops = {
2032 .open = btrfs_control_open,
2012 .unlocked_ioctl = btrfs_control_ioctl, 2033 .unlocked_ioctl = btrfs_control_ioctl,
2013 .compat_ioctl = btrfs_control_ioctl, 2034 .compat_ioctl = btrfs_control_ioctl,
2014 .owner = THIS_MODULE, 2035 .owner = THIS_MODULE,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 94edb0a2a026..e8a4c86d274d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -459,7 +459,7 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
459static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13]; 459static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
460static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS]; 460static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
461 461
462static u64 supported_feature_masks[3] = { 462static const u64 supported_feature_masks[3] = {
463 [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP, 463 [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
464 [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP, 464 [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
465 [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP, 465 [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index f7dd298b3cf6..3a4bbed723fd 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
61 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) 61 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
62 62
63/* convert from attribute */ 63/* convert from attribute */
64#define to_btrfs_feature_attr(a) \ 64static inline struct btrfs_feature_attr *
65 container_of(a, struct btrfs_feature_attr, kobj_attr) 65to_btrfs_feature_attr(struct kobj_attribute *a)
66#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr) 66{
67#define attr_to_btrfs_feature_attr(a) \ 67 return container_of(a, struct btrfs_feature_attr, kobj_attr);
68 to_btrfs_feature_attr(attr_to_btrfs_attr(a)) 68}
69
70static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr)
71{
72 return container_of(attr, struct kobj_attribute, attr);
73}
74
75static inline struct btrfs_feature_attr *
76attr_to_btrfs_feature_attr(struct attribute *attr)
77{
78 return to_btrfs_feature_attr(attr_to_btrfs_attr(attr));
79}
80
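
The gain from converting these macros is type safety: a bare container_of() happily reinterprets any pointer, while an inline function rejects a mismatched argument at compile time. A self-contained userspace illustration of the pattern, with simplified struct names rather than the btrfs ones:

	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct attr { const char *name; };
	struct feature_attr { int feature_bit; struct attr base; };

	/* Unlike the bare macro, this checks that its argument really is
	 * a struct attr *, so a wrong pointer type fails to compile. */
	static inline struct feature_attr *to_feature_attr(struct attr *a)
	{
		return container_of(a, struct feature_attr, base);
	}
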
69char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); 81char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
70extern const char * const btrfs_feature_set_names[3]; 82extern const char * const btrfs_feature_set_names[3];
71extern struct kobj_type space_info_ktype; 83extern struct kobj_type space_info_ktype;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
911 return ret; 911 return ret;
912} 912}
913 913
914static int test_extent_accounting(void)
915{
916 struct inode *inode = NULL;
917 struct btrfs_root *root = NULL;
918 int ret = -ENOMEM;
919
920 inode = btrfs_new_test_inode();
921 if (!inode) {
922 test_msg("Couldn't allocate inode\n");
923 return ret;
924 }
925
926 root = btrfs_alloc_dummy_root();
927 if (IS_ERR(root)) {
928 test_msg("Couldn't allocate root\n");
929 goto out;
930 }
931
932 root->fs_info = btrfs_alloc_dummy_fs_info();
933 if (!root->fs_info) {
934 test_msg("Couldn't allocate dummy fs info\n");
935 goto out;
936 }
937
938 BTRFS_I(inode)->root = root;
939 btrfs_test_inode_set_ops(inode);
940
941 /* [BTRFS_MAX_EXTENT_SIZE] */
942 BTRFS_I(inode)->outstanding_extents++;
943 ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
944 NULL);
945 if (ret) {
946 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
947 goto out;
948 }
949 if (BTRFS_I(inode)->outstanding_extents != 1) {
950 ret = -EINVAL;
951 test_msg("Miscount, wanted 1, got %u\n",
952 BTRFS_I(inode)->outstanding_extents);
953 goto out;
954 }
955
956 /* [BTRFS_MAX_EXTENT_SIZE][4k] */
957 BTRFS_I(inode)->outstanding_extents++;
958 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
959 BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
960 if (ret) {
961 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
962 goto out;
963 }
964 if (BTRFS_I(inode)->outstanding_extents != 2) {
965 ret = -EINVAL;
966 test_msg("Miscount, wanted 2, got %u\n",
967 BTRFS_I(inode)->outstanding_extents);
968 goto out;
969 }
970
971 /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
972 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
973 BTRFS_MAX_EXTENT_SIZE >> 1,
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS);
978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out;
981 }
982 if (BTRFS_I(inode)->outstanding_extents != 2) {
983 ret = -EINVAL;
984 test_msg("Miscount, wanted 2, got %u\n",
985 BTRFS_I(inode)->outstanding_extents);
986 goto out;
987 }
988
989 /* [BTRFS_MAX_EXTENT_SIZE][4K] */
990 BTRFS_I(inode)->outstanding_extents++;
991 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
992 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
993 NULL);
994 if (ret) {
995 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
996 goto out;
997 }
998 if (BTRFS_I(inode)->outstanding_extents != 2) {
999 ret = -EINVAL;
1000 test_msg("Miscount, wanted 2, got %u\n",
1001 BTRFS_I(inode)->outstanding_extents);
1002 goto out;
1003 }
1004
1005 /*
1006 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
1007 *
1008 * I'm artificially adding 2 to outstanding_extents because in the
1009 * buffered IO case we'd add things up as we go, but I don't feel like
 1010 * doing that here; it isn't the interesting case we want to test.
1011 */
1012 BTRFS_I(inode)->outstanding_extents += 2;
1013 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
1014 (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
1015 NULL);
1016 if (ret) {
1017 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1018 goto out;
1019 }
1020 if (BTRFS_I(inode)->outstanding_extents != 4) {
1021 ret = -EINVAL;
1022 test_msg("Miscount, wanted 4, got %u\n",
1023 BTRFS_I(inode)->outstanding_extents);
1024 goto out;
1025 }
1026
1027 /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
1028 BTRFS_I(inode)->outstanding_extents++;
1029 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1030 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1031 if (ret) {
1032 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1033 goto out;
1034 }
1035 if (BTRFS_I(inode)->outstanding_extents != 3) {
1036 ret = -EINVAL;
1037 test_msg("Miscount, wanted 3, got %u\n",
1038 BTRFS_I(inode)->outstanding_extents);
1039 goto out;
1040 }
1041
1042 /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
1043 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
1044 BTRFS_MAX_EXTENT_SIZE+4096,
1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS);
1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out;
1052 }
1053 if (BTRFS_I(inode)->outstanding_extents != 4) {
1054 ret = -EINVAL;
1055 test_msg("Miscount, wanted 4, got %u\n",
1056 BTRFS_I(inode)->outstanding_extents);
1057 goto out;
1058 }
1059
1060 /*
1061 * Refill the hole again just for good measure, because I thought it
1062 * might fail and I'd rather satisfy my paranoia at this point.
1063 */
1064 BTRFS_I(inode)->outstanding_extents++;
1065 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1066 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1067 if (ret) {
1068 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1069 goto out;
1070 }
1071 if (BTRFS_I(inode)->outstanding_extents != 3) {
1072 ret = -EINVAL;
1073 test_msg("Miscount, wanted 3, got %u\n",
1074 BTRFS_I(inode)->outstanding_extents);
1075 goto out;
1076 }
1077
1078 /* Empty */
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS);
1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out;
1086 }
1087 if (BTRFS_I(inode)->outstanding_extents) {
1088 ret = -EINVAL;
1089 test_msg("Miscount, wanted 0, got %u\n",
1090 BTRFS_I(inode)->outstanding_extents);
1091 goto out;
1092 }
1093 ret = 0;
1094out:
1095 if (ret)
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS);
1100 iput(inode);
1101 btrfs_free_dummy_root(root);
1102 return ret;
1103}
1104
914int btrfs_test_inodes(void) 1105int btrfs_test_inodes(void)
915{ 1106{
916 int ret; 1107 int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
924 if (ret) 1115 if (ret)
925 return ret; 1116 return ret;
926 test_msg("Running hole first btrfs_get_extent test\n"); 1117 test_msg("Running hole first btrfs_get_extent test\n");
927 return test_hole_first(); 1118 ret = test_hole_first();
1119 if (ret)
1120 return ret;
1121 test_msg("Running outstanding_extents tests\n");
1122 return test_extent_accounting();
928} 1123}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 73f299ebdabb..c32a7ba76bca 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -232,7 +232,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
232 init_dummy_trans(&trans); 232 init_dummy_trans(&trans);
233 233
234 test_msg("Qgroup basic add\n"); 234 test_msg("Qgroup basic add\n");
235 ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL); 235 ret = btrfs_create_qgroup(NULL, fs_info, 5);
236 if (ret) { 236 if (ret) {
237 test_msg("Couldn't create a qgroup %d\n", ret); 237 test_msg("Couldn't create a qgroup %d\n", ret);
238 return ret; 238 return ret;
@@ -301,7 +301,7 @@ static int test_multiple_refs(struct btrfs_root *root)
301 test_msg("Qgroup multiple refs test\n"); 301 test_msg("Qgroup multiple refs test\n");
302 302
303 /* We have 5 created already from the previous test */ 303 /* We have 5 created already from the previous test */
304 ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL); 304 ret = btrfs_create_qgroup(NULL, fs_info, 256);
305 if (ret) { 305 if (ret) {
306 test_msg("Couldn't create a qgroup %d\n", ret); 306 test_msg("Couldn't create a qgroup %d\n", ret);
307 return ret; 307 return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7e80f32550a6..5628e25250c0 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,7 +35,7 @@
35 35
36#define BTRFS_ROOT_TRANS_TAG 0 36#define BTRFS_ROOT_TRANS_TAG 0
37 37
38static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { 38static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
39 [TRANS_STATE_RUNNING] = 0U, 39 [TRANS_STATE_RUNNING] = 0U,
40 [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | 40 [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE |
41 __TRANS_START), 41 __TRANS_START),
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
64 if (atomic_dec_and_test(&transaction->use_count)) { 64 if (atomic_dec_and_test(&transaction->use_count)) {
65 BUG_ON(!list_empty(&transaction->list)); 65 BUG_ON(!list_empty(&transaction->list));
66 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); 66 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
67 if (transaction->delayed_refs.pending_csums)
68 printk(KERN_ERR "pending csums is %llu\n",
69 transaction->delayed_refs.pending_csums);
67 while (!list_empty(&transaction->pending_chunks)) { 70 while (!list_empty(&transaction->pending_chunks)) {
68 struct extent_map *em; 71 struct extent_map *em;
69 72
@@ -93,11 +96,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
93 */ 96 */
94 ASSERT(!waitqueue_active(&state->wq)); 97 ASSERT(!waitqueue_active(&state->wq));
95 free_extent_state(state); 98 free_extent_state(state);
96 if (need_resched()) { 99
97 spin_unlock(&tree->lock); 100 cond_resched_lock(&tree->lock);
98 cond_resched();
99 spin_lock(&tree->lock);
100 }
101 } 101 }
102 spin_unlock(&tree->lock); 102 spin_unlock(&tree->lock);
103} 103}
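cond_resched_lock() collapses the removed four-line sequence into a single call: it drops the spinlock and reschedules only when a reschedule is pending (or, on preemptible kernels, when the lock is contended), then retakes it. The two forms, side by side, as a sketch:

	/* before */
	if (need_resched()) {
		spin_unlock(&tree->lock);
		cond_resched();
		spin_lock(&tree->lock);
	}

	/* after */
	cond_resched_lock(&tree->lock);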
@@ -222,10 +222,12 @@ loop:
222 atomic_set(&cur_trans->use_count, 2); 222 atomic_set(&cur_trans->use_count, 2);
223 cur_trans->have_free_bgs = 0; 223 cur_trans->have_free_bgs = 0;
224 cur_trans->start_time = get_seconds(); 224 cur_trans->start_time = get_seconds();
225 cur_trans->dirty_bg_run = 0;
225 226
226 cur_trans->delayed_refs.href_root = RB_ROOT; 227 cur_trans->delayed_refs.href_root = RB_ROOT;
227 atomic_set(&cur_trans->delayed_refs.num_entries, 0); 228 atomic_set(&cur_trans->delayed_refs.num_entries, 0);
228 cur_trans->delayed_refs.num_heads_ready = 0; 229 cur_trans->delayed_refs.num_heads_ready = 0;
230 cur_trans->delayed_refs.pending_csums = 0;
229 cur_trans->delayed_refs.num_heads = 0; 231 cur_trans->delayed_refs.num_heads = 0;
230 cur_trans->delayed_refs.flushing = 0; 232 cur_trans->delayed_refs.flushing = 0;
231 cur_trans->delayed_refs.run_delayed_start = 0; 233 cur_trans->delayed_refs.run_delayed_start = 0;
@@ -250,6 +252,9 @@ loop:
250 INIT_LIST_HEAD(&cur_trans->switch_commits); 252 INIT_LIST_HEAD(&cur_trans->switch_commits);
251 INIT_LIST_HEAD(&cur_trans->pending_ordered); 253 INIT_LIST_HEAD(&cur_trans->pending_ordered);
252 INIT_LIST_HEAD(&cur_trans->dirty_bgs); 254 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
255 INIT_LIST_HEAD(&cur_trans->io_bgs);
256 mutex_init(&cur_trans->cache_write_mutex);
257 cur_trans->num_dirty_bgs = 0;
253 spin_lock_init(&cur_trans->dirty_bgs_lock); 258 spin_lock_init(&cur_trans->dirty_bgs_lock);
254 list_add_tail(&cur_trans->list, &fs_info->trans_list); 259 list_add_tail(&cur_trans->list, &fs_info->trans_list);
255 extent_io_tree_init(&cur_trans->dirty_pages, 260 extent_io_tree_init(&cur_trans->dirty_pages,
@@ -721,7 +726,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
721 updates = trans->delayed_ref_updates; 726 updates = trans->delayed_ref_updates;
722 trans->delayed_ref_updates = 0; 727 trans->delayed_ref_updates = 0;
723 if (updates) { 728 if (updates) {
724 err = btrfs_run_delayed_refs(trans, root, updates); 729 err = btrfs_run_delayed_refs(trans, root, updates * 2);
725 if (err) /* Error code will also eval true */ 730 if (err) /* Error code will also eval true */
726 return err; 731 return err;
727 } 732 }
@@ -1023,17 +1028,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1023 u64 old_root_bytenr; 1028 u64 old_root_bytenr;
1024 u64 old_root_used; 1029 u64 old_root_used;
1025 struct btrfs_root *tree_root = root->fs_info->tree_root; 1030 struct btrfs_root *tree_root = root->fs_info->tree_root;
1026 bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
1027 1031
1028 old_root_used = btrfs_root_used(&root->root_item); 1032 old_root_used = btrfs_root_used(&root->root_item);
1029 btrfs_write_dirty_block_groups(trans, root);
1030 1033
1031 while (1) { 1034 while (1) {
1032 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1035 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1033 if (old_root_bytenr == root->node->start && 1036 if (old_root_bytenr == root->node->start &&
1034 old_root_used == btrfs_root_used(&root->root_item) && 1037 old_root_used == btrfs_root_used(&root->root_item))
1035 (!extent_root ||
1036 list_empty(&trans->transaction->dirty_bgs)))
1037 break; 1038 break;
1038 1039
1039 btrfs_set_root_node(&root->root_item, root->node); 1040 btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,17 +1045,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1044 return ret; 1045 return ret;
1045 1046
1046 old_root_used = btrfs_root_used(&root->root_item); 1047 old_root_used = btrfs_root_used(&root->root_item);
1047 if (extent_root) {
1048 ret = btrfs_write_dirty_block_groups(trans, root);
1049 if (ret)
1050 return ret;
1051 }
1052 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1053 if (ret)
1054 return ret;
1055 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1056 if (ret)
1057 return ret;
1058 } 1048 }
1059 1049
1060 return 0; 1050 return 0;
@@ -1071,6 +1061,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1071 struct btrfs_root *root) 1061 struct btrfs_root *root)
1072{ 1062{
1073 struct btrfs_fs_info *fs_info = root->fs_info; 1063 struct btrfs_fs_info *fs_info = root->fs_info;
1064 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1065 struct list_head *io_bgs = &trans->transaction->io_bgs;
1074 struct list_head *next; 1066 struct list_head *next;
1075 struct extent_buffer *eb; 1067 struct extent_buffer *eb;
1076 int ret; 1068 int ret;
@@ -1098,11 +1090,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1098 if (ret) 1090 if (ret)
1099 return ret; 1091 return ret;
1100 1092
1093 ret = btrfs_setup_space_cache(trans, root);
1094 if (ret)
1095 return ret;
1096
1101 /* run_qgroups might have added some more refs */ 1097 /* run_qgroups might have added some more refs */
1102 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1098 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1103 if (ret) 1099 if (ret)
1104 return ret; 1100 return ret;
1105 1101again:
1106 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 1102 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
1107 next = fs_info->dirty_cowonly_roots.next; 1103 next = fs_info->dirty_cowonly_roots.next;
1108 list_del_init(next); 1104 list_del_init(next);
@@ -1115,8 +1111,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1115 ret = update_cowonly_root(trans, root); 1111 ret = update_cowonly_root(trans, root);
1116 if (ret) 1112 if (ret)
1117 return ret; 1113 return ret;
1114 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1115 if (ret)
1116 return ret;
1118 } 1117 }
1119 1118
1119 while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
1120 ret = btrfs_write_dirty_block_groups(trans, root);
1121 if (ret)
1122 return ret;
1123 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1124 if (ret)
1125 return ret;
1126 }
1127
1128 if (!list_empty(&fs_info->dirty_cowonly_roots))
1129 goto again;
1130
1120 list_add_tail(&fs_info->extent_root->dirty_list, 1131 list_add_tail(&fs_info->extent_root->dirty_list,
1121 &trans->transaction->switch_commits); 1132 &trans->transaction->switch_commits);
1122 btrfs_after_dev_replace_commit(fs_info); 1133 btrfs_after_dev_replace_commit(fs_info);
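The restructured commit path is a fixed-point loop: writing dirty block groups generates delayed refs, and running delayed refs can dirty more roots and block groups, so the two steps alternate until every list drains. A hedged restatement with the error handling elided (all names are taken from the hunk above):

	again:
		while (!list_empty(&fs_info->dirty_cowonly_roots)) {
			update_cowonly_root(trans, root);       /* may add refs */
			btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
		}
		while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
			btrfs_write_dirty_block_groups(trans, root);
			btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
		}
		if (!list_empty(&fs_info->dirty_cowonly_roots))
			goto again;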
@@ -1805,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1805 return ret; 1816 return ret;
1806 } 1817 }
1807 1818
1819 if (!cur_trans->dirty_bg_run) {
1820 int run_it = 0;
1821
1822 /* this mutex is also taken before trying to set
1823 * block groups readonly. We need to make sure
1824 * that nobody has set a block group readonly
 1825 * after extents from that block group have been
1826 * allocated for cache files. btrfs_set_block_group_ro
1827 * will wait for the transaction to commit if it
1828 * finds dirty_bg_run = 1
1829 *
1830 * The dirty_bg_run flag is also used to make sure only
1831 * one process starts all the block group IO. It wouldn't
1832 * hurt to have more than one go through, but there's no
1833 * real advantage to it either.
1834 */
1835 mutex_lock(&root->fs_info->ro_block_group_mutex);
1836 if (!cur_trans->dirty_bg_run) {
1837 run_it = 1;
1838 cur_trans->dirty_bg_run = 1;
1839 }
1840 mutex_unlock(&root->fs_info->ro_block_group_mutex);
1841
1842 if (run_it)
1843 ret = btrfs_start_dirty_block_groups(trans, root);
1844 }
1845 if (ret) {
1846 btrfs_end_transaction(trans, root);
1847 return ret;
1848 }
1849
1808 spin_lock(&root->fs_info->trans_lock); 1850 spin_lock(&root->fs_info->trans_lock);
1809 list_splice(&trans->ordered, &cur_trans->pending_ordered); 1851 list_splice(&trans->ordered, &cur_trans->pending_ordered);
1810 if (cur_trans->state >= TRANS_STATE_COMMIT_START) { 1852 if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
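The dirty_bg_run check is the classic test-then-claim-under-lock idiom: the unlocked read is only a fast-path optimization, while the authoritative re-check and claim happen under ro_block_group_mutex, the same mutex the comment says btrfs_set_block_group_ro takes. Distilled (a sketch of the idiom, not new code):

	int run_it = 0;

	if (!cur_trans->dirty_bg_run) {              /* cheap, racy pre-check */
		mutex_lock(&root->fs_info->ro_block_group_mutex);
		if (!cur_trans->dirty_bg_run) {      /* authoritative re-check */
			run_it = 1;
			cur_trans->dirty_bg_run = 1; /* claim the work */
		}
		mutex_unlock(&root->fs_info->ro_block_group_mutex);
	}
	if (run_it)
		ret = btrfs_start_dirty_block_groups(trans, root);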
@@ -1814,6 +1856,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1814 1856
1815 wait_for_commit(root, cur_trans); 1857 wait_for_commit(root, cur_trans);
1816 1858
1859 if (unlikely(cur_trans->aborted))
1860 ret = cur_trans->aborted;
1861
1817 btrfs_put_transaction(cur_trans); 1862 btrfs_put_transaction(cur_trans);
1818 1863
1819 return ret; 1864 return ret;
@@ -1995,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1995 2040
1996 assert_qgroups_uptodate(trans); 2041 assert_qgroups_uptodate(trans);
1997 ASSERT(list_empty(&cur_trans->dirty_bgs)); 2042 ASSERT(list_empty(&cur_trans->dirty_bgs));
2043 ASSERT(list_empty(&cur_trans->io_bgs));
1998 update_super_roots(root); 2044 update_super_roots(root);
1999 2045
2000 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 2046 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 937050a2b68e..0b24755596ba 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -64,9 +64,19 @@ struct btrfs_transaction {
64 struct list_head pending_ordered; 64 struct list_head pending_ordered;
65 struct list_head switch_commits; 65 struct list_head switch_commits;
66 struct list_head dirty_bgs; 66 struct list_head dirty_bgs;
67 struct list_head io_bgs;
68 u64 num_dirty_bgs;
69
70 /*
71 * we need to make sure block group deletion doesn't race with
72 * free space cache writeout. This mutex keeps them from stomping
73 * on each other
74 */
75 struct mutex cache_write_mutex;
67 spinlock_t dirty_bgs_lock; 76 spinlock_t dirty_bgs_lock;
68 struct btrfs_delayed_ref_root delayed_refs; 77 struct btrfs_delayed_ref_root delayed_refs;
69 int aborted; 78 int aborted;
79 int dirty_bg_run;
70}; 80};
71 81
72#define __TRANS_FREEZABLE (1U << 0) 82#define __TRANS_FREEZABLE (1U << 0)
@@ -136,9 +146,11 @@ struct btrfs_pending_snapshot {
136static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, 146static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
137 struct inode *inode) 147 struct inode *inode)
138{ 148{
149 spin_lock(&BTRFS_I(inode)->lock);
139 BTRFS_I(inode)->last_trans = trans->transaction->transid; 150 BTRFS_I(inode)->last_trans = trans->transaction->transid;
140 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 151 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
141 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; 152 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
153 spin_unlock(&BTRFS_I(inode)->lock);
142} 154}
143 155
144int btrfs_end_transaction(struct btrfs_trans_handle *trans, 156int btrfs_end_transaction(struct btrfs_trans_handle *trans,
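Wrapping the three stores in BTRFS_I(inode)->lock means a reader holding the same spinlock observes them as a unit. A hypothetical reader-side counterpart (this helper is not in the diff; it only illustrates the pairing):

	static bool inode_fully_logged(struct inode *inode, u64 transid)
	{
		bool ret;

		spin_lock(&BTRFS_I(inode)->lock);
		ret = BTRFS_I(inode)->logged_trans == transid &&
		      BTRFS_I(inode)->last_sub_trans <=
		      BTRFS_I(inode)->last_log_commit;
		spin_unlock(&BTRFS_I(inode)->lock);
		return ret;
	}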
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a37f8b39bae..a089b5944efc 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
492 492
493 if (btrfs_inode_generation(eb, src_item) == 0) { 493 if (btrfs_inode_generation(eb, src_item) == 0) {
494 struct extent_buffer *dst_eb = path->nodes[0]; 494 struct extent_buffer *dst_eb = path->nodes[0];
495 const u64 ino_size = btrfs_inode_size(eb, src_item);
495 496
497 /*
498 * For regular files an ino_size == 0 is used only when
499 * logging that an inode exists, as part of a directory
500 * fsync, and the inode wasn't fsynced before. In this
501 * case don't set the size of the inode in the fs/subvol
502 * tree, otherwise we would be throwing valid data away.
503 */
496 if (S_ISREG(btrfs_inode_mode(eb, src_item)) && 504 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497 S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { 505 S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
506 ino_size != 0) {
498 struct btrfs_map_token token; 507 struct btrfs_map_token token;
499 u64 ino_size = btrfs_inode_size(eb, src_item);
500 508
501 btrfs_init_map_token(&token); 509 btrfs_init_map_token(&token);
502 btrfs_set_token_inode_size(dst_eb, dst_item, 510 btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -1012,7 +1020,7 @@ again:
1012 base = btrfs_item_ptr_offset(leaf, path->slots[0]); 1020 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
1013 1021
1014 while (cur_offset < item_size) { 1022 while (cur_offset < item_size) {
1015 extref = (struct btrfs_inode_extref *)base + cur_offset; 1023 extref = (struct btrfs_inode_extref *)(base + cur_offset);
1016 1024
1017 victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 1025 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
1018 1026
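The extra parentheses above fix a pointer-arithmetic precedence bug: a cast binds tighter than +, so (struct btrfs_inode_extref *)base + cur_offset advances cur_offset whole structs, not cur_offset bytes, walking far past the intended item. A standalone userspace demonstration:

	#include <stdio.h>

	struct item { char payload[16]; };

	int main(void)
	{
		char buf[64] = { 0 };
		char *base = buf;
		unsigned long cur_offset = 4;

		struct item *bad  = (struct item *)base + cur_offset;   /* +64 bytes */
		struct item *good = (struct item *)(base + cur_offset); /* +4 bytes  */

		printf("bad advances %td bytes, good advances %td bytes\n",
		       (char *)bad - base, (char *)good - base);
		return 0;
	}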
@@ -1951,6 +1959,104 @@ out:
1951 return ret; 1959 return ret;
1952} 1960}
1953 1961
1962static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
1963 struct btrfs_root *root,
1964 struct btrfs_root *log,
1965 struct btrfs_path *path,
1966 const u64 ino)
1967{
1968 struct btrfs_key search_key;
1969 struct btrfs_path *log_path;
1970 int i;
1971 int nritems;
1972 int ret;
1973
1974 log_path = btrfs_alloc_path();
1975 if (!log_path)
1976 return -ENOMEM;
1977
1978 search_key.objectid = ino;
1979 search_key.type = BTRFS_XATTR_ITEM_KEY;
1980 search_key.offset = 0;
1981again:
1982 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
1983 if (ret < 0)
1984 goto out;
1985process_leaf:
1986 nritems = btrfs_header_nritems(path->nodes[0]);
1987 for (i = path->slots[0]; i < nritems; i++) {
1988 struct btrfs_key key;
1989 struct btrfs_dir_item *di;
1990 struct btrfs_dir_item *log_di;
1991 u32 total_size;
1992 u32 cur;
1993
1994 btrfs_item_key_to_cpu(path->nodes[0], &key, i);
1995 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
1996 ret = 0;
1997 goto out;
1998 }
1999
2000 di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
2001 total_size = btrfs_item_size_nr(path->nodes[0], i);
2002 cur = 0;
2003 while (cur < total_size) {
2004 u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
2005 u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
2006 u32 this_len = sizeof(*di) + name_len + data_len;
2007 char *name;
2008
2009 name = kmalloc(name_len, GFP_NOFS);
2010 if (!name) {
2011 ret = -ENOMEM;
2012 goto out;
2013 }
2014 read_extent_buffer(path->nodes[0], name,
2015 (unsigned long)(di + 1), name_len);
2016
2017 log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
2018 name, name_len, 0);
2019 btrfs_release_path(log_path);
2020 if (!log_di) {
2021 /* Doesn't exist in log tree, so delete it. */
2022 btrfs_release_path(path);
2023 di = btrfs_lookup_xattr(trans, root, path, ino,
2024 name, name_len, -1);
2025 kfree(name);
2026 if (IS_ERR(di)) {
2027 ret = PTR_ERR(di);
2028 goto out;
2029 }
2030 ASSERT(di);
2031 ret = btrfs_delete_one_dir_name(trans, root,
2032 path, di);
2033 if (ret)
2034 goto out;
2035 btrfs_release_path(path);
2036 search_key = key;
2037 goto again;
2038 }
2039 kfree(name);
2040 if (IS_ERR(log_di)) {
2041 ret = PTR_ERR(log_di);
2042 goto out;
2043 }
2044 cur += this_len;
2045 di = (struct btrfs_dir_item *)((char *)di + this_len);
2046 }
2047 }
2048 ret = btrfs_next_leaf(root, path);
2049 if (ret > 0)
2050 ret = 0;
2051 else if (ret == 0)
2052 goto process_leaf;
2053out:
2054 btrfs_free_path(log_path);
2055 btrfs_release_path(path);
2056 return ret;
2057}
2058
2059
1954/* 2060/*
1955 * deletion replay happens before we copy any new directory items 2061 * deletion replay happens before we copy any new directory items
1956 * out of the log or out of backreferences from inodes. It 2062 * out of the log or out of backreferences from inodes. It
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2104 2210
2105 inode_item = btrfs_item_ptr(eb, i, 2211 inode_item = btrfs_item_ptr(eb, i,
2106 struct btrfs_inode_item); 2212 struct btrfs_inode_item);
2213 ret = replay_xattr_deletes(wc->trans, root, log,
2214 path, key.objectid);
2215 if (ret)
2216 break;
2107 mode = btrfs_inode_mode(eb, inode_item); 2217 mode = btrfs_inode_mode(eb, inode_item);
2108 if (S_ISDIR(mode)) { 2218 if (S_ISDIR(mode)) {
2109 ret = replay_dir_deletes(wc->trans, 2219 ret = replay_dir_deletes(wc->trans,
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2230 if (trans) { 2340 if (trans) {
2231 btrfs_tree_lock(next); 2341 btrfs_tree_lock(next);
2232 btrfs_set_lock_blocking(next); 2342 btrfs_set_lock_blocking(next);
2233 clean_tree_block(trans, root, next); 2343 clean_tree_block(trans, root->fs_info,
2344 next);
2234 btrfs_wait_tree_block_writeback(next); 2345 btrfs_wait_tree_block_writeback(next);
2235 btrfs_tree_unlock(next); 2346 btrfs_tree_unlock(next);
2236 } 2347 }
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2308 if (trans) { 2419 if (trans) {
2309 btrfs_tree_lock(next); 2420 btrfs_tree_lock(next);
2310 btrfs_set_lock_blocking(next); 2421 btrfs_set_lock_blocking(next);
2311 clean_tree_block(trans, root, next); 2422 clean_tree_block(trans, root->fs_info,
2423 next);
2312 btrfs_wait_tree_block_writeback(next); 2424 btrfs_wait_tree_block_writeback(next);
2313 btrfs_tree_unlock(next); 2425 btrfs_tree_unlock(next);
2314 } 2426 }
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2384 if (trans) { 2496 if (trans) {
2385 btrfs_tree_lock(next); 2497 btrfs_tree_lock(next);
2386 btrfs_set_lock_blocking(next); 2498 btrfs_set_lock_blocking(next);
2387 clean_tree_block(trans, log, next); 2499 clean_tree_block(trans, log->fs_info, next);
2388 btrfs_wait_tree_block_writeback(next); 2500 btrfs_wait_tree_block_writeback(next);
2389 btrfs_tree_unlock(next); 2501 btrfs_tree_unlock(next);
2390 } 2502 }
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3020 struct btrfs_root *root, struct inode *inode, 3132 struct btrfs_root *root, struct inode *inode,
3021 struct btrfs_path *path, 3133 struct btrfs_path *path,
3022 struct btrfs_path *dst_path, int key_type, 3134 struct btrfs_path *dst_path, int key_type,
3135 struct btrfs_log_ctx *ctx,
3023 u64 min_offset, u64 *last_offset_ret) 3136 u64 min_offset, u64 *last_offset_ret)
3024{ 3137{
3025 struct btrfs_key min_key; 3138 struct btrfs_key min_key;
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3104 src = path->nodes[0]; 3217 src = path->nodes[0];
3105 nritems = btrfs_header_nritems(src); 3218 nritems = btrfs_header_nritems(src);
3106 for (i = path->slots[0]; i < nritems; i++) { 3219 for (i = path->slots[0]; i < nritems; i++) {
3220 struct btrfs_dir_item *di;
3221
3107 btrfs_item_key_to_cpu(src, &min_key, i); 3222 btrfs_item_key_to_cpu(src, &min_key, i);
3108 3223
3109 if (min_key.objectid != ino || min_key.type != key_type) 3224 if (min_key.objectid != ino || min_key.type != key_type)
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3114 err = ret; 3229 err = ret;
3115 goto done; 3230 goto done;
3116 } 3231 }
3232
3233 /*
3234 * We must make sure that when we log a directory entry,
3235 * the corresponding inode, after log replay, has a
3236 * matching link count. For example:
3237 *
3238 * touch foo
3239 * mkdir mydir
3240 * sync
3241 * ln foo mydir/bar
3242 * xfs_io -c "fsync" mydir
3243 * <crash>
3244 * <mount fs and log replay>
3245 *
 3246 * This would result in an fsync log that, when replayed,
 3247 * leaves our file inode with a link count of 1 while two
 3248 * directory entries point to the same inode.
 3249 * After removing one of the names, it would not be
 3250 * possible to remove the other name, which always
 3251 * resulted in stale file handle errors, and it would not
 3252 * be possible to rmdir the parent directory, since
3253 * its i_size could never decrement to the value
3254 * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
3255 */
3256 di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
3257 btrfs_dir_item_key_to_cpu(src, di, &tmp);
3258 if (ctx &&
3259 (btrfs_dir_transid(src, di) == trans->transid ||
3260 btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
3261 tmp.type != BTRFS_ROOT_ITEM_KEY)
3262 ctx->log_new_dentries = true;
3117 } 3263 }
3118 path->slots[0] = nritems; 3264 path->slots[0] = nritems;
3119 3265
@@ -3175,7 +3321,8 @@ done:
3175static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 3321static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
3176 struct btrfs_root *root, struct inode *inode, 3322 struct btrfs_root *root, struct inode *inode,
3177 struct btrfs_path *path, 3323 struct btrfs_path *path,
3178 struct btrfs_path *dst_path) 3324 struct btrfs_path *dst_path,
3325 struct btrfs_log_ctx *ctx)
3179{ 3326{
3180 u64 min_key; 3327 u64 min_key;
3181 u64 max_key; 3328 u64 max_key;
@@ -3187,7 +3334,7 @@ again:
3187 max_key = 0; 3334 max_key = 0;
3188 while (1) { 3335 while (1) {
3189 ret = log_dir_items(trans, root, inode, path, 3336 ret = log_dir_items(trans, root, inode, path,
3190 dst_path, key_type, min_key, 3337 dst_path, key_type, ctx, min_key,
3191 &max_key); 3338 &max_key);
3192 if (ret) 3339 if (ret)
3193 return ret; 3340 return ret;
@@ -3963,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3963 if (ret < 0) { 4110 if (ret < 0) {
3964 return ret; 4111 return ret;
3965 } else if (ret > 0) { 4112 } else if (ret > 0) {
3966 *size_ret = i_size_read(inode); 4113 *size_ret = 0;
3967 } else { 4114 } else {
3968 struct btrfs_inode_item *item; 4115 struct btrfs_inode_item *item;
3969 4116
@@ -4070,10 +4217,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4070 if (S_ISDIR(inode->i_mode)) { 4217 if (S_ISDIR(inode->i_mode)) {
4071 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4218 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
4072 4219
4073 if (inode_only == LOG_INODE_EXISTS) { 4220 if (inode_only == LOG_INODE_EXISTS)
4074 max_key_type = BTRFS_INODE_EXTREF_KEY; 4221 max_key_type = BTRFS_XATTR_ITEM_KEY;
4075 max_key.type = max_key_type;
4076 }
4077 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4222 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
4078 } else { 4223 } else {
4079 if (inode_only == LOG_INODE_EXISTS) { 4224 if (inode_only == LOG_INODE_EXISTS) {
@@ -4098,7 +4243,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4098 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4243 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4099 &BTRFS_I(inode)->runtime_flags)) { 4244 &BTRFS_I(inode)->runtime_flags)) {
4100 if (inode_only == LOG_INODE_EXISTS) { 4245 if (inode_only == LOG_INODE_EXISTS) {
4101 max_key.type = BTRFS_INODE_EXTREF_KEY; 4246 max_key.type = BTRFS_XATTR_ITEM_KEY;
4102 ret = drop_objectid_items(trans, log, path, ino, 4247 ret = drop_objectid_items(trans, log, path, ino,
4103 max_key.type); 4248 max_key.type);
4104 } else { 4249 } else {
@@ -4106,20 +4251,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4106 &BTRFS_I(inode)->runtime_flags); 4251 &BTRFS_I(inode)->runtime_flags);
4107 clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4252 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4108 &BTRFS_I(inode)->runtime_flags); 4253 &BTRFS_I(inode)->runtime_flags);
4109 ret = btrfs_truncate_inode_items(trans, log, 4254 while (1) {
4110 inode, 0, 0); 4255 ret = btrfs_truncate_inode_items(trans,
4256 log, inode, 0, 0);
4257 if (ret != -EAGAIN)
4258 break;
4259 }
4111 } 4260 }
4112 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, 4261 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4113 &BTRFS_I(inode)->runtime_flags) || 4262 &BTRFS_I(inode)->runtime_flags) ||
4114 inode_only == LOG_INODE_EXISTS) { 4263 inode_only == LOG_INODE_EXISTS) {
4115 if (inode_only == LOG_INODE_ALL) { 4264 if (inode_only == LOG_INODE_ALL)
4116 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4117 &BTRFS_I(inode)->runtime_flags);
4118 fast_search = true; 4265 fast_search = true;
4119 max_key.type = BTRFS_XATTR_ITEM_KEY; 4266 max_key.type = BTRFS_XATTR_ITEM_KEY;
4120 } else {
4121 max_key.type = BTRFS_INODE_EXTREF_KEY;
4122 }
4123 ret = drop_objectid_items(trans, log, path, ino, 4267 ret = drop_objectid_items(trans, log, path, ino,
4124 max_key.type); 4268 max_key.type);
4125 } else { 4269 } else {
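The new loop is a plain retry-until-done idiom: btrfs_truncate_inode_items() now returns -EAGAIN when it stops early, and the caller simply re-invokes it. An equivalent, slightly tighter form of the same sketch:

	do {
		ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
	} while (ret == -EAGAIN);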
@@ -4277,15 +4421,18 @@ log_extents:
4277 } 4421 }
4278 4422
4279 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 4423 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
4280 ret = log_directory_changes(trans, root, inode, path, dst_path); 4424 ret = log_directory_changes(trans, root, inode, path, dst_path,
4425 ctx);
4281 if (ret) { 4426 if (ret) {
4282 err = ret; 4427 err = ret;
4283 goto out_unlock; 4428 goto out_unlock;
4284 } 4429 }
4285 } 4430 }
4286 4431
4432 spin_lock(&BTRFS_I(inode)->lock);
4287 BTRFS_I(inode)->logged_trans = trans->transid; 4433 BTRFS_I(inode)->logged_trans = trans->transid;
4288 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 4434 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
4435 spin_unlock(&BTRFS_I(inode)->lock);
4289out_unlock: 4436out_unlock:
4290 if (unlikely(err)) 4437 if (unlikely(err))
4291 btrfs_put_logged_extents(&logged_list); 4438 btrfs_put_logged_extents(&logged_list);
@@ -4372,6 +4519,181 @@ out:
4372 return ret; 4519 return ret;
4373} 4520}
4374 4521
4522struct btrfs_dir_list {
4523 u64 ino;
4524 struct list_head list;
4525};
4526
4527/*
4528 * Log the inodes of the new dentries of a directory. See log_dir_items() for
 4529 * details about why it is needed.
4530 * This is a recursive operation - if an existing dentry corresponds to a
4531 * directory, that directory's new entries are logged too (same behaviour as
4532 * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
 4533 * the dentries point to, we do not lock their i_mutex, otherwise lockdep
4534 * complains about the following circular lock dependency / possible deadlock:
4535 *
4536 * CPU0 CPU1
4537 * ---- ----
4538 * lock(&type->i_mutex_dir_key#3/2);
4539 * lock(sb_internal#2);
4540 * lock(&type->i_mutex_dir_key#3/2);
4541 * lock(&sb->s_type->i_mutex_key#14);
4542 *
4543 * Where sb_internal is the lock (a counter that works as a lock) acquired by
4544 * sb_start_intwrite() in btrfs_start_transaction().
4545 * Not locking i_mutex of the inodes is still safe because:
4546 *
4547 * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
4548 * that while logging the inode new references (names) are added or removed
4549 * from the inode, leaving the logged inode item with a link count that does
4550 * not match the number of logged inode reference items. This is fine because
4551 * at log replay time we compute the real number of links and correct the
4552 * link count in the inode item (see replay_one_buffer() and
4553 * link_to_fixup_dir());
4554 *
4555 * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
4556 * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
4557 * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
4558 * has a size that doesn't match the sum of the lengths of all the logged
4559 * names. This does not result in a problem because if a dir_item key is
4560 * logged but its matching dir_index key is not logged, at log replay time we
4561 * don't use it to replay the respective name (see replay_one_name()). On the
4562 * other hand if only the dir_index key ends up being logged, the respective
4563 * name is added to the fs/subvol tree with both the dir_item and dir_index
4564 * keys created (see replay_one_name()).
4565 * The directory's inode item with a wrong i_size is not a problem as well,
4566 * since we don't use it at log replay time to set the i_size in the inode
4567 * item of the fs/subvol tree (see overwrite_item()).
4568 */
4569static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
4570 struct btrfs_root *root,
4571 struct inode *start_inode,
4572 struct btrfs_log_ctx *ctx)
4573{
4574 struct btrfs_root *log = root->log_root;
4575 struct btrfs_path *path;
4576 LIST_HEAD(dir_list);
4577 struct btrfs_dir_list *dir_elem;
4578 int ret = 0;
4579
4580 path = btrfs_alloc_path();
4581 if (!path)
4582 return -ENOMEM;
4583
4584 dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
4585 if (!dir_elem) {
4586 btrfs_free_path(path);
4587 return -ENOMEM;
4588 }
4589 dir_elem->ino = btrfs_ino(start_inode);
4590 list_add_tail(&dir_elem->list, &dir_list);
4591
4592 while (!list_empty(&dir_list)) {
4593 struct extent_buffer *leaf;
4594 struct btrfs_key min_key;
4595 int nritems;
4596 int i;
4597
4598 dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
4599 list);
4600 if (ret)
4601 goto next_dir_inode;
4602
4603 min_key.objectid = dir_elem->ino;
4604 min_key.type = BTRFS_DIR_ITEM_KEY;
4605 min_key.offset = 0;
4606again:
4607 btrfs_release_path(path);
4608 ret = btrfs_search_forward(log, &min_key, path, trans->transid);
4609 if (ret < 0) {
4610 goto next_dir_inode;
4611 } else if (ret > 0) {
4612 ret = 0;
4613 goto next_dir_inode;
4614 }
4615
4616process_leaf:
4617 leaf = path->nodes[0];
4618 nritems = btrfs_header_nritems(leaf);
4619 for (i = path->slots[0]; i < nritems; i++) {
4620 struct btrfs_dir_item *di;
4621 struct btrfs_key di_key;
4622 struct inode *di_inode;
4623 struct btrfs_dir_list *new_dir_elem;
4624 int log_mode = LOG_INODE_EXISTS;
4625 int type;
4626
4627 btrfs_item_key_to_cpu(leaf, &min_key, i);
4628 if (min_key.objectid != dir_elem->ino ||
4629 min_key.type != BTRFS_DIR_ITEM_KEY)
4630 goto next_dir_inode;
4631
4632 di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
4633 type = btrfs_dir_type(leaf, di);
4634 if (btrfs_dir_transid(leaf, di) < trans->transid &&
4635 type != BTRFS_FT_DIR)
4636 continue;
4637 btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
4638 if (di_key.type == BTRFS_ROOT_ITEM_KEY)
4639 continue;
4640
4641 di_inode = btrfs_iget(root->fs_info->sb, &di_key,
4642 root, NULL);
4643 if (IS_ERR(di_inode)) {
4644 ret = PTR_ERR(di_inode);
4645 goto next_dir_inode;
4646 }
4647
4648 if (btrfs_inode_in_log(di_inode, trans->transid)) {
4649 iput(di_inode);
4650 continue;
4651 }
4652
4653 ctx->log_new_dentries = false;
4654 if (type == BTRFS_FT_DIR)
4655 log_mode = LOG_INODE_ALL;
4656 btrfs_release_path(path);
4657 ret = btrfs_log_inode(trans, root, di_inode,
4658 log_mode, 0, LLONG_MAX, ctx);
4659 iput(di_inode);
4660 if (ret)
4661 goto next_dir_inode;
4662 if (ctx->log_new_dentries) {
4663 new_dir_elem = kmalloc(sizeof(*new_dir_elem),
4664 GFP_NOFS);
4665 if (!new_dir_elem) {
4666 ret = -ENOMEM;
4667 goto next_dir_inode;
4668 }
4669 new_dir_elem->ino = di_key.objectid;
4670 list_add_tail(&new_dir_elem->list, &dir_list);
4671 }
4672 break;
4673 }
4674 if (i == nritems) {
4675 ret = btrfs_next_leaf(log, path);
4676 if (ret < 0) {
4677 goto next_dir_inode;
4678 } else if (ret > 0) {
4679 ret = 0;
4680 goto next_dir_inode;
4681 }
4682 goto process_leaf;
4683 }
4684 if (min_key.offset < (u64)-1) {
4685 min_key.offset++;
4686 goto again;
4687 }
4688next_dir_inode:
4689 list_del(&dir_elem->list);
4690 kfree(dir_elem);
4691 }
4692
4693 btrfs_free_path(path);
4694 return ret;
4695}
4696
4375/* 4697/*
4376 * helper function around btrfs_log_inode to make sure newly created 4698 * helper function around btrfs_log_inode to make sure newly created
4377 * parent directories also end up in the log. A minimal inode and backref 4699 * parent directories also end up in the log. A minimal inode and backref
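Structurally, log_new_dir_dentries() above is an iterative breadth-first walk: a FIFO of directory inode numbers replaces recursion, and each newly discovered directory is appended to the tail. The skeleton of that pattern (a sketch with the visiting logic elided):

	struct btrfs_dir_list { u64 ino; struct list_head list; };

	LIST_HEAD(dir_list);
	/* ... seed dir_list with the starting directory ... */
	while (!list_empty(&dir_list)) {
		struct btrfs_dir_list *elem =
			list_first_entry(&dir_list, struct btrfs_dir_list, list);

		/* visit elem->ino; kmalloc and list_add_tail() an element
		 * for every child directory whose dentries must be logged */
		list_del(&elem->list);
		kfree(elem);
	}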
@@ -4394,6 +4716,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4394 const struct dentry * const first_parent = parent; 4716 const struct dentry * const first_parent = parent;
4395 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > 4717 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4396 last_committed); 4718 last_committed);
4719 bool log_dentries = false;
4720 struct inode *orig_inode = inode;
4397 4721
4398 sb = inode->i_sb; 4722 sb = inode->i_sb;
4399 4723
@@ -4449,6 +4773,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4449 goto end_trans; 4773 goto end_trans;
4450 } 4774 }
4451 4775
4776 if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
4777 log_dentries = true;
4778
4452 while (1) { 4779 while (1) {
4453 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4780 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4454 break; 4781 break;
@@ -4485,7 +4812,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4485 dput(old_parent); 4812 dput(old_parent);
4486 old_parent = parent; 4813 old_parent = parent;
4487 } 4814 }
4488 ret = 0; 4815 if (log_dentries)
4816 ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
4817 else
4818 ret = 0;
4489end_trans: 4819end_trans:
4490 dput(old_parent); 4820 dput(old_parent);
4491 if (ret < 0) { 4821 if (ret < 0) {
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 154990c26dcb..6916a781ea02 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -29,6 +29,7 @@ struct btrfs_log_ctx {
29 int log_ret; 29 int log_ret;
30 int log_transid; 30 int log_transid;
31 int io_err; 31 int io_err;
32 bool log_new_dentries;
32 struct list_head list; 33 struct list_head list;
33}; 34};
34 35
@@ -37,6 +38,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
37 ctx->log_ret = 0; 38 ctx->log_ret = 0;
38 ctx->log_transid = 0; 39 ctx->log_transid = 0;
39 ctx->io_err = 0; 40 ctx->io_err = 0;
41 ctx->log_new_dentries = false;
40 INIT_LIST_HEAD(&ctx->list); 42 INIT_LIST_HEAD(&ctx->list);
41} 43}
42 44
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cd4d1315aaa9..8bcd2a007517 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -366,8 +366,8 @@ loop_lock:
366 btrfsic_submit_bio(cur->bi_rw, cur); 366 btrfsic_submit_bio(cur->bi_rw, cur);
367 num_run++; 367 num_run++;
368 batch_run++; 368 batch_run++;
369 if (need_resched()) 369
370 cond_resched(); 370 cond_resched();
371 371
372 /* 372 /*
373 * we made progress, there is more work to do and the bdi 373 * we made progress, there is more work to do and the bdi
@@ -400,8 +400,7 @@ loop_lock:
400 * against it before looping 400 * against it before looping
401 */ 401 */
402 last_waited = ioc->last_waited; 402 last_waited = ioc->last_waited;
403 if (need_resched()) 403 cond_resched();
404 cond_resched();
405 continue; 404 continue;
406 } 405 }
407 spin_lock(&device->io_lock); 406 spin_lock(&device->io_lock);
@@ -609,8 +608,7 @@ error:
609 return ERR_PTR(-ENOMEM); 608 return ERR_PTR(-ENOMEM);
610} 609}
611 610
612void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, 611void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
613 struct btrfs_fs_devices *fs_devices, int step)
614{ 612{
615 struct btrfs_device *device, *next; 613 struct btrfs_device *device, *next;
616 struct btrfs_device *latest_dev = NULL; 614 struct btrfs_device *latest_dev = NULL;
@@ -1136,11 +1134,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
1136 path = btrfs_alloc_path(); 1134 path = btrfs_alloc_path();
1137 if (!path) 1135 if (!path)
1138 return -ENOMEM; 1136 return -ENOMEM;
1139again: 1137
1140 max_hole_start = search_start; 1138 max_hole_start = search_start;
1141 max_hole_size = 0; 1139 max_hole_size = 0;
1142 hole_size = 0;
1143 1140
1141again:
1144 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { 1142 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
1145 ret = -ENOSPC; 1143 ret = -ENOSPC;
1146 goto out; 1144 goto out;
@@ -1233,21 +1231,23 @@ next:
1233 * allocated dev extents, and when shrinking the device, 1231 * allocated dev extents, and when shrinking the device,
1234 * search_end may be smaller than search_start. 1232 * search_end may be smaller than search_start.
1235 */ 1233 */
1236 if (search_end > search_start) 1234 if (search_end > search_start) {
1237 hole_size = search_end - search_start; 1235 hole_size = search_end - search_start;
1238 1236
1239 if (hole_size > max_hole_size) { 1237 if (contains_pending_extent(trans, device, &search_start,
1240 max_hole_start = search_start; 1238 hole_size)) {
1241 max_hole_size = hole_size; 1239 btrfs_release_path(path);
1242 } 1240 goto again;
1241 }
1243 1242
1244 if (contains_pending_extent(trans, device, &search_start, hole_size)) { 1243 if (hole_size > max_hole_size) {
1245 btrfs_release_path(path); 1244 max_hole_start = search_start;
1246 goto again; 1245 max_hole_size = hole_size;
1246 }
1247 } 1247 }
1248 1248
1249 /* See above. */ 1249 /* See above. */
1250 if (hole_size < num_bytes) 1250 if (max_hole_size < num_bytes)
1251 ret = -ENOSPC; 1251 ret = -ENOSPC;
1252 else 1252 else
1253 ret = 0; 1253 ret = 0;
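The subtle part of this hunk is the final test: hole_size only holds the last hole examined, while max_hole_size tracks the best one, so the ENOSPC decision must use the latter. A toy userspace run showing the difference:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t holes[] = { 4096, 65536, 512 }; /* last hole is tiny */
		uint64_t hole_size = 0, max_hole_size = 0, num_bytes = 65536;

		for (int i = 0; i < 3; i++) {
			hole_size = holes[i];
			if (hole_size > max_hole_size)
				max_hole_size = hole_size;
		}
		/* the old test (hole_size < num_bytes) would report ENOSPC
		 * here, even though a 64K hole was found earlier */
		printf("last=%llu best=%llu -> %s\n",
		       (unsigned long long)hole_size,
		       (unsigned long long)max_hole_size,
		       max_hole_size < num_bytes ? "ENOSPC" : "ok");
		return 0;
	}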
@@ -2487,8 +2487,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
2487} 2487}
2488 2488
2489static int btrfs_free_chunk(struct btrfs_trans_handle *trans, 2489static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
2490 struct btrfs_root *root, 2490 struct btrfs_root *root, u64 chunk_objectid,
2491 u64 chunk_tree, u64 chunk_objectid,
2492 u64 chunk_offset) 2491 u64 chunk_offset)
2493{ 2492{
2494 int ret; 2493 int ret;
@@ -2580,7 +2579,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2580 struct map_lookup *map; 2579 struct map_lookup *map;
2581 u64 dev_extent_len = 0; 2580 u64 dev_extent_len = 0;
2582 u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2581 u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2583 u64 chunk_tree = root->fs_info->chunk_root->objectid;
2584 int i, ret = 0; 2582 int i, ret = 0;
2585 2583
2586 /* Just in case */ 2584 /* Just in case */
@@ -2634,8 +2632,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2634 } 2632 }
2635 } 2633 }
2636 } 2634 }
2637 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, 2635 ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
2638 chunk_offset);
2639 if (ret) { 2636 if (ret) {
2640 btrfs_abort_transaction(trans, root, ret); 2637 btrfs_abort_transaction(trans, root, ret);
2641 goto out; 2638 goto out;
@@ -2664,8 +2661,8 @@ out:
2664} 2661}
2665 2662
2666static int btrfs_relocate_chunk(struct btrfs_root *root, 2663static int btrfs_relocate_chunk(struct btrfs_root *root,
2667 u64 chunk_tree, u64 chunk_objectid, 2664 u64 chunk_objectid,
2668 u64 chunk_offset) 2665 u64 chunk_offset)
2669{ 2666{
2670 struct btrfs_root *extent_root; 2667 struct btrfs_root *extent_root;
2671 struct btrfs_trans_handle *trans; 2668 struct btrfs_trans_handle *trans;
@@ -2707,7 +2704,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
2707 struct btrfs_chunk *chunk; 2704 struct btrfs_chunk *chunk;
2708 struct btrfs_key key; 2705 struct btrfs_key key;
2709 struct btrfs_key found_key; 2706 struct btrfs_key found_key;
2710 u64 chunk_tree = chunk_root->root_key.objectid;
2711 u64 chunk_type; 2707 u64 chunk_type;
2712 bool retried = false; 2708 bool retried = false;
2713 int failed = 0; 2709 int failed = 0;
@@ -2744,7 +2740,7 @@ again:
2744 btrfs_release_path(path); 2740 btrfs_release_path(path);
2745 2741
2746 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { 2742 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
2747 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 2743 ret = btrfs_relocate_chunk(chunk_root,
2748 found_key.objectid, 2744 found_key.objectid,
2749 found_key.offset); 2745 found_key.offset);
2750 if (ret == -ENOSPC) 2746 if (ret == -ENOSPC)
@@ -3022,7 +3018,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
3022 3018
3023 stripe_offset = btrfs_stripe_offset(leaf, stripe); 3019 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3024 stripe_length = btrfs_chunk_length(leaf, chunk); 3020 stripe_length = btrfs_chunk_length(leaf, chunk);
3025 do_div(stripe_length, factor); 3021 stripe_length = div_u64(stripe_length, factor);
3026 3022
3027 if (stripe_offset < bargs->pend && 3023 if (stripe_offset < bargs->pend &&
3028 stripe_offset + stripe_length > bargs->pstart) 3024 stripe_offset + stripe_length > bargs->pstart)
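This diff converts many do_div() call sites: do_div(x, d) is a macro that turns x into the quotient in place and evaluates to the remainder, whereas div_u64() is a pure function returning the quotient (div_u64_rem() also hands back the remainder). A userspace model of the two shapes:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t len = 1000;
		uint32_t factor = 3;

		/* do_div(len, factor) shape: len becomes the quotient and
		 * the macro expression yields the remainder */
		uint32_t rem = (uint32_t)(len % factor);
		len /= factor;                  /* len == 333, rem == 1 */

		/* div_u64() shape: explicit quotient, operands untouched */
		uint64_t q = 1000 / factor;     /* q == 333 */

		printf("%llu %u %llu\n", (unsigned long long)len, rem,
		       (unsigned long long)q);
		return 0;
	}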
@@ -3255,7 +3251,6 @@ again:
3255 } 3251 }
3256 3252
3257 ret = btrfs_relocate_chunk(chunk_root, 3253 ret = btrfs_relocate_chunk(chunk_root,
3258 chunk_root->root_key.objectid,
3259 found_key.objectid, 3254 found_key.objectid,
3260 found_key.offset); 3255 found_key.offset);
3261 if (ret && ret != -ENOSPC) 3256 if (ret && ret != -ENOSPC)
@@ -3957,7 +3952,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
3957 struct btrfs_dev_extent *dev_extent = NULL; 3952 struct btrfs_dev_extent *dev_extent = NULL;
3958 struct btrfs_path *path; 3953 struct btrfs_path *path;
3959 u64 length; 3954 u64 length;
3960 u64 chunk_tree;
3961 u64 chunk_objectid; 3955 u64 chunk_objectid;
3962 u64 chunk_offset; 3956 u64 chunk_offset;
3963 int ret; 3957 int ret;
@@ -4027,13 +4021,11 @@ again:
4027 break; 4021 break;
4028 } 4022 }
4029 4023
4030 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
4031 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 4024 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
4032 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 4025 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4033 btrfs_release_path(path); 4026 btrfs_release_path(path);
4034 4027
4035 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 4028 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
4036 chunk_offset);
4037 if (ret && ret != -ENOSPC) 4029 if (ret && ret != -ENOSPC)
4038 goto done; 4030 goto done;
4039 if (ret == -ENOSPC) 4031 if (ret == -ENOSPC)
@@ -4131,7 +4123,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
4131 return 0; 4123 return 0;
4132} 4124}
4133 4125
4134static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 4126static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
4135 [BTRFS_RAID_RAID10] = { 4127 [BTRFS_RAID_RAID10] = {
4136 .sub_stripes = 2, 4128 .sub_stripes = 2,
4137 .dev_stripes = 1, 4129 .dev_stripes = 1,
@@ -4289,7 +4281,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4289 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 4281 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
4290 max_chunk_size); 4282 max_chunk_size);
4291 4283
4292 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, 4284 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
4293 GFP_NOFS); 4285 GFP_NOFS);
4294 if (!devices_info) 4286 if (!devices_info)
4295 return -ENOMEM; 4287 return -ENOMEM;
@@ -4400,8 +4392,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4400 */ 4392 */
4401 if (stripe_size * data_stripes > max_chunk_size) { 4393 if (stripe_size * data_stripes > max_chunk_size) {
4402 u64 mask = (1ULL << 24) - 1; 4394 u64 mask = (1ULL << 24) - 1;
4403 stripe_size = max_chunk_size; 4395
4404 do_div(stripe_size, data_stripes); 4396 stripe_size = div_u64(max_chunk_size, data_stripes);
4405 4397
4406 /* bump the answer up to a 16MB boundary */ 4398 /* bump the answer up to a 16MB boundary */
4407 stripe_size = (stripe_size + mask) & ~mask; 4399 stripe_size = (stripe_size + mask) & ~mask;
@@ -4413,10 +4405,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4413 stripe_size = devices_info[ndevs-1].max_avail; 4405 stripe_size = devices_info[ndevs-1].max_avail;
4414 } 4406 }
4415 4407
4416 do_div(stripe_size, dev_stripes); 4408 stripe_size = div_u64(stripe_size, dev_stripes);
4417 4409
4418 /* align to BTRFS_STRIPE_LEN */ 4410 /* align to BTRFS_STRIPE_LEN */
4419 do_div(stripe_size, raid_stripe_len); 4411 stripe_size = div_u64(stripe_size, raid_stripe_len);
4420 stripe_size *= raid_stripe_len; 4412 stripe_size *= raid_stripe_len;
4421 4413
4422 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 4414 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4903,10 +4895,17 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
4903static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) 4895static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
4904{ 4896{
4905 struct btrfs_bio *bbio = kzalloc( 4897 struct btrfs_bio *bbio = kzalloc(
4898 /* the size of the btrfs_bio */
4906 sizeof(struct btrfs_bio) + 4899 sizeof(struct btrfs_bio) +
4900 /* plus the variable array for the stripes */
4907 sizeof(struct btrfs_bio_stripe) * (total_stripes) + 4901 sizeof(struct btrfs_bio_stripe) * (total_stripes) +
4902 /* plus the variable array for the tgt dev */
4908 sizeof(int) * (real_stripes) + 4903 sizeof(int) * (real_stripes) +
4909 sizeof(u64) * (real_stripes), 4904 /*
4905 * plus the raid_map, which includes both the tgt dev
4906 * and the stripes
4907 */
4908 sizeof(u64) * (total_stripes),
4910 GFP_NOFS); 4909 GFP_NOFS);
4911 if (!bbio) 4910 if (!bbio)
4912 return NULL; 4911 return NULL;
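The new comments make the allocation layout explicit: one kzalloc() carries the btrfs_bio header plus three trailing arrays, and the last term is corrected to size the raid_map for all stripes rather than just the real ones. The size computation, isolated as a sketch:

	size_t size = sizeof(struct btrfs_bio) +
		      total_stripes * sizeof(struct btrfs_bio_stripe) + /* stripes[]  */
		      real_stripes  * sizeof(int) +                     /* tgtdev map */
		      total_stripes * sizeof(u64);                      /* raid_map[] */
	bbio = kzalloc(size, GFP_NOFS);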
@@ -4947,7 +4946,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4947 u64 stripe_nr_orig; 4946 u64 stripe_nr_orig;
4948 u64 stripe_nr_end; 4947 u64 stripe_nr_end;
4949 u64 stripe_len; 4948 u64 stripe_len;
4950 int stripe_index; 4949 u32 stripe_index;
4951 int i; 4950 int i;
4952 int ret = 0; 4951 int ret = 0;
4953 int num_stripes; 4952 int num_stripes;
@@ -4988,7 +4987,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4988 * stripe_nr counts the total number of stripes we have to stride 4987 * stripe_nr counts the total number of stripes we have to stride
4989 * to get to this block 4988 * to get to this block
4990 */ 4989 */
4991 do_div(stripe_nr, stripe_len); 4990 stripe_nr = div64_u64(stripe_nr, stripe_len);
4992 4991
4993 stripe_offset = stripe_nr * stripe_len; 4992 stripe_offset = stripe_nr * stripe_len;
4994 BUG_ON(offset < stripe_offset); 4993 BUG_ON(offset < stripe_offset);
@@ -5004,7 +5003,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5004 /* allow a write of a full stripe, but make sure we don't 5003 /* allow a write of a full stripe, but make sure we don't
5005 * allow straddling of stripes 5004 * allow straddling of stripes
5006 */ 5005 */
5007 do_div(raid56_full_stripe_start, full_stripe_len); 5006 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
5007 full_stripe_len);
5008 raid56_full_stripe_start *= full_stripe_len; 5008 raid56_full_stripe_start *= full_stripe_len;
5009 } 5009 }
5010 5010
@@ -5129,7 +5129,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5129 stripe_index = 0; 5129 stripe_index = 0;
5130 stripe_nr_orig = stripe_nr; 5130 stripe_nr_orig = stripe_nr;
5131 stripe_nr_end = ALIGN(offset + *length, map->stripe_len); 5131 stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
5132 do_div(stripe_nr_end, map->stripe_len); 5132 stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
5133 stripe_end_offset = stripe_nr_end * map->stripe_len - 5133 stripe_end_offset = stripe_nr_end * map->stripe_len -
5134 (offset + *length); 5134 (offset + *length);
5135 5135
@@ -5137,7 +5137,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5137 if (rw & REQ_DISCARD) 5137 if (rw & REQ_DISCARD)
5138 num_stripes = min_t(u64, map->num_stripes, 5138 num_stripes = min_t(u64, map->num_stripes,
5139 stripe_nr_end - stripe_nr_orig); 5139 stripe_nr_end - stripe_nr_orig);
5140 stripe_index = do_div(stripe_nr, map->num_stripes); 5140 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5141 &stripe_index);
5141 if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) 5142 if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
5142 mirror_num = 1; 5143 mirror_num = 1;
5143 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 5144 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
@@ -5163,9 +5164,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5163 } 5164 }
5164 5165
5165 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { 5166 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
5166 int factor = map->num_stripes / map->sub_stripes; 5167 u32 factor = map->num_stripes / map->sub_stripes;
5167 5168
5168 stripe_index = do_div(stripe_nr, factor); 5169 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5169 stripe_index *= map->sub_stripes; 5170 stripe_index *= map->sub_stripes;
5170 5171
5171 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) 5172 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
@@ -5191,8 +5192,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5191 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || 5192 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5192 mirror_num > 1)) { 5193 mirror_num > 1)) {
5193 /* push stripe_nr back to the start of the full stripe */ 5194 /* push stripe_nr back to the start of the full stripe */
5194 stripe_nr = raid56_full_stripe_start; 5195 stripe_nr = div_u64(raid56_full_stripe_start,
5195 do_div(stripe_nr, stripe_len * nr_data_stripes(map)); 5196 stripe_len * nr_data_stripes(map));
5196 5197
5197 /* RAID[56] write or recovery. Return all stripes */ 5198 /* RAID[56] write or recovery. Return all stripes */
5198 num_stripes = map->num_stripes; 5199 num_stripes = map->num_stripes;
@@ -5202,32 +5203,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5202 stripe_index = 0; 5203 stripe_index = 0;
5203 stripe_offset = 0; 5204 stripe_offset = 0;
5204 } else { 5205 } else {
5205 u64 tmp;
5206
5207 /* 5206 /*
5208 * Mirror #0 or #1 means the original data block. 5207 * Mirror #0 or #1 means the original data block.
5209 * Mirror #2 is RAID5 parity block. 5208 * Mirror #2 is RAID5 parity block.
5210 * Mirror #3 is RAID6 Q block. 5209 * Mirror #3 is RAID6 Q block.
5211 */ 5210 */
5212 stripe_index = do_div(stripe_nr, nr_data_stripes(map)); 5211 stripe_nr = div_u64_rem(stripe_nr,
5212 nr_data_stripes(map), &stripe_index);
5213 if (mirror_num > 1) 5213 if (mirror_num > 1)
5214 stripe_index = nr_data_stripes(map) + 5214 stripe_index = nr_data_stripes(map) +
5215 mirror_num - 2; 5215 mirror_num - 2;
5216 5216
5217 /* We distribute the parity blocks across stripes */ 5217 /* We distribute the parity blocks across stripes */
5218 tmp = stripe_nr + stripe_index; 5218 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
5219 stripe_index = do_div(tmp, map->num_stripes); 5219 &stripe_index);
5220 if (!(rw & (REQ_WRITE | REQ_DISCARD | 5220 if (!(rw & (REQ_WRITE | REQ_DISCARD |
5221 REQ_GET_READ_MIRRORS)) && mirror_num <= 1) 5221 REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
5222 mirror_num = 1; 5222 mirror_num = 1;
5223 } 5223 }
5224 } else { 5224 } else {
5225 /* 5225 /*
5226 * after this do_div call, stripe_nr is the number of stripes 5226 * after this, stripe_nr is the number of stripes on this
5227 * on this device we have to walk to find the data, and 5227 * device we have to walk to find the data, and stripe_index is
5228 * stripe_index is the number of our device in the stripe array 5228 * the number of our device in the stripe array
5229 */ 5229 */
5230 stripe_index = do_div(stripe_nr, map->num_stripes); 5230 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5231 &stripe_index);
5231 mirror_num = stripe_index + 1; 5232 mirror_num = stripe_index + 1;
5232 } 5233 }
5233 BUG_ON(stripe_index >= map->num_stripes); 5234 BUG_ON(stripe_index >= map->num_stripes);
@@ -5254,7 +5255,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5254 need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || 5255 need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5255 mirror_num > 1)) { 5256 mirror_num > 1)) {
5256 u64 tmp; 5257 u64 tmp;
5257 int i, rot; 5258 unsigned rot;
5258 5259
5259 bbio->raid_map = (u64 *)((void *)bbio->stripes + 5260 bbio->raid_map = (u64 *)((void *)bbio->stripes +
5260 sizeof(struct btrfs_bio_stripe) * 5261 sizeof(struct btrfs_bio_stripe) *
@@ -5262,8 +5263,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5262 sizeof(int) * tgtdev_indexes); 5263 sizeof(int) * tgtdev_indexes);
5263 5264
5264 /* Work out the disk rotation on this stripe-set */ 5265 /* Work out the disk rotation on this stripe-set */
5265 tmp = stripe_nr; 5266 div_u64_rem(stripe_nr, num_stripes, &rot);
5266 rot = do_div(tmp, num_stripes);
5267 5267
5268 /* Fill in the logical address of each stripe */ 5268 /* Fill in the logical address of each stripe */
5269 tmp = stripe_nr * nr_data_stripes(map); 5269 tmp = stripe_nr * nr_data_stripes(map);
@@ -5278,8 +5278,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5278 } 5278 }
5279 5279
5280 if (rw & REQ_DISCARD) { 5280 if (rw & REQ_DISCARD) {
5281 int factor = 0; 5281 u32 factor = 0;
5282 int sub_stripes = 0; 5282 u32 sub_stripes = 0;
5283 u64 stripes_per_dev = 0; 5283 u64 stripes_per_dev = 0;
5284 u32 remaining_stripes = 0; 5284 u32 remaining_stripes = 0;
5285 u32 last_stripe = 0; 5285 u32 last_stripe = 0;
@@ -5430,9 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5430 } 5430 }
5431 } 5431 }
5432 if (found) { 5432 if (found) {
5433 u64 length = map->stripe_len; 5433 if (physical_of_found + map->stripe_len <=
5434
5435 if (physical_of_found + length <=
5436 dev_replace->cursor_left) { 5434 dev_replace->cursor_left) {
5437 struct btrfs_bio_stripe *tgtdev_stripe = 5435 struct btrfs_bio_stripe *tgtdev_stripe =
5438 bbio->stripes + num_stripes; 5436 bbio->stripes + num_stripes;
@@ -5528,15 +5526,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5528 rmap_len = map->stripe_len; 5526 rmap_len = map->stripe_len;
5529 5527
5530 if (map->type & BTRFS_BLOCK_GROUP_RAID10) 5528 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5531 do_div(length, map->num_stripes / map->sub_stripes); 5529 length = div_u64(length, map->num_stripes / map->sub_stripes);
5532 else if (map->type & BTRFS_BLOCK_GROUP_RAID0) 5530 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5533 do_div(length, map->num_stripes); 5531 length = div_u64(length, map->num_stripes);
5534 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { 5532 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5535 do_div(length, nr_data_stripes(map)); 5533 length = div_u64(length, nr_data_stripes(map));
5536 rmap_len = map->stripe_len * nr_data_stripes(map); 5534 rmap_len = map->stripe_len * nr_data_stripes(map);
5537 } 5535 }
5538 5536
5539 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); 5537 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
5540 BUG_ON(!buf); /* -ENOMEM */ 5538 BUG_ON(!buf); /* -ENOMEM */
5541 5539
5542 for (i = 0; i < map->num_stripes; i++) { 5540 for (i = 0; i < map->num_stripes; i++) {
@@ -5547,11 +5545,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 			continue;
 
 		stripe_nr = physical - map->stripes[i].physical;
-		do_div(stripe_nr, map->stripe_len);
+		stripe_nr = div_u64(stripe_nr, map->stripe_len);
 
 		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 			stripe_nr = stripe_nr * map->num_stripes + i;
-			do_div(stripe_nr, map->sub_stripes);
+			stripe_nr = div_u64(stripe_nr, map->sub_stripes);
 		} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 			stripe_nr = stripe_nr * map->num_stripes + i;
 		} /* else if RAID[56], multiply by nr_data_stripes().
@@ -5828,8 +5826,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 	u64 length = 0;
 	u64 map_length;
 	int ret;
-	int dev_nr = 0;
-	int total_devs = 1;
+	int dev_nr;
+	int total_devs;
 	struct btrfs_bio *bbio = NULL;
 
 	length = bio->bi_iter.bi_size;
@@ -5870,11 +5868,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 		BUG();
 	}
 
-	while (dev_nr < total_devs) {
+	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
 		dev = bbio->stripes[dev_nr].dev;
 		if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
 			bbio_error(bbio, first_bio, logical);
-			dev_nr++;
 			continue;
 		}
 
@@ -5887,7 +5884,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 			ret = breakup_stripe_bio(root, bbio, first_bio, dev,
 						 dev_nr, rw, async_submit);
 			BUG_ON(ret);
-			dev_nr++;
 			continue;
 		}
 
@@ -5902,7 +5898,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 		submit_stripe_bio(root, bbio, bio,
 				  bbio->stripes[dev_nr].physical, dev_nr, rw,
 				  async_submit);
-		dev_nr++;
 	}
 	btrfs_bio_counter_dec(root->fs_info);
 	return 0;
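The three btrfs_map_bio() hunks above replace a while loop with a manually advanced dev_nr by a for loop, letting the duplicated dev_nr++ lines before each continue disappear. A toy program showing why the for form is safer (the device numbers are invented):

#include <stdio.h>

int main(void)
{
	int total_devs = 4;

	/* With a manual counter, every 'continue' must remember to bump
	 * dev_nr or the loop spins forever; a for-loop increments in
	 * exactly one place, so 'continue' cannot skip it. */
	for (int dev_nr = 0; dev_nr < total_devs; dev_nr++) {
		if (dev_nr == 2)
			continue;	/* no dev_nr++ needed here */
		printf("submitting to device %d\n", dev_nr);
	}
	return 0;
}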
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 83069dec6898..ebc31331a837 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -421,8 +421,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 			  struct btrfs_fs_devices **fs_devices_ret);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
-			       struct btrfs_fs_devices *fs_devices, int step);
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
 int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
 					 char *device_path,
 					 struct btrfs_device **device);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..45ea704be030 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 				name, name_len, -1);
 	if (!di && (flags & XATTR_REPLACE))
 		ret = -ENODATA;
+	else if (IS_ERR(di))
+		ret = PTR_ERR(di);
 	else if (di)
 		ret = btrfs_delete_one_dir_name(trans, root, path, di);
 	goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 		ASSERT(mutex_is_locked(&inode->i_mutex));
 		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
 					name, name_len, 0);
-		if (!di) {
+		if (!di)
 			ret = -ENODATA;
+		else if (IS_ERR(di))
+			ret = PTR_ERR(di);
+		if (ret)
 			goto out;
-		}
 		btrfs_release_path(path);
 		di = NULL;
 	}
@@ -360,22 +364,42 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
 /*
  * Check if the attribute is in a supported namespace.
  *
- * This applied after the check for the synthetic attributes in the system
+ * This is applied after the check for the synthetic attributes in the system
  * namespace.
  */
-static bool btrfs_is_valid_xattr(const char *name)
+static int btrfs_is_valid_xattr(const char *name)
 {
-	return !strncmp(name, XATTR_SECURITY_PREFIX,
-			XATTR_SECURITY_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
+	int len = strlen(name);
+	int prefixlen = 0;
+
+	if (!strncmp(name, XATTR_SECURITY_PREFIX,
+		     XATTR_SECURITY_PREFIX_LEN))
+		prefixlen = XATTR_SECURITY_PREFIX_LEN;
+	else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+		prefixlen = XATTR_SYSTEM_PREFIX_LEN;
+	else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+		prefixlen = XATTR_TRUSTED_PREFIX_LEN;
+	else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+		prefixlen = XATTR_USER_PREFIX_LEN;
+	else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+		prefixlen = XATTR_BTRFS_PREFIX_LEN;
+	else
+		return -EOPNOTSUPP;
+
+	/*
+	 * The name cannot consist of just prefix
+	 */
+	if (len <= prefixlen)
+		return -EINVAL;
+
+	return 0;
 }
 
 ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
 		       void *buffer, size_t size)
 {
+	int ret;
+
 	/*
 	 * If this is a request for a synthetic attribute in the system.*
 	 * namespace use the generic infrastructure to resolve a handler
@@ -384,8 +408,9 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return generic_getxattr(dentry, name, buffer, size);
 
-	if (!btrfs_is_valid_xattr(name))
-		return -EOPNOTSUPP;
+	ret = btrfs_is_valid_xattr(name);
+	if (ret)
+		return ret;
 	return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
 }
 
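With the rework above, btrfs_is_valid_xattr() distinguishes an unknown namespace (-EOPNOTSUPP) from a bare prefix with no attribute name (-EINVAL), and the callers simply propagate the error. A self-contained userspace model of that classification (the prefix table is hard-coded here; the kernel takes its prefixes from <uapi/linux/xattr.h>):

#include <stdio.h>
#include <string.h>
#include <errno.h>

static int is_valid_xattr(const char *name)
{
	static const char *prefixes[] = {
		"security.", "system.", "trusted.", "user.", "btrfs."
	};
	for (size_t i = 0; i < sizeof(prefixes) / sizeof(prefixes[0]); i++) {
		size_t plen = strlen(prefixes[i]);
		if (!strncmp(name, prefixes[i], plen))
			/* known namespace, but reject the bare prefix */
			return strlen(name) > plen ? 0 : -EINVAL;
	}
	return -EOPNOTSUPP;	/* not a namespace we support */
}

int main(void)
{
	printf("user.foo -> %d\n", is_valid_xattr("user.foo"));	/* 0 */
	printf("user.    -> %d\n", is_valid_xattr("user."));	/* -EINVAL */
	printf("bogus.x  -> %d\n", is_valid_xattr("bogus.x"));	/* -EOPNOTSUPP */
	return 0;
}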
@@ -393,6 +418,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		   size_t size, int flags)
 {
 	struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+	int ret;
 
 	/*
 	 * The permission on security.* and system.* is not checked
@@ -409,8 +435,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return generic_setxattr(dentry, name, value, size, flags);
 
-	if (!btrfs_is_valid_xattr(name))
-		return -EOPNOTSUPP;
+	ret = btrfs_is_valid_xattr(name);
+	if (ret)
+		return ret;
 
 	if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
 		return btrfs_set_prop(dentry->d_inode, name,
@@ -426,6 +453,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 int btrfs_removexattr(struct dentry *dentry, const char *name)
 {
 	struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+	int ret;
 
 	/*
 	 * The permission on security.* and system.* is not checked
@@ -442,8 +470,9 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return generic_removexattr(dentry, name);
 
-	if (!btrfs_is_valid_xattr(name))
-		return -EOPNOTSUPP;
+	ret = btrfs_is_valid_xattr(name);
+	if (ret)
+		return ret;
 
 	if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
 		return btrfs_set_prop(dentry->d_inode, name,
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index fb22fd8d8fb8..82990b8f872b 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -403,7 +403,7 @@ next:
 	return ret;
 }
 
-struct btrfs_compress_op btrfs_zlib_compress = {
+const struct btrfs_compress_op btrfs_zlib_compress = {
 	.alloc_workspace = zlib_alloc_workspace,
 	.free_workspace = zlib_free_workspace,
 	.compress_pages = zlib_compress_pages,
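Marking btrfs_zlib_compress const moves the method table into read-only data, so a stray write faults instead of silently redirecting a function pointer. A small illustration under that assumption (the struct and callback below are stand-ins, not the btrfs API):

#include <stdio.h>

struct compress_op {
	int (*compress)(const char *src, char *dst);
};

static int fake_compress(const char *src, char *dst)
{
	(void)src;
	(void)dst;
	return 0;	/* stand-in for zlib_compress_pages() */
}

static const struct compress_op zlib_compress = {
	.compress = fake_compress,
};

int main(void)
{
	char out[16];
	/* zlib_compress.compress = NULL;  <- would no longer compile */
	return zlib_compress.compress("data", out);
}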
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
 	 * to synchronise against __set_page_dirty_buffers and prevent the
 	 * dirty bit from being lost.
 	 */
-	if (ret)
-		cancel_dirty_page(page, PAGE_CACHE_SIZE);
+	if (ret && TestClearPageDirty(page))
+		account_page_cleaned(page, mapping);
 	spin_unlock(&mapping->private_lock);
 out:
 	if (buffers_to_free) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index fd5599d32362..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 		     inode, page, (int)pos, (int)len);
 
 		r = ceph_update_writeable_page(file, pos, len, page);
+		if (r < 0)
+			page_cache_release(page);
+		else
+			*pagep = page;
 	} while (r == -EAGAIN);
 
 	return r;
@@ -1198,8 +1202,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
  * intercept O_DIRECT reads and writes early, this function should
  * never get called.
  */
-static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
-			      struct iov_iter *iter,
+static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter,
 			      loff_t pos)
 {
 	WARN_ON(1);
@@ -1535,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 
 	osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
 
-	err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
-				    "inline_version", &inline_version,
-				    sizeof(inline_version),
-				    CEPH_OSD_CMPXATTR_OP_GT,
-				    CEPH_OSD_CMPXATTR_MODE_U64);
-	if (err)
-		goto out_put;
-
-	err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
-				    "inline_version", &inline_version,
-				    sizeof(inline_version), 0, 0);
-	if (err)
-		goto out_put;
+	{
+		__le64 xattr_buf = cpu_to_le64(inline_version);
+		err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+					    "inline_version", &xattr_buf,
+					    sizeof(xattr_buf),
+					    CEPH_OSD_CMPXATTR_OP_GT,
+					    CEPH_OSD_CMPXATTR_MODE_U64);
+		if (err)
+			goto out_put;
+	}
+
+	{
+		char xattr_buf[32];
+		int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
+					 "%llu", inline_version);
+		err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+					    "inline_version",
+					    xattr_buf, xattr_len, 0, 0);
+		if (err)
+			goto out_put;
+	}
 
 	ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
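The hunk above feeds the OSD two different encodings of the same inline_version: the CMPXATTR guard compares a raw little-endian u64, while SETXATTR stores decimal text. A userspace sketch of the two forms (htole64() from <endian.h> is a glibc-ism assumed for the demo):

#include <stdio.h>
#include <stdint.h>
#include <endian.h>

int main(void)
{
	uint64_t inline_version = 5;

	uint64_t le = htole64(inline_version);	/* 8-byte binary buffer */
	char txt[32];
	int txt_len = snprintf(txt, sizeof(txt), "%llu",
			       (unsigned long long)inline_version);

	/* the two buffers are not interchangeable on the wire */
	printf("binary form: %zu bytes, text form: \"%s\" (%d bytes)\n",
	       sizeof(le), txt, txt_len);
	return 0;
}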
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8172775428a0..11631c4c7d14 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
 	return ret;
 }
 
+static void drop_inode_snap_realm(struct ceph_inode_info *ci)
+{
+	struct ceph_snap_realm *realm = ci->i_snap_realm;
+	spin_lock(&realm->inodes_with_caps_lock);
+	list_del_init(&ci->i_snap_realm_item);
+	ci->i_snap_realm_counter++;
+	ci->i_snap_realm = NULL;
+	spin_unlock(&realm->inodes_with_caps_lock);
+	ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
+			    realm);
+}
+
 /*
  * Remove a cap. Take steps to deal with a racing iterate_session_caps.
  *
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 	if (removed)
 		ceph_put_cap(mdsc, cap);
 
-	if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
-		struct ceph_snap_realm *realm = ci->i_snap_realm;
-		spin_lock(&realm->inodes_with_caps_lock);
-		list_del_init(&ci->i_snap_realm_item);
-		ci->i_snap_realm_counter++;
-		ci->i_snap_realm = NULL;
-		spin_unlock(&realm->inodes_with_caps_lock);
-		ceph_put_snap_realm(mdsc, realm);
-	}
+	/* when reconnect denied, we remove session caps forcibly,
+	 * i_wr_ref can be non-zero. If there are ongoing write,
+	 * keep i_snap_realm.
+	 */
+	if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
+		drop_inode_snap_realm(ci);
+
 	if (!__ceph_is_any_real_caps(ci))
 		__cap_delay_cancel(mdsc, ci);
 }
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 	int was = ci->i_dirty_caps;
 	int dirty = 0;
 
+	if (!ci->i_auth_cap) {
+		pr_warn("__mark_dirty_caps %p %llx mask %s, "
+			"but no auth cap (session was closed?)\n",
+			inode, ceph_ino(inode), ceph_cap_string(mask));
+		return 0;
+	}
+
 	dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
 	     ceph_cap_string(mask), ceph_cap_string(was),
 	     ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 					       ci->i_snap_realm->cached_context);
 		dout(" inode %p now dirty snapc %p auth cap %p\n",
 		     &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
-		WARN_ON(!ci->i_auth_cap);
 		BUG_ON(!list_empty(&ci->i_dirty_item));
 		spin_lock(&mdsc->cap_dirty_lock);
 		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
 	if (!mdsc->stopping && inode->i_nlink > 0) {
 		if (want) {
 			retain |= CEPH_CAP_ANY;       /* be greedy */
+		} else if (S_ISDIR(inode->i_mode) &&
+			   (issued & CEPH_CAP_FILE_SHARED) &&
+			   __ceph_dir_is_complete(ci)) {
+			/*
+			 * If a directory is complete, we want to keep
+			 * the exclusive cap. So that MDS does not end up
+			 * revoking the shared cap on every create/unlink
+			 * operation.
+			 */
+			want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+			retain |= want;
 		} else {
+
 			retain |= CEPH_CAP_ANY_SHARED;
 			/*
 			 * keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 				wake = 1;
 			}
 		}
+		/* see comment in __ceph_remove_cap() */
+		if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+			drop_inode_snap_realm(ci);
 	}
 	spin_unlock(&ci->i_ceph_lock);
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83e9976f7189..e729b79812b4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	/* can we use the dcache? */
 	spin_lock(&ci->i_ceph_lock);
 	if ((ctx->pos == 2 || fi->dentry) &&
+	    ceph_test_mount_opt(fsc, DCACHE) &&
 	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
 	    ceph_snap(inode) != CEPH_SNAPDIR &&
 	    __ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
 			ceph_mdsc_put_request(req);
 			return err;
 		}
-		req->r_inode = inode;
-		ihold(inode);
-		req->r_dentry = dget(file->f_path.dentry);
 		/* hints to request -> mds selection code */
 		req->r_direct_mode = USE_AUTH_MDS;
 		req->r_direct_hash = ceph_frag_value(frag);
 		req->r_direct_is_hash = true;
-		req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+		if (fi->last_name) {
+			req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+			if (!req->r_path2) {
+				ceph_mdsc_put_request(req);
+				return -ENOMEM;
+			}
+		}
 		req->r_readdir_offset = fi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
+
+		req->r_inode = inode;
+		ihold(inode);
+		req->r_dentry = dget(file->f_path.dentry);
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
 		if (err < 0) {
 			ceph_mdsc_put_request(req);
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 				   fsc->mount_options->snapdir_name,
 				   dentry->d_name.len) &&
 	    !is_root_ceph_dentry(dir, dentry) &&
+	    ceph_test_mount_opt(fsc, DCACHE) &&
 	    __ceph_dir_is_complete(ci) &&
 	    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
 		spin_unlock(&ci->i_ceph_lock);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 		err = PTR_ERR(req);
 		goto out;
 	}
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
 	req->r_path2 = kstrdup(dest, GFP_NOFS);
+	if (!req->r_path2) {
+		err = -ENOMEM;
+		ceph_mdsc_put_request(req);
+		goto out;
+	}
 	req->r_locked_dir = dir;
+	req->r_dentry = dget(dentry);
+	req->r_num_caps = 2;
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
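The readdir and symlink hunks follow the same discipline: duplicate the string and bail out on NULL before attaching reference-counted state that the request destructor would otherwise have to unwind. A sketch of that ordering (struct request and setup_request() are illustrative names, not the ceph API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct request {
	char *path;
};

static int setup_request(struct request *req, const char *dest)
{
	req->path = strdup(dest);	/* kstrdup() analogue */
	if (!req->path)
		return -1;		/* fail early: nothing else to undo */
	/* only now attach refcounted resources ... */
	return 0;
}

int main(void)
{
	struct request req;
	if (setup_request(&req, "/target") == 0) {
		printf("path=%s\n", req.path);
		free(req.path);
	}
	return 0;
}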
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
+	int op = CEPH_MDS_OP_RENAME;
 	int err;
 
 	if (ceph_snap(old_dir) != ceph_snap(new_dir))
 		return -EXDEV;
-	if (ceph_snap(old_dir) != CEPH_NOSNAP ||
-	    ceph_snap(new_dir) != CEPH_NOSNAP)
-		return -EROFS;
+	if (ceph_snap(old_dir) != CEPH_NOSNAP) {
+		if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
+			op = CEPH_MDS_OP_RENAMESNAP;
+		else
+			return -EROFS;
+	}
 	dout("rename dir %p dentry %p to dir %p dentry %p\n",
 	     old_dir, old_dentry, new_dir, new_dentry);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	ihold(old_dir);
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
 		dout("dir_fsync %p wait on tid %llu (until %llu)\n",
 		     inode, req->r_tid, last_tid);
 		if (req->r_timeout) {
-			ret = wait_for_completion_timeout(
-				&req->r_safe_completion, req->r_timeout);
-			if (ret > 0)
+			unsigned long time_left = wait_for_completion_timeout(
+							&req->r_safe_completion,
+							req->r_timeout);
+			if (time_left > 0)
 				ret = 0;
-			else if (ret == 0)
+			else
 				ret = -EIO;  /* timed out */
 		} else {
 			wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
 	.getattr = ceph_getattr,
 	.mkdir = ceph_mkdir,
 	.rmdir = ceph_unlink,
+	.rename = ceph_rename,
 };
 
 const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075a823d..b9b8eb225f66 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 
 #include "super.h"
@@ -458,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 	if (ret < 0)
 		return ret;
 
-	if (file->f_flags & O_DIRECT) {
+	if (iocb->ki_flags & IOCB_DIRECT) {
 		while (iov_iter_count(i)) {
 			size_t start;
 			ssize_t n;
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(to);
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct page *pinned_page = NULL;
@@ -829,7 +828,7 @@ again:
 		return ret;
 
 	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (iocb->ki_filp->f_flags & O_DIRECT) ||
+	    (iocb->ki_flags & IOCB_DIRECT) ||
 	    (fi->flags & CEPH_F_SYNC)) {
 
 		dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
@@ -942,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_osd_client *osdc =
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
-	ssize_t count = iov_iter_count(from), written = 0;
+	ssize_t count, written = 0;
 	int err, want, got;
-	loff_t pos = iocb->ki_pos;
+	loff_t pos;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
@@ -954,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = inode_to_bdi(inode);
 
-	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-	if (err)
-		goto out;
-
-	if (count == 0)
+	err = generic_write_checks(iocb, from);
+	if (err <= 0)
 		goto out;
-	iov_iter_truncate(from, count);
 
+	pos = iocb->ki_pos;
+	count = iov_iter_count(from);
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -998,12 +995,12 @@ retry_snap:
 	     inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
 
 	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+	    (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
 		struct iov_iter data;
 		mutex_unlock(&inode->i_mutex);
 		/* we might need to revert back to that point */
 		data = *from;
-		if (file->f_flags & O_DIRECT)
+		if (iocb->ki_flags & IOCB_DIRECT)
 			written = ceph_sync_direct_write(iocb, &data, pos);
 		else
 			written = ceph_sync_write(iocb, &data, pos);
@@ -1332,8 +1329,6 @@ const struct file_operations ceph_file_fops = {
 	.open = ceph_open,
 	.release = ceph_release,
 	.llseek = ceph_llseek,
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = ceph_read_iter,
 	.write_iter = ceph_write_iter,
 	.mmap = ceph_mmap,
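Dropping the .read/.write entries here (and in the cifs tables further down) works because they were only sync adapters and the VFS can reach ->read_iter/->write_iter directly. A loose userspace analogue of the iterator idea, using writev(2): one iovec-walking path serves both the single-buffer and the vectored case.

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char a[] = "hello ", b[] = "world\n";
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = strlen(a) },
		{ .iov_base = b, .iov_len = strlen(b) },
	};
	/* writev() consumes the same descriptor whether the caller had
	 * one buffer or many; a plain write() is just the 1-element case. */
	ssize_t n = writev(STDOUT_FILENO, iov, 2);
	return n < 0;
}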
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 71c073f38e54..0a2eb32ffe43 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
 	spin_unlock(&session->s_cap_lock);
 }
 
+static void cleanup_session_requests(struct ceph_mds_client *mdsc,
+				     struct ceph_mds_session *session)
+{
+	struct ceph_mds_request *req;
+	struct rb_node *p;
+
+	dout("cleanup_session_requests mds%d\n", session->s_mds);
+	mutex_lock(&mdsc->mutex);
+	while (!list_empty(&session->s_unsafe)) {
+		req = list_first_entry(&session->s_unsafe,
+				       struct ceph_mds_request, r_unsafe_item);
+		list_del_init(&req->r_unsafe_item);
+		pr_info(" dropping unsafe request %llu\n", req->r_tid);
+		__unregister_request(mdsc, req);
+	}
+	/* zero r_attempts, so kick_requests() will re-send requests */
+	p = rb_first(&mdsc->request_tree);
+	while (p) {
+		req = rb_entry(p, struct ceph_mds_request, r_node);
+		p = rb_next(p);
+		if (req->r_session &&
+		    req->r_session->s_mds == session->s_mds)
+			req->r_attempts = 0;
+	}
+	mutex_unlock(&mdsc->mutex);
+}
+
 /*
  * Helper to safely iterate over all caps associated with a session, with
  * special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
 	__ceph_remove_cap(cap, false);
-	if (!__ceph_is_any_real_caps(ci)) {
+	if (!ci->i_auth_cap) {
 		struct ceph_mds_client *mdsc =
 			ceph_sb_to_client(inode->i_sb)->mdsc;
 
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 			mdsc->num_cap_flushing--;
 			drop = 1;
 		}
-		if (drop && ci->i_wrbuffer_ref) {
-			pr_info(" dropping dirty data for %p %lld\n",
-				inode, ceph_ino(inode));
-			ci->i_wrbuffer_ref = 0;
-			ci->i_wrbuffer_ref_head = 0;
-			drop++;
-		}
 		spin_unlock(&mdsc->cap_dirty_lock);
 	}
 	spin_unlock(&ci->i_ceph_lock);
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
  */
 static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 					       struct ceph_mds_request *req,
-					       int mds)
+					       int mds, bool drop_cap_releases)
 {
 	struct ceph_msg *msg;
 	struct ceph_mds_request_head *head;
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 		releases += ceph_encode_inode_release(&p,
 		      req->r_old_dentry->d_inode,
 		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+
+	if (drop_cap_releases) {
+		releases = 0;
+		p = msg->front.iov_base + req->r_request_release_offset;
+	}
+
 	head->num_releases = cpu_to_le16(releases);
 
 	/* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
  */
 static int __prepare_send_request(struct ceph_mds_client *mdsc,
 				  struct ceph_mds_request *req,
-				  int mds)
+				  int mds, bool drop_cap_releases)
 {
 	struct ceph_mds_request_head *rhead;
 	struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 		ceph_msg_put(req->r_request);
 		req->r_request = NULL;
 	}
-	msg = create_request_message(mdsc, req, mds);
+	msg = create_request_message(mdsc, req, mds, drop_cap_releases);
 	if (IS_ERR(msg)) {
 		req->r_err = PTR_ERR(msg);
 		complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
 	if (req->r_request_started == 0)   /* note request start time */
 		req->r_request_started = jiffies;
 
-	err = __prepare_send_request(mdsc, req, mds);
+	err = __prepare_send_request(mdsc, req, mds, false);
 	if (!err) {
 		ceph_msg_get(req->r_request);
 		ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
 	case CEPH_SESSION_CLOSE:
 		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
 			pr_info("mds%d reconnect denied\n", session->s_mds);
+		cleanup_session_requests(mdsc, session);
 		remove_session_caps(session);
 		wake = 2; /* for good measure */
 		wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 
 	mutex_lock(&mdsc->mutex);
 	list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
-		err = __prepare_send_request(mdsc, req, session->s_mds);
+		err = __prepare_send_request(mdsc, req, session->s_mds, true);
 		if (!err) {
 			ceph_msg_get(req->r_request);
 			ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 			continue; /* only old requests */
 		if (req->r_session &&
 		    req->r_session->s_mds == session->s_mds) {
-			err = __prepare_send_request(mdsc, req, session->s_mds);
+			err = __prepare_send_request(mdsc, req,
+						     session->s_mds, true);
 			if (!err) {
 				ceph_msg_get(req->r_request);
 				ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	spin_unlock(&session->s_cap_lock);
 
 	/* trim unused caps to reduce MDS's cache rejoin time */
-	shrink_dcache_parent(mdsc->fsc->sb->s_root);
+	if (mdsc->fsc->sb->s_root)
+		shrink_dcache_parent(mdsc->fsc->sb->s_root);
 
 	ceph_con_close(&session->s_con);
 	ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 	    di->lease_renew_from &&
 	    di->lease_renew_after == 0) {
 		unsigned long duration =
-			le32_to_cpu(h->duration_ms) * HZ / 1000;
+			msecs_to_jiffies(le32_to_cpu(h->duration_ms));
 
 		di->lease_seq = seq;
 		dentry->d_time = di->lease_renew_from + duration;
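The open-coded duration_ms * HZ / 1000 above can wrap in 32-bit arithmetic before the divide ever runs, which is what msecs_to_jiffies() avoids. A runnable demonstration (HZ pinned to 1000 purely for the demo):

#include <stdio.h>
#include <stdint.h>

#define HZ 1000u	/* assumption for the demo */

int main(void)
{
	uint32_t duration_ms = 5000000;	/* roughly 83 minutes */

	uint32_t naive = duration_ms * HZ / 1000;	/* multiply wraps at 2^32 */
	uint64_t safe  = (uint64_t)duration_ms * HZ / 1000;

	printf("naive: %u jiffies, widened: %llu jiffies\n",
	       naive, (unsigned long long)safe);
	return 0;
}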
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 51cc23e48111..89e6bc321df3 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
 	case CEPH_MDS_OP_LSSNAP: return "lssnap";
 	case CEPH_MDS_OP_MKSNAP: return "mksnap";
 	case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+	case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
 	case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
 	case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
 	}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a63997b8bcff..e463ebd69a9c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
 	fsopt->rsize = CEPH_RSIZE_DEFAULT;
 	fsopt->rasize = CEPH_RASIZE_DEFAULT;
 	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+	if (!fsopt->snapdir_name) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
 	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
 	fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
 	struct ceph_mount_options *fsopt = fsc->mount_options;
-	struct ceph_options *opt = fsc->client->options;
-
-	if (opt->flags & CEPH_OPT_FSID)
-		seq_printf(m, ",fsid=%pU", &opt->fsid);
-	if (opt->flags & CEPH_OPT_NOSHARE)
-		seq_puts(m, ",noshare");
-	if (opt->flags & CEPH_OPT_NOCRC)
-		seq_puts(m, ",nocrc");
-	if (opt->flags & CEPH_OPT_NOMSGAUTH)
-		seq_puts(m, ",nocephx_require_signatures");
-	if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
-		seq_puts(m, ",notcp_nodelay");
-
-	if (opt->name)
-		seq_printf(m, ",name=%s", opt->name);
-	if (opt->key)
-		seq_puts(m, ",secret=<hidden>");
-
-	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
-	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
-		seq_printf(m, ",osdkeepalivetimeout=%d",
-			   opt->osd_keepalive_timeout);
+	size_t pos;
+	int ret;
+
+	/* a comma between MNT/MS and client options */
+	seq_putc(m, ',');
+	pos = m->count;
+
+	ret = ceph_print_client_options(m, fsc->client);
+	if (ret)
+		return ret;
+
+	/* retract our comma if no client options */
+	if (m->count == pos)
+		m->count--;
 
 	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
 		seq_puts(m, ",dirstat");
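The rewritten ceph_show_options() prints the separating comma eagerly, remembers the output length, and retracts the comma when ceph_print_client_options() turns out to emit nothing. A buffer-level model of the trick (struct seq here is a toy, not the kernel's seq_file):

#include <stdio.h>
#include <string.h>

struct seq {
	char buf[128];
	size_t count;
};

static void seq_putc(struct seq *m, char c)
{
	if (m->count < sizeof(m->buf) - 1)
		m->buf[m->count++] = c;
}

static void print_client_options(struct seq *m, int have_opts)
{
	if (have_opts)
		m->count += (size_t)snprintf(m->buf + m->count,
					     sizeof(m->buf) - m->count,
					     "name=admin");
}

int main(void)
{
	struct seq m = { .count = 0 };

	seq_putc(&m, ',');
	size_t pos = m.count;
	print_client_options(&m, 0);	/* nothing printed... */
	if (m.count == pos)
		m.count--;		/* ...so retract our comma */
	m.buf[m.count] = '\0';
	printf("[%s]\n", m.buf);	/* prints [] */
	return 0;
}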
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_puts(m, ",norbytes");
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
 		seq_puts(m, ",noasyncreaddir");
-	if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
-		seq_puts(m, ",dcache");
-	else
+	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 		seq_puts(m, ",nodcache");
 	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
 		seq_puts(m, ",fsc");
-	else
-		seq_puts(m, ",nofsc");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
 	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
 		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+
 	return 0;
 }
 
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	if (IS_ERR(req))
 		return ERR_CAST(req);
 	req->r_path1 = kstrdup(path, GFP_NOFS);
+	if (!req->r_path1) {
+		root = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
 	req->r_ino1.ino = CEPH_INO_ROOT;
 	req->r_ino1.snap = CEPH_NOSNAP;
 	req->r_started = started;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 04c8124ed30e..fa20e1318939 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 
-#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
+#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES | \
+				   CEPH_MOUNT_OPT_DCACHE)
 
 #define ceph_set_mount_opt(fsc, opt) \
 	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 
 /* file.c */
 extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
 
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5a492caf34cb..5c4c9c256931 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
 		err = PTR_ERR(req);
 		goto out;
 	}
-	req->r_inode = inode;
-	ihold(inode);
-	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
-	req->r_num_caps = 1;
+
 	req->r_args.setxattr.flags = cpu_to_le32(flags);
 	req->r_path2 = kstrdup(name, GFP_NOFS);
+	if (!req->r_path2) {
+		ceph_mdsc_put_request(req);
+		err = -ENOMEM;
+		goto out;
+	}
 
 	req->r_pagelist = pagelist;
 	pagelist = NULL;
 
+	req->r_inode = inode;
+	ihold(inode);
+	req->r_num_caps = 1;
+	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+
 	dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	ceph_mdsc_put_request(req);
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 				       USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	req->r_path2 = kstrdup(name, GFP_NOFS);
+	if (!req->r_path2)
+		return -ENOMEM;
+
 	req->r_inode = inode;
 	ihold(inode);
-	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 	req->r_num_caps = 1;
-	req->r_path2 = kstrdup(name, GFP_NOFS);
-
+	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	ceph_mdsc_put_request(req);
 	return err;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ac7445e6ec7..aa0dc2573374 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,6 +1,9 @@
 /*
  *   fs/cifs/cifsencrypt.c
  *
+ *   Encryption and hashing operations relating to NTLM, NTLMv2.  See MS-NLMP
+ *   for more detailed information
+ *
  *   Copyright (C) International Business Machines  Corp., 2005,2013
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
 				 __func__);
 			return rc;
 		}
-	} else if (ses->serverName) {
+	} else {
+		/* We use ses->serverName if no domain name available */
 		len = strlen(ses->serverName);
 
 		server = kmalloc(2 + (len * 2), GFP_KERNEL);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d72fe37f5420..eaab4b2a0595 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -906,8 +906,6 @@ const struct inode_operations cifs_symlink_inode_ops = {
 };
 
 const struct file_operations cifs_file_ops = {
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = cifs_loose_read_iter,
 	.write_iter = cifs_file_write_iter,
 	.open = cifs_open,
@@ -926,8 +924,6 @@ const struct file_operations cifs_file_ops = {
 };
 
 const struct file_operations cifs_file_strict_ops = {
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = cifs_strict_readv,
 	.write_iter = cifs_strict_writev,
 	.open = cifs_open,
@@ -947,8 +943,6 @@ const struct file_operations cifs_file_strict_ops = {
 
 const struct file_operations cifs_file_direct_ops = {
 	/* BB reevaluate whether they can be done with directio, no cache */
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = cifs_user_readv,
 	.write_iter = cifs_user_writev,
 	.open = cifs_open,
@@ -967,8 +961,6 @@ const struct file_operations cifs_file_direct_ops = {
 };
 
 const struct file_operations cifs_file_nobrl_ops = {
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = cifs_loose_read_iter,
 	.write_iter = cifs_file_write_iter,
 	.open = cifs_open,
@@ -986,8 +978,6 @@ const struct file_operations cifs_file_nobrl_ops = {
 };
 
 const struct file_operations cifs_file_strict_nobrl_ops = {
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = cifs_strict_readv,
 	.write_iter = cifs_strict_writev,
 	.open = cifs_open,
@@ -1006,8 +996,6 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
 	/* BB reevaluate whether they can be done with directio, no cache */
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = cifs_user_readv,
 	.write_iter = cifs_user_writev,
 	.open = cifs_open,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3aa999ab785..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
 
 	length = atomic_dec_return(&tcpSesAllocCount);
 	if (length > 0)
-		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
-				GFP_KERNEL);
+		mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
 }
 
 static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
 
 	length = atomic_inc_return(&tcpSesAllocCount);
 	if (length > 1)
-		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
-				GFP_KERNEL);
+		mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
 
 	set_freezable();
 	while (server->tcpStatus != CifsExiting) {
@@ -1599,6 +1597,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 				pr_warn("CIFS: username too long\n");
 				goto cifs_parse_mount_err;
 			}
+
+			kfree(vol->username);
 			vol->username = kstrdup(string, GFP_KERNEL);
 			if (!vol->username)
 				goto cifs_parse_mount_err;
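The kfree() additions in this hunk and the two below guard against a mount string that names the same option twice: each occurrence allocates a fresh copy, so the previous one must be freed first. Modeled in userspace:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *username = NULL;
	const char *occurrences[] = { "a", "b" };	/* user= given twice */

	for (int i = 0; i < 2; i++) {
		free(username);			/* the added kfree(); free(NULL) is a no-op */
		username = strdup(occurrences[i]);
		if (!username)
			return 1;
	}
	printf("final username: %s\n", username);	/* "b", no leak */
	free(username);
	return 0;
}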
@@ -1700,6 +1700,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 				goto cifs_parse_mount_err;
 			}
 
+			kfree(vol->domainname);
 			vol->domainname = kstrdup(string, GFP_KERNEL);
 			if (!vol->domainname) {
 				pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			}
 
 			if (strncasecmp(string, "default", 7) != 0) {
+				kfree(vol->iocharset);
 				vol->iocharset = kstrdup(string,
 							 GFP_KERNEL);
 				if (!vol->iocharset) {
@@ -2913,8 +2915,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
 		 * calling name ends in null (byte 16) from old smb
 		 * convention.
 		 */
-		if (server->workstation_RFC1001_name &&
-		    server->workstation_RFC1001_name[0] != 0)
+		if (server->workstation_RFC1001_name[0] != 0)
 			rfc1002mangle(ses_init_buf->trailer.
 				      session_req.calling_name,
 				      server->workstation_RFC1001_name,
@@ -3692,6 +3693,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 #endif /* CIFS_WEAK_PW_HASH */
 		rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
 				  bcc_ptr, nls_codepage);
+		if (rc) {
+			cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
+				 __func__, rc);
+			cifs_buf_release(smb_buffer);
+			return rc;
+		}
 
 		bcc_ptr += CIFS_AUTH_RESP_SIZE;
 		if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a94b3e673182..ca2bc5406306 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1823,6 +1823,7 @@ refind_writable:
 			cifsFileInfo_put(inv_file);
 			spin_lock(&cifs_file_list_lock);
 			++refind;
+			inv_file = NULL;
 			goto refind_writable;
 		}
 	}
@@ -2559,10 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 	return rc;
 }
 
-static ssize_t
-cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 {
-	size_t len;
+	struct file *file = iocb->ki_filp;
 	ssize_t total_written = 0;
 	struct cifsFileInfo *open_file;
 	struct cifs_tcon *tcon;
@@ -2572,15 +2572,15 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
 	struct iov_iter saved_from;
 	int rc;
 
-	len = iov_iter_count(from);
-	rc = generic_write_checks(file, poffset, &len, 0);
-	if (rc)
-		return rc;
-
-	if (!len)
-		return 0;
+	/*
+	 * BB - optimize the way when signing is disabled. We can drop this
+	 * extra memory-to-memory copying and use iovec buffers for constructing
+	 * write request.
+	 */
 
-	iov_iter_truncate(from, len);
+	rc = generic_write_checks(iocb, from);
+	if (rc <= 0)
+		return rc;
 
 	INIT_LIST_HEAD(&wdata_list);
 	cifs_sb = CIFS_FILE_SB(file);
@@ -2592,8 +2592,8 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
 
 	memcpy(&saved_from, from, sizeof(struct iov_iter));
 
-	rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
-				  &wdata_list);
+	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
+				  open_file, cifs_sb, &wdata_list);
 
 	/*
 	 * If at least one write was successfully sent, then discard any rc
@@ -2632,7 +2632,7 @@ restart_loop:
 				memcpy(&tmp_from, &saved_from,
 				       sizeof(struct iov_iter));
 				iov_iter_advance(&tmp_from,
-						 wdata->offset - *poffset);
+						 wdata->offset - iocb->ki_pos);
 
 				rc = cifs_write_from_iter(wdata->offset,
 						wdata->bytes, &tmp_from,
@@ -2649,34 +2649,13 @@ restart_loop:
 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
 	}
 
-	if (total_written > 0)
-		*poffset += total_written;
+	if (unlikely(!total_written))
+		return rc;
 
+	iocb->ki_pos += total_written;
+	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
 	cifs_stats_bytes_written(tcon, total_written);
-	return total_written ? total_written : (ssize_t)rc;
-}
-
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
-{
-	ssize_t written;
-	struct inode *inode;
-	loff_t pos = iocb->ki_pos;
-
-	inode = file_inode(iocb->ki_filp);
-
-	/*
-	 * BB - optimize the way when signing is disabled. We can drop this
-	 * extra memory-to-memory copying and use iovec buffers for constructing
-	 * write request.
-	 */
-
-	written = cifs_iovec_write(iocb->ki_filp, from, &pos);
-	if (written > 0) {
-		set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
-		iocb->ki_pos = pos;
-	}
-
-	return written;
+	return total_written;
 }
 
 static ssize_t
@@ -2687,8 +2666,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 	struct inode *inode = file->f_mapping->host;
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
-	ssize_t rc = -EACCES;
-	loff_t lock_pos = iocb->ki_pos;
+	ssize_t rc;
 
 	/*
 	 * We need to hold the sem to be sure nobody modifies lock list
@@ -2696,23 +2674,24 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 	 */
 	down_read(&cinode->lock_sem);
 	mutex_lock(&inode->i_mutex);
-	if (file->f_flags & O_APPEND)
-		lock_pos = i_size_read(inode);
-	if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
+
+	rc = generic_write_checks(iocb, from);
+	if (rc <= 0)
+		goto out;
+
+	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
 				     server->vals->exclusive_lock_type, NULL,
-				     CIFS_WRITE_OP)) {
+				     CIFS_WRITE_OP))
 		rc = __generic_file_write_iter(iocb, from);
-		mutex_unlock(&inode->i_mutex);
-
-		if (rc > 0) {
-			ssize_t err;
-
-			err = generic_write_sync(file, iocb->ki_pos - rc, rc);
-			if (err < 0)
-				rc = err;
-		}
-	} else {
-		mutex_unlock(&inode->i_mutex);
+	else
+		rc = -EACCES;
+out:
+	mutex_unlock(&inode->i_mutex);
+
+	if (rc > 0) {
+		ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+		if (err < 0)
+			rc = err;
 	}
 	up_read(&cinode->lock_sem);
 	return rc;
@@ -3876,8 +3855,7 @@ void cifs_oplock_break(struct work_struct *work)
  * Direct IO is not yet supported in the cached mode.
  */
 static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
-	       loff_t pos)
+cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 	/*
 	 * FIXME
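
The fs/cifs/file.c hunks above fold cifs_iovec_write() into cifs_user_writev() and finish the move to the (kiocb, iov_iter) convention: the file position lives in iocb->ki_pos, the length comes from iov_iter_count(), and generic_write_checks() now takes the iocb/iterator pair and returns <= 0 when there is nothing to do. A minimal sketch of a ->write_iter method in this style (the myfs_* names are hypothetical):

	static ssize_t myfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
	{
		ssize_t rc, written;

		rc = generic_write_checks(iocb, from);	/* <= 0: error or empty write */
		if (rc <= 0)
			return rc;

		/* hypothetical backend send; consumes the iterator */
		written = myfs_send_write(iocb->ki_filp, from, iocb->ki_pos);
		if (written > 0)
			iocb->ki_pos += written;	/* position update is ours now */
		return written;
	}
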
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2d4f37235ed0..3e126d7bb2ea 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 			cifs_buf_release(srchinf->ntwrk_buf_start);
 		}
 		kfree(srchinf);
+		if (rc)
+			goto cgii_exit;
 	} else
 		goto cgii_exit;
 
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 689f035915cf..22dfdf17d065 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 
 	/* return pointer to beginning of data area, ie offset from SMB start */
 	if ((*off != 0) && (*len != 0))
-		return hdr->ProtocolId + *off;
+		return (char *)(&hdr->ProtocolId[0]) + *off;
 	else
 		return NULL;
 }
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 96b5d40a2ece..eab05e1aa587 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid,
 
 			/* No need to change MaxChunks since already set to 1 */
 			chunk_sizes_updated = true;
-		}
+		} else
+			goto cchunk_out;
 	}
 
 cchunk_out:
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3417340bf89e..65cd7a84c8bc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	struct smb2_ioctl_req *req;
 	struct smb2_ioctl_rsp *rsp;
 	struct TCP_Server_Info *server;
-	struct cifs_ses *ses = tcon->ses;
+	struct cifs_ses *ses;
 	struct kvec iov[2];
 	int resp_buftype;
 	int num_iovecs;
@@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	if (plen)
 		*plen = 0;
 
+	if (tcon)
+		ses = tcon->ses;
+	else
+		return -EIO;
+
 	if (ses && (ses->server))
 		server = ses->server;
 	else
@@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 	rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
 
 	if ((rc != 0) && (rc != -EINVAL)) {
-		if (tcon)
-			cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+		cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
 		goto ioctl_exit;
 	} else if (rc == -EINVAL) {
 		if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
 		    (opcode != FSCTL_SRV_COPYCHUNK)) {
-			if (tcon)
-				cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+			cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
 			goto ioctl_exit;
 		}
 	}
@@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 
 	rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
 
-	if ((rc != 0) && tcon)
+	if (rc != 0)
 		cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
 
 	free_rsp_buf(resp_buftype, iov[0].iov_base);
@@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
 	struct kvec iov[2];
 	int rc = 0;
 	int len;
-	int resp_buftype;
+	int resp_buftype = CIFS_NO_BUFFER;
 	unsigned char *bufptr;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses = tcon->ses;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index d244d743a232..1da3805f3ddc 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -27,19 +27,14 @@
 #include "coda_int.h"
 
 static ssize_t
-coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *ppos)
+coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	struct coda_file_info *cfi;
-	struct file *host_file;
+	struct file *coda_file = iocb->ki_filp;
+	struct coda_file_info *cfi = CODA_FTOC(coda_file);
 
-	cfi = CODA_FTOC(coda_file);
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
-	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op->read)
-		return -EINVAL;
-
-	return host_file->f_op->read(host_file, buf, count, ppos);
+	return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
 }
 
 static ssize_t
@@ -64,32 +59,25 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
 }
 
 static ssize_t
-coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos)
+coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	struct inode *host_inode, *coda_inode = file_inode(coda_file);
-	struct coda_file_info *cfi;
+	struct file *coda_file = iocb->ki_filp;
+	struct inode *coda_inode = file_inode(coda_file);
+	struct coda_file_info *cfi = CODA_FTOC(coda_file);
 	struct file *host_file;
 	ssize_t ret;
 
-	cfi = CODA_FTOC(coda_file);
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
-	host_file = cfi->cfi_container;
-
-	if (!host_file->f_op->write)
-		return -EINVAL;
 
-	host_inode = file_inode(host_file);
+	host_file = cfi->cfi_container;
 	file_start_write(host_file);
 	mutex_lock(&coda_inode->i_mutex);
-
-	ret = host_file->f_op->write(host_file, buf, count, ppos);
-
-	coda_inode->i_size = host_inode->i_size;
+	ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
+	coda_inode->i_size = file_inode(host_file)->i_size;
 	coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
 	coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
 	mutex_unlock(&coda_inode->i_mutex);
 	file_end_write(host_file);
-
 	return ret;
 }
 
@@ -231,8 +219,8 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
 
 const struct file_operations coda_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= coda_file_read,
-	.write		= coda_file_write,
+	.read_iter	= coda_file_read_iter,
+	.write_iter	= coda_file_write_iter,
 	.mmap		= coda_file_mmap,
 	.open		= coda_open,
 	.release	= coda_release,
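
With ->read/->write gone, coda forwards I/O to the container file through the iter-based VFS helpers, which also removes the need to check the lower file's f_op methods by hand. A sketch of the forwarding idiom for a stacking filesystem (stackfs_lower() is a hypothetical accessor for the lower struct file):

	static ssize_t stackfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
	{
		struct file *lower = stackfs_lower(iocb->ki_filp);

		/* vfs_iter_read() advances the position it is handed, so the
		 * upper file's ki_pos can be passed straight through */
		return vfs_iter_read(lower, to, &iocb->ki_pos);
	}
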
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index afec6450450f..6b8e2f091f5b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -570,6 +570,7 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
 #define BNEPCONNDEL	_IOW('B', 201, int)
 #define BNEPGETCONNLIST	_IOR('B', 210, int)
 #define BNEPGETCONNINFO	_IOR('B', 211, int)
+#define BNEPGETSUPPFEAT	_IOR('B', 212, int)
 
 #define CMTPCONNADD	_IOW('C', 200, int)
 #define CMTPCONNDEL	_IOW('C', 201, int)
@@ -1247,6 +1248,7 @@ COMPATIBLE_IOCTL(BNEPCONNADD)
 COMPATIBLE_IOCTL(BNEPCONNDEL)
 COMPATIBLE_IOCTL(BNEPGETCONNLIST)
 COMPATIBLE_IOCTL(BNEPGETCONNINFO)
+COMPATIBLE_IOCTL(BNEPGETSUPPFEAT)
 COMPATIBLE_IOCTL(CMTPCONNADD)
 COMPATIBLE_IOCTL(CMTPCONNDEL)
 COMPATIBLE_IOCTL(CMTPGETCONNLIST)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index cf0db005d2f5..acb3d63bc9dc 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 		if (offset >= 0)
 			break;
 	default:
-		mutex_unlock(&file_inode(file)->i_mutex);
+		mutex_unlock(&dentry->d_inode->i_mutex);
 		return -EINVAL;
 	}
 	if (offset != file->f_pos) {
diff --git a/fs/coredump.c b/fs/coredump.c
index f319926ddf8c..bbbe139ab280 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -657,7 +657,7 @@ void do_coredump(const siginfo_t *siginfo)
 	 */
 	if (!uid_eq(inode->i_uid, current_fsuid()))
 		goto close_fail;
-	if (!cprm.file->f_op->write)
+	if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
 		goto close_fail;
 	if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
 		goto close_fail;
diff --git a/fs/dax.c b/fs/dax.c
index ed1619ec6537..0bb0aecb556c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,9 +98,9 @@ static bool buffer_size_valid(struct buffer_head *bh)
 	return bh->b_state != 0;
 }
 
-static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
-		      loff_t start, loff_t end, get_block_t get_block,
-		      struct buffer_head *bh)
+static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
+		      loff_t start, loff_t end, get_block_t get_block,
+		      struct buffer_head *bh)
 {
 	ssize_t retval = 0;
 	loff_t pos = start;
@@ -109,7 +109,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
 	void *addr;
 	bool hole = false;
 
-	if (rw != WRITE)
+	if (iov_iter_rw(iter) != WRITE)
 		end = min(end, i_size_read(inode));
 
 	while (pos < end) {
@@ -124,7 +124,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
 			bh->b_size = PAGE_ALIGN(end - pos);
 			bh->b_state = 0;
 			retval = get_block(inode, block, bh,
-					   rw == WRITE);
+					   iov_iter_rw(iter) == WRITE);
 			if (retval)
 				break;
 			if (!buffer_size_valid(bh))
@@ -137,7 +137,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
 				bh->b_size -= done;
 			}
 
-			hole = (rw != WRITE) && !buffer_written(bh);
+			hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh);
 			if (hole) {
 				addr = NULL;
 				size = bh->b_size - first;
@@ -154,7 +154,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
 			max = min(pos + size, end);
 		}
 
-		if (rw == WRITE)
+		if (iov_iter_rw(iter) == WRITE)
 			len = copy_from_iter(addr, max - pos, iter);
 		else if (!hole)
 			len = copy_to_iter(addr, max - pos, iter);
@@ -173,7 +173,6 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
 
 /**
  * dax_do_io - Perform I/O to a DAX file
- * @rw: READ to read or WRITE to write
  * @iocb: The control block for this I/O
  * @inode: The file which the I/O is directed at
  * @iter: The addresses to do I/O from or to
@@ -189,9 +188,9 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
  * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
  * is in progress.
  */
-ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
-		  struct iov_iter *iter, loff_t pos,
-		  get_block_t get_block, dio_iodone_t end_io, int flags)
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct iov_iter *iter, loff_t pos, get_block_t get_block,
+		  dio_iodone_t end_io, int flags)
 {
 	struct buffer_head bh;
 	ssize_t retval = -EINVAL;
@@ -199,7 +198,7 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
 
 	memset(&bh, 0, sizeof(bh));
 
-	if ((flags & DIO_LOCKING) && (rw == READ)) {
+	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
 		struct address_space *mapping = inode->i_mapping;
 		mutex_lock(&inode->i_mutex);
 		retval = filemap_write_and_wait_range(mapping, pos, end - 1);
@@ -212,9 +211,9 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
 	/* Protects against truncate */
 	atomic_inc(&inode->i_dio_count);
 
-	retval = dax_io(rw, inode, iter, pos, end, get_block, &bh);
+	retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
-	if ((flags & DIO_LOCKING) && (rw == READ))
+	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
 		mutex_unlock(&inode->i_mutex);
 
 	if ((retval > 0) && end_io)
@@ -464,6 +463,23 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 EXPORT_SYMBOL_GPL(dax_fault);
 
 /**
+ * dax_pfn_mkwrite - handle first write to DAX page
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ *
+ */
+int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+
+	sb_start_pagefault(sb);
+	file_update_time(vma->vm_file);
+	sb_end_pagefault(sb);
+	return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
+
+/**
  * dax_zero_page_range - zero a range within a page of a DAX file
  * @inode: The file being truncated
  * @from: The file offset that is being truncated to
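
dax_pfn_mkwrite() gives DAX filesystems a common handler for the first write to a read-only mapped pfn; all it has to do is bump the file times under pagefault protection, since there is no struct page to dirty. Wiring it up is a one-liner in the fault table, as the ext2 hunks later in this diff show; a sketch with hypothetical fault handlers:

	static const struct vm_operations_struct myfs_dax_vm_ops = {
		.fault		= myfs_dax_fault,	/* hypothetical */
		.page_mkwrite	= myfs_dax_mkwrite,	/* hypothetical */
		.pfn_mkwrite	= dax_pfn_mkwrite,	/* generic helper added above */
	};
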
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e3732e53b..656ce522a218 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,6 +269,41 @@ static inline int dname_external(const struct dentry *dentry)
 	return dentry->d_name.name != dentry->d_iname;
 }
 
+/*
+ * Make sure other CPUs see the inode attached before the type is set.
+ */
+static inline void __d_set_inode_and_type(struct dentry *dentry,
+					  struct inode *inode,
+					  unsigned type_flags)
+{
+	unsigned flags;
+
+	dentry->d_inode = inode;
+	smp_wmb();
+	flags = READ_ONCE(dentry->d_flags);
+	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+	flags |= type_flags;
+	WRITE_ONCE(dentry->d_flags, flags);
+}
+
+/*
+ * Ideally, we want to make sure that other CPUs see the flags cleared before
+ * the inode is detached, but this is really a violation of RCU principles
+ * since the ordering suggests we should always set inode before flags.
+ *
+ * We should instead replace or discard the entire dentry - but that sucks
+ * performancewise on mass deletion/rename.
+ */
+static inline void __d_clear_type_and_inode(struct dentry *dentry)
+{
+	unsigned flags = READ_ONCE(dentry->d_flags);
+
+	flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+	WRITE_ONCE(dentry->d_flags, flags);
+	smp_wmb();
+	dentry->d_inode = NULL;
+}
+
 static void dentry_free(struct dentry *dentry)
 {
 	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
@@ -311,7 +346,7 @@ static void dentry_iput(struct dentry * dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	if (inode) {
-		dentry->d_inode = NULL;
+		__d_clear_type_and_inode(dentry);
 		hlist_del_init(&dentry->d_u.d_alias);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&inode->i_lock);
@@ -335,8 +370,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 	__releases(dentry->d_inode->i_lock)
 {
 	struct inode *inode = dentry->d_inode;
-	__d_clear_type(dentry);
-	dentry->d_inode = NULL;
+	__d_clear_type_and_inode(dentry);
 	hlist_del_init(&dentry->d_u.d_alias);
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -1715,11 +1749,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	unsigned add_flags = d_flags_for_inode(inode);
 
 	spin_lock(&dentry->d_lock);
-	dentry->d_flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
-	dentry->d_flags |= add_flags;
 	if (inode)
 		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
-	dentry->d_inode = inode;
+	__d_set_inode_and_type(dentry, inode, add_flags);
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
 	fsnotify_d_instantiate(dentry, inode);
@@ -1937,8 +1969,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
 		add_flags |= DCACHE_DISCONNECTED;
 
 	spin_lock(&tmp->d_lock);
-	tmp->d_inode = inode;
-	tmp->d_flags |= add_flags;
+	__d_set_inode_and_type(tmp, inode, add_flags);
 	hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
 	hlist_bl_lock(&tmp->d_sb->s_anon);
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2690,7 +2721,7 @@ static int __d_unalias(struct inode *inode,
 		       struct dentry *dentry, struct dentry *alias)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
-	int ret = -EBUSY;
+	int ret = -ESTALE;
 
 	/* If alias and dentry share a parent, then no extra locks required */
 	if (alias->d_parent == dentry->d_parent)
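
The new __d_set_inode_and_type()/__d_clear_type_and_inode() helpers centralize a publish ordering: d_inode must be visible to other CPUs before the DCACHE_ENTRY_TYPE bits in d_flags claim the dentry is positive, hence the smp_wmb() between the two stores. A generic illustration of the idiom (not the dcache code itself):

	struct published {
		void *payload;
		unsigned int flags;	/* low bits encode "payload present" */
	};

	static void publish(struct published *p, void *data, unsigned int type)
	{
		unsigned int flags;

		p->payload = data;
		smp_wmb();			/* order payload before flags */
		flags = READ_ONCE(p->flags);
		flags |= type;
		WRITE_ONCE(p->flags, flags);	/* readers may now chase payload */
	}
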
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 96400ab42d13..c9ee0dfe90b5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 
 	pr_debug("debugfs: creating file '%s'\n",name);
 
+	if (IS_ERR(parent))
+		return parent;
+
 	error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
 			      &debugfs_mount_count);
 	if (error)
@@ -521,7 +524,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
 
 	if (debugfs_positive(dentry)) {
 		dget(dentry);
-		if (S_ISDIR(dentry->d_inode->i_mode))
+		if (d_is_dir(dentry))
 			ret = simple_rmdir(parent->d_inode, dentry);
 		else
 			simple_unlink(parent->d_inode, dentry);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b2e297..c3b560b24a46 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
 #include <linux/uio.h>
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
-#include <linux/aio.h>
 
 /*
  * How many user pages to map in one call to get_user_pages(). This determines
@@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
 				ret = err;
 		}
 
-		aio_complete(dio->iocb, ret, 0);
+		dio->iocb->ki_complete(dio->iocb, ret, 0);
 	}
 
 	kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
 	 * operation.  AIO can if it was a broken operation described above or
 	 * in fact if all the bios race to complete before we get here.  In
 	 * that case dio_complete() translates the EIOCBQUEUED into the proper
-	 * return code that the caller will hand to aio_complete().
+	 * return code that the caller will hand to ->complete().
 	 *
 	 * This is managed by the bio_lock instead of being an atomic_t so that
 	 * completion paths can drop their ref and use the remaining count to
@@ -1094,10 +1093,10 @@ static inline int drop_refcount(struct dio *dio)
  * for the whole file.
  */
 static inline ssize_t
-do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
-	get_block_t get_block, dio_iodone_t end_io,
+do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, struct iov_iter *iter,
+	loff_t offset, get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io, int flags)
 {
 	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 	unsigned blkbits = i_blkbits;
@@ -1111,9 +1110,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
 
-	if (rw & WRITE)
-		rw = WRITE_ODIRECT;
-
 	/*
 	 * Avoid references to bdev if not absolutely needed to give
 	 * the early prefetch in the caller enough time.
@@ -1128,7 +1124,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	}
 
 	/* watch out for a 0 len io from a tricksy fs */
-	if (rw == READ && !iov_iter_count(iter))
+	if (iov_iter_rw(iter) == READ && !iov_iter_count(iter))
 		return 0;
 
 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1144,7 +1140,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 	dio->flags = flags;
 	if (dio->flags & DIO_LOCKING) {
-		if (rw == READ) {
+		if (iov_iter_rw(iter) == READ) {
 			struct address_space *mapping =
 					iocb->ki_filp->f_mapping;
 
@@ -1170,19 +1166,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (is_sync_kiocb(iocb))
 		dio->is_async = false;
 	else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
-		 (rw & WRITE) && end > i_size_read(inode))
+		 iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
 		dio->is_async = false;
 	else
 		dio->is_async = true;
 
 	dio->inode = inode;
-	dio->rw = rw;
+	dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ;
 
 	/*
 	 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
 	 * so that we can call ->fsync.
 	 */
-	if (dio->is_async && (rw & WRITE) &&
+	if (dio->is_async && iov_iter_rw(iter) == WRITE &&
 	    ((iocb->ki_filp->f_flags & O_DSYNC) ||
 	     IS_SYNC(iocb->ki_filp->f_mapping->host))) {
 		retval = dio_set_defer_completion(dio);
@@ -1275,7 +1271,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * we can let i_mutex go now that its achieved its purpose
 	 * of protecting us from looking up uninitialized blocks.
 	 */
-	if (rw == READ && (dio->flags & DIO_LOCKING))
+	if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
@@ -1287,7 +1283,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 */
 	BUG_ON(retval == -EIOCBQUEUED);
 	if (dio->is_async && retval == 0 && dio->result &&
-	    (rw == READ || dio->result == count))
+	    (iov_iter_rw(iter) == READ || dio->result == count))
 		retval = -EIOCBQUEUED;
 	else
 		dio_await_completion(dio);
@@ -1301,11 +1297,11 @@ out:
 	return retval;
 }
 
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
-	get_block_t get_block, dio_iodone_t end_io,
-	dio_submit_t submit_io, int flags)
+ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+			     struct block_device *bdev, struct iov_iter *iter,
+			     loff_t offset, get_block_t get_block,
+			     dio_iodone_t end_io, dio_submit_t submit_io,
+			     int flags)
 {
 	/*
 	 * The block device state is needed in the end to finally
@@ -1319,8 +1315,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	prefetch(bdev->bd_queue);
 	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
 
-	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
-				     get_block, end_io, submit_io, flags);
+	return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block,
+				     end_io, submit_io, flags);
 }
 
 EXPORT_SYMBOL(__blockdev_direct_IO);
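
All of the direct-io.c changes above follow from one idea: the I/O direction is a property of the iov_iter itself, so the separate rw argument can be dropped and every test rewritten in terms of iov_iter_rw(). A sketch of the idiom (assuming the 4.1-era iov_iter API):

	static bool dio_is_write(struct iov_iter *iter)
	{
		/* iov_iter_rw() reports the direction the iterator was
		 * initialized with (iov_iter_init(i, WRITE, ...) etc.) */
		return iov_iter_rw(iter) == WRITE;
	}
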
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 90d1882b306f..5ba029e627cc 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key)
 }
 
 #define ECRYPTFS_MAX_KEYSET_SIZE 1024
-#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31
 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64
 #define ECRYPTFS_MAX_IV_BYTES 16	/* 128 bits */
 #define ECRYPTFS_SALT_BYTES 2
@@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat {
 	struct crypto_ablkcipher *tfm;
 	struct crypto_hash *hash_tfm; /* Crypto context for generating
 				       * the initialization vectors */
-	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
 	unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
 	unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
 	struct list_head keysig_list;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b07731e68c0b..a65786e26b05 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
 #include <linux/security.h>
 #include <linux/compat.h>
 #include <linux/fs_stack.h>
-#include <linux/aio.h>
 #include "ecryptfs_kernel.h"
 
 /**
@@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 
 	rc = generic_file_read_iter(iocb, to);
-	/*
-	 * Even though this is a async interface, we need to wait
-	 * for IO to finish to update atime
-	 */
-	if (-EIOCBQUEUED == rc)
-		rc = wait_on_sync_kiocb(iocb);
 	if (rc >= 0) {
 		path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
 		touch_atime(path);
@@ -303,9 +296,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct file *lower_file = ecryptfs_file_to_lower(file);
 	long rc = -ENOTTY;
 
-	if (lower_file->f_op->unlocked_ioctl)
+	if (!lower_file->f_op->unlocked_ioctl)
+		return rc;
+
+	switch (cmd) {
+	case FITRIM:
+	case FS_IOC_GETFLAGS:
+	case FS_IOC_SETFLAGS:
+	case FS_IOC_GETVERSION:
+	case FS_IOC_SETVERSION:
 		rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
-	return rc;
+		fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+		return rc;
+	default:
+		return rc;
+	}
 }
 
 #ifdef CONFIG_COMPAT
@@ -315,9 +321,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct file *lower_file = ecryptfs_file_to_lower(file);
 	long rc = -ENOIOCTLCMD;
 
-	if (lower_file->f_op->compat_ioctl)
+	if (!lower_file->f_op->compat_ioctl)
+		return rc;
+
+	switch (cmd) {
+	case FITRIM:
+	case FS_IOC32_GETFLAGS:
+	case FS_IOC32_SETFLAGS:
+	case FS_IOC32_GETVERSION:
+	case FS_IOC32_SETVERSION:
 		rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
-	return rc;
+		fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+		return rc;
+	default:
+		return rc;
+	}
 }
 #endif
 
@@ -339,9 +358,7 @@ const struct file_operations ecryptfs_dir_fops = {
 
 const struct file_operations ecryptfs_main_fops = {
 	.llseek = generic_file_llseek,
-	.read = new_sync_read,
 	.read_iter = ecryptfs_read_update_atime,
-	.write = new_sync_write,
 	.write_iter = generic_file_write_iter,
 	.iterate = ecryptfs_readdir,
 	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
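
The ecryptfs ioctl hunks replace blind pass-through with a whitelist: only ioctls known to be safe against the lower file are forwarded, and the cached upper-inode attributes are resynced afterwards. Sketch of the pattern for a generic stacking filesystem (stackfs_lower() is hypothetical; fsstack_copy_attr_all() is the real fs_stack helper):

	static long stackfs_ioctl(struct file *file, unsigned int cmd,
				  unsigned long arg)
	{
		struct file *lower = stackfs_lower(file);
		long rc = -ENOTTY;

		if (!lower->f_op->unlocked_ioctl)
			return rc;

		switch (cmd) {
		case FITRIM:
		case FS_IOC_GETFLAGS:
		case FS_IOC_SETFLAGS:
			rc = lower->f_op->unlocked_ioctl(lower, cmd, arg);
			/* the ioctl may have changed the lower inode */
			fsstack_copy_attr_all(file_inode(file), file_inode(lower));
			return rc;
		default:
			return rc;	/* everything else stays blocked */
		}
	}
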
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 917bd5c9776a..6bd67e2011f0 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
 	struct blkcipher_desc desc;
 	char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
 	char iv[ECRYPTFS_MAX_IV_BYTES];
-	char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+	char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
 };
 
 /**
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1895d60f4122..c095d3264259 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
 	if (!cipher_name_set) {
 		int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
 
-		BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+		BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE);
 		strcpy(mount_crypt_stat->global_default_cipher_name,
 		       ECRYPTFS_DEFAULT_CIPHER);
 	}
diff --git a/fs/exec.c b/fs/exec.c
index c7f9b733406d..49a1c61433b7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -926,10 +926,14 @@ static int de_thread(struct task_struct *tsk)
 	if (!thread_group_leader(tsk)) {
 		struct task_struct *leader = tsk->group_leader;
 
-		sig->notify_count = -1;	/* for exit_notify() */
 		for (;;) {
 			threadgroup_change_begin(tsk);
 			write_lock_irq(&tasklist_lock);
+			/*
+			 * Do this under tasklist_lock to ensure that
+			 * exit_notify() can't miss ->group_exit_task
+			 */
+			sig->notify_count = -1;
 			if (likely(leader->exit_state))
 				break;
 			__set_current_state(TASK_KILLABLE);
@@ -1078,7 +1082,13 @@ int flush_old_exec(struct linux_binprm * bprm)
 	if (retval)
 		goto out;
 
+	/*
+	 * Must be called _before_ exec_mmap() as bprm->mm is
+	 * not visibile until then. This also enables the update
+	 * to be lockless.
+	 */
 	set_mm_exe_file(bprm->mm, bprm->file);
+
 	/*
 	 * Release all of the old mmap stuff
 	 */
@@ -1265,6 +1275,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 	spin_unlock(&p->fs->lock);
 }
 
+static void bprm_fill_uid(struct linux_binprm *bprm)
+{
+	struct inode *inode;
+	unsigned int mode;
+	kuid_t uid;
+	kgid_t gid;
+
+	/* clear any previous set[ug]id data from a previous binary */
+	bprm->cred->euid = current_euid();
+	bprm->cred->egid = current_egid();
+
+	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+		return;
+
+	if (task_no_new_privs(current))
+		return;
+
+	inode = file_inode(bprm->file);
+	mode = READ_ONCE(inode->i_mode);
+	if (!(mode & (S_ISUID|S_ISGID)))
+		return;
+
+	/* Be careful if suid/sgid is set */
+	mutex_lock(&inode->i_mutex);
+
+	/* reload atomically mode/uid/gid now that lock held */
+	mode = inode->i_mode;
+	uid = inode->i_uid;
+	gid = inode->i_gid;
+	mutex_unlock(&inode->i_mutex);
+
+	/* We ignore suid/sgid if there are no mappings for them in the ns */
+	if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
+	    !kgid_has_mapping(bprm->cred->user_ns, gid))
+		return;
+
+	if (mode & S_ISUID) {
+		bprm->per_clear |= PER_CLEAR_ON_SETID;
+		bprm->cred->euid = uid;
+	}
+
+	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+		bprm->per_clear |= PER_CLEAR_ON_SETID;
+		bprm->cred->egid = gid;
+	}
+}
+
 /*
  * Fill the binprm structure from the inode.
  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1273,36 +1330,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
  */
 int prepare_binprm(struct linux_binprm *bprm)
 {
-	struct inode *inode = file_inode(bprm->file);
-	umode_t mode = inode->i_mode;
 	int retval;
 
-
-	/* clear any previous set[ug]id data from a previous binary */
-	bprm->cred->euid = current_euid();
-	bprm->cred->egid = current_egid();
-
-	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-	    !task_no_new_privs(current) &&
-	    kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
-	    kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
-		/* Set-uid? */
-		if (mode & S_ISUID) {
-			bprm->per_clear |= PER_CLEAR_ON_SETID;
-			bprm->cred->euid = inode->i_uid;
-		}
-
-		/* Set-gid? */
-		/*
-		 * If setgid is set but no group execute bit then this
-		 * is a candidate for mandatory locking, not a setgid
-		 * executable.
-		 */
-		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-			bprm->per_clear |= PER_CLEAR_ON_SETID;
-			bprm->cred->egid = inode->i_gid;
-		}
-	}
+	bprm_fill_uid(bprm);
 
 	/* fill in binprm security blob */
 	retval = security_bprm_set_creds(bprm);
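
bprm_fill_uid() above also closes a race: mode, uid and gid are re-read together under inode->i_mutex, so a concurrent chmod()/chown() cannot be observed half-applied between the setuid test and the credential assignment. The shape of the recheck-under-lock idiom, reduced to a sketch:

	static bool read_suid_ids(struct inode *inode, umode_t *mode,
				  kuid_t *uid, kgid_t *gid)
	{
		if (!(READ_ONCE(inode->i_mode) & (S_ISUID | S_ISGID)))
			return false;		/* unlocked fast-path peek */

		mutex_lock(&inode->i_mutex);	/* reload atomically under the lock */
		*mode = inode->i_mode;
		*uid  = inode->i_uid;
		*gid  = inode->i_gid;
		mutex_unlock(&inode->i_mutex);
		return true;
	}
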
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 1a376b42d305..906de66e8e7e 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -67,8 +67,6 @@ static int exofs_flush(struct file *file, fl_owner_t id)
 
 const struct file_operations exofs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a198e94813fe..35073aaec6e0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -963,8 +963,8 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
 
 
 /* TODO: Should be easy enough to do proprly */
-static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
-		struct iov_iter *iter, loff_t offset)
+static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+			       loff_t offset)
 {
 	return 0;
 }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 678f9ab08c48..8d15febd0aa3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -793,7 +793,6 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
 		       int datasync);
 extern const struct inode_operations ext2_file_inode_operations;
 extern const struct file_operations ext2_file_operations;
-extern const struct file_operations ext2_dax_file_operations;
 
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index e31701713516..3a0a6c6406d0 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -39,6 +39,7 @@ static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 static const struct vm_operations_struct ext2_dax_vm_ops = {
 	.fault		= ext2_dax_fault,
 	.page_mkwrite	= ext2_dax_mkwrite,
+	.pfn_mkwrite	= dax_pfn_mkwrite,
 };
 
 static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -92,8 +93,6 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  */
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
@@ -108,24 +107,6 @@ const struct file_operations ext2_file_operations = {
 	.splice_write	= iter_file_splice_write,
 };
 
-#ifdef CONFIG_FS_DAX
-const struct file_operations ext2_dax_file_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
-	.unlocked_ioctl = ext2_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ext2_compat_ioctl,
-#endif
-	.mmap		= ext2_file_mmap,
-	.open		= dquot_file_open,
-	.release	= ext2_release_file,
-	.fsync		= ext2_fsync,
-};
-#endif
-
 const struct inode_operations ext2_file_inode_operations = {
 #ifdef CONFIG_EXT2_FS_XATTR
 	.setxattr	= generic_setxattr,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc000125..5d9213963fae 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
@@ -851,8 +851,7 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t
-ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
-			loff_t offset)
+ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -861,12 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	ssize_t ret;
 
 	if (IS_DAX(inode))
-		ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block,
-				NULL, DIO_LOCKING);
+		ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
+				DIO_LOCKING);
 	else
-		ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+		ret = blockdev_direct_IO(iocb, inode, iter, offset,
 					 ext2_get_block);
-	if (ret < 0 && (rw & WRITE))
+	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
 	return ret;
 }
@@ -1388,10 +1387,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_op = &ext2_file_inode_operations;
-		if (test_opt(inode->i_sb, DAX)) {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_dax_file_operations;
-		} else if (test_opt(inode->i_sb, NOBH)) {
+		if (test_opt(inode->i_sb, NOBH)) {
 			inode->i_mapping->a_ops = &ext2_nobh_aops;
 			inode->i_fop = &ext2_file_operations;
 		} else {
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 148f6e3789ea..ce422931f411 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,10 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
 		return PTR_ERR(inode);
 
 	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, DAX)) {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_dax_file_operations;
-	} else if (test_opt(inode->i_sb, NOBH)) {
+	if (test_opt(inode->i_sb, NOBH)) {
 		inode->i_mapping->a_ops = &ext2_nobh_aops;
 		inode->i_fop = &ext2_file_operations;
 	} else {
@@ -125,10 +122,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 		return PTR_ERR(inode);
 
 	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, DAX)) {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_dax_file_operations;
-	} else if (test_opt(inode->i_sb, NOBH)) {
+	if (test_opt(inode->i_sb, NOBH)) {
 		inode->i_mapping->a_ops = &ext2_nobh_aops;
 		inode->i_fop = &ext2_file_operations;
 	} else {
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index a062fa1e1b11..3b8f650de22c 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -50,8 +50,6 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 
 const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= ext3_ioctl,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc49ba27..13c0868c7160 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
@@ -1820,8 +1820,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  * crashes then stale disk data _may_ be exposed inside the file. But current
  * VFS code falls back into buffered path in that case so we are safe.
  */
-static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
-			struct iov_iter *iter, loff_t offset)
+static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+			      loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1832,9 +1832,9 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	int retries = 0;
 
-	trace_ext3_direct_IO_enter(inode, offset, count, rw);
+	trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 
-	if (rw == WRITE) {
+	if (iov_iter_rw(iter) == WRITE) {
 		loff_t final_size = offset + count;
 
 		if (final_size > inode->i_size) {
@@ -1856,12 +1856,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
+	ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
 	 */
-	if (unlikely((rw & WRITE) && ret < 0)) {
+	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
 		loff_t isize = i_size_read(inode);
 		loff_t end = offset + count;
 
@@ -1908,7 +1908,7 @@ retry:
 			ret = err;
 	}
 out:
-	trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
+	trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
 	return ret;
 }
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index d4dbf3c259b3..f037b4b27300 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -789,7 +789,7 @@ static const struct quotactl_ops ext3_qctl_operations = {
 	.quota_on	= ext3_quota_on,
 	.quota_off	= dquot_quota_off,
 	.quota_sync	= dquot_quota_sync,
-	.get_info	= dquot_get_dqinfo,
+	.get_state	= dquot_get_state,
 	.set_info	= dquot_set_dqinfo,
 	.get_dqblk	= dquot_get_dqblk,
 	.set_dqblk	= dquot_set_dqblk
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index c6874be6d58b..24215dc09a18 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -546,8 +546,7 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
546 free += EXT3_XATTR_LEN(name_len); 546 free += EXT3_XATTR_LEN(name_len);
547 } 547 }
548 if (i->value) { 548 if (i->value) {
549 if (free < EXT3_XATTR_SIZE(i->value_len) || 549 if (free < EXT3_XATTR_LEN(name_len) +
550 free < EXT3_XATTR_LEN(name_len) +
551 EXT3_XATTR_SIZE(i->value_len)) 550 EXT3_XATTR_SIZE(i->value_len))
552 return -ENOSPC; 551 return -ENOSPC;
553 } 552 }
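The dropped first comparison was redundant rather than wrong: EXT3_XATTR_LEN(name_len) is non-negative, so whenever free covers the combined requirement it also covers EXT3_XATTR_SIZE(i->value_len) alone. The surviving check, restated as a hedged sketch:

    /* An entry needs room for its header+name (EXT3_XATTR_LEN) and its
     * value (EXT3_XATTR_SIZE) together; the value-only comparison that
     * was removed can never fire when this combined one passes. */
    if (free < EXT3_XATTR_LEN(name_len) + EXT3_XATTR_SIZE(i->value_len))
            return -ENOSPC;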
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index efea5d5c44ce..18228c201f7f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -64,6 +64,23 @@ config EXT4_FS_SECURITY
64 If you are not using a security module that requires using 64 If you are not using a security module that requires using
65 extended attributes for file security labels, say N. 65 extended attributes for file security labels, say N.
66 66
67config EXT4_FS_ENCRYPTION
68 bool "Ext4 Encryption"
69 depends on EXT4_FS
70 select CRYPTO_AES
71 select CRYPTO_CBC
72 select CRYPTO_ECB
73 select CRYPTO_XTS
74 select CRYPTO_CTS
75 select CRYPTO_SHA256
76 select KEYS
77 select ENCRYPTED_KEYS
78 help
79 Enable encryption of ext4 files and directories. This
80 feature is similar to ecryptfs, but it is more memory
81 efficient since it avoids caching the encrypted and
82 decrypted pages in the page cache.
83
67config EXT4_DEBUG 84config EXT4_DEBUG
68 bool "EXT4 debugging support" 85 bool "EXT4 debugging support"
69 depends on EXT4_FS 86 depends on EXT4_FS
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 0310fec2ee3d..75285ea9aa05 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,7 +8,9 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
10 mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ 10 mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
11 xattr_trusted.o inline.o 11 xattr_trusted.o inline.o readpage.o
12 12
13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
14ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o 14ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
15ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \
16 crypto_key.o crypto_fname.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index d40c8dbbb0d6..69b1e73026a5 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,11 +4,6 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include <linux/init.h>
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/capability.h>
11#include <linux/fs.h>
12#include "ext4_jbd2.h" 7#include "ext4_jbd2.h"
13#include "ext4.h" 8#include "ext4.h"
14#include "xattr.h" 9#include "xattr.h"
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 83a6f497c4e0..955bf49a7945 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -14,7 +14,6 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h>
18#include <linux/quotaops.h> 17#include <linux/quotaops.h>
19#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
20#include "ext4.h" 19#include "ext4.h"
@@ -641,8 +640,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 641 * fail EDQUOT for metadata, but we do account for it. 640 * fail EDQUOT for metadata, but we do account for it.
642 */ 641 */
643 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { 642 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
644 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
645 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
646 dquot_alloc_block_nofail(inode, 643 dquot_alloc_block_nofail(inode,
647 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); 644 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
648 } 645 }
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index b610779a958c..4a606afb171f 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -8,7 +8,6 @@
8 */ 8 */
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h>
12#include "ext4.h" 11#include "ext4.h"
13 12
14unsigned int ext4_count_free(char *bitmap, unsigned int numchars) 13unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 41eb9dcfac7e..3522340c7a99 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/swap.h> 16#include <linux/swap.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/blkdev.h> 18#include <linux/blkdev.h>
19#include <linux/mutex.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include "ext4.h" 20#include "ext4.h"
22 21
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
new file mode 100644
index 000000000000..8ff15273ab0c
--- /dev/null
+++ b/fs/ext4/crypto.c
@@ -0,0 +1,558 @@
1/*
2 * linux/fs/ext4/crypto.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption functions for ext4
7 *
8 * Written by Michael Halcrow, 2014.
9 *
10 * Filename encryption additions
11 * Uday Savagaonkar, 2014
12 * Encryption policy handling additions
13 * Ildar Muslukhov, 2014
14 *
15 * This has not yet undergone a rigorous security audit.
16 *
17 * The usage of AES-XTS should conform to recommendations in NIST
18 * Special Publication 800-38E and IEEE P1619/D16.
19 */
20
21#include <crypto/hash.h>
22#include <crypto/sha.h>
23#include <keys/user-type.h>
24#include <keys/encrypted-type.h>
25#include <linux/crypto.h>
26#include <linux/ecryptfs.h>
27#include <linux/gfp.h>
28#include <linux/kernel.h>
29#include <linux/key.h>
30#include <linux/list.h>
31#include <linux/mempool.h>
32#include <linux/module.h>
33#include <linux/mutex.h>
34#include <linux/random.h>
35#include <linux/scatterlist.h>
36#include <linux/spinlock_types.h>
37
38#include "ext4_extents.h"
39#include "xattr.h"
40
41/* Encryption added and removed here! (L: */
42
43static unsigned int num_prealloc_crypto_pages = 32;
44static unsigned int num_prealloc_crypto_ctxs = 128;
45
46module_param(num_prealloc_crypto_pages, uint, 0444);
47MODULE_PARM_DESC(num_prealloc_crypto_pages,
48 "Number of crypto pages to preallocate");
49module_param(num_prealloc_crypto_ctxs, uint, 0444);
50MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
51 "Number of crypto contexts to preallocate");
52
53static mempool_t *ext4_bounce_page_pool;
54
55static LIST_HEAD(ext4_free_crypto_ctxs);
56static DEFINE_SPINLOCK(ext4_crypto_ctx_lock);
57
58/**
59 * ext4_release_crypto_ctx() - Releases an encryption context
60 * @ctx: The encryption context to release.
61 *
62 * If the encryption context was allocated from the pre-allocated pool, returns
63 * it to that pool. Else, frees it.
64 *
65 * If there's a bounce page in the context, this frees that.
66 */
67void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
68{
69 unsigned long flags;
70
71 if (ctx->bounce_page) {
72 if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
73 __free_page(ctx->bounce_page);
74 else
75 mempool_free(ctx->bounce_page, ext4_bounce_page_pool);
76 ctx->bounce_page = NULL;
77 }
78 ctx->control_page = NULL;
79 if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
80 if (ctx->tfm)
81 crypto_free_tfm(ctx->tfm);
82 kfree(ctx);
83 } else {
84 spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
85 list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
86 spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
87 }
88}
89
90/**
91 * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context
92 * @mask: The allocation mask.
93 *
 94 * Return: An allocated and initialized encryption context on success;
 95 * an ERR_PTR otherwise (never NULL).
96 */
97static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask)
98{
99 struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx),
100 mask);
101
102 if (!ctx)
103 return ERR_PTR(-ENOMEM);
104 return ctx;
105}
106
107/**
108 * ext4_get_crypto_ctx() - Gets an encryption context
109 * @inode: The inode for which we are doing the crypto
110 *
111 * Allocates and initializes an encryption context.
112 *
 113 * Return: An allocated and initialized encryption context on success;
 114 * an ERR_PTR otherwise (never NULL).
115 */
116struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
117{
118 struct ext4_crypto_ctx *ctx = NULL;
119 int res = 0;
120 unsigned long flags;
121 struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key;
122
123 if (!ext4_read_workqueue)
124 ext4_init_crypto();
125
126 /*
127 * We first try getting the ctx from a free list because in
128 * the common case the ctx will have an allocated and
129 * initialized crypto tfm, so it's probably a worthwhile
130 * optimization. For the bounce page, we first try getting it
131 * from the kernel allocator because that's just about as fast
132 * as getting it from a list and because a cache of free pages
133 * should generally be a "last resort" option for a filesystem
134 * to be able to do its job.
135 */
136 spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
137 ctx = list_first_entry_or_null(&ext4_free_crypto_ctxs,
138 struct ext4_crypto_ctx, free_list);
139 if (ctx)
140 list_del(&ctx->free_list);
141 spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
142 if (!ctx) {
143 ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS);
144 if (IS_ERR(ctx)) {
145 res = PTR_ERR(ctx);
146 goto out;
147 }
148 ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
149 } else {
150 ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
151 }
152
153 /* Allocate a new Crypto API context if we don't already have
154 * one or if it isn't the right mode. */
155 BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID);
156 if (ctx->tfm && (ctx->mode != key->mode)) {
157 crypto_free_tfm(ctx->tfm);
158 ctx->tfm = NULL;
159 ctx->mode = EXT4_ENCRYPTION_MODE_INVALID;
160 }
161 if (!ctx->tfm) {
162 switch (key->mode) {
163 case EXT4_ENCRYPTION_MODE_AES_256_XTS:
164 ctx->tfm = crypto_ablkcipher_tfm(
165 crypto_alloc_ablkcipher("xts(aes)", 0, 0));
166 break;
167 case EXT4_ENCRYPTION_MODE_AES_256_GCM:
168 /* TODO(mhalcrow): AEAD w/ gcm(aes);
169 * crypto_aead_setauthsize() */
170 ctx->tfm = ERR_PTR(-ENOTSUPP);
171 break;
172 default:
173 BUG();
174 }
175 if (IS_ERR_OR_NULL(ctx->tfm)) {
176 res = PTR_ERR(ctx->tfm);
177 ctx->tfm = NULL;
178 goto out;
179 }
180 ctx->mode = key->mode;
181 }
182 BUG_ON(key->size != ext4_encryption_key_size(key->mode));
183
184 /* There shouldn't be a bounce page attached to the crypto
185 * context at this point. */
186 BUG_ON(ctx->bounce_page);
187
188out:
189 if (res) {
190 if (!IS_ERR_OR_NULL(ctx))
191 ext4_release_crypto_ctx(ctx);
192 ctx = ERR_PTR(res);
193 }
194 return ctx;
195}
196
197struct workqueue_struct *ext4_read_workqueue;
198static DEFINE_MUTEX(crypto_init);
199
200/**
201 * ext4_exit_crypto() - Shutdown the ext4 encryption system
202 */
203void ext4_exit_crypto(void)
204{
205 struct ext4_crypto_ctx *pos, *n;
206
207 list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) {
208 if (pos->bounce_page) {
209 if (pos->flags &
210 EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
211 __free_page(pos->bounce_page);
212 } else {
213 mempool_free(pos->bounce_page,
214 ext4_bounce_page_pool);
215 }
216 }
217 if (pos->tfm)
218 crypto_free_tfm(pos->tfm);
219 kfree(pos);
220 }
221 INIT_LIST_HEAD(&ext4_free_crypto_ctxs);
222 if (ext4_bounce_page_pool)
223 mempool_destroy(ext4_bounce_page_pool);
224 ext4_bounce_page_pool = NULL;
225 if (ext4_read_workqueue)
226 destroy_workqueue(ext4_read_workqueue);
227 ext4_read_workqueue = NULL;
228}
229
230/**
231 * ext4_init_crypto() - Set up for ext4 encryption.
232 *
233 * We only call this when we start accessing encrypted files, since it
234 * results in memory getting allocated that wouldn't otherwise be used.
235 *
236 * Return: Zero on success, non-zero otherwise.
237 */
238int ext4_init_crypto(void)
239{
240 int i, res;
241
242 mutex_lock(&crypto_init);
243 if (ext4_read_workqueue)
244 goto already_initialized;
245 ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0);
246 if (!ext4_read_workqueue) {
247 res = -ENOMEM;
248 goto fail;
249 }
250
251 for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
252 struct ext4_crypto_ctx *ctx;
253
254 ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL);
255 if (IS_ERR(ctx)) {
256 res = PTR_ERR(ctx);
257 goto fail;
258 }
259 list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
260 }
261
262 ext4_bounce_page_pool =
263 mempool_create_page_pool(num_prealloc_crypto_pages, 0);
264 if (!ext4_bounce_page_pool) {
265 res = -ENOMEM;
266 goto fail;
267 }
268already_initialized:
269 mutex_unlock(&crypto_init);
270 return 0;
271fail:
272 ext4_exit_crypto();
273 mutex_unlock(&crypto_init);
274 return res;
275}
276
277void ext4_restore_control_page(struct page *data_page)
278{
279 struct ext4_crypto_ctx *ctx =
280 (struct ext4_crypto_ctx *)page_private(data_page);
281
282 set_page_private(data_page, (unsigned long)NULL);
283 ClearPagePrivate(data_page);
284 unlock_page(data_page);
285 ext4_release_crypto_ctx(ctx);
286}
287
288/**
289 * ext4_crypt_complete() - The completion callback for page encryption
290 * @req: The asynchronous encryption request context
291 * @res: The result of the encryption operation
292 */
293static void ext4_crypt_complete(struct crypto_async_request *req, int res)
294{
295 struct ext4_completion_result *ecr = req->data;
296
297 if (res == -EINPROGRESS)
298 return;
299 ecr->res = res;
300 complete(&ecr->completion);
301}
302
303typedef enum {
304 EXT4_DECRYPT = 0,
305 EXT4_ENCRYPT,
306} ext4_direction_t;
307
308static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
309 struct inode *inode,
310 ext4_direction_t rw,
311 pgoff_t index,
312 struct page *src_page,
313 struct page *dest_page)
314
315{
316 u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
317 struct ablkcipher_request *req = NULL;
318 DECLARE_EXT4_COMPLETION_RESULT(ecr);
319 struct scatterlist dst, src;
320 struct ext4_inode_info *ei = EXT4_I(inode);
321 struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
322 int res = 0;
323
324 BUG_ON(!ctx->tfm);
325 BUG_ON(ctx->mode != ei->i_encryption_key.mode);
326
327 if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) {
328 printk_ratelimited(KERN_ERR
329 "%s: unsupported crypto algorithm: %d\n",
330 __func__, ctx->mode);
331 return -ENOTSUPP;
332 }
333
334 crypto_ablkcipher_clear_flags(atfm, ~0);
335 crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
336
337 res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw,
338 ei->i_encryption_key.size);
339 if (res) {
340 printk_ratelimited(KERN_ERR
341 "%s: crypto_ablkcipher_setkey() failed\n",
342 __func__);
343 return res;
344 }
345 req = ablkcipher_request_alloc(atfm, GFP_NOFS);
346 if (!req) {
347 printk_ratelimited(KERN_ERR
348 "%s: crypto_request_alloc() failed\n",
349 __func__);
350 return -ENOMEM;
351 }
352 ablkcipher_request_set_callback(
353 req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
354 ext4_crypt_complete, &ecr);
355
356 BUILD_BUG_ON(EXT4_XTS_TWEAK_SIZE < sizeof(index));
357 memcpy(xts_tweak, &index, sizeof(index));
358 memset(&xts_tweak[sizeof(index)], 0,
359 EXT4_XTS_TWEAK_SIZE - sizeof(index));
360
361 sg_init_table(&dst, 1);
362 sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
363 sg_init_table(&src, 1);
364 sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
365 ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
366 xts_tweak);
367 if (rw == EXT4_DECRYPT)
368 res = crypto_ablkcipher_decrypt(req);
369 else
370 res = crypto_ablkcipher_encrypt(req);
371 if (res == -EINPROGRESS || res == -EBUSY) {
372 BUG_ON(req->base.data != &ecr);
373 wait_for_completion(&ecr.completion);
374 res = ecr.res;
375 }
376 ablkcipher_request_free(req);
377 if (res) {
378 printk_ratelimited(
379 KERN_ERR
380 "%s: crypto_ablkcipher_encrypt() returned %d\n",
381 __func__, res);
382 return res;
383 }
384 return 0;
385}
386
387/**
388 * ext4_encrypt() - Encrypts a page
389 * @inode: The inode for which the encryption should take place
390 * @plaintext_page: The page to encrypt. Must be locked.
391 *
392 * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
393 * encryption context.
394 *
395 * Called on the page write path. The caller must call
396 * ext4_restore_control_page() on the returned ciphertext page to
397 * release the bounce buffer and the encryption context.
398 *
399 * Return: An allocated page with the encrypted content on success. Else, an
400 * error value or NULL.
401 */
402struct page *ext4_encrypt(struct inode *inode,
403 struct page *plaintext_page)
404{
405 struct ext4_crypto_ctx *ctx;
406 struct page *ciphertext_page = NULL;
407 int err;
408
409 BUG_ON(!PageLocked(plaintext_page));
410
411 ctx = ext4_get_crypto_ctx(inode);
412 if (IS_ERR(ctx))
413 return (struct page *) ctx;
414
415 /* The encryption operation will require a bounce page. */
416 ciphertext_page = alloc_page(GFP_NOFS);
417 if (!ciphertext_page) {
418 /* This is a potential bottleneck, but at least we'll have
419 * forward progress. */
420 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
421 GFP_NOFS);
422 if (WARN_ON_ONCE(!ciphertext_page)) {
423 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
424 GFP_NOFS | __GFP_WAIT);
425 }
426 ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
427 } else {
428 ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
429 }
430 ctx->bounce_page = ciphertext_page;
431 ctx->control_page = plaintext_page;
432 err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index,
433 plaintext_page, ciphertext_page);
434 if (err) {
435 ext4_release_crypto_ctx(ctx);
436 return ERR_PTR(err);
437 }
438 SetPagePrivate(ciphertext_page);
439 set_page_private(ciphertext_page, (unsigned long)ctx);
440 lock_page(ciphertext_page);
441 return ciphertext_page;
442}
443
444/**
445 * ext4_decrypt() - Decrypts a page in-place
446 * @ctx: The encryption context.
447 * @page: The page to decrypt. Must be locked.
448 *
449 * Decrypts page in-place using the ctx encryption context.
450 *
451 * Called from the read completion callback.
452 *
453 * Return: Zero on success, non-zero otherwise.
454 */
455int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page)
456{
457 BUG_ON(!PageLocked(page));
458
459 return ext4_page_crypto(ctx, page->mapping->host,
460 EXT4_DECRYPT, page->index, page, page);
461}
462
463/*
464 * Convenience function which takes care of allocating and
465 * deallocating the encryption context
466 */
467int ext4_decrypt_one(struct inode *inode, struct page *page)
468{
469 int ret;
470
471 struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode);
472
 473 if (IS_ERR(ctx))
 474 return PTR_ERR(ctx);
475 ret = ext4_decrypt(ctx, page);
476 ext4_release_crypto_ctx(ctx);
477 return ret;
478}
479
480int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
481{
482 struct ext4_crypto_ctx *ctx;
483 struct page *ciphertext_page = NULL;
484 struct bio *bio;
 485 ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
486 ext4_fsblk_t pblk = ext4_ext_pblock(ex);
487 unsigned int len = ext4_ext_get_actual_len(ex);
488 int err = 0;
489
490 BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
491
492 ctx = ext4_get_crypto_ctx(inode);
493 if (IS_ERR(ctx))
494 return PTR_ERR(ctx);
495
496 ciphertext_page = alloc_page(GFP_NOFS);
497 if (!ciphertext_page) {
498 /* This is a potential bottleneck, but at least we'll have
499 * forward progress. */
500 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
501 GFP_NOFS);
502 if (WARN_ON_ONCE(!ciphertext_page)) {
503 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
504 GFP_NOFS | __GFP_WAIT);
505 }
506 ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
507 } else {
508 ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
509 }
510 ctx->bounce_page = ciphertext_page;
511
512 while (len--) {
513 err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk,
514 ZERO_PAGE(0), ciphertext_page);
515 if (err)
516 goto errout;
517
518 bio = bio_alloc(GFP_KERNEL, 1);
519 if (!bio) {
520 err = -ENOMEM;
521 goto errout;
522 }
523 bio->bi_bdev = inode->i_sb->s_bdev;
524 bio->bi_iter.bi_sector = pblk;
525 err = bio_add_page(bio, ciphertext_page,
526 inode->i_sb->s_blocksize, 0);
527 if (err) {
528 bio_put(bio);
529 goto errout;
530 }
531 err = submit_bio_wait(WRITE, bio);
532 if (err)
533 goto errout;
534 }
535 err = 0;
536errout:
537 ext4_release_crypto_ctx(ctx);
538 return err;
539}
540
541bool ext4_valid_contents_enc_mode(uint32_t mode)
542{
543 return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS);
544}
545
546/**
547 * ext4_validate_encryption_key_size() - Validate the encryption key size
548 * @mode: The key mode.
549 * @size: The key size to validate.
550 *
551 * Return: The validated key size for @mode. Zero if invalid.
552 */
553uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
554{
555 if (size == ext4_encryption_key_size(mode))
556 return size;
557 return 0;
558}
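Taken together, crypto.c gives the write path a bounce-page discipline: encrypt into a bounce page, submit that page for I/O, then unwind; reads decrypt in place. A hedged usage sketch (error handling trimmed, surrounding code schematic):

    /* Write path: 'page' is a locked page-cache page. */
    struct page *bounce = ext4_encrypt(inode, page);

    if (IS_ERR(bounce))
            return PTR_ERR(bounce);
    /* ... submit 'bounce' to the block layer in place of 'page' ... */
    ext4_restore_control_page(bounce);  /* after I/O: releases ctx + bounce */

    /* Read path: decrypt in place from the read-completion worker. */
    err = ext4_decrypt_one(inode, page);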
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
new file mode 100644
index 000000000000..ca2f5948c1ac
--- /dev/null
+++ b/fs/ext4/crypto_fname.c
@@ -0,0 +1,709 @@
1/*
2 * linux/fs/ext4/crypto_fname.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains functions for filename crypto management in ext4
7 *
8 * Written by Uday Savagaonkar, 2014.
9 *
10 * This has not yet undergone a rigorous security audit.
11 *
12 */
13
14#include <crypto/hash.h>
15#include <crypto/sha.h>
16#include <keys/encrypted-type.h>
17#include <keys/user-type.h>
18#include <linux/crypto.h>
19#include <linux/gfp.h>
20#include <linux/kernel.h>
21#include <linux/key.h>
23#include <linux/list.h>
24#include <linux/mempool.h>
25#include <linux/random.h>
26#include <linux/scatterlist.h>
27#include <linux/spinlock_types.h>
28
29#include "ext4.h"
30#include "ext4_crypto.h"
31#include "xattr.h"
32
33/**
34 * ext4_dir_crypt_complete() -
35 */
36static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
37{
38 struct ext4_completion_result *ecr = req->data;
39
40 if (res == -EINPROGRESS)
41 return;
42 ecr->res = res;
43 complete(&ecr->completion);
44}
45
46bool ext4_valid_filenames_enc_mode(uint32_t mode)
47{
48 return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
49}
50
51/**
52 * ext4_fname_encrypt() -
53 *
54 * This function encrypts the input filename, and returns the length of the
55 * ciphertext. Errors are returned as negative numbers. We trust the caller to
 56 * allocate sufficient memory for the oname string.
57 */
58static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
59 const struct qstr *iname,
60 struct ext4_str *oname)
61{
62 u32 ciphertext_len;
63 struct ablkcipher_request *req = NULL;
64 DECLARE_EXT4_COMPLETION_RESULT(ecr);
65 struct crypto_ablkcipher *tfm = ctx->ctfm;
66 int res = 0;
67 char iv[EXT4_CRYPTO_BLOCK_SIZE];
68 struct scatterlist sg[1];
69 char *workbuf;
70
71 if (iname->len <= 0 || iname->len > ctx->lim)
72 return -EIO;
73
74 ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
75 EXT4_CRYPTO_BLOCK_SIZE : iname->len;
76 ciphertext_len = (ciphertext_len > ctx->lim)
77 ? ctx->lim : ciphertext_len;
78
79 /* Allocate request */
80 req = ablkcipher_request_alloc(tfm, GFP_NOFS);
81 if (!req) {
82 printk_ratelimited(
83 KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
84 return -ENOMEM;
85 }
86 ablkcipher_request_set_callback(req,
87 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
88 ext4_dir_crypt_complete, &ecr);
89
90 /* Map the workpage */
91 workbuf = kmap(ctx->workpage);
92
93 /* Copy the input */
94 memcpy(workbuf, iname->name, iname->len);
95 if (iname->len < ciphertext_len)
96 memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
97
98 /* Initialize IV */
99 memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
100
101 /* Create encryption request */
102 sg_init_table(sg, 1);
103 sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
104 ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
105 res = crypto_ablkcipher_encrypt(req);
106 if (res == -EINPROGRESS || res == -EBUSY) {
107 BUG_ON(req->base.data != &ecr);
108 wait_for_completion(&ecr.completion);
109 res = ecr.res;
110 }
111 if (res >= 0) {
112 /* Copy the result to output */
113 memcpy(oname->name, workbuf, ciphertext_len);
114 res = ciphertext_len;
115 }
116 kunmap(ctx->workpage);
117 ablkcipher_request_free(req);
118 if (res < 0) {
119 printk_ratelimited(
120 KERN_ERR "%s: Error (error code %d)\n", __func__, res);
121 }
122 oname->len = ciphertext_len;
123 return res;
124}
125
126/*
127 * ext4_fname_decrypt()
128 * This function decrypts the input filename, and returns
129 * the length of the plaintext.
130 * Errors are returned as negative numbers.
 131 * We trust the caller to allocate sufficient memory for the oname string.
132 */
133static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
134 const struct ext4_str *iname,
135 struct ext4_str *oname)
136{
137 struct ext4_str tmp_in[2], tmp_out[1];
138 struct ablkcipher_request *req = NULL;
139 DECLARE_EXT4_COMPLETION_RESULT(ecr);
140 struct scatterlist sg[1];
141 struct crypto_ablkcipher *tfm = ctx->ctfm;
142 int res = 0;
143 char iv[EXT4_CRYPTO_BLOCK_SIZE];
144 char *workbuf;
145
146 if (iname->len <= 0 || iname->len > ctx->lim)
147 return -EIO;
148
149 tmp_in[0].name = iname->name;
150 tmp_in[0].len = iname->len;
151 tmp_out[0].name = oname->name;
152
153 /* Allocate request */
154 req = ablkcipher_request_alloc(tfm, GFP_NOFS);
155 if (!req) {
156 printk_ratelimited(
157 KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
158 return -ENOMEM;
159 }
160 ablkcipher_request_set_callback(req,
161 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
162 ext4_dir_crypt_complete, &ecr);
163
164 /* Map the workpage */
165 workbuf = kmap(ctx->workpage);
166
167 /* Copy the input */
168 memcpy(workbuf, iname->name, iname->len);
169
170 /* Initialize IV */
171 memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
172
173 /* Create encryption request */
174 sg_init_table(sg, 1);
175 sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
176 ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
177 res = crypto_ablkcipher_decrypt(req);
178 if (res == -EINPROGRESS || res == -EBUSY) {
179 BUG_ON(req->base.data != &ecr);
180 wait_for_completion(&ecr.completion);
181 res = ecr.res;
182 }
183 if (res >= 0) {
184 /* Copy the result to output */
185 memcpy(oname->name, workbuf, iname->len);
186 res = iname->len;
187 }
188 kunmap(ctx->workpage);
189 ablkcipher_request_free(req);
190 if (res < 0) {
191 printk_ratelimited(
 192 KERN_ERR "%s: Error in ext4_fname_decrypt (error code %d)\n",
193 __func__, res);
194 return res;
195 }
196
197 oname->len = strnlen(oname->name, iname->len);
198 return oname->len;
199}
200
201/**
202 * ext4_fname_encode_digest() -
203 *
204 * Encodes the input digest using characters from the set [a-zA-Z0-9_+].
205 * The encoded string is roughly 4/3 times the size of the input string.
206 */
207int ext4_fname_encode_digest(char *dst, char *src, u32 len)
208{
209 static const char *lookup_table =
210 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+";
211 u32 current_chunk, num_chunks, i;
212 char tmp_buf[3];
213 u32 c0, c1, c2, c3;
214
215 current_chunk = 0;
216 num_chunks = len/3;
217 for (i = 0; i < num_chunks; i++) {
218 c0 = src[3*i] & 0x3f;
219 c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f;
220 c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f;
221 c3 = (src[3*i+2]>>2) & 0x3f;
222 dst[4*i] = lookup_table[c0];
223 dst[4*i+1] = lookup_table[c1];
224 dst[4*i+2] = lookup_table[c2];
225 dst[4*i+3] = lookup_table[c3];
226 }
227 if (i*3 < len) {
228 memset(tmp_buf, 0, 3);
229 memcpy(tmp_buf, &src[3*i], len-3*i);
230 c0 = tmp_buf[0] & 0x3f;
231 c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f;
232 c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f;
233 c3 = (tmp_buf[2]>>2) & 0x3f;
234 dst[4*i] = lookup_table[c0];
235 dst[4*i+1] = lookup_table[c1];
236 dst[4*i+2] = lookup_table[c2];
237 dst[4*i+3] = lookup_table[c3];
238 i++;
239 }
240 return (i * 4);
241}
242
243/**
244 * ext4_fname_hash() -
245 *
246 * This function computes the hash of the input filename, and sets the output
247 * buffer to the *encoded* digest. It returns the length of the digest as its
248 * return value. Errors are returned as negative numbers. We trust the caller
 249 * to allocate sufficient memory for the oname string.
250 */
251static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx,
252 const struct ext4_str *iname,
253 struct ext4_str *oname)
254{
255 struct scatterlist sg;
256 struct hash_desc desc = {
257 .tfm = (struct crypto_hash *)ctx->htfm,
258 .flags = CRYPTO_TFM_REQ_MAY_SLEEP
259 };
260 int res = 0;
261
262 if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
263 res = ext4_fname_encode_digest(oname->name, iname->name,
264 iname->len);
265 oname->len = res;
266 return res;
267 }
268
269 sg_init_one(&sg, iname->name, iname->len);
270 res = crypto_hash_init(&desc);
271 if (res) {
272 printk(KERN_ERR
273 "%s: Error initializing crypto hash; res = [%d]\n",
274 __func__, res);
275 goto out;
276 }
277 res = crypto_hash_update(&desc, &sg, iname->len);
278 if (res) {
279 printk(KERN_ERR
280 "%s: Error updating crypto hash; res = [%d]\n",
281 __func__, res);
282 goto out;
283 }
284 res = crypto_hash_final(&desc,
285 &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]);
286 if (res) {
287 printk(KERN_ERR
288 "%s: Error finalizing crypto hash; res = [%d]\n",
289 __func__, res);
290 goto out;
291 }
292 /* Encode the digest as a printable string--this will increase the
293 * size of the digest */
294 oname->name[0] = 'I';
295 res = ext4_fname_encode_digest(oname->name+1,
296 &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE],
297 EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1;
298 oname->len = res;
299out:
300 return res;
301}
302
303/**
304 * ext4_free_fname_crypto_ctx() -
305 *
306 * Frees up a crypto context.
307 */
308void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx)
309{
310 if (ctx == NULL || IS_ERR(ctx))
311 return;
312
313 if (ctx->ctfm && !IS_ERR(ctx->ctfm))
314 crypto_free_ablkcipher(ctx->ctfm);
315 if (ctx->htfm && !IS_ERR(ctx->htfm))
316 crypto_free_hash(ctx->htfm);
317 if (ctx->workpage && !IS_ERR(ctx->workpage))
318 __free_page(ctx->workpage);
319 kfree(ctx);
320}
321
322/**
323 * ext4_put_fname_crypto_ctx() -
324 *
 325 * Return: The crypto context is returned to the free list. If the free list
 326 * is above a threshold, the context is completely freed instead.
 327 *
 328 * TODO: Currently we directly free the crypto context. Eventually we should
 329 * add code to return it to the free list. Such an approach will increase the
 330 * efficiency of directory lookup.
331 */
332void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx)
333{
334 if (*ctx == NULL || IS_ERR(*ctx))
335 return;
336 ext4_free_fname_crypto_ctx(*ctx);
337 *ctx = NULL;
338}
339
340/**
341 * ext4_search_fname_crypto_ctx() -
342 */
343static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx(
344 const struct ext4_encryption_key *key)
345{
346 return NULL;
347}
348
349/**
350 * ext4_alloc_fname_crypto_ctx() -
351 */
352struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx(
353 const struct ext4_encryption_key *key)
354{
355 struct ext4_fname_crypto_ctx *ctx;
356
357 ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS);
358 if (ctx == NULL)
359 return ERR_PTR(-ENOMEM);
360 if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) {
 361 /* This will automatically set the key mode to invalid,
 362 * as the enum value for EXT4_ENCRYPTION_MODE_INVALID is zero */
363 memset(&ctx->key, 0, sizeof(ctx->key));
364 } else {
365 memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key));
366 }
367 ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode)
368 ? 0 : 1;
369 ctx->ctfm_key_is_ready = 0;
370 ctx->ctfm = NULL;
371 ctx->htfm = NULL;
372 ctx->workpage = NULL;
373 return ctx;
374}
375
376/**
377 * ext4_get_fname_crypto_ctx() -
378 *
379 * Allocates a free crypto context and initializes it to hold
380 * the crypto material for the inode.
381 *
382 * Return: NULL if not encrypted. Error value on error. Valid pointer otherwise.
383 */
384struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
385 struct inode *inode, u32 max_ciphertext_len)
386{
387 struct ext4_fname_crypto_ctx *ctx;
388 struct ext4_inode_info *ei = EXT4_I(inode);
389 int res;
390
391 /* Check if the crypto policy is set on the inode */
392 res = ext4_encrypted_inode(inode);
393 if (res == 0)
394 return NULL;
395
396 if (!ext4_has_encryption_key(inode))
397 ext4_generate_encryption_key(inode);
398
399 /* Get a crypto context based on the key.
400 * A new context is allocated if no context matches the requested key.
401 */
402 ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key));
403 if (ctx == NULL)
404 ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key));
405 if (IS_ERR(ctx))
406 return ctx;
407
408 if (ctx->has_valid_key) {
409 if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
410 printk_once(KERN_WARNING
411 "ext4: unsupported key mode %d\n",
412 ctx->key.mode);
413 return ERR_PTR(-ENOKEY);
414 }
415
 416 /* As a first cut, we will allocate a new tfm on every
 417 * call. Later, we will keep the tfm around in case the
 418 * key gets re-used */
419 if (ctx->ctfm == NULL) {
420 ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))",
421 0, 0);
422 }
423 if (IS_ERR(ctx->ctfm)) {
424 res = PTR_ERR(ctx->ctfm);
425 printk(
426 KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
427 __func__, res);
428 ctx->ctfm = NULL;
429 ext4_put_fname_crypto_ctx(&ctx);
430 return ERR_PTR(res);
431 }
432 if (ctx->ctfm == NULL) {
433 printk(
434 KERN_DEBUG "%s: could not allocate crypto tfm\n",
435 __func__);
436 ext4_put_fname_crypto_ctx(&ctx);
437 return ERR_PTR(-ENOMEM);
438 }
439 if (ctx->workpage == NULL)
440 ctx->workpage = alloc_page(GFP_NOFS);
441 if (IS_ERR(ctx->workpage)) {
442 res = PTR_ERR(ctx->workpage);
443 printk(
444 KERN_DEBUG "%s: error (%d) allocating work page\n",
445 __func__, res);
446 ctx->workpage = NULL;
447 ext4_put_fname_crypto_ctx(&ctx);
448 return ERR_PTR(res);
449 }
450 if (ctx->workpage == NULL) {
451 printk(
452 KERN_DEBUG "%s: could not allocate work page\n",
453 __func__);
454 ext4_put_fname_crypto_ctx(&ctx);
455 return ERR_PTR(-ENOMEM);
456 }
457 ctx->lim = max_ciphertext_len;
458 crypto_ablkcipher_clear_flags(ctx->ctfm, ~0);
459 crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm),
460 CRYPTO_TFM_REQ_WEAK_KEY);
461
462 /* If we are lucky, we will get a context that is already
463 * set up with the right key. Else, we will have to
464 * set the key */
465 if (!ctx->ctfm_key_is_ready) {
 466 /* Since our crypto objectives for filename
 467 * encryption are pretty weak, we directly use
 468 * the inode master key */
469 res = crypto_ablkcipher_setkey(ctx->ctfm,
470 ctx->key.raw, ctx->key.size);
471 if (res) {
472 ext4_put_fname_crypto_ctx(&ctx);
473 return ERR_PTR(-EIO);
474 }
475 ctx->ctfm_key_is_ready = 1;
476 } else {
477 /* In the current implementation, key should never be
478 * marked "ready" for a context that has just been
479 * allocated. So we should never reach here */
480 BUG();
481 }
482 }
483 if (ctx->htfm == NULL)
484 ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
485 if (IS_ERR(ctx->htfm)) {
486 res = PTR_ERR(ctx->htfm);
487 printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n",
488 __func__, res);
489 ctx->htfm = NULL;
490 ext4_put_fname_crypto_ctx(&ctx);
491 return ERR_PTR(res);
492 }
493 if (ctx->htfm == NULL) {
494 printk(KERN_DEBUG "%s: could not allocate hash tfm\n",
495 __func__);
496 ext4_put_fname_crypto_ctx(&ctx);
497 return ERR_PTR(-ENOMEM);
498 }
499
500 return ctx;
501}
502
503/**
504 * ext4_fname_crypto_round_up() -
505 *
 506 * Return: @size rounded up to the next multiple of @blksize
507 */
508u32 ext4_fname_crypto_round_up(u32 size, u32 blksize)
509{
510 return ((size+blksize-1)/blksize)*blksize;
511}
512
513/**
514 * ext4_fname_crypto_namelen_on_disk() -
515 */
516int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
517 u32 namelen)
518{
519 u32 ciphertext_len;
520
521 if (ctx == NULL)
522 return -EIO;
523 if (!(ctx->has_valid_key))
524 return -EACCES;
525 ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
526 EXT4_CRYPTO_BLOCK_SIZE : namelen;
527 ciphertext_len = (ciphertext_len > ctx->lim)
528 ? ctx->lim : ciphertext_len;
529 return (int) ciphertext_len;
530}
531
532/**
 533 * ext4_fname_crypto_alloc_buffer() -
534 *
535 * Allocates an output buffer that is sufficient for the crypto operation
536 * specified by the context and the direction.
537 */
538int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
539 u32 ilen, struct ext4_str *crypto_str)
540{
541 unsigned int olen;
542
543 if (!ctx)
544 return -EIO;
545 olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE);
546 crypto_str->len = olen;
547 if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
548 olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
549 /* Allocated buffer can hold one more character to null-terminate the
550 * string */
551 crypto_str->name = kmalloc(olen+1, GFP_NOFS);
552 if (!(crypto_str->name))
553 return -ENOMEM;
554 return 0;
555}
556
557/**
558 * ext4_fname_crypto_free_buffer() -
559 *
560 * Frees the buffer allocated for crypto operation.
561 */
562void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
563{
564 if (!crypto_str)
565 return;
566 kfree(crypto_str->name);
567 crypto_str->name = NULL;
568}
569
570/**
 571 * _ext4_fname_disk_to_usr() - converts a filename from disk space to user space
572 */
573int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
574 const struct ext4_str *iname,
575 struct ext4_str *oname)
576{
577 if (ctx == NULL)
578 return -EIO;
579 if (iname->len < 3) {
 580 /* Check for . and .. */
581 if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') {
582 oname->name[0] = '.';
583 oname->name[iname->len-1] = '.';
584 oname->len = iname->len;
585 return oname->len;
586 }
587 }
588 if (ctx->has_valid_key)
589 return ext4_fname_decrypt(ctx, iname, oname);
590 else
591 return ext4_fname_hash(ctx, iname, oname);
592}
593
594int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
595 const struct ext4_dir_entry_2 *de,
596 struct ext4_str *oname)
597{
598 struct ext4_str iname = {.name = (unsigned char *) de->name,
599 .len = de->name_len };
600
601 return _ext4_fname_disk_to_usr(ctx, &iname, oname);
602}
603
604
605/**
606 * ext4_fname_usr_to_disk() - converts a filename from user space to disk space
607 */
608int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
609 const struct qstr *iname,
610 struct ext4_str *oname)
611{
612 int res;
613
614 if (ctx == NULL)
615 return -EIO;
616 if (iname->len < 3) {
 617 /* Check for . and .. */
618 if (iname->name[0] == '.' &&
619 iname->name[iname->len-1] == '.') {
620 oname->name[0] = '.';
621 oname->name[iname->len-1] = '.';
622 oname->len = iname->len;
623 return oname->len;
624 }
625 }
626 if (ctx->has_valid_key) {
627 res = ext4_fname_encrypt(ctx, iname, oname);
628 return res;
629 }
630 /* Without a proper key, a user is not allowed to modify the filenames
631 * in a directory. Consequently, a user space name cannot be mapped to
632 * a disk-space name */
633 return -EACCES;
634}
635
636/*
637 * Calculate the htree hash from a filename from user space
638 */
639int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
640 const struct qstr *iname,
641 struct dx_hash_info *hinfo)
642{
643 struct ext4_str tmp, tmp2;
644 int ret = 0;
645
646 if (!ctx || !ctx->has_valid_key ||
647 ((iname->name[0] == '.') &&
648 ((iname->len == 1) ||
649 ((iname->name[1] == '.') && (iname->len == 2))))) {
650 ext4fs_dirhash(iname->name, iname->len, hinfo);
651 return 0;
652 }
653
654 /* First encrypt the plaintext name */
655 ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
656 if (ret < 0)
657 return ret;
658
659 ret = ext4_fname_encrypt(ctx, iname, &tmp);
660 if (ret < 0)
661 goto out;
662
663 tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
664 tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL);
665 if (tmp2.name == NULL) {
666 ret = -ENOMEM;
667 goto out;
668 }
669
670 ret = ext4_fname_hash(ctx, &tmp, &tmp2);
671 if (ret > 0)
672 ext4fs_dirhash(tmp2.name, tmp2.len, hinfo);
673 ext4_fname_crypto_free_buffer(&tmp2);
674out:
675 ext4_fname_crypto_free_buffer(&tmp);
676 return ret;
677}
678
679/**
 680 * ext4_fname_disk_to_hash() - converts a filename from disk space to an htree hash
681 */
682int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
683 const struct ext4_dir_entry_2 *de,
684 struct dx_hash_info *hinfo)
685{
686 struct ext4_str iname = {.name = (unsigned char *) de->name,
687 .len = de->name_len};
688 struct ext4_str tmp;
689 int ret;
690
691 if (!ctx ||
692 ((iname.name[0] == '.') &&
693 ((iname.len == 1) ||
694 ((iname.name[1] == '.') && (iname.len == 2))))) {
695 ext4fs_dirhash(iname.name, iname.len, hinfo);
696 return 0;
697 }
698
699 tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
700 tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL);
701 if (tmp.name == NULL)
702 return -ENOMEM;
703
704 ret = ext4_fname_hash(ctx, &iname, &tmp);
705 if (ret > 0)
706 ext4fs_dirhash(tmp.name, tmp.len, hinfo);
707 ext4_fname_crypto_free_buffer(&tmp);
708 return ret;
709}
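The digest encoding above packs bits low-order first, so it is not interchangeable with standard base64. A worked example, computed by hand from the code (hedged accordingly):

    char dst[5] = { 0 };
    char src[1] = { 0x41 };                 /* one byte, "A" */
    int n = ext4_fname_encode_digest(dst, src, 1);

    /* The tail chunk is zero-padded to { 0x41, 0x00, 0x00 }, giving
     *   c0 = 0x41 & 0x3f       = 1 -> 'b'
     *   c1 = (0x41 >> 6) & 0x3 = 1 -> 'b'
     *   c2 = 0                     -> 'a'
     *   c3 = 0                     -> 'a'
     * so n == 4 and dst holds "bbaa". */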
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
new file mode 100644
index 000000000000..c8392af8abbb
--- /dev/null
+++ b/fs/ext4/crypto_key.c
@@ -0,0 +1,165 @@
1/*
2 * linux/fs/ext4/crypto_key.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption key functions for ext4
7 *
8 * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
9 */
10
11#include <keys/encrypted-type.h>
12#include <keys/user-type.h>
13#include <linux/random.h>
14#include <linux/scatterlist.h>
15#include <uapi/linux/keyctl.h>
16
17#include "ext4.h"
18#include "xattr.h"
19
20static void derive_crypt_complete(struct crypto_async_request *req, int rc)
21{
22 struct ext4_completion_result *ecr = req->data;
23
24 if (rc == -EINPROGRESS)
25 return;
26
27 ecr->res = rc;
28 complete(&ecr->completion);
29}
30
31/**
32 * ext4_derive_key_aes() - Derive a key using AES-128-ECB
 34 * @deriving_key: Encryption key used for derivation.
34 * @source_key: Source key to which to apply derivation.
35 * @derived_key: Derived key.
36 *
37 * Return: Zero on success; non-zero otherwise.
38 */
39static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
40 char source_key[EXT4_AES_256_XTS_KEY_SIZE],
41 char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
42{
43 int res = 0;
44 struct ablkcipher_request *req = NULL;
45 DECLARE_EXT4_COMPLETION_RESULT(ecr);
46 struct scatterlist src_sg, dst_sg;
47 struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
48 0);
49
50 if (IS_ERR(tfm)) {
51 res = PTR_ERR(tfm);
52 tfm = NULL;
53 goto out;
54 }
55 crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
56 req = ablkcipher_request_alloc(tfm, GFP_NOFS);
57 if (!req) {
58 res = -ENOMEM;
59 goto out;
60 }
61 ablkcipher_request_set_callback(req,
62 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
63 derive_crypt_complete, &ecr);
64 res = crypto_ablkcipher_setkey(tfm, deriving_key,
65 EXT4_AES_128_ECB_KEY_SIZE);
66 if (res < 0)
67 goto out;
68 sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE);
69 sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE);
70 ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
71 EXT4_AES_256_XTS_KEY_SIZE, NULL);
72 res = crypto_ablkcipher_encrypt(req);
73 if (res == -EINPROGRESS || res == -EBUSY) {
74 BUG_ON(req->base.data != &ecr);
75 wait_for_completion(&ecr.completion);
76 res = ecr.res;
77 }
78
79out:
80 if (req)
81 ablkcipher_request_free(req);
82 if (tfm)
83 crypto_free_ablkcipher(tfm);
84 return res;
85}
86
87/**
88 * ext4_generate_encryption_key() - generates an encryption key
89 * @inode: The inode to generate the encryption key for.
90 */
91int ext4_generate_encryption_key(struct inode *inode)
92{
93 struct ext4_inode_info *ei = EXT4_I(inode);
94 struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
95 char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
96 (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
97 struct key *keyring_key = NULL;
98 struct ext4_encryption_key *master_key;
99 struct ext4_encryption_context ctx;
100 struct user_key_payload *ukp;
101 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
102 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
103 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
104 &ctx, sizeof(ctx));
105
106 if (res != sizeof(ctx)) {
107 if (res > 0)
108 res = -EINVAL;
109 goto out;
110 }
111 res = 0;
112
113 if (S_ISREG(inode->i_mode))
114 crypt_key->mode = ctx.contents_encryption_mode;
115 else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
116 crypt_key->mode = ctx.filenames_encryption_mode;
117 else {
118 printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
119 BUG();
120 }
121 crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
122 BUG_ON(!crypt_key->size);
123 if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
124 memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
125 goto out;
126 }
127 memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
128 EXT4_KEY_DESC_PREFIX_SIZE);
129 sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE,
130 "%*phN", EXT4_KEY_DESCRIPTOR_SIZE,
131 ctx.master_key_descriptor);
132 full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
133 (2 * EXT4_KEY_DESCRIPTOR_SIZE)] = '\0';
134 keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
135 if (IS_ERR(keyring_key)) {
136 res = PTR_ERR(keyring_key);
137 keyring_key = NULL;
138 goto out;
139 }
140 BUG_ON(keyring_key->type != &key_type_logon);
141 ukp = ((struct user_key_payload *)keyring_key->payload.data);
142 if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
143 res = -EINVAL;
144 goto out;
145 }
146 master_key = (struct ext4_encryption_key *)ukp->data;
147 BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
148 EXT4_KEY_DERIVATION_NONCE_SIZE);
149 BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
150 res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
151out:
152 if (keyring_key)
153 key_put(keyring_key);
154 if (res < 0)
155 crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
156 return res;
157}
158
159int ext4_has_encryption_key(struct inode *inode)
160{
161 struct ext4_inode_info *ei = EXT4_I(inode);
162 struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
163
164 return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID);
165}
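The derivation scheme is deliberately small: the per-inode nonce stored in the encryption context acts as an AES-128-ECB key that encrypts the master key fetched from the "logon" keyring, yielding the per-inode key. A hedged call sketch mirroring ext4_generate_encryption_key():

    /* Sizes follow the BUILD_BUG_ON/BUG_ON checks above: the nonce has
     * EXT4_AES_128_ECB_KEY_SIZE bytes and doubles as the derivation key;
     * source and derived keys are EXT4_AES_256_XTS_KEY_SIZE bytes. */
    res = ext4_derive_key_aes(ctx.nonce,        /* per-inode nonce      */
                              master_key->raw,  /* keyring payload      */
                              crypt_key->raw);  /* ei->i_encryption_key */
    if (res < 0)
            crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;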
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
new file mode 100644
index 000000000000..30eaf9e9864a
--- /dev/null
+++ b/fs/ext4/crypto_policy.c
@@ -0,0 +1,194 @@
1/*
2 * linux/fs/ext4/crypto_policy.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption policy functions for ext4
7 *
8 * Written by Michael Halcrow, 2015.
9 */
10
11#include <linux/random.h>
12#include <linux/string.h>
13#include <linux/types.h>
14
15#include "ext4.h"
16#include "xattr.h"
17
18static int ext4_inode_has_encryption_context(struct inode *inode)
19{
20 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
21 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0);
22 return (res > 0);
23}
24
25/*
26 * check whether the policy is consistent with the encryption context
27 * for the inode
28 */
29static int ext4_is_encryption_context_consistent_with_policy(
30 struct inode *inode, const struct ext4_encryption_policy *policy)
31{
32 struct ext4_encryption_context ctx;
33 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
34 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
35 sizeof(ctx));
36 if (res != sizeof(ctx))
37 return 0;
38 return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
39 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
40 (ctx.contents_encryption_mode ==
41 policy->contents_encryption_mode) &&
42 (ctx.filenames_encryption_mode ==
43 policy->filenames_encryption_mode));
44}
45
46static int ext4_create_encryption_context_from_policy(
47 struct inode *inode, const struct ext4_encryption_policy *policy)
48{
49 struct ext4_encryption_context ctx;
50 int res = 0;
51
52 ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
53 memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
54 EXT4_KEY_DESCRIPTOR_SIZE);
55 if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) {
56 printk(KERN_WARNING
57 "%s: Invalid contents encryption mode %d\n", __func__,
58 policy->contents_encryption_mode);
59 res = -EINVAL;
60 goto out;
61 }
62 if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
63 printk(KERN_WARNING
64 "%s: Invalid filenames encryption mode %d\n", __func__,
65 policy->filenames_encryption_mode);
66 res = -EINVAL;
67 goto out;
68 }
69 ctx.contents_encryption_mode = policy->contents_encryption_mode;
70 ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
71 BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
72 get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
73
74 res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
75 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
76 sizeof(ctx), 0);
77out:
78 if (!res)
79 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
80 return res;
81}
82
83int ext4_process_policy(const struct ext4_encryption_policy *policy,
84 struct inode *inode)
85{
86 if (policy->version != 0)
87 return -EINVAL;
88
89 if (!ext4_inode_has_encryption_context(inode)) {
90 if (!ext4_empty_dir(inode))
91 return -ENOTEMPTY;
92 return ext4_create_encryption_context_from_policy(inode,
93 policy);
94 }
95
96 if (ext4_is_encryption_context_consistent_with_policy(inode, policy))
97 return 0;
98
99 printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
100 __func__);
101 return -EINVAL;
102}
103
104int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
105{
106 struct ext4_encryption_context ctx;
107
108 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
109 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
110 &ctx, sizeof(ctx));
111 if (res != sizeof(ctx))
112 return -ENOENT;
113 if (ctx.format != EXT4_ENCRYPTION_CONTEXT_FORMAT_V1)
114 return -EINVAL;
115 policy->version = 0;
116 policy->contents_encryption_mode = ctx.contents_encryption_mode;
117 policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
118 memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
119 EXT4_KEY_DESCRIPTOR_SIZE);
120 return 0;
121}
122
123int ext4_is_child_context_consistent_with_parent(struct inode *parent,
124 struct inode *child)
125{
126 struct ext4_encryption_context parent_ctx, child_ctx;
127 int res;
128
129 if ((parent == NULL) || (child == NULL)) {
130 pr_err("parent %p child %p\n", parent, child);
131 BUG_ON(1);
132 }
133 /* no restrictions if the parent directory is not encrypted */
134 if (!ext4_encrypted_inode(parent))
135 return 1;
136 res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
137 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
138 &parent_ctx, sizeof(parent_ctx));
139 if (res != sizeof(parent_ctx))
140 return 0;
141 /* if the child directory is not encrypted, this is always a problem */
142 if (!ext4_encrypted_inode(child))
143 return 0;
144 res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
145 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
146 &child_ctx, sizeof(child_ctx));
147 if (res != sizeof(child_ctx))
148 return 0;
149 return (memcmp(parent_ctx.master_key_descriptor,
150 child_ctx.master_key_descriptor,
151 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
152 (parent_ctx.contents_encryption_mode ==
153 child_ctx.contents_encryption_mode) &&
154 (parent_ctx.filenames_encryption_mode ==
155 child_ctx.filenames_encryption_mode));
156}
157
158/**
159 * ext4_inherit_context() - Sets a child context from its parent
160 * @parent: Parent inode from which the context is inherited.
161 * @child: Child inode that inherits the context from @parent.
162 *
163 * Return: Zero on success, non-zero otherwise
164 */
165int ext4_inherit_context(struct inode *parent, struct inode *child)
166{
167 struct ext4_encryption_context ctx;
168 int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
169 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
170 &ctx, sizeof(ctx));
171
172 if (res != sizeof(ctx)) {
173 if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
174 ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
175 ctx.contents_encryption_mode =
176 EXT4_ENCRYPTION_MODE_AES_256_XTS;
177 ctx.filenames_encryption_mode =
178 EXT4_ENCRYPTION_MODE_AES_256_CTS;
179 memset(ctx.master_key_descriptor, 0x42,
180 EXT4_KEY_DESCRIPTOR_SIZE);
181 res = 0;
182 } else {
183 goto out;
184 }
185 }
186 get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
187 res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
188 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
189 sizeof(ctx), 0);
190out:
191 if (!res)
192 ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
193 return res;
194}
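A policy can only be installed on an empty directory, and every later call must agree with the stored context. A hedged sketch of how an ioctl handler might drive ext4_process_policy() (key_desc is a hypothetical descriptor buffer copied in from userspace):

    struct ext4_encryption_policy policy = {
            .version                   = 0,
            .contents_encryption_mode  = EXT4_ENCRYPTION_MODE_AES_256_XTS,
            .filenames_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_CTS,
    };

    /* key_desc: names the master key in the "logon" keyring. */
    memcpy(policy.master_key_descriptor, key_desc, EXT4_KEY_DESCRIPTOR_SIZE);

    err = ext4_process_policy(&policy, dir_inode);
    /* -ENOTEMPTY: directory already has entries and no context yet;
     * -EINVAL:    an existing context disagrees with the policy.    */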
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c24143ea9c08..61db51a5ce4c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -22,10 +22,8 @@
22 */ 22 */
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h>
26#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
27#include <linux/slab.h> 26#include <linux/slab.h>
28#include <linux/rbtree.h>
29#include "ext4.h" 27#include "ext4.h"
30#include "xattr.h" 28#include "xattr.h"
31 29
@@ -110,7 +108,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
110 int err; 108 int err;
111 struct inode *inode = file_inode(file); 109 struct inode *inode = file_inode(file);
112 struct super_block *sb = inode->i_sb; 110 struct super_block *sb = inode->i_sb;
111 struct buffer_head *bh = NULL;
113 int dir_has_error = 0; 112 int dir_has_error = 0;
113 struct ext4_fname_crypto_ctx *enc_ctx = NULL;
114 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
114 115
115 if (is_dx_dir(inode)) { 116 if (is_dx_dir(inode)) {
116 err = ext4_dx_readdir(file, ctx); 117 err = ext4_dx_readdir(file, ctx);
@@ -127,17 +128,28 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
127 128
128 if (ext4_has_inline_data(inode)) { 129 if (ext4_has_inline_data(inode)) {
129 int has_inline_data = 1; 130 int has_inline_data = 1;
130 int ret = ext4_read_inline_dir(file, ctx, 131 err = ext4_read_inline_dir(file, ctx,
131 &has_inline_data); 132 &has_inline_data);
132 if (has_inline_data) 133 if (has_inline_data)
133 return ret; 134 return err;
135 }
136
137 enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
138 if (IS_ERR(enc_ctx))
139 return PTR_ERR(enc_ctx);
140 if (enc_ctx) {
141 err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
142 &fname_crypto_str);
143 if (err < 0) {
144 ext4_put_fname_crypto_ctx(&enc_ctx);
145 return err;
146 }
134 } 147 }
135 148
136 offset = ctx->pos & (sb->s_blocksize - 1); 149 offset = ctx->pos & (sb->s_blocksize - 1);
137 150
138 while (ctx->pos < inode->i_size) { 151 while (ctx->pos < inode->i_size) {
139 struct ext4_map_blocks map; 152 struct ext4_map_blocks map;
140 struct buffer_head *bh = NULL;
141 153
142 map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); 154 map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
143 map.m_len = 1; 155 map.m_len = 1;
@@ -180,6 +192,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
180 (unsigned long long)ctx->pos); 192 (unsigned long long)ctx->pos);
181 ctx->pos += sb->s_blocksize - offset; 193 ctx->pos += sb->s_blocksize - offset;
182 brelse(bh); 194 brelse(bh);
195 bh = NULL;
183 continue; 196 continue;
184 } 197 }
185 set_buffer_verified(bh); 198 set_buffer_verified(bh);
@@ -226,25 +239,44 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
226 offset += ext4_rec_len_from_disk(de->rec_len, 239 offset += ext4_rec_len_from_disk(de->rec_len,
227 sb->s_blocksize); 240 sb->s_blocksize);
228 if (le32_to_cpu(de->inode)) { 241 if (le32_to_cpu(de->inode)) {
229 if (!dir_emit(ctx, de->name, 242 if (enc_ctx == NULL) {
230 de->name_len, 243 /* Directory is not encrypted */
231 le32_to_cpu(de->inode), 244 if (!dir_emit(ctx, de->name,
232 get_dtype(sb, de->file_type))) { 245 de->name_len,
233 brelse(bh); 246 le32_to_cpu(de->inode),
234 return 0; 247 get_dtype(sb, de->file_type)))
248 goto done;
249 } else {
250 /* Directory is encrypted */
251 err = ext4_fname_disk_to_usr(enc_ctx,
252 de, &fname_crypto_str);
253 if (err < 0)
254 goto errout;
255 if (!dir_emit(ctx,
256 fname_crypto_str.name, err,
257 le32_to_cpu(de->inode),
258 get_dtype(sb, de->file_type)))
259 goto done;
235 } 260 }
236 } 261 }
237 ctx->pos += ext4_rec_len_from_disk(de->rec_len, 262 ctx->pos += ext4_rec_len_from_disk(de->rec_len,
238 sb->s_blocksize); 263 sb->s_blocksize);
239 } 264 }
240 offset = 0; 265 if ((ctx->pos < inode->i_size) && !dir_relax(inode))
266 goto done;
241 brelse(bh); 267 brelse(bh);
242 if (ctx->pos < inode->i_size) { 268 bh = NULL;
243 if (!dir_relax(inode)) 269 offset = 0;
244 return 0;
245 }
246 } 270 }
247 return 0; 271done:
272 err = 0;
273errout:
274#ifdef CONFIG_EXT4_FS_ENCRYPTION
275 ext4_put_fname_crypto_ctx(&enc_ctx);
276 ext4_fname_crypto_free_buffer(&fname_crypto_str);
277#endif
278 brelse(bh);
279 return err;
248} 280}
249 281
250static inline int is_32bit_api(void) 282static inline int is_32bit_api(void)
@@ -384,10 +416,15 @@ void ext4_htree_free_dir_info(struct dir_private_info *p)
384 416
385/* 417/*
386 * Given a directory entry, enter it into the fname rb tree. 418 * Given a directory entry, enter it into the fname rb tree.
419 *
420 * When filename encryption is enabled, the dirent will hold the
421 * encrypted filename, while the htree will hold the decrypted filename.
422 * The decrypted filename is passed in via the ent_name parameter.
387 */ 423 */
388int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 424int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
389 __u32 minor_hash, 425 __u32 minor_hash,
390 struct ext4_dir_entry_2 *dirent) 426 struct ext4_dir_entry_2 *dirent,
427 struct ext4_str *ent_name)
391{ 428{
392 struct rb_node **p, *parent = NULL; 429 struct rb_node **p, *parent = NULL;
393 struct fname *fname, *new_fn; 430 struct fname *fname, *new_fn;
@@ -398,17 +435,17 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
398 p = &info->root.rb_node; 435 p = &info->root.rb_node;
399 436
400 /* Create and allocate the fname structure */ 437 /* Create and allocate the fname structure */
401 len = sizeof(struct fname) + dirent->name_len + 1; 438 len = sizeof(struct fname) + ent_name->len + 1;
402 new_fn = kzalloc(len, GFP_KERNEL); 439 new_fn = kzalloc(len, GFP_KERNEL);
403 if (!new_fn) 440 if (!new_fn)
404 return -ENOMEM; 441 return -ENOMEM;
405 new_fn->hash = hash; 442 new_fn->hash = hash;
406 new_fn->minor_hash = minor_hash; 443 new_fn->minor_hash = minor_hash;
407 new_fn->inode = le32_to_cpu(dirent->inode); 444 new_fn->inode = le32_to_cpu(dirent->inode);
408 new_fn->name_len = dirent->name_len; 445 new_fn->name_len = ent_name->len;
409 new_fn->file_type = dirent->file_type; 446 new_fn->file_type = dirent->file_type;
410 memcpy(new_fn->name, dirent->name, dirent->name_len); 447 memcpy(new_fn->name, ent_name->name, ent_name->len);
411 new_fn->name[dirent->name_len] = 0; 448 new_fn->name[ent_name->len] = 0;
412 449
413 while (*p) { 450 while (*p) {
414 parent = *p; 451 parent = *p;
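
[Editor's note: the new ent_name parameter lets callers pass a name that differs from the raw dirent, which is what the encrypted-readdir path above needs. For the unencrypted case the ext4_str simply aliases the dirent's own bytes, as the htree_inlinedir_to_tree hunk later in this patch shows; a condensed caller-side sketch:]

        struct ext4_str tmp_str;
        int err;

        tmp_str.name = de->name;        /* or the decrypted buffer for encrypted dirs */
        tmp_str.len = de->name_len;
        err = ext4_htree_store_dirent(dir_file, hinfo->hash,
                                      hinfo->minor_hash, de, &tmp_str);
        if (err)
                return err;
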
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f63c3d5805c4..ef267adce19a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -422,7 +422,7 @@ enum {
422 EXT4_INODE_DIRTY = 8, 422 EXT4_INODE_DIRTY = 8,
423 EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ 423 EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
424 EXT4_INODE_NOCOMPR = 10, /* Don't compress */ 424 EXT4_INODE_NOCOMPR = 10, /* Don't compress */
425 EXT4_INODE_ENCRYPT = 11, /* Compression error */ 425 EXT4_INODE_ENCRYPT = 11, /* Encrypted file */
426/* End compression flags --- maybe not all used */ 426/* End compression flags --- maybe not all used */
427 EXT4_INODE_INDEX = 12, /* hash-indexed directory */ 427 EXT4_INODE_INDEX = 12, /* hash-indexed directory */
428 EXT4_INODE_IMAGIC = 13, /* AFS directory */ 428 EXT4_INODE_IMAGIC = 13, /* AFS directory */
@@ -582,6 +582,15 @@ enum {
582#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 582#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
583#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 583#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
584 584
585/* Encryption algorithms */
586#define EXT4_ENCRYPTION_MODE_INVALID 0
587#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
588#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
589#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
590#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
591
592#include "ext4_crypto.h"
593
585/* 594/*
586 * ioctl commands 595 * ioctl commands
587 */ 596 */
@@ -603,6 +612,9 @@ enum {
603#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) 612#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
604#define EXT4_IOC_SWAP_BOOT _IO('f', 17) 613#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
605#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) 614#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
615#define EXT4_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct ext4_encryption_policy)
616#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
617#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
606 618
607#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 619#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
608/* 620/*
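
[Editor's note: a userspace sketch of driving the new policy ioctl. struct ext4_encryption_policy and the mode constants are kernel-internal in this patchset, so a real tool would carry its own packed copy of the definitions; the key-descriptor bytes below are placeholders, and the ioctl is presumably issued on a still-empty directory (note the new ext4_empty_dir() helper later in this patch):]

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/ioctl.h>

        int set_policy(const char *dirpath)
        {
                struct ext4_encryption_policy policy;
                int fd = open(dirpath, O_RDONLY);

                if (fd < 0)
                        return -1;
                memset(&policy, 0, sizeof(policy));
                policy.version = 0;     /* assumed: the v1 context uses policy version 0 */
                policy.contents_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
                policy.filenames_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_CTS;
                memcpy(policy.master_key_descriptor,
                       "\x00\x01\x02\x03\x04\x05\x06\x07",
                       EXT4_KEY_DESCRIPTOR_SIZE);
                if (ioctl(fd, EXT4_IOC_SET_ENCRYPTION_POLICY, &policy) < 0)
                        perror("EXT4_IOC_SET_ENCRYPTION_POLICY");
                close(fd);
                return 0;
        }
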
@@ -939,6 +951,11 @@ struct ext4_inode_info {
939 951
940 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ 952 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */
941 __u32 i_csum_seed; 953 __u32 i_csum_seed;
954
955#ifdef CONFIG_EXT4_FS_ENCRYPTION
956 /* Encryption params */
957 struct ext4_encryption_key i_encryption_key;
958#endif
942}; 959};
943 960
944/* 961/*
@@ -1142,7 +1159,8 @@ struct ext4_super_block {
1142 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 1159 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
1143 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 1160 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
1144 __u8 s_checksum_type; /* metadata checksum algorithm used */ 1161 __u8 s_checksum_type; /* metadata checksum algorithm used */
1145 __le16 s_reserved_pad; 1162 __u8 s_encryption_level; /* versioning level for encryption */
1163 __u8 s_reserved_pad; /* Padding to next 32bits */
1146 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ 1164 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
1147 __le32 s_snapshot_inum; /* Inode number of active snapshot */ 1165 __le32 s_snapshot_inum; /* Inode number of active snapshot */
1148 __le32 s_snapshot_id; /* sequential ID of active snapshot */ 1166 __le32 s_snapshot_id; /* sequential ID of active snapshot */
@@ -1169,7 +1187,9 @@ struct ext4_super_block {
1169 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ 1187 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1170 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ 1188 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
1171 __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ 1189 __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
1172 __le32 s_reserved[105]; /* Padding to the end of the block */ 1190 __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
1191 __le32 s_lpf_ino; /* Location of the lost+found inode */
1192 __le32 s_reserved[100]; /* Padding to the end of the block */
1173 __le32 s_checksum; /* crc32c(superblock) */ 1193 __le32 s_checksum; /* crc32c(superblock) */
1174}; 1194};
1175 1195
@@ -1180,8 +1200,16 @@ struct ext4_super_block {
1180/* 1200/*
1181 * run-time mount flags 1201 * run-time mount flags
1182 */ 1202 */
1183#define EXT4_MF_MNTDIR_SAMPLED 0x0001 1203#define EXT4_MF_MNTDIR_SAMPLED 0x0001
1184#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ 1204#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
1205#define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004
1206
1207#ifdef CONFIG_EXT4_FS_ENCRYPTION
1208#define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \
1209 EXT4_MF_TEST_DUMMY_ENCRYPTION))
1210#else
1211#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
1212#endif
1185 1213
1186/* Number of quota types we support */ 1214/* Number of quota types we support */
1187#define EXT4_MAXQUOTAS 2 1215#define EXT4_MAXQUOTAS 2
@@ -1351,6 +1379,12 @@ struct ext4_sb_info {
1351 struct ratelimit_state s_err_ratelimit_state; 1379 struct ratelimit_state s_err_ratelimit_state;
1352 struct ratelimit_state s_warning_ratelimit_state; 1380 struct ratelimit_state s_warning_ratelimit_state;
1353 struct ratelimit_state s_msg_ratelimit_state; 1381 struct ratelimit_state s_msg_ratelimit_state;
1382
1383#ifdef CONFIG_EXT4_FS_ENCRYPTION
1384 /* Encryption */
1385 uint32_t s_file_encryption_mode;
1386 uint32_t s_dir_encryption_mode;
1387#endif
1354}; 1388};
1355 1389
1356static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1390static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1466,6 +1500,18 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1466#define EXT4_SB(sb) (sb) 1500#define EXT4_SB(sb) (sb)
1467#endif 1501#endif
1468 1502
1503/*
1504 * Returns true if the inode is encrypted
1505 */
1506static inline int ext4_encrypted_inode(struct inode *inode)
1507{
1508#ifdef CONFIG_EXT4_FS_ENCRYPTION
1509 return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT);
1510#else
1511 return 0;
1512#endif
1513}
1514
1469#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime 1515#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
1470 1516
1471/* 1517/*
@@ -1575,8 +1621,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1575 EXT4_FEATURE_INCOMPAT_EXTENTS| \ 1621 EXT4_FEATURE_INCOMPAT_EXTENTS| \
1576 EXT4_FEATURE_INCOMPAT_64BIT| \ 1622 EXT4_FEATURE_INCOMPAT_64BIT| \
1577 EXT4_FEATURE_INCOMPAT_FLEX_BG| \ 1623 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
1578 EXT4_FEATURE_INCOMPAT_MMP | \ 1624 EXT4_FEATURE_INCOMPAT_MMP | \
1579 EXT4_FEATURE_INCOMPAT_INLINE_DATA) 1625 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
1626 EXT4_FEATURE_INCOMPAT_ENCRYPT)
1580#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1627#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
1581 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1628 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
1582 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ 1629 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2001,6 +2048,99 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
2001 struct ext4_group_desc *gdp); 2048 struct ext4_group_desc *gdp);
2002ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 2049ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
2003 2050
2051/* crypto_policy.c */
2052int ext4_is_child_context_consistent_with_parent(struct inode *parent,
2053 struct inode *child);
2054int ext4_inherit_context(struct inode *parent, struct inode *child);
2055void ext4_to_hex(char *dst, char *src, size_t src_size);
2056int ext4_process_policy(const struct ext4_encryption_policy *policy,
2057 struct inode *inode);
2058int ext4_get_policy(struct inode *inode,
2059 struct ext4_encryption_policy *policy);
2060
2061/* crypto.c */
2062bool ext4_valid_contents_enc_mode(uint32_t mode);
2063uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
2064extern struct workqueue_struct *ext4_read_workqueue;
2065struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode);
2066void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx);
2067void ext4_restore_control_page(struct page *data_page);
2068struct page *ext4_encrypt(struct inode *inode,
2069 struct page *plaintext_page);
2070int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page);
2071int ext4_decrypt_one(struct inode *inode, struct page *page);
2072int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
2073
2074#ifdef CONFIG_EXT4_FS_ENCRYPTION
2075int ext4_init_crypto(void);
2076void ext4_exit_crypto(void);
2077static inline int ext4_sb_has_crypto(struct super_block *sb)
2078{
2079 return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
2080}
2081#else
2082static inline int ext4_init_crypto(void) { return 0; }
2083static inline void ext4_exit_crypto(void) { }
2084static inline int ext4_sb_has_crypto(struct super_block *sb)
2085{
2086 return 0;
2087}
2088#endif
2089
2090/* crypto_fname.c */
2091bool ext4_valid_filenames_enc_mode(uint32_t mode);
2092u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
2093int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
2094 u32 ilen, struct ext4_str *crypto_str);
2095int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
2096 const struct ext4_str *iname,
2097 struct ext4_str *oname);
2098int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
2099 const struct ext4_dir_entry_2 *de,
2100 struct ext4_str *oname);
2101int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
2102 const struct qstr *iname,
2103 struct ext4_str *oname);
2104int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
2105 const struct qstr *iname,
2106 struct dx_hash_info *hinfo);
2107int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
2108 const struct ext4_dir_entry_2 *de,
2109 struct dx_hash_info *hinfo);
2110int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
2111 u32 namelen);
2112
2113#ifdef CONFIG_EXT4_FS_ENCRYPTION
2114void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
2115struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
2116 u32 max_len);
2117void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str);
2118#else
2119static inline
2120void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { }
2121static inline
2122struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
2123 u32 max_len)
2124{
2125 return NULL;
2126}
2127static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { }
2128#endif
2129
2130
2131/* crypto_key.c */
2132int ext4_generate_encryption_key(struct inode *inode);
2133
2134#ifdef CONFIG_EXT4_FS_ENCRYPTION
2135int ext4_has_encryption_key(struct inode *inode);
2136#else
2137static inline int ext4_has_encryption_key(struct inode *inode)
2138{
2139 return 0;
2140}
2141#endif
2142
2143
2004/* dir.c */ 2144/* dir.c */
2005extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, 2145extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
2006 struct file *, 2146 struct file *,
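
[Editor's note: the crypto_fname.c API declared above follows a get/use/put pattern: a context from ext4_get_fname_crypto_ctx() (NULL for unencrypted directories, an ERR_PTR on failure), a scratch buffer, then name conversion. The readdir changes in fs/ext4/dir.c above are the canonical consumer; condensed, with error handling trimmed:]

        struct ext4_fname_crypto_ctx *ctx;
        struct ext4_str oname = {.name = NULL, .len = 0};
        int ret = 0;

        ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
        if (ctx) {
                ret = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN, &oname);
                if (ret < 0)
                        goto out;
                /* on success ret is the plaintext name length for dir_emit() */
                ret = ext4_fname_disk_to_usr(ctx, de, &oname);
        }
 out:
        ext4_put_fname_crypto_ctx(&ctx);
        ext4_fname_crypto_free_buffer(&oname);
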
@@ -2011,17 +2151,20 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
2011 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ 2151 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
2012 (de), (bh), (buf), (size), (offset))) 2152 (de), (bh), (buf), (size), (offset)))
2013extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 2153extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
2014 __u32 minor_hash, 2154 __u32 minor_hash,
2015 struct ext4_dir_entry_2 *dirent); 2155 struct ext4_dir_entry_2 *dirent,
2156 struct ext4_str *ent_name);
2016extern void ext4_htree_free_dir_info(struct dir_private_info *p); 2157extern void ext4_htree_free_dir_info(struct dir_private_info *p);
2017extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, 2158extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2018 struct buffer_head *bh, 2159 struct buffer_head *bh,
2019 void *buf, int buf_size, 2160 void *buf, int buf_size,
2020 const char *name, int namelen, 2161 const char *name, int namelen,
2021 struct ext4_dir_entry_2 **dest_de); 2162 struct ext4_dir_entry_2 **dest_de);
2022void ext4_insert_dentry(struct inode *inode, 2163int ext4_insert_dentry(struct inode *dir,
2164 struct inode *inode,
2023 struct ext4_dir_entry_2 *de, 2165 struct ext4_dir_entry_2 *de,
2024 int buf_size, 2166 int buf_size,
2167 const struct qstr *iname,
2025 const char *name, int namelen); 2168 const char *name, int namelen);
2026static inline void ext4_update_dx_flag(struct inode *inode) 2169static inline void ext4_update_dx_flag(struct inode *inode)
2027{ 2170{
@@ -2099,6 +2242,7 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
2099extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2242extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
2100 2243
2101/* inode.c */ 2244/* inode.c */
2245int ext4_inode_is_fast_symlink(struct inode *inode);
2102struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); 2246struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
2103struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); 2247struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
2104int ext4_get_block_write(struct inode *inode, sector_t iblock, 2248int ext4_get_block_write(struct inode *inode, sector_t iblock,
@@ -2152,8 +2296,8 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
2152/* indirect.c */ 2296/* indirect.c */
2153extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 2297extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
2154 struct ext4_map_blocks *map, int flags); 2298 struct ext4_map_blocks *map, int flags);
2155extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, 2299extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
2156 struct iov_iter *iter, loff_t offset); 2300 loff_t offset);
2157extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2301extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2158extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2302extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2159extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2303extern void ext4_ind_truncate(handle_t *, struct inode *inode);
@@ -2189,6 +2333,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
2189 void *entry_buf, 2333 void *entry_buf,
2190 int buf_size, 2334 int buf_size,
2191 int csum_size); 2335 int csum_size);
2336extern int ext4_empty_dir(struct inode *inode);
2192 2337
2193/* resize.c */ 2338/* resize.c */
2194extern int ext4_group_add(struct super_block *sb, 2339extern int ext4_group_add(struct super_block *sb,
@@ -2593,7 +2738,6 @@ extern const struct file_operations ext4_dir_operations;
2593/* file.c */ 2738/* file.c */
2594extern const struct inode_operations ext4_file_inode_operations; 2739extern const struct inode_operations ext4_file_inode_operations;
2595extern const struct file_operations ext4_file_operations; 2740extern const struct file_operations ext4_file_operations;
2596extern const struct file_operations ext4_dax_file_operations;
2597extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2741extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2598 2742
2599/* inline.c */ 2743/* inline.c */
@@ -2699,6 +2843,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
2699 de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 2843 de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
2700} 2844}
2701 2845
2846/* readpages.c */
2847extern int ext4_mpage_readpages(struct address_space *mapping,
2848 struct list_head *pages, struct page *page,
2849 unsigned nr_pages);
2702 2850
2703/* symlink.c */ 2851/* symlink.c */
2704extern const struct inode_operations ext4_symlink_inode_operations; 2852extern const struct inode_operations ext4_symlink_inode_operations;
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
new file mode 100644
index 000000000000..c2ba35a914b6
--- /dev/null
+++ b/fs/ext4/ext4_crypto.h
@@ -0,0 +1,147 @@
1/*
2 * linux/fs/ext4/ext4_crypto.h
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption header content for ext4
7 *
8 * Written by Michael Halcrow, 2015.
9 */
10
11#ifndef _EXT4_CRYPTO_H
12#define _EXT4_CRYPTO_H
13
14#include <linux/fs.h>
15
16#define EXT4_KEY_DESCRIPTOR_SIZE 8
17
18/* Policy provided via an ioctl on the topmost directory */
19struct ext4_encryption_policy {
20 char version;
21 char contents_encryption_mode;
22 char filenames_encryption_mode;
23 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
24} __attribute__((__packed__));
25
26#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1
27#define EXT4_KEY_DERIVATION_NONCE_SIZE 16
28
29/**
30 * Encryption context for inode
31 *
32 * Protector format:
33 * 1 byte: Protector format (1 = this version)
34 * 1 byte: File contents encryption mode
35 * 1 byte: File names encryption mode
36 * 1 byte: Reserved
37 * 8 bytes: Master Key descriptor
38 * 16 bytes: Encryption Key derivation nonce
39 */
40struct ext4_encryption_context {
41 char format;
42 char contents_encryption_mode;
43 char filenames_encryption_mode;
44 char reserved;
45 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
46 char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE];
47} __attribute__((__packed__));
48
49/* Encryption parameters */
50#define EXT4_XTS_TWEAK_SIZE 16
51#define EXT4_AES_128_ECB_KEY_SIZE 16
52#define EXT4_AES_256_GCM_KEY_SIZE 32
53#define EXT4_AES_256_CBC_KEY_SIZE 32
54#define EXT4_AES_256_CTS_KEY_SIZE 32
55#define EXT4_AES_256_XTS_KEY_SIZE 64
56#define EXT4_MAX_KEY_SIZE 64
57
58#define EXT4_KEY_DESC_PREFIX "ext4:"
59#define EXT4_KEY_DESC_PREFIX_SIZE 5
60
61struct ext4_encryption_key {
62 uint32_t mode;
63 char raw[EXT4_MAX_KEY_SIZE];
64 uint32_t size;
65};
66
67#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
68#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002
69
70struct ext4_crypto_ctx {
71 struct crypto_tfm *tfm; /* Crypto API context */
72 struct page *bounce_page; /* Ciphertext page on write path */
73 struct page *control_page; /* Original page on write path */
74 struct bio *bio; /* The bio for this context */
75 struct work_struct work; /* Work queue for read complete path */
76 struct list_head free_list; /* Free list */
77 int flags; /* Flags */
78 int mode; /* Encryption mode for tfm */
79};
80
81struct ext4_completion_result {
82 struct completion completion;
83 int res;
84};
85
86#define DECLARE_EXT4_COMPLETION_RESULT(ecr) \
87 struct ext4_completion_result ecr = { \
88 COMPLETION_INITIALIZER((ecr).completion), 0 }
89
90static inline int ext4_encryption_key_size(int mode)
91{
92 switch (mode) {
93 case EXT4_ENCRYPTION_MODE_AES_256_XTS:
94 return EXT4_AES_256_XTS_KEY_SIZE;
95 case EXT4_ENCRYPTION_MODE_AES_256_GCM:
96 return EXT4_AES_256_GCM_KEY_SIZE;
97 case EXT4_ENCRYPTION_MODE_AES_256_CBC:
98 return EXT4_AES_256_CBC_KEY_SIZE;
99 case EXT4_ENCRYPTION_MODE_AES_256_CTS:
100 return EXT4_AES_256_CTS_KEY_SIZE;
101 default:
102 BUG();
103 }
104 return 0;
105}
106
107#define EXT4_FNAME_NUM_SCATTER_ENTRIES 4
108#define EXT4_CRYPTO_BLOCK_SIZE 16
109#define EXT4_FNAME_CRYPTO_DIGEST_SIZE 32
110
111struct ext4_str {
112 unsigned char *name;
113 u32 len;
114};
115
116struct ext4_fname_crypto_ctx {
117 u32 lim;
118 char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
119 struct crypto_ablkcipher *ctfm;
120 struct crypto_hash *htfm;
121 struct page *workpage;
122 struct ext4_encryption_key key;
123 unsigned has_valid_key : 1;
124 unsigned ctfm_key_is_ready : 1;
125};
126
127/**
128 * For encrypted symlinks, the ciphertext length is stored at the beginning
129 * of the string in little-endian format.
130 */
131struct ext4_encrypted_symlink_data {
132 __le16 len;
133 char encrypted_path[1];
134} __attribute__((__packed__));
135
136/**
137 * This function is used to calculate the disk space required to
138 * store a filename of length l in encrypted symlink format.
139 */
140static inline u32 encrypted_symlink_data_len(u32 l)
141{
142 if (l < EXT4_CRYPTO_BLOCK_SIZE)
143 l = EXT4_CRYPTO_BLOCK_SIZE;
144 return (l + sizeof(struct ext4_encrypted_symlink_data) - 1);
145}
146
147#endif /* _EXT4_CRYPTO_H */
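
[Editor's note: the layout comment above fully determines the on-disk context size: 1 + 1 + 1 + 1 + 8 + 16 = 28 packed bytes. A compile-time sketch, assuming this header is in scope and using plain C11 in place of the kernel's BUILD_BUG_ON:]

        #include <stddef.h>

        /* format + contents mode + filenames mode + reserved
         * + 8-byte key descriptor + 16-byte nonce = 28 bytes on disk */
        _Static_assert(sizeof(struct ext4_encryption_context) == 28,
                       "on-disk encryption context layout changed");

[Similarly, encrypted_symlink_data_len(l) reduces to max(l, EXT4_CRYPTO_BLOCK_SIZE) + 2: the packed ext4_encrypted_symlink_data is 3 bytes, and the - 1 cancels the encrypted_path[1] placeholder, leaving just the __le16 length prefix on top of the block-padded ciphertext. For example, l = 5 gives 18 and l = 20 gives 22.]
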
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bed43081720f..973816bfe4a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1717,12 +1717,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1717{ 1717{
1718 unsigned short ext1_ee_len, ext2_ee_len; 1718 unsigned short ext1_ee_len, ext2_ee_len;
1719 1719
1720 /*
1721 * Make sure that both extents are initialized. We don't merge
1722 * unwritten extents so that we can be sure that end_io code has
1723 * the extent that was written properly split out and conversion to
1724 * initialized is trivial.
1725 */
1726 if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) 1720 if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
1727 return 0; 1721 return 0;
1728 1722
@@ -3128,6 +3122,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3128 ee_len = ext4_ext_get_actual_len(ex); 3122 ee_len = ext4_ext_get_actual_len(ex);
3129 ee_pblock = ext4_ext_pblock(ex); 3123 ee_pblock = ext4_ext_pblock(ex);
3130 3124
3125 if (ext4_encrypted_inode(inode))
3126 return ext4_encrypted_zeroout(inode, ex);
3127
3131 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); 3128 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
3132 if (ret > 0) 3129 if (ret > 0)
3133 ret = 0; 3130 ret = 0;
@@ -4535,19 +4532,7 @@ got_allocated_blocks:
4535 */ 4532 */
4536 reserved_clusters = get_reserved_cluster_alloc(inode, 4533 reserved_clusters = get_reserved_cluster_alloc(inode,
4537 map->m_lblk, allocated); 4534 map->m_lblk, allocated);
4538 if (map_from_cluster) { 4535 if (!map_from_cluster) {
4539 if (reserved_clusters) {
4540 /*
4541 * We have clusters reserved for this range.
4542 * But since we are not doing actual allocation
4543 * and are simply using blocks from previously
4544 * allocated cluster, we should release the
4545 * reservation and not claim quota.
4546 */
4547 ext4_da_update_reserve_space(inode,
4548 reserved_clusters, 0);
4549 }
4550 } else {
4551 BUG_ON(allocated_clusters < reserved_clusters); 4536 BUG_ON(allocated_clusters < reserved_clusters);
4552 if (reserved_clusters < allocated_clusters) { 4537 if (reserved_clusters < allocated_clusters) {
4553 struct ext4_inode_info *ei = EXT4_I(inode); 4538 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -4803,12 +4788,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4803 else 4788 else
4804 max_blocks -= lblk; 4789 max_blocks -= lblk;
4805 4790
4806 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
4807 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4808 EXT4_EX_NOCACHE;
4809 if (mode & FALLOC_FL_KEEP_SIZE)
4810 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4811
4812 mutex_lock(&inode->i_mutex); 4791 mutex_lock(&inode->i_mutex);
4813 4792
4814 /* 4793 /*
@@ -4825,15 +4804,28 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4825 ret = inode_newsize_ok(inode, new_size); 4804 ret = inode_newsize_ok(inode, new_size);
4826 if (ret) 4805 if (ret)
4827 goto out_mutex; 4806 goto out_mutex;
4828 /*
4829 * If we have a partial block after EOF we have to allocate
4830 * the entire block.
4831 */
4832 if (partial_end)
4833 max_blocks += 1;
4834 } 4807 }
4835 4808
4809 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4810 if (mode & FALLOC_FL_KEEP_SIZE)
4811 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4812
4813 /* Preallocate the range including the unaligned edges */
4814 if (partial_begin || partial_end) {
4815 ret = ext4_alloc_file_blocks(file,
4816 round_down(offset, 1 << blkbits) >> blkbits,
4817 (round_up((offset + len), 1 << blkbits) -
4818 round_down(offset, 1 << blkbits)) >> blkbits,
4819 new_size, flags, mode);
4820 if (ret)
4821 goto out_mutex;
4822
4823 }
4824
4825 /* Zero range excluding the unaligned edges */
4836 if (max_blocks > 0) { 4826 if (max_blocks > 0) {
4827 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4828 EXT4_EX_NOCACHE);
4837 4829
4838 /* Now release the pages and zero block aligned part of pages*/ 4830 /* Now release the pages and zero block aligned part of pages*/
4839 truncate_pagecache_range(inode, start, end - 1); 4831 truncate_pagecache_range(inode, start, end - 1);
@@ -4847,19 +4839,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4847 flags, mode); 4839 flags, mode);
4848 if (ret) 4840 if (ret)
4849 goto out_dio; 4841 goto out_dio;
4850 /*
4851 * Remove entire range from the extent status tree.
4852 *
4853 * ext4_es_remove_extent(inode, lblk, max_blocks) is
4854 * NOT sufficient. I'm not sure why this is the case,
4855 * but let's be conservative and remove the extent
4856 * status tree for the entire inode. There should be
4857 * no outstanding delalloc extents thanks to the
4858 * filemap_write_and_wait_range() call above.
4859 */
4860 ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
4861 if (ret)
4862 goto out_dio;
4863 } 4842 }
4864 if (!partial_begin && !partial_end) 4843 if (!partial_begin && !partial_end)
4865 goto out_dio; 4844 goto out_dio;
@@ -4922,6 +4901,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4922 ext4_lblk_t lblk; 4901 ext4_lblk_t lblk;
4923 unsigned int blkbits = inode->i_blkbits; 4902 unsigned int blkbits = inode->i_blkbits;
4924 4903
4904 /*
4905 * Encrypted inodes can't handle collapse range or insert
4906 * range since we would need to re-encrypt blocks with a
4907 * different IV or XTS tweak (which are based on the logical
4908 * block number).
4909 *
4910 * XXX It's not clear why zero range isn't working, but we'll
4911 * leave it disabled for encrypted inodes for now. This is a
4912 * bug we should fix....
4913 */
4914 if (ext4_encrypted_inode(inode) &&
4915 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
4916 return -EOPNOTSUPP;
4917
4925 /* Return error if mode is not supported */ 4918 /* Return error if mode is not supported */
4926 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 4919 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4927 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 4920 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
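
[Editor's note: seen from userspace, the guard above turns collapse range (and, for now, zero range) on an encrypted inode into a clean failure rather than data corruption. A minimal sketch, assuming a Linux system that exposes FALLOC_FL_COLLAPSE_RANGE via linux/falloc.h:]

        #define _GNU_SOURCE
        #include <errno.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <linux/falloc.h>

        int try_collapse(int fd, off_t offset, off_t len)
        {
                if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, offset, len) < 0) {
                        if (errno == EOPNOTSUPP)
                                fprintf(stderr, "collapse range unsupported (encrypted inode?)\n");
                        return -1;
                }
                return 0;
        }
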
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e04d45733976..d33d5a6852b9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -9,12 +9,10 @@
9 * 9 *
10 * Ext4 extents status tree core functions. 10 * Ext4 extents status tree core functions.
11 */ 11 */
12#include <linux/rbtree.h>
13#include <linux/list_sort.h> 12#include <linux/list_sort.h>
14#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
15#include <linux/seq_file.h> 14#include <linux/seq_file.h>
16#include "ext4.h" 15#include "ext4.h"
17#include "extents_status.h"
18 16
19#include <trace/events/ext4.h> 17#include <trace/events/ext4.h>
20 18
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da16c9c..0613c256c344 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -20,12 +20,11 @@
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h>
24#include <linux/mount.h> 23#include <linux/mount.h>
25#include <linux/path.h> 24#include <linux/path.h>
26#include <linux/aio.h>
27#include <linux/quotaops.h> 25#include <linux/quotaops.h>
28#include <linux/pagevec.h> 26#include <linux/pagevec.h>
27#include <linux/uio.h>
29#include "ext4.h" 28#include "ext4.h"
30#include "ext4_jbd2.h" 29#include "ext4_jbd2.h"
31#include "xattr.h" 30#include "xattr.h"
@@ -95,11 +94,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
95 struct inode *inode = file_inode(iocb->ki_filp); 94 struct inode *inode = file_inode(iocb->ki_filp);
96 struct mutex *aio_mutex = NULL; 95 struct mutex *aio_mutex = NULL;
97 struct blk_plug plug; 96 struct blk_plug plug;
98 int o_direct = io_is_direct(file); 97 int o_direct = iocb->ki_flags & IOCB_DIRECT;
99 int overwrite = 0; 98 int overwrite = 0;
100 size_t length = iov_iter_count(from);
101 ssize_t ret; 99 ssize_t ret;
102 loff_t pos = iocb->ki_pos;
103 100
104 /* 101 /*
105 * Unaligned direct AIO must be serialized; see comment above 102 * Unaligned direct AIO must be serialized; see comment above
@@ -108,16 +105,17 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
108 if (o_direct && 105 if (o_direct &&
109 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && 106 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
110 !is_sync_kiocb(iocb) && 107 !is_sync_kiocb(iocb) &&
111 (file->f_flags & O_APPEND || 108 (iocb->ki_flags & IOCB_APPEND ||
112 ext4_unaligned_aio(inode, from, pos))) { 109 ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
113 aio_mutex = ext4_aio_mutex(inode); 110 aio_mutex = ext4_aio_mutex(inode);
114 mutex_lock(aio_mutex); 111 mutex_lock(aio_mutex);
115 ext4_unwritten_wait(inode); 112 ext4_unwritten_wait(inode);
116 } 113 }
117 114
118 mutex_lock(&inode->i_mutex); 115 mutex_lock(&inode->i_mutex);
119 if (file->f_flags & O_APPEND) 116 ret = generic_write_checks(iocb, from);
120 iocb->ki_pos = pos = i_size_read(inode); 117 if (ret <= 0)
118 goto out;
121 119
122 /* 120 /*
123 * If we have encountered a bitmap-format file, the size limit 121 * If we have encountered a bitmap-format file, the size limit
@@ -126,22 +124,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
126 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 124 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
127 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 125 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
128 126
129 if ((pos > sbi->s_bitmap_maxbytes) || 127 if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
130 (pos == sbi->s_bitmap_maxbytes && length > 0)) {
131 mutex_unlock(&inode->i_mutex);
132 ret = -EFBIG; 128 ret = -EFBIG;
133 goto errout; 129 goto out;
134 } 130 }
135 131 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
136 if (pos + length > sbi->s_bitmap_maxbytes)
137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
138 } 132 }
139 133
140 iocb->private = &overwrite; 134 iocb->private = &overwrite;
141 if (o_direct) { 135 if (o_direct) {
136 size_t length = iov_iter_count(from);
137 loff_t pos = iocb->ki_pos;
142 blk_start_plug(&plug); 138 blk_start_plug(&plug);
143 139
144
145 /* check whether we do a DIO overwrite or not */ 140 /* check whether we do a DIO overwrite or not */
146 if (ext4_should_dioread_nolock(inode) && !aio_mutex && 141 if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
147 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { 142 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
@@ -185,7 +180,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
185 if (o_direct) 180 if (o_direct)
186 blk_finish_plug(&plug); 181 blk_finish_plug(&plug);
187 182
188errout: 183 if (aio_mutex)
184 mutex_unlock(aio_mutex);
185 return ret;
186
187out:
188 mutex_unlock(&inode->i_mutex);
189 if (aio_mutex) 189 if (aio_mutex)
190 mutex_unlock(aio_mutex); 190 mutex_unlock(aio_mutex);
191 return ret; 191 return ret;
@@ -206,6 +206,7 @@ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
206static const struct vm_operations_struct ext4_dax_vm_ops = { 206static const struct vm_operations_struct ext4_dax_vm_ops = {
207 .fault = ext4_dax_fault, 207 .fault = ext4_dax_fault,
208 .page_mkwrite = ext4_dax_mkwrite, 208 .page_mkwrite = ext4_dax_mkwrite,
209 .pfn_mkwrite = dax_pfn_mkwrite,
209}; 210};
210#else 211#else
211#define ext4_dax_vm_ops ext4_file_vm_ops 212#define ext4_dax_vm_ops ext4_file_vm_ops
@@ -219,6 +220,13 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
219 220
220static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 221static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
221{ 222{
223 struct inode *inode = file->f_mapping->host;
224
225 if (ext4_encrypted_inode(inode)) {
226 int err = ext4_generate_encryption_key(inode);
227 if (err)
228 return 0;
229 }
222 file_accessed(file); 230 file_accessed(file);
223 if (IS_DAX(file_inode(file))) { 231 if (IS_DAX(file_inode(file))) {
224 vma->vm_ops = &ext4_dax_vm_ops; 232 vma->vm_ops = &ext4_dax_vm_ops;
@@ -236,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
236 struct vfsmount *mnt = filp->f_path.mnt; 244 struct vfsmount *mnt = filp->f_path.mnt;
237 struct path path; 245 struct path path;
238 char buf[64], *cp; 246 char buf[64], *cp;
247 int ret;
239 248
240 if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && 249 if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
241 !(sb->s_flags & MS_RDONLY))) { 250 !(sb->s_flags & MS_RDONLY))) {
@@ -274,11 +283,17 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
274 * writing and the journal is present 283 * writing and the journal is present
275 */ 284 */
276 if (filp->f_mode & FMODE_WRITE) { 285 if (filp->f_mode & FMODE_WRITE) {
277 int ret = ext4_inode_attach_jinode(inode); 286 ret = ext4_inode_attach_jinode(inode);
278 if (ret < 0) 287 if (ret < 0)
279 return ret; 288 return ret;
280 } 289 }
281 return dquot_file_open(inode, filp); 290 ret = dquot_file_open(inode, filp);
291 if (!ret && ext4_encrypted_inode(inode)) {
292 ret = ext4_generate_encryption_key(inode);
293 if (ret)
294 ret = -EACCES;
295 }
296 return ret;
282} 297}
283 298
284/* 299/*
@@ -607,8 +622,6 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
607 622
608const struct file_operations ext4_file_operations = { 623const struct file_operations ext4_file_operations = {
609 .llseek = ext4_llseek, 624 .llseek = ext4_llseek,
610 .read = new_sync_read,
611 .write = new_sync_write,
612 .read_iter = generic_file_read_iter, 625 .read_iter = generic_file_read_iter,
613 .write_iter = ext4_file_write_iter, 626 .write_iter = ext4_file_write_iter,
614 .unlocked_ioctl = ext4_ioctl, 627 .unlocked_ioctl = ext4_ioctl,
@@ -624,26 +637,6 @@ const struct file_operations ext4_file_operations = {
624 .fallocate = ext4_fallocate, 637 .fallocate = ext4_fallocate,
625}; 638};
626 639
627#ifdef CONFIG_FS_DAX
628const struct file_operations ext4_dax_file_operations = {
629 .llseek = ext4_llseek,
630 .read = new_sync_read,
631 .write = new_sync_write,
632 .read_iter = generic_file_read_iter,
633 .write_iter = ext4_file_write_iter,
634 .unlocked_ioctl = ext4_ioctl,
635#ifdef CONFIG_COMPAT
636 .compat_ioctl = ext4_compat_ioctl,
637#endif
638 .mmap = ext4_file_mmap,
639 .open = ext4_file_open,
640 .release = ext4_release_file,
641 .fsync = ext4_sync_file,
642 /* Splice not yet supported with DAX */
643 .fallocate = ext4_fallocate,
644};
645#endif
646
647const struct inode_operations ext4_file_inode_operations = { 640const struct inode_operations ext4_file_inode_operations = {
648 .setattr = ext4_setattr, 641 .setattr = ext4_setattr,
649 .getattr = ext4_getattr, 642 .getattr = ext4_getattr,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..e9d632e9aa4b 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h>
30#include <linux/blkdev.h> 29#include <linux/blkdev.h>
31 30
32#include "ext4.h" 31#include "ext4.h"
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3d586f02883e..e026aa941fd5 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h>
14#include <linux/cryptohash.h> 13#include <linux/cryptohash.h>
15#include "ext4.h" 14#include "ext4.h"
16 15
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ac644c31ca67..2cf18a2d5c72 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -14,7 +14,6 @@
14 14
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h>
18#include <linux/stat.h> 17#include <linux/stat.h>
19#include <linux/string.h> 18#include <linux/string.h>
20#include <linux/quotaops.h> 19#include <linux/quotaops.h>
@@ -997,6 +996,12 @@ got:
997 ei->i_block_group = group; 996 ei->i_block_group = group;
998 ei->i_last_alloc_group = ~0; 997 ei->i_last_alloc_group = ~0;
999 998
999 /* If the directory is encrypted, then we should encrypt the inode. */
1000 if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
1001 (ext4_encrypted_inode(dir) ||
1002 DUMMY_ENCRYPTION_ENABLED(sbi)))
1003 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1004
1000 ext4_set_inode_flags(inode); 1005 ext4_set_inode_flags(inode);
1001 if (IS_DIRSYNC(inode)) 1006 if (IS_DIRSYNC(inode))
1002 ext4_handle_sync(handle); 1007 ext4_handle_sync(handle);
@@ -1029,11 +1034,28 @@ got:
1029 ext4_set_inode_state(inode, EXT4_STATE_NEW); 1034 ext4_set_inode_state(inode, EXT4_STATE_NEW);
1030 1035
1031 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 1036 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
1032 1037#ifdef CONFIG_EXT4_FS_ENCRYPTION
1038 if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
1039 (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
1040 ei->i_inline_off = 0;
1041 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1042 EXT4_FEATURE_INCOMPAT_INLINE_DATA))
1043 ext4_set_inode_state(inode,
1044 EXT4_STATE_MAY_INLINE_DATA);
1045 } else {
1046 /* Inline data and encryption are incompatible;
1047 * we turn off inline data since encryption is enabled. */
1048 ei->i_inline_off = 1;
1049 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1050 EXT4_FEATURE_INCOMPAT_INLINE_DATA))
1051 ext4_clear_inode_state(inode,
1052 EXT4_STATE_MAY_INLINE_DATA);
1053 }
1054#else
1033 ei->i_inline_off = 0; 1055 ei->i_inline_off = 0;
1034 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) 1056 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
1035 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1057 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
1036 1058#endif
1037 ret = inode; 1059 ret = inode;
1038 err = dquot_alloc_inode(inode); 1060 err = dquot_alloc_inode(inode);
1039 if (err) 1061 if (err)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 45fe924f82bc..3580629e42d3 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,9 +20,9 @@
20 * (sct@redhat.com), 1993, 1998 20 * (sct@redhat.com), 1993, 1998
21 */ 21 */
22 22
23#include <linux/aio.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "truncate.h" 24#include "truncate.h"
25#include <linux/uio.h>
26 26
27#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
28 28
@@ -642,8 +642,8 @@ out:
642 * crashes then stale disk data _may_ be exposed inside the file. But current 642 * crashes then stale disk data _may_ be exposed inside the file. But current
643 * VFS code falls back into buffered path in that case so we are safe. 643 * VFS code falls back into buffered path in that case so we are safe.
644 */ 644 */
645ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, 645ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
646 struct iov_iter *iter, loff_t offset) 646 loff_t offset)
647{ 647{
648 struct file *file = iocb->ki_filp; 648 struct file *file = iocb->ki_filp;
649 struct inode *inode = file->f_mapping->host; 649 struct inode *inode = file->f_mapping->host;
@@ -654,7 +654,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
654 size_t count = iov_iter_count(iter); 654 size_t count = iov_iter_count(iter);
655 int retries = 0; 655 int retries = 0;
656 656
657 if (rw == WRITE) { 657 if (iov_iter_rw(iter) == WRITE) {
658 loff_t final_size = offset + count; 658 loff_t final_size = offset + count;
659 659
660 if (final_size > inode->i_size) { 660 if (final_size > inode->i_size) {
@@ -676,7 +676,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
676 } 676 }
677 677
678retry: 678retry:
679 if (rw == READ && ext4_should_dioread_nolock(inode)) { 679 if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) {
680 /* 680 /*
681 * Nolock dioread optimization may be dynamically disabled 681 * Nolock dioread optimization may be dynamically disabled
682 * via ext4_inode_block_unlocked_dio(). Check inode's state 682 * via ext4_inode_block_unlocked_dio(). Check inode's state
@@ -690,23 +690,24 @@ retry:
690 goto locked; 690 goto locked;
691 } 691 }
692 if (IS_DAX(inode)) 692 if (IS_DAX(inode))
693 ret = dax_do_io(rw, iocb, inode, iter, offset, 693 ret = dax_do_io(iocb, inode, iter, offset,
694 ext4_get_block, NULL, 0); 694 ext4_get_block, NULL, 0);
695 else 695 else
696 ret = __blockdev_direct_IO(rw, iocb, inode, 696 ret = __blockdev_direct_IO(iocb, inode,
697 inode->i_sb->s_bdev, iter, offset, 697 inode->i_sb->s_bdev, iter,
698 ext4_get_block, NULL, NULL, 0); 698 offset, ext4_get_block, NULL,
699 NULL, 0);
699 inode_dio_done(inode); 700 inode_dio_done(inode);
700 } else { 701 } else {
701locked: 702locked:
702 if (IS_DAX(inode)) 703 if (IS_DAX(inode))
703 ret = dax_do_io(rw, iocb, inode, iter, offset, 704 ret = dax_do_io(iocb, inode, iter, offset,
704 ext4_get_block, NULL, DIO_LOCKING); 705 ext4_get_block, NULL, DIO_LOCKING);
705 else 706 else
706 ret = blockdev_direct_IO(rw, iocb, inode, iter, 707 ret = blockdev_direct_IO(iocb, inode, iter, offset,
707 offset, ext4_get_block); 708 ext4_get_block);
708 709
709 if (unlikely((rw & WRITE) && ret < 0)) { 710 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
710 loff_t isize = i_size_read(inode); 711 loff_t isize = i_size_read(inode);
711 loff_t end = offset + count; 712 loff_t end = offset + count;
712 713
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 4b143febf21f..feb2cafbeace 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -11,11 +11,13 @@
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 */ 13 */
14
15#include <linux/fiemap.h>
16
14#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
15#include "ext4.h" 18#include "ext4.h"
16#include "xattr.h" 19#include "xattr.h"
17#include "truncate.h" 20#include "truncate.h"
18#include <linux/fiemap.h>
19 21
20#define EXT4_XATTR_SYSTEM_DATA "data" 22#define EXT4_XATTR_SYSTEM_DATA "data"
21#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 23#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -972,7 +974,7 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
972 offset = 0; 974 offset = 0;
973 while ((void *)de < dlimit) { 975 while ((void *)de < dlimit) {
974 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); 976 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
975 trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n", 977 trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
976 offset, de_len, de->name_len, de->name, 978 offset, de_len, de->name_len, de->name,
977 de->name_len, le32_to_cpu(de->inode)); 979 de->name_len, le32_to_cpu(de->inode));
978 if (ext4_check_dir_entry(dir, NULL, de, bh, 980 if (ext4_check_dir_entry(dir, NULL, de, bh,
@@ -1014,7 +1016,8 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
1014 err = ext4_journal_get_write_access(handle, iloc->bh); 1016 err = ext4_journal_get_write_access(handle, iloc->bh);
1015 if (err) 1017 if (err)
1016 return err; 1018 return err;
1017 ext4_insert_dentry(inode, de, inline_size, name, namelen); 1019 ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
1020 name, namelen);
1018 1021
1019 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); 1022 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
1020 1023
@@ -1327,6 +1330,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
1327 struct ext4_iloc iloc; 1330 struct ext4_iloc iloc;
1328 void *dir_buf = NULL; 1331 void *dir_buf = NULL;
1329 struct ext4_dir_entry_2 fake; 1332 struct ext4_dir_entry_2 fake;
1333 struct ext4_str tmp_str;
1330 1334
1331 ret = ext4_get_inode_loc(inode, &iloc); 1335 ret = ext4_get_inode_loc(inode, &iloc);
1332 if (ret) 1336 if (ret)
@@ -1398,8 +1402,10 @@ int htree_inlinedir_to_tree(struct file *dir_file,
1398 continue; 1402 continue;
1399 if (de->inode == 0) 1403 if (de->inode == 0)
1400 continue; 1404 continue;
1401 err = ext4_htree_store_dirent(dir_file, 1405 tmp_str.name = de->name;
1402 hinfo->hash, hinfo->minor_hash, de); 1406 tmp_str.len = de->name_len;
1407 err = ext4_htree_store_dirent(dir_file, hinfo->hash,
1408 hinfo->minor_hash, de, &tmp_str);
1403 if (err) { 1409 if (err) {
1404 count = err; 1410 count = err;
1405 goto out; 1411 goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cb9a212b86f..366476e71e10 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -20,7 +20,6 @@
20 20
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h>
24#include <linux/highuid.h> 23#include <linux/highuid.h>
25#include <linux/pagemap.h> 24#include <linux/pagemap.h>
26#include <linux/quotaops.h> 25#include <linux/quotaops.h>
@@ -36,8 +35,6 @@
36#include <linux/kernel.h> 35#include <linux/kernel.h>
37#include <linux/printk.h> 36#include <linux/printk.h>
38#include <linux/slab.h> 37#include <linux/slab.h>
39#include <linux/ratelimit.h>
40#include <linux/aio.h>
41#include <linux/bitops.h> 38#include <linux/bitops.h>
42 39
43#include "ext4_jbd2.h" 40#include "ext4_jbd2.h"
@@ -141,7 +138,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
141/* 138/*
142 * Test whether an inode is a fast symlink. 139 * Test whether an inode is a fast symlink.
143 */ 140 */
144static int ext4_inode_is_fast_symlink(struct inode *inode) 141int ext4_inode_is_fast_symlink(struct inode *inode)
145{ 142{
146 int ea_blocks = EXT4_I(inode)->i_file_acl ? 143 int ea_blocks = EXT4_I(inode)->i_file_acl ?
147 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; 144 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
@@ -888,6 +885,95 @@ int do_journal_get_write_access(handle_t *handle,
888 885
889static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, 886static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
890 struct buffer_head *bh_result, int create); 887 struct buffer_head *bh_result, int create);
888
889#ifdef CONFIG_EXT4_FS_ENCRYPTION
890static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
891 get_block_t *get_block)
892{
893 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
894 unsigned to = from + len;
895 struct inode *inode = page->mapping->host;
896 unsigned block_start, block_end;
897 sector_t block;
898 int err = 0;
899 unsigned blocksize = inode->i_sb->s_blocksize;
900 unsigned bbits;
901 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
902 bool decrypt = false;
903
904 BUG_ON(!PageLocked(page));
905 BUG_ON(from > PAGE_CACHE_SIZE);
906 BUG_ON(to > PAGE_CACHE_SIZE);
907 BUG_ON(from > to);
908
909 if (!page_has_buffers(page))
910 create_empty_buffers(page, blocksize, 0);
911 head = page_buffers(page);
912 bbits = ilog2(blocksize);
913 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
914
915 for (bh = head, block_start = 0; bh != head || !block_start;
916 block++, block_start = block_end, bh = bh->b_this_page) {
917 block_end = block_start + blocksize;
918 if (block_end <= from || block_start >= to) {
919 if (PageUptodate(page)) {
920 if (!buffer_uptodate(bh))
921 set_buffer_uptodate(bh);
922 }
923 continue;
924 }
925 if (buffer_new(bh))
926 clear_buffer_new(bh);
927 if (!buffer_mapped(bh)) {
928 WARN_ON(bh->b_size != blocksize);
929 err = get_block(inode, block, bh, 1);
930 if (err)
931 break;
932 if (buffer_new(bh)) {
933 unmap_underlying_metadata(bh->b_bdev,
934 bh->b_blocknr);
935 if (PageUptodate(page)) {
936 clear_buffer_new(bh);
937 set_buffer_uptodate(bh);
938 mark_buffer_dirty(bh);
939 continue;
940 }
941 if (block_end > to || block_start < from)
942 zero_user_segments(page, to, block_end,
943 block_start, from);
944 continue;
945 }
946 }
947 if (PageUptodate(page)) {
948 if (!buffer_uptodate(bh))
949 set_buffer_uptodate(bh);
950 continue;
951 }
952 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
953 !buffer_unwritten(bh) &&
954 (block_start < from || block_end > to)) {
955 ll_rw_block(READ, 1, &bh);
956 *wait_bh++ = bh;
957 decrypt = ext4_encrypted_inode(inode) &&
958 S_ISREG(inode->i_mode);
959 }
960 }
961 /*
962 * If we issued read requests, let them complete.
963 */
964 while (wait_bh > wait) {
965 wait_on_buffer(*--wait_bh);
966 if (!buffer_uptodate(*wait_bh))
967 err = -EIO;
968 }
969 if (unlikely(err))
970 page_zero_new_buffers(page, from, to);
971 else if (decrypt)
972 err = ext4_decrypt_one(inode, page);
973 return err;
974}
975#endif
976
891static int ext4_write_begin(struct file *file, struct address_space *mapping, 977static int ext4_write_begin(struct file *file, struct address_space *mapping,
892 loff_t pos, unsigned len, unsigned flags, 978 loff_t pos, unsigned len, unsigned flags,
893 struct page **pagep, void **fsdata) 979 struct page **pagep, void **fsdata)
@@ -950,11 +1036,19 @@ retry_journal:
950 /* In case writeback began while the page was unlocked */ 1036 /* In case writeback began while the page was unlocked */
951 wait_for_stable_page(page); 1037 wait_for_stable_page(page);
952 1038
1039#ifdef CONFIG_EXT4_FS_ENCRYPTION
1040 if (ext4_should_dioread_nolock(inode))
1041 ret = ext4_block_write_begin(page, pos, len,
1042 ext4_get_block_write);
1043 else
1044 ret = ext4_block_write_begin(page, pos, len,
1045 ext4_get_block);
1046#else
953 if (ext4_should_dioread_nolock(inode)) 1047 if (ext4_should_dioread_nolock(inode))
954 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 1048 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
955 else 1049 else
956 ret = __block_write_begin(page, pos, len, ext4_get_block); 1050 ret = __block_write_begin(page, pos, len, ext4_get_block);
957 1051#endif
958 if (!ret && ext4_should_journal_data(inode)) { 1052 if (!ret && ext4_should_journal_data(inode)) {
959 ret = ext4_walk_page_buffers(handle, page_buffers(page), 1053 ret = ext4_walk_page_buffers(handle, page_buffers(page),
960 from, to, NULL, 1054 from, to, NULL,
@@ -2576,7 +2670,12 @@ retry_journal:
2576 /* In case writeback began while the page was unlocked */ 2670 /* In case writeback began while the page was unlocked */
2577 wait_for_stable_page(page); 2671 wait_for_stable_page(page);
2578 2672
2673#ifdef CONFIG_EXT4_FS_ENCRYPTION
2674 ret = ext4_block_write_begin(page, pos, len,
2675 ext4_da_get_block_prep);
2676#else
2579 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2677 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2678#endif
2580 if (ret < 0) { 2679 if (ret < 0) {
2581 unlock_page(page); 2680 unlock_page(page);
2582 ext4_journal_stop(handle); 2681 ext4_journal_stop(handle);
@@ -2822,7 +2921,7 @@ static int ext4_readpage(struct file *file, struct page *page)
2822 ret = ext4_readpage_inline(inode, page); 2921 ret = ext4_readpage_inline(inode, page);
2823 2922
2824 if (ret == -EAGAIN) 2923 if (ret == -EAGAIN)
2825 return mpage_readpage(page, ext4_get_block); 2924 return ext4_mpage_readpages(page->mapping, NULL, page, 1);
2826 2925
2827 return ret; 2926 return ret;
2828} 2927}
@@ -2837,7 +2936,7 @@ ext4_readpages(struct file *file, struct address_space *mapping,
2837 if (ext4_has_inline_data(inode)) 2936 if (ext4_has_inline_data(inode))
2838 return 0; 2937 return 0;
2839 2938
2840 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 2939 return ext4_mpage_readpages(mapping, pages, NULL, nr_pages);
2841} 2940}
2842 2941
2843static void ext4_invalidatepage(struct page *page, unsigned int offset, 2942static void ext4_invalidatepage(struct page *page, unsigned int offset,
@@ -2953,8 +3052,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2953 * if the machine crashes during the write. 3052 * if the machine crashes during the write.
2954 * 3053 *
2955 */ 3054 */
2956static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, 3055static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
2957 struct iov_iter *iter, loff_t offset) 3056 loff_t offset)
2958{ 3057{
2959 struct file *file = iocb->ki_filp; 3058 struct file *file = iocb->ki_filp;
2960 struct inode *inode = file->f_mapping->host; 3059 struct inode *inode = file->f_mapping->host;
@@ -2967,8 +3066,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2967 ext4_io_end_t *io_end = NULL; 3066 ext4_io_end_t *io_end = NULL;
2968 3067
2969 /* Use the old path for reads and writes beyond i_size. */ 3068 /* Use the old path for reads and writes beyond i_size. */
2970 if (rw != WRITE || final_size > inode->i_size) 3069 if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
2971 return ext4_ind_direct_IO(rw, iocb, iter, offset); 3070 return ext4_ind_direct_IO(iocb, iter, offset);
2972 3071
2973 BUG_ON(iocb->private == NULL); 3072 BUG_ON(iocb->private == NULL);
2974 3073
@@ -2977,7 +3076,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2977 * conversion. This also disallows race between truncate() and 3076 * conversion. This also disallows race between truncate() and
2978 * overwrite DIO as i_dio_count needs to be incremented under i_mutex. 3077 * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
2979 */ 3078 */
2980 if (rw == WRITE) 3079 if (iov_iter_rw(iter) == WRITE)
2981 atomic_inc(&inode->i_dio_count); 3080 atomic_inc(&inode->i_dio_count);
2982 3081
2983 /* If we do an overwrite dio, i_mutex locking can be released */ 3082 /* If we do an overwrite dio, i_mutex locking can be released */
@@ -3034,11 +3133,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3034 get_block_func = ext4_get_block_write; 3133 get_block_func = ext4_get_block_write;
3035 dio_flags = DIO_LOCKING; 3134 dio_flags = DIO_LOCKING;
3036 } 3135 }
3136#ifdef CONFIG_EXT4_FS_ENCRYPTION
3137 BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
3138#endif
3037 if (IS_DAX(inode)) 3139 if (IS_DAX(inode))
3038 ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, 3140 ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
3039 ext4_end_io_dio, dio_flags); 3141 ext4_end_io_dio, dio_flags);
3040 else 3142 else
3041 ret = __blockdev_direct_IO(rw, iocb, inode, 3143 ret = __blockdev_direct_IO(iocb, inode,
3042 inode->i_sb->s_bdev, iter, offset, 3144 inode->i_sb->s_bdev, iter, offset,
3043 get_block_func, 3145 get_block_func,
3044 ext4_end_io_dio, NULL, dio_flags); 3146 ext4_end_io_dio, NULL, dio_flags);
@@ -3079,7 +3181,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3079 } 3181 }
3080 3182
3081retake_lock: 3183retake_lock:
3082 if (rw == WRITE) 3184 if (iov_iter_rw(iter) == WRITE)
3083 inode_dio_done(inode); 3185 inode_dio_done(inode);
3084 /* take i_mutex locking again if we do an overwrite dio */ 3186 /* take i_mutex locking again if we do an overwrite dio */
3085 if (overwrite) { 3187 if (overwrite) {
@@ -3090,14 +3192,19 @@ retake_lock:
3090 return ret; 3192 return ret;
3091} 3193}
3092 3194
3093static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3195static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3094 struct iov_iter *iter, loff_t offset) 3196 loff_t offset)
3095{ 3197{
3096 struct file *file = iocb->ki_filp; 3198 struct file *file = iocb->ki_filp;
3097 struct inode *inode = file->f_mapping->host; 3199 struct inode *inode = file->f_mapping->host;
3098 size_t count = iov_iter_count(iter); 3200 size_t count = iov_iter_count(iter);
3099 ssize_t ret; 3201 ssize_t ret;
3100 3202
3203#ifdef CONFIG_EXT4_FS_ENCRYPTION
3204 if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
3205 return 0;
3206#endif
3207
3101 /* 3208 /*
3102 * If we are doing data journalling we don't support O_DIRECT 3209 * If we are doing data journalling we don't support O_DIRECT
3103 */ 3210 */
@@ -3108,12 +3215,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3108 if (ext4_has_inline_data(inode)) 3215 if (ext4_has_inline_data(inode))
3109 return 0; 3216 return 0;
3110 3217
3111 trace_ext4_direct_IO_enter(inode, offset, count, rw); 3218 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
3112 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3219 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3113 ret = ext4_ext_direct_IO(rw, iocb, iter, offset); 3220 ret = ext4_ext_direct_IO(iocb, iter, offset);
3114 else 3221 else
3115 ret = ext4_ind_direct_IO(rw, iocb, iter, offset); 3222 ret = ext4_ind_direct_IO(iocb, iter, offset);
3116 trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); 3223 trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
3117 return ret; 3224 return ret;
3118} 3225}
3119 3226
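This hunk completes the interface change running through the file: the explicit rw argument is gone and the I/O direction is read off the iterator with iov_iter_rw(). A toy model of carrying direction in the iterator itself — the enum and struct below are illustrative stand-ins, not the kernel's definitions:

    #include <stdio.h>
    #include <stddef.h>

    enum iter_dir { ITER_READ, ITER_WRITE };

    struct iov_iter_model {
        enum iter_dir dir;   /* direction travels with the iterator */
        size_t count;
    };

    static enum iter_dir iov_iter_rw_model(const struct iov_iter_model *i)
    {
        return i->dir;
    }

    int main(void)
    {
        struct iov_iter_model it = { .dir = ITER_WRITE, .count = 4096 };

        /* Callers no longer pass a separate 'rw' flag; they ask the
         * iterator, so flag and data can never disagree. */
        if (iov_iter_rw_model(&it) == ITER_WRITE)
            printf("write path: %zu bytes\n", it.count);
        else
            printf("read path: %zu bytes\n", it.count);
        return 0;
    }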
@@ -3262,6 +3369,13 @@ static int __ext4_block_zero_page_range(handle_t *handle,
3262 /* Uhhuh. Read error. Complain and punt. */ 3369 /* Uhhuh. Read error. Complain and punt. */
3263 if (!buffer_uptodate(bh)) 3370 if (!buffer_uptodate(bh))
3264 goto unlock; 3371 goto unlock;
3372 if (S_ISREG(inode->i_mode) &&
3373 ext4_encrypted_inode(inode)) {
3374 /* We expect the key to be set. */
3375 BUG_ON(!ext4_has_encryption_key(inode));
3376 BUG_ON(blocksize != PAGE_CACHE_SIZE);
3377 WARN_ON_ONCE(ext4_decrypt_one(inode, page));
3378 }
3265 } 3379 }
3266 if (ext4_should_journal_data(inode)) { 3380 if (ext4_should_journal_data(inode)) {
3267 BUFFER_TRACE(bh, "get write access"); 3381 BUFFER_TRACE(bh, "get write access");
@@ -4091,16 +4205,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4091 4205
4092 if (S_ISREG(inode->i_mode)) { 4206 if (S_ISREG(inode->i_mode)) {
4093 inode->i_op = &ext4_file_inode_operations; 4207 inode->i_op = &ext4_file_inode_operations;
4094 if (test_opt(inode->i_sb, DAX)) 4208 inode->i_fop = &ext4_file_operations;
4095 inode->i_fop = &ext4_dax_file_operations;
4096 else
4097 inode->i_fop = &ext4_file_operations;
4098 ext4_set_aops(inode); 4209 ext4_set_aops(inode);
4099 } else if (S_ISDIR(inode->i_mode)) { 4210 } else if (S_ISDIR(inode->i_mode)) {
4100 inode->i_op = &ext4_dir_inode_operations; 4211 inode->i_op = &ext4_dir_inode_operations;
4101 inode->i_fop = &ext4_dir_operations; 4212 inode->i_fop = &ext4_dir_operations;
4102 } else if (S_ISLNK(inode->i_mode)) { 4213 } else if (S_ISLNK(inode->i_mode)) {
4103 if (ext4_inode_is_fast_symlink(inode)) { 4214 if (ext4_inode_is_fast_symlink(inode) &&
4215 !ext4_encrypted_inode(inode)) {
4104 inode->i_op = &ext4_fast_symlink_inode_operations; 4216 inode->i_op = &ext4_fast_symlink_inode_operations;
4105 nd_terminate_link(ei->i_data, inode->i_size, 4217 nd_terminate_link(ei->i_data, inode->i_size,
4106 sizeof(ei->i_data) - 1); 4218 sizeof(ei->i_data) - 1);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f58a0d106726..2cb9e178d1c5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -8,12 +8,12 @@
8 */ 8 */
9 9
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h>
12#include <linux/capability.h> 11#include <linux/capability.h>
13#include <linux/time.h> 12#include <linux/time.h>
14#include <linux/compat.h> 13#include <linux/compat.h>
15#include <linux/mount.h> 14#include <linux/mount.h>
16#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/random.h>
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
@@ -196,6 +196,16 @@ journal_err_out:
196 return err; 196 return err;
197} 197}
198 198
199static int uuid_is_zero(__u8 u[16])
200{
201 int i;
202
203 for (i = 0; i < 16; i++)
204 if (u[i])
205 return 0;
206 return 1;
207}
208
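uuid_is_zero() gates lazy salt generation below: an all-zero s_encrypt_pw_salt means no salt has ever been written, so one is generated and journaled on first request. The helper as shown, exercised with a small harness (the harness is illustrative; the kernel calls it on the superblock field):

    #include <stdio.h>

    static int uuid_is_zero(unsigned char u[16])
    {
        int i;

        for (i = 0; i < 16; i++)
            if (u[i])
                return 0;
        return 1;
    }

    int main(void)
    {
        unsigned char fresh[16] = { 0 };   /* never-initialized salt */
        unsigned char used[16]  = { 0 };

        used[5] = 0xab;                    /* any nonzero byte marks it set */
        printf("fresh: %d, used: %d\n",
               uuid_is_zero(fresh), uuid_is_zero(used));
        return 0;
    }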
199long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 209long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
200{ 210{
201 struct inode *inode = file_inode(filp); 211 struct inode *inode = file_inode(filp);
@@ -615,7 +625,78 @@ resizefs_out:
615 } 625 }
616 case EXT4_IOC_PRECACHE_EXTENTS: 626 case EXT4_IOC_PRECACHE_EXTENTS:
617 return ext4_ext_precache(inode); 627 return ext4_ext_precache(inode);
628 case EXT4_IOC_SET_ENCRYPTION_POLICY: {
629#ifdef CONFIG_EXT4_FS_ENCRYPTION
630 struct ext4_encryption_policy policy;
631 int err = 0;
632
633 if (copy_from_user(&policy,
634 (struct ext4_encryption_policy __user *)arg,
635 sizeof(policy))) {
636 err = -EFAULT;
637 goto encryption_policy_out;
638 }
618 639
640 err = ext4_process_policy(&policy, inode);
641encryption_policy_out:
642 return err;
643#else
644 return -EOPNOTSUPP;
645#endif
646 }
647 case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
648 int err, err2;
649 struct ext4_sb_info *sbi = EXT4_SB(sb);
650 handle_t *handle;
651
652 if (!ext4_sb_has_crypto(sb))
653 return -EOPNOTSUPP;
654 if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) {
655 err = mnt_want_write_file(filp);
656 if (err)
657 return err;
658 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
659 if (IS_ERR(handle)) {
660 err = PTR_ERR(handle);
661 goto pwsalt_err_exit;
662 }
663 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
664 if (err)
665 goto pwsalt_err_journal;
666 generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
667 err = ext4_handle_dirty_metadata(handle, NULL,
668 sbi->s_sbh);
669 pwsalt_err_journal:
670 err2 = ext4_journal_stop(handle);
671 if (err2 && !err)
672 err = err2;
673 pwsalt_err_exit:
674 mnt_drop_write_file(filp);
675 if (err)
676 return err;
677 }
678 if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
679 16))
680 return -EFAULT;
681 return 0;
682 }
683 case EXT4_IOC_GET_ENCRYPTION_POLICY: {
684#ifdef CONFIG_EXT4_FS_ENCRYPTION
685 struct ext4_encryption_policy policy;
686 int err = 0;
687
688 if (!ext4_encrypted_inode(inode))
689 return -ENOENT;
690 err = ext4_get_policy(inode, &policy);
691 if (err)
692 return err;
693 if (copy_to_user((void *)arg, &policy, sizeof(policy)))
694 return -EFAULT;
695 return 0;
696#else
697 return -EOPNOTSUPP;
698#endif
699 }
619 default: 700 default:
620 return -ENOTTY; 701 return -ENOTTY;
621 } 702 }
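A hypothetical userspace caller for the new pwsalt ioctl. The request code below is an assumption reconstructed from kernel headers of this era; in real use it should come from the matching ext4 UAPI header rather than being redefined locally:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>

    #ifndef EXT4_IOC_GET_ENCRYPTION_PWSALT
    /* Assumed definition -- prefer the real kernel header. */
    #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, unsigned char[16])
    #endif

    int main(int argc, char **argv)
    {
        unsigned char salt[16];
        int fd, i;

        if (argc != 2)
            return 1;
        fd = open(argv[1], O_RDONLY);   /* any file on the filesystem */
        if (fd < 0)
            return 1;
        if (ioctl(fd, EXT4_IOC_GET_ENCRYPTION_PWSALT, salt) != 0) {
            perror("ioctl");
            close(fd);
            return 1;
        }
        for (i = 0; i < 16; i++)
            printf("%02x", salt[i]);
        putchar('\n');
        close(fd);
        return 0;
    }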
@@ -680,6 +761,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
680 case FITRIM: 761 case FITRIM:
681 case EXT4_IOC_RESIZE_FS: 762 case EXT4_IOC_RESIZE_FS:
682 case EXT4_IOC_PRECACHE_EXTENTS: 763 case EXT4_IOC_PRECACHE_EXTENTS:
764 case EXT4_IOC_SET_ENCRYPTION_POLICY:
765 case EXT4_IOC_GET_ENCRYPTION_PWSALT:
766 case EXT4_IOC_GET_ENCRYPTION_POLICY:
683 break; 767 break;
684 default: 768 default:
685 return -ENOIOCTLCMD; 769 return -ENOIOCTLCMD;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28fe71a2904c..ef22cd951c0c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h>
30#include <linux/time.h> 29#include <linux/time.h>
31#include <linux/fcntl.h> 30#include <linux/fcntl.h>
32#include <linux/stat.h> 31#include <linux/stat.h>
@@ -254,8 +253,9 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
254 struct dx_hash_info *hinfo, 253 struct dx_hash_info *hinfo,
255 struct dx_frame *frame); 254 struct dx_frame *frame);
256static void dx_release(struct dx_frame *frames); 255static void dx_release(struct dx_frame *frames);
257static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, 256static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
258 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 257 unsigned blocksize, struct dx_hash_info *hinfo,
258 struct dx_map_entry map[]);
259static void dx_sort_map(struct dx_map_entry *map, unsigned count); 259static void dx_sort_map(struct dx_map_entry *map, unsigned count);
260static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, 260static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
261 struct dx_map_entry *offsets, int count, unsigned blocksize); 261 struct dx_map_entry *offsets, int count, unsigned blocksize);
@@ -586,8 +586,10 @@ struct stats
586 unsigned bcount; 586 unsigned bcount;
587}; 587};
588 588
589static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, 589static struct stats dx_show_leaf(struct inode *dir,
590 int size, int show_names) 590 struct dx_hash_info *hinfo,
591 struct ext4_dir_entry_2 *de,
592 int size, int show_names)
591{ 593{
592 unsigned names = 0, space = 0; 594 unsigned names = 0, space = 0;
593 char *base = (char *) de; 595 char *base = (char *) de;
@@ -600,12 +602,80 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
600 { 602 {
601 if (show_names) 603 if (show_names)
602 { 604 {
605#ifdef CONFIG_EXT4_FS_ENCRYPTION
606 int len;
607 char *name;
608 struct ext4_str fname_crypto_str
609 = {.name = NULL, .len = 0};
610 struct ext4_fname_crypto_ctx *ctx = NULL;
611 int res;
612
613 name = de->name;
614 len = de->name_len;
615 ctx = ext4_get_fname_crypto_ctx(dir,
616 EXT4_NAME_LEN);
617 if (IS_ERR(ctx)) {
618 printk(KERN_WARNING "Error acquiring"
619 " crypto ctxt--skipping crypto\n");
620 ctx = NULL;
621 }
622 if (ctx == NULL) {
623 /* Directory is not encrypted */
624 ext4fs_dirhash(de->name,
625 de->name_len, &h);
626 printk("%*.s:(U)%x.%u ", len,
627 name, h.hash,
628 (unsigned) ((char *) de
629 - base));
630 } else {
631 /* Directory is encrypted */
632 res = ext4_fname_crypto_alloc_buffer(
633 ctx, de->name_len,
634 &fname_crypto_str);
635 if (res < 0) {
636 printk(KERN_WARNING "Error "
637 "allocating crypto "
638 "buffer--skipping "
639 "crypto\n");
640 ext4_put_fname_crypto_ctx(&ctx);
641 ctx = NULL;
642 }
643 res = ext4_fname_disk_to_usr(ctx, de,
644 &fname_crypto_str);
645 if (res < 0) {
646 printk(KERN_WARNING "Error "
647 "converting filename "
648 "from disk to usr"
649 "\n");
650 name = "??";
651 len = 2;
652 } else {
653 name = fname_crypto_str.name;
654 len = fname_crypto_str.len;
655 }
656 res = ext4_fname_disk_to_hash(ctx, de,
657 &h);
658 if (res < 0) {
659 printk(KERN_WARNING "Error "
660 "converting filename "
661 "from disk to htree"
662 "\n");
663 h.hash = 0xDEADBEEF;
664 }
665 printk("%*.s:(E)%x.%u ", len, name,
666 h.hash, (unsigned) ((char *) de
667 - base));
668 ext4_put_fname_crypto_ctx(&ctx);
669 ext4_fname_crypto_free_buffer(
670 &fname_crypto_str);
671 }
672#else
603 int len = de->name_len; 673 int len = de->name_len;
604 char *name = de->name; 674 char *name = de->name;
605 while (len--) printk("%c", *name++);
606 ext4fs_dirhash(de->name, de->name_len, &h); 675 ext4fs_dirhash(de->name, de->name_len, &h);
607 printk(":%x.%u ", h.hash, 676 printk("%*.s:%x.%u ", len, name, h.hash,
608 (unsigned) ((char *) de - base)); 677 (unsigned) ((char *) de - base));
678#endif
609 } 679 }
610 space += EXT4_DIR_REC_LEN(de->name_len); 680 space += EXT4_DIR_REC_LEN(de->name_len);
611 names++; 681 names++;
@@ -623,7 +693,6 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
623 unsigned count = dx_get_count(entries), names = 0, space = 0, i; 693 unsigned count = dx_get_count(entries), names = 0, space = 0, i;
624 unsigned bcount = 0; 694 unsigned bcount = 0;
625 struct buffer_head *bh; 695 struct buffer_head *bh;
626 int err;
627 printk("%i indexed blocks...\n", count); 696 printk("%i indexed blocks...\n", count);
628 for (i = 0; i < count; i++, entries++) 697 for (i = 0; i < count; i++, entries++)
629 { 698 {
@@ -637,7 +706,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
637 continue; 706 continue;
638 stats = levels? 707 stats = levels?
639 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): 708 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
640 dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); 709 dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
710 bh->b_data, blocksize, 0);
641 names += stats.names; 711 names += stats.names;
642 space += stats.space; 712 space += stats.space;
643 bcount += stats.bcount; 713 bcount += stats.bcount;
@@ -687,8 +757,28 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
687 if (hinfo->hash_version <= DX_HASH_TEA) 757 if (hinfo->hash_version <= DX_HASH_TEA)
688 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; 758 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
689 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; 759 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
760#ifdef CONFIG_EXT4_FS_ENCRYPTION
761 if (d_name) {
762 struct ext4_fname_crypto_ctx *ctx = NULL;
763 int res;
764
765 /* Check if the directory is encrypted */
766 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
767 if (IS_ERR(ctx)) {
768 ret_err = ERR_PTR(PTR_ERR(ctx));
769 goto fail;
770 }
771 res = ext4_fname_usr_to_hash(ctx, d_name, hinfo);
772 if (res < 0) {
773 ret_err = ERR_PTR(res);
774 goto fail;
775 }
776 ext4_put_fname_crypto_ctx(&ctx);
777 }
778#else
690 if (d_name) 779 if (d_name)
691 ext4fs_dirhash(d_name->name, d_name->len, hinfo); 780 ext4fs_dirhash(d_name->name, d_name->len, hinfo);
781#endif
692 hash = hinfo->hash; 782 hash = hinfo->hash;
693 783
694 if (root->info.unused_flags & 1) { 784 if (root->info.unused_flags & 1) {
@@ -773,6 +863,7 @@ fail:
773 brelse(frame->bh); 863 brelse(frame->bh);
774 frame--; 864 frame--;
775 } 865 }
866
776 if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) 867 if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
777 ext4_warning(dir->i_sb, 868 ext4_warning(dir->i_sb,
778 "Corrupt dir inode %lu, running e2fsck is " 869 "Corrupt dir inode %lu, running e2fsck is "
@@ -878,6 +969,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
878 struct buffer_head *bh; 969 struct buffer_head *bh;
879 struct ext4_dir_entry_2 *de, *top; 970 struct ext4_dir_entry_2 *de, *top;
880 int err = 0, count = 0; 971 int err = 0, count = 0;
972 struct ext4_fname_crypto_ctx *ctx = NULL;
973 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str;
881 974
882 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 975 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
883 (unsigned long)block)); 976 (unsigned long)block));
@@ -889,6 +982,24 @@ static int htree_dirblock_to_tree(struct file *dir_file,
889 top = (struct ext4_dir_entry_2 *) ((char *) de + 982 top = (struct ext4_dir_entry_2 *) ((char *) de +
890 dir->i_sb->s_blocksize - 983 dir->i_sb->s_blocksize -
891 EXT4_DIR_REC_LEN(0)); 984 EXT4_DIR_REC_LEN(0));
985#ifdef CONFIG_EXT4_FS_ENCRYPTION
986 /* Check if the directory is encrypted */
987 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
988 if (IS_ERR(ctx)) {
989 err = PTR_ERR(ctx);
990 brelse(bh);
991 return err;
992 }
993 if (ctx != NULL) {
994 err = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
995 &fname_crypto_str);
996 if (err < 0) {
997 ext4_put_fname_crypto_ctx(&ctx);
998 brelse(bh);
999 return err;
1000 }
1001 }
1002#endif
892 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { 1003 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
893 if (ext4_check_dir_entry(dir, NULL, de, bh, 1004 if (ext4_check_dir_entry(dir, NULL, de, bh,
894 bh->b_data, bh->b_size, 1005 bh->b_data, bh->b_size,
@@ -897,21 +1008,52 @@ static int htree_dirblock_to_tree(struct file *dir_file,
897 /* silently ignore the rest of the block */ 1008 /* silently ignore the rest of the block */
898 break; 1009 break;
899 } 1010 }
1011#ifdef CONFIG_EXT4_FS_ENCRYPTION
1012 err = ext4_fname_disk_to_hash(ctx, de, hinfo);
1013 if (err < 0) {
1014 count = err;
1015 goto errout;
1016 }
1017#else
900 ext4fs_dirhash(de->name, de->name_len, hinfo); 1018 ext4fs_dirhash(de->name, de->name_len, hinfo);
1019#endif
901 if ((hinfo->hash < start_hash) || 1020 if ((hinfo->hash < start_hash) ||
902 ((hinfo->hash == start_hash) && 1021 ((hinfo->hash == start_hash) &&
903 (hinfo->minor_hash < start_minor_hash))) 1022 (hinfo->minor_hash < start_minor_hash)))
904 continue; 1023 continue;
905 if (de->inode == 0) 1024 if (de->inode == 0)
906 continue; 1025 continue;
907 if ((err = ext4_htree_store_dirent(dir_file, 1026 if (ctx == NULL) {
908 hinfo->hash, hinfo->minor_hash, de)) != 0) { 1027 /* Directory is not encrypted */
909 brelse(bh); 1028 tmp_str.name = de->name;
910 return err; 1029 tmp_str.len = de->name_len;
1030 err = ext4_htree_store_dirent(dir_file,
1031 hinfo->hash, hinfo->minor_hash, de,
1032 &tmp_str);
1033 } else {
1034 /* Directory is encrypted */
1035 err = ext4_fname_disk_to_usr(ctx, de,
1036 &fname_crypto_str);
1037 if (err < 0) {
1038 count = err;
1039 goto errout;
1040 }
1041 err = ext4_htree_store_dirent(dir_file,
1042 hinfo->hash, hinfo->minor_hash, de,
1043 &fname_crypto_str);
1044 }
1045 if (err != 0) {
1046 count = err;
1047 goto errout;
911 } 1048 }
912 count++; 1049 count++;
913 } 1050 }
1051errout:
914 brelse(bh); 1052 brelse(bh);
1053#ifdef CONFIG_EXT4_FS_ENCRYPTION
1054 ext4_put_fname_crypto_ctx(&ctx);
1055 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1056#endif
915 return count; 1057 return count;
916} 1058}
917 1059
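The reworked loop funnels every failure through a single errout label so that the buffer head and crypto context are released exactly once, reusing count as the return slot: a negative errno on failure, otherwise the number of entries stored. A minimal model of that convention:

    #include <stdio.h>
    #include <errno.h>

    /* Negative errno replaces the running count, and all paths funnel
     * to one cleanup point (brelse()/ctx release in the real code). */
    static int process_block(const int *entries, int n)
    {
        int count = 0;
        int i;

        for (i = 0; i < n; i++) {
            if (entries[i] < 0) {     /* stand-in for a failed conversion */
                count = -EIO;
                goto errout;
            }
            count++;
        }
    errout:
        /* shared cleanup runs exactly once */
        return count;
    }

    int main(void)
    {
        int good[] = { 1, 2, 3 };
        int bad[]  = { 1, -1, 3 };

        printf("good: %d, bad: %d\n",
               process_block(good, 3), process_block(bad, 3));
        return 0;
    }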
@@ -935,6 +1077,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
935 int count = 0; 1077 int count = 0;
936 int ret, err; 1078 int ret, err;
937 __u32 hashval; 1079 __u32 hashval;
1080 struct ext4_str tmp_str;
938 1081
939 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 1082 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
940 start_hash, start_minor_hash)); 1083 start_hash, start_minor_hash));
@@ -970,14 +1113,22 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
970 /* Add '.' and '..' from the htree header */ 1113 /* Add '.' and '..' from the htree header */
971 if (!start_hash && !start_minor_hash) { 1114 if (!start_hash && !start_minor_hash) {
972 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; 1115 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
973 if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0) 1116 tmp_str.name = de->name;
1117 tmp_str.len = de->name_len;
1118 err = ext4_htree_store_dirent(dir_file, 0, 0,
1119 de, &tmp_str);
1120 if (err != 0)
974 goto errout; 1121 goto errout;
975 count++; 1122 count++;
976 } 1123 }
977 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { 1124 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
978 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; 1125 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
979 de = ext4_next_entry(de, dir->i_sb->s_blocksize); 1126 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
980 if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) 1127 tmp_str.name = de->name;
1128 tmp_str.len = de->name_len;
1129 err = ext4_htree_store_dirent(dir_file, 2, 0,
1130 de, &tmp_str);
1131 if (err != 0)
981 goto errout; 1132 goto errout;
982 count++; 1133 count++;
983 } 1134 }
@@ -1035,17 +1186,33 @@ static inline int search_dirblock(struct buffer_head *bh,
1035 * Create map of hash values, offsets, and sizes, stored at end of block. 1186 * Create map of hash values, offsets, and sizes, stored at end of block.
1036 * Returns number of entries mapped. 1187 * Returns number of entries mapped.
1037 */ 1188 */
1038static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, 1189static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
1039 struct dx_hash_info *hinfo, 1190 unsigned blocksize, struct dx_hash_info *hinfo,
1040 struct dx_map_entry *map_tail) 1191 struct dx_map_entry *map_tail)
1041{ 1192{
1042 int count = 0; 1193 int count = 0;
1043 char *base = (char *) de; 1194 char *base = (char *) de;
1044 struct dx_hash_info h = *hinfo; 1195 struct dx_hash_info h = *hinfo;
1196#ifdef CONFIG_EXT4_FS_ENCRYPTION
1197 struct ext4_fname_crypto_ctx *ctx = NULL;
1198 int err;
1199
1200 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1201 if (IS_ERR(ctx))
1202 return PTR_ERR(ctx);
1203#endif
1045 1204
1046 while ((char *) de < base + blocksize) { 1205 while ((char *) de < base + blocksize) {
1047 if (de->name_len && de->inode) { 1206 if (de->name_len && de->inode) {
1207#ifdef CONFIG_EXT4_FS_ENCRYPTION
1208 err = ext4_fname_disk_to_hash(ctx, de, &h);
1209 if (err < 0) {
1210 ext4_put_fname_crypto_ctx(&ctx);
1211 return err;
1212 }
1213#else
1048 ext4fs_dirhash(de->name, de->name_len, &h); 1214 ext4fs_dirhash(de->name, de->name_len, &h);
1215#endif
1049 map_tail--; 1216 map_tail--;
1050 map_tail->hash = h.hash; 1217 map_tail->hash = h.hash;
1051 map_tail->offs = ((char *) de - base)>>2; 1218 map_tail->offs = ((char *) de - base)>>2;
@@ -1056,6 +1223,9 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
1056 /* XXX: do we need to check rec_len == 0 case? -Chris */ 1223 /* XXX: do we need to check rec_len == 0 case? -Chris */
1057 de = ext4_next_entry(de, blocksize); 1224 de = ext4_next_entry(de, blocksize);
1058 } 1225 }
1226#ifdef CONFIG_EXT4_FS_ENCRYPTION
1227 ext4_put_fname_crypto_ctx(&ctx);
1228#endif
1059 return count; 1229 return count;
1060} 1230}
1061 1231
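dx_make_map() stores each entry's offset shifted right by two ("offs = ((char *) de - base)>>2"), which is lossless only because directory entries are 4-byte aligned. A toy round-trip of that packing:

    #include <stdio.h>

    int main(void)
    {
        unsigned int offsets[] = { 0, 12, 40, 1024 };  /* multiples of 4 */
        unsigned int i;

        for (i = 0; i < 4; i++) {
            unsigned short packed = offsets[i] >> 2;       /* store */
            unsigned int unpacked = (unsigned int)packed << 2;  /* load */
            printf("%u -> %u -> %u\n",
                   offsets[i], (unsigned int)packed, unpacked);
        }
        return 0;
    }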
@@ -1106,57 +1276,107 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1106 * `len <= EXT4_NAME_LEN' is guaranteed by caller. 1276 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1107 * `de != NULL' is guaranteed by caller. 1277 * `de != NULL' is guaranteed by caller.
1108 */ 1278 */
1109static inline int ext4_match (int len, const char * const name, 1279static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
1110 struct ext4_dir_entry_2 * de) 1280 struct ext4_str *fname_crypto_str,
1281 int len, const char * const name,
1282 struct ext4_dir_entry_2 *de)
1111{ 1283{
1112 if (len != de->name_len) 1284 int res;
1113 return 0; 1285
1114 if (!de->inode) 1286 if (!de->inode)
1115 return 0; 1287 return 0;
1116 return !memcmp(name, de->name, len); 1288
1289#ifdef CONFIG_EXT4_FS_ENCRYPTION
1290 if (ctx) {
1291 /* Directory is encrypted */
1292 res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str);
1293 if (res < 0)
1294 return res;
1295 if (len != res)
1296 return 0;
1297 res = memcmp(name, fname_crypto_str->name, len);
1298 return (res == 0) ? 1 : 0;
1299 }
1300#endif
1301 if (len != de->name_len)
1302 return 0;
1303 res = memcmp(name, de->name, len);
1304 return (res == 0) ? 1 : 0;
1117} 1305}
1118 1306
1119/* 1307/*
1120 * Returns 0 if not found, -1 on failure, and 1 on success 1308 * Returns 0 if not found, -1 on failure, and 1 on success
1121 */ 1309 */
1122int search_dir(struct buffer_head *bh, 1310int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1123 char *search_buf, 1311 struct inode *dir, const struct qstr *d_name,
1124 int buf_size, 1312 unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1125 struct inode *dir,
1126 const struct qstr *d_name,
1127 unsigned int offset,
1128 struct ext4_dir_entry_2 **res_dir)
1129{ 1313{
1130 struct ext4_dir_entry_2 * de; 1314 struct ext4_dir_entry_2 * de;
1131 char * dlimit; 1315 char * dlimit;
1132 int de_len; 1316 int de_len;
1133 const char *name = d_name->name; 1317 const char *name = d_name->name;
1134 int namelen = d_name->len; 1318 int namelen = d_name->len;
1319 struct ext4_fname_crypto_ctx *ctx = NULL;
1320 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
1321 int res;
1322
1323 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1324 if (IS_ERR(ctx))
1325 return -1;
1326
1327 if (ctx != NULL) {
1328 /* Allocate buffer to hold maximum name length */
1329 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1330 &fname_crypto_str);
1331 if (res < 0) {
1332 ext4_put_fname_crypto_ctx(&ctx);
1333 return -1;
1334 }
1335 }
1135 1336
1136 de = (struct ext4_dir_entry_2 *)search_buf; 1337 de = (struct ext4_dir_entry_2 *)search_buf;
1137 dlimit = search_buf + buf_size; 1338 dlimit = search_buf + buf_size;
1138 while ((char *) de < dlimit) { 1339 while ((char *) de < dlimit) {
1139 /* this code is executed quadratically often */ 1340 /* this code is executed quadratically often */
1140 /* do minimal checking `by hand' */ 1341 /* do minimal checking `by hand' */
1342 if ((char *) de + de->name_len <= dlimit) {
1343 res = ext4_match(ctx, &fname_crypto_str, namelen,
1344 name, de);
1345 if (res < 0) {
1346 res = -1;
1347 goto return_result;
1348 }
1349 if (res > 0) {
1350 /* found a match - just to be sure, do
1351 * a full check */
1352 if (ext4_check_dir_entry(dir, NULL, de, bh,
1353 bh->b_data,
1354 bh->b_size, offset)) {
1355 res = -1;
1356 goto return_result;
1357 }
1358 *res_dir = de;
1359 res = 1;
1360 goto return_result;
1361 }
1141 1362
1142 if ((char *) de + namelen <= dlimit &&
1143 ext4_match (namelen, name, de)) {
1144 /* found a match - just to be sure, do a full check */
1145 if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
1146 bh->b_size, offset))
1147 return -1;
1148 *res_dir = de;
1149 return 1;
1150 } 1363 }
1151 /* prevent looping on a bad block */ 1364 /* prevent looping on a bad block */
1152 de_len = ext4_rec_len_from_disk(de->rec_len, 1365 de_len = ext4_rec_len_from_disk(de->rec_len,
1153 dir->i_sb->s_blocksize); 1366 dir->i_sb->s_blocksize);
1154 if (de_len <= 0) 1367 if (de_len <= 0) {
1155 return -1; 1368 res = -1;
1369 goto return_result;
1370 }
1156 offset += de_len; 1371 offset += de_len;
1157 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 1372 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1158 } 1373 }
1159 return 0; 1374
1375 res = 0;
1376return_result:
1377 ext4_put_fname_crypto_ctx(&ctx);
1378 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1379 return res;
1160} 1380}
1161 1381
1162static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, 1382static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
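The rewritten ext4_match() is tri-state — negative errno on a crypto failure, 0 for no match, positive for a match — which is why search_dir() checks res < 0 and res > 0 separately instead of treating the result as a boolean. A standalone model of a caller honoring that convention (plain byte comparison stands in for the real name handling):

    #include <stdio.h>

    static int match_model(const char *name, int len,
                           const char *de_name, int de_len)
    {
        int i;

        if (de_len < 0)
            return -1;            /* stand-in for a crypto failure */
        if (len != de_len)
            return 0;
        for (i = 0; i < len; i++)
            if (name[i] != de_name[i])
                return 0;
        return 1;
    }

    int main(void)
    {
        int res = match_model("foo", 3, "foo", 3);

        if (res < 0)
            printf("error\n");
        else if (res > 0)
            printf("found\n");
        else
            printf("keep scanning\n");
        return 0;
    }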
@@ -1345,6 +1565,9 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1345 ext4_lblk_t block; 1565 ext4_lblk_t block;
1346 int retval; 1566 int retval;
1347 1567
1568#ifdef CONFIG_EXT4_FS_ENCRYPTION
1569 *res_dir = NULL;
1570#endif
1348 frame = dx_probe(d_name, dir, &hinfo, frames); 1571 frame = dx_probe(d_name, dir, &hinfo, frames);
1349 if (IS_ERR(frame)) 1572 if (IS_ERR(frame))
1350 return (struct buffer_head *) frame; 1573 return (struct buffer_head *) frame;
@@ -1417,6 +1640,18 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1417 ino); 1640 ino);
1418 return ERR_PTR(-EIO); 1641 return ERR_PTR(-EIO);
1419 } 1642 }
1643 if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
1644 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1645 S_ISLNK(inode->i_mode)) &&
1646 !ext4_is_child_context_consistent_with_parent(dir,
1647 inode)) {
1648 iput(inode);
1649 ext4_warning(inode->i_sb,
1650 "Inconsistent encryption contexts: %lu/%lu\n",
1651 (unsigned long) dir->i_ino,
1652 (unsigned long) inode->i_ino);
1653 return ERR_PTR(-EPERM);
1654 }
1420 } 1655 }
1421 return d_splice_alias(inode, dentry); 1656 return d_splice_alias(inode, dentry);
1422} 1657}
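The lookup hunk refuses to splice in a child whose encryption context disagrees with its parent's, failing with EPERM. A toy model of such a consistency check; the stand-in below compares only a key descriptor, whereas the real ext4_is_child_context_consistent_with_parent() works on the stored per-inode context:

    #include <stdio.h>
    #include <errno.h>
    #include <string.h>

    struct ctx_model { unsigned char master_key_desc[8]; };

    static int contexts_consistent(const struct ctx_model *parent,
                                   const struct ctx_model *child)
    {
        return memcmp(parent->master_key_desc,
                      child->master_key_desc,
                      sizeof(parent->master_key_desc)) == 0;
    }

    int main(void)
    {
        struct ctx_model dir = { "key-A" }, ok = { "key-A" },
                         bad = { "key-B" };

        printf("ok: %d, bad: %d\n",
               contexts_consistent(&dir, &ok) ? 0 : -EPERM,
               contexts_consistent(&dir, &bad) ? 0 : -EPERM);
        return 0;
    }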
@@ -1541,7 +1776,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1541 1776
1542 /* create map in the end of data2 block */ 1777 /* create map in the end of data2 block */
1543 map = (struct dx_map_entry *) (data2 + blocksize); 1778 map = (struct dx_map_entry *) (data2 + blocksize);
1544 count = dx_make_map((struct ext4_dir_entry_2 *) data1, 1779 count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1,
1545 blocksize, hinfo, map); 1780 blocksize, hinfo, map);
1546 map -= count; 1781 map -= count;
1547 dx_sort_map(map, count); 1782 dx_sort_map(map, count);
@@ -1564,7 +1799,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1564 hash2, split, count-split)); 1799 hash2, split, count-split));
1565 1800
1566 /* Fancy dance to stay within two buffers */ 1801 /* Fancy dance to stay within two buffers */
1567 de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); 1802 de2 = dx_move_dirents(data1, data2, map + split, count - split,
1803 blocksize);
1568 de = dx_pack_dirents(data1, blocksize); 1804 de = dx_pack_dirents(data1, blocksize);
1569 de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - 1805 de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1570 (char *) de, 1806 (char *) de,
@@ -1580,8 +1816,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1580 initialize_dirent_tail(t, blocksize); 1816 initialize_dirent_tail(t, blocksize);
1581 } 1817 }
1582 1818
1583 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1819 dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
1584 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1820 blocksize, 1));
1821 dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
1822 blocksize, 1));
1585 1823
1586 /* Which block gets the new entry? */ 1824 /* Which block gets the new entry? */
1587 if (hinfo->hash >= hash2) { 1825 if (hinfo->hash >= hash2) {
@@ -1618,15 +1856,48 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1618 int nlen, rlen; 1856 int nlen, rlen;
1619 unsigned int offset = 0; 1857 unsigned int offset = 0;
1620 char *top; 1858 char *top;
1859 struct ext4_fname_crypto_ctx *ctx = NULL;
1860 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
1861 int res;
1862
1863 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1864 if (IS_ERR(ctx))
1865 return -1;
1866
1867 if (ctx != NULL) {
1868 /* Calculate record length needed to store the entry */
1869 res = ext4_fname_crypto_namelen_on_disk(ctx, namelen);
1870 if (res < 0) {
1871 ext4_put_fname_crypto_ctx(&ctx);
1872 return res;
1873 }
1874 reclen = EXT4_DIR_REC_LEN(res);
1875
1876 /* Allocate buffer to hold maximum name length */
1877 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1878 &fname_crypto_str);
1879 if (res < 0) {
1880 ext4_put_fname_crypto_ctx(&ctx);
1881 return -1;
1882 }
1883 }
1621 1884
1622 de = (struct ext4_dir_entry_2 *)buf; 1885 de = (struct ext4_dir_entry_2 *)buf;
1623 top = buf + buf_size - reclen; 1886 top = buf + buf_size - reclen;
1624 while ((char *) de <= top) { 1887 while ((char *) de <= top) {
1625 if (ext4_check_dir_entry(dir, NULL, de, bh, 1888 if (ext4_check_dir_entry(dir, NULL, de, bh,
1626 buf, buf_size, offset)) 1889 buf, buf_size, offset)) {
1627 return -EIO; 1890 res = -EIO;
1628 if (ext4_match(namelen, name, de)) 1891 goto return_result;
1629 return -EEXIST; 1892 }
1893 /* Provide crypto context and crypto buffer to ext4 match */
1894 res = ext4_match(ctx, &fname_crypto_str, namelen, name, de);
1895 if (res < 0)
1896 goto return_result;
1897 if (res > 0) {
1898 res = -EEXIST;
1899 goto return_result;
1900 }
1630 nlen = EXT4_DIR_REC_LEN(de->name_len); 1901 nlen = EXT4_DIR_REC_LEN(de->name_len);
1631 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); 1902 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1632 if ((de->inode ? rlen - nlen : rlen) >= reclen) 1903 if ((de->inode ? rlen - nlen : rlen) >= reclen)
@@ -1634,26 +1905,62 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1634 de = (struct ext4_dir_entry_2 *)((char *)de + rlen); 1905 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1635 offset += rlen; 1906 offset += rlen;
1636 } 1907 }
1637 if ((char *) de > top)
1638 return -ENOSPC;
1639 1908
1640 *dest_de = de; 1909 if ((char *) de > top)
1641 return 0; 1910 res = -ENOSPC;
1911 else {
1912 *dest_de = de;
1913 res = 0;
1914 }
1915return_result:
1916 ext4_put_fname_crypto_ctx(&ctx);
1917 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1918 return res;
1642} 1919}
1643 1920
1644void ext4_insert_dentry(struct inode *inode, 1921int ext4_insert_dentry(struct inode *dir,
1645 struct ext4_dir_entry_2 *de, 1922 struct inode *inode,
1646 int buf_size, 1923 struct ext4_dir_entry_2 *de,
1647 const char *name, int namelen) 1924 int buf_size,
1925 const struct qstr *iname,
1926 const char *name, int namelen)
1648{ 1927{
1649 1928
1650 int nlen, rlen; 1929 int nlen, rlen;
1930 struct ext4_fname_crypto_ctx *ctx = NULL;
1931 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
1932 struct ext4_str tmp_str;
1933 int res;
1934
1935 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1936 if (IS_ERR(ctx))
1937 return -EIO;
1938 /* By default, the input name would be written to the disk */
1939 tmp_str.name = (unsigned char *)name;
1940 tmp_str.len = namelen;
1941 if (ctx != NULL) {
1942 /* Directory is encrypted */
1943 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1944 &fname_crypto_str);
1945 if (res < 0) {
1946 ext4_put_fname_crypto_ctx(&ctx);
1947 return -ENOMEM;
1948 }
1949 res = ext4_fname_usr_to_disk(ctx, iname, &fname_crypto_str);
1950 if (res < 0) {
1951 ext4_put_fname_crypto_ctx(&ctx);
1952 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1953 return res;
1954 }
1955 tmp_str.name = fname_crypto_str.name;
1956 tmp_str.len = fname_crypto_str.len;
1957 }
1651 1958
1652 nlen = EXT4_DIR_REC_LEN(de->name_len); 1959 nlen = EXT4_DIR_REC_LEN(de->name_len);
1653 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); 1960 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1654 if (de->inode) { 1961 if (de->inode) {
1655 struct ext4_dir_entry_2 *de1 = 1962 struct ext4_dir_entry_2 *de1 =
1656 (struct ext4_dir_entry_2 *)((char *)de + nlen); 1963 (struct ext4_dir_entry_2 *)((char *)de + nlen);
1657 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size); 1964 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
1658 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size); 1965 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
1659 de = de1; 1966 de = de1;
@@ -1661,9 +1968,14 @@ void ext4_insert_dentry(struct inode *inode,
1661 de->file_type = EXT4_FT_UNKNOWN; 1968 de->file_type = EXT4_FT_UNKNOWN;
1662 de->inode = cpu_to_le32(inode->i_ino); 1969 de->inode = cpu_to_le32(inode->i_ino);
1663 ext4_set_de_type(inode->i_sb, de, inode->i_mode); 1970 ext4_set_de_type(inode->i_sb, de, inode->i_mode);
1664 de->name_len = namelen; 1971 de->name_len = tmp_str.len;
1665 memcpy(de->name, name, namelen); 1972
1973 memcpy(de->name, tmp_str.name, tmp_str.len);
1974 ext4_put_fname_crypto_ctx(&ctx);
1975 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1976 return 0;
1666} 1977}
1978
1667/* 1979/*
1668 * Add a new entry into a directory (leaf) block. If de is non-NULL, 1980 * Add a new entry into a directory (leaf) block. If de is non-NULL,
1669 * it points to a directory entry which is guaranteed to be large 1981 * it points to a directory entry which is guaranteed to be large
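When the target slot already holds an entry, ext4_insert_dentry() splits its record: the occupant keeps EXT4_DIR_REC_LEN(name_len) bytes and the remainder becomes the new entry's record. A sketch of the length arithmetic only; no on-disk layout is modeled:

    #include <stdio.h>

    int main(void)
    {
        int rlen = 64;   /* space owned by the existing entry */
        int nlen = 24;   /* space it actually needs */

        int kept = nlen;          /* de->rec_len after the split */
        int fresh = rlen - nlen;  /* de1->rec_len, receives the new name */

        printf("kept=%d fresh=%d (sum=%d)\n", kept, fresh, kept + fresh);
        return 0;
    }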
@@ -1700,8 +2012,12 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1700 return err; 2012 return err;
1701 } 2013 }
1702 2014
1703 /* By now the buffer is marked for journaling */ 2015 /* By now the buffer is marked for journaling. Due to crypto operations,
1704 ext4_insert_dentry(inode, de, blocksize, name, namelen); 2016 * the following function call may fail */
2017 err = ext4_insert_dentry(dir, inode, de, blocksize, &dentry->d_name,
2018 name, namelen);
2019 if (err < 0)
2020 return err;
1705 2021
1706 /* 2022 /*
1707 * XXX shouldn't update any times until successful 2023 * XXX shouldn't update any times until successful
@@ -1733,8 +2049,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1733 struct inode *inode, struct buffer_head *bh) 2049 struct inode *inode, struct buffer_head *bh)
1734{ 2050{
1735 struct inode *dir = dentry->d_parent->d_inode; 2051 struct inode *dir = dentry->d_parent->d_inode;
2052#ifdef CONFIG_EXT4_FS_ENCRYPTION
2053 struct ext4_fname_crypto_ctx *ctx = NULL;
2054 int res;
2055#else
1736 const char *name = dentry->d_name.name; 2056 const char *name = dentry->d_name.name;
1737 int namelen = dentry->d_name.len; 2057 int namelen = dentry->d_name.len;
2058#endif
1738 struct buffer_head *bh2; 2059 struct buffer_head *bh2;
1739 struct dx_root *root; 2060 struct dx_root *root;
1740 struct dx_frame frames[2], *frame; 2061 struct dx_frame frames[2], *frame;
@@ -1748,7 +2069,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1748 struct dx_hash_info hinfo; 2069 struct dx_hash_info hinfo;
1749 ext4_lblk_t block; 2070 ext4_lblk_t block;
1750 struct fake_dirent *fde; 2071 struct fake_dirent *fde;
1751 int csum_size = 0; 2072 int csum_size = 0;
2073
2074#ifdef CONFIG_EXT4_FS_ENCRYPTION
2075 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
2076 if (IS_ERR(ctx))
2077 return PTR_ERR(ctx);
2078#endif
1752 2079
1753 if (ext4_has_metadata_csum(inode->i_sb)) 2080 if (ext4_has_metadata_csum(inode->i_sb))
1754 csum_size = sizeof(struct ext4_dir_entry_tail); 2081 csum_size = sizeof(struct ext4_dir_entry_tail);
@@ -1815,7 +2142,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1815 if (hinfo.hash_version <= DX_HASH_TEA) 2142 if (hinfo.hash_version <= DX_HASH_TEA)
1816 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; 2143 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1817 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; 2144 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2145#ifdef CONFIG_EXT4_FS_ENCRYPTION
2146 res = ext4_fname_usr_to_hash(ctx, &dentry->d_name, &hinfo);
2147 if (res < 0) {
2148 ext4_put_fname_crypto_ctx(&ctx);
2149 ext4_mark_inode_dirty(handle, dir);
2150 brelse(bh);
2151 return res;
2152 }
2153 ext4_put_fname_crypto_ctx(&ctx);
2154#else
1818 ext4fs_dirhash(name, namelen, &hinfo); 2155 ext4fs_dirhash(name, namelen, &hinfo);
2156#endif
1819 memset(frames, 0, sizeof(frames)); 2157 memset(frames, 0, sizeof(frames));
1820 frame = frames; 2158 frame = frames;
1821 frame->entries = entries; 2159 frame->entries = entries;
@@ -1865,7 +2203,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1865 struct inode *inode) 2203 struct inode *inode)
1866{ 2204{
1867 struct inode *dir = dentry->d_parent->d_inode; 2205 struct inode *dir = dentry->d_parent->d_inode;
1868 struct buffer_head *bh; 2206 struct buffer_head *bh = NULL;
1869 struct ext4_dir_entry_2 *de; 2207 struct ext4_dir_entry_2 *de;
1870 struct ext4_dir_entry_tail *t; 2208 struct ext4_dir_entry_tail *t;
1871 struct super_block *sb; 2209 struct super_block *sb;
@@ -1889,14 +2227,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1889 return retval; 2227 return retval;
1890 if (retval == 1) { 2228 if (retval == 1) {
1891 retval = 0; 2229 retval = 0;
1892 return retval; 2230 goto out;
1893 } 2231 }
1894 } 2232 }
1895 2233
1896 if (is_dx(dir)) { 2234 if (is_dx(dir)) {
1897 retval = ext4_dx_add_entry(handle, dentry, inode); 2235 retval = ext4_dx_add_entry(handle, dentry, inode);
1898 if (!retval || (retval != ERR_BAD_DX_DIR)) 2236 if (!retval || (retval != ERR_BAD_DX_DIR))
1899 return retval; 2237 goto out;
1900 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); 2238 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
1901 dx_fallback++; 2239 dx_fallback++;
1902 ext4_mark_inode_dirty(handle, dir); 2240 ext4_mark_inode_dirty(handle, dir);
@@ -1908,14 +2246,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1908 return PTR_ERR(bh); 2246 return PTR_ERR(bh);
1909 2247
1910 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 2248 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1911 if (retval != -ENOSPC) { 2249 if (retval != -ENOSPC)
1912 brelse(bh); 2250 goto out;
1913 return retval;
1914 }
1915 2251
1916 if (blocks == 1 && !dx_fallback && 2252 if (blocks == 1 && !dx_fallback &&
1917 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) 2253 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
1918 return make_indexed_dir(handle, dentry, inode, bh); 2254 retval = make_indexed_dir(handle, dentry, inode, bh);
2255 bh = NULL; /* make_indexed_dir releases bh */
2256 goto out;
2257 }
1919 brelse(bh); 2258 brelse(bh);
1920 } 2259 }
1921 bh = ext4_append(handle, dir, &block); 2260 bh = ext4_append(handle, dir, &block);
@@ -1931,6 +2270,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1931 } 2270 }
1932 2271
1933 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 2272 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
2273out:
1934 brelse(bh); 2274 brelse(bh);
1935 if (retval == 0) 2275 if (retval == 0)
1936 ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); 2276 ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -2235,12 +2575,22 @@ retry:
2235 err = PTR_ERR(inode); 2575 err = PTR_ERR(inode);
2236 if (!IS_ERR(inode)) { 2576 if (!IS_ERR(inode)) {
2237 inode->i_op = &ext4_file_inode_operations; 2577 inode->i_op = &ext4_file_inode_operations;
2238 if (test_opt(inode->i_sb, DAX)) 2578 inode->i_fop = &ext4_file_operations;
2239 inode->i_fop = &ext4_dax_file_operations;
2240 else
2241 inode->i_fop = &ext4_file_operations;
2242 ext4_set_aops(inode); 2579 ext4_set_aops(inode);
2243 err = ext4_add_nondir(handle, dentry, inode); 2580 err = 0;
2581#ifdef CONFIG_EXT4_FS_ENCRYPTION
2582 if (!err && (ext4_encrypted_inode(dir) ||
2583 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)))) {
2584 err = ext4_inherit_context(dir, inode);
2585 if (err) {
2586 clear_nlink(inode);
2587 unlock_new_inode(inode);
2588 iput(inode);
2589 }
2590 }
2591#endif
2592 if (!err)
2593 err = ext4_add_nondir(handle, dentry, inode);
2244 if (!err && IS_DIRSYNC(dir)) 2594 if (!err && IS_DIRSYNC(dir))
2245 ext4_handle_sync(handle); 2595 ext4_handle_sync(handle);
2246 } 2596 }
@@ -2302,10 +2652,7 @@ retry:
2302 err = PTR_ERR(inode); 2652 err = PTR_ERR(inode);
2303 if (!IS_ERR(inode)) { 2653 if (!IS_ERR(inode)) {
2304 inode->i_op = &ext4_file_inode_operations; 2654 inode->i_op = &ext4_file_inode_operations;
2305 if (test_opt(inode->i_sb, DAX)) 2655 inode->i_fop = &ext4_file_operations;
2306 inode->i_fop = &ext4_dax_file_operations;
2307 else
2308 inode->i_fop = &ext4_file_operations;
2309 ext4_set_aops(inode); 2656 ext4_set_aops(inode);
2310 d_tmpfile(dentry, inode); 2657 d_tmpfile(dentry, inode);
2311 err = ext4_orphan_add(handle, inode); 2658 err = ext4_orphan_add(handle, inode);
@@ -2424,6 +2771,14 @@ retry:
2424 err = ext4_init_new_dir(handle, dir, inode); 2771 err = ext4_init_new_dir(handle, dir, inode);
2425 if (err) 2772 if (err)
2426 goto out_clear_inode; 2773 goto out_clear_inode;
2774#ifdef CONFIG_EXT4_FS_ENCRYPTION
2775 if (ext4_encrypted_inode(dir) ||
2776 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) {
2777 err = ext4_inherit_context(dir, inode);
2778 if (err)
2779 goto out_clear_inode;
2780 }
2781#endif
2427 err = ext4_mark_inode_dirty(handle, inode); 2782 err = ext4_mark_inode_dirty(handle, inode);
2428 if (!err) 2783 if (!err)
2429 err = ext4_add_entry(handle, dentry, inode); 2784 err = ext4_add_entry(handle, dentry, inode);
@@ -2456,7 +2811,7 @@ out_stop:
2456/* 2811/*
2457 * routine to check that the specified directory is empty (for rmdir) 2812 * routine to check that the specified directory is empty (for rmdir)
2458 */ 2813 */
2459static int empty_dir(struct inode *inode) 2814int ext4_empty_dir(struct inode *inode)
2460{ 2815{
2461 unsigned int offset; 2816 unsigned int offset;
2462 struct buffer_head *bh; 2817 struct buffer_head *bh;
@@ -2724,7 +3079,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2724 goto end_rmdir; 3079 goto end_rmdir;
2725 3080
2726 retval = -ENOTEMPTY; 3081 retval = -ENOTEMPTY;
2727 if (!empty_dir(inode)) 3082 if (!ext4_empty_dir(inode))
2728 goto end_rmdir; 3083 goto end_rmdir;
2729 3084
2730 handle = ext4_journal_start(dir, EXT4_HT_DIR, 3085 handle = ext4_journal_start(dir, EXT4_HT_DIR,
@@ -2834,16 +3189,25 @@ static int ext4_symlink(struct inode *dir,
2834{ 3189{
2835 handle_t *handle; 3190 handle_t *handle;
2836 struct inode *inode; 3191 struct inode *inode;
2837 int l, err, retries = 0; 3192 int err, len = strlen(symname);
2838 int credits; 3193 int credits;
2839 3194 bool encryption_required;
2840 l = strlen(symname)+1; 3195 struct ext4_str disk_link;
2841 if (l > dir->i_sb->s_blocksize) 3196 struct ext4_encrypted_symlink_data *sd = NULL;
3197
3198 disk_link.len = len + 1;
3199 disk_link.name = (char *) symname;
3200
3201 encryption_required = (ext4_encrypted_inode(dir) ||
3202 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)));
3203 if (encryption_required)
3204 disk_link.len = encrypted_symlink_data_len(len) + 1;
3205 if (disk_link.len > dir->i_sb->s_blocksize)
2842 return -ENAMETOOLONG; 3206 return -ENAMETOOLONG;
2843 3207
2844 dquot_initialize(dir); 3208 dquot_initialize(dir);
2845 3209
2846 if (l > EXT4_N_BLOCKS * 4) { 3210 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
2847 /* 3211 /*
2848 * For non-fast symlinks, we just allocate inode and put it on 3212 * For non-fast symlinks, we just allocate inode and put it on
2849 * orphan list in the first transaction => we need bitmap, 3213 * orphan list in the first transaction => we need bitmap,
@@ -2862,16 +3226,49 @@ static int ext4_symlink(struct inode *dir,
2862 credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 3226 credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2863 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; 3227 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
2864 } 3228 }
2865retry: 3229
2866 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, 3230 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2867 &dentry->d_name, 0, NULL, 3231 &dentry->d_name, 0, NULL,
2868 EXT4_HT_DIR, credits); 3232 EXT4_HT_DIR, credits);
2869 handle = ext4_journal_current_handle(); 3233 handle = ext4_journal_current_handle();
2870 err = PTR_ERR(inode); 3234 if (IS_ERR(inode)) {
2871 if (IS_ERR(inode)) 3235 if (handle)
2872 goto out_stop; 3236 ext4_journal_stop(handle);
3237 return PTR_ERR(inode);
3238 }
3239
3240 if (encryption_required) {
3241 struct ext4_fname_crypto_ctx *ctx = NULL;
3242 struct qstr istr;
3243 struct ext4_str ostr;
3244
3245 sd = kzalloc(disk_link.len, GFP_NOFS);
3246 if (!sd) {
3247 err = -ENOMEM;
3248 goto err_drop_inode;
3249 }
3250 err = ext4_inherit_context(dir, inode);
3251 if (err)
3252 goto err_drop_inode;
3253 ctx = ext4_get_fname_crypto_ctx(inode,
3254 inode->i_sb->s_blocksize);
3255 if (IS_ERR_OR_NULL(ctx)) {
3256 /* We just set the policy, so ctx should not be NULL */
3257 err = (ctx == NULL) ? -EIO : PTR_ERR(ctx);
3258 goto err_drop_inode;
3259 }
3260 istr.name = (const unsigned char *) symname;
3261 istr.len = len;
3262 ostr.name = sd->encrypted_path;
3263 err = ext4_fname_usr_to_disk(ctx, &istr, &ostr);
3264 ext4_put_fname_crypto_ctx(&ctx);
3265 if (err < 0)
3266 goto err_drop_inode;
3267 sd->len = cpu_to_le16(ostr.len);
3268 disk_link.name = (char *) sd;
3269 }
2873 3270
2874 if (l > EXT4_N_BLOCKS * 4) { 3271 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
2875 inode->i_op = &ext4_symlink_inode_operations; 3272 inode->i_op = &ext4_symlink_inode_operations;
2876 ext4_set_aops(inode); 3273 ext4_set_aops(inode);
2877 /* 3274 /*
@@ -2887,9 +3284,10 @@ retry:
2887 drop_nlink(inode); 3284 drop_nlink(inode);
2888 err = ext4_orphan_add(handle, inode); 3285 err = ext4_orphan_add(handle, inode);
2889 ext4_journal_stop(handle); 3286 ext4_journal_stop(handle);
3287 handle = NULL;
2890 if (err) 3288 if (err)
2891 goto err_drop_inode; 3289 goto err_drop_inode;
2892 err = __page_symlink(inode, symname, l, 1); 3290 err = __page_symlink(inode, disk_link.name, disk_link.len, 1);
2893 if (err) 3291 if (err)
2894 goto err_drop_inode; 3292 goto err_drop_inode;
2895 /* 3293 /*
@@ -2901,34 +3299,37 @@ retry:
2901 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); 3299 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2902 if (IS_ERR(handle)) { 3300 if (IS_ERR(handle)) {
2903 err = PTR_ERR(handle); 3301 err = PTR_ERR(handle);
3302 handle = NULL;
2904 goto err_drop_inode; 3303 goto err_drop_inode;
2905 } 3304 }
2906 set_nlink(inode, 1); 3305 set_nlink(inode, 1);
2907 err = ext4_orphan_del(handle, inode); 3306 err = ext4_orphan_del(handle, inode);
2908 if (err) { 3307 if (err)
2909 ext4_journal_stop(handle);
2910 clear_nlink(inode);
2911 goto err_drop_inode; 3308 goto err_drop_inode;
2912 }
2913 } else { 3309 } else {
2914 /* clear the extent format for fast symlink */ 3310 /* clear the extent format for fast symlink */
2915 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 3311 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
2916 inode->i_op = &ext4_fast_symlink_inode_operations; 3312 inode->i_op = encryption_required ?
2917 memcpy((char *)&EXT4_I(inode)->i_data, symname, l); 3313 &ext4_symlink_inode_operations :
2918 inode->i_size = l-1; 3314 &ext4_fast_symlink_inode_operations;
3315 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3316 disk_link.len);
3317 inode->i_size = disk_link.len - 1;
2919 } 3318 }
2920 EXT4_I(inode)->i_disksize = inode->i_size; 3319 EXT4_I(inode)->i_disksize = inode->i_size;
2921 err = ext4_add_nondir(handle, dentry, inode); 3320 err = ext4_add_nondir(handle, dentry, inode);
2922 if (!err && IS_DIRSYNC(dir)) 3321 if (!err && IS_DIRSYNC(dir))
2923 ext4_handle_sync(handle); 3322 ext4_handle_sync(handle);
2924 3323
2925out_stop:
2926 if (handle) 3324 if (handle)
2927 ext4_journal_stop(handle); 3325 ext4_journal_stop(handle);
2928 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 3326 kfree(sd);
2929 goto retry;
2930 return err; 3327 return err;
2931err_drop_inode: 3328err_drop_inode:
3329 if (handle)
3330 ext4_journal_stop(handle);
3331 kfree(sd);
3332 clear_nlink(inode);
2932 unlock_new_inode(inode); 3333 unlock_new_inode(inode);
2933 iput(inode); 3334 iput(inode);
2934 return err; 3335 return err;
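For encrypted symlinks the hunk sizes the on-disk target from encrypted_symlink_data_len() — a 2-byte length header plus ciphertext — instead of strlen+1, and still bounds the result by the block size. A rough model of that sizing; the padding unit here is an assumption, not the kernel's actual fname padding:

    #include <stdio.h>

    #define BLOCKSIZE 4096
    #define PAD 16                 /* assumption: cipher padding unit */

    static int disk_link_len(int len, int encrypted)
    {
        if (!encrypted)
            return len + 1;        /* plaintext target + NUL */
        /* sd->len header + padded ciphertext + NUL */
        return 2 + ((len + PAD - 1) / PAD) * PAD + 1;
    }

    int main(void)
    {
        int plain = disk_link_len(100, 0);
        int enc = disk_link_len(100, 1);

        printf("plain=%d enc=%d fits=%d\n",
               plain, enc, enc <= BLOCKSIZE);
        return 0;
    }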
@@ -2943,7 +3344,9 @@ static int ext4_link(struct dentry *old_dentry,
2943 3344
2944 if (inode->i_nlink >= EXT4_LINK_MAX) 3345 if (inode->i_nlink >= EXT4_LINK_MAX)
2945 return -EMLINK; 3346 return -EMLINK;
2946 3347 if (ext4_encrypted_inode(dir) &&
3348 !ext4_is_child_context_consistent_with_parent(dir, inode))
3349 return -EPERM;
2947 dquot_initialize(dir); 3350 dquot_initialize(dir);
2948 3351
2949retry: 3352retry:
@@ -3244,6 +3647,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3244 if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) 3647 if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3245 goto end_rename; 3648 goto end_rename;
3246 3649
3650 if ((old.dir != new.dir) &&
3651 ext4_encrypted_inode(new.dir) &&
3652 !ext4_is_child_context_consistent_with_parent(new.dir,
3653 old.inode)) {
3654 retval = -EPERM;
3655 goto end_rename;
3656 }
3657
3247 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3658 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3248 &new.de, &new.inlined); 3659 &new.de, &new.inlined);
3249 if (IS_ERR(new.bh)) { 3660 if (IS_ERR(new.bh)) {
@@ -3264,12 +3675,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3264 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 3675 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3265 if (!(flags & RENAME_WHITEOUT)) { 3676 if (!(flags & RENAME_WHITEOUT)) {
3266 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); 3677 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3267 if (IS_ERR(handle)) 3678 if (IS_ERR(handle)) {
3268 return PTR_ERR(handle); 3679 retval = PTR_ERR(handle);
3680 handle = NULL;
3681 goto end_rename;
3682 }
3269 } else { 3683 } else {
3270 whiteout = ext4_whiteout_for_rename(&old, credits, &handle); 3684 whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
3271 if (IS_ERR(whiteout)) 3685 if (IS_ERR(whiteout)) {
3272 return PTR_ERR(whiteout); 3686 retval = PTR_ERR(whiteout);
3687 whiteout = NULL;
3688 goto end_rename;
3689 }
3273 } 3690 }
3274 3691
3275 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) 3692 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
@@ -3278,7 +3695,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3278 if (S_ISDIR(old.inode->i_mode)) { 3695 if (S_ISDIR(old.inode->i_mode)) {
3279 if (new.inode) { 3696 if (new.inode) {
3280 retval = -ENOTEMPTY; 3697 retval = -ENOTEMPTY;
3281 if (!empty_dir(new.inode)) 3698 if (!ext4_empty_dir(new.inode))
3282 goto end_rename; 3699 goto end_rename;
3283 } else { 3700 } else {
3284 retval = -EMLINK; 3701 retval = -EMLINK;
@@ -3352,8 +3769,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3352 3769
3353 ext4_dec_count(handle, old.dir); 3770 ext4_dec_count(handle, old.dir);
3354 if (new.inode) { 3771 if (new.inode) {
3355 /* checked empty_dir above, can't have another parent, 3772 /* checked ext4_empty_dir above, can't have another
3356 * ext4_dec_count() won't work for many-linked dirs */ 3773 * parent, ext4_dec_count() won't work for many-linked
3774 * dirs */
3357 clear_nlink(new.inode); 3775 clear_nlink(new.inode);
3358 } else { 3776 } else {
3359 ext4_inc_count(handle, new.dir); 3777 ext4_inc_count(handle, new.dir);
@@ -3433,8 +3851,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3433 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, 3851 handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
3434 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + 3852 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3435 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); 3853 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3436 if (IS_ERR(handle)) 3854 if (IS_ERR(handle)) {
3437 return PTR_ERR(handle); 3855 retval = PTR_ERR(handle);
3856 handle = NULL;
3857 goto end_rename;
3858 }
3438 3859
3439 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) 3860 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3440 ext4_handle_sync(handle); 3861 ext4_handle_sync(handle);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a2541a9ba..5765f88b3904 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/jbd2.h>
12#include <linux/highuid.h> 11#include <linux/highuid.h>
13#include <linux/pagemap.h> 12#include <linux/pagemap.h>
14#include <linux/quotaops.h> 13#include <linux/quotaops.h>
@@ -18,14 +17,12 @@
18#include <linux/pagevec.h> 17#include <linux/pagevec.h>
19#include <linux/mpage.h> 18#include <linux/mpage.h>
20#include <linux/namei.h> 19#include <linux/namei.h>
21#include <linux/aio.h>
22#include <linux/uio.h> 20#include <linux/uio.h>
23#include <linux/bio.h> 21#include <linux/bio.h>
24#include <linux/workqueue.h> 22#include <linux/workqueue.h>
25#include <linux/kernel.h> 23#include <linux/kernel.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/mm.h> 25#include <linux/mm.h>
28#include <linux/ratelimit.h>
29 26
30#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
31#include "xattr.h" 28#include "xattr.h"
@@ -69,6 +66,10 @@ static void ext4_finish_bio(struct bio *bio)
69 66
70 bio_for_each_segment_all(bvec, bio, i) { 67 bio_for_each_segment_all(bvec, bio, i) {
71 struct page *page = bvec->bv_page; 68 struct page *page = bvec->bv_page;
69#ifdef CONFIG_EXT4_FS_ENCRYPTION
70 struct page *data_page = NULL;
71 struct ext4_crypto_ctx *ctx = NULL;
72#endif
72 struct buffer_head *bh, *head; 73 struct buffer_head *bh, *head;
73 unsigned bio_start = bvec->bv_offset; 74 unsigned bio_start = bvec->bv_offset;
74 unsigned bio_end = bio_start + bvec->bv_len; 75 unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,6 +79,15 @@ static void ext4_finish_bio(struct bio *bio)
78 if (!page) 79 if (!page)
79 continue; 80 continue;
80 81
82#ifdef CONFIG_EXT4_FS_ENCRYPTION
83 if (!page->mapping) {
84 /* The bounce data pages are unmapped. */
85 data_page = page;
86 ctx = (struct ext4_crypto_ctx *)page_private(data_page);
87 page = ctx->control_page;
88 }
89#endif
90
81 if (error) { 91 if (error) {
82 SetPageError(page); 92 SetPageError(page);
83 set_bit(AS_EIO, &page->mapping->flags); 93 set_bit(AS_EIO, &page->mapping->flags);
@@ -102,8 +112,13 @@ static void ext4_finish_bio(struct bio *bio)
102 } while ((bh = bh->b_this_page) != head); 112 } while ((bh = bh->b_this_page) != head);
103 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); 113 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
104 local_irq_restore(flags); 114 local_irq_restore(flags);
105 if (!under_io) 115 if (!under_io) {
116#ifdef CONFIG_EXT4_FS_ENCRYPTION
117 if (ctx)
118 ext4_restore_control_page(data_page);
119#endif
106 end_page_writeback(page); 120 end_page_writeback(page);
121 }
107 } 122 }
108} 123}
109 124
@@ -378,6 +393,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
378 393
379static int io_submit_add_bh(struct ext4_io_submit *io, 394static int io_submit_add_bh(struct ext4_io_submit *io,
380 struct inode *inode, 395 struct inode *inode,
396 struct page *page,
381 struct buffer_head *bh) 397 struct buffer_head *bh)
382{ 398{
383 int ret; 399 int ret;
@@ -391,7 +407,7 @@ submit_and_retry:
391 if (ret) 407 if (ret)
392 return ret; 408 return ret;
393 } 409 }
394 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 410 ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
395 if (ret != bh->b_size) 411 if (ret != bh->b_size)
396 goto submit_and_retry; 412 goto submit_and_retry;
397 io->io_next_block++; 413 io->io_next_block++;
@@ -404,6 +420,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
404 struct writeback_control *wbc, 420 struct writeback_control *wbc,
405 bool keep_towrite) 421 bool keep_towrite)
406{ 422{
423 struct page *data_page = NULL;
407 struct inode *inode = page->mapping->host; 424 struct inode *inode = page->mapping->host;
408 unsigned block_start, blocksize; 425 unsigned block_start, blocksize;
409 struct buffer_head *bh, *head; 426 struct buffer_head *bh, *head;
@@ -463,19 +480,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
463 set_buffer_async_write(bh); 480 set_buffer_async_write(bh);
464 } while ((bh = bh->b_this_page) != head); 481 } while ((bh = bh->b_this_page) != head);
465 482
466 /* Now submit buffers to write */
467 bh = head = page_buffers(page); 483 bh = head = page_buffers(page);
484
485 if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
486 data_page = ext4_encrypt(inode, page);
487 if (IS_ERR(data_page)) {
488 ret = PTR_ERR(data_page);
489 data_page = NULL;
490 goto out;
491 }
492 }
493
494 /* Now submit buffers to write */
468 do { 495 do {
469 if (!buffer_async_write(bh)) 496 if (!buffer_async_write(bh))
470 continue; 497 continue;
471 ret = io_submit_add_bh(io, inode, bh); 498 ret = io_submit_add_bh(io, inode,
499 data_page ? data_page : page, bh);
472 if (ret) { 500 if (ret) {
473 /* 501 /*
474 * We only get here on ENOMEM. Not much else 502 * We only get here on ENOMEM. Not much else
475 * we can do but mark the page as dirty, and 503 * we can do but mark the page as dirty, and
476 * better luck next time. 504 * better luck next time.
477 */ 505 */
478 redirty_page_for_writepage(wbc, page);
479 break; 506 break;
480 } 507 }
481 nr_submitted++; 508 nr_submitted++;
@@ -484,6 +511,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
484 511
485 /* Error stopped previous loop? Clean up buffers... */ 512 /* Error stopped previous loop? Clean up buffers... */
486 if (ret) { 513 if (ret) {
514 out:
515 if (data_page)
516 ext4_restore_control_page(data_page);
517 printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
518 redirty_page_for_writepage(wbc, page);
487 do { 519 do {
488 clear_buffer_async_write(bh); 520 clear_buffer_async_write(bh);
489 bh = bh->b_this_page; 521 bh = bh->b_this_page;
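Taken together, the page-io.c hunks route writeback of encrypted regular files through a bounce page: ext4_encrypt() hands back a separate data page holding ciphertext, io_submit_add_bh() now takes an explicit page argument so that bounce page (rather than bh->b_page) lands in the bio, and ext4_finish_bio() follows page_private() back to the control page before ending its writeback. Below is a toy userspace sketch of that ownership dance, with XOR standing in for the real cipher and malloc/free for the crypto-context pool; nothing here is the ext4 API.

#include <stdlib.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/* Plays the role of ext4_crypto_ctx: ciphertext plus a back-pointer
 * to the pagecache ("control") page that stays plaintext. */
struct bounce_page {
	unsigned char data[PAGE_SIZE];	/* what bio_add_page() would see */
	void *control_page;
};

/* Stand-in for ext4_encrypt(): returns the page to submit for I/O. */
static struct bounce_page *encrypt_for_io(void *control_page,
					  const unsigned char *plaintext)
{
	struct bounce_page *bp = malloc(sizeof(*bp));

	if (!bp)
		return NULL;
	for (int i = 0; i < PAGE_SIZE; i++)
		bp->data[i] = plaintext[i] ^ 0xAA;	/* toy "cipher" */
	bp->control_page = control_page;
	return bp;
}

/* Stand-in for the ext4_finish_bio() hunk: map the bounce page back
 * to its control page, then free it (ext4_restore_control_page()). */
static void *end_io_restore(struct bounce_page *bp)
{
	void *page = bp->control_page;

	free(bp);
	return page;	/* end_page_writeback() runs on this page */
}

int main(void)
{
	unsigned char plaintext[PAGE_SIZE] = "hello";
	int control;	/* any object can play the pagecache page here */
	struct bounce_page *bp = encrypt_for_io(&control, plaintext);

	if (!bp)
		return 1;
	printf("writeback ends on control page? %d\n",
	       end_io_restore(bp) == &control);
	return 0;
}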
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
new file mode 100644
index 000000000000..171b9ac4b45e
--- /dev/null
+++ b/fs/ext4/readpage.c
@@ -0,0 +1,328 @@
1/*
2 * linux/fs/ext4/readpage.c
3 *
4 * Copyright (C) 2002, Linus Torvalds.
5 * Copyright (C) 2015, Google, Inc.
6 *
7 * This was originally taken from fs/mpage.c
8 *
9 * The ext4_mpage_readpages() function here is intended to replace
10 * mpage_readpages() in the general case, not just for encrypted
11 * files. It has some limitations (see below), where it will fall
12 * back to block_read_full_page(), but these limitations should
13 * only be hit when page_size != block_size.
14 *
15 * This will allow us to attach a callback function to support ext4
16 * encryption.
17 *
18 * If anything unusual happens, such as:
19 *
20 * - encountering a page which has buffers
21 * - encountering a page which has a non-hole after a hole
22 * - encountering a page with non-contiguous blocks
23 *
24 * then this code just gives up and calls the buffer_head-based read function.
25 * It does handle a page which has holes at the end - that is a common case:
26 * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
27 *
28 */
29
30#include <linux/kernel.h>
31#include <linux/export.h>
32#include <linux/mm.h>
33#include <linux/kdev_t.h>
34#include <linux/gfp.h>
35#include <linux/bio.h>
36#include <linux/fs.h>
37#include <linux/buffer_head.h>
38#include <linux/blkdev.h>
39#include <linux/highmem.h>
40#include <linux/prefetch.h>
41#include <linux/mpage.h>
42#include <linux/writeback.h>
43#include <linux/backing-dev.h>
44#include <linux/pagevec.h>
45#include <linux/cleancache.h>
46
47#include "ext4.h"
48
49/*
50 * Call ext4_decrypt on every single page, reusing the encryption
51 * context.
52 */
53static void completion_pages(struct work_struct *work)
54{
55#ifdef CONFIG_EXT4_FS_ENCRYPTION
56 struct ext4_crypto_ctx *ctx =
57 container_of(work, struct ext4_crypto_ctx, work);
58 struct bio *bio = ctx->bio;
59 struct bio_vec *bv;
60 int i;
61
62 bio_for_each_segment_all(bv, bio, i) {
63 struct page *page = bv->bv_page;
64
65 int ret = ext4_decrypt(ctx, page);
66 if (ret) {
67 WARN_ON_ONCE(1);
68 SetPageError(page);
69 } else
70 SetPageUptodate(page);
71 unlock_page(page);
72 }
73 ext4_release_crypto_ctx(ctx);
74 bio_put(bio);
75#else
76 BUG();
77#endif
78}
79
80static inline bool ext4_bio_encrypted(struct bio *bio)
81{
82#ifdef CONFIG_EXT4_FS_ENCRYPTION
83 return unlikely(bio->bi_private != NULL);
84#else
85 return false;
86#endif
87}
88
89/*
90 * I/O completion handler for multipage BIOs.
91 *
92 * The mpage code never puts partial pages into a BIO (except for end-of-file).
93 * If a page does not map to a contiguous run of blocks then it simply falls
94 * back to block_read_full_page().
95 *
96 * Why is this? If a page's completion depends on a number of different BIOs
97 * which can complete in any order (or at the same time) then determining the
98 * status of that page is hard. See end_buffer_async_read() for the details.
99 * There is no point in duplicating all that complexity.
100 */
101static void mpage_end_io(struct bio *bio, int err)
102{
103 struct bio_vec *bv;
104 int i;
105
106 if (ext4_bio_encrypted(bio)) {
107 struct ext4_crypto_ctx *ctx = bio->bi_private;
108
109 if (err) {
110 ext4_release_crypto_ctx(ctx);
111 } else {
112 INIT_WORK(&ctx->work, completion_pages);
113 ctx->bio = bio;
114 queue_work(ext4_read_workqueue, &ctx->work);
115 return;
116 }
117 }
118 bio_for_each_segment_all(bv, bio, i) {
119 struct page *page = bv->bv_page;
120
121 if (!err) {
122 SetPageUptodate(page);
123 } else {
124 ClearPageUptodate(page);
125 SetPageError(page);
126 }
127 unlock_page(page);
128 }
129
130 bio_put(bio);
131}
132
133int ext4_mpage_readpages(struct address_space *mapping,
134 struct list_head *pages, struct page *page,
135 unsigned nr_pages)
136{
137 struct bio *bio = NULL;
138 unsigned page_idx;
139 sector_t last_block_in_bio = 0;
140
141 struct inode *inode = mapping->host;
142 const unsigned blkbits = inode->i_blkbits;
143 const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
144 const unsigned blocksize = 1 << blkbits;
145 sector_t block_in_file;
146 sector_t last_block;
147 sector_t last_block_in_file;
148 sector_t blocks[MAX_BUF_PER_PAGE];
149 unsigned page_block;
150 struct block_device *bdev = inode->i_sb->s_bdev;
151 int length;
152 unsigned relative_block = 0;
153 struct ext4_map_blocks map;
154
155 map.m_pblk = 0;
156 map.m_lblk = 0;
157 map.m_len = 0;
158 map.m_flags = 0;
159
160 for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
161 int fully_mapped = 1;
162 unsigned first_hole = blocks_per_page;
163
164 prefetchw(&page->flags);
165 if (pages) {
166 page = list_entry(pages->prev, struct page, lru);
167 list_del(&page->lru);
168 if (add_to_page_cache_lru(page, mapping,
169 page->index, GFP_KERNEL))
170 goto next_page;
171 }
172
173 if (page_has_buffers(page))
174 goto confused;
175
176 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
177 last_block = block_in_file + nr_pages * blocks_per_page;
178 last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
179 if (last_block > last_block_in_file)
180 last_block = last_block_in_file;
181 page_block = 0;
182
183 /*
184 * Map blocks using the previous result first.
185 */
186 if ((map.m_flags & EXT4_MAP_MAPPED) &&
187 block_in_file > map.m_lblk &&
188 block_in_file < (map.m_lblk + map.m_len)) {
189 unsigned map_offset = block_in_file - map.m_lblk;
190 unsigned last = map.m_len - map_offset;
191
192 for (relative_block = 0; ; relative_block++) {
193 if (relative_block == last) {
194 /* needed? */
195 map.m_flags &= ~EXT4_MAP_MAPPED;
196 break;
197 }
198 if (page_block == blocks_per_page)
199 break;
200 blocks[page_block] = map.m_pblk + map_offset +
201 relative_block;
202 page_block++;
203 block_in_file++;
204 }
205 }
206
207 /*
208 * Then do more ext4_map_blocks() calls until we are
209 * done with this page.
210 */
211 while (page_block < blocks_per_page) {
212 if (block_in_file < last_block) {
213 map.m_lblk = block_in_file;
214 map.m_len = last_block - block_in_file;
215
216 if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
217 set_error_page:
218 SetPageError(page);
219 zero_user_segment(page, 0,
220 PAGE_CACHE_SIZE);
221 unlock_page(page);
222 goto next_page;
223 }
224 }
225 if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
226 fully_mapped = 0;
227 if (first_hole == blocks_per_page)
228 first_hole = page_block;
229 page_block++;
230 block_in_file++;
231 continue;
232 }
233 if (first_hole != blocks_per_page)
234 goto confused; /* hole -> non-hole */
235
236 /* Contiguous blocks? */
237 if (page_block && blocks[page_block-1] != map.m_pblk-1)
238 goto confused;
239 for (relative_block = 0; ; relative_block++) {
240 if (relative_block == map.m_len) {
241 /* needed? */
242 map.m_flags &= ~EXT4_MAP_MAPPED;
243 break;
244 } else if (page_block == blocks_per_page)
245 break;
246 blocks[page_block] = map.m_pblk+relative_block;
247 page_block++;
248 block_in_file++;
249 }
250 }
251 if (first_hole != blocks_per_page) {
252 zero_user_segment(page, first_hole << blkbits,
253 PAGE_CACHE_SIZE);
254 if (first_hole == 0) {
255 SetPageUptodate(page);
256 unlock_page(page);
257 goto next_page;
258 }
259 } else if (fully_mapped) {
260 SetPageMappedToDisk(page);
261 }
262 if (fully_mapped && blocks_per_page == 1 &&
263 !PageUptodate(page) && cleancache_get_page(page) == 0) {
264 SetPageUptodate(page);
265 goto confused;
266 }
267
268 /*
269 * This page will go to BIO. Do we need to send this
270 * BIO off first?
271 */
272 if (bio && (last_block_in_bio != blocks[0] - 1)) {
273 submit_and_realloc:
274 submit_bio(READ, bio);
275 bio = NULL;
276 }
277 if (bio == NULL) {
278 struct ext4_crypto_ctx *ctx = NULL;
279
280 if (ext4_encrypted_inode(inode) &&
281 S_ISREG(inode->i_mode)) {
282 ctx = ext4_get_crypto_ctx(inode);
283 if (IS_ERR(ctx))
284 goto set_error_page;
285 }
286 bio = bio_alloc(GFP_KERNEL,
287 min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
288 if (!bio) {
289 if (ctx)
290 ext4_release_crypto_ctx(ctx);
291 goto set_error_page;
292 }
293 bio->bi_bdev = bdev;
294 bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
295 bio->bi_end_io = mpage_end_io;
296 bio->bi_private = ctx;
297 }
298
299 length = first_hole << blkbits;
300 if (bio_add_page(bio, page, length, 0) < length)
301 goto submit_and_realloc;
302
303 if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
304 (relative_block == map.m_len)) ||
305 (first_hole != blocks_per_page)) {
306 submit_bio(READ, bio);
307 bio = NULL;
308 } else
309 last_block_in_bio = blocks[blocks_per_page - 1];
310 goto next_page;
311 confused:
312 if (bio) {
313 submit_bio(READ, bio);
314 bio = NULL;
315 }
316 if (!PageUptodate(page))
317 block_read_full_page(page, ext4_get_block);
318 else
319 unlock_page(page);
320 next_page:
321 if (pages)
322 page_cache_release(page);
323 }
324 BUG_ON(pages && !list_empty(pages));
325 if (bio)
326 submit_bio(READ, bio);
327 return 0;
328}
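The heart of the loop above is carrying one ext4_map_blocks() result across consecutive pages, so a long mapped extent is translated once instead of once per block. A compact sketch of just that caching pattern, with a made-up map_range() in place of ext4_map_blocks():

#include <stdio.h>

struct map { unsigned long lblk, pblk, len; int mapped; };

/* Hypothetical block mapper: maps [lblk, lblk+len) contiguously to
 * lblk + 1000.  Stands in for ext4_map_blocks(). */
static void map_range(struct map *m)
{
	m->pblk = m->lblk + 1000;
	m->mapped = 1;
}

int main(void)
{
	struct map m = { 0, 0, 0, 0 };
	int calls = 0;

	for (unsigned long blk = 0; blk < 64; blk++) {
		/* Reuse the previous result while blk falls inside it. */
		if (!(m.mapped && blk >= m.lblk && blk < m.lblk + m.len)) {
			m.lblk = blk;
			m.len = 64 - blk;	/* "rest of the request" */
			map_range(&m);
			calls++;
		}
		printf("lblk %2lu -> pblk %lu\n", blk, m.pblk + (blk - m.lblk));
	}
	printf("map_range() called %d time(s) for 64 blocks\n", calls);
	return 0;
}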
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e061e66c8280..821f22dbe825 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,7 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/jbd2.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
26#include <linux/init.h> 25#include <linux/init.h>
27#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -323,22 +322,6 @@ static void save_error_info(struct super_block *sb, const char *func,
323 ext4_commit_super(sb, 1); 322 ext4_commit_super(sb, 1);
324} 323}
325 324
326/*
327 * The del_gendisk() function uninitializes the disk-specific data
328 * structures, including the bdi structure, without telling anyone
329 * else. Once this happens, any attempt to call mark_buffer_dirty()
330 * (for example, by ext4_commit_super), will cause a kernel OOPS.
331 * This is a kludge to prevent these oops until we can put in a proper
332 * hook in del_gendisk() to inform the VFS and file system layers.
333 */
334static int block_device_ejected(struct super_block *sb)
335{
336 struct inode *bd_inode = sb->s_bdev->bd_inode;
337 struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
338
339 return bdi->dev == NULL;
340}
341
342static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) 325static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
343{ 326{
344 struct super_block *sb = journal->j_private; 327 struct super_block *sb = journal->j_private;
@@ -893,6 +876,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
893 atomic_set(&ei->i_ioend_count, 0); 876 atomic_set(&ei->i_ioend_count, 0);
894 atomic_set(&ei->i_unwritten, 0); 877 atomic_set(&ei->i_unwritten, 0);
895 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); 878 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
879#ifdef CONFIG_EXT4_FS_ENCRYPTION
880 ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
881#endif
896 882
897 return &ei->vfs_inode; 883 return &ei->vfs_inode;
898} 884}
@@ -1076,7 +1062,7 @@ static const struct quotactl_ops ext4_qctl_operations = {
1076 .quota_on = ext4_quota_on, 1062 .quota_on = ext4_quota_on,
1077 .quota_off = ext4_quota_off, 1063 .quota_off = ext4_quota_off,
1078 .quota_sync = dquot_quota_sync, 1064 .quota_sync = dquot_quota_sync,
1079 .get_info = dquot_get_dqinfo, 1065 .get_state = dquot_get_state,
1080 .set_info = dquot_set_dqinfo, 1066 .set_info = dquot_set_dqinfo,
1081 .get_dqblk = dquot_get_dqblk, 1067 .get_dqblk = dquot_get_dqblk,
1082 .set_dqblk = dquot_set_dqblk 1068 .set_dqblk = dquot_set_dqblk
@@ -1120,7 +1106,7 @@ enum {
1120 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, 1106 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1121 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, 1107 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1122 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1108 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1123 Opt_data_err_abort, Opt_data_err_ignore, 1109 Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
1124 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1110 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1125 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1111 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1126 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1112 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
@@ -1211,6 +1197,7 @@ static const match_table_t tokens = {
1211 {Opt_init_itable, "init_itable"}, 1197 {Opt_init_itable, "init_itable"},
1212 {Opt_noinit_itable, "noinit_itable"}, 1198 {Opt_noinit_itable, "noinit_itable"},
1213 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, 1199 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1200 {Opt_test_dummy_encryption, "test_dummy_encryption"},
1214 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1201 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1215 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1202 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1216 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1203 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1412,6 +1399,7 @@ static const struct mount_opts {
1412 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1399 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1413 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1400 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1414 {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 1401 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1402 {Opt_test_dummy_encryption, 0, MOPT_GTE0},
1415 {Opt_err, 0, 0} 1403 {Opt_err, 0, 0}
1416}; 1404};
1417 1405
@@ -1588,6 +1576,15 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1588 } 1576 }
1589 *journal_ioprio = 1577 *journal_ioprio =
1590 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); 1578 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1579 } else if (token == Opt_test_dummy_encryption) {
1580#ifdef CONFIG_EXT4_FS_ENCRYPTION
1581 sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
1582 ext4_msg(sb, KERN_WARNING,
1583 "Test dummy encryption mode enabled");
1584#else
1585 ext4_msg(sb, KERN_WARNING,
1586 "Test dummy encryption mount option ignored");
1587#endif
1591 } else if (m->flags & MOPT_DATAJ) { 1588 } else if (m->flags & MOPT_DATAJ) {
1592 if (is_remount) { 1589 if (is_remount) {
1593 if (!sbi->s_journal) 1590 if (!sbi->s_journal)
@@ -2685,11 +2682,13 @@ static struct attribute *ext4_attrs[] = {
2685EXT4_INFO_ATTR(lazy_itable_init); 2682EXT4_INFO_ATTR(lazy_itable_init);
2686EXT4_INFO_ATTR(batched_discard); 2683EXT4_INFO_ATTR(batched_discard);
2687EXT4_INFO_ATTR(meta_bg_resize); 2684EXT4_INFO_ATTR(meta_bg_resize);
2685EXT4_INFO_ATTR(encryption);
2688 2686
2689static struct attribute *ext4_feat_attrs[] = { 2687static struct attribute *ext4_feat_attrs[] = {
2690 ATTR_LIST(lazy_itable_init), 2688 ATTR_LIST(lazy_itable_init),
2691 ATTR_LIST(batched_discard), 2689 ATTR_LIST(batched_discard),
2692 ATTR_LIST(meta_bg_resize), 2690 ATTR_LIST(meta_bg_resize),
2691 ATTR_LIST(encryption),
2693 NULL, 2692 NULL,
2694}; 2693};
2695 2694
@@ -3448,6 +3447,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3448 if (sb->s_bdev->bd_part) 3447 if (sb->s_bdev->bd_part)
3449 sbi->s_sectors_written_start = 3448 sbi->s_sectors_written_start =
3450 part_stat_read(sb->s_bdev->bd_part, sectors[1]); 3449 part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3450#ifdef CONFIG_EXT4_FS_ENCRYPTION
3451 /* Modes of operations for file and directory encryption. */
3452 sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
3453 sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
3454#endif
3451 3455
3452 /* Cleanup superblock name */ 3456 /* Cleanup superblock name */
3453 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3457 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
@@ -3692,6 +3696,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3692 } 3696 }
3693 } 3697 }
3694 3698
3699 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) &&
3700 es->s_encryption_level) {
3701 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
3702 es->s_encryption_level);
3703 goto failed_mount;
3704 }
3705
3695 if (sb->s_blocksize != blocksize) { 3706 if (sb->s_blocksize != blocksize) {
3696 /* Validate the filesystem blocksize */ 3707 /* Validate the filesystem blocksize */
3697 if (!sb_set_blocksize(sb, blocksize)) { 3708 if (!sb_set_blocksize(sb, blocksize)) {
@@ -4054,6 +4065,13 @@ no_journal:
4054 } 4065 }
4055 } 4066 }
4056 4067
4068 if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
4069 !(sb->s_flags & MS_RDONLY) &&
4070 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
4071 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
4072 ext4_commit_super(sb, 1);
4073 }
4074
4057 /* 4075 /*
4058 * Get the # of file system overhead blocks from the 4076 * Get the # of file system overhead blocks from the
4059 * superblock if present. 4077 * superblock if present.
@@ -4570,7 +4588,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4570 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4588 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4571 int error = 0; 4589 int error = 0;
4572 4590
4573 if (!sbh || block_device_ejected(sb)) 4591 if (!sbh)
4574 return error; 4592 return error;
4575 if (buffer_write_io_error(sbh)) { 4593 if (buffer_write_io_error(sbh)) {
4576 /* 4594 /*
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index ff3711932018..136ca0e911fd 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -18,13 +18,101 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/namei.h> 21#include <linux/namei.h>
23#include "ext4.h" 22#include "ext4.h"
24#include "xattr.h" 23#include "xattr.h"
25 24
25#ifdef CONFIG_EXT4_FS_ENCRYPTION
26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
27{ 27{
28 struct page *cpage = NULL;
29 char *caddr, *paddr = NULL;
30 struct ext4_str cstr, pstr;
31 struct inode *inode = dentry->d_inode;
32 struct ext4_fname_crypto_ctx *ctx = NULL;
33 struct ext4_encrypted_symlink_data *sd;
34 loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
35 int res;
36 u32 plen, max_size = inode->i_sb->s_blocksize;
37
38 if (!ext4_encrypted_inode(inode))
39 return page_follow_link_light(dentry, nd);
40
41 ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
42 if (IS_ERR(ctx))
43 return ctx;
44
45 if (ext4_inode_is_fast_symlink(inode)) {
46 caddr = (char *) EXT4_I(dentry->d_inode)->i_data;
47 max_size = sizeof(EXT4_I(dentry->d_inode)->i_data);
48 } else {
49 cpage = read_mapping_page(inode->i_mapping, 0, NULL);
50 if (IS_ERR(cpage)) {
51 ext4_put_fname_crypto_ctx(&ctx);
52 return cpage;
53 }
54 caddr = kmap(cpage);
55 caddr[size] = 0;
56 }
57
58 /* Symlink is encrypted */
59 sd = (struct ext4_encrypted_symlink_data *)caddr;
60 cstr.name = sd->encrypted_path;
61 cstr.len = le32_to_cpu(sd->len);
62 if ((cstr.len +
63 sizeof(struct ext4_encrypted_symlink_data) - 1) >
64 max_size) {
65 /* Symlink data on the disk is corrupted */
66 res = -EIO;
67 goto errout;
68 }
69 plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ?
70 EXT4_FNAME_CRYPTO_DIGEST_SIZE*2 : cstr.len;
71 paddr = kmalloc(plen + 1, GFP_NOFS);
72 if (!paddr) {
73 res = -ENOMEM;
74 goto errout;
75 }
76 pstr.name = paddr;
77 res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr);
78 if (res < 0)
79 goto errout;
80 /* Null-terminate the name */
81 if (res <= plen)
82 paddr[res] = '\0';
83 nd_set_link(nd, paddr);
84 ext4_put_fname_crypto_ctx(&ctx);
85 if (cpage) {
86 kunmap(cpage);
87 page_cache_release(cpage);
88 }
89 return NULL;
90errout:
91 ext4_put_fname_crypto_ctx(&ctx);
92 if (cpage) {
93 kunmap(cpage);
94 page_cache_release(cpage);
95 }
96 kfree(paddr);
97 return ERR_PTR(res);
98}
99
100static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
101 void *cookie)
102{
103 struct page *page = cookie;
104
105 if (!page) {
106 kfree(nd_get_link(nd));
107 } else {
108 kunmap(page);
109 page_cache_release(page);
110 }
111}
112#endif
113
114static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
115{
28 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); 116 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
29 nd_set_link(nd, (char *) ei->i_data); 117 nd_set_link(nd, (char *) ei->i_data);
30 return NULL; 118 return NULL;
@@ -32,8 +120,13 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
32 120
33const struct inode_operations ext4_symlink_inode_operations = { 121const struct inode_operations ext4_symlink_inode_operations = {
34 .readlink = generic_readlink, 122 .readlink = generic_readlink,
123#ifdef CONFIG_EXT4_FS_ENCRYPTION
124 .follow_link = ext4_follow_link,
125 .put_link = ext4_put_link,
126#else
35 .follow_link = page_follow_link_light, 127 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 128 .put_link = page_put_link,
129#endif
37 .setattr = ext4_setattr, 130 .setattr = ext4_setattr,
38 .setxattr = generic_setxattr, 131 .setxattr = generic_setxattr,
39 .getxattr = generic_getxattr, 132 .getxattr = generic_getxattr,
@@ -43,7 +136,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
43 136
44const struct inode_operations ext4_fast_symlink_inode_operations = { 137const struct inode_operations ext4_fast_symlink_inode_operations = {
45 .readlink = generic_readlink, 138 .readlink = generic_readlink,
46 .follow_link = ext4_follow_link, 139 .follow_link = ext4_follow_fast_link,
47 .setattr = ext4_setattr, 140 .setattr = ext4_setattr,
48 .setxattr = generic_setxattr, 141 .setxattr = generic_setxattr,
49 .getxattr = generic_getxattr, 142 .getxattr = generic_getxattr,
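The encrypted follow_link path above treats the symlink body as a length-prefixed ciphertext blob and refuses lengths that do not fit the buffer it was read from (the inline i_data area or one block). A sketch of that validation follows, assuming a layout like the struct the code dereferences; the authoritative field widths live in ext4's crypto headers.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layout, inferred from the diff (le32 length + payload);
 * the real definition is in ext4's headers. */
struct encrypted_symlink_data {
	uint32_t len;
	char encrypted_path[];
};

static int check_symlink_blob(const struct encrypted_symlink_data *sd,
			      size_t max_size)
{
	size_t len = sd->len;	/* le32_to_cpu() in the kernel */

	/* Header plus payload must fit in the source buffer, else the
	 * on-disk symlink data is corrupted (-EIO in the diff). */
	if (len + sizeof(*sd) > max_size)
		return -EIO;
	return 0;
}

int main(void)
{
	uint32_t buf[16] = { 0 };	/* 64-byte "block", aligned */
	struct encrypted_symlink_data *sd = (void *)buf;

	sd->len = 100;	/* longer than the 64-byte buffer: corrupt */
	printf("oversized blob -> %d\n", check_symlink_blob(sd, sizeof(buf)));
	sd->len = 32;
	printf("valid blob     -> %d\n", check_symlink_blob(sd, sizeof(buf)));
	return 0;
}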
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1e09fc77395c..759842ff8af0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -55,7 +55,6 @@
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/mbcache.h> 56#include <linux/mbcache.h>
57#include <linux/quotaops.h> 57#include <linux/quotaops.h>
58#include <linux/rwsem.h>
59#include "ext4_jbd2.h" 58#include "ext4_jbd2.h"
60#include "ext4.h" 59#include "ext4.h"
61#include "xattr.h" 60#include "xattr.h"
@@ -639,8 +638,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
639 free += EXT4_XATTR_LEN(name_len); 638 free += EXT4_XATTR_LEN(name_len);
640 } 639 }
641 if (i->value) { 640 if (i->value) {
642 if (free < EXT4_XATTR_SIZE(i->value_len) || 641 if (free < EXT4_XATTR_LEN(name_len) +
643 free < EXT4_XATTR_LEN(name_len) +
644 EXT4_XATTR_SIZE(i->value_len)) 642 EXT4_XATTR_SIZE(i->value_len))
645 return -ENOSPC; 643 return -ENOSPC;
646 } 644 }
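The xattr.c hunk deletes a redundant clause: if free already covers EXT4_XATTR_LEN(name_len) + EXT4_XATTR_SIZE(value_len), it trivially covers EXT4_XATTR_SIZE(value_len) alone, so the first comparison could never fire on its own. A small brute-force check of that implication, with stand-in macros (the real ones round up to 4-byte alignment inside ext4, which is all the property needs):

#include <assert.h>

/* Stand-ins; the real macros pad to 4-byte alignment. */
#define XATTR_LEN(name_len)	(16 + (((name_len) + 3) & ~3))
#define XATTR_SIZE(value_len)	(((value_len) + 3) & ~3)

static int fits(int free, int name_len, int value_len)
{
	/* The retained check.  Because XATTR_LEN() >= 16 > 0, this
	 * implies free >= XATTR_SIZE(value_len) too, which is why the
	 * separate first clause could be deleted. */
	return free >= XATTR_LEN(name_len) + XATTR_SIZE(value_len);
}

int main(void)
{
	for (int free = 0; free < 256; free++)
		for (int nl = 0; nl < 16; nl++)
			for (int vl = 0; vl < 64; vl++)
				if (fits(free, nl, vl))
					assert(free >= XATTR_SIZE(vl));
	return 0;
}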
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 29bedf5589f6..ddc0957760ba 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -23,6 +23,7 @@
23#define EXT4_XATTR_INDEX_SECURITY 6 23#define EXT4_XATTR_INDEX_SECURITY 6
24#define EXT4_XATTR_INDEX_SYSTEM 7 24#define EXT4_XATTR_INDEX_SYSTEM 7
25#define EXT4_XATTR_INDEX_RICHACL 8 25#define EXT4_XATTR_INDEX_RICHACL 8
26#define EXT4_XATTR_INDEX_ENCRYPTION 9
26 27
27struct ext4_xattr_header { 28struct ext4_xattr_header {
28 __le32 h_magic; /* magic number for identification */ 29 __le32 h_magic; /* magic number for identification */
@@ -98,6 +99,8 @@ extern const struct xattr_handler ext4_xattr_user_handler;
98extern const struct xattr_handler ext4_xattr_trusted_handler; 99extern const struct xattr_handler ext4_xattr_trusted_handler;
99extern const struct xattr_handler ext4_xattr_security_handler; 100extern const struct xattr_handler ext4_xattr_security_handler;
100 101
102#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
103
101extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 104extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
102 105
103extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 106extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 94e2d2ffabe1..05f0f663f14c 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -1,5 +1,5 @@
1config F2FS_FS 1config F2FS_FS
2 tristate "F2FS filesystem support (EXPERIMENTAL)" 2 tristate "F2FS filesystem support"
3 depends on BLOCK 3 depends on BLOCK
4 help 4 help
5 F2FS is based on Log-structured File System (LFS), which supports 5 F2FS is based on Log-structured File System (LFS), which supports
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 742202779bd5..4320ffab3495 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -351,13 +351,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
351 351
352 *acl = f2fs_acl_clone(p, GFP_NOFS); 352 *acl = f2fs_acl_clone(p, GFP_NOFS);
353 if (!*acl) 353 if (!*acl)
354 return -ENOMEM; 354 goto no_mem;
355 355
356 ret = f2fs_acl_create_masq(*acl, mode); 356 ret = f2fs_acl_create_masq(*acl, mode);
357 if (ret < 0) { 357 if (ret < 0)
358 posix_acl_release(*acl); 358 goto no_mem_clone;
359 return -ENOMEM;
360 }
361 359
362 if (ret == 0) { 360 if (ret == 0) {
363 posix_acl_release(*acl); 361 posix_acl_release(*acl);
@@ -378,6 +376,12 @@ no_acl:
378 *default_acl = NULL; 376 *default_acl = NULL;
379 *acl = NULL; 377 *acl = NULL;
380 return 0; 378 return 0;
379
380no_mem_clone:
381 posix_acl_release(*acl);
382no_mem:
383 posix_acl_release(p);
384 return -ENOMEM;
381} 385}
382 386
383int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, 387int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
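The acl.c hunk funnels both allocation failures through shared labels so that the clone source p is released on every error path; the old code leaked p when f2fs_acl_create_masq() failed. The same ladder in miniature, with a free() stand-in for posix_acl_release() and purely illustrative names:

#include <errno.h>
#include <stdlib.h>

static void put(void *p) { free(p); }	/* posix_acl_release() stand-in */

/* Mirrors the shape of the patched f2fs_acl_create(): both failure
 * points fall through shared labels, so "p" is released on every
 * path.  Not the kernel function, just the unwind pattern. */
static int create_acl(int masq_fails, void **out)
{
	void *p, *clone;

	p = malloc(1);			/* ACL inherited from the parent */
	if (!p)
		return -ENOMEM;

	clone = malloc(1);		/* f2fs_acl_clone() */
	if (!clone)
		goto no_mem;

	if (masq_fails)			/* f2fs_acl_create_masq() < 0 */
		goto no_mem_clone;

	put(p);
	*out = clone;
	return 0;

no_mem_clone:
	put(clone);
no_mem:
	put(p);				/* the release the patch added */
	return -ENOMEM;
}

int main(void)
{
	void *acl = NULL;

	if (create_acl(0, &acl) != 0 || create_acl(1, &acl) != -ENOMEM)
		return 1;
	put(acl);
	return 0;
}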
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7f794b72b3b7..a5e17a2a0781 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -276,7 +276,7 @@ continue_unlock:
276 if (!clear_page_dirty_for_io(page)) 276 if (!clear_page_dirty_for_io(page))
277 goto continue_unlock; 277 goto continue_unlock;
278 278
279 if (f2fs_write_meta_page(page, &wbc)) { 279 if (mapping->a_ops->writepage(page, &wbc)) {
280 unlock_page(page); 280 unlock_page(page);
281 break; 281 break;
282 } 282 }
@@ -464,20 +464,19 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
464 464
465void recover_orphan_inodes(struct f2fs_sb_info *sbi) 465void recover_orphan_inodes(struct f2fs_sb_info *sbi)
466{ 466{
467 block_t start_blk, orphan_blkaddr, i, j; 467 block_t start_blk, orphan_blocks, i, j;
468 468
469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) 469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
470 return; 470 return;
471 471
472 set_sbi_flag(sbi, SBI_POR_DOING); 472 set_sbi_flag(sbi, SBI_POR_DOING);
473 473
474 start_blk = __start_cp_addr(sbi) + 1 + 474 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
475 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 475 orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
476 orphan_blkaddr = __start_sum_addr(sbi) - 1;
477 476
478 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); 477 ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
479 478
480 for (i = 0; i < orphan_blkaddr; i++) { 479 for (i = 0; i < orphan_blocks; i++) {
481 struct page *page = get_meta_page(sbi, start_blk + i); 480 struct page *page = get_meta_page(sbi, start_blk + i);
482 struct f2fs_orphan_block *orphan_blk; 481 struct f2fs_orphan_block *orphan_blk;
483 482
@@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
615 unsigned long blk_size = sbi->blocksize; 614 unsigned long blk_size = sbi->blocksize;
616 unsigned long long cp1_version = 0, cp2_version = 0; 615 unsigned long long cp1_version = 0, cp2_version = 0;
617 unsigned long long cp_start_blk_no; 616 unsigned long long cp_start_blk_no;
618 unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 617 unsigned int cp_blks = 1 + __cp_payload(sbi);
619 block_t cp_blk_no; 618 block_t cp_blk_no;
620 int i; 619 int i;
621 620
@@ -796,6 +795,7 @@ retry:
796 * writeback dentry pages in the freeing inode. 795 * writeback dentry pages in the freeing inode.
797 */ 796 */
798 f2fs_submit_merged_bio(sbi, DATA, WRITE); 797 f2fs_submit_merged_bio(sbi, DATA, WRITE);
798 cond_resched();
799 } 799 }
800 goto retry; 800 goto retry;
801} 801}
@@ -884,7 +884,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
884 __u32 crc32 = 0; 884 __u32 crc32 = 0;
885 void *kaddr; 885 void *kaddr;
886 int i; 886 int i;
887 int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 887 int cp_payload_blks = __cp_payload(sbi);
888 888
889 /* 889 /*
890 * This avoids to conduct wrong roll-forward operations and uses 890 * This avoids to conduct wrong roll-forward operations and uses
@@ -1048,17 +1048,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1048 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1048 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1049 unsigned long long ckpt_ver; 1049 unsigned long long ckpt_ver;
1050 1050
1051 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1052
1053 mutex_lock(&sbi->cp_mutex); 1051 mutex_lock(&sbi->cp_mutex);
1054 1052
1055 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && 1053 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1056 cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) 1054 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
1057 goto out; 1055 goto out;
1058 if (unlikely(f2fs_cp_error(sbi))) 1056 if (unlikely(f2fs_cp_error(sbi)))
1059 goto out; 1057 goto out;
1060 if (f2fs_readonly(sbi->sb)) 1058 if (f2fs_readonly(sbi->sb))
1061 goto out; 1059 goto out;
1060
1061 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1062
1062 if (block_operations(sbi)) 1063 if (block_operations(sbi))
1063 goto out; 1064 goto out;
1064 1065
@@ -1085,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1085 1086
1086 unblock_operations(sbi); 1087 unblock_operations(sbi);
1087 stat_inc_cp_count(sbi->stat_info); 1088 stat_inc_cp_count(sbi->stat_info);
1089
1090 if (cpc->reason == CP_RECOVERY)
1091 f2fs_msg(sbi->sb, KERN_NOTICE,
1092 "checkpoint: version = %llx", ckpt_ver);
1088out: 1093out:
1089 mutex_unlock(&sbi->cp_mutex); 1094 mutex_unlock(&sbi->cp_mutex);
1090 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); 1095 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
@@ -1103,14 +1108,9 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
1103 im->ino_num = 0; 1108 im->ino_num = 0;
1104 } 1109 }
1105 1110
1106 /*
1107 * considering 512 blocks in a segment 8 blocks are needed for cp
1108 * and log segment summaries. Remaining blocks are used to keep
1109 * orphan entries with the limitation one reserved segment
1110 * for cp pack we can have max 1020*504 orphan entries
1111 */
1112 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - 1111 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1113 NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; 1112 NR_CURSEG_TYPE - __cp_payload(sbi)) *
1113 F2FS_ORPHANS_PER_BLOCK;
1114} 1114}
1115 1115
1116int __init create_checkpoint_caches(void) 1116int __init create_checkpoint_caches(void)
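The init_ino_entry_info() hunk replaces the stale comment with arithmetic that also subtracts the cp_payload blocks now sitting in the checkpoint pack. Plugging in the figures the old comment assumed (512 blocks per segment, 2 cp pack blocks, 6 current-segment summaries, 1020 orphans per block, hence the old "1020*504") shows how a payload eats into the budget; the constants below are illustrative, not read from a real superblock.

#include <stdio.h>

int main(void)
{
	const unsigned blocks_per_seg = 512;
	const unsigned cp_packs = 2;		/* F2FS_CP_PACKS */
	const unsigned curseg_types = 6;	/* NR_CURSEG_TYPE */
	const unsigned orphans_per_blk = 1020;	/* F2FS_ORPHANS_PER_BLOCK */

	for (unsigned cp_payload = 0; cp_payload <= 4; cp_payload++) {
		unsigned blks = blocks_per_seg - cp_packs -
				curseg_types - cp_payload;
		printf("cp_payload=%u -> %3u orphan blocks, %6u orphans\n",
		       cp_payload, blks, blks * orphans_per_blk);
	}
	return 0;
}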
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed023a750..b91b0e10678e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/mpage.h> 14#include <linux/mpage.h>
15#include <linux/aio.h>
16#include <linux/writeback.h> 15#include <linux/writeback.h>
17#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
18#include <linux/blkdev.h> 17#include <linux/blkdev.h>
19#include <linux/bio.h> 18#include <linux/bio.h>
20#include <linux/prefetch.h> 19#include <linux/prefetch.h>
20#include <linux/uio.h>
21 21
22#include "f2fs.h" 22#include "f2fs.h"
23#include "node.h" 23#include "node.h"
@@ -25,6 +25,9 @@
25#include "trace.h" 25#include "trace.h"
26#include <trace/events/f2fs.h> 26#include <trace/events/f2fs.h>
27 27
28static struct kmem_cache *extent_tree_slab;
29static struct kmem_cache *extent_node_slab;
30
28static void f2fs_read_end_io(struct bio *bio, int err) 31static void f2fs_read_end_io(struct bio *bio, int err)
29{ 32{
30 struct bio_vec *bvec; 33 struct bio_vec *bvec;
@@ -197,7 +200,7 @@ alloc_new:
197 * ->node_page 200 * ->node_page
198 * update block addresses in the node page 201 * update block addresses in the node page
199 */ 202 */
200static void __set_data_blkaddr(struct dnode_of_data *dn) 203void set_data_blkaddr(struct dnode_of_data *dn)
201{ 204{
202 struct f2fs_node *rn; 205 struct f2fs_node *rn;
203 __le32 *addr_array; 206 __le32 *addr_array;
@@ -226,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn)
226 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 229 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
227 230
228 dn->data_blkaddr = NEW_ADDR; 231 dn->data_blkaddr = NEW_ADDR;
229 __set_data_blkaddr(dn); 232 set_data_blkaddr(dn);
230 mark_inode_dirty(dn->inode); 233 mark_inode_dirty(dn->inode);
231 sync_inode_page(dn); 234 sync_inode_page(dn);
232 return 0; 235 return 0;
@@ -248,73 +251,62 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
248 return err; 251 return err;
249} 252}
250 253
251static int check_extent_cache(struct inode *inode, pgoff_t pgofs, 254static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs,
252 struct buffer_head *bh_result) 255 struct extent_info *ei, struct buffer_head *bh_result)
256{
257 unsigned int blkbits = sb->s_blocksize_bits;
258 size_t max_size = bh_result->b_size;
259 size_t mapped_size;
260
261 clear_buffer_new(bh_result);
262 map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs);
263 mapped_size = (ei->fofs + ei->len - pgofs) << blkbits;
264 bh_result->b_size = min(max_size, mapped_size);
265}
266
267static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
268 struct extent_info *ei)
253{ 269{
254 struct f2fs_inode_info *fi = F2FS_I(inode); 270 struct f2fs_inode_info *fi = F2FS_I(inode);
255 pgoff_t start_fofs, end_fofs; 271 pgoff_t start_fofs, end_fofs;
256 block_t start_blkaddr; 272 block_t start_blkaddr;
257 273
258 if (is_inode_flag_set(fi, FI_NO_EXTENT)) 274 read_lock(&fi->ext_lock);
259 return 0;
260
261 read_lock(&fi->ext.ext_lock);
262 if (fi->ext.len == 0) { 275 if (fi->ext.len == 0) {
263 read_unlock(&fi->ext.ext_lock); 276 read_unlock(&fi->ext_lock);
264 return 0; 277 return false;
265 } 278 }
266 279
267 stat_inc_total_hit(inode->i_sb); 280 stat_inc_total_hit(inode->i_sb);
268 281
269 start_fofs = fi->ext.fofs; 282 start_fofs = fi->ext.fofs;
270 end_fofs = fi->ext.fofs + fi->ext.len - 1; 283 end_fofs = fi->ext.fofs + fi->ext.len - 1;
271 start_blkaddr = fi->ext.blk_addr; 284 start_blkaddr = fi->ext.blk;
272 285
273 if (pgofs >= start_fofs && pgofs <= end_fofs) { 286 if (pgofs >= start_fofs && pgofs <= end_fofs) {
274 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 287 *ei = fi->ext;
275 size_t count;
276
277 set_buffer_new(bh_result);
278 map_bh(bh_result, inode->i_sb,
279 start_blkaddr + pgofs - start_fofs);
280 count = end_fofs - pgofs + 1;
281 if (count < (UINT_MAX >> blkbits))
282 bh_result->b_size = (count << blkbits);
283 else
284 bh_result->b_size = UINT_MAX;
285
286 stat_inc_read_hit(inode->i_sb); 288 stat_inc_read_hit(inode->i_sb);
287 read_unlock(&fi->ext.ext_lock); 289 read_unlock(&fi->ext_lock);
288 return 1; 290 return true;
289 } 291 }
290 read_unlock(&fi->ext.ext_lock); 292 read_unlock(&fi->ext_lock);
291 return 0; 293 return false;
292} 294}
293 295
294void update_extent_cache(struct dnode_of_data *dn) 296static bool update_extent_info(struct inode *inode, pgoff_t fofs,
297 block_t blkaddr)
295{ 298{
296 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 299 struct f2fs_inode_info *fi = F2FS_I(inode);
297 pgoff_t fofs, start_fofs, end_fofs; 300 pgoff_t start_fofs, end_fofs;
298 block_t start_blkaddr, end_blkaddr; 301 block_t start_blkaddr, end_blkaddr;
299 int need_update = true; 302 int need_update = true;
300 303
301 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 304 write_lock(&fi->ext_lock);
302
303 /* Update the page address in the parent node */
304 __set_data_blkaddr(dn);
305
306 if (is_inode_flag_set(fi, FI_NO_EXTENT))
307 return;
308
309 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
310 dn->ofs_in_node;
311
312 write_lock(&fi->ext.ext_lock);
313 305
314 start_fofs = fi->ext.fofs; 306 start_fofs = fi->ext.fofs;
315 end_fofs = fi->ext.fofs + fi->ext.len - 1; 307 end_fofs = fi->ext.fofs + fi->ext.len - 1;
316 start_blkaddr = fi->ext.blk_addr; 308 start_blkaddr = fi->ext.blk;
317 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; 309 end_blkaddr = fi->ext.blk + fi->ext.len - 1;
318 310
319 /* Drop and initialize the matched extent */ 311 /* Drop and initialize the matched extent */
320 if (fi->ext.len == 1 && fofs == start_fofs) 312 if (fi->ext.len == 1 && fofs == start_fofs)
@@ -322,24 +314,24 @@ void update_extent_cache(struct dnode_of_data *dn)
322 314
323 /* Initial extent */ 315 /* Initial extent */
324 if (fi->ext.len == 0) { 316 if (fi->ext.len == 0) {
325 if (dn->data_blkaddr != NULL_ADDR) { 317 if (blkaddr != NULL_ADDR) {
326 fi->ext.fofs = fofs; 318 fi->ext.fofs = fofs;
327 fi->ext.blk_addr = dn->data_blkaddr; 319 fi->ext.blk = blkaddr;
328 fi->ext.len = 1; 320 fi->ext.len = 1;
329 } 321 }
330 goto end_update; 322 goto end_update;
331 } 323 }
332 324
333 /* Front merge */ 325 /* Front merge */
334 if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { 326 if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
335 fi->ext.fofs--; 327 fi->ext.fofs--;
336 fi->ext.blk_addr--; 328 fi->ext.blk--;
337 fi->ext.len++; 329 fi->ext.len++;
338 goto end_update; 330 goto end_update;
339 } 331 }
340 332
341 /* Back merge */ 333 /* Back merge */
342 if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { 334 if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
343 fi->ext.len++; 335 fi->ext.len++;
344 goto end_update; 336 goto end_update;
345 } 337 }
@@ -351,8 +343,7 @@ void update_extent_cache(struct dnode_of_data *dn)
351 fi->ext.len = fofs - start_fofs; 343 fi->ext.len = fofs - start_fofs;
352 } else { 344 } else {
353 fi->ext.fofs = fofs + 1; 345 fi->ext.fofs = fofs + 1;
354 fi->ext.blk_addr = start_blkaddr + 346 fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
355 fofs - start_fofs + 1;
356 fi->ext.len -= fofs - start_fofs + 1; 347 fi->ext.len -= fofs - start_fofs + 1;
357 } 348 }
358 } else { 349 } else {
@@ -366,27 +357,583 @@ void update_extent_cache(struct dnode_of_data *dn)
366 need_update = true; 357 need_update = true;
367 } 358 }
368end_update: 359end_update:
369 write_unlock(&fi->ext.ext_lock); 360 write_unlock(&fi->ext_lock);
370 if (need_update) 361 return need_update;
371 sync_inode_page(dn); 362}
363
364static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
365 struct extent_tree *et, struct extent_info *ei,
366 struct rb_node *parent, struct rb_node **p)
367{
368 struct extent_node *en;
369
370 en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
371 if (!en)
372 return NULL;
373
374 en->ei = *ei;
375 INIT_LIST_HEAD(&en->list);
376
377 rb_link_node(&en->rb_node, parent, p);
378 rb_insert_color(&en->rb_node, &et->root);
379 et->count++;
380 atomic_inc(&sbi->total_ext_node);
381 return en;
382}
383
384static void __detach_extent_node(struct f2fs_sb_info *sbi,
385 struct extent_tree *et, struct extent_node *en)
386{
387 rb_erase(&en->rb_node, &et->root);
388 et->count--;
389 atomic_dec(&sbi->total_ext_node);
390
391 if (et->cached_en == en)
392 et->cached_en = NULL;
393}
394
395static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
396 nid_t ino)
397{
398 struct extent_tree *et;
399
400 down_read(&sbi->extent_tree_lock);
401 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
402 if (!et) {
403 up_read(&sbi->extent_tree_lock);
404 return NULL;
405 }
406 atomic_inc(&et->refcount);
407 up_read(&sbi->extent_tree_lock);
408
409 return et;
410}
411
412static struct extent_tree *__grab_extent_tree(struct inode *inode)
413{
414 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
415 struct extent_tree *et;
416 nid_t ino = inode->i_ino;
417
418 down_write(&sbi->extent_tree_lock);
419 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
420 if (!et) {
421 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
422 f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
423 memset(et, 0, sizeof(struct extent_tree));
424 et->ino = ino;
425 et->root = RB_ROOT;
426 et->cached_en = NULL;
427 rwlock_init(&et->lock);
428 atomic_set(&et->refcount, 0);
429 et->count = 0;
430 sbi->total_ext_tree++;
431 }
432 atomic_inc(&et->refcount);
433 up_write(&sbi->extent_tree_lock);
434
435 return et;
436}
437
438static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
439 unsigned int fofs)
440{
441 struct rb_node *node = et->root.rb_node;
442 struct extent_node *en;
443
444 if (et->cached_en) {
445 struct extent_info *cei = &et->cached_en->ei;
446
447 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
448 return et->cached_en;
449 }
450
451 while (node) {
452 en = rb_entry(node, struct extent_node, rb_node);
453
454 if (fofs < en->ei.fofs) {
455 node = node->rb_left;
456 } else if (fofs >= en->ei.fofs + en->ei.len) {
457 node = node->rb_right;
458 } else {
459 et->cached_en = en;
460 return en;
461 }
462 }
463 return NULL;
464}
465
466static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
467 struct extent_tree *et, struct extent_node *en)
468{
469 struct extent_node *prev;
470 struct rb_node *node;
471
472 node = rb_prev(&en->rb_node);
473 if (!node)
474 return NULL;
475
476 prev = rb_entry(node, struct extent_node, rb_node);
477 if (__is_back_mergeable(&en->ei, &prev->ei)) {
478 en->ei.fofs = prev->ei.fofs;
479 en->ei.blk = prev->ei.blk;
480 en->ei.len += prev->ei.len;
481 __detach_extent_node(sbi, et, prev);
482 return prev;
483 }
484 return NULL;
485}
486
487static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
488 struct extent_tree *et, struct extent_node *en)
489{
490 struct extent_node *next;
491 struct rb_node *node;
492
493 node = rb_next(&en->rb_node);
494 if (!node)
495 return NULL;
496
497 next = rb_entry(node, struct extent_node, rb_node);
498 if (__is_front_mergeable(&en->ei, &next->ei)) {
499 en->ei.len += next->ei.len;
500 __detach_extent_node(sbi, et, next);
501 return next;
502 }
503 return NULL;
504}
505
506static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
507 struct extent_tree *et, struct extent_info *ei,
508 struct extent_node **den)
509{
510 struct rb_node **p = &et->root.rb_node;
511 struct rb_node *parent = NULL;
512 struct extent_node *en;
513
514 while (*p) {
515 parent = *p;
516 en = rb_entry(parent, struct extent_node, rb_node);
517
518 if (ei->fofs < en->ei.fofs) {
519 if (__is_front_mergeable(ei, &en->ei)) {
520 f2fs_bug_on(sbi, !den);
521 en->ei.fofs = ei->fofs;
522 en->ei.blk = ei->blk;
523 en->ei.len += ei->len;
524 *den = __try_back_merge(sbi, et, en);
525 return en;
526 }
527 p = &(*p)->rb_left;
528 } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
529 if (__is_back_mergeable(ei, &en->ei)) {
530 f2fs_bug_on(sbi, !den);
531 en->ei.len += ei->len;
532 *den = __try_front_merge(sbi, et, en);
533 return en;
534 }
535 p = &(*p)->rb_right;
536 } else {
537 f2fs_bug_on(sbi, 1);
538 }
539 }
540
541 return __attach_extent_node(sbi, et, ei, parent, p);
542}
543
544static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
545 struct extent_tree *et, bool free_all)
546{
547 struct rb_node *node, *next;
548 struct extent_node *en;
549 unsigned int count = et->count;
550
551 node = rb_first(&et->root);
552 while (node) {
553 next = rb_next(node);
554 en = rb_entry(node, struct extent_node, rb_node);
555
556 if (free_all) {
557 spin_lock(&sbi->extent_lock);
558 if (!list_empty(&en->list))
559 list_del_init(&en->list);
560 spin_unlock(&sbi->extent_lock);
561 }
562
563 if (free_all || list_empty(&en->list)) {
564 __detach_extent_node(sbi, et, en);
565 kmem_cache_free(extent_node_slab, en);
566 }
567 node = next;
568 }
569
570 return count - et->count;
571}
572
573static void f2fs_init_extent_tree(struct inode *inode,
574 struct f2fs_extent *i_ext)
575{
576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
577 struct extent_tree *et;
578 struct extent_node *en;
579 struct extent_info ei;
580
581 if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
582 return;
583
584 et = __grab_extent_tree(inode);
585
586 write_lock(&et->lock);
587 if (et->count)
588 goto out;
589
590 set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
591 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
592
593 en = __insert_extent_tree(sbi, et, &ei, NULL);
594 if (en) {
595 et->cached_en = en;
596
597 spin_lock(&sbi->extent_lock);
598 list_add_tail(&en->list, &sbi->extent_list);
599 spin_unlock(&sbi->extent_lock);
600 }
601out:
602 write_unlock(&et->lock);
603 atomic_dec(&et->refcount);
604}
605
606static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
607 struct extent_info *ei)
608{
609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
610 struct extent_tree *et;
611 struct extent_node *en;
612
613 trace_f2fs_lookup_extent_tree_start(inode, pgofs);
614
615 et = __find_extent_tree(sbi, inode->i_ino);
616 if (!et)
617 return false;
618
619 read_lock(&et->lock);
620 en = __lookup_extent_tree(et, pgofs);
621 if (en) {
622 *ei = en->ei;
623 spin_lock(&sbi->extent_lock);
624 if (!list_empty(&en->list))
625 list_move_tail(&en->list, &sbi->extent_list);
626 spin_unlock(&sbi->extent_lock);
627 stat_inc_read_hit(sbi->sb);
628 }
629 stat_inc_total_hit(sbi->sb);
630 read_unlock(&et->lock);
631
632 trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
633
634 atomic_dec(&et->refcount);
635 return en ? true : false;
636}
637
638static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
639 block_t blkaddr)
640{
641 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
642 struct extent_tree *et;
643 struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
644 struct extent_node *den = NULL;
645 struct extent_info ei, dei;
646 unsigned int endofs;
647
648 trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
649
650 et = __grab_extent_tree(inode);
651
652 write_lock(&et->lock);
653
654 /* 1. lookup and remove existing extent info in cache */
655 en = __lookup_extent_tree(et, fofs);
656 if (!en)
657 goto update_extent;
658
659 dei = en->ei;
660 __detach_extent_node(sbi, et, en);
661
662 /* 2. if extent can be split more, split and insert the left part */
663 if (dei.len > 1) {
664 /* insert left part of split extent into cache */
665 if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
666 set_extent_info(&ei, dei.fofs, dei.blk,
667 fofs - dei.fofs);
668 en1 = __insert_extent_tree(sbi, et, &ei, NULL);
669 }
670
671 /* insert right part of split extent into cache */
672 endofs = dei.fofs + dei.len - 1;
673 if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
674 set_extent_info(&ei, fofs + 1,
675 fofs - dei.fofs + dei.blk, endofs - fofs);
676 en2 = __insert_extent_tree(sbi, et, &ei, NULL);
677 }
678 }
679
680update_extent:
681 /* 3. update extent in extent cache */
682 if (blkaddr) {
683 set_extent_info(&ei, fofs, blkaddr, 1);
684 en3 = __insert_extent_tree(sbi, et, &ei, &den);
685 }
686
687 /* 4. update in global extent list */
688 spin_lock(&sbi->extent_lock);
689 if (en && !list_empty(&en->list))
690 list_del(&en->list);
691 /*
692 * en1 and en2 are split from en; they become smaller and smaller
693 * fragments after several splits. So if the length is smaller
694 * than F2FS_MIN_EXTENT_LEN, we will not add them to the extent tree.
695 */
696 if (en1)
697 list_add_tail(&en1->list, &sbi->extent_list);
698 if (en2)
699 list_add_tail(&en2->list, &sbi->extent_list);
700 if (en3) {
701 if (list_empty(&en3->list))
702 list_add_tail(&en3->list, &sbi->extent_list);
703 else
704 list_move_tail(&en3->list, &sbi->extent_list);
705 }
706 if (den && !list_empty(&den->list))
707 list_del(&den->list);
708 spin_unlock(&sbi->extent_lock);
709
710 /* 5. release extent node */
711 if (en)
712 kmem_cache_free(extent_node_slab, en);
713 if (den)
714 kmem_cache_free(extent_node_slab, den);
715
716 write_unlock(&et->lock);
717 atomic_dec(&et->refcount);
718}
719
720void f2fs_preserve_extent_tree(struct inode *inode)
721{
722 struct extent_tree *et;
723 struct extent_info *ext = &F2FS_I(inode)->ext;
724 bool sync = false;
725
726 if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
727 return;
728
729 et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
730 if (!et) {
731 if (ext->len) {
732 ext->len = 0;
733 update_inode_page(inode);
734 }
735 return;
736 }
737
738 read_lock(&et->lock);
739 if (et->count) {
740 struct extent_node *en;
741
742 if (et->cached_en) {
743 en = et->cached_en;
744 } else {
745 struct rb_node *node = rb_first(&et->root);
746
747 if (!node)
748 node = rb_last(&et->root);
749 en = rb_entry(node, struct extent_node, rb_node);
750 }
751
752 if (__is_extent_same(ext, &en->ei))
753 goto out;
754
755 *ext = en->ei;
756 sync = true;
757 } else if (ext->len) {
758 ext->len = 0;
759 sync = true;
760 }
761out:
762 read_unlock(&et->lock);
763 atomic_dec(&et->refcount);
764
765 if (sync)
766 update_inode_page(inode);
767}
768
769void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
770{
771 struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
772 struct extent_node *en, *tmp;
773 unsigned long ino = F2FS_ROOT_INO(sbi);
774 struct radix_tree_iter iter;
775 void **slot;
776 unsigned int found;
777 unsigned int node_cnt = 0, tree_cnt = 0;
778
779 if (!test_opt(sbi, EXTENT_CACHE))
780 return;
781
782 if (available_free_memory(sbi, EXTENT_CACHE))
783 return;
784
785 spin_lock(&sbi->extent_lock);
786 list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
787 if (!nr_shrink--)
788 break;
789 list_del_init(&en->list);
790 }
791 spin_unlock(&sbi->extent_lock);
792
793 down_read(&sbi->extent_tree_lock);
794 while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
795 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
796 unsigned i;
797
798 ino = treevec[found - 1]->ino + 1;
799 for (i = 0; i < found; i++) {
800 struct extent_tree *et = treevec[i];
801
802 atomic_inc(&et->refcount);
803 write_lock(&et->lock);
804 node_cnt += __free_extent_tree(sbi, et, false);
805 write_unlock(&et->lock);
806 atomic_dec(&et->refcount);
807 }
808 }
809 up_read(&sbi->extent_tree_lock);
810
811 down_write(&sbi->extent_tree_lock);
812 radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
813 F2FS_ROOT_INO(sbi)) {
814 struct extent_tree *et = (struct extent_tree *)*slot;
815
816 if (!atomic_read(&et->refcount) && !et->count) {
817 radix_tree_delete(&sbi->extent_tree_root, et->ino);
818 kmem_cache_free(extent_tree_slab, et);
819 sbi->total_ext_tree--;
820 tree_cnt++;
821 }
822 }
823 up_write(&sbi->extent_tree_lock);
824
825 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
826}
827
828void f2fs_destroy_extent_tree(struct inode *inode)
829{
830 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
831 struct extent_tree *et;
832 unsigned int node_cnt = 0;
833
834 if (!test_opt(sbi, EXTENT_CACHE))
835 return;
836
837 et = __find_extent_tree(sbi, inode->i_ino);
838 if (!et)
839 goto out;
840
841	/* free all extent info belonging to this extent tree */
842 write_lock(&et->lock);
843 node_cnt = __free_extent_tree(sbi, et, true);
844 write_unlock(&et->lock);
845
846 atomic_dec(&et->refcount);
847
848	/* try to find and delete the extent tree entry in the radix tree */
849 down_write(&sbi->extent_tree_lock);
850 et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
851 if (!et) {
852 up_write(&sbi->extent_tree_lock);
853 goto out;
854 }
855 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
856 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
857 kmem_cache_free(extent_tree_slab, et);
858 sbi->total_ext_tree--;
859 up_write(&sbi->extent_tree_lock);
860out:
861 trace_f2fs_destroy_extent_tree(inode, node_cnt);
372 return; 862 return;
373} 863}
374 864
865void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
866{
867 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
868 f2fs_init_extent_tree(inode, i_ext);
869
870 write_lock(&F2FS_I(inode)->ext_lock);
871 get_extent_info(&F2FS_I(inode)->ext, *i_ext);
872 write_unlock(&F2FS_I(inode)->ext_lock);
873}
874
875static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
876 struct extent_info *ei)
877{
878 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
879 return false;
880
881 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
882 return f2fs_lookup_extent_tree(inode, pgofs, ei);
883
884 return lookup_extent_info(inode, pgofs, ei);
885}
886
887void f2fs_update_extent_cache(struct dnode_of_data *dn)
888{
889 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
890 pgoff_t fofs;
891
892 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
893
894 if (is_inode_flag_set(fi, FI_NO_EXTENT))
895 return;
896
897 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
898 dn->ofs_in_node;
899
900 if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
901 return f2fs_update_extent_tree(dn->inode, fofs,
902 dn->data_blkaddr);
903
904 if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
905 sync_inode_page(dn);
906}
907
375struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 908struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
376{ 909{
377 struct address_space *mapping = inode->i_mapping; 910 struct address_space *mapping = inode->i_mapping;
378 struct dnode_of_data dn; 911 struct dnode_of_data dn;
379 struct page *page; 912 struct page *page;
913 struct extent_info ei;
380 int err; 914 int err;
381 struct f2fs_io_info fio = { 915 struct f2fs_io_info fio = {
382 .type = DATA, 916 .type = DATA,
383 .rw = sync ? READ_SYNC : READA, 917 .rw = sync ? READ_SYNC : READA,
384 }; 918 };
385 919
920 /*
921	 * If sync is false, the caller needs to check block allocation itself.
922	 * This is needed by, and triggered from, two flows:
923 * gc and truncate_partial_data_page.
924 */
925 if (!sync)
926 goto search;
927
386 page = find_get_page(mapping, index); 928 page = find_get_page(mapping, index);
387 if (page && PageUptodate(page)) 929 if (page && PageUptodate(page))
388 return page; 930 return page;
389 f2fs_put_page(page, 0); 931 f2fs_put_page(page, 0);
932search:
933 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
934 dn.data_blkaddr = ei.blk + index - ei.fofs;
935 goto got_it;
936 }
390 937
391 set_new_dnode(&dn, inode, NULL, NULL, 0); 938 set_new_dnode(&dn, inode, NULL, NULL, 0);
392 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 939 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
@@ -401,6 +948,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
401 if (unlikely(dn.data_blkaddr == NEW_ADDR)) 948 if (unlikely(dn.data_blkaddr == NEW_ADDR))
402 return ERR_PTR(-EINVAL); 949 return ERR_PTR(-EINVAL);
403 950
951got_it:
404 page = grab_cache_page(mapping, index); 952 page = grab_cache_page(mapping, index);
405 if (!page) 953 if (!page)
406 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
@@ -435,6 +983,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
435 struct address_space *mapping = inode->i_mapping; 983 struct address_space *mapping = inode->i_mapping;
436 struct dnode_of_data dn; 984 struct dnode_of_data dn;
437 struct page *page; 985 struct page *page;
986 struct extent_info ei;
438 int err; 987 int err;
439 struct f2fs_io_info fio = { 988 struct f2fs_io_info fio = {
440 .type = DATA, 989 .type = DATA,
@@ -445,6 +994,11 @@ repeat:
445 if (!page) 994 if (!page)
446 return ERR_PTR(-ENOMEM); 995 return ERR_PTR(-ENOMEM);
447 996
997 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
998 dn.data_blkaddr = ei.blk + index - ei.fofs;
999 goto got_it;
1000 }
1001
448 set_new_dnode(&dn, inode, NULL, NULL, 0); 1002 set_new_dnode(&dn, inode, NULL, NULL, 0);
449 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1003 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
450 if (err) { 1004 if (err) {
@@ -458,6 +1012,7 @@ repeat:
458 return ERR_PTR(-ENOENT); 1012 return ERR_PTR(-ENOENT);
459 } 1013 }
460 1014
1015got_it:
461 if (PageUptodate(page)) 1016 if (PageUptodate(page))
462 return page; 1017 return page;
463 1018
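Both lookup fast paths added above derive the block address from an extent hit with the same arithmetic: an extent records that file offsets [fofs, fofs + len) map to disk blocks [blk, blk + len), so a page index inside it maps to blk + index - fofs. A small worked example using the struct layout introduced by this patch:

#include <assert.h>

struct extent_info {
	unsigned int fofs;	/* start offset in the file */
	unsigned int blk;	/* start block address of the extent */
	unsigned int len;	/* length of the extent */
};

int main(void)
{
	struct extent_info ei = { .fofs = 100, .blk = 5000, .len = 8 };
	unsigned int index = 103;	/* page index inside the extent */

	assert(index >= ei.fofs && index < ei.fofs + ei.len);
	assert(ei.blk + index - ei.fofs == 5003);	/* dn.data_blkaddr */
	return 0;
}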
@@ -569,19 +1124,26 @@ static int __allocate_data_block(struct dnode_of_data *dn)
569 1124
570 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1125 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
571 return -EPERM; 1126 return -EPERM;
1127
1128 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
1129 if (dn->data_blkaddr == NEW_ADDR)
1130 goto alloc;
1131
572 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 1132 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
573 return -ENOSPC; 1133 return -ENOSPC;
574 1134
1135alloc:
575 get_node_info(sbi, dn->nid, &ni); 1136 get_node_info(sbi, dn->nid, &ni);
576 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1137 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
577 1138
578 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) 1139 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
579 seg = CURSEG_DIRECT_IO; 1140 seg = CURSEG_DIRECT_IO;
580 1141
581 allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); 1142 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
1143 &sum, seg);
582 1144
583	/* direct IO doesn't use extent cache to maximize performance */ 1145	/* direct IO doesn't use extent cache to maximize performance */
584 __set_data_blkaddr(dn); 1146 set_data_blkaddr(dn);
585 1147
586 /* update i_size */ 1148 /* update i_size */
587 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 1149 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
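The hunk above teaches __allocate_data_block() about slots already reserved as NEW_ADDR: such a slot was charged to the valid-block count when it was reserved, so the function must skip the second inc_valid_block_count() and jump straight to allocation. A toy userspace model of that accounting rule; NEW_ADDR's sentinel value and the counter are assumptions for illustration:

#include <stdio.h>

#define NULL_ADDR	0u
#define NEW_ADDR	(~0u)	/* sentinel: reserved but not yet placed */

static unsigned int valid_blocks;	/* toy stand-in for the quota counter */

static void alloc_block(unsigned int *blkaddr)
{
	if (*blkaddr != NEW_ADDR)	/* not reserved yet: charge quota now */
		valid_blocks++;
	*blkaddr = 1234;		/* pretend the allocator chose a block */
}

int main(void)
{
	unsigned int hole = NULL_ADDR, reserved = NEW_ADDR;

	alloc_block(&hole);
	alloc_block(&reserved);
	printf("valid_blocks = %u\n", valid_blocks);	/* 1, not 2 */
	return 0;
}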
@@ -615,7 +1177,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
615 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1177 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
616 1178
617 while (dn.ofs_in_node < end_offset && len) { 1179 while (dn.ofs_in_node < end_offset && len) {
618 if (dn.data_blkaddr == NULL_ADDR) { 1180 block_t blkaddr;
1181
1182 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
1183 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
619 if (__allocate_data_block(&dn)) 1184 if (__allocate_data_block(&dn))
620 goto sync_out; 1185 goto sync_out;
621 allocated = true; 1186 allocated = true;
@@ -659,13 +1224,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
659 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 1224 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
660 pgoff_t pgofs, end_offset; 1225 pgoff_t pgofs, end_offset;
661 int err = 0, ofs = 1; 1226 int err = 0, ofs = 1;
1227 struct extent_info ei;
662 bool allocated = false; 1228 bool allocated = false;
663 1229
664 /* Get the page offset from the block offset(iblock) */ 1230 /* Get the page offset from the block offset(iblock) */
665 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 1231 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
666 1232
667 if (check_extent_cache(inode, pgofs, bh_result)) 1233 if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1234 f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result);
668 goto out; 1235 goto out;
1236 }
669 1237
670 if (create) 1238 if (create)
671 f2fs_lock_op(F2FS_I_SB(inode)); 1239 f2fs_lock_op(F2FS_I_SB(inode));
@@ -682,7 +1250,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
682 goto put_out; 1250 goto put_out;
683 1251
684 if (dn.data_blkaddr != NULL_ADDR) { 1252 if (dn.data_blkaddr != NULL_ADDR) {
685 set_buffer_new(bh_result); 1253 clear_buffer_new(bh_result);
686 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 1254 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
687 } else if (create) { 1255 } else if (create) {
688 err = __allocate_data_block(&dn); 1256 err = __allocate_data_block(&dn);
@@ -727,6 +1295,7 @@ get_next:
727 if (err) 1295 if (err)
728 goto sync_out; 1296 goto sync_out;
729 allocated = true; 1297 allocated = true;
1298 set_buffer_new(bh_result);
730 blkaddr = dn.data_blkaddr; 1299 blkaddr = dn.data_blkaddr;
731 } 1300 }
732 /* Give more consecutive addresses for the readahead */ 1301 /* Give more consecutive addresses for the readahead */
@@ -813,8 +1382,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
813 fio->blk_addr = dn.data_blkaddr; 1382 fio->blk_addr = dn.data_blkaddr;
814 1383
815 /* This page is already truncated */ 1384 /* This page is already truncated */
816 if (fio->blk_addr == NULL_ADDR) 1385 if (fio->blk_addr == NULL_ADDR) {
1386 ClearPageUptodate(page);
817 goto out_writepage; 1387 goto out_writepage;
1388 }
818 1389
819 set_page_writeback(page); 1390 set_page_writeback(page);
820 1391
@@ -827,10 +1398,15 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
827 need_inplace_update(inode))) { 1398 need_inplace_update(inode))) {
828 rewrite_data_page(page, fio); 1399 rewrite_data_page(page, fio);
829 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1400 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1401 trace_f2fs_do_write_data_page(page, IPU);
830 } else { 1402 } else {
831 write_data_page(page, &dn, fio); 1403 write_data_page(page, &dn, fio);
832 update_extent_cache(&dn); 1404 set_data_blkaddr(&dn);
1405 f2fs_update_extent_cache(&dn);
1406 trace_f2fs_do_write_data_page(page, OPU);
833 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 1407 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1408 if (page->index == 0)
1409 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
834 } 1410 }
835out_writepage: 1411out_writepage:
836 f2fs_put_dnode(&dn); 1412 f2fs_put_dnode(&dn);
@@ -909,6 +1485,8 @@ done:
909 clear_cold_data(page); 1485 clear_cold_data(page);
910out: 1486out:
911 inode_dec_dirty_pages(inode); 1487 inode_dec_dirty_pages(inode);
1488 if (err)
1489 ClearPageUptodate(page);
912 unlock_page(page); 1490 unlock_page(page);
913 if (need_balance_fs) 1491 if (need_balance_fs)
914 f2fs_balance_fs(sbi); 1492 f2fs_balance_fs(sbi);
@@ -935,7 +1513,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
935{ 1513{
936 struct inode *inode = mapping->host; 1514 struct inode *inode = mapping->host;
937 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
938 bool locked = false;
939 int ret; 1516 int ret;
940 long diff; 1517 long diff;
941 1518
@@ -950,15 +1527,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
950 available_free_memory(sbi, DIRTY_DENTS)) 1527 available_free_memory(sbi, DIRTY_DENTS))
951 goto skip_write; 1528 goto skip_write;
952 1529
1530 /* during POR, we don't need to trigger writepage at all. */
1531 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1532 goto skip_write;
1533
953 diff = nr_pages_to_write(sbi, DATA, wbc); 1534 diff = nr_pages_to_write(sbi, DATA, wbc);
954 1535
955 if (!S_ISDIR(inode->i_mode)) {
956 mutex_lock(&sbi->writepages);
957 locked = true;
958 }
959 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1536 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
960 if (locked)
961 mutex_unlock(&sbi->writepages);
962 1537
963 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1538 f2fs_submit_merged_bio(sbi, DATA, WRITE);
964 1539
@@ -1118,12 +1693,12 @@ static int f2fs_write_end(struct file *file,
1118 return copied; 1693 return copied;
1119} 1694}
1120 1695
1121static int check_direct_IO(struct inode *inode, int rw, 1696static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
1122 struct iov_iter *iter, loff_t offset) 1697 loff_t offset)
1123{ 1698{
1124 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; 1699 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1125 1700
1126 if (rw == READ) 1701 if (iov_iter_rw(iter) == READ)
1127 return 0; 1702 return 0;
1128 1703
1129 if (offset & blocksize_mask) 1704 if (offset & blocksize_mask)
@@ -1135,8 +1710,8 @@ static int check_direct_IO(struct inode *inode, int rw,
1135 return 0; 1710 return 0;
1136} 1711}
1137 1712
1138static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, 1713static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1139 struct iov_iter *iter, loff_t offset) 1714 loff_t offset)
1140{ 1715{
1141 struct file *file = iocb->ki_filp; 1716 struct file *file = iocb->ki_filp;
1142 struct address_space *mapping = file->f_mapping; 1717 struct address_space *mapping = file->f_mapping;
@@ -1151,19 +1726,19 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1151 return err; 1726 return err;
1152 } 1727 }
1153 1728
1154 if (check_direct_IO(inode, rw, iter, offset)) 1729 if (check_direct_IO(inode, iter, offset))
1155 return 0; 1730 return 0;
1156 1731
1157 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1732 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1158 1733
1159 if (rw & WRITE) 1734 if (iov_iter_rw(iter) == WRITE)
1160 __allocate_data_blocks(inode, offset, count); 1735 __allocate_data_blocks(inode, offset, count);
1161 1736
1162 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); 1737 err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
1163 if (err < 0 && (rw & WRITE)) 1738 if (err < 0 && iov_iter_rw(iter) == WRITE)
1164 f2fs_write_failed(mapping, offset + count); 1739 f2fs_write_failed(mapping, offset + count);
1165 1740
1166 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); 1741 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
1167 1742
1168 return err; 1743 return err;
1169} 1744}
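This hunk tracks a kernel-wide API change: the I/O direction now travels inside the iov_iter and is queried with iov_iter_rw(), instead of being threaded through every function as a separate rw argument. A userspace model of the same idea; these names are stand-ins, not the kernel API:

#include <stdio.h>

enum iter_dir { ITER_READ, ITER_WRITE };

struct iov_iter_model { enum iter_dir dir; };

/* models iov_iter_rw(): the direction rides inside the iterator, so
 * callees no longer need a separate rw argument */
static enum iter_dir iter_rw(const struct iov_iter_model *i)
{
	return i->dir;
}

static long direct_io(struct iov_iter_model *iter)
{
	if (iter_rw(iter) == ITER_WRITE)
		puts("preallocate blocks for the write range");
	return 0;
}

int main(void)
{
	struct iov_iter_model w = { ITER_WRITE };

	return (int)direct_io(&w);
}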
@@ -1236,6 +1811,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1236 return generic_block_bmap(mapping, block, get_data_block); 1811 return generic_block_bmap(mapping, block, get_data_block);
1237} 1812}
1238 1813
1814void init_extent_cache_info(struct f2fs_sb_info *sbi)
1815{
1816 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
1817 init_rwsem(&sbi->extent_tree_lock);
1818 INIT_LIST_HEAD(&sbi->extent_list);
1819 spin_lock_init(&sbi->extent_lock);
1820 sbi->total_ext_tree = 0;
1821 atomic_set(&sbi->total_ext_node, 0);
1822}
1823
1824int __init create_extent_cache(void)
1825{
1826 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1827 sizeof(struct extent_tree));
1828 if (!extent_tree_slab)
1829 return -ENOMEM;
1830 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1831 sizeof(struct extent_node));
1832 if (!extent_node_slab) {
1833 kmem_cache_destroy(extent_tree_slab);
1834 return -ENOMEM;
1835 }
1836 return 0;
1837}
1838
1839void destroy_extent_cache(void)
1840{
1841 kmem_cache_destroy(extent_node_slab);
1842 kmem_cache_destroy(extent_tree_slab);
1843}
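create_extent_cache() shows the standard two-resource init pattern: if the second kmem cache cannot be created, the first must be destroyed before returning -ENOMEM, and teardown releases in an order that leaves no dangling reference. A userspace model with malloc standing in for kmem_cache_create; purely illustrative:

#include <stdlib.h>

static void *tree_slab, *node_slab;

/* if the second cache cannot be created, the first is torn down
 * before reporting failure, as in create_extent_cache() above */
static int create_caches(void)
{
	tree_slab = malloc(64);
	if (!tree_slab)
		return -1;
	node_slab = malloc(64);
	if (!node_slab) {
		free(tree_slab);	/* roll back the first resource */
		tree_slab = NULL;
		return -1;
	}
	return 0;
}

static void destroy_caches(void)
{
	free(node_slab);	/* release in reverse order of creation */
	free(tree_slab);
}

int main(void)
{
	if (create_caches())
		return 1;
	destroy_caches();
	return 0;
}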
1844
1239const struct address_space_operations f2fs_dblock_aops = { 1845const struct address_space_operations f2fs_dblock_aops = {
1240 .readpage = f2fs_read_data_page, 1846 .readpage = f2fs_read_data_page,
1241 .readpages = f2fs_read_data_pages, 1847 .readpages = f2fs_read_data_pages,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index e671373cc8ab..f5388f37217e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
35 /* validation check of the segment numbers */ 35 /* validation check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext; 36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext; 37 si->total_ext = sbi->total_hit_ext;
38 si->ext_tree = sbi->total_ext_tree;
39 si->ext_node = atomic_read(&sbi->total_ext_node);
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 40 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
39 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); 41 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
40 si->ndirty_dirs = sbi->n_dirty_dirs; 42 si->ndirty_dirs = sbi->n_dirty_dirs;
@@ -185,6 +187,9 @@ get_cache:
185 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); 187 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
186 for (i = 0; i <= UPDATE_INO; i++) 188 for (i = 0; i <= UPDATE_INO; i++)
187 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 189 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
190 si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
191 si->cache_mem += atomic_read(&sbi->total_ext_node) *
192 sizeof(struct extent_node);
188 193
189 si->page_mem = 0; 194 si->page_mem = 0;
190 npages = NODE_MAPPING(sbi)->nrpages; 195 npages = NODE_MAPPING(sbi)->nrpages;
@@ -260,13 +265,20 @@ static int stat_show(struct seq_file *s, void *v)
260 seq_printf(s, "CP calls: %d\n", si->cp_count); 265 seq_printf(s, "CP calls: %d\n", si->cp_count);
261 seq_printf(s, "GC calls: %d (BG: %d)\n", 266 seq_printf(s, "GC calls: %d (BG: %d)\n",
262 si->call_count, si->bg_gc); 267 si->call_count, si->bg_gc);
263 seq_printf(s, " - data segments : %d\n", si->data_segs); 268 seq_printf(s, " - data segments : %d (%d)\n",
264 seq_printf(s, " - node segments : %d\n", si->node_segs); 269 si->data_segs, si->bg_data_segs);
265 seq_printf(s, "Try to move %d blocks\n", si->tot_blks); 270 seq_printf(s, " - node segments : %d (%d)\n",
266 seq_printf(s, " - data blocks : %d\n", si->data_blks); 271 si->node_segs, si->bg_node_segs);
267 seq_printf(s, " - node blocks : %d\n", si->node_blks); 272 seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
273 si->bg_data_blks + si->bg_node_blks);
274 seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks,
275 si->bg_data_blks);
276 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
277 si->bg_node_blks);
268 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 278 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
269 si->hit_ext, si->total_ext); 279 si->hit_ext, si->total_ext);
280 seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree);
281 seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node);
270 seq_puts(s, "\nBalancing F2FS Async:\n"); 282 seq_puts(s, "\nBalancing F2FS Async:\n");
271 seq_printf(s, " - inmem: %4d, wb: %4d\n", 283 seq_printf(s, " - inmem: %4d, wb: %4d\n",
272 si->inmem_pages, si->wb_pages); 284 si->inmem_pages, si->wb_pages);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b74097a7f6d9..3a3302ab7871 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -59,9 +59,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
59 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, 59 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
60}; 60};
61 61
62void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) 62void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
63{ 63{
64 umode_t mode = inode->i_mode;
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 65}
67 66
@@ -127,22 +126,19 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
127 *max_slots = 0; 126 *max_slots = 0;
128 while (bit_pos < d->max) { 127 while (bit_pos < d->max) {
129 if (!test_bit_le(bit_pos, d->bitmap)) { 128 if (!test_bit_le(bit_pos, d->bitmap)) {
130 if (bit_pos == 0)
131 max_len = 1;
132 else if (!test_bit_le(bit_pos - 1, d->bitmap))
133 max_len++;
134 bit_pos++; 129 bit_pos++;
130 max_len++;
135 continue; 131 continue;
136 } 132 }
133
137 de = &d->dentry[bit_pos]; 134 de = &d->dentry[bit_pos];
138 if (early_match_name(name->len, namehash, de) && 135 if (early_match_name(name->len, namehash, de) &&
139 !memcmp(d->filename[bit_pos], name->name, name->len)) 136 !memcmp(d->filename[bit_pos], name->name, name->len))
140 goto found; 137 goto found;
141 138
142 if (max_slots && *max_slots >= 0 && max_len > *max_slots) { 139 if (max_slots && max_len > *max_slots)
143 *max_slots = max_len; 140 *max_slots = max_len;
144 max_len = 0; 141 max_len = 0;
145 }
146 142
147	/* a remaining zero-length name indicates a bug */ 143	/* a remaining zero-length name indicates a bug */
148 if (unlikely(!de->name_len)) 144 if (unlikely(!de->name_len))
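The simplification above replaces the special-cased run tracking with the obvious scan: every free bit extends max_len, and a used bit publishes the run into *max_slots and resets it. A standalone model of that scan over a small slot bitmap; the trailing flush handles a run that reaches the end:

#include <stdio.h>

int main(void)
{
	/* 1 = slot in use, 0 = free; runs of zeros are candidate space */
	const int used[] = { 1, 0, 0, 0, 1, 0, 0, 1 };
	int max_slots = 0, max_len = 0;
	unsigned int i;

	for (i = 0; i < sizeof(used) / sizeof(used[0]); i++) {
		if (!used[i]) {
			max_len++;	/* every free bit extends the run */
			continue;
		}
		if (max_len > max_slots)	/* a used bit publishes the run */
			max_slots = max_len;
		max_len = 0;
	}
	if (max_len > max_slots)	/* flush a run reaching the end */
		max_slots = max_len;
	printf("longest free run: %d\n", max_slots);	/* 3 */
	return 0;
}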
@@ -219,14 +215,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
219 unsigned int max_depth; 215 unsigned int max_depth;
220 unsigned int level; 216 unsigned int level;
221 217
218 *res_page = NULL;
219
222 if (f2fs_has_inline_dentry(dir)) 220 if (f2fs_has_inline_dentry(dir))
223 return find_in_inline_dir(dir, child, res_page); 221 return find_in_inline_dir(dir, child, res_page);
224 222
225 if (npages == 0) 223 if (npages == 0)
226 return NULL; 224 return NULL;
227 225
228 *res_page = NULL;
229
230 name_hash = f2fs_dentry_hash(child); 226 name_hash = f2fs_dentry_hash(child);
231 max_depth = F2FS_I(dir)->i_current_depth; 227 max_depth = F2FS_I(dir)->i_current_depth;
232 228
@@ -285,7 +281,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
285 lock_page(page); 281 lock_page(page);
286 f2fs_wait_on_page_writeback(page, type); 282 f2fs_wait_on_page_writeback(page, type);
287 de->ino = cpu_to_le32(inode->i_ino); 283 de->ino = cpu_to_le32(inode->i_ino);
288 set_de_type(de, inode); 284 set_de_type(de, inode->i_mode);
289 f2fs_dentry_kunmap(dir, page); 285 f2fs_dentry_kunmap(dir, page);
290 set_page_dirty(page); 286 set_page_dirty(page);
291 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 287 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
@@ -331,14 +327,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent,
331 de->hash_code = 0; 327 de->hash_code = 0;
332 de->ino = cpu_to_le32(inode->i_ino); 328 de->ino = cpu_to_le32(inode->i_ino);
333 memcpy(d->filename[0], ".", 1); 329 memcpy(d->filename[0], ".", 1);
334 set_de_type(de, inode); 330 set_de_type(de, inode->i_mode);
335 331
336 de = &d->dentry[1]; 332 de = &d->dentry[1];
337 de->hash_code = 0; 333 de->hash_code = 0;
338 de->name_len = cpu_to_le16(2); 334 de->name_len = cpu_to_le16(2);
339 de->ino = cpu_to_le32(parent->i_ino); 335 de->ino = cpu_to_le32(parent->i_ino);
340 memcpy(d->filename[1], "..", 2); 336 memcpy(d->filename[1], "..", 2);
341 set_de_type(de, inode); 337 set_de_type(de, parent->i_mode);
342 338
343 test_and_set_bit_le(0, (void *)d->bitmap); 339 test_and_set_bit_le(0, (void *)d->bitmap);
344 test_and_set_bit_le(1, (void *)d->bitmap); 340 test_and_set_bit_le(1, (void *)d->bitmap);
@@ -435,7 +431,7 @@ error:
435void update_parent_metadata(struct inode *dir, struct inode *inode, 431void update_parent_metadata(struct inode *dir, struct inode *inode,
436 unsigned int current_depth) 432 unsigned int current_depth)
437{ 433{
438 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 434 if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
439 if (S_ISDIR(inode->i_mode)) { 435 if (S_ISDIR(inode->i_mode)) {
440 inc_nlink(dir); 436 inc_nlink(dir);
441 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 437 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -450,7 +446,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
450 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 446 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
451 } 447 }
452 448
453 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 449 if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
454 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 450 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
455} 451}
456 452
@@ -474,30 +470,47 @@ next:
474 goto next; 470 goto next;
475} 471}
476 472
473void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
474 const struct qstr *name, f2fs_hash_t name_hash,
475 unsigned int bit_pos)
476{
477 struct f2fs_dir_entry *de;
478 int slots = GET_DENTRY_SLOTS(name->len);
479 int i;
480
481 de = &d->dentry[bit_pos];
482 de->hash_code = name_hash;
483 de->name_len = cpu_to_le16(name->len);
484 memcpy(d->filename[bit_pos], name->name, name->len);
485 de->ino = cpu_to_le32(ino);
486 set_de_type(de, mode);
487 for (i = 0; i < slots; i++)
488 test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
489}
490
477/* 491/*
478 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 492 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
479 * f2fs_unlock_op(). 493 * f2fs_unlock_op().
480 */ 494 */
481int __f2fs_add_link(struct inode *dir, const struct qstr *name, 495int __f2fs_add_link(struct inode *dir, const struct qstr *name,
482 struct inode *inode) 496 struct inode *inode, nid_t ino, umode_t mode)
483{ 497{
484 unsigned int bit_pos; 498 unsigned int bit_pos;
485 unsigned int level; 499 unsigned int level;
486 unsigned int current_depth; 500 unsigned int current_depth;
487 unsigned long bidx, block; 501 unsigned long bidx, block;
488 f2fs_hash_t dentry_hash; 502 f2fs_hash_t dentry_hash;
489 struct f2fs_dir_entry *de;
490 unsigned int nbucket, nblock; 503 unsigned int nbucket, nblock;
491 size_t namelen = name->len; 504 size_t namelen = name->len;
492 struct page *dentry_page = NULL; 505 struct page *dentry_page = NULL;
493 struct f2fs_dentry_block *dentry_blk = NULL; 506 struct f2fs_dentry_block *dentry_blk = NULL;
507 struct f2fs_dentry_ptr d;
494 int slots = GET_DENTRY_SLOTS(namelen); 508 int slots = GET_DENTRY_SLOTS(namelen);
495 struct page *page; 509 struct page *page = NULL;
496 int err = 0; 510 int err = 0;
497 int i;
498 511
499 if (f2fs_has_inline_dentry(dir)) { 512 if (f2fs_has_inline_dentry(dir)) {
500 err = f2fs_add_inline_entry(dir, name, inode); 513 err = f2fs_add_inline_entry(dir, name, inode, ino, mode);
501 if (!err || err != -EAGAIN) 514 if (!err || err != -EAGAIN)
502 return err; 515 return err;
503 else 516 else
@@ -547,30 +560,31 @@ start:
547add_dentry: 560add_dentry:
548 f2fs_wait_on_page_writeback(dentry_page, DATA); 561 f2fs_wait_on_page_writeback(dentry_page, DATA);
549 562
550 down_write(&F2FS_I(inode)->i_sem); 563 if (inode) {
551 page = init_inode_metadata(inode, dir, name, NULL); 564 down_write(&F2FS_I(inode)->i_sem);
552 if (IS_ERR(page)) { 565 page = init_inode_metadata(inode, dir, name, NULL);
553 err = PTR_ERR(page); 566 if (IS_ERR(page)) {
554 goto fail; 567 err = PTR_ERR(page);
568 goto fail;
569 }
555 } 570 }
556 de = &dentry_blk->dentry[bit_pos]; 571
557 de->hash_code = dentry_hash; 572 make_dentry_ptr(&d, (void *)dentry_blk, 1);
558 de->name_len = cpu_to_le16(namelen); 573 f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos);
559 memcpy(dentry_blk->filename[bit_pos], name->name, name->len); 574
560 de->ino = cpu_to_le32(inode->i_ino);
561 set_de_type(de, inode);
562 for (i = 0; i < slots; i++)
563 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
564 set_page_dirty(dentry_page); 575 set_page_dirty(dentry_page);
565 576
566 /* we don't need to mark_inode_dirty now */ 577 if (inode) {
567 F2FS_I(inode)->i_pino = dir->i_ino; 578 /* we don't need to mark_inode_dirty now */
568 update_inode(inode, page); 579 F2FS_I(inode)->i_pino = dir->i_ino;
569 f2fs_put_page(page, 1); 580 update_inode(inode, page);
581 f2fs_put_page(page, 1);
582 }
570 583
571 update_parent_metadata(dir, inode, current_depth); 584 update_parent_metadata(dir, inode, current_depth);
572fail: 585fail:
573 up_write(&F2FS_I(inode)->i_sem); 586 if (inode)
587 up_write(&F2FS_I(inode)->i_sem);
574 588
575 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 589 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
576 update_inode_page(dir); 590 update_inode_page(dir);
@@ -669,6 +683,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
669 if (bit_pos == NR_DENTRY_IN_BLOCK) { 683 if (bit_pos == NR_DENTRY_IN_BLOCK) {
670 truncate_hole(dir, page->index, page->index + 1); 684 truncate_hole(dir, page->index, page->index + 1);
671 clear_page_dirty_for_io(page); 685 clear_page_dirty_for_io(page);
686 ClearPagePrivate(page);
672 ClearPageUptodate(page); 687 ClearPageUptodate(page);
673 inode_dec_dirty_pages(dir); 688 inode_dec_dirty_pages(dir);
674 } 689 }
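The f2fs_update_dentry() helper added earlier in this file's diff marks one bitmap bit per filename slot the entry occupies, with the slot count derived from the name length. A userspace model of the slot math and bit marking; the 8-byte slot width matches F2FS_SLOT_LEN, everything else is illustrative:

#include <stdio.h>

#define SLOT_LEN		8	/* filename bytes per dentry slot */
#define GET_DENTRY_SLOTS(x)	(((x) + SLOT_LEN - 1) / SLOT_LEN)

int main(void)
{
	unsigned char bitmap[4] = { 0 };
	int bit_pos = 3;			/* first free slot found */
	int slots = GET_DENTRY_SLOTS(13);	/* 13-byte name -> 2 slots */
	int i;

	for (i = 0; i < slots; i++)
		bitmap[(bit_pos + i) / 8] |= 1u << ((bit_pos + i) % 8);

	printf("slots = %d, bitmap[0] = 0x%02x\n", slots, bitmap[0]);
	/* slots = 2, bits 3 and 4 set -> bitmap[0] = 0x18 */
	return 0;
}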
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7fa3313ab0e2..c06a25e5cec3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -50,6 +50,7 @@
50#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 50#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
51#define F2FS_MOUNT_NOBARRIER 0x00000800 51#define F2FS_MOUNT_NOBARRIER 0x00000800
52#define F2FS_MOUNT_FASTBOOT 0x00001000 52#define F2FS_MOUNT_FASTBOOT 0x00001000
53#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
53 54
54#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 55#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
55#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 56#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -102,6 +103,7 @@ enum {
102 CP_UMOUNT, 103 CP_UMOUNT,
103 CP_FASTBOOT, 104 CP_FASTBOOT,
104 CP_SYNC, 105 CP_SYNC,
106 CP_RECOVERY,
105 CP_DISCARD, 107 CP_DISCARD,
106}; 108};
107 109
@@ -216,6 +218,15 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
216#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) 218#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
217#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 219#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
218 220
221/*
222 * Should be kept the same as XFS_IOC_GOINGDOWN.
223 * Flags for the going-down operation used by FS_IOC_GOINGDOWN.
224 */
225#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */
226#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */
227#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
228#define F2FS_GOING_DOWN_NOSYNC	0x2	/* going down without any sync */
229
219#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 230#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
220/* 231/*
221 * ioctl commands in 32 bit emulation 232 * ioctl commands in 32 bit emulation
@@ -273,14 +284,34 @@ enum {
273 284
274#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ 285#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
275 286
287/* vector size for gang look-ups in the radix tree backing the extent cache */
288#define EXT_TREE_VEC_SIZE 64
289
276/* for in-memory extent cache entry */ 290/* for in-memory extent cache entry */
277#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ 291#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
292
293/* number of extent info in extent cache we try to shrink */
294#define EXTENT_CACHE_SHRINK_NUMBER 128
278 295
279struct extent_info { 296struct extent_info {
280 rwlock_t ext_lock; /* rwlock for consistency */ 297 unsigned int fofs; /* start offset in a file */
281 unsigned int fofs; /* start offset in a file */ 298 u32 blk; /* start block address of the extent */
282 u32 blk_addr; /* start block address of the extent */ 299 unsigned int len; /* length of the extent */
283 unsigned int len; /* length of the extent */ 300};
301
302struct extent_node {
303 struct rb_node rb_node; /* rb node located in rb-tree */
304 struct list_head list; /* node in global extent list of sbi */
305 struct extent_info ei; /* extent info */
306};
307
308struct extent_tree {
309 nid_t ino; /* inode number */
310 struct rb_root root; /* root of extent info rb-tree */
311 struct extent_node *cached_en; /* recently accessed extent node */
312 rwlock_t lock; /* protect extent info rb-tree */
313 atomic_t refcount; /* reference count of rb-tree */
314	unsigned int count;		/* # of extent nodes in the rb-tree */
284}; 315};
285 316
286/* 317/*
@@ -309,6 +340,7 @@ struct f2fs_inode_info {
309 nid_t i_xattr_nid; /* node id that contains xattrs */ 340 nid_t i_xattr_nid; /* node id that contains xattrs */
310 unsigned long long xattr_ver; /* cp version of xattr modification */ 341 unsigned long long xattr_ver; /* cp version of xattr modification */
311 struct extent_info ext; /* in-memory extent cache entry */ 342 struct extent_info ext; /* in-memory extent cache entry */
343 rwlock_t ext_lock; /* rwlock for single extent cache */
312 struct inode_entry *dirty_dir; /* the pointer of dirty dir */ 344 struct inode_entry *dirty_dir; /* the pointer of dirty dir */
313 345
314 struct radix_tree_root inmem_root; /* radix tree for inmem pages */ 346 struct radix_tree_root inmem_root; /* radix tree for inmem pages */
@@ -319,21 +351,51 @@ struct f2fs_inode_info {
319static inline void get_extent_info(struct extent_info *ext, 351static inline void get_extent_info(struct extent_info *ext,
320 struct f2fs_extent i_ext) 352 struct f2fs_extent i_ext)
321{ 353{
322 write_lock(&ext->ext_lock);
323 ext->fofs = le32_to_cpu(i_ext.fofs); 354 ext->fofs = le32_to_cpu(i_ext.fofs);
324 ext->blk_addr = le32_to_cpu(i_ext.blk_addr); 355 ext->blk = le32_to_cpu(i_ext.blk);
325 ext->len = le32_to_cpu(i_ext.len); 356 ext->len = le32_to_cpu(i_ext.len);
326 write_unlock(&ext->ext_lock);
327} 357}
328 358
329static inline void set_raw_extent(struct extent_info *ext, 359static inline void set_raw_extent(struct extent_info *ext,
330 struct f2fs_extent *i_ext) 360 struct f2fs_extent *i_ext)
331{ 361{
332 read_lock(&ext->ext_lock);
333 i_ext->fofs = cpu_to_le32(ext->fofs); 362 i_ext->fofs = cpu_to_le32(ext->fofs);
334 i_ext->blk_addr = cpu_to_le32(ext->blk_addr); 363 i_ext->blk = cpu_to_le32(ext->blk);
335 i_ext->len = cpu_to_le32(ext->len); 364 i_ext->len = cpu_to_le32(ext->len);
336 read_unlock(&ext->ext_lock); 365}
366
367static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
368 u32 blk, unsigned int len)
369{
370 ei->fofs = fofs;
371 ei->blk = blk;
372 ei->len = len;
373}
374
375static inline bool __is_extent_same(struct extent_info *ei1,
376 struct extent_info *ei2)
377{
378 return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
379 ei1->len == ei2->len);
380}
381
382static inline bool __is_extent_mergeable(struct extent_info *back,
383 struct extent_info *front)
384{
385 return (back->fofs + back->len == front->fofs &&
386 back->blk + back->len == front->blk);
387}
388
389static inline bool __is_back_mergeable(struct extent_info *cur,
390 struct extent_info *back)
391{
392 return __is_extent_mergeable(back, cur);
393}
394
395static inline bool __is_front_mergeable(struct extent_info *cur,
396 struct extent_info *front)
397{
398 return __is_extent_mergeable(cur, front);
337} 399}
338 400
339struct f2fs_nm_info { 401struct f2fs_nm_info {
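The new merge helpers make the invariant explicit: two extents merge only when they are adjacent both in file-offset space and in block-address space. A compact check with concrete numbers, reusing the predicate's shape:

#include <assert.h>

struct extent_info { unsigned int fofs, blk, len; };

/* same shape as __is_extent_mergeable(): back and front merge only
 * when adjacent both in the file and on disk */
static int mergeable(const struct extent_info *back,
		     const struct extent_info *front)
{
	return back->fofs + back->len == front->fofs &&
	       back->blk + back->len == front->blk;
}

int main(void)
{
	struct extent_info a = { 10, 100, 5 };
	struct extent_info b = { 15, 105, 3 };	/* contiguous both ways */
	struct extent_info c = { 15, 200, 3 };	/* file-adjacent only */

	assert(mergeable(&a, &b));
	assert(!mergeable(&a, &c));
	return 0;
}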
@@ -502,6 +564,10 @@ enum page_type {
502 META, 564 META,
503 NR_PAGE_TYPE, 565 NR_PAGE_TYPE,
504 META_FLUSH, 566 META_FLUSH,
567	INMEM,		/* the types below are used only by tracepoints */
568 INMEM_DROP,
569 IPU,
570 OPU,
505}; 571};
506 572
507struct f2fs_io_info { 573struct f2fs_io_info {
@@ -559,7 +625,6 @@ struct f2fs_sb_info {
559 struct mutex cp_mutex; /* checkpoint procedure lock */ 625 struct mutex cp_mutex; /* checkpoint procedure lock */
560 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 626 struct rw_semaphore cp_rwsem; /* blocking FS operations */
561 struct rw_semaphore node_write; /* locking node writes */ 627 struct rw_semaphore node_write; /* locking node writes */
562 struct mutex writepages; /* mutex for writepages() */
563 wait_queue_head_t cp_wait; 628 wait_queue_head_t cp_wait;
564 629
565 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 630 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@@ -571,6 +636,14 @@ struct f2fs_sb_info {
571 struct list_head dir_inode_list; /* dir inode list */ 636 struct list_head dir_inode_list; /* dir inode list */
572 spinlock_t dir_inode_lock; /* for dir inode list lock */ 637 spinlock_t dir_inode_lock; /* for dir inode list lock */
573 638
639 /* for extent tree cache */
640 struct radix_tree_root extent_tree_root;/* cache extent cache entries */
641 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
642 struct list_head extent_list; /* lru list for shrinker */
643 spinlock_t extent_lock; /* locking extent lru list */
644 int total_ext_tree; /* extent tree count */
645 atomic_t total_ext_node; /* extent info count */
646
574 /* basic filesystem units */ 647 /* basic filesystem units */
575 unsigned int log_sectors_per_block; /* log2 sectors per block */ 648 unsigned int log_sectors_per_block; /* log2 sectors per block */
576 unsigned int log_blocksize; /* log2 block size */ 649 unsigned int log_blocksize; /* log2 block size */
@@ -920,12 +993,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
920 return 0; 993 return 0;
921} 994}
922 995
996static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
997{
998 return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
999}
1000
923static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) 1001static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
924{ 1002{
925 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1003 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
926 int offset; 1004 int offset;
927 1005
928 if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { 1006 if (__cp_payload(sbi) > 0) {
929 if (flag == NAT_BITMAP) 1007 if (flag == NAT_BITMAP)
930 return &ckpt->sit_nat_version_bitmap; 1008 return &ckpt->sit_nat_version_bitmap;
931 else 1009 else
@@ -1166,8 +1244,10 @@ enum {
1166 FI_NEED_IPU, /* used for ipu per file */ 1244 FI_NEED_IPU, /* used for ipu per file */
1167 FI_ATOMIC_FILE, /* indicate atomic file */ 1245 FI_ATOMIC_FILE, /* indicate atomic file */
1168 FI_VOLATILE_FILE, /* indicate volatile file */ 1246 FI_VOLATILE_FILE, /* indicate volatile file */
1247 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
1169 FI_DROP_CACHE, /* drop dirty page cache */ 1248 FI_DROP_CACHE, /* drop dirty page cache */
1170 FI_DATA_EXIST, /* indicate data exists */ 1249 FI_DATA_EXIST, /* indicate data exists */
1250 FI_INLINE_DOTS, /* indicate inline dot dentries */
1171}; 1251};
1172 1252
1173static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1253static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1204,6 +1284,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
1204 set_inode_flag(fi, FI_INLINE_DENTRY); 1284 set_inode_flag(fi, FI_INLINE_DENTRY);
1205 if (ri->i_inline & F2FS_DATA_EXIST) 1285 if (ri->i_inline & F2FS_DATA_EXIST)
1206 set_inode_flag(fi, FI_DATA_EXIST); 1286 set_inode_flag(fi, FI_DATA_EXIST);
1287 if (ri->i_inline & F2FS_INLINE_DOTS)
1288 set_inode_flag(fi, FI_INLINE_DOTS);
1207} 1289}
1208 1290
1209static inline void set_raw_inline(struct f2fs_inode_info *fi, 1291static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1219,6 +1301,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
1219 ri->i_inline |= F2FS_INLINE_DENTRY; 1301 ri->i_inline |= F2FS_INLINE_DENTRY;
1220 if (is_inode_flag_set(fi, FI_DATA_EXIST)) 1302 if (is_inode_flag_set(fi, FI_DATA_EXIST))
1221 ri->i_inline |= F2FS_DATA_EXIST; 1303 ri->i_inline |= F2FS_DATA_EXIST;
1304 if (is_inode_flag_set(fi, FI_INLINE_DOTS))
1305 ri->i_inline |= F2FS_INLINE_DOTS;
1222} 1306}
1223 1307
1224static inline int f2fs_has_inline_xattr(struct inode *inode) 1308static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1264,6 +1348,11 @@ static inline int f2fs_exist_data(struct inode *inode)
1264 return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); 1348 return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
1265} 1349}
1266 1350
1351static inline int f2fs_has_inline_dots(struct inode *inode)
1352{
1353 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS);
1354}
1355
1267static inline bool f2fs_is_atomic_file(struct inode *inode) 1356static inline bool f2fs_is_atomic_file(struct inode *inode)
1268{ 1357{
1269 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); 1358 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1274,6 +1363,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode)
1274 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); 1363 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
1275} 1364}
1276 1365
1366static inline bool f2fs_is_first_block_written(struct inode *inode)
1367{
1368 return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1369}
1370
1277static inline bool f2fs_is_drop_cache(struct inode *inode) 1371static inline bool f2fs_is_drop_cache(struct inode *inode)
1278{ 1372{
1279 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); 1373 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
@@ -1290,12 +1384,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode)
1290 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); 1384 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
1291} 1385}
1292 1386
1293static inline void *inline_dentry_addr(struct page *page)
1294{
1295 struct f2fs_inode *ri = F2FS_INODE(page);
1296 return (void *)&(ri->i_addr[1]);
1297}
1298
1299static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) 1387static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
1300{ 1388{
1301 if (!f2fs_has_inline_dentry(dir)) 1389 if (!f2fs_has_inline_dentry(dir))
@@ -1363,7 +1451,7 @@ struct dentry *f2fs_get_parent(struct dentry *child);
1363 * dir.c 1451 * dir.c
1364 */ 1452 */
1365extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; 1453extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
1366void set_de_type(struct f2fs_dir_entry *, struct inode *); 1454void set_de_type(struct f2fs_dir_entry *, umode_t);
1367struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, 1455struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
1368 struct f2fs_dentry_ptr *); 1456 struct f2fs_dentry_ptr *);
1369bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, 1457bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
@@ -1382,7 +1470,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
1382void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 1470void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1383 struct page *, struct inode *); 1471 struct page *, struct inode *);
1384int update_dent_inode(struct inode *, const struct qstr *); 1472int update_dent_inode(struct inode *, const struct qstr *);
1385int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1473void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
1474			const struct qstr *, f2fs_hash_t, unsigned int);
1475int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
1476 umode_t);
1386void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, 1477void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
1387 struct inode *); 1478 struct inode *);
1388int f2fs_do_tmpfile(struct inode *, struct inode *); 1479int f2fs_do_tmpfile(struct inode *, struct inode *);
@@ -1392,7 +1483,7 @@ bool f2fs_empty_dir(struct inode *);
1392static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) 1483static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
1393{ 1484{
1394 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, 1485 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
1395 inode); 1486 inode, inode->i_ino, inode->i_mode);
1396} 1487}
1397 1488
1398/* 1489/*
@@ -1519,14 +1610,22 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
1519 struct f2fs_io_info *); 1610 struct f2fs_io_info *);
1520void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, 1611void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
1521 struct f2fs_io_info *); 1612 struct f2fs_io_info *);
1613void set_data_blkaddr(struct dnode_of_data *);
1522int reserve_new_block(struct dnode_of_data *); 1614int reserve_new_block(struct dnode_of_data *);
1523int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1615int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
1524void update_extent_cache(struct dnode_of_data *); 1616void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
1617void f2fs_destroy_extent_tree(struct inode *);
1618void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
1619void f2fs_update_extent_cache(struct dnode_of_data *);
1620void f2fs_preserve_extent_tree(struct inode *);
1525struct page *find_data_page(struct inode *, pgoff_t, bool); 1621struct page *find_data_page(struct inode *, pgoff_t, bool);
1526struct page *get_lock_data_page(struct inode *, pgoff_t); 1622struct page *get_lock_data_page(struct inode *, pgoff_t);
1527struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1623struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1528int do_write_data_page(struct page *, struct f2fs_io_info *); 1624int do_write_data_page(struct page *, struct f2fs_io_info *);
1529int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 1625int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1626void init_extent_cache_info(struct f2fs_sb_info *);
1627int __init create_extent_cache(void);
1628void destroy_extent_cache(void);
1530void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 1629void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
1531int f2fs_release_page(struct page *, gfp_t); 1630int f2fs_release_page(struct page *, gfp_t);
1532 1631
@@ -1554,7 +1653,7 @@ struct f2fs_stat_info {
1554 struct f2fs_sb_info *sbi; 1653 struct f2fs_sb_info *sbi;
1555 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1654 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1556 int main_area_segs, main_area_sections, main_area_zones; 1655 int main_area_segs, main_area_sections, main_area_zones;
1557 int hit_ext, total_ext; 1656 int hit_ext, total_ext, ext_tree, ext_node;
1558 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1657 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1559 int nats, dirty_nats, sits, dirty_sits, fnids; 1658 int nats, dirty_nats, sits, dirty_sits, fnids;
1560 int total_count, utilization; 1659 int total_count, utilization;
@@ -1566,7 +1665,9 @@ struct f2fs_stat_info {
1566 int dirty_count, node_pages, meta_pages; 1665 int dirty_count, node_pages, meta_pages;
1567 int prefree_count, call_count, cp_count; 1666 int prefree_count, call_count, cp_count;
1568 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1667 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1668 int bg_node_segs, bg_data_segs;
1569 int tot_blks, data_blks, node_blks; 1669 int tot_blks, data_blks, node_blks;
1670 int bg_data_blks, bg_node_blks;
1570 int curseg[NR_CURSEG_TYPE]; 1671 int curseg[NR_CURSEG_TYPE];
1571 int cursec[NR_CURSEG_TYPE]; 1672 int cursec[NR_CURSEG_TYPE];
1572 int curzone[NR_CURSEG_TYPE]; 1673 int curzone[NR_CURSEG_TYPE];
@@ -1615,31 +1716,36 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1615 ((sbi)->block_count[(curseg)->alloc_type]++) 1716 ((sbi)->block_count[(curseg)->alloc_type]++)
1616#define stat_inc_inplace_blocks(sbi) \ 1717#define stat_inc_inplace_blocks(sbi) \
1617 (atomic_inc(&(sbi)->inplace_count)) 1718 (atomic_inc(&(sbi)->inplace_count))
1618#define stat_inc_seg_count(sbi, type) \ 1719#define stat_inc_seg_count(sbi, type, gc_type) \
1619 do { \ 1720 do { \
1620 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1721 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1621 (si)->tot_segs++; \ 1722 (si)->tot_segs++; \
1622 if (type == SUM_TYPE_DATA) \ 1723 if (type == SUM_TYPE_DATA) { \
1623 si->data_segs++; \ 1724 si->data_segs++; \
1624 else \ 1725 si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \
1726 } else { \
1625 si->node_segs++; \ 1727 si->node_segs++; \
1728 si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \
1729 } \
1626 } while (0) 1730 } while (0)
1627 1731
1628#define stat_inc_tot_blk_count(si, blks) \ 1732#define stat_inc_tot_blk_count(si, blks) \
1629 (si->tot_blks += (blks)) 1733 (si->tot_blks += (blks))
1630 1734
1631#define stat_inc_data_blk_count(sbi, blks) \ 1735#define stat_inc_data_blk_count(sbi, blks, gc_type) \
1632 do { \ 1736 do { \
1633 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1737 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1634 stat_inc_tot_blk_count(si, blks); \ 1738 stat_inc_tot_blk_count(si, blks); \
1635 si->data_blks += (blks); \ 1739 si->data_blks += (blks); \
1740 si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \
1636 } while (0) 1741 } while (0)
1637 1742
1638#define stat_inc_node_blk_count(sbi, blks) \ 1743#define stat_inc_node_blk_count(sbi, blks, gc_type) \
1639 do { \ 1744 do { \
1640 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1745 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1641 stat_inc_tot_blk_count(si, blks); \ 1746 stat_inc_tot_blk_count(si, blks); \
1642 si->node_blks += (blks); \ 1747 si->node_blks += (blks); \
1748 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
1643 } while (0) 1749 } while (0)
1644 1750
1645int f2fs_build_stats(struct f2fs_sb_info *); 1751int f2fs_build_stats(struct f2fs_sb_info *);
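The reworked stat macros grow from one statement to several, which is why each body stays wrapped in do { } while (0): the macro then behaves as a single statement even under an un-braced if/else. A minimal illustration with a cut-down macro; the names are invented:

#include <stdio.h>

struct stats { int data_segs, bg_data_segs; };

/* same shape as stat_inc_seg_count(): do { } while (0) makes the
 * multi-statement body act as one statement under un-braced if/else */
#define INC_SEG(si, bg)						\
	do {							\
		(si)->data_segs++;				\
		(si)->bg_data_segs += (bg) ? 1 : 0;		\
	} while (0)

int main(void)
{
	struct stats si = { 0, 0 };
	int background = 1;

	if (background)
		INC_SEG(&si, 1);
	else
		INC_SEG(&si, 0);

	printf("%d %d\n", si.data_segs, si.bg_data_segs);	/* 1 1 */
	return 0;
}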
@@ -1661,10 +1767,10 @@ void f2fs_destroy_root_stats(void);
1661#define stat_inc_seg_type(sbi, curseg) 1767#define stat_inc_seg_type(sbi, curseg)
1662#define stat_inc_block_count(sbi, curseg) 1768#define stat_inc_block_count(sbi, curseg)
1663#define stat_inc_inplace_blocks(sbi) 1769#define stat_inc_inplace_blocks(sbi)
1664#define stat_inc_seg_count(si, type) 1770#define stat_inc_seg_count(sbi, type, gc_type)
1665#define stat_inc_tot_blk_count(si, blks) 1771#define stat_inc_tot_blk_count(si, blks)
1666#define stat_inc_data_blk_count(si, blks) 1772#define stat_inc_data_blk_count(sbi, blks, gc_type)
1667#define stat_inc_node_blk_count(sbi, blks) 1773#define stat_inc_node_blk_count(sbi, blks, gc_type)
1668 1774
1669static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1775static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1670static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1776static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
@@ -1688,6 +1794,7 @@ extern struct kmem_cache *inode_entry_slab;
1688 */ 1794 */
1689bool f2fs_may_inline(struct inode *); 1795bool f2fs_may_inline(struct inode *);
1690void read_inline_data(struct page *, struct page *); 1796void read_inline_data(struct page *, struct page *);
1797bool truncate_inline_inode(struct page *, u64);
1691int f2fs_read_inline_data(struct inode *, struct page *); 1798int f2fs_read_inline_data(struct inode *, struct page *);
1692int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); 1799int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
1693int f2fs_convert_inline_inode(struct inode *); 1800int f2fs_convert_inline_inode(struct inode *);
@@ -1697,7 +1804,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
1697 struct page **); 1804 struct page **);
1698struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); 1805struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
1699int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); 1806int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
1700int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); 1807int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
1808 nid_t, umode_t);
1701void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, 1809void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
1702 struct inode *, struct inode *); 1810 struct inode *, struct inode *);
1703bool f2fs_empty_inline_dir(struct inode *); 1811bool f2fs_empty_inline_dir(struct inode *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 98dac27bc3f7..a6f3f6186588 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -241,6 +241,8 @@ go_write:
241 * will be used only for fsynced inodes after checkpoint. 241 * will be used only for fsynced inodes after checkpoint.
242 */ 242 */
243 try_to_fix_pino(inode); 243 try_to_fix_pino(inode);
244 clear_inode_flag(fi, FI_APPEND_WRITE);
245 clear_inode_flag(fi, FI_UPDATE_WRITE);
244 goto out; 246 goto out;
245 } 247 }
246sync_nodes: 248sync_nodes:
@@ -433,8 +435,12 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
433 continue; 435 continue;
434 436
435 dn->data_blkaddr = NULL_ADDR; 437 dn->data_blkaddr = NULL_ADDR;
436 update_extent_cache(dn); 438 set_data_blkaddr(dn);
439 f2fs_update_extent_cache(dn);
437 invalidate_blocks(sbi, blkaddr); 440 invalidate_blocks(sbi, blkaddr);
441 if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
442 clear_inode_flag(F2FS_I(dn->inode),
443 FI_FIRST_BLOCK_WRITTEN);
438 nr_free++; 444 nr_free++;
439 } 445 }
440 if (nr_free) { 446 if (nr_free) {
@@ -454,15 +460,16 @@ void truncate_data_blocks(struct dnode_of_data *dn)
454 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); 460 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
455} 461}
456 462
457static int truncate_partial_data_page(struct inode *inode, u64 from) 463static int truncate_partial_data_page(struct inode *inode, u64 from,
464 bool force)
458{ 465{
459 unsigned offset = from & (PAGE_CACHE_SIZE - 1); 466 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
460 struct page *page; 467 struct page *page;
461 468
462 if (!offset) 469 if (!offset && !force)
463 return 0; 470 return 0;
464 471
465 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); 472 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force);
466 if (IS_ERR(page)) 473 if (IS_ERR(page))
467 return 0; 474 return 0;
468 475
@@ -473,7 +480,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from)
473 480
474 f2fs_wait_on_page_writeback(page, DATA); 481 f2fs_wait_on_page_writeback(page, DATA);
475 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 482 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
476 set_page_dirty(page); 483 if (!force)
484 set_page_dirty(page);
477out: 485out:
478 f2fs_put_page(page, 1); 486 f2fs_put_page(page, 1);
479 return 0; 487 return 0;
@@ -487,6 +495,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
487 pgoff_t free_from; 495 pgoff_t free_from;
488 int count = 0, err = 0; 496 int count = 0, err = 0;
489 struct page *ipage; 497 struct page *ipage;
498 bool truncate_page = false;
490 499
491 trace_f2fs_truncate_blocks_enter(inode, from); 500 trace_f2fs_truncate_blocks_enter(inode, from);
492 501
@@ -502,7 +511,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
502 } 511 }
503 512
504 if (f2fs_has_inline_data(inode)) { 513 if (f2fs_has_inline_data(inode)) {
514 if (truncate_inline_inode(ipage, from))
515 set_page_dirty(ipage);
505 f2fs_put_page(ipage, 1); 516 f2fs_put_page(ipage, 1);
517 truncate_page = true;
506 goto out; 518 goto out;
507 } 519 }
508 520
@@ -533,7 +545,7 @@ out:
533 545
534 /* lastly zero out the first data page */ 546 /* lastly zero out the first data page */
535 if (!err) 547 if (!err)
536 err = truncate_partial_data_page(inode, from); 548 err = truncate_partial_data_page(inode, from, truncate_page);
537 549
538 trace_f2fs_truncate_blocks_exit(inode, err); 550 trace_f2fs_truncate_blocks_exit(inode, err);
539 return err; 551 return err;
@@ -997,6 +1009,9 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
997 if (!f2fs_is_volatile_file(inode)) 1009 if (!f2fs_is_volatile_file(inode))
998 return 0; 1010 return 0;
999 1011
1012 if (!f2fs_is_first_block_written(inode))
1013 return truncate_partial_data_page(inode, 0, true);
1014
1000 punch_hole(inode, 0, F2FS_BLKSIZE); 1015 punch_hole(inode, 0, F2FS_BLKSIZE);
1001 return 0; 1016 return 0;
1002} 1017}
@@ -1029,6 +1044,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
1029 return ret; 1044 return ret;
1030} 1045}
1031 1046
1047static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1048{
1049 struct inode *inode = file_inode(filp);
1050 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1051 struct super_block *sb = sbi->sb;
1052 __u32 in;
1053
1054 if (!capable(CAP_SYS_ADMIN))
1055 return -EPERM;
1056
1057 if (get_user(in, (__u32 __user *)arg))
1058 return -EFAULT;
1059
1060 switch (in) {
1061 case F2FS_GOING_DOWN_FULLSYNC:
1062 sb = freeze_bdev(sb->s_bdev);
1063 if (sb && !IS_ERR(sb)) {
1064 f2fs_stop_checkpoint(sbi);
1065 thaw_bdev(sb->s_bdev, sb);
1066 }
1067 break;
1068 case F2FS_GOING_DOWN_METASYNC:
1069 /* do checkpoint only */
1070 f2fs_sync_fs(sb, 1);
1071 f2fs_stop_checkpoint(sbi);
1072 break;
1073 case F2FS_GOING_DOWN_NOSYNC:
1074 f2fs_stop_checkpoint(sbi);
1075 break;
1076 default:
1077 return -EINVAL;
1078 }
1079 return 0;
1080}
1081
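A minimal userspace sketch of driving the new shutdown ioctl. The F2FS_IOC_SHUTDOWN encoding and the F2FS_GOING_DOWN_* values are restated below as assumptions, since the f2fs.h header hunk is not part of this diff; verify them against the kernel's fs/f2fs/f2fs.h.

/*
 * Hypothetical sketch: trigger an f2fs shutdown from userspace.
 * The ioctl number and mode values below are assumptions mirroring
 * the f2fs header of this era; prefer the kernel header if present.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#ifndef F2FS_IOC_SHUTDOWN
#define F2FS_IOC_SHUTDOWN		_IOR('X', 125, __u32)
#define F2FS_GOING_DOWN_FULLSYNC	0x0	/* freeze bdev, then checkpoint */
#define F2FS_GOING_DOWN_METASYNC	0x1	/* sync fs, then checkpoint */
#define F2FS_GOING_DOWN_NOSYNC		0x2	/* stop checkpointing only */
#endif

int main(int argc, char **argv)
{
	__u32 mode = F2FS_GOING_DOWN_METASYNC;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file-on-f2fs>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* requires CAP_SYS_ADMIN; see f2fs_ioc_shutdown() above */
	if (ioctl(fd, F2FS_IOC_SHUTDOWN, &mode) < 0)
		perror("F2FS_IOC_SHUTDOWN");
	close(fd);
	return 0;
}

The three modes match the switch in f2fs_ioc_shutdown() above: FULLSYNC freezes the block device before stopping checkpoints, METASYNC writes one last checkpoint, and NOSYNC stops checkpointing immediately.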
1032static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 1082static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1033{ 1083{
1034 struct inode *inode = file_inode(filp); 1084 struct inode *inode = file_inode(filp);
@@ -1078,6 +1128,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1078 return f2fs_ioc_release_volatile_write(filp); 1128 return f2fs_ioc_release_volatile_write(filp);
1079 case F2FS_IOC_ABORT_VOLATILE_WRITE: 1129 case F2FS_IOC_ABORT_VOLATILE_WRITE:
1080 return f2fs_ioc_abort_volatile_write(filp); 1130 return f2fs_ioc_abort_volatile_write(filp);
1131 case F2FS_IOC_SHUTDOWN:
1132 return f2fs_ioc_shutdown(filp, arg);
1081 case FITRIM: 1133 case FITRIM:
1082 return f2fs_ioc_fitrim(filp, arg); 1134 return f2fs_ioc_fitrim(filp, arg);
1083 default: 1135 default:
@@ -1104,8 +1156,6 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1104 1156
1105const struct file_operations f2fs_file_operations = { 1157const struct file_operations f2fs_file_operations = {
1106 .llseek = f2fs_llseek, 1158 .llseek = f2fs_llseek,
1107 .read = new_sync_read,
1108 .write = new_sync_write,
1109 .read_iter = generic_file_read_iter, 1159 .read_iter = generic_file_read_iter,
1110 .write_iter = generic_file_write_iter, 1160 .write_iter = generic_file_write_iter,
1111 .open = generic_file_open, 1161 .open = generic_file_open,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 76adbc3641f1..ed58211fe79b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -435,7 +435,7 @@ next_step:
435 set_page_dirty(node_page); 435 set_page_dirty(node_page);
436 } 436 }
437 f2fs_put_page(node_page, 1); 437 f2fs_put_page(node_page, 1);
438 stat_inc_node_blk_count(sbi, 1); 438 stat_inc_node_blk_count(sbi, 1, gc_type);
439 } 439 }
440 440
441 if (initial) { 441 if (initial) {
@@ -622,7 +622,7 @@ next_step:
622 if (IS_ERR(data_page)) 622 if (IS_ERR(data_page))
623 continue; 623 continue;
624 move_data_page(inode, data_page, gc_type); 624 move_data_page(inode, data_page, gc_type);
625 stat_inc_data_blk_count(sbi, 1); 625 stat_inc_data_blk_count(sbi, 1, gc_type);
626 } 626 }
627 } 627 }
628 628
@@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
680 } 680 }
681 blk_finish_plug(&plug); 681 blk_finish_plug(&plug);
682 682
683 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 683 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
684 stat_inc_call_count(sbi->stat_info); 684 stat_inc_call_count(sbi->stat_info);
685 685
686 f2fs_put_page(sum_page, 1); 686 f2fs_put_page(sum_page, 1);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1484c00133cd..8140e4f0e538 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode)
21 if (f2fs_is_atomic_file(inode)) 21 if (f2fs_is_atomic_file(inode))
22 return false; 22 return false;
23 23
24 if (!S_ISREG(inode->i_mode)) 24 if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
25 return false; 25 return false;
26 26
27 if (i_size_read(inode) > MAX_INLINE_DATA) 27 if (i_size_read(inode) > MAX_INLINE_DATA)
@@ -50,10 +50,19 @@ void read_inline_data(struct page *page, struct page *ipage)
50 SetPageUptodate(page); 50 SetPageUptodate(page);
51} 51}
52 52
53static void truncate_inline_data(struct page *ipage) 53bool truncate_inline_inode(struct page *ipage, u64 from)
54{ 54{
55 void *addr;
56
57 if (from >= MAX_INLINE_DATA)
58 return false;
59
60 addr = inline_data_addr(ipage);
61
55 f2fs_wait_on_page_writeback(ipage, NODE); 62 f2fs_wait_on_page_writeback(ipage, NODE);
56 memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); 63 memset(addr + from, 0, MAX_INLINE_DATA - from);
64
65 return true;
57} 66}
58 67
59int f2fs_read_inline_data(struct inode *inode, struct page *page) 68int f2fs_read_inline_data(struct inode *inode, struct page *page)
@@ -122,7 +131,8 @@ no_update:
122 set_page_writeback(page); 131 set_page_writeback(page);
123 fio.blk_addr = dn->data_blkaddr; 132 fio.blk_addr = dn->data_blkaddr;
124 write_data_page(page, dn, &fio); 133 write_data_page(page, dn, &fio);
125 update_extent_cache(dn); 134 set_data_blkaddr(dn);
135 f2fs_update_extent_cache(dn);
126 f2fs_wait_on_page_writeback(page, DATA); 136 f2fs_wait_on_page_writeback(page, DATA);
127 if (dirty) 137 if (dirty)
128 inode_dec_dirty_pages(dn->inode); 138 inode_dec_dirty_pages(dn->inode);
@@ -131,7 +141,7 @@ no_update:
131 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); 141 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
132 142
133 /* clear inline data and flag after data writeback */ 143 /* clear inline data and flag after data writeback */
134 truncate_inline_data(dn->inode_page); 144 truncate_inline_inode(dn->inode_page, 0);
135clear_out: 145clear_out:
136 stat_dec_inline_inode(dn->inode); 146 stat_dec_inline_inode(dn->inode);
137 f2fs_clear_inline_inode(dn->inode); 147 f2fs_clear_inline_inode(dn->inode);
@@ -245,7 +255,7 @@ process_inline:
245 if (f2fs_has_inline_data(inode)) { 255 if (f2fs_has_inline_data(inode)) {
246 ipage = get_node_page(sbi, inode->i_ino); 256 ipage = get_node_page(sbi, inode->i_ino);
247 f2fs_bug_on(sbi, IS_ERR(ipage)); 257 f2fs_bug_on(sbi, IS_ERR(ipage));
248 truncate_inline_data(ipage); 258 truncate_inline_inode(ipage, 0);
249 f2fs_clear_inline_inode(inode); 259 f2fs_clear_inline_inode(inode);
250 update_inode(inode, ipage); 260 update_inode(inode, ipage);
251 f2fs_put_page(ipage, 1); 261 f2fs_put_page(ipage, 1);
@@ -363,7 +373,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
363 set_page_dirty(page); 373 set_page_dirty(page);
364 374
365 /* clear inline dir and flag after data writeback */ 375 /* clear inline dir and flag after data writeback */
366 truncate_inline_data(ipage); 376 truncate_inline_inode(ipage, 0);
367 377
368 stat_dec_inline_dir(dir); 378 stat_dec_inline_dir(dir);
369 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 379 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
@@ -380,21 +390,18 @@ out:
380} 390}
381 391
382int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, 392int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
383 struct inode *inode) 393 struct inode *inode, nid_t ino, umode_t mode)
384{ 394{
385 struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 395 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
386 struct page *ipage; 396 struct page *ipage;
387 unsigned int bit_pos; 397 unsigned int bit_pos;
388 f2fs_hash_t name_hash; 398 f2fs_hash_t name_hash;
389 struct f2fs_dir_entry *de;
390 size_t namelen = name->len; 399 size_t namelen = name->len;
391 struct f2fs_inline_dentry *dentry_blk = NULL; 400 struct f2fs_inline_dentry *dentry_blk = NULL;
401 struct f2fs_dentry_ptr d;
392 int slots = GET_DENTRY_SLOTS(namelen); 402 int slots = GET_DENTRY_SLOTS(namelen);
393 struct page *page; 403 struct page *page = NULL;
394 int err = 0; 404 int err = 0;
395 int i;
396
397 name_hash = f2fs_dentry_hash(name);
398 405
399 ipage = get_node_page(sbi, dir->i_ino); 406 ipage = get_node_page(sbi, dir->i_ino);
400 if (IS_ERR(ipage)) 407 if (IS_ERR(ipage))
@@ -410,32 +417,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
410 goto out; 417 goto out;
411 } 418 }
412 419
413 down_write(&F2FS_I(inode)->i_sem); 420 if (inode) {
414 page = init_inode_metadata(inode, dir, name, ipage); 421 down_write(&F2FS_I(inode)->i_sem);
415 if (IS_ERR(page)) { 422 page = init_inode_metadata(inode, dir, name, ipage);
416 err = PTR_ERR(page); 423 if (IS_ERR(page)) {
417 goto fail; 424 err = PTR_ERR(page);
425 goto fail;
426 }
418 } 427 }
419 428
420 f2fs_wait_on_page_writeback(ipage, NODE); 429 f2fs_wait_on_page_writeback(ipage, NODE);
421 de = &dentry_blk->dentry[bit_pos]; 430
422 de->hash_code = name_hash; 431 name_hash = f2fs_dentry_hash(name);
423 de->name_len = cpu_to_le16(namelen); 432 make_dentry_ptr(&d, (void *)dentry_blk, 2);
424 memcpy(dentry_blk->filename[bit_pos], name->name, name->len); 433 f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
425 de->ino = cpu_to_le32(inode->i_ino); 434
426 set_de_type(de, inode);
427 for (i = 0; i < slots; i++)
428 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
429 set_page_dirty(ipage); 435 set_page_dirty(ipage);
430 436
431 /* we don't need to mark_inode_dirty now */ 437 /* we don't need to mark_inode_dirty now */
432 F2FS_I(inode)->i_pino = dir->i_ino; 438 if (inode) {
433 update_inode(inode, page); 439 F2FS_I(inode)->i_pino = dir->i_ino;
434 f2fs_put_page(page, 1); 440 update_inode(inode, page);
441 f2fs_put_page(page, 1);
442 }
435 443
436 update_parent_metadata(dir, inode, 0); 444 update_parent_metadata(dir, inode, 0);
437fail: 445fail:
438 up_write(&F2FS_I(inode)->i_sem); 446 if (inode)
447 up_write(&F2FS_I(inode)->i_sem);
439 448
440 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 449 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
441 update_inode(dir, ipage); 450 update_inode(dir, ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2d002e3738a7..e622ec95409e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -51,6 +51,15 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
51 } 51 }
52} 52}
53 53
54static bool __written_first_block(struct f2fs_inode *ri)
55{
56 block_t addr = le32_to_cpu(ri->i_addr[0]);
57
58 if (addr != NEW_ADDR && addr != NULL_ADDR)
59 return true;
60 return false;
61}
62
54static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 63static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
55{ 64{
56 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 65 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -130,7 +139,8 @@ static int do_read_inode(struct inode *inode)
130 fi->i_pino = le32_to_cpu(ri->i_pino); 139 fi->i_pino = le32_to_cpu(ri->i_pino);
131 fi->i_dir_level = ri->i_dir_level; 140 fi->i_dir_level = ri->i_dir_level;
132 141
133 get_extent_info(&fi->ext, ri->i_ext); 142 f2fs_init_extent_cache(inode, &ri->i_ext);
143
134 get_inline_info(fi, ri); 144 get_inline_info(fi, ri);
135 145
136 /* check data exist */ 146 /* check data exist */
@@ -140,6 +150,9 @@ static int do_read_inode(struct inode *inode)
140 /* get rdev by using inline_info */ 150 /* get rdev by using inline_info */
141 __get_inode_rdev(inode, ri); 151 __get_inode_rdev(inode, ri);
142 152
153 if (__written_first_block(ri))
154 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
155
143 f2fs_put_page(node_page, 1); 156 f2fs_put_page(node_page, 1);
144 157
145 stat_inc_inline_inode(inode); 158 stat_inc_inline_inode(inode);
@@ -220,7 +233,11 @@ void update_inode(struct inode *inode, struct page *node_page)
220 ri->i_links = cpu_to_le32(inode->i_nlink); 233 ri->i_links = cpu_to_le32(inode->i_nlink);
221 ri->i_size = cpu_to_le64(i_size_read(inode)); 234 ri->i_size = cpu_to_le64(i_size_read(inode));
222 ri->i_blocks = cpu_to_le64(inode->i_blocks); 235 ri->i_blocks = cpu_to_le64(inode->i_blocks);
236
237 read_lock(&F2FS_I(inode)->ext_lock);
223 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); 238 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
239 read_unlock(&F2FS_I(inode)->ext_lock);
240
224 set_raw_inline(F2FS_I(inode), ri); 241 set_raw_inline(F2FS_I(inode), ri);
225 242
226 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 243 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -328,6 +345,12 @@ void f2fs_evict_inode(struct inode *inode)
328no_delete: 345no_delete:
329 stat_dec_inline_dir(inode); 346 stat_dec_inline_dir(inode);
330 stat_dec_inline_inode(inode); 347 stat_dec_inline_inode(inode);
348
349 /* update extent info in inode */
350 if (inode->i_nlink)
351 f2fs_preserve_extent_tree(inode);
352 f2fs_destroy_extent_tree(inode);
353
331 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 354 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
332 if (xnid) 355 if (xnid)
333 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 356 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e79639a9787a..407dde3d7a92 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -14,6 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h> 16#include <linux/dcache.h>
17#include <linux/namei.h>
17 18
18#include "f2fs.h" 19#include "f2fs.h"
19#include "node.h" 20#include "node.h"
@@ -187,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child)
187 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); 188 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
188} 189}
189 190
191static int __recover_dot_dentries(struct inode *dir, nid_t pino)
192{
193 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
194 struct qstr dot = QSTR_INIT(".", 1);
195 struct qstr dotdot = QSTR_INIT("..", 2);
196 struct f2fs_dir_entry *de;
197 struct page *page;
198 int err = 0;
199
200 f2fs_lock_op(sbi);
201
202 de = f2fs_find_entry(dir, &dot, &page);
203 if (de) {
204 f2fs_dentry_kunmap(dir, page);
205 f2fs_put_page(page, 0);
206 } else {
207 err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
208 if (err)
209 goto out;
210 }
211
212 de = f2fs_find_entry(dir, &dotdot, &page);
213 if (de) {
214 f2fs_dentry_kunmap(dir, page);
215 f2fs_put_page(page, 0);
216 } else {
217 err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
218 }
219out:
220 if (!err) {
221 clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS);
222 mark_inode_dirty(dir);
223 }
224
225 f2fs_unlock_op(sbi);
226 return err;
227}
228
190static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, 229static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
191 unsigned int flags) 230 unsigned int flags)
192{ 231{
@@ -206,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
206 inode = f2fs_iget(dir->i_sb, ino); 245 inode = f2fs_iget(dir->i_sb, ino);
207 if (IS_ERR(inode)) 246 if (IS_ERR(inode))
208 return ERR_CAST(inode); 247 return ERR_CAST(inode);
248
249 if (f2fs_has_inline_dots(inode)) {
250 int err;
251
252 err = __recover_dot_dentries(inode, dir->i_ino);
253 if (err) {
254 iget_failed(inode);
255 return ERR_PTR(err);
256 }
257 }
209 } 258 }
210 259
211 return d_splice_alias(inode, dentry); 260 return d_splice_alias(inode, dentry);
@@ -247,6 +296,23 @@ fail:
247 return err; 296 return err;
248} 297}
249 298
299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
300{
301 struct page *page;
302
303 page = page_follow_link_light(dentry, nd);
304 if (IS_ERR(page))
305 return page;
306
307 /* this is the broken symlink case */
308 if (*nd_get_link(nd) == 0) {
309 kunmap(page);
310 page_cache_release(page);
311 return ERR_PTR(-ENOENT);
312 }
313 return page;
314}
315
250static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 316static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
251 const char *symname) 317 const char *symname)
252{ 318{
@@ -276,6 +342,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
276 d_instantiate(dentry, inode); 342 d_instantiate(dentry, inode);
277 unlock_new_inode(inode); 343 unlock_new_inode(inode);
278 344
345 /*
346 * Flush the symlink data now to avoid leaving a broken symlink behind
347 * as much as possible. An fsync would be the best way, but there is
348 * no file descriptor available here to flush with.
349 *
350 * Note that an fsync of the parent directory is still needed to make
351 * this fully recoverable. If the symlink path is stored as
352 * inline_data, there is no performance regression.
353 */
354 filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1);
355
279 if (IS_DIRSYNC(dir)) 356 if (IS_DIRSYNC(dir))
280 f2fs_sync_fs(sbi->sb, 1); 357 f2fs_sync_fs(sbi->sb, 1);
281 return err; 358 return err;
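As the comment above says, an fsync of the parent directory is what makes the new symlink fully recoverable. A hypothetical userspace sketch of that pattern:

/*
 * Sketch, not f2fs-specific: create a symlink and make it durable by
 * fsyncing the parent directory, per the comment above. The symlink
 * data itself is already written back by the kernel path shown here.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int make_durable_symlink(const char *dirpath, const char *target,
			 const char *linkname)
{
	int dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);

	if (dirfd < 0)
		return -1;
	if (symlinkat(target, dirfd, linkname) < 0) {
		close(dirfd);
		return -1;
	}
	/* flush the new directory entry itself */
	if (fsync(dirfd) < 0) {
		close(dirfd);
		return -1;
	}
	return close(dirfd);
}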
@@ -693,6 +770,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
693 f2fs_unlock_op(sbi); 770 f2fs_unlock_op(sbi);
694 771
695 alloc_nid_done(sbi, inode->i_ino); 772 alloc_nid_done(sbi, inode->i_ino);
773
774 stat_inc_inline_inode(inode);
696 d_tmpfile(dentry, inode); 775 d_tmpfile(dentry, inode);
697 unlock_new_inode(inode); 776 unlock_new_inode(inode);
698 return 0; 777 return 0;
@@ -729,7 +808,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
729 808
730const struct inode_operations f2fs_symlink_inode_operations = { 809const struct inode_operations f2fs_symlink_inode_operations = {
731 .readlink = generic_readlink, 810 .readlink = generic_readlink,
732 .follow_link = page_follow_link_light, 811 .follow_link = f2fs_follow_link,
733 .put_link = page_put_link, 812 .put_link = page_put_link,
734 .getattr = f2fs_getattr, 813 .getattr = f2fs_getattr,
735 .setattr = f2fs_setattr, 814 .setattr = f2fs_setattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 97bd9d3db882..8ab0cf1930bd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
41 /* only uses low memory */ 41 /* only uses low memory */
42 avail_ram = val.totalram - val.totalhigh; 42 avail_ram = val.totalram - val.totalhigh;
43 43
44 /* give 25%, 25%, 50%, 50% memory for each components respectively */ 44 /*
45 * give 25%, 25%, 50%, 50%, 50% memory for each component respectively
46 */
45 if (type == FREE_NIDS) { 47 if (type == FREE_NIDS) {
46 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 48 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
47 PAGE_CACHE_SHIFT; 49 PAGE_CACHE_SHIFT;
@@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
62 mem_size += (sbi->im[i].ino_num * 64 mem_size += (sbi->im[i].ino_num *
63 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; 65 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
64 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 66 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
67 } else if (type == EXTENT_CACHE) {
68 mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
69 atomic_read(&sbi->total_ext_node) *
70 sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
71 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
65 } else { 72 } else {
66 if (sbi->sb->s_bdi->dirty_exceeded) 73 if (sbi->sb->s_bdi->dirty_exceeded)
67 return false; 74 return false;
@@ -494,7 +501,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
494 501
495 /* if inline_data is set, should not report any block indices */ 502 /* if inline_data is set, should not report any block indices */
496 if (f2fs_has_inline_data(dn->inode) && index) { 503 if (f2fs_has_inline_data(dn->inode) && index) {
497 err = -EINVAL; 504 err = -ENOENT;
498 f2fs_put_page(npage[0], 1); 505 f2fs_put_page(npage[0], 1);
499 goto release_out; 506 goto release_out;
500 } 507 }
@@ -995,6 +1002,7 @@ static int read_node_page(struct page *page, int rw)
995 get_node_info(sbi, page->index, &ni); 1002 get_node_info(sbi, page->index, &ni);
996 1003
997 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1004 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1005 ClearPageUptodate(page);
998 f2fs_put_page(page, 1); 1006 f2fs_put_page(page, 1);
999 return -ENOENT; 1007 return -ENOENT;
1000 } 1008 }
@@ -1306,6 +1314,7 @@ static int f2fs_write_node_page(struct page *page,
1306 1314
1307 /* This page is already truncated */ 1315 /* This page is already truncated */
1308 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1316 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1317 ClearPageUptodate(page);
1309 dec_page_count(sbi, F2FS_DIRTY_NODES); 1318 dec_page_count(sbi, F2FS_DIRTY_NODES);
1310 unlock_page(page); 1319 unlock_page(page);
1311 return 0; 1320 return 0;
@@ -1821,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1821 struct f2fs_nat_block *nat_blk; 1830 struct f2fs_nat_block *nat_blk;
1822 struct nat_entry *ne, *cur; 1831 struct nat_entry *ne, *cur;
1823 struct page *page = NULL; 1832 struct page *page = NULL;
1833 struct f2fs_nm_info *nm_i = NM_I(sbi);
1824 1834
1825 /* 1835 /*
1826 * there are two steps to flush nat entries: 1836 * there are two steps to flush nat entries:
@@ -1874,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1874 1884
1875 f2fs_bug_on(sbi, set->entry_cnt); 1885 f2fs_bug_on(sbi, set->entry_cnt);
1876 1886
1887 down_write(&nm_i->nat_tree_lock);
1877 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 1888 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1889 up_write(&nm_i->nat_tree_lock);
1878 kmem_cache_free(nat_entry_set_slab, set); 1890 kmem_cache_free(nat_entry_set_slab, set);
1879} 1891}
1880 1892
@@ -1902,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1902 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 1914 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1903 remove_nats_in_journal(sbi); 1915 remove_nats_in_journal(sbi);
1904 1916
1917 down_write(&nm_i->nat_tree_lock);
1905 while ((found = __gang_lookup_nat_set(nm_i, 1918 while ((found = __gang_lookup_nat_set(nm_i,
1906 set_idx, SETVEC_SIZE, setvec))) { 1919 set_idx, SETVEC_SIZE, setvec))) {
1907 unsigned idx; 1920 unsigned idx;
@@ -1910,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1910 __adjust_nat_entry_set(setvec[idx], &sets, 1923 __adjust_nat_entry_set(setvec[idx], &sets,
1911 MAX_NAT_JENTRIES(sum)); 1924 MAX_NAT_JENTRIES(sum));
1912 } 1925 }
1926 up_write(&nm_i->nat_tree_lock);
1913 1927
1914 /* flush dirty nats in nat entry set */ 1928 /* flush dirty nats in nat entry set */
1915 list_for_each_entry_safe(set, tmp, &sets, set_list) 1929 list_for_each_entry_safe(set, tmp, &sets, set_list)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index f405bbf2435a..c56026f1725c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -120,6 +120,7 @@ enum mem_type {
120 NAT_ENTRIES, /* indicates the cached nat entry */ 120 NAT_ENTRIES, /* indicates the cached nat entry */
121 DIRTY_DENTS, /* indicates dirty dentry pages */ 121 DIRTY_DENTS, /* indicates dirty dentry pages */
122 INO_ENTRIES, /* indicates inode entries */ 122 INO_ENTRIES, /* indicates inode entries */
123 EXTENT_CACHE, /* indicates extent cache */
123 BASE_CHECK, /* check kernel status */ 124 BASE_CHECK, /* check kernel status */
124}; 125};
125 126
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 41afb9534bbd..8d8ea99f2156 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
93 } 93 }
94retry: 94retry:
95 de = f2fs_find_entry(dir, &name, &page); 95 de = f2fs_find_entry(dir, &name, &page);
96 if (de && inode->i_ino == le32_to_cpu(de->ino)) { 96 if (de && inode->i_ino == le32_to_cpu(de->ino))
97 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98 goto out_unmap_put; 97 goto out_unmap_put;
99 } 98
100 if (de) { 99 if (de) {
101 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 100 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
102 if (IS_ERR(einode)) { 101 if (IS_ERR(einode)) {
@@ -115,7 +114,7 @@ retry:
115 iput(einode); 114 iput(einode);
116 goto retry; 115 goto retry;
117 } 116 }
118 err = __f2fs_add_link(dir, &name, inode); 117 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
119 if (err) 118 if (err)
120 goto out_err; 119 goto out_err;
121 120
@@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
187 goto next; 186 goto next;
188 187
189 entry = get_fsync_inode(head, ino_of_node(page)); 188 entry = get_fsync_inode(head, ino_of_node(page));
190 if (entry) { 189 if (!entry) {
191 if (IS_INODE(page) && is_dent_dnode(page))
192 set_inode_flag(F2FS_I(entry->inode),
193 FI_INC_LINK);
194 } else {
195 if (IS_INODE(page) && is_dent_dnode(page)) { 190 if (IS_INODE(page) && is_dent_dnode(page)) {
196 err = recover_inode_page(sbi, page); 191 err = recover_inode_page(sbi, page);
197 if (err) 192 if (err)
@@ -212,8 +207,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
212 if (IS_ERR(entry->inode)) { 207 if (IS_ERR(entry->inode)) {
213 err = PTR_ERR(entry->inode); 208 err = PTR_ERR(entry->inode);
214 kmem_cache_free(fsync_entry_slab, entry); 209 kmem_cache_free(fsync_entry_slab, entry);
215 if (err == -ENOENT) 210 if (err == -ENOENT) {
211 err = 0;
216 goto next; 212 goto next;
213 }
217 break; 214 break;
218 } 215 }
219 list_add_tail(&entry->list, head); 216 list_add_tail(&entry->list, head);
@@ -256,6 +253,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
256 struct f2fs_summary_block *sum_node; 253 struct f2fs_summary_block *sum_node;
257 struct f2fs_summary sum; 254 struct f2fs_summary sum;
258 struct page *sum_page, *node_page; 255 struct page *sum_page, *node_page;
256 struct dnode_of_data tdn = *dn;
259 nid_t ino, nid; 257 nid_t ino, nid;
260 struct inode *inode; 258 struct inode *inode;
261 unsigned int offset; 259 unsigned int offset;
@@ -283,17 +281,15 @@ got_it:
283 /* Use the locked dnode page and inode */ 281 /* Use the locked dnode page and inode */
284 nid = le32_to_cpu(sum.nid); 282 nid = le32_to_cpu(sum.nid);
285 if (dn->inode->i_ino == nid) { 283 if (dn->inode->i_ino == nid) {
286 struct dnode_of_data tdn = *dn;
287 tdn.nid = nid; 284 tdn.nid = nid;
285 if (!dn->inode_page_locked)
286 lock_page(dn->inode_page);
288 tdn.node_page = dn->inode_page; 287 tdn.node_page = dn->inode_page;
289 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 288 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
290 truncate_data_blocks_range(&tdn, 1); 289 goto truncate_out;
291 return 0;
292 } else if (dn->nid == nid) { 290 } else if (dn->nid == nid) {
293 struct dnode_of_data tdn = *dn;
294 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 291 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
295 truncate_data_blocks_range(&tdn, 1); 292 goto truncate_out;
296 return 0;
297 } 293 }
298 294
299 /* Get the node page */ 295 /* Get the node page */
@@ -317,18 +313,33 @@ got_it:
317 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 313 bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
318 le16_to_cpu(sum.ofs_in_node); 314 le16_to_cpu(sum.ofs_in_node);
319 315
320 if (ino != dn->inode->i_ino) { 316 /*
321 truncate_hole(inode, bidx, bidx + 1); 317 * if the inode page is locked, unlock it temporarily, but its
318 * reference count remains held.
319 */
320 if (ino == dn->inode->i_ino && dn->inode_page_locked)
321 unlock_page(dn->inode_page);
322
323 set_new_dnode(&tdn, inode, NULL, NULL, 0);
324 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
325 goto out;
326
327 if (tdn.data_blkaddr == blkaddr)
328 truncate_data_blocks_range(&tdn, 1);
329
330 f2fs_put_dnode(&tdn);
331out:
332 if (ino != dn->inode->i_ino)
322 iput(inode); 333 iput(inode);
323 } else { 334 else if (dn->inode_page_locked)
324 struct dnode_of_data tdn; 335 lock_page(dn->inode_page);
325 set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); 336 return 0;
326 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) 337
327 return 0; 338truncate_out:
328 if (tdn.data_blkaddr != NULL_ADDR) 339 if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
329 truncate_data_blocks_range(&tdn, 1); 340 truncate_data_blocks_range(&tdn, 1);
330 f2fs_put_page(tdn.node_page, 1); 341 if (dn->inode->i_ino == nid && !dn->inode_page_locked)
331 } 342 unlock_page(dn->inode_page);
332 return 0; 343 return 0;
333} 344}
334 345
@@ -384,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
384 src = datablock_addr(dn.node_page, dn.ofs_in_node); 395 src = datablock_addr(dn.node_page, dn.ofs_in_node);
385 dest = datablock_addr(page, dn.ofs_in_node); 396 dest = datablock_addr(page, dn.ofs_in_node);
386 397
387 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { 398 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
399 dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) {
400
388 if (src == NULL_ADDR) { 401 if (src == NULL_ADDR) {
389 err = reserve_new_block(&dn); 402 err = reserve_new_block(&dn);
390 /* We should not get -ENOSPC */ 403 /* We should not get -ENOSPC */
@@ -401,14 +414,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
401 /* write dummy data page */ 414 /* write dummy data page */
402 recover_data_page(sbi, NULL, &sum, src, dest); 415 recover_data_page(sbi, NULL, &sum, src, dest);
403 dn.data_blkaddr = dest; 416 dn.data_blkaddr = dest;
404 update_extent_cache(&dn); 417 set_data_blkaddr(&dn);
418 f2fs_update_extent_cache(&dn);
405 recovered++; 419 recovered++;
406 } 420 }
407 dn.ofs_in_node++; 421 dn.ofs_in_node++;
408 } 422 }
409 423
410 /* write node page in place */
411 set_summary(&sum, dn.nid, 0, 0);
412 if (IS_INODE(dn.node_page)) 424 if (IS_INODE(dn.node_page))
413 sync_inode_page(&dn); 425 sync_inode_page(&dn);
414 426
@@ -552,7 +564,7 @@ out:
552 mutex_unlock(&sbi->cp_mutex); 564 mutex_unlock(&sbi->cp_mutex);
553 } else if (need_writecp) { 565 } else if (need_writecp) {
554 struct cp_control cpc = { 566 struct cp_control cpc = {
555 .reason = CP_SYNC, 567 .reason = CP_RECOVERY,
556 }; 568 };
557 mutex_unlock(&sbi->cp_mutex); 569 mutex_unlock(&sbi->cp_mutex);
558 write_checkpoint(sbi, &cpc); 570 write_checkpoint(sbi, &cpc);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index daee4ab913da..f939660941bb 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -205,6 +205,8 @@ retry:
205 list_add_tail(&new->list, &fi->inmem_pages); 205 list_add_tail(&new->list, &fi->inmem_pages);
206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
207 mutex_unlock(&fi->inmem_lock); 207 mutex_unlock(&fi->inmem_lock);
208
209 trace_f2fs_register_inmem_page(page, INMEM);
208} 210}
209 211
210void commit_inmem_pages(struct inode *inode, bool abort) 212void commit_inmem_pages(struct inode *inode, bool abort)
@@ -238,11 +240,13 @@ void commit_inmem_pages(struct inode *inode, bool abort)
238 f2fs_wait_on_page_writeback(cur->page, DATA); 240 f2fs_wait_on_page_writeback(cur->page, DATA);
239 if (clear_page_dirty_for_io(cur->page)) 241 if (clear_page_dirty_for_io(cur->page))
240 inode_dec_dirty_pages(inode); 242 inode_dec_dirty_pages(inode);
243 trace_f2fs_commit_inmem_page(cur->page, INMEM);
241 do_write_data_page(cur->page, &fio); 244 do_write_data_page(cur->page, &fio);
242 submit_bio = true; 245 submit_bio = true;
243 } 246 }
244 f2fs_put_page(cur->page, 1); 247 f2fs_put_page(cur->page, 1);
245 } else { 248 } else {
249 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
246 put_page(cur->page); 250 put_page(cur->page);
247 } 251 }
248 radix_tree_delete(&fi->inmem_root, cur->page->index); 252 radix_tree_delete(&fi->inmem_root, cur->page->index);
@@ -277,6 +281,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
277 281
278void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) 282void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
279{ 283{
284 /* try to shrink the extent cache when there is not enough memory */
285 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
286
280 /* check the # of cached NAT entries and prefree segments */ 287 /* check the # of cached NAT entries and prefree segments */
281 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || 288 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
282 excess_prefree_segs(sbi) || 289 excess_prefree_segs(sbi) ||
@@ -549,7 +556,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
549 556
550 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 557 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
551 558
552 if (end - start < cpc->trim_minlen) 559 if (force && end - start < cpc->trim_minlen)
553 continue; 560 continue;
554 561
555 __add_discard_entry(sbi, cpc, start, end); 562 __add_discard_entry(sbi, cpc, start, end);
@@ -1164,6 +1171,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1164 curseg = CURSEG_I(sbi, type); 1171 curseg = CURSEG_I(sbi, type);
1165 1172
1166 mutex_lock(&curseg->curseg_mutex); 1173 mutex_lock(&curseg->curseg_mutex);
1174 mutex_lock(&sit_i->sentry_lock);
1167 1175
1168 /* direct_io'ed data is aligned to the segment for better performance */ 1176 /* direct_io'ed data is aligned to the segment for better performance */
1169 if (direct_io && curseg->next_blkoff) 1177 if (direct_io && curseg->next_blkoff)
@@ -1178,7 +1186,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1178 */ 1186 */
1179 __add_sum_entry(sbi, type, sum); 1187 __add_sum_entry(sbi, type, sum);
1180 1188
1181 mutex_lock(&sit_i->sentry_lock);
1182 __refresh_next_blkoff(sbi, curseg); 1189 __refresh_next_blkoff(sbi, curseg);
1183 1190
1184 stat_inc_block_count(sbi, curseg); 1191 stat_inc_block_count(sbi, curseg);
@@ -1730,6 +1737,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1730 mutex_lock(&curseg->curseg_mutex); 1737 mutex_lock(&curseg->curseg_mutex);
1731 mutex_lock(&sit_i->sentry_lock); 1738 mutex_lock(&sit_i->sentry_lock);
1732 1739
1740 if (!sit_i->dirty_sentries)
1741 goto out;
1742
1733 /* 1743 /*
1734 * add and account sit entries of dirty bitmap in sit entry 1744 * add and account sit entries of dirty bitmap in sit entry
1735 * set temporarily 1745 * set temporarily
@@ -1744,9 +1754,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1744 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) 1754 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1745 remove_sits_in_journal(sbi); 1755 remove_sits_in_journal(sbi);
1746 1756
1747 if (!sit_i->dirty_sentries)
1748 goto out;
1749
1750 /* 1757 /*
1751 * there are two steps to flush sit entries: 1758 * there are two steps to flush sit entries:
1752 * #1, flush sit entries to journal in current cold data summary block. 1759 * #1, flush sit entries to journal in current cold data summary block.
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7fd35111cf62..85d7fa7514b2 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
336 clear_bit(segno, free_i->free_segmap); 336 clear_bit(segno, free_i->free_segmap);
337 free_i->free_segments++; 337 free_i->free_segments++;
338 338
339 next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); 339 next = find_next_bit(free_i->free_segmap,
340 start_segno + sbi->segs_per_sec, start_segno);
340 if (next >= start_segno + sbi->segs_per_sec) { 341 if (next >= start_segno + sbi->segs_per_sec) {
341 clear_bit(secno, free_i->free_secmap); 342 clear_bit(secno, free_i->free_secmap);
342 free_i->free_sections++; 343 free_i->free_sections++;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2fe666a6ea9..160b88346b24 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -57,6 +57,8 @@ enum {
57 Opt_flush_merge, 57 Opt_flush_merge,
58 Opt_nobarrier, 58 Opt_nobarrier,
59 Opt_fastboot, 59 Opt_fastboot,
60 Opt_extent_cache,
61 Opt_noinline_data,
60 Opt_err, 62 Opt_err,
61}; 63};
62 64
@@ -78,6 +80,8 @@ static match_table_t f2fs_tokens = {
78 {Opt_flush_merge, "flush_merge"}, 80 {Opt_flush_merge, "flush_merge"},
79 {Opt_nobarrier, "nobarrier"}, 81 {Opt_nobarrier, "nobarrier"},
80 {Opt_fastboot, "fastboot"}, 82 {Opt_fastboot, "fastboot"},
83 {Opt_extent_cache, "extent_cache"},
84 {Opt_noinline_data, "noinline_data"},
81 {Opt_err, NULL}, 85 {Opt_err, NULL},
82}; 86};
83 87
@@ -367,6 +371,12 @@ static int parse_options(struct super_block *sb, char *options)
367 case Opt_fastboot: 371 case Opt_fastboot:
368 set_opt(sbi, FASTBOOT); 372 set_opt(sbi, FASTBOOT);
369 break; 373 break;
374 case Opt_extent_cache:
375 set_opt(sbi, EXTENT_CACHE);
376 break;
377 case Opt_noinline_data:
378 clear_opt(sbi, INLINE_DATA);
379 break;
370 default: 380 default:
371 f2fs_msg(sb, KERN_ERR, 381 f2fs_msg(sb, KERN_ERR,
372 "Unrecognized mount option \"%s\" or missing value", 382 "Unrecognized mount option \"%s\" or missing value",
@@ -392,7 +402,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
392 atomic_set(&fi->dirty_pages, 0); 402 atomic_set(&fi->dirty_pages, 0);
393 fi->i_current_depth = 1; 403 fi->i_current_depth = 1;
394 fi->i_advise = 0; 404 fi->i_advise = 0;
395 rwlock_init(&fi->ext.ext_lock); 405 rwlock_init(&fi->ext_lock);
396 init_rwsem(&fi->i_sem); 406 init_rwsem(&fi->i_sem);
397 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); 407 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
398 INIT_LIST_HEAD(&fi->inmem_pages); 408 INIT_LIST_HEAD(&fi->inmem_pages);
@@ -591,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
591 seq_puts(seq, ",disable_ext_identify"); 601 seq_puts(seq, ",disable_ext_identify");
592 if (test_opt(sbi, INLINE_DATA)) 602 if (test_opt(sbi, INLINE_DATA))
593 seq_puts(seq, ",inline_data"); 603 seq_puts(seq, ",inline_data");
604 else
605 seq_puts(seq, ",noinline_data");
594 if (test_opt(sbi, INLINE_DENTRY)) 606 if (test_opt(sbi, INLINE_DENTRY))
595 seq_puts(seq, ",inline_dentry"); 607 seq_puts(seq, ",inline_dentry");
596 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 608 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
@@ -599,6 +611,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
599 seq_puts(seq, ",nobarrier"); 611 seq_puts(seq, ",nobarrier");
600 if (test_opt(sbi, FASTBOOT)) 612 if (test_opt(sbi, FASTBOOT))
601 seq_puts(seq, ",fastboot"); 613 seq_puts(seq, ",fastboot");
614 if (test_opt(sbi, EXTENT_CACHE))
615 seq_puts(seq, ",extent_cache");
602 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 616 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
603 617
604 return 0; 618 return 0;
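Both new options can be exercised directly through mount(2); a minimal sketch with placeholder device and mount point (note that INLINE_DATA is now set by default in f2fs_fill_super below, so noinline_data is how it gets turned off):

/* Hypothetical sketch, equivalent to:
 *   mount -t f2fs -o extent_cache,noinline_data /dev/sdb1 /mnt
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sdb1", "/mnt", "f2fs", 0,
		  "extent_cache,noinline_data") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}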
@@ -959,7 +973,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
959 struct buffer_head *raw_super_buf; 973 struct buffer_head *raw_super_buf;
960 struct inode *root; 974 struct inode *root;
961 long err = -EINVAL; 975 long err = -EINVAL;
962 bool retry = true; 976 bool retry = true, need_fsck = false;
963 char *options = NULL; 977 char *options = NULL;
964 int i; 978 int i;
965 979
@@ -984,6 +998,7 @@ try_onemore:
984 sbi->active_logs = NR_CURSEG_TYPE; 998 sbi->active_logs = NR_CURSEG_TYPE;
985 999
986 set_opt(sbi, BG_GC); 1000 set_opt(sbi, BG_GC);
1001 set_opt(sbi, INLINE_DATA);
987 1002
988#ifdef CONFIG_F2FS_FS_XATTR 1003#ifdef CONFIG_F2FS_FS_XATTR
989 set_opt(sbi, XATTR_USER); 1004 set_opt(sbi, XATTR_USER);
@@ -1020,7 +1035,6 @@ try_onemore:
1020 sbi->raw_super = raw_super; 1035 sbi->raw_super = raw_super;
1021 sbi->raw_super_buf = raw_super_buf; 1036 sbi->raw_super_buf = raw_super_buf;
1022 mutex_init(&sbi->gc_mutex); 1037 mutex_init(&sbi->gc_mutex);
1023 mutex_init(&sbi->writepages);
1024 mutex_init(&sbi->cp_mutex); 1038 mutex_init(&sbi->cp_mutex);
1025 init_rwsem(&sbi->node_write); 1039 init_rwsem(&sbi->node_write);
1026 clear_sbi_flag(sbi, SBI_POR_DOING); 1040 clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -1072,6 +1086,8 @@ try_onemore:
1072 INIT_LIST_HEAD(&sbi->dir_inode_list); 1086 INIT_LIST_HEAD(&sbi->dir_inode_list);
1073 spin_lock_init(&sbi->dir_inode_lock); 1087 spin_lock_init(&sbi->dir_inode_lock);
1074 1088
1089 init_extent_cache_info(sbi);
1090
1075 init_ino_entry_info(sbi); 1091 init_ino_entry_info(sbi);
1076 1092
1077 /* setup f2fs internal modules */ 1093 /* setup f2fs internal modules */
@@ -1146,9 +1162,6 @@ try_onemore:
1146 if (err) 1162 if (err)
1147 goto free_proc; 1163 goto free_proc;
1148 1164
1149 if (!retry)
1150 set_sbi_flag(sbi, SBI_NEED_FSCK);
1151
1152 /* recover fsynced data */ 1165 /* recover fsynced data */
1153 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1166 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1154 /* 1167 /*
@@ -1160,8 +1173,13 @@ try_onemore:
1160 err = -EROFS; 1173 err = -EROFS;
1161 goto free_kobj; 1174 goto free_kobj;
1162 } 1175 }
1176
1177 if (need_fsck)
1178 set_sbi_flag(sbi, SBI_NEED_FSCK);
1179
1163 err = recover_fsync_data(sbi); 1180 err = recover_fsync_data(sbi);
1164 if (err) { 1181 if (err) {
1182 need_fsck = true;
1165 f2fs_msg(sb, KERN_ERR, 1183 f2fs_msg(sb, KERN_ERR,
1166 "Cannot recover all fsync data errno=%ld", err); 1184 "Cannot recover all fsync data errno=%ld", err);
1167 goto free_kobj; 1185 goto free_kobj;
@@ -1212,7 +1230,7 @@ free_sbi:
1212 1230
1213 /* give only one another chance */ 1231 /* give only one another chance */
1214 if (retry) { 1232 if (retry) {
1215 retry = 0; 1233 retry = false;
1216 shrink_dcache_sb(sb); 1234 shrink_dcache_sb(sb);
1217 goto try_onemore; 1235 goto try_onemore;
1218 } 1236 }
@@ -1278,10 +1296,13 @@ static int __init init_f2fs_fs(void)
1278 err = create_checkpoint_caches(); 1296 err = create_checkpoint_caches();
1279 if (err) 1297 if (err)
1280 goto free_segment_manager_caches; 1298 goto free_segment_manager_caches;
1299 err = create_extent_cache();
1300 if (err)
1301 goto free_checkpoint_caches;
1281 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); 1302 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
1282 if (!f2fs_kset) { 1303 if (!f2fs_kset) {
1283 err = -ENOMEM; 1304 err = -ENOMEM;
1284 goto free_checkpoint_caches; 1305 goto free_extent_cache;
1285 } 1306 }
1286 err = register_filesystem(&f2fs_fs_type); 1307 err = register_filesystem(&f2fs_fs_type);
1287 if (err) 1308 if (err)
@@ -1292,6 +1313,8 @@ static int __init init_f2fs_fs(void)
1292 1313
1293free_kset: 1314free_kset:
1294 kset_unregister(f2fs_kset); 1315 kset_unregister(f2fs_kset);
1316free_extent_cache:
1317 destroy_extent_cache();
1295free_checkpoint_caches: 1318free_checkpoint_caches:
1296 destroy_checkpoint_caches(); 1319 destroy_checkpoint_caches();
1297free_segment_manager_caches: 1320free_segment_manager_caches:
@@ -1309,6 +1332,7 @@ static void __exit exit_f2fs_fs(void)
1309 remove_proc_entry("fs/f2fs", NULL); 1332 remove_proc_entry("fs/f2fs", NULL);
1310 f2fs_destroy_root_stats(); 1333 f2fs_destroy_root_stats();
1311 unregister_filesystem(&f2fs_fs_type); 1334 unregister_filesystem(&f2fs_fs_type);
1335 destroy_extent_cache();
1312 destroy_checkpoint_caches(); 1336 destroy_checkpoint_caches();
1313 destroy_segment_manager_caches(); 1337 destroy_segment_manager_caches();
1314 destroy_node_manager_caches(); 1338 destroy_node_manager_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 5072bf9ae0ef..b0fd2f2d0716 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -135,7 +135,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
135 if (strcmp(name, "") != 0) 135 if (strcmp(name, "") != 0)
136 return -EINVAL; 136 return -EINVAL;
137 137
138 *((char *)buffer) = F2FS_I(inode)->i_advise; 138 if (buffer)
139 *((char *)buffer) = F2FS_I(inode)->i_advise;
139 return sizeof(char); 140 return sizeof(char);
140} 141}
141 142
@@ -152,6 +153,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
152 return -EINVAL; 153 return -EINVAL;
153 154
154 F2FS_I(inode)->i_advise |= *(char *)value; 155 F2FS_I(inode)->i_advise |= *(char *)value;
156 mark_inode_dirty(inode);
155 return 0; 157 return 0;
156} 158}
157 159
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..93fc62232ec2 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -8,9 +8,7 @@
8 * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers. 8 * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers.
9 */ 9 */
10 10
11#include <linux/fs.h>
12#include <linux/slab.h> 11#include <linux/slab.h>
13#include <linux/buffer_head.h>
14#include "fat.h" 12#include "fat.h"
15 13
16/* this must be > 0. */ 14/* this must be > 0. */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c5d6bb939d19..4afc4d9d2e41 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -13,13 +13,9 @@
13 * Short name translation 1999, 2001 by Wolfram Pienkoss <wp@bszh.de> 13 * Short name translation 1999, 2001 by Wolfram Pienkoss <wp@bszh.de>
14 */ 14 */
15 15
16#include <linux/module.h>
17#include <linux/slab.h> 16#include <linux/slab.h>
18#include <linux/time.h>
19#include <linux/buffer_head.h>
20#include <linux/compat.h> 17#include <linux/compat.h>
21#include <linux/uaccess.h> 18#include <linux/uaccess.h>
22#include <linux/kernel.h>
23#include "fat.h" 19#include "fat.h"
24 20
25/* 21/*
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 64e295e8ff38..be5e15323bab 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -2,11 +2,8 @@
2#define _FAT_H 2#define _FAT_H
3 3
4#include <linux/buffer_head.h> 4#include <linux/buffer_head.h>
5#include <linux/string.h>
6#include <linux/nls.h> 5#include <linux/nls.h>
7#include <linux/fs.h>
8#include <linux/hash.h> 6#include <linux/hash.h>
9#include <linux/mutex.h>
10#include <linux/ratelimit.h> 7#include <linux/ratelimit.h>
11#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
12 9
@@ -66,7 +63,7 @@ struct msdos_sb_info {
66 unsigned short sec_per_clus; /* sectors/cluster */ 63 unsigned short sec_per_clus; /* sectors/cluster */
67 unsigned short cluster_bits; /* log2(cluster_size) */ 64 unsigned short cluster_bits; /* log2(cluster_size) */
68 unsigned int cluster_size; /* cluster size */ 65 unsigned int cluster_size; /* cluster size */
69 unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */ 66 unsigned char fats, fat_bits; /* number of FATs, FAT bits (12, 16 or 32) */
70 unsigned short fat_start; 67 unsigned short fat_start;
71 unsigned long fat_length; /* FAT start & length (sec.) */ 68 unsigned long fat_length; /* FAT start & length (sec.) */
72 unsigned long dir_start; 69 unsigned long dir_start;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 260705c58062..8226557130a2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -3,9 +3,6 @@
3 * Released under GPL v2. 3 * Released under GPL v2.
4 */ 4 */
5 5
6#include <linux/module.h>
7#include <linux/fs.h>
8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h> 6#include <linux/blkdev.h>
10#include "fat.h" 7#include "fat.h"
11 8
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8429c68e3057..cf50d93565a2 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -10,10 +10,6 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/compat.h> 11#include <linux/compat.h>
12#include <linux/mount.h> 12#include <linux/mount.h>
13#include <linux/time.h>
14#include <linux/buffer_head.h>
15#include <linux/writeback.h>
16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 13#include <linux/blkdev.h>
18#include <linux/fsnotify.h> 14#include <linux/fsnotify.h>
19#include <linux/security.h> 15#include <linux/security.h>
@@ -170,8 +166,6 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
170 166
171const struct file_operations fat_file_operations = { 167const struct file_operations fat_file_operations = {
172 .llseek = generic_file_llseek, 168 .llseek = generic_file_llseek,
173 .read = new_sync_read,
174 .write = new_sync_write,
175 .read_iter = generic_file_read_iter, 169 .read_iter = generic_file_read_iter,
176 .write_iter = generic_file_write_iter, 170 .write_iter = generic_file_write_iter,
177 .mmap = generic_file_mmap, 171 .mmap = generic_file_mmap,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5263c7..c06774658345 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -11,21 +11,12 @@
11 */ 11 */
12 12
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/time.h>
16#include <linux/slab.h>
17#include <linux/seq_file.h>
18#include <linux/pagemap.h> 14#include <linux/pagemap.h>
19#include <linux/mpage.h> 15#include <linux/mpage.h>
20#include <linux/buffer_head.h>
21#include <linux/mount.h>
22#include <linux/aio.h>
23#include <linux/vfs.h> 16#include <linux/vfs.h>
17#include <linux/seq_file.h>
24#include <linux/parser.h> 18#include <linux/parser.h>
25#include <linux/uio.h> 19#include <linux/uio.h>
26#include <linux/writeback.h>
27#include <linux/log2.h>
28#include <linux/hash.h>
29#include <linux/blkdev.h> 20#include <linux/blkdev.h>
30#include <asm/unaligned.h> 21#include <asm/unaligned.h>
31#include "fat.h" 22#include "fat.h"
@@ -246,8 +237,7 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
246 return err; 237 return err;
247} 238}
248 239
249static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, 240static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
250 struct iov_iter *iter,
251 loff_t offset) 241 loff_t offset)
252{ 242{
253 struct file *file = iocb->ki_filp; 243 struct file *file = iocb->ki_filp;
@@ -256,7 +246,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
256 size_t count = iov_iter_count(iter); 246 size_t count = iov_iter_count(iter);
257 ssize_t ret; 247 ssize_t ret;
258 248
259 if (rw == WRITE) { 249 if (iov_iter_rw(iter) == WRITE) {
260 /* 250 /*
261 * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), 251 * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
262 * so we need to update the ->mmu_private to block boundary. 252 * so we need to update the ->mmu_private to block boundary.
@@ -275,8 +265,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
275 * FAT need to use the DIO_LOCKING for avoiding the race 265 * FAT need to use the DIO_LOCKING for avoiding the race
276 * condition of fat_get_block() and ->truncate(). 266 * condition of fat_get_block() and ->truncate().
277 */ 267 */
278 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); 268 ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block);
279 if (ret < 0 && (rw & WRITE)) 269 if (ret < 0 && iov_iter_rw(iter) == WRITE)
280 fat_write_failed(mapping, offset + count); 270 fat_write_failed(mapping, offset + count);
281 271
282 return ret; 272 return ret;
@@ -1280,8 +1270,7 @@ out:
1280 1270
1281static int fat_read_root(struct inode *inode) 1271static int fat_read_root(struct inode *inode)
1282{ 1272{
1283 struct super_block *sb = inode->i_sb; 1273 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
1284 struct msdos_sb_info *sbi = MSDOS_SB(sb);
1285 int error; 1274 int error;
1286 1275
1287 MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO; 1276 MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index d8da2d2e30ae..c4589e981760 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -6,10 +6,6 @@
6 * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru) 6 * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
7 */ 7 */
8 8
9#include <linux/module.h>
10#include <linux/fs.h>
11#include <linux/buffer_head.h>
12#include <linux/time.h>
13#include "fat.h" 9#include "fat.h"
14 10
15/* 11/*
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index a783b0e1272a..cc6a8541b668 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -7,8 +7,6 @@
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h>
11#include <linux/buffer_head.h>
12#include "fat.h" 10#include "fat.h"
13 11
14/* Characters that are undesirable in an MS-DOS file name */ 12/* Characters that are undesirable in an MS-DOS file name */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b8b92c2f9683..7e0974eebd8e 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -16,10 +16,8 @@
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/jiffies.h>
20#include <linux/ctype.h> 19#include <linux/ctype.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/buffer_head.h>
23#include <linux/namei.h> 21#include <linux/namei.h>
24#include "fat.h" 22#include "fat.h"
25 23
diff --git a/fs/file.c b/fs/file.c
index ee738ea028fa..93c5f89c248b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,8 +638,7 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
638 file = fcheck_files(files, fd); 638 file = fcheck_files(files, fd);
639 if (file) { 639 if (file) {
640 /* File object ref couldn't be taken */ 640 /* File object ref couldn't be taken */
641 if ((file->f_mode & mask) || 641 if ((file->f_mode & mask) || !get_file_rcu(file))
642 !atomic_long_inc_not_zero(&file->f_count))
643 file = NULL; 642 file = NULL;
644 } 643 }
645 rcu_read_unlock(); 644 rcu_read_unlock();
diff --git a/fs/file_table.c b/fs/file_table.c
index 3f85411b03ce..294174dcc226 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -168,10 +168,10 @@ struct file *alloc_file(struct path *path, fmode_t mode,
168 file->f_inode = path->dentry->d_inode; 168 file->f_inode = path->dentry->d_inode;
169 file->f_mapping = path->dentry->d_inode->i_mapping; 169 file->f_mapping = path->dentry->d_inode->i_mapping;
170 if ((mode & FMODE_READ) && 170 if ((mode & FMODE_READ) &&
171 likely(fop->read || fop->aio_read || fop->read_iter)) 171 likely(fop->read || fop->read_iter))
172 mode |= FMODE_CAN_READ; 172 mode |= FMODE_CAN_READ;
173 if ((mode & FMODE_WRITE) && 173 if ((mode & FMODE_WRITE) &&
174 likely(fop->write || fop->aio_write || fop->write_iter)) 174 likely(fop->write || fop->write_iter))
175 mode |= FMODE_CAN_WRITE; 175 mode |= FMODE_CAN_WRITE;
176 file->f_mode = mode; 176 file->f_mode = mode;
177 file->f_op = fop; 177 file->f_op = fop;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
53 struct completion *done; /* set if the caller waits */ 53 struct completion *done; /* set if the caller waits */
54}; 54};
55 55
56/*
57 * If an inode is constantly having its pages dirtied, but then the
58 * updates stop dirtytime_expire_interval seconds in the past, it's
59 * possible for the worst case time between when an inode has its
60 * timestamps updated and when they finally get written out to be two
61 * dirtytime_expire_intervals. We set the default to 12 hours (in
62 * seconds), which means most of the time inodes will have their
63 * timestamps written to disk after 12 hours, but in the worst case a
64 * few inodes might not have their timestamps updated for 24 hours.
65 */
66unsigned int dirtytime_expire_interval = 12 * 60 * 60;
67
56/** 68/**
57 * writeback_in_progress - determine whether there is writeback in progress 69 * writeback_in_progress - determine whether there is writeback in progress
58 * @bdi: the device's backing_dev_info structure. 70 * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
275 287
276 if ((flags & EXPIRE_DIRTY_ATIME) == 0) 288 if ((flags & EXPIRE_DIRTY_ATIME) == 0)
277 older_than_this = work->older_than_this; 289 older_than_this = work->older_than_this;
278 else if ((work->reason == WB_REASON_SYNC) == 0) { 290 else if (!work->for_sync) {
279 expire_time = jiffies - (HZ * 86400); 291 expire_time = jiffies - (dirtytime_expire_interval * HZ);
280 older_than_this = &expire_time; 292 older_than_this = &expire_time;
281 } 293 }
282 while (!list_empty(delaying_queue)) { 294 while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
458 */ 470 */
459 redirty_tail(inode, wb); 471 redirty_tail(inode, wb);
460 } else if (inode->i_state & I_DIRTY_TIME) { 472 } else if (inode->i_state & I_DIRTY_TIME) {
473 inode->dirtied_when = jiffies;
461 list_move(&inode->i_wb_list, &wb->b_dirty_time); 474 list_move(&inode->i_wb_list, &wb->b_dirty_time);
462 } else { 475 } else {
463 /* The inode is clean. Remove from writeback lists. */ 476 /* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
505 spin_lock(&inode->i_lock); 518 spin_lock(&inode->i_lock);
506 519
507 dirty = inode->i_state & I_DIRTY; 520 dirty = inode->i_state & I_DIRTY;
508 if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && 521 if (inode->i_state & I_DIRTY_TIME) {
509 (inode->i_state & I_DIRTY_TIME)) || 522 if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
510 (inode->i_state & I_DIRTY_TIME_EXPIRED)) { 523 unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
511 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; 524 unlikely(time_after(jiffies,
512 trace_writeback_lazytime(inode); 525 (inode->dirtied_time_when +
513 } 526 dirtytime_expire_interval * HZ)))) {
527 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
528 trace_writeback_lazytime(inode);
529 }
530 } else
531 inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
514 inode->i_state &= ~dirty; 532 inode->i_state &= ~dirty;
515 533
516 /* 534 /*
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1131 rcu_read_unlock(); 1149 rcu_read_unlock();
1132} 1150}
1133 1151
1152/*
1153 * Wake up bdi's periodically to make sure dirtytime inodes get
1154 * written back periodically. We deliberately do *not* check the
1155 * b_dirtytime list in wb_has_dirty_io(), since this would cause the
1156 * kernel to be constantly waking up once there are any dirtytime
1157 * inodes on the system. So instead we define a separate delayed work
1158 * function which gets called much more rarely. (By default, only
1159 * once every 12 hours.)
1160 *
1161 * If there is any other write activity going on in the file system,
1162 * this function won't be necessary. But if the only thing that has
1163 * happened on the file system is a dirtytime inode caused by an atime
1164 * update, we need this infrastructure below to make sure that inode
1165 * eventually gets pushed out to disk.
1166 */
1167static void wakeup_dirtytime_writeback(struct work_struct *w);
1168static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
1169
1170static void wakeup_dirtytime_writeback(struct work_struct *w)
1171{
1172 struct backing_dev_info *bdi;
1173
1174 rcu_read_lock();
1175 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1176 if (list_empty(&bdi->wb.b_dirty_time))
1177 continue;
1178 bdi_wakeup_thread(bdi);
1179 }
1180 rcu_read_unlock();
1181 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1182}
1183
1184static int __init start_dirtytime_writeback(void)
1185{
1186 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1187 return 0;
1188}
1189__initcall(start_dirtytime_writeback);
1190
1191int dirtytime_interval_handler(struct ctl_table *table, int write,
1192 void __user *buffer, size_t *lenp, loff_t *ppos)
1193{
1194 int ret;
1195
1196 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
1197 if (ret == 0 && write)
1198 mod_delayed_work(system_wq, &dirtytime_work, 0);
1199 return ret;
1200}
1201
1134static noinline void block_dump___mark_inode_dirty(struct inode *inode) 1202static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1135{ 1203{
1136 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 1204 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1269 } 1337 }
1270 1338
1271 inode->dirtied_when = jiffies; 1339 inode->dirtied_when = jiffies;
1272 list_move(&inode->i_wb_list, dirtytime ? 1340 if (dirtytime)
1273 &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); 1341 inode->dirtied_time_when = jiffies;
1342 if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
1343 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1344 else
1345 list_move(&inode->i_wb_list,
1346 &bdi->wb.b_dirty_time);
1274 spin_unlock(&bdi->wb.list_lock); 1347 spin_unlock(&bdi->wb.list_lock);
1275 trace_writeback_dirty_inode_enqueue(inode); 1348 trace_writeback_dirty_inode_enqueue(inode);
1276 1349
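
The fs-writeback.c changes above bound how stale a lazytime inode's timestamps can get: dirtied_time_when records when the inode was lazily dirtied, and __writeback_single_inode() forces the timestamps out once a full dirtytime_expire_interval has elapsed, which gives the two-interval worst case described in the new comment. A small userspace model of that expiry test, with stand-ins for jiffies, HZ and time_after() (all values illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins for kernel timekeeping; not real values. */
    #define HZ 100
    static unsigned long jiffies;

    static unsigned int dirtytime_expire_interval = 12 * 60 * 60;

    /* Kernel-style wrap-safe "is a later than b?" comparison. */
    static bool time_after(unsigned long a, unsigned long b)
    {
        return (long)(b - a) < 0;
    }

    /* Mirrors the new test in __writeback_single_inode(): force the
     * timestamps out once a full interval has passed since the inode
     * was lazily dirtied. */
    static bool dirtytime_expired(unsigned long dirtied_time_when)
    {
        return time_after(jiffies, dirtied_time_when +
                          (unsigned long)dirtytime_expire_interval * HZ);
    }

    int main(void)
    {
        unsigned long stamped = 0;

        jiffies = (unsigned long)dirtytime_expire_interval * HZ + 1;
        printf("expired after one interval: %d\n", dirtytime_expired(stamped));
        return 0;
    }
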
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index b06c98796afb..611b5408f6ec 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock);
9void pin_remove(struct fs_pin *pin) 9void pin_remove(struct fs_pin *pin)
10{ 10{
11 spin_lock(&pin_lock); 11 spin_lock(&pin_lock);
12 hlist_del(&pin->m_list); 12 hlist_del_init(&pin->m_list);
13 hlist_del(&pin->s_list); 13 hlist_del_init(&pin->s_list);
14 spin_unlock(&pin_lock); 14 spin_unlock(&pin_lock);
15 spin_lock_irq(&pin->wait.lock); 15 spin_lock_irq(&pin->wait.lock);
16 pin->done = 1; 16 pin->done = 1;
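
The fs_pin change swaps hlist_del() for hlist_del_init(): the former poisons the node's pointers so a second removal crashes, while the latter reinitializes them so the node reads as unhashed and can be tested or removed again safely. A simplified model of the difference (the poison values and node layout are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified hash-list node modeled on the kernel's. */
    struct hnode { struct hnode *next, **pprev; };

    static bool hlist_unhashed(const struct hnode *n) { return !n->pprev; }

    /* hlist_del(): unlink and poison -- touching the node again
     * dereferences the poison values and oopses loudly. */
    static void hlist_del(struct hnode *n)
    {
        *n->pprev = n->next;
        if (n->next)
            n->next->pprev = n->pprev;
        n->next = (struct hnode *)0x100;      /* LIST_POISON-style */
        n->pprev = (struct hnode **)0x200;
    }

    /* hlist_del_init(): unlink and reinitialize -- the node now reads
     * as unhashed, so racing teardown paths can safely re-check it. */
    static void hlist_del_init(struct hnode *n)
    {
        if (hlist_unhashed(n))                /* already gone: no-op */
            return;
        *n->pprev = n->next;
        if (n->next)
            n->next->pprev = n->pprev;
        n->next = NULL;
        n->pprev = NULL;
    }

    int main(void)
    {
        struct hnode *head = NULL;
        struct hnode node = { NULL, &head };

        head = &node;
        hlist_del_init(&node);
        hlist_del_init(&node);                /* safe: node is unhashed */
        printf("unhashed: %d\n", hlist_unhashed(&node));
        (void)hlist_del;                      /* shown for contrast only */
        return 0;
    }
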
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7abba1c..e5bbf748b698 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
38#include <linux/device.h> 38#include <linux/device.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#include <linux/fs.h> 40#include <linux/fs.h>
41#include <linux/aio.h>
42#include <linux/kdev_t.h> 41#include <linux/kdev_t.h>
43#include <linux/kthread.h> 42#include <linux/kthread.h>
44#include <linux/list.h> 43#include <linux/list.h>
@@ -48,6 +47,7 @@
48#include <linux/slab.h> 47#include <linux/slab.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/uio.h>
51 51
52#include "fuse_i.h" 52#include "fuse_i.h"
53 53
@@ -88,32 +88,23 @@ static struct list_head *cuse_conntbl_head(dev_t devt)
88 * FUSE file. 88 * FUSE file.
89 */ 89 */
90 90
91static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, 91static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
92 loff_t *ppos)
93{ 92{
93 struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
94 loff_t pos = 0; 94 loff_t pos = 0;
95 struct iovec iov = { .iov_base = buf, .iov_len = count };
96 struct fuse_io_priv io = { .async = 0, .file = file };
97 struct iov_iter ii;
98 iov_iter_init(&ii, READ, &iov, 1, count);
99 95
100 return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE); 96 return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
101} 97}
102 98
103static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
104 size_t count, loff_t *ppos)
105{ 100{
101 struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
106 loff_t pos = 0; 102 loff_t pos = 0;
107 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
108 struct fuse_io_priv io = { .async = 0, .file = file };
109 struct iov_iter ii;
110 iov_iter_init(&ii, WRITE, &iov, 1, count);
111
112 /* 103 /*
113 * No locking or generic_write_checks(), the server is 104 * No locking or generic_write_checks(), the server is
114 * responsible for locking and sanity checks. 105 * responsible for locking and sanity checks.
115 */ 106 */
116 return fuse_direct_io(&io, &ii, &pos, 107 return fuse_direct_io(&io, from, &pos,
117 FUSE_DIO_WRITE | FUSE_DIO_CUSE); 108 FUSE_DIO_WRITE | FUSE_DIO_CUSE);
118} 109}
119 110
@@ -186,8 +177,8 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
186 177
187static const struct file_operations cuse_frontend_fops = { 178static const struct file_operations cuse_frontend_fops = {
188 .owner = THIS_MODULE, 179 .owner = THIS_MODULE,
189 .read = cuse_read, 180 .read_iter = cuse_read_iter,
190 .write = cuse_write, 181 .write_iter = cuse_write_iter,
191 .open = cuse_open, 182 .open = cuse_open,
192 .release = cuse_release, 183 .release = cuse_release,
193 .unlocked_ioctl = cuse_file_ioctl, 184 .unlocked_ioctl = cuse_file_ioctl,
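
The cuse conversion above replaces the hand-rolled (buf, count) plumbing with ->read_iter/->write_iter, so the segment bookkeeping lives in the iov_iter the VFS hands in rather than in the driver. A toy userspace iterator showing the bookkeeping that moves out of cuse (toy_iter and copy_to_iter_toy are hypothetical names, loosely modeled on copy_to_iter()):

    #include <stdio.h>
    #include <string.h>
    #include <sys/uio.h>

    /* Toy iterator over an iovec array, standing in for struct iov_iter. */
    struct toy_iter {
        const struct iovec *iov;
        int nr_segs;
        size_t seg_off;   /* offset into the current segment */
        size_t count;     /* bytes remaining across all segments */
    };

    /* Copy up to len bytes of src into the iterator, advancing it --
     * exactly the per-segment arithmetic cuse_read() used to do by hand. */
    static size_t copy_to_iter_toy(const char *src, size_t len,
                                   struct toy_iter *it)
    {
        size_t done = 0;

        while (done < len && it->count) {
            size_t room = it->iov->iov_len - it->seg_off;
            size_t n = (len - done < room) ? len - done : room;

            memcpy((char *)it->iov->iov_base + it->seg_off, src + done, n);
            done += n;
            it->seg_off += n;
            it->count -= n;
            if (it->seg_off == it->iov->iov_len) {   /* next segment */
                it->iov++;
                it->nr_segs--;
                it->seg_off = 0;
            }
        }
        return done;
    }

    int main(void)
    {
        char a[4], b[8];
        struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
        struct toy_iter it = { iov, 2, 0, sizeof(a) + sizeof(b) };
        size_t n = copy_to_iter_toy("hello world!", 12, &it);

        printf("copied %zu bytes: %.4s%.8s\n", n, a, b);
        return 0;
    }
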
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d622fa..c8b68ab2e574 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
19#include <linux/pipe_fs_i.h> 19#include <linux/pipe_fs_i.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/splice.h> 21#include <linux/splice.h>
22#include <linux/aio.h>
23 22
24MODULE_ALIAS_MISCDEV(FUSE_MINOR); 23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
25MODULE_ALIAS("devname:fuse"); 24MODULE_ALIAS("devname:fuse");
@@ -711,28 +710,26 @@ struct fuse_copy_state {
711 struct fuse_conn *fc; 710 struct fuse_conn *fc;
712 int write; 711 int write;
713 struct fuse_req *req; 712 struct fuse_req *req;
714 const struct iovec *iov; 713 struct iov_iter *iter;
715 struct pipe_buffer *pipebufs; 714 struct pipe_buffer *pipebufs;
716 struct pipe_buffer *currbuf; 715 struct pipe_buffer *currbuf;
717 struct pipe_inode_info *pipe; 716 struct pipe_inode_info *pipe;
718 unsigned long nr_segs; 717 unsigned long nr_segs;
719 unsigned long seglen;
720 unsigned long addr;
721 struct page *pg; 718 struct page *pg;
722 unsigned len; 719 unsigned len;
723 unsigned offset; 720 unsigned offset;
724 unsigned move_pages:1; 721 unsigned move_pages:1;
725}; 722};
726 723
727static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, 724static void fuse_copy_init(struct fuse_copy_state *cs,
725 struct fuse_conn *fc,
728 int write, 726 int write,
729 const struct iovec *iov, unsigned long nr_segs) 727 struct iov_iter *iter)
730{ 728{
731 memset(cs, 0, sizeof(*cs)); 729 memset(cs, 0, sizeof(*cs));
732 cs->fc = fc; 730 cs->fc = fc;
733 cs->write = write; 731 cs->write = write;
734 cs->iov = iov; 732 cs->iter = iter;
735 cs->nr_segs = nr_segs;
736} 733}
737 734
738/* Unmap and put previous page of userspace buffer */ 735/* Unmap and put previous page of userspace buffer */
@@ -800,22 +797,16 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
800 cs->nr_segs++; 797 cs->nr_segs++;
801 } 798 }
802 } else { 799 } else {
803 if (!cs->seglen) { 800 size_t off;
804 BUG_ON(!cs->nr_segs); 801 err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
805 cs->seglen = cs->iov[0].iov_len;
806 cs->addr = (unsigned long) cs->iov[0].iov_base;
807 cs->iov++;
808 cs->nr_segs--;
809 }
810 err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
811 if (err < 0) 802 if (err < 0)
812 return err; 803 return err;
813 BUG_ON(err != 1); 804 BUG_ON(!err);
805 cs->len = err;
806 cs->offset = off;
814 cs->pg = page; 807 cs->pg = page;
815 cs->offset = cs->addr % PAGE_SIZE; 808 cs->offset = off;
816 cs->len = min(PAGE_SIZE - cs->offset, cs->seglen); 809 iov_iter_advance(cs->iter, err);
817 cs->seglen -= cs->len;
818 cs->addr += cs->len;
819 } 810 }
820 811
821 return lock_request(cs->fc, cs->req); 812 return lock_request(cs->fc, cs->req);
@@ -890,8 +881,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
890 881
891 newpage = buf->page; 882 newpage = buf->page;
892 883
893 if (WARN_ON(!PageUptodate(newpage))) 884 if (!PageUptodate(newpage))
894 return -EIO; 885 SetPageUptodate(newpage);
895 886
896 ClearPageMappedToDisk(newpage); 887 ClearPageMappedToDisk(newpage);
897 888
@@ -1353,8 +1344,18 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1353 return err; 1344 return err;
1354} 1345}
1355 1346
1356static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, 1347static int fuse_dev_open(struct inode *inode, struct file *file)
1357 unsigned long nr_segs, loff_t pos) 1348{
1349 /*
1350 * The fuse device's file's private_data is used to hold
1351 * the fuse_conn(ection) when it is mounted, and is used to
1352 * keep track of whether the file has been mounted already.
1353 */
1354 file->private_data = NULL;
1355 return 0;
1356}
1357
1358static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1358{ 1359{
1359 struct fuse_copy_state cs; 1360 struct fuse_copy_state cs;
1360 struct file *file = iocb->ki_filp; 1361 struct file *file = iocb->ki_filp;
@@ -1362,9 +1363,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1362 if (!fc) 1363 if (!fc)
1363 return -EPERM; 1364 return -EPERM;
1364 1365
1365 fuse_copy_init(&cs, fc, 1, iov, nr_segs); 1366 if (!iter_is_iovec(to))
1367 return -EINVAL;
1368
1369 fuse_copy_init(&cs, fc, 1, to);
1366 1370
1367 return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); 1371 return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to));
1368} 1372}
1369 1373
1370static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, 1374static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
@@ -1384,7 +1388,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1384 if (!bufs) 1388 if (!bufs)
1385 return -ENOMEM; 1389 return -ENOMEM;
1386 1390
1387 fuse_copy_init(&cs, fc, 1, NULL, 0); 1391 fuse_copy_init(&cs, fc, 1, NULL);
1388 cs.pipebufs = bufs; 1392 cs.pipebufs = bufs;
1389 cs.pipe = pipe; 1393 cs.pipe = pipe;
1390 ret = fuse_dev_do_read(fc, in, &cs, len); 1394 ret = fuse_dev_do_read(fc, in, &cs, len);
@@ -1797,6 +1801,9 @@ copy_finish:
1797static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 1801static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1798 unsigned int size, struct fuse_copy_state *cs) 1802 unsigned int size, struct fuse_copy_state *cs)
1799{ 1803{
1804 /* Don't try to move pages (yet) */
1805 cs->move_pages = 0;
1806
1800 switch (code) { 1807 switch (code) {
1801 case FUSE_NOTIFY_POLL: 1808 case FUSE_NOTIFY_POLL:
1802 return fuse_notify_poll(fc, size, cs); 1809 return fuse_notify_poll(fc, size, cs);
@@ -1957,17 +1964,19 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1957 return err; 1964 return err;
1958} 1965}
1959 1966
1960static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, 1967static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1961 unsigned long nr_segs, loff_t pos)
1962{ 1968{
1963 struct fuse_copy_state cs; 1969 struct fuse_copy_state cs;
1964 struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); 1970 struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1965 if (!fc) 1971 if (!fc)
1966 return -EPERM; 1972 return -EPERM;
1967 1973
1968 fuse_copy_init(&cs, fc, 0, iov, nr_segs); 1974 if (!iter_is_iovec(from))
1975 return -EINVAL;
1976
1977 fuse_copy_init(&cs, fc, 0, from);
1969 1978
1970 return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); 1979 return fuse_dev_do_write(fc, &cs, iov_iter_count(from));
1971} 1980}
1972 1981
1973static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, 1982static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
@@ -2030,8 +2039,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2030 } 2039 }
2031 pipe_unlock(pipe); 2040 pipe_unlock(pipe);
2032 2041
2033 fuse_copy_init(&cs, fc, 0, NULL, nbuf); 2042 fuse_copy_init(&cs, fc, 0, NULL);
2034 cs.pipebufs = bufs; 2043 cs.pipebufs = bufs;
2044 cs.nr_segs = nbuf;
2035 cs.pipe = pipe; 2045 cs.pipe = pipe;
2036 2046
2037 if (flags & SPLICE_F_MOVE) 2047 if (flags & SPLICE_F_MOVE)
@@ -2217,12 +2227,11 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
2217 2227
2218const struct file_operations fuse_dev_operations = { 2228const struct file_operations fuse_dev_operations = {
2219 .owner = THIS_MODULE, 2229 .owner = THIS_MODULE,
2230 .open = fuse_dev_open,
2220 .llseek = no_llseek, 2231 .llseek = no_llseek,
2221 .read = do_sync_read, 2232 .read_iter = fuse_dev_read,
2222 .aio_read = fuse_dev_read,
2223 .splice_read = fuse_dev_splice_read, 2233 .splice_read = fuse_dev_splice_read,
2224 .write = do_sync_write, 2234 .write_iter = fuse_dev_write,
2225 .aio_write = fuse_dev_write,
2226 .splice_write = fuse_dev_splice_write, 2235 .splice_write = fuse_dev_splice_write,
2227 .poll = fuse_dev_poll, 2236 .poll = fuse_dev_poll,
2228 .release = fuse_dev_release, 2237 .release = fuse_dev_release,
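
fuse_copy_state now carries a struct iov_iter instead of a raw iovec array, and fuse_dev_read()/fuse_dev_write() reject anything but iovec-backed iterators, since the copy path pins user pages via iov_iter_get_pages(). A sketch of that guard; the enum is an illustrative stand-in for the iterator's backing types, and the real predicate is iter_is_iovec():

    #include <errno.h>
    #include <stdio.h>

    /* Stand-ins for iov_iter backing types (illustrative). */
    enum iter_type { ITER_IOVEC, ITER_KVEC, ITER_BVEC, ITER_PIPE };

    /* Mirrors the new guard: the fuse device copy machinery maps user
     * memory, so only iovec-backed iterators are accepted. */
    static int fuse_dev_check_iter(enum iter_type t)
    {
        return t == ITER_IOVEC ? 0 : -EINVAL;
    }

    int main(void)
    {
        printf("iovec: %d, bvec: %d\n",
               fuse_dev_check_iter(ITER_IOVEC),
               fuse_dev_check_iter(ITER_BVEC));
        return 0;
    }
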
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3bdcfd8..5ef05b5c4cff 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/aio.h>
19#include <linux/falloc.h> 18#include <linux/falloc.h>
19#include <linux/uio.h>
20 20
21static const struct file_operations fuse_direct_io_file_operations; 21static const struct file_operations fuse_direct_io_file_operations;
22 22
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
528 } 528 }
529} 529}
530 530
531static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
532{
533 if (io->err)
534 return io->err;
535
536 if (io->bytes >= 0 && io->write)
537 return -EIO;
538
539 return io->bytes < 0 ? io->size : io->bytes;
540}
541
531/** 542/**
532 * In case of short read, the caller sets 'pos' to the position of 543 * In case of short read, the caller sets 'pos' to the position of
533 * actual end of fuse request in IO request. Otherwise, if bytes_requested 544 * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
546 */ 557 */
547static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) 558static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
548{ 559{
560 bool is_sync = is_sync_kiocb(io->iocb);
549 int left; 561 int left;
550 562
551 spin_lock(&io->lock); 563 spin_lock(&io->lock);
@@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
555 io->bytes = pos; 567 io->bytes = pos;
556 568
557 left = --io->reqs; 569 left = --io->reqs;
570 if (!left && is_sync)
571 complete(io->done);
558 spin_unlock(&io->lock); 572 spin_unlock(&io->lock);
559 573
560 if (!left) { 574 if (!left && !is_sync) {
561 long res; 575 ssize_t res = fuse_get_res_by_io(io);
562 576
563 if (io->err) 577 if (res >= 0) {
564 res = io->err; 578 struct inode *inode = file_inode(io->iocb->ki_filp);
565 else if (io->bytes >= 0 && io->write) 579 struct fuse_conn *fc = get_fuse_conn(inode);
566 res = -EIO; 580 struct fuse_inode *fi = get_fuse_inode(inode);
567 else {
568 res = io->bytes < 0 ? io->size : io->bytes;
569
570 if (!is_sync_kiocb(io->iocb)) {
571 struct inode *inode = file_inode(io->iocb->ki_filp);
572 struct fuse_conn *fc = get_fuse_conn(inode);
573 struct fuse_inode *fi = get_fuse_inode(inode);
574 581
575 spin_lock(&fc->lock); 582 spin_lock(&fc->lock);
576 fi->attr_version = ++fc->attr_version; 583 fi->attr_version = ++fc->attr_version;
577 spin_unlock(&fc->lock); 584 spin_unlock(&fc->lock);
578 }
579 } 585 }
580 586
581 aio_complete(io->iocb, res, 0); 587 io->iocb->ki_complete(io->iocb, res, 0);
582 kfree(io); 588 kfree(io);
583 } 589 }
584} 590}
@@ -1139,13 +1145,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1139{ 1145{
1140 struct file *file = iocb->ki_filp; 1146 struct file *file = iocb->ki_filp;
1141 struct address_space *mapping = file->f_mapping; 1147 struct address_space *mapping = file->f_mapping;
1142 size_t count = iov_iter_count(from);
1143 ssize_t written = 0; 1148 ssize_t written = 0;
1144 ssize_t written_buffered = 0; 1149 ssize_t written_buffered = 0;
1145 struct inode *inode = mapping->host; 1150 struct inode *inode = mapping->host;
1146 ssize_t err; 1151 ssize_t err;
1147 loff_t endbyte = 0; 1152 loff_t endbyte = 0;
1148 loff_t pos = iocb->ki_pos;
1149 1153
1150 if (get_fuse_conn(inode)->writeback_cache) { 1154 if (get_fuse_conn(inode)->writeback_cache) {
1151 /* Update size (EOF optimization) and mode (SUID clearing) */ 1155 /* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1161,14 +1165,10 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1161 /* We can write back this queue in page reclaim */ 1165 /* We can write back this queue in page reclaim */
1162 current->backing_dev_info = inode_to_bdi(inode); 1166 current->backing_dev_info = inode_to_bdi(inode);
1163 1167
1164 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1168 err = generic_write_checks(iocb, from);
1165 if (err) 1169 if (err <= 0)
1166 goto out; 1170 goto out;
1167 1171
1168 if (count == 0)
1169 goto out;
1170
1171 iov_iter_truncate(from, count);
1172 err = file_remove_suid(file); 1172 err = file_remove_suid(file);
1173 if (err) 1173 if (err)
1174 goto out; 1174 goto out;
@@ -1177,7 +1177,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1177 if (err) 1177 if (err)
1178 goto out; 1178 goto out;
1179 1179
1180 if (file->f_flags & O_DIRECT) { 1180 if (iocb->ki_flags & IOCB_DIRECT) {
1181 loff_t pos = iocb->ki_pos;
1181 written = generic_file_direct_write(iocb, from, pos); 1182 written = generic_file_direct_write(iocb, from, pos);
1182 if (written < 0 || !iov_iter_count(from)) 1183 if (written < 0 || !iov_iter_count(from))
1183 goto out; 1184 goto out;
@@ -1203,9 +1204,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1203 written += written_buffered; 1204 written += written_buffered;
1204 iocb->ki_pos = pos + written_buffered; 1205 iocb->ki_pos = pos + written_buffered;
1205 } else { 1206 } else {
1206 written = fuse_perform_write(file, mapping, from, pos); 1207 written = fuse_perform_write(file, mapping, from, iocb->ki_pos);
1207 if (written >= 0) 1208 if (written >= 0)
1208 iocb->ki_pos = pos + written; 1209 iocb->ki_pos += written;
1209 } 1210 }
1210out: 1211out:
1211 current->backing_dev_info = NULL; 1212 current->backing_dev_info = NULL;
@@ -1395,55 +1396,30 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
1395 return res; 1396 return res;
1396} 1397}
1397 1398
1398static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1399static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
1399 size_t count, loff_t *ppos)
1400{
1401 struct fuse_io_priv io = { .async = 0, .file = file };
1402 struct iovec iov = { .iov_base = buf, .iov_len = count };
1403 struct iov_iter ii;
1404 iov_iter_init(&ii, READ, &iov, 1, count);
1405 return __fuse_direct_read(&io, &ii, ppos);
1406}
1407
1408static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
1409 struct iov_iter *iter,
1410 loff_t *ppos)
1411{ 1400{
1412 struct file *file = io->file; 1401 struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp };
1413 struct inode *inode = file_inode(file); 1402 return __fuse_direct_read(&io, to, &iocb->ki_pos);
1414 size_t count = iov_iter_count(iter);
1415 ssize_t res;
1416
1417
1418 res = generic_write_checks(file, ppos, &count, 0);
1419 if (!res) {
1420 iov_iter_truncate(iter, count);
1421 res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
1422 }
1423
1424 fuse_invalidate_attr(inode);
1425
1426 return res;
1427} 1403}
1428 1404
1429static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1405static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
1430 size_t count, loff_t *ppos)
1431{ 1406{
1432 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 1407 struct file *file = iocb->ki_filp;
1433 struct inode *inode = file_inode(file); 1408 struct inode *inode = file_inode(file);
1434 ssize_t res;
1435 struct fuse_io_priv io = { .async = 0, .file = file }; 1409 struct fuse_io_priv io = { .async = 0, .file = file };
1436 struct iov_iter ii; 1410 ssize_t res;
1437 iov_iter_init(&ii, WRITE, &iov, 1, count);
1438 1411
1439 if (is_bad_inode(inode)) 1412 if (is_bad_inode(inode))
1440 return -EIO; 1413 return -EIO;
1441 1414
1442 /* Don't allow parallel writes to the same file */ 1415 /* Don't allow parallel writes to the same file */
1443 mutex_lock(&inode->i_mutex); 1416 mutex_lock(&inode->i_mutex);
1444 res = __fuse_direct_write(&io, &ii, ppos); 1417 res = generic_write_checks(iocb, from);
1445 if (res > 0) 1418 if (res > 0)
1446 fuse_write_update_size(inode, *ppos); 1419 res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
1420 fuse_invalidate_attr(inode);
1421 if (res > 0)
1422 fuse_write_update_size(inode, iocb->ki_pos);
1447 mutex_unlock(&inode->i_mutex); 1423 mutex_unlock(&inode->i_mutex);
1448 1424
1449 return res; 1425 return res;
@@ -2798,9 +2774,9 @@ static inline loff_t fuse_round_up(loff_t off)
2798} 2774}
2799 2775
2800static ssize_t 2776static ssize_t
2801fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 2777fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2802 loff_t offset)
2803{ 2778{
2779 DECLARE_COMPLETION_ONSTACK(wait);
2804 ssize_t ret = 0; 2780 ssize_t ret = 0;
2805 struct file *file = iocb->ki_filp; 2781 struct file *file = iocb->ki_filp;
2806 struct fuse_file *ff = file->private_data; 2782 struct fuse_file *ff = file->private_data;
@@ -2815,15 +2791,15 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2815 inode = file->f_mapping->host; 2791 inode = file->f_mapping->host;
2816 i_size = i_size_read(inode); 2792 i_size = i_size_read(inode);
2817 2793
2818 if ((rw == READ) && (offset > i_size)) 2794 if ((iov_iter_rw(iter) == READ) && (offset > i_size))
2819 return 0; 2795 return 0;
2820 2796
2821 /* optimization for short read */ 2797 /* optimization for short read */
2822 if (async_dio && rw != WRITE && offset + count > i_size) { 2798 if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
2823 if (offset >= i_size) 2799 if (offset >= i_size)
2824 return 0; 2800 return 0;
2825 count = min_t(loff_t, count, fuse_round_up(i_size - offset)); 2801 iov_iter_truncate(iter, fuse_round_up(i_size - offset));
2826 iov_iter_truncate(iter, count); 2802 count = iov_iter_count(iter);
2827 } 2803 }
2828 2804
2829 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 2805 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2834,7 +2810,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2834 io->bytes = -1; 2810 io->bytes = -1;
2835 io->size = 0; 2811 io->size = 0;
2836 io->offset = offset; 2812 io->offset = offset;
2837 io->write = (rw == WRITE); 2813 io->write = (iov_iter_rw(iter) == WRITE);
2838 io->err = 0; 2814 io->err = 0;
2839 io->file = file; 2815 io->file = file;
2840 /* 2816 /*
@@ -2849,13 +2825,19 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2849 * to wait on real async I/O requests, so we must submit this request 2825 * to wait on real async I/O requests, so we must submit this request
2850 * synchronously. 2826 * synchronously.
2851 */ 2827 */
2852 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) 2828 if (!is_sync_kiocb(iocb) && (offset + count > i_size) &&
2829 iov_iter_rw(iter) == WRITE)
2853 io->async = false; 2830 io->async = false;
2854 2831
2855 if (rw == WRITE) 2832 if (io->async && is_sync_kiocb(iocb))
2856 ret = __fuse_direct_write(io, iter, &pos); 2833 io->done = &wait;
2857 else 2834
2835 if (iov_iter_rw(iter) == WRITE) {
2836 ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
2837 fuse_invalidate_attr(inode);
2838 } else {
2858 ret = __fuse_direct_read(io, iter, &pos); 2839 ret = __fuse_direct_read(io, iter, &pos);
2840 }
2859 2841
2860 if (io->async) { 2842 if (io->async) {
2861 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 2843 fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -2864,12 +2846,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2864 if (!is_sync_kiocb(iocb)) 2846 if (!is_sync_kiocb(iocb))
2865 return -EIOCBQUEUED; 2847 return -EIOCBQUEUED;
2866 2848
2867 ret = wait_on_sync_kiocb(iocb); 2849 wait_for_completion(&wait);
2868 } else { 2850 ret = fuse_get_res_by_io(io);
2869 kfree(io);
2870 } 2851 }
2871 2852
2872 if (rw == WRITE) { 2853 kfree(io);
2854
2855 if (iov_iter_rw(iter) == WRITE) {
2873 if (ret > 0) 2856 if (ret > 0)
2874 fuse_write_update_size(inode, pos); 2857 fuse_write_update_size(inode, pos);
2875 else if (ret < 0 && offset + count > i_size) 2858 else if (ret < 0 && offset + count > i_size)
@@ -2957,9 +2940,7 @@ out:
2957 2940
2958static const struct file_operations fuse_file_operations = { 2941static const struct file_operations fuse_file_operations = {
2959 .llseek = fuse_file_llseek, 2942 .llseek = fuse_file_llseek,
2960 .read = new_sync_read,
2961 .read_iter = fuse_file_read_iter, 2943 .read_iter = fuse_file_read_iter,
2962 .write = new_sync_write,
2963 .write_iter = fuse_file_write_iter, 2944 .write_iter = fuse_file_write_iter,
2964 .mmap = fuse_file_mmap, 2945 .mmap = fuse_file_mmap,
2965 .open = fuse_open, 2946 .open = fuse_open,
@@ -2977,8 +2958,8 @@ static const struct file_operations fuse_file_operations = {
2977 2958
2978static const struct file_operations fuse_direct_io_file_operations = { 2959static const struct file_operations fuse_direct_io_file_operations = {
2979 .llseek = fuse_file_llseek, 2960 .llseek = fuse_file_llseek,
2980 .read = fuse_direct_read, 2961 .read_iter = fuse_direct_read_iter,
2981 .write = fuse_direct_write, 2962 .write_iter = fuse_direct_write_iter,
2982 .mmap = fuse_direct_mmap, 2963 .mmap = fuse_direct_mmap,
2983 .open = fuse_open, 2964 .open = fuse_open,
2984 .flush = fuse_flush, 2965 .flush = fuse_flush,
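
The direct-IO rework above retires wait_on_sync_kiocb(): a synchronous kiocb now parks on an on-stack completion (io->done = &wait), which the final fuse_aio_complete() signals before the submitter collects the result via fuse_get_res_by_io(). A userspace model of that handshake using pthreads (all names here are illustrative):

    #include <pthread.h>
    #include <stdio.h>

    /* Minimal completion, standing in for DECLARE_COMPLETION_ONSTACK(). */
    struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
    };

    static void complete(struct completion *c)
    {
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
    }

    static void wait_for_completion(struct completion *c)
    {
        pthread_mutex_lock(&c->lock);
        while (!c->done)
            pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
    }

    static struct completion *io_done;   /* plays io->done */

    static void *io_worker(void *arg)
    {
        (void)arg;
        complete(io_done);               /* last fuse_aio_complete() */
        return NULL;
    }

    int main(void)
    {
        struct completion wait = { PTHREAD_MUTEX_INITIALIZER,
                                   PTHREAD_COND_INITIALIZER, 0 };
        pthread_t t;

        io_done = &wait;                 /* io->done = &wait */
        pthread_create(&t, NULL, io_worker, NULL);
        wait_for_completion(&wait);      /* replaces wait_on_sync_kiocb() */
        pthread_join(&t, NULL);
        printf("sync direct IO completed\n");
        return 0;
    }

Built with -lpthread, the main thread blocks until the worker signals, just as the synchronous direct-IO path now blocks until the last in-flight request completes.
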
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07c1376..7354dc142a50 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@ struct fuse_io_priv {
263 int err; 263 int err;
264 struct kiocb *iocb; 264 struct kiocb *iocb;
265 struct file *file; 265 struct file *file;
266 struct completion *done;
266}; 267};
267 268
268/** 269/**
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); 110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
111 if (error) 111 if (error)
112 goto out; 112 goto out;
113 113 set_cached_acl(inode, type, acl);
114 if (acl)
115 set_cached_acl(inode, type, acl);
116 else
117 forget_cached_acl(inode, type);
118out: 114out:
119 kfree(data); 115 kfree(data);
120 return error; 116 return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94edebe..5551fea0afd7 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/aio.h> 23#include <linux/uio.h>
24#include <trace/events/writeback.h> 24#include <trace/events/writeback.h>
25 25
26#include "gfs2.h" 26#include "gfs2.h"
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
671 671
672 if (alloc_required) { 672 if (alloc_required) {
673 struct gfs2_alloc_parms ap = { .aflags = 0, }; 673 struct gfs2_alloc_parms ap = { .aflags = 0, };
674 error = gfs2_quota_lock_check(ip); 674 requested = data_blocks + ind_blocks;
675 ap.target = requested;
676 error = gfs2_quota_lock_check(ip, &ap);
675 if (error) 677 if (error)
676 goto out_unlock; 678 goto out_unlock;
677 679
678 requested = data_blocks + ind_blocks;
679 ap.target = requested;
680 error = gfs2_inplace_reserve(ip, &ap); 680 error = gfs2_inplace_reserve(ip, &ap);
681 if (error) 681 if (error)
682 goto out_qunlock; 682 goto out_qunlock;
@@ -1016,13 +1016,12 @@ out:
1016/** 1016/**
1017 * gfs2_ok_for_dio - check that dio is valid on this file 1017 * gfs2_ok_for_dio - check that dio is valid on this file
1018 * @ip: The inode 1018 * @ip: The inode
1019 * @rw: READ or WRITE
1020 * @offset: The offset at which we are reading or writing 1019 * @offset: The offset at which we are reading or writing
1021 * 1020 *
1022 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) 1021 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
1023 * 1 (to accept the i/o request) 1022 * 1 (to accept the i/o request)
1024 */ 1023 */
1025static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) 1024static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
1026{ 1025{
1027 /* 1026 /*
1028 * Should we return an error here? I can't see that O_DIRECT for 1027 * Should we return an error here? I can't see that O_DIRECT for
@@ -1039,8 +1038,8 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
1039 1038
1040 1039
1041 1040
1042static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 1041static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1043 struct iov_iter *iter, loff_t offset) 1042 loff_t offset)
1044{ 1043{
1045 struct file *file = iocb->ki_filp; 1044 struct file *file = iocb->ki_filp;
1046 struct inode *inode = file->f_mapping->host; 1045 struct inode *inode = file->f_mapping->host;
@@ -1061,7 +1060,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1061 rv = gfs2_glock_nq(&gh); 1060 rv = gfs2_glock_nq(&gh);
1062 if (rv) 1061 if (rv)
1063 return rv; 1062 return rv;
1064 rv = gfs2_ok_for_dio(ip, rw, offset); 1063 rv = gfs2_ok_for_dio(ip, offset);
1065 if (rv != 1) 1064 if (rv != 1)
1066 goto out; /* dio not valid, fall back to buffered i/o */ 1065 goto out; /* dio not valid, fall back to buffered i/o */
1067 1066
@@ -1091,13 +1090,12 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1091 rv = filemap_write_and_wait_range(mapping, lstart, end); 1090 rv = filemap_write_and_wait_range(mapping, lstart, end);
1092 if (rv) 1091 if (rv)
1093 goto out; 1092 goto out;
1094 if (rw == WRITE) 1093 if (iov_iter_rw(iter) == WRITE)
1095 truncate_inode_pages_range(mapping, lstart, end); 1094 truncate_inode_pages_range(mapping, lstart, end);
1096 } 1095 }
1097 1096
1098 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 1097 rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
1099 iter, offset, 1098 offset, gfs2_get_block_direct, NULL, NULL, 0);
1100 gfs2_get_block_direct, NULL, NULL, 0);
1101out: 1099out:
1102 gfs2_glock_dq(&gh); 1100 gfs2_glock_dq(&gh);
1103 gfs2_holder_uninit(&gh); 1101 gfs2_holder_uninit(&gh);
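
gfs2_direct_IO() (like the fuse and generic paths above) loses its rw argument because the direction now travels inside the iterator and is read back with iov_iter_rw(). A one-line model of that accessor; the encoding shown, with the direction in the type field's low bit, is an assumption of this sketch:

    #include <stdio.h>

    enum { READ = 0, WRITE = 1 };

    /* Stand-in for struct iov_iter: the low bit of type encodes direction. */
    struct iov_iter_model { int type; };

    static int iov_iter_rw(const struct iov_iter_model *i)
    {
        return i->type & 1;
    }

    int main(void)
    {
        struct iov_iter_model rd = { READ }, wr = { WRITE };

        printf("rd=%s wr=%s\n",
               iov_iter_rw(&rd) == WRITE ? "WRITE" : "READ",
               iov_iter_rw(&wr) == WRITE ? "WRITE" : "READ");
        return 0;
    }
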
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
1224 1224
1225 if (gfs2_is_stuffed(ip) && 1225 if (gfs2_is_stuffed(ip) &&
1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1227 error = gfs2_quota_lock_check(ip); 1227 error = gfs2_quota_lock_check(ip, &ap);
1228 if (error) 1228 if (error)
1229 return error; 1229 return error;
1230 1230
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8e2d7e..31892871ea87 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/dlm.h> 26#include <linux/dlm.h>
27#include <linux/dlm_plock.h> 27#include <linux/dlm_plock.h>
28#include <linux/aio.h>
29#include <linux/delay.h> 28#include <linux/delay.h>
30 29
31#include "gfs2.h" 30#include "gfs2.h"
@@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
429 if (ret) 428 if (ret)
430 goto out_unlock; 429 goto out_unlock;
431 430
432 ret = gfs2_quota_lock_check(ip);
433 if (ret)
434 goto out_unlock;
435 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 431 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
436 ap.target = data_blocks + ind_blocks; 432 ap.target = data_blocks + ind_blocks;
433 ret = gfs2_quota_lock_check(ip, &ap);
434 if (ret)
435 goto out_unlock;
437 ret = gfs2_inplace_reserve(ip, &ap); 436 ret = gfs2_inplace_reserve(ip, &ap);
438 if (ret) 437 if (ret)
439 goto out_quota_unlock; 438 goto out_quota_unlock;
@@ -710,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
710 709
711 gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); 710 gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
712 711
713 if (file->f_flags & O_APPEND) { 712 if (iocb->ki_flags & IOCB_APPEND) {
714 struct gfs2_holder gh; 713 struct gfs2_holder gh;
715 714
716 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 715 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -765,22 +764,30 @@ out:
765 brelse(dibh); 764 brelse(dibh);
766 return error; 765 return error;
767} 766}
768 767/**
769static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, 768 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
770 unsigned int *data_blocks, unsigned int *ind_blocks) 769 * blocks, determine how many bytes can be written.
770 * @ip: The inode in question.
771 * @len: Max cap of bytes. What we return in *len must be <= this.
772 * @data_blocks: Compute and return the number of data blocks needed
773 * @ind_blocks: Compute and return the number of indirect blocks needed
774 * @max_blocks: The total blocks available to work with.
775 *
776 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
777 */
778static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
779 unsigned int *data_blocks, unsigned int *ind_blocks,
780 unsigned int max_blocks)
771{ 781{
782 loff_t max = *len;
772 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 783 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
773 unsigned int max_blocks = ip->i_rgd->rd_free_clone;
774 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); 784 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
775 785
776 for (tmp = max_data; tmp > sdp->sd_diptrs;) { 786 for (tmp = max_data; tmp > sdp->sd_diptrs;) {
777 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); 787 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
778 max_data -= tmp; 788 max_data -= tmp;
779 } 789 }
780 /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, 790
781 so it might end up with fewer data blocks */
782 if (max_data <= *data_blocks)
783 return;
784 *data_blocks = max_data; 791 *data_blocks = max_data;
785 *ind_blocks = max_blocks - max_data; 792 *ind_blocks = max_blocks - max_data;
786 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; 793 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
797 struct gfs2_inode *ip = GFS2_I(inode); 804 struct gfs2_inode *ip = GFS2_I(inode);
798 struct gfs2_alloc_parms ap = { .aflags = 0, }; 805 struct gfs2_alloc_parms ap = { .aflags = 0, };
799 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 806 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
800 loff_t bytes, max_bytes; 807 loff_t bytes, max_bytes, max_blks = UINT_MAX;
801 int error; 808 int error;
802 const loff_t pos = offset; 809 const loff_t pos = offset;
803 const loff_t count = len; 810 const loff_t count = len;
@@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
819 826
820 gfs2_size_hint(file, offset, len); 827 gfs2_size_hint(file, offset, len);
821 828
829 gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
830 ap.min_target = data_blocks + ind_blocks;
831
822 while (len > 0) { 832 while (len > 0) {
823 if (len < bytes) 833 if (len < bytes)
824 bytes = len; 834 bytes = len;
@@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
827 offset += bytes; 837 offset += bytes;
828 continue; 838 continue;
829 } 839 }
830 error = gfs2_quota_lock_check(ip); 840
841 /* We need to determine how many bytes we can actually
842 * fallocate without exceeding quota or going over the
843 * end of the fs. We start off optimistically by assuming
844 * we can write max_bytes */
845 max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
846
847 /* Since max_bytes is most likely a theoretical max, we
848 * calculate a more realistic 'bytes' to serve as a good
849 * starting point for the number of bytes we may be able
850 * to write */
851 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
852 ap.target = data_blocks + ind_blocks;
853
854 error = gfs2_quota_lock_check(ip, &ap);
831 if (error) 855 if (error)
832 return error; 856 return error;
833retry: 857 /* ap.allowed tells us how many blocks quota will allow
834 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 858 * us to write. Check if this reduces max_blks */
859 if (ap.allowed && ap.allowed < max_blks)
860 max_blks = ap.allowed;
835 861
836 ap.target = data_blocks + ind_blocks;
837 error = gfs2_inplace_reserve(ip, &ap); 862 error = gfs2_inplace_reserve(ip, &ap);
838 if (error) { 863 if (error)
839 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
840 bytes >>= 1;
841 bytes &= bsize_mask;
842 if (bytes == 0)
843 bytes = sdp->sd_sb.sb_bsize;
844 goto retry;
845 }
846 goto out_qunlock; 864 goto out_qunlock;
847 } 865
848 max_bytes = bytes; 866 /* check if the selected rgrp limits our max_blks further */
849 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, 867 if (ap.allowed && ap.allowed < max_blks)
850 &max_bytes, &data_blocks, &ind_blocks); 868 max_blks = ap.allowed;
869
870 /* Almost done. Calculate bytes that can be written using
871 * max_blks. We also recompute max_bytes, data_blocks and
872 * ind_blocks */
873 calc_max_reserv(ip, &max_bytes, &data_blocks,
874 &ind_blocks, max_blks);
851 875
852 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 876 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
853 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); 877 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -931,6 +955,22 @@ out_uninit:
931 return ret; 955 return ret;
932} 956}
933 957
958static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
959 struct file *out, loff_t *ppos,
960 size_t len, unsigned int flags)
961{
962 int error;
963 struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
964
965 error = gfs2_rs_alloc(ip);
966 if (error)
967 return (ssize_t)error;
968
969 gfs2_size_hint(out, *ppos, len);
970
971 return iter_file_splice_write(pipe, out, ppos, len, flags);
972}
973
934#ifdef CONFIG_GFS2_FS_LOCKING_DLM 974#ifdef CONFIG_GFS2_FS_LOCKING_DLM
935 975
936/** 976/**
@@ -1065,9 +1105,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1065 1105
1066const struct file_operations gfs2_file_fops = { 1106const struct file_operations gfs2_file_fops = {
1067 .llseek = gfs2_llseek, 1107 .llseek = gfs2_llseek,
1068 .read = new_sync_read,
1069 .read_iter = generic_file_read_iter, 1108 .read_iter = generic_file_read_iter,
1070 .write = new_sync_write,
1071 .write_iter = gfs2_file_write_iter, 1109 .write_iter = gfs2_file_write_iter,
1072 .unlocked_ioctl = gfs2_ioctl, 1110 .unlocked_ioctl = gfs2_ioctl,
1073 .mmap = gfs2_mmap, 1111 .mmap = gfs2_mmap,
@@ -1077,7 +1115,7 @@ const struct file_operations gfs2_file_fops = {
1077 .lock = gfs2_lock, 1115 .lock = gfs2_lock,
1078 .flock = gfs2_flock, 1116 .flock = gfs2_flock,
1079 .splice_read = generic_file_splice_read, 1117 .splice_read = generic_file_splice_read,
1080 .splice_write = iter_file_splice_write, 1118 .splice_write = gfs2_file_splice_write,
1081 .setlease = simple_nosetlease, 1119 .setlease = simple_nosetlease,
1082 .fallocate = gfs2_fallocate, 1120 .fallocate = gfs2_fallocate,
1083}; 1121};
@@ -1097,9 +1135,7 @@ const struct file_operations gfs2_dir_fops = {
1097 1135
1098const struct file_operations gfs2_file_fops_nolock = { 1136const struct file_operations gfs2_file_fops_nolock = {
1099 .llseek = gfs2_llseek, 1137 .llseek = gfs2_llseek,
1100 .read = new_sync_read,
1101 .read_iter = generic_file_read_iter, 1138 .read_iter = generic_file_read_iter,
1102 .write = new_sync_write,
1103 .write_iter = gfs2_file_write_iter, 1139 .write_iter = gfs2_file_write_iter,
1104 .unlocked_ioctl = gfs2_ioctl, 1140 .unlocked_ioctl = gfs2_ioctl,
1105 .mmap = gfs2_mmap, 1141 .mmap = gfs2_mmap,
@@ -1107,7 +1143,7 @@ const struct file_operations gfs2_file_fops_nolock = {
1107 .release = gfs2_release, 1143 .release = gfs2_release,
1108 .fsync = gfs2_fsync, 1144 .fsync = gfs2_fsync,
1109 .splice_read = generic_file_splice_read, 1145 .splice_read = generic_file_splice_read,
1110 .splice_write = iter_file_splice_write, 1146 .splice_write = gfs2_file_splice_write,
1111 .setlease = generic_setlease, 1147 .setlease = generic_setlease,
1112 .fallocate = gfs2_fallocate, 1148 .fallocate = gfs2_fallocate,
1113}; 1149};
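
The __gfs2_fallocate() rework above replaces the shrink-and-retry loop with a block budget: quota (ap.allowed) and the chosen rgrp each get a chance to lower max_blks, and calc_max_reserv() then converts the surviving budget into data blocks, indirect blocks and a byte count. A standalone model of that conversion with made-up filesystem geometry (sd_diptrs, sd_inptrs and friends carry illustrative values, not real superblock fields):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Toy geometry (illustrative, not real gfs2 superblock values). */
    static const unsigned sd_diptrs = 10;    /* pointers in the dinode    */
    static const unsigned sd_inptrs = 500;   /* pointers per indirect blk */
    static const unsigned bsize_shift = 12;  /* 4k blocks                 */
    static const unsigned max_height = 5;

    /* Mirrors the reworked calc_max_reserv(): split a block budget into
     * data blocks plus the indirect blocks needed to map them, then
     * derive the byte count that budget can cover. */
    static void calc_max_reserv(long long *len, unsigned *data_blocks,
                                unsigned *ind_blocks, unsigned max_blocks)
    {
        unsigned tmp, max_data = max_blocks - 3 * (max_height - 1);

        /* peel off one layer of indirect-pointer overhead per pass */
        for (tmp = max_data; tmp > sd_diptrs;) {
            tmp = DIV_ROUND_UP(tmp, sd_inptrs);
            max_data -= tmp;
        }
        *data_blocks = max_data;
        *ind_blocks = max_blocks - max_data;
        *len = ((long long)max_data - 3) << bsize_shift;
    }

    int main(void)
    {
        unsigned data, ind;
        long long len;

        /* e.g. quota reported ap.allowed = 100000 blocks */
        calc_max_reserv(&len, &data, &ind, 100000);
        printf("data=%u ind=%u bytes=%lld\n", data, ind, len);
        return 0;
    }
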
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
2047 2047
2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2049{ 2049{
2050 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2050 struct dentry *dent;
2051 if (!sdp->debugfs_dir) 2051
2052 return -ENOMEM; 2052 dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2053 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2053 if (IS_ERR_OR_NULL(dent))
2054 S_IFREG | S_IRUGO, 2054 goto fail;
2055 sdp->debugfs_dir, sdp, 2055 sdp->debugfs_dir = dent;
2056 &gfs2_glocks_fops); 2056
2057 if (!sdp->debugfs_dentry_glocks) 2057 dent = debugfs_create_file("glocks",
2058 S_IFREG | S_IRUGO,
2059 sdp->debugfs_dir, sdp,
2060 &gfs2_glocks_fops);
2061 if (IS_ERR_OR_NULL(dent))
2058 goto fail; 2062 goto fail;
2063 sdp->debugfs_dentry_glocks = dent;
2059 2064
2060 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", 2065 dent = debugfs_create_file("glstats",
2061 S_IFREG | S_IRUGO, 2066 S_IFREG | S_IRUGO,
2062 sdp->debugfs_dir, sdp, 2067 sdp->debugfs_dir, sdp,
2063 &gfs2_glstats_fops); 2068 &gfs2_glstats_fops);
2064 if (!sdp->debugfs_dentry_glstats) 2069 if (IS_ERR_OR_NULL(dent))
2065 goto fail; 2070 goto fail;
2071 sdp->debugfs_dentry_glstats = dent;
2066 2072
2067 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", 2073 dent = debugfs_create_file("sbstats",
2068 S_IFREG | S_IRUGO, 2074 S_IFREG | S_IRUGO,
2069 sdp->debugfs_dir, sdp, 2075 sdp->debugfs_dir, sdp,
2070 &gfs2_sbstats_fops); 2076 &gfs2_sbstats_fops);
2071 if (!sdp->debugfs_dentry_sbstats) 2077 if (IS_ERR_OR_NULL(dent))
2072 goto fail; 2078 goto fail;
2079 sdp->debugfs_dentry_sbstats = dent;
2073 2080
2074 return 0; 2081 return 0;
2075fail: 2082fail:
2076 gfs2_delete_debugfs_file(sdp); 2083 gfs2_delete_debugfs_file(sdp);
2077 return -ENOMEM; 2084 return dent ? PTR_ERR(dent) : -ENOMEM;
2078} 2085}
2079 2086
2080void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2087void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2100int gfs2_register_debugfs(void) 2107int gfs2_register_debugfs(void)
2101{ 2108{
2102 gfs2_root = debugfs_create_dir("gfs2", NULL); 2109 gfs2_root = debugfs_create_dir("gfs2", NULL);
2110 if (IS_ERR(gfs2_root))
2111 return PTR_ERR(gfs2_root);
2103 return gfs2_root ? 0 : -ENOMEM; 2112 return gfs2_root ? 0 : -ENOMEM;
2104} 2113}
2105 2114
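
The debugfs fixes above stop treating a bare NULL as the only failure mode: debugfs entry points can also return ERR_PTR() values, so each result is captured in a local dentry, checked once with IS_ERR_OR_NULL(), and the failure path converts whatever was last seen into an errno. A userspace model of that pattern (the ERR_PTR machinery is re-created here for illustration):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-ins for the kernel's ERR_PTR machinery. */
    #define MAX_ERRNO 4095
    static void *ERR_PTR(long err) { return (void *)(intptr_t)err; }
    static long PTR_ERR(const void *p) { return (long)(intptr_t)p; }
    static int IS_ERR_OR_NULL(const void *p)
    {
        return !p || (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
    }

    /* Fake creator: returns a "dentry" or an encoded error. */
    static void *create(int fail)
    {
        return fail ? ERR_PTR(-ENODEV) : (void *)0x1000;
    }

    /* Mirrors the reworked gfs2_create_debugfs_file() flow: validate
     * each result once, and on failure report PTR_ERR() if a real
     * error was encoded, else fall back to -ENOMEM for NULL. */
    static int create_files(int fail_second)
    {
        void *dent;

        dent = create(0);
        if (IS_ERR_OR_NULL(dent))
            goto fail;
        dent = create(fail_second);
        if (IS_ERR_OR_NULL(dent))
            goto fail;
        return 0;
    fail:
        return dent ? (int)PTR_ERR(dent) : -ENOMEM;
    }

    int main(void)
    {
        printf("ok=%d, fail=%d\n", create_files(0), create_files(1));
        return 0;
    }
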
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
301 * to the allocation code. 301 * to the allocation code.
302 */ 302 */
303struct gfs2_alloc_parms { 303struct gfs2_alloc_parms {
304 u32 target; 304 u64 target;
305 u32 min_target;
305 u32 aflags; 306 u32 aflags;
307 u64 allowed;
306}; 308};
307 309
308enum { 310enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; 382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
383 int error; 383 int error;
384 384
385 error = gfs2_quota_lock_check(ip); 385 error = gfs2_quota_lock_check(ip, &ap);
386 if (error) 386 if (error)
387 goto out; 387 goto out;
388 388
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
525 int error; 525 int error;
526 526
527 if (da->nr_blocks) { 527 if (da->nr_blocks) {
528 error = gfs2_quota_lock_check(dip); 528 error = gfs2_quota_lock_check(dip, &ap);
529 if (error) 529 if (error)
530 goto fail_quota_locks; 530 goto fail_quota_locks;
531 531
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
953 953
954 if (da.nr_blocks) { 954 if (da.nr_blocks) {
955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
956 error = gfs2_quota_lock_check(dip); 956 error = gfs2_quota_lock_check(dip, &ap);
957 if (error) 957 if (error)
958 goto out_gunlock; 958 goto out_gunlock;
959 959
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1470 1470
1471 if (da.nr_blocks) { 1471 if (da.nr_blocks) {
1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
1473 error = gfs2_quota_lock_check(ndip); 1473 error = gfs2_quota_lock_check(ndip, &ap);
1474 if (error) 1474 if (error)
1475 goto out_gunlock; 1475 goto out_gunlock;
1476 1476
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1669 kuid_t ouid, nuid; 1669 kuid_t ouid, nuid;
1670 kgid_t ogid, ngid; 1670 kgid_t ogid, ngid;
1671 int error; 1671 int error;
1672 struct gfs2_alloc_parms ap;
1672 1673
1673 ouid = inode->i_uid; 1674 ouid = inode->i_uid;
1674 ogid = inode->i_gid; 1675 ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1696 if (error) 1697 if (error)
1697 goto out; 1698 goto out;
1698 1699
1700 ap.target = gfs2_get_inode_blocks(&ip->i_inode);
1701
1699 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1702 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1700 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1703 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1701 error = gfs2_quota_check(ip, nuid, ngid); 1704 error = gfs2_quota_check(ip, nuid, ngid, &ap);
1702 if (error) 1705 if (error)
1703 goto out_gunlock_q; 1706 goto out_gunlock_q;
1704 } 1707 }
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1713 1716
1714 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1717 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1715 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1718 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1716 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1719 gfs2_quota_change(ip, -ap.target, ouid, ogid);
1717 gfs2_quota_change(ip, -blocks, ouid, ogid); 1720 gfs2_quota_change(ip, ap.target, nuid, ngid);
1718 gfs2_quota_change(ip, blocks, nuid, ngid);
1719 } 1721 }
1720 1722
1721out_end_trans: 1723out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..e3065cb9ab08 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
923 if (error) 923 if (error)
924 return error; 924 return error;
925 925
926 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
927 force_refresh = FORCE;
928
926 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 929 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
927 930
928 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 931 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
974 sizeof(struct gfs2_quota_data *), sort_qd, NULL); 977 sizeof(struct gfs2_quota_data *), sort_qd, NULL);
975 978
976 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 979 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
977 int force = NO_FORCE;
978 qd = ip->i_res->rs_qa_qd[x]; 980 qd = ip->i_res->rs_qa_qd[x];
979 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 981 error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
980 force = FORCE;
981 error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
982 if (error) 982 if (error)
983 break; 983 break;
984 } 984 }
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1094 return 0; 1094 return 0;
1095} 1095}
1096 1096
1097int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) 1097/**
1098 * gfs2_quota_check - check if allocating new blocks will exceed quota
1099 * @ip: The inode for which this check is being performed
1100 * @uid: The uid to check against
1101 * @gid: The gid to check against
1102 * @ap: The allocation parameters. ap->target contains the requested
1103 * blocks. ap->min_target, if set, contains the minimum blks
1104 * requested.
1105 *
1106 * Returns: 0 on success.
1107 * min_req = ap->min_target ? ap->min_target : ap->target;
1108 * quota must allow at least min_req blks for success and
1109 * ap->allowed is set to the number of blocks allowed
1110 *
1111 * -EDQUOT otherwise, quota violation. ap->allowed is set to number
1112 * of blocks available.
1113 */
1114int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
1115 struct gfs2_alloc_parms *ap)
1098{ 1116{
1099 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1117 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1100 struct gfs2_quota_data *qd; 1118 struct gfs2_quota_data *qd;
1101 s64 value; 1119 s64 value, warn, limit;
1102 unsigned int x; 1120 unsigned int x;
1103 int error = 0; 1121 int error = 0;
1104 1122
1123 ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
1105 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) 1124 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
1106 return 0; 1125 return 0;
1107 1126
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1115 qid_eq(qd->qd_id, make_kqid_gid(gid)))) 1134 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1116 continue; 1135 continue;
1117 1136
1137 warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
1138 limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
1118 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1139 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
1119 spin_lock(&qd_lock); 1140 spin_lock(&qd_lock);
1120 value += qd->qd_change; 1141 value += qd->qd_change;
1121 spin_unlock(&qd_lock); 1142 spin_unlock(&qd_lock);
1122 1143
1123 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1144 if (limit > 0 && (limit - value) < ap->allowed)
1124 print_message(qd, "exceeded"); 1145 ap->allowed = limit - value;
1125 quota_send_warning(qd->qd_id, 1146 /* If we can't meet the target */
1126 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1147 if (limit && limit < (value + (s64)ap->target)) {
1127 1148 /* If no min_target specified or we don't meet
1128 error = -EDQUOT; 1149 * min_target, return -EDQUOT */
1129 break; 1150 if (!ap->min_target || ap->min_target > ap->allowed) {
1130 } else if (be64_to_cpu(qd->qd_qb.qb_warn) && 1151 print_message(qd, "exceeded");
1131 (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && 1152 quota_send_warning(qd->qd_id,
1153 sdp->sd_vfs->s_dev,
1154 QUOTA_NL_BHARDWARN);
1155 error = -EDQUOT;
1156 break;
1157 }
1158 } else if (warn && warn < value &&
1132 time_after_eq(jiffies, qd->qd_last_warn + 1159 time_after_eq(jiffies, qd->qd_last_warn +
1133 gfs2_tune_get(sdp, 1160 gfs2_tune_get(sdp, gt_quota_warn_period)
1134 gt_quota_warn_period) * HZ)) { 1161 * HZ)) {
1135 quota_send_warning(qd->qd_id, 1162 quota_send_warning(qd->qd_id,
1136 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1163 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1137 error = print_message(qd, "warning"); 1164 error = print_message(qd, "warning");
1138 qd->qd_last_warn = jiffies; 1165 qd->qd_last_warn = jiffies;
1139 } 1166 }
1140 } 1167 }
1141
1142 return error; 1168 return error;
1143} 1169}
1144 1170
@@ -1468,32 +1494,34 @@ int gfs2_quotad(void *data)
 	return 0;
 }

-static int gfs2_quota_get_xstate(struct super_block *sb,
-				 struct fs_quota_stat *fqs)
+static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;

-	memset(fqs, 0, sizeof(struct fs_quota_stat));
-	fqs->qs_version = FS_QSTAT_VERSION;
+	memset(state, 0, sizeof(*state));

 	switch (sdp->sd_args.ar_quota) {
 	case GFS2_QUOTA_ON:
-		fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD);
+		state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
+		state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
 		/*FALLTHRU*/
 	case GFS2_QUOTA_ACCOUNT:
-		fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT);
+		state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED |
+						  QCI_SYSFILE;
+		state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED |
+						  QCI_SYSFILE;
 		break;
 	case GFS2_QUOTA_OFF:
 		break;
 	}
-
 	if (sdp->sd_quota_inode) {
-		fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
-		fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
+		state->s_state[USRQUOTA].ino =
+					GFS2_I(sdp->sd_quota_inode)->i_no_addr;
+		state->s_state[USRQUOTA].blocks = sdp->sd_quota_inode->i_blocks;
 	}
-	fqs->qs_uquota.qfs_nextents = 1; /* unsupported */
-	fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */
-	fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru);
+	state->s_state[USRQUOTA].nextents = 1;	/* unsupported */
+	state->s_state[GRPQUOTA] = state->s_state[USRQUOTA];
+	state->s_incoredqs = list_lru_count(&gfs2_qd_lru);
 	return 0;
 }

@@ -1638,7 +1666,7 @@ out_put:

 const struct quotactl_ops gfs2_quotactl_ops = {
 	.quota_sync     = gfs2_quota_sync,
-	.get_xstate     = gfs2_quota_get_xstate,
+	.get_state	= gfs2_quota_get_state,
 	.get_dqblk	= gfs2_get_dqblk,
 	.set_dqblk	= gfs2_set_dqblk,
 };
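For readers following the gfs2_quota_check() hunk above, a minimal userspace sketch of the new allowance logic may help. The struct and the return value are simplified stand-ins for gfs2_alloc_parms and -EDQUOT, not the kernel definitions; the real function also derives limit, warn and value from the on-disk quota data per uid/gid.

#include <stdio.h>

struct alloc_parms {			/* stand-in for gfs2_alloc_parms */
	long long target;		/* blocks the caller asked for */
	long long min_target;		/* smallest acceptable count, 0 = none */
	long long allowed;		/* blocks the chosen rgrp can supply */
};

static int quota_check_sketch(long long limit, long long value,
			      struct alloc_parms *ap)
{
	/* clamp the allowance to what the hard limit still permits */
	if (limit > 0 && (limit - value) < ap->allowed)
		ap->allowed = limit - value;
	/* target unreachable: fail only if min_target also cannot be met */
	if (limit && limit < (value + ap->target)) {
		if (!ap->min_target || ap->min_target > ap->allowed)
			return -1;	/* -EDQUOT in the kernel */
	}
	return 0;
}

int main(void)
{
	struct alloc_parms ap = { .target = 100, .min_target = 10,
				  .allowed = 1000 };

	/* limit 950, 900 already in use: only 50 blocks remain */
	printf("ret=%d allowed=%lld\n",
	       quota_check_sketch(950, 900, &ap), ap.allowed);
	return 0;	/* prints: ret=0 allowed=50 */
}

The point of the change is visible in the numbers: the full target of 100 no longer fits, but since min_target (10) is within the clamped allowance (50), the allocation proceeds with a reduced size instead of failing outright.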
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
 extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
 extern void gfs2_quota_unlock(struct gfs2_inode *ip);

-extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+			    struct gfs2_alloc_parms *ap);
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 			      kuid_t uid, kgid_t gid);

@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);

 extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);

-static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
+static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
+					struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
 		return ret;
 	if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
 		return 0;
-	ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
+	ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
 	if (ret)
 		gfs2_quota_unlock(ip);
 	return ret;
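The wrapper above is easy to misread in diff form, so here is a compilable model of its control flow: lock, check against the allocation parameters, and unlock on failure so callers never leak the quota lock. The stub functions and the toy failure rule are illustrative only; they are not the gfs2 prototypes.

#include <stdio.h>

struct alloc_parms { long long target, min_target, allowed; };

static int quota_lock(void)    { return 0; }
static void quota_unlock(void) { }
static int quota_check(struct alloc_parms *ap)
{
	return ap->allowed < ap->min_target ? -1 : 0;	/* toy rule */
}

static int quota_lock_check(struct alloc_parms *ap)
{
	int ret = quota_lock();

	if (ret)
		return ret;
	ret = quota_check(ap);
	if (ret)
		quota_unlock();	/* mirror: drop the lock before failing */
	return ret;
}

int main(void)
{
	struct alloc_parms ap = { .target = 8, .min_target = 4, .allowed = 2 };

	printf("%d\n", quota_lock_check(&ap));	/* -1: even min_target fails */
	return 0;
}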
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
  * @ip: the inode to reserve space for
  * @ap: the allocation parameters
  *
- * Returns: errno
+ * We try our best to find an rgrp that has at least ap->target blocks
+ * available. After a couple of passes (loops == 2), the prospects of finding
+ * such an rgrp diminish. At this stage, we return the first rgrp that has
+ * atleast ap->min_target blocks available. Either way, we set ap->allowed to
+ * the number of blocks available in the chosen rgrp.
+ *
+ * Returns: 0 on success,
+ *          -ENOMEM if a suitable rgrp can't be found
+ *          errno otherwise
  */

-int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
 		/* Skip unuseable resource groups */
 		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
 						 GFS2_RDF_ERROR)) ||
-		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+		    (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
 			goto skip_rgrp;

 		if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
 			goto check_rgrp;

 		/* If rgrp has enough free space, use it */
-		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+		    (loops == 2 && ap->min_target &&
+		     rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
 			ip->i_rgd = rs->rs_rbm.rgd;
+			ap->allowed = ip->i_rgd->rd_free_clone;
 			return 0;
 		}
-
check_rgrp:
 		/* Check for unlinked inodes which can be reclaimed */
 		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
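A standalone sketch of the selection policy these hunks implement: pass 0 skips rgrps whose extent-failure point rules out the full target; by pass 2 an rgrp that can cover just min_target is accepted, and the caller learns the usable count through allowed. Types and the flat rgrp array are simplified stand-ins, not the kernel structures, and the real code walks rgrps via glocks rather than an array.

struct rgrp { long long free_clone; long long extfail_pt; };

static int inplace_reserve_sketch(struct rgrp *rgrps, int nr,
				  long long target, long long min_target,
				  long long *allowed)
{
	for (int loops = 0; loops <= 2; loops++) {
		for (int i = 0; i < nr; i++) {
			struct rgrp *rgd = &rgrps[i];

			/* early passes refuse rgrps unlikely to satisfy target */
			if (loops == 0 && target > rgd->extfail_pt)
				continue;	/* skip_rgrp */
			if (rgd->free_clone >= target ||
			    (loops == 2 && min_target &&
			     rgd->free_clone >= min_target)) {
				*allowed = rgd->free_clone;
				return 0;
			}
		}
	}
	return -1;	/* no rgrp could satisfy even min_target */
}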
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
 extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);

 #define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
+				struct gfs2_alloc_parms *ap);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);

 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
 	if (error)
 		return error;

-	error = gfs2_quota_lock_check(ip);
+	error = gfs2_quota_lock_check(ip, &ap);
 	if (error)
 		return error;

diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 145566851e7a..36d1a6ae7655 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -197,7 +197,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,

 	inode = hfs_new_inode(dir, &dentry->d_name, mode);
 	if (!inode)
-		return -ENOSPC;
+		return -ENOMEM;

 	res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
 	if (res) {
@@ -226,7 +226,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)

 	inode = hfs_new_inode(dir, &dentry->d_name, S_IFDIR | mode);
 	if (!inode)
-		return -ENOSPC;
+		return -ENOMEM;

 	res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
 	if (res) {
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc81782..75fd5d873c19 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include "hfs_fs.h"
 #include "btree.h"
@@ -124,8 +124,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
 	return res ? try_to_free_buffers(page) : 0;
 }

-static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
-		struct iov_iter *iter, loff_t offset)
+static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+		loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -133,13 +133,13 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;

-	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
+	ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block);

 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
 	 */
-	if (unlikely((rw & WRITE) && ret < 0)) {
+	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
 		loff_t isize = i_size_read(inode);
 		loff_t end = offset + count;

@@ -674,9 +674,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,

 static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= new_sync_write,
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
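The same conversion pattern recurs below for hfsplus: the separate rw argument disappears and the transfer direction is recovered from the iterator itself. A tiny standalone model of that idea, with the field and constants simplified; in this kernel series iov_iter_rw() reads the direction bit out of the iterator's type field:

#include <stdio.h>

enum rw_dir { MODEL_READ = 0, MODEL_WRITE = 1 };

struct iov_iter_model {
	int type;	/* direction travels with the iterator */
};

static enum rw_dir iov_iter_rw_model(const struct iov_iter_model *i)
{
	return (i->type & MODEL_WRITE) ? MODEL_WRITE : MODEL_READ;
}

int main(void)
{
	struct iov_iter_model it = { .type = MODEL_WRITE };
	long ret = -5;	/* pretend the block-level direct IO failed */

	/* mirrors: if (iov_iter_rw(iter) == WRITE && ret < 0) trim blocks */
	if (iov_iter_rw_model(&it) == MODEL_WRITE && ret < 0)
		printf("extending write failed: trim blocks past i_size\n");
	return 0;
}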
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index c1422d91cd36..528e38b5af7f 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -118,9 +118,7 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd,
 	int b, e;
 	int res;

-	if (!rec_found)
-		BUG();
-
+	BUG_ON(!rec_found);
 	b = 0;
 	e = bnode->num_recs - 1;
 	res = -ENOENT;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
 	hfs_bnode_write(node, entry, data_off + key_len, entry_len);
 	hfs_bnode_dump(node);

-	if (new_node) {
-		/* update parent key if we inserted a key
-		 * at the start of the first node
-		 */
-		if (!rec && new_node != node)
-			hfs_brec_update_parent(fd);
+	/*
+	 * update parent key if we inserted a key
+	 * at the start of the node and it is not the new node
+	 */
+	if (!rec && new_node != node) {
+		hfs_bnode_read_key(node, fd->search_key, data_off + size);
+		hfs_brec_update_parent(fd);
+	}

+	if (new_node) {
 		hfs_bnode_put(fd->bnode);
 		if (!new_node->parent) {
 			hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
 		goto again;
 	}

-	if (!rec)
-		hfs_brec_update_parent(fd);
-
 	return 0;
 }

@@ -370,6 +370,8 @@ again:
 	if (IS_ERR(parent))
 		return PTR_ERR(parent);
 	__hfs_brec_find(parent, fd, hfs_find_rec_by_key);
+	if (fd->record < 0)
+		return -ENOENT;
 	hfs_bnode_dump(parent);
 	rec = fd->record;

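A toy model of the invariant the first hunk restores: an index node stores the first key of each child, so inserting a new smallest record into a child (rec == 0) must refresh the parent's copy, whether or not the insert also split off a new node. Plain arrays stand in for bnodes here; this is an illustration of the invariant, not the HFS+ record layout.

#include <stdio.h>

static void insert_first_key(int *child, int n, int key, int *parent_key)
{
	for (int i = n; i > 0; i--)	/* shift records right */
		child[i] = child[i - 1];
	child[0] = key;			/* new smallest record */
	*parent_key = child[0];		/* hfs_brec_update_parent() analogue */
}

int main(void)
{
	int child[4] = { 30, 40, 50 };
	int parent_key = 30;

	insert_first_key(child, 3, 20, &parent_key);
	printf("parent separator now %d\n", parent_key);	/* 20 */
	return 0;
}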
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 7892e6fddb66..022974ab6e3c 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -350,10 +350,11 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
 					&fd.search_key->cat.name.unicode,
 					off + 2, len);
 		fd.search_key->key_len = cpu_to_be16(6 + len);
-	} else
+	} else {
 		err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
 		if (unlikely(err))
 			goto out;
+	}

 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f0235c1640af..3074609befc3 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -434,7 +434,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
 	struct inode *inode;
-	int res = -ENOSPC;
+	int res = -ENOMEM;

 	mutex_lock(&sbi->vh_mutex);
 	inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO);
@@ -476,7 +476,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
 {
 	struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
 	struct inode *inode;
-	int res = -ENOSPC;
+	int res = -ENOMEM;

 	mutex_lock(&sbi->vh_mutex);
 	inode = hfsplus_new_inode(dir->i_sb, mode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f2d046..b0afedbef12b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>

 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
@@ -122,8 +122,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
 	return res ? try_to_free_buffers(page) : 0;
 }

-static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
-		struct iov_iter *iter, loff_t offset)
+static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+		loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -131,14 +131,13 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;

-	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
-				 hfsplus_get_block);
+	ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block);

 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
 	 */
-	if (unlikely((rw & WRITE) && ret < 0)) {
+	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
 		loff_t isize = i_size_read(inode);
 		loff_t end = offset + count;

@@ -254,6 +253,12 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
 	if ((attr->ia_valid & ATTR_SIZE) &&
 	    attr->ia_size != i_size_read(inode)) {
 		inode_dio_wait(inode);
+		if (attr->ia_size > inode->i_size) {
+			error = generic_cont_expand_simple(inode,
+							   attr->ia_size);
+			if (error)
+				return error;
+		}
 		truncate_setsize(inode, attr->ia_size);
 		hfsplus_file_truncate(inode);
 	}
@@ -341,9 +346,7 @@ static const struct inode_operations hfsplus_file_inode_operations = {

 static const struct file_operations hfsplus_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= new_sync_write,
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index d3ff5cc317d7..8e98f5db6ad6 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -76,7 +76,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
 {
 	struct inode *inode = file_inode(file);
 	struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
-	unsigned int flags;
+	unsigned int flags, new_fl = 0;
 	int err = 0;

 	err = mnt_want_write_file(file);
@@ -110,14 +110,12 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
 	}

 	if (flags & FS_IMMUTABLE_FL)
-		inode->i_flags |= S_IMMUTABLE;
-	else
-		inode->i_flags &= ~S_IMMUTABLE;
+		new_fl |= S_IMMUTABLE;

 	if (flags & FS_APPEND_FL)
-		inode->i_flags |= S_APPEND;
-	else
-		inode->i_flags &= ~S_APPEND;
+		new_fl |= S_APPEND;
+
+	inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND);

 	if (flags & FS_NODUMP_FL)
 		hip->userflags |= HFSPLUS_FLG_NODUMP;
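The reason for collecting the bits into new_fl first is that inode_set_flags() applies them in one masked update (a cmpxchg loop in the kernel) instead of four separate read-modify-writes on i_flags, which could race with concurrent updaters of other bits. A single-threaded model of the masked update, with illustrative bit values rather than the kernel's:

#include <stdio.h>

#define S_IMMUTABLE 0x1		/* illustrative values, not the kernel's */
#define S_APPEND    0x2

/* one masked update, as inode_set_flags() performs atomically */
static void set_flags_model(unsigned int *i_flags,
			    unsigned int new_fl, unsigned int mask)
{
	*i_flags = (*i_flags & ~mask) | new_fl;
}

int main(void)
{
	unsigned int i_flags = S_APPEND;	/* was append-only */
	unsigned int new_fl = S_IMMUTABLE;	/* user now wants immutable only */

	set_flags_model(&i_flags, new_fl, S_IMMUTABLE | S_APPEND);
	printf("%#x\n", i_flags);	/* 0x1: append dropped, immutable set */
	return 0;
}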
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index d98094a9f476..89f262d8fcd8 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -44,7 +44,7 @@ static int strcmp_xattr_acl(const char *name)
 		return -1;
 }

-static inline int is_known_namespace(const char *name)
+static bool is_known_namespace(const char *name)
 {
 	if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
 	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
@@ -424,6 +424,28 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len)
 	return len;
 }

+int hfsplus_setxattr(struct dentry *dentry, const char *name,
+		     const void *value, size_t size, int flags,
+		     const char *prefix, size_t prefixlen)
+{
+	char *xattr_name;
+	int res;
+
+	if (!strcmp(name, ""))
+		return -EINVAL;
+
+	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+			     GFP_KERNEL);
+	if (!xattr_name)
+		return -ENOMEM;
+	strcpy(xattr_name, prefix);
+	strcpy(xattr_name + prefixlen, name);
+	res = __hfsplus_setxattr(dentry->d_inode, xattr_name, value, size,
+				 flags);
+	kfree(xattr_name);
+	return res;
+}
+
 static ssize_t hfsplus_getxattr_finder_info(struct inode *inode,
 					void *value, size_t size)
 {
@@ -560,6 +582,30 @@ failed_getxattr_init:
 	return res;
 }

+ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+			 void *value, size_t size,
+			 const char *prefix, size_t prefixlen)
+{
+	int res;
+	char *xattr_name;
+
+	if (!strcmp(name, ""))
+		return -EINVAL;
+
+	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+			     GFP_KERNEL);
+	if (!xattr_name)
+		return -ENOMEM;
+
+	strcpy(xattr_name, prefix);
+	strcpy(xattr_name + prefixlen, name);
+
+	res = __hfsplus_getxattr(dentry->d_inode, xattr_name, value, size);
+	kfree(xattr_name);
+	return res;
+
+}
+
 static inline int can_list(const char *xattr_name)
 {
 	if (!xattr_name)
@@ -806,9 +852,6 @@ end_removexattr:
 static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
 				void *buffer, size_t size, int type)
 {
-	char *xattr_name;
-	int res;
-
 	if (!strcmp(name, ""))
 		return -EINVAL;

@@ -818,24 +861,19 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
 	 */
 	if (is_known_namespace(name))
 		return -EOPNOTSUPP;
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
-		+ XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
-	strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);

-	res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
-	kfree(xattr_name);
-	return res;
+	/*
+	 * osx is the namespace we use to indicate an unprefixed
+	 * attribute on the filesystem (like the ones that OS X
+	 * creates), so we pass the name through unmodified (after
+	 * ensuring it doesn't conflict with another namespace).
+	 */
+	return __hfsplus_getxattr(dentry->d_inode, name, buffer, size);
 }

 static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
 				const void *buffer, size_t size, int flags, int type)
 {
-	char *xattr_name;
-	int res;
-
 	if (!strcmp(name, ""))
 		return -EINVAL;

@@ -845,16 +883,14 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
 	 */
 	if (is_known_namespace(name))
 		return -EOPNOTSUPP;
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
-		+ XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
-	strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);

-	res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
-	kfree(xattr_name);
-	return res;
+	/*
+	 * osx is the namespace we use to indicate an unprefixed
+	 * attribute on the filesystem (like the ones that OS X
+	 * creates), so we pass the name through unmodified (after
+	 * ensuring it doesn't conflict with another namespace).
+	 */
+	return __hfsplus_setxattr(dentry->d_inode, name, buffer, size, flags);
 }

 static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
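A standalone model of the name handling the new helpers centralize: namespaced attributes get the prefix glued on before the on-disk lookup, while the osx handler passes names through untouched. The fixed-size buffer constant from the kernel is replaced by a plain malloc here for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *build_xattr_name(const char *prefix, size_t prefixlen,
			      const char *name)
{
	char *xattr_name = malloc(prefixlen + strlen(name) + 1);

	if (!xattr_name)
		return NULL;
	strcpy(xattr_name, prefix);		/* namespace prefix first */
	strcpy(xattr_name + prefixlen, name);	/* then the attribute name */
	return xattr_name;
}

int main(void)
{
	char *n = build_xattr_name("user.", 5, "comment");

	if (n) {
		printf("%s\n", n);	/* user.comment */
		free(n);
	}
	return 0;
}

With this in one place, the per-namespace handler files shrink to thin wrappers that only supply their prefix and its length, as the following hunks show.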
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
index 288530cf80b5..f9b0955b3d28 100644
--- a/fs/hfsplus/xattr.h
+++ b/fs/hfsplus/xattr.h
@@ -21,22 +21,16 @@ extern const struct xattr_handler *hfsplus_xattr_handlers[];
 int __hfsplus_setxattr(struct inode *inode, const char *name,
 			const void *value, size_t size, int flags);

-static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
-			const void *value, size_t size, int flags)
-{
-	return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
-}
+int hfsplus_setxattr(struct dentry *dentry, const char *name,
+		     const void *value, size_t size, int flags,
+		     const char *prefix, size_t prefixlen);

 ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
 			   void *value, size_t size);

-static inline ssize_t hfsplus_getxattr(struct dentry *dentry,
-				       const char *name,
-				       void *value,
-				       size_t size)
-{
-	return __hfsplus_getxattr(dentry->d_inode, name, value, size);
-}
+ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+			 void *value, size_t size,
+			 const char *prefix, size_t prefixlen);

 ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);

diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index 6ec5e107691f..aacff00a9ff9 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -16,43 +16,17 @@
 static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
 					void *buffer, size_t size, int type)
 {
-	char *xattr_name;
-	int res;
-
-	if (!strcmp(name, ""))
-		return -EINVAL;
-
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
-		GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_SECURITY_PREFIX);
-	strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
-
-	res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
-	kfree(xattr_name);
-	return res;
+	return hfsplus_getxattr(dentry, name, buffer, size,
+				XATTR_SECURITY_PREFIX,
+				XATTR_SECURITY_PREFIX_LEN);
 }

 static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
 		const void *buffer, size_t size, int flags, int type)
 {
-	char *xattr_name;
-	int res;
-
-	if (!strcmp(name, ""))
-		return -EINVAL;
-
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
-		GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_SECURITY_PREFIX);
-	strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
-
-	res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
-	kfree(xattr_name);
-	return res;
+	return hfsplus_setxattr(dentry, name, buffer, size, flags,
+				XATTR_SECURITY_PREFIX,
+				XATTR_SECURITY_PREFIX_LEN);
 }

 static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
index 3c5f27e4746a..bcf65089b7f7 100644
--- a/fs/hfsplus/xattr_trusted.c
+++ b/fs/hfsplus/xattr_trusted.c
@@ -14,43 +14,16 @@
 static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
 					void *buffer, size_t size, int type)
 {
-	char *xattr_name;
-	int res;
-
-	if (!strcmp(name, ""))
-		return -EINVAL;
-
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
-		GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
-	strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
-
-	res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
-	kfree(xattr_name);
-	return res;
+	return hfsplus_getxattr(dentry, name, buffer, size,
+				XATTR_TRUSTED_PREFIX,
+				XATTR_TRUSTED_PREFIX_LEN);
 }

 static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
 		const void *buffer, size_t size, int flags, int type)
 {
-	char *xattr_name;
-	int res;
-
-	if (!strcmp(name, ""))
-		return -EINVAL;
-
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
-		GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
-	strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
-
-	res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
-	kfree(xattr_name);
-	return res;
+	return hfsplus_setxattr(dentry, name, buffer, size, flags,
+				XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
 }

 static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
index 2b625a538b64..5aa0e6dc4a1e 100644
--- a/fs/hfsplus/xattr_user.c
+++ b/fs/hfsplus/xattr_user.c
@@ -14,43 +14,16 @@
 static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
 					void *buffer, size_t size, int type)
 {
-	char *xattr_name;
-	int res;

-	if (!strcmp(name, ""))
-		return -EINVAL;
-
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
-		GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_USER_PREFIX);
-	strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
-
-	res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
-	kfree(xattr_name);
-	return res;
+	return hfsplus_getxattr(dentry, name, buffer, size,
+				XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
 }

 static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
 		const void *buffer, size_t size, int flags, int type)
 {
-	char *xattr_name;
-	int res;
-
-	if (!strcmp(name, ""))
-		return -EINVAL;
-
-	xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
-		GFP_KERNEL);
-	if (!xattr_name)
-		return -ENOMEM;
-	strcpy(xattr_name, XATTR_USER_PREFIX);
-	strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
-
-	res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
-	kfree(xattr_name);
-	return res;
+	return hfsplus_setxattr(dentry, name, buffer, size, flags,
+				XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
 }

 static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 4fcd40d6f308..91e19f9dffe5 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -66,7 +66,8 @@ extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
 extern int access_file(char *path, int r, int w, int x);
 extern int open_file(char *path, int r, int w, int append);
 extern void *open_dir(char *path, int *err_out);
-extern char *read_dir(void *stream, unsigned long long *pos,
+extern void seek_dir(void *stream, unsigned long long pos);
+extern char *read_dir(void *stream, unsigned long long *pos_out,
 		      unsigned long long *ino_out, int *len_out,
 		      unsigned int *type_out);
 extern void close_file(void *stream);
@@ -77,8 +78,7 @@ extern int write_file(int fd, unsigned long long *offset, const char *buf,
 		      int len);
 extern int lseek_file(int fd, long long offset, int whence);
 extern int fsync_file(int fd, int datasync);
-extern int file_create(char *name, int ur, int uw, int ux, int gr,
-		       int gw, int gx, int or, int ow, int ox);
+extern int file_create(char *name, int mode);
 extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd);
 extern int make_symlink(const char *from, const char *to);
 extern int unlink_file(const char *file);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fd62cae0fdcb..b83a0343378b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -24,6 +24,7 @@ struct hostfs_inode_info {
 	int fd;
 	fmode_t mode;
 	struct inode vfs_inode;
+	struct mutex open_mutex;
 };

 static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
@@ -92,16 +93,22 @@ static char *__dentry_name(struct dentry *dentry, char *name)
 		__putname(name);
 		return NULL;
 	}
+
+	/*
+	 * This function relies on the fact that dentry_path_raw() will place
+	 * the path name at the end of the provided buffer.
+	 */
+	BUG_ON(p + strlen(p) + 1 != name + PATH_MAX);
+
 	strlcpy(name, root, PATH_MAX);
 	if (len > p - name) {
 		__putname(name);
 		return NULL;
 	}
-	if (p > name + len) {
-		char *s = name + len;
-		while ((*s++ = *p++) != '\0')
-			;
-	}
+
+	if (p > name + len)
+		strcpy(name + len, p);
+
 	return name;
 }

@@ -135,21 +142,19 @@ static char *follow_link(char *link)
 	int len, n;
 	char *name, *resolved, *end;

-	len = 64;
-	while (1) {
-		n = -ENOMEM;
-		name = kmalloc(len, GFP_KERNEL);
-		if (name == NULL)
-			goto out;
-
-		n = hostfs_do_readlink(link, name, len);
-		if (n < len)
-			break;
-		len *= 2;
-		kfree(name);
+	name = __getname();
+	if (!name) {
+		n = -ENOMEM;
+		goto out_free;
 	}
+
+	n = hostfs_do_readlink(link, name, PATH_MAX);
 	if (n < 0)
 		goto out_free;
+	else if (n == PATH_MAX) {
+		n = -E2BIG;
+		goto out_free;
+	}

 	if (*name == '/')
 		return name;
@@ -168,13 +173,12 @@ static char *follow_link(char *link)
 	}

 	sprintf(resolved, "%s%s", link, name);
-	kfree(name);
+	__putname(name);
 	kfree(link);
 	return resolved;

 out_free:
-	kfree(name);
-out:
+	__putname(name);
 	return ERR_PTR(n);
 }

@@ -225,6 +229,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 	hi->fd = -1;
 	hi->mode = 0;
 	inode_init_once(&hi->vfs_inode);
+	mutex_init(&hi->open_mutex);
 	return &hi->vfs_inode;
 }

@@ -257,6 +262,9 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
 	if (strlen(root_path) > offset)
 		seq_printf(seq, ",%s", root_path + offset);

+	if (append)
+		seq_puts(seq, ",append");
+
 	return 0;
 }

@@ -284,6 +292,7 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx)
 	if (dir == NULL)
 		return -error;
 	next = ctx->pos;
+	seek_dir(dir, next);
 	while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) {
 		if (!dir_emit(ctx, name, len, ino, type))
 			break;
@@ -293,13 +302,12 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx)
 	return 0;
 }

-static int hostfs_file_open(struct inode *ino, struct file *file)
+static int hostfs_open(struct inode *ino, struct file *file)
 {
-	static DEFINE_MUTEX(open_mutex);
 	char *name;
-	fmode_t mode = 0;
+	fmode_t mode;
 	int err;
-	int r = 0, w = 0, fd;
+	int r, w, fd;

 	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
 	if ((mode & HOSTFS_I(ino)->mode) == mode)
@@ -308,12 +316,12 @@ static int hostfs_open(struct inode *ino, struct file *file)
 	mode |= HOSTFS_I(ino)->mode;

 retry:
+	r = w = 0;
+
 	if (mode & FMODE_READ)
 		r = 1;
 	if (mode & FMODE_WRITE)
-		w = 1;
-	if (w)
-		r = 1;
+		r = w = 1;

 	name = dentry_name(file->f_path.dentry);
 	if (name == NULL)
@@ -324,15 +332,16 @@ retry:
 	if (fd < 0)
 		return fd;

-	mutex_lock(&open_mutex);
+	mutex_lock(&HOSTFS_I(ino)->open_mutex);
 	/* somebody else had handled it first? */
 	if ((mode & HOSTFS_I(ino)->mode) == mode) {
-		mutex_unlock(&open_mutex);
+		mutex_unlock(&HOSTFS_I(ino)->open_mutex);
+		close_file(&fd);
 		return 0;
 	}
 	if ((mode | HOSTFS_I(ino)->mode) != mode) {
 		mode |= HOSTFS_I(ino)->mode;
-		mutex_unlock(&open_mutex);
+		mutex_unlock(&HOSTFS_I(ino)->open_mutex);
 		close_file(&fd);
 		goto retry;
 	}
@@ -342,12 +351,12 @@ retry:
 		err = replace_file(fd, HOSTFS_I(ino)->fd);
 		close_file(&fd);
 		if (err < 0) {
-			mutex_unlock(&open_mutex);
+			mutex_unlock(&HOSTFS_I(ino)->open_mutex);
 			return err;
 		}
 	}
 	HOSTFS_I(ino)->mode = mode;
-	mutex_unlock(&open_mutex);
+	mutex_unlock(&HOSTFS_I(ino)->open_mutex);

 	return 0;
 }
@@ -378,13 +387,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,

 static const struct file_operations hostfs_file_fops = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
 	.splice_read	= generic_file_splice_read,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
-	.write		= new_sync_write,
 	.mmap		= generic_file_mmap,
-	.open		= hostfs_file_open,
+	.open		= hostfs_open,
 	.release	= hostfs_file_release,
 	.fsync		= hostfs_fsync,
 };
@@ -393,6 +400,8 @@ static const struct file_operations hostfs_dir_fops = {
 	.llseek		= generic_file_llseek,
 	.iterate	= hostfs_readdir,
 	.read		= generic_read_dir,
+	.open		= hostfs_open,
+	.fsync		= hostfs_fsync,
 };

 static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -400,7 +409,7 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
 	char *buffer;
-	unsigned long long base;
+	loff_t base = page_offset(page);
 	int count = PAGE_CACHE_SIZE;
 	int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 	int err;
@@ -409,7 +418,6 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
 		count = inode->i_size & (PAGE_CACHE_SIZE-1);

 	buffer = kmap(page);
-	base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;

 	err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
 	if (err != count) {
@@ -434,26 +442,29 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
 static int hostfs_readpage(struct file *file, struct page *page)
 {
 	char *buffer;
-	long long start;
-	int err = 0;
+	loff_t start = page_offset(page);
+	int bytes_read, ret = 0;

-	start = (long long) page->index << PAGE_CACHE_SHIFT;
 	buffer = kmap(page);
-	err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
+	bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
 			PAGE_CACHE_SIZE);
-	if (err < 0)
+	if (bytes_read < 0) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+		ret = bytes_read;
 		goto out;
+	}

-	memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);
+	memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read);

-	flush_dcache_page(page);
+	ClearPageError(page);
 	SetPageUptodate(page);
-	if (PageError(page)) ClearPageError(page);
-	err = 0;
+
 out:
+	flush_dcache_page(page);
 	kunmap(page);
 	unlock_page(page);
-	return err;
+	return ret;
 }

 static int hostfs_write_begin(struct file *file, struct address_space *mapping,
@@ -530,11 +541,13 @@ static int read_name(struct inode *ino, char *name)
 		init_special_inode(ino, st.mode & S_IFMT, rdev);
 		ino->i_op = &hostfs_iops;
 		break;
-
-	default:
+	case S_IFREG:
 		ino->i_op = &hostfs_iops;
 		ino->i_fop = &hostfs_file_fops;
 		ino->i_mapping->a_ops = &hostfs_aops;
+		break;
+	default:
+		return -EIO;
 	}

 	ino->i_ino = st.ino;
@@ -568,10 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	if (name == NULL)
 		goto out_put;

-	fd = file_create(name,
-			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
-			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
-			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
+	fd = file_create(name, mode & S_IFMT);
 	if (fd < 0)
 		error = fd;
 	else
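The __dentry_name() hunk above trades an open-coded byte loop for strcpy(), which is safe only because dentry_path_raw() leaves the component at the very end of the buffer (the new BUG_ON documents that). A small userspace demo of the layout, with a shrunken buffer size for illustration:

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define DEMO_PATH_MAX 64

int main(void)
{
	char name[DEMO_PATH_MAX];
	const char *root = "/host";
	const char *path = "/dir/file";
	/* dentry_path_raw() places the path at the end of the buffer */
	char *p = name + DEMO_PATH_MAX - strlen(path) - 1;
	size_t len = strlen(root);

	memcpy(p, path, strlen(path) + 1);
	assert(p + strlen(p) + 1 == name + DEMO_PATH_MAX);	/* the new BUG_ON */

	memcpy(name, root, len + 1);	/* strlcpy(name, root, PATH_MAX) */
	if (p > name + len)
		strcpy(name + len, p);	/* replaces the open-coded copy loop */
	printf("%s\n", name);		/* /host/dir/file */
	return 0;
}

The source and destination regions cannot overlap, since the root string is written at the front of the buffer and the component sits at the back.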
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 9765dab95cbd..9c1e0f019880 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -97,21 +97,27 @@ void *open_dir(char *path, int *err_out)
 	return dir;
 }

-char *read_dir(void *stream, unsigned long long *pos,
+void seek_dir(void *stream, unsigned long long pos)
+{
+	DIR *dir = stream;
+
+	seekdir(dir, pos);
+}
+
+char *read_dir(void *stream, unsigned long long *pos_out,
 	       unsigned long long *ino_out, int *len_out,
 	       unsigned int *type_out)
 {
 	DIR *dir = stream;
 	struct dirent *ent;

-	seekdir(dir, *pos);
 	ent = readdir(dir);
 	if (ent == NULL)
 		return NULL;
 	*len_out = strlen(ent->d_name);
 	*ino_out = ent->d_ino;
 	*type_out = ent->d_type;
-	*pos = telldir(dir);
+	*pos_out = ent->d_off;
 	return ent->d_name;
 }

@@ -175,21 +181,10 @@ void close_dir(void *stream)
 	closedir(stream);
 }

-int file_create(char *name, int ur, int uw, int ux, int gr,
-		int gw, int gx, int or, int ow, int ox)
+int file_create(char *name, int mode)
 {
-	int mode, fd;
+	int fd;

-	mode = 0;
-	mode |= ur ? S_IRUSR : 0;
-	mode |= uw ? S_IWUSR : 0;
-	mode |= ux ? S_IXUSR : 0;
-	mode |= gr ? S_IRGRP : 0;
-	mode |= gw ? S_IWGRP : 0;
-	mode |= gx ? S_IXGRP : 0;
-	mode |= or ? S_IROTH : 0;
-	mode |= ow ? S_IWOTH : 0;
-	mode |= ox ? S_IXOTH : 0;
 	fd = open64(name, O_CREAT | O_RDWR, mode);
 	if (fd < 0)
 		return -errno;
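Paired with the seek_dir() addition in hostfs_kern.c, this drops the per-entry seekdir()/telldir() round trip: the directory stream is positioned once per readdir pass, and each entry's own d_off (a Linux-specific dirent field) becomes the resume cursor. A userspace sketch of the resulting loop; error handling is trimmed:

#include <dirent.h>
#include <stdio.h>

int main(void)
{
	DIR *dir = opendir(".");
	struct dirent *ent;
	unsigned long long pos = 0;

	if (!dir)
		return 1;
	seekdir(dir, (long)pos);		/* once, not per entry */
	while ((ent = readdir(dir)) != NULL) {
		pos = ent->d_off;		/* cursor to resume from */
		printf("%s @ %llu\n", ent->d_name, pos);
	}
	closedir(dir);
	return 0;
}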
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 7f54e5f76cec..6d8cfe9b52d6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -197,9 +197,7 @@ const struct address_space_operations hpfs_aops = {
 const struct file_operations hpfs_file_ops =
 {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= new_sync_write,
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= hpfs_file_release,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca8e8dc..2640d88b0e63 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,6 +34,7 @@
34#include <linux/security.h> 34#include <linux/security.h>
35#include <linux/magic.h> 35#include <linux/magic.h>
36#include <linux/migrate.h> 36#include <linux/migrate.h>
37#include <linux/uio.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -47,9 +48,10 @@ struct hugetlbfs_config {
47 kuid_t uid; 48 kuid_t uid;
48 kgid_t gid; 49 kgid_t gid;
49 umode_t mode; 50 umode_t mode;
50 long nr_blocks; 51 long max_hpages;
51 long nr_inodes; 52 long nr_inodes;
52 struct hstate *hstate; 53 struct hstate *hstate;
54 long min_hpages;
53}; 55};
54 56
55struct hugetlbfs_inode_info { 57struct hugetlbfs_inode_info {
@@ -67,7 +69,7 @@ int sysctl_hugetlb_shm_group;
67enum { 69enum {
68 Opt_size, Opt_nr_inodes, 70 Opt_size, Opt_nr_inodes,
69 Opt_mode, Opt_uid, Opt_gid, 71 Opt_mode, Opt_uid, Opt_gid,
70 Opt_pagesize, 72 Opt_pagesize, Opt_min_size,
71 Opt_err, 73 Opt_err,
72}; 74};
73 75
@@ -78,6 +80,7 @@ static const match_table_t tokens = {
78 {Opt_uid, "uid=%u"}, 80 {Opt_uid, "uid=%u"},
79 {Opt_gid, "gid=%u"}, 81 {Opt_gid, "gid=%u"},
80 {Opt_pagesize, "pagesize=%s"}, 82 {Opt_pagesize, "pagesize=%s"},
83 {Opt_min_size, "min_size=%s"},
81 {Opt_err, NULL}, 84 {Opt_err, NULL},
82}; 85};
83 86
@@ -179,42 +182,33 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
179} 182}
180#endif 183#endif
181 184
182static int 185static size_t
183hugetlbfs_read_actor(struct page *page, unsigned long offset, 186hugetlbfs_read_actor(struct page *page, unsigned long offset,
184 char __user *buf, unsigned long count, 187 struct iov_iter *to, unsigned long size)
185 unsigned long size)
186{ 188{
187 char *kaddr; 189 size_t copied = 0;
188 unsigned long left, copied = 0;
189 int i, chunksize; 190 int i, chunksize;
190 191
191 if (size > count)
192 size = count;
193
194 /* Find which 4k chunk and offset with in that chunk */ 192 /* Find which 4k chunk and offset with in that chunk */
195 i = offset >> PAGE_CACHE_SHIFT; 193 i = offset >> PAGE_CACHE_SHIFT;
196 offset = offset & ~PAGE_CACHE_MASK; 194 offset = offset & ~PAGE_CACHE_MASK;
197 195
198 while (size) { 196 while (size) {
197 size_t n;
199 chunksize = PAGE_CACHE_SIZE; 198 chunksize = PAGE_CACHE_SIZE;
200 if (offset) 199 if (offset)
201 chunksize -= offset; 200 chunksize -= offset;
202 if (chunksize > size) 201 if (chunksize > size)
203 chunksize = size; 202 chunksize = size;
204 kaddr = kmap(&page[i]); 203 n = copy_page_to_iter(&page[i], offset, chunksize, to);
205 left = __copy_to_user(buf, kaddr + offset, chunksize); 204 copied += n;
206 kunmap(&page[i]); 205 if (n != chunksize)
207 if (left) { 206 return copied;
208 copied += (chunksize - left);
209 break;
210 }
211 offset = 0; 207 offset = 0;
212 size -= chunksize; 208 size -= chunksize;
213 buf += chunksize;
214 copied += chunksize;
215 i++; 209 i++;
216 } 210 }
217 return copied ? copied : -EFAULT; 211 return copied;
218} 212}
219 213
220/* 214/*
@@ -222,39 +216,34 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
222 * data. Its *very* similar to do_generic_mapping_read(), we can't use that 216 * data. Its *very* similar to do_generic_mapping_read(), we can't use that
223 * since it has PAGE_CACHE_SIZE assumptions. 217 * since it has PAGE_CACHE_SIZE assumptions.
224 */ 218 */
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 219static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
226 size_t len, loff_t *ppos)
227{ 220{
228 struct hstate *h = hstate_file(filp); 221 struct file *file = iocb->ki_filp;
229 struct address_space *mapping = filp->f_mapping; 222 struct hstate *h = hstate_file(file);
223 struct address_space *mapping = file->f_mapping;
230 struct inode *inode = mapping->host; 224 struct inode *inode = mapping->host;
231 unsigned long index = *ppos >> huge_page_shift(h); 225 unsigned long index = iocb->ki_pos >> huge_page_shift(h);
232 unsigned long offset = *ppos & ~huge_page_mask(h); 226 unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
233 unsigned long end_index; 227 unsigned long end_index;
234 loff_t isize; 228 loff_t isize;
235 ssize_t retval = 0; 229 ssize_t retval = 0;
236 230
237 /* validate length */ 231 while (iov_iter_count(to)) {
238 if (len == 0)
239 goto out;
240
241 for (;;) {
242 struct page *page; 232 struct page *page;
243 unsigned long nr, ret; 233 size_t nr, copied;
244 int ra;
245 234
246 /* nr is the maximum number of bytes to copy from this page */ 235 /* nr is the maximum number of bytes to copy from this page */
247 nr = huge_page_size(h); 236 nr = huge_page_size(h);
248 isize = i_size_read(inode); 237 isize = i_size_read(inode);
249 if (!isize) 238 if (!isize)
250 goto out; 239 break;
251 end_index = (isize - 1) >> huge_page_shift(h); 240 end_index = (isize - 1) >> huge_page_shift(h);
252 if (index >= end_index) { 241 if (index > end_index)
253 if (index > end_index) 242 break;
254 goto out; 243 if (index == end_index) {
255 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 244 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
256 if (nr <= offset) 245 if (nr <= offset)
257 goto out; 246 break;
258 } 247 }
259 nr = nr - offset; 248 nr = nr - offset;
260 249
@@ -265,39 +254,27 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
265 * We have a HOLE, zero out the user-buffer for the 254 * We have a HOLE, zero out the user-buffer for the
266 * length of the hole or request. 255 * length of the hole or request.
267 */ 256 */
268 ret = len < nr ? len : nr; 257 copied = iov_iter_zero(nr, to);
269 if (clear_user(buf, ret))
270 ra = -EFAULT;
271 else
272 ra = 0;
273 } else { 258 } else {
274 unlock_page(page); 259 unlock_page(page);
275 260
276 /* 261 /*
277 * We have the page, copy it to user space buffer. 262 * We have the page, copy it to user space buffer.
278 */ 263 */
279 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 264 copied = hugetlbfs_read_actor(page, offset, to, nr);
280 ret = ra;
281 page_cache_release(page); 265 page_cache_release(page);
282 } 266 }
283 if (ra < 0) { 267 offset += copied;
284 if (retval == 0) 268 retval += copied;
285 retval = ra; 269 if (copied != nr && iov_iter_count(to)) {
286 goto out; 270 if (!retval)
271 retval = -EFAULT;
272 break;
287 } 273 }
288
289 offset += ret;
290 retval += ret;
291 len -= ret;
292 index += offset >> huge_page_shift(h); 274 index += offset >> huge_page_shift(h);
293 offset &= ~huge_page_mask(h); 275 offset &= ~huge_page_mask(h);
294
295 /* short read or no more work */
296 if ((ret != nr) || (len == 0))
297 break;
298 } 276 }
299out: 277 iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
300 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
301 return retval; 278 return retval;
302} 279}
303 280
@@ -319,7 +296,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
319 296
320static void truncate_huge_page(struct page *page) 297static void truncate_huge_page(struct page *page)
321{ 298{
322 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 299 ClearPageDirty(page);
323 ClearPageUptodate(page); 300 ClearPageUptodate(page);
324 delete_from_page_cache(page); 301 delete_from_page_cache(page);
325} 302}
@@ -721,7 +698,7 @@ static void init_once(void *foo)
721} 698}
722 699
723const struct file_operations hugetlbfs_file_operations = { 700const struct file_operations hugetlbfs_file_operations = {
724 .read = hugetlbfs_read, 701 .read_iter = hugetlbfs_read_iter,
725 .mmap = hugetlbfs_file_mmap, 702 .mmap = hugetlbfs_file_mmap,
726 .fsync = noop_fsync, 703 .fsync = noop_fsync,
727 .get_unmapped_area = hugetlb_get_unmapped_area, 704 .get_unmapped_area = hugetlb_get_unmapped_area,
@@ -754,14 +731,38 @@ static const struct super_operations hugetlbfs_ops = {
754 .show_options = generic_show_options, 731 .show_options = generic_show_options,
755}; 732};
756 733
734enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
735
736/*
737 * Convert size option passed from command line to number of huge pages
738 * in the pool specified by hstate. Size option could be in bytes
739 * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
740 */
741static long long
742hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
743 int val_type)
744{
745 if (val_type == NO_SIZE)
746 return -1;
747
748 if (val_type == SIZE_PERCENT) {
749 size_opt <<= huge_page_shift(h);
750 size_opt *= h->max_huge_pages;
751 do_div(size_opt, 100);
752 }
753
754 size_opt >>= huge_page_shift(h);
755 return size_opt;
756}
757
757static int 758static int
758hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 759hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
759{ 760{
760 char *p, *rest; 761 char *p, *rest;
761 substring_t args[MAX_OPT_ARGS]; 762 substring_t args[MAX_OPT_ARGS];
762 int option; 763 int option;
763 unsigned long long size = 0; 764 unsigned long long max_size_opt = 0, min_size_opt = 0;
764 enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; 765 int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
765 766
766 if (!options) 767 if (!options)
767 return 0; 768 return 0;
@@ -799,10 +800,10 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
799 /* memparse() will accept a K/M/G without a digit */ 800 /* memparse() will accept a K/M/G without a digit */
800 if (!isdigit(*args[0].from)) 801 if (!isdigit(*args[0].from))
801 goto bad_val; 802 goto bad_val;
802 size = memparse(args[0].from, &rest); 803 max_size_opt = memparse(args[0].from, &rest);
803 setsize = SIZE_STD; 804 max_val_type = SIZE_STD;
804 if (*rest == '%') 805 if (*rest == '%')
805 setsize = SIZE_PERCENT; 806 max_val_type = SIZE_PERCENT;
806 break; 807 break;
807 } 808 }
808 809
@@ -825,6 +826,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
825 break; 826 break;
826 } 827 }
827 828
829 case Opt_min_size: {
830 /* memparse() will accept a K/M/G without a digit */
831 if (!isdigit(*args[0].from))
832 goto bad_val;
833 min_size_opt = memparse(args[0].from, &rest);
834 min_val_type = SIZE_STD;
835 if (*rest == '%')
836 min_val_type = SIZE_PERCENT;
837 break;
838 }
839
828 default: 840 default:
829 pr_err("Bad mount option: \"%s\"\n", p); 841 pr_err("Bad mount option: \"%s\"\n", p);
830 return -EINVAL; 842 return -EINVAL;
@@ -832,15 +844,22 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
832 } 844 }
833 } 845 }
834 846
835 /* Do size after hstate is set up */ 847 /*
836 if (setsize > NO_SIZE) { 848 * Use huge page pool size (in hstate) to convert the size
837 struct hstate *h = pconfig->hstate; 849 * options to number of huge pages. If NO_SIZE, -1 is returned.
838 if (setsize == SIZE_PERCENT) { 850 */
839 size <<= huge_page_shift(h); 851 pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
840 size *= h->max_huge_pages; 852 max_size_opt, max_val_type);
841 do_div(size, 100); 853 pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
842 } 854 min_size_opt, min_val_type);
843 pconfig->nr_blocks = (size >> huge_page_shift(h)); 855
856 /*
857 * If max_size was specified, then min_size must be smaller
858 */
859 if (max_val_type > NO_SIZE &&
860 pconfig->min_hpages > pconfig->max_hpages) {
861 pr_err("minimum size can not be greater than maximum size\n");
862 return -EINVAL;
844 } 863 }
845 864
846 return 0; 865 return 0;
@@ -859,12 +878,13 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
859 878
860 save_mount_options(sb, data); 879 save_mount_options(sb, data);
861 880
862 config.nr_blocks = -1; /* No limit on size by default */ 881 config.max_hpages = -1; /* No limit on size by default */
863 config.nr_inodes = -1; /* No limit on number of inodes by default */ 882 config.nr_inodes = -1; /* No limit on number of inodes by default */
864 config.uid = current_fsuid(); 883 config.uid = current_fsuid();
865 config.gid = current_fsgid(); 884 config.gid = current_fsgid();
866 config.mode = 0755; 885 config.mode = 0755;
867 config.hstate = &default_hstate; 886 config.hstate = &default_hstate;
887 config.min_hpages = -1; /* No default minimum size */
868 ret = hugetlbfs_parse_options(data, &config); 888 ret = hugetlbfs_parse_options(data, &config);
869 if (ret) 889 if (ret)
870 return ret; 890 return ret;
@@ -878,8 +898,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
878 sbinfo->max_inodes = config.nr_inodes; 898 sbinfo->max_inodes = config.nr_inodes;
879 sbinfo->free_inodes = config.nr_inodes; 899 sbinfo->free_inodes = config.nr_inodes;
880 sbinfo->spool = NULL; 900 sbinfo->spool = NULL;
881 if (config.nr_blocks != -1) { 901 /*
882 sbinfo->spool = hugepage_new_subpool(config.nr_blocks); 902 * Allocate and initialize subpool if maximum or minimum size is
903 * specified. Any needed reservations (for minimum size) are taken
904 * when the subpool is created.
905 */
906 if (config.max_hpages != -1 || config.min_hpages != -1) {
907 sbinfo->spool = hugepage_new_subpool(config.hstate,
908 config.max_hpages,
909 config.min_hpages);
883 if (!sbinfo->spool) 910 if (!sbinfo->spool)
884 goto out_free; 911 goto out_free;
885 } 912 }
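The new hugetlbfs_size_to_hpages() helper above centralizes the bytes-versus-percent conversion that was previously open-coded in hugetlbfs_parse_options(). A minimal userspace sketch of the same arithmetic; the huge-page shift and pool size below are illustrative stand-ins, not kernel state:

#include <stdio.h>

enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };

static long long size_to_hpages(unsigned int hpage_shift,
                                unsigned long long pool_pages,
                                unsigned long long size_opt, int val_type)
{
    if (val_type == NO_SIZE)
        return -1;              /* option not given: no limit */

    if (val_type == SIZE_PERCENT) {
        /* percent of pool -> bytes: pct * page_size * pool_pages / 100 */
        size_opt <<= hpage_shift;
        size_opt *= pool_pages;
        size_opt /= 100;
    }
    /* bytes -> huge pages, rounding down */
    return size_opt >> hpage_shift;
}

int main(void)
{
    /* 2 MiB huge pages (shift 21), a pool of 512 pages = 1 GiB */
    printf("min=50%%  -> %lld pages\n",
           size_to_hpages(21, 512, 50, SIZE_PERCENT));       /* 256 */
    printf("max=512M -> %lld pages\n",
           size_to_hpages(21, 512, 512ULL << 20, SIZE_STD)); /* 256 */
    return 0;
}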
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 64989ca9ba90..f509f62e12f6 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -51,9 +51,7 @@ const struct file_operations jffs2_file_operations =
51{ 51{
52 .llseek = generic_file_llseek, 52 .llseek = generic_file_llseek,
53 .open = generic_file_open, 53 .open = generic_file_open,
54 .read = new_sync_read,
55 .read_iter = generic_file_read_iter, 54 .read_iter = generic_file_read_iter,
56 .write = new_sync_write,
57 .write_iter = generic_file_write_iter, 55 .write_iter = generic_file_write_iter,
58 .unlocked_ioctl=jffs2_ioctl, 56 .unlocked_ioctl=jffs2_ioctl,
59 .mmap = generic_file_readonly_mmap, 57 .mmap = generic_file_readonly_mmap,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817ac51f6..2eac55379239 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
195 /* unchecked xdatum is chained with c->xattr_unchecked */ 195 /* unchecked xdatum is chained with c->xattr_unchecked */
196 list_del_init(&xd->xindex); 196 list_del_init(&xd->xindex);
197 197
198 dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", 198 dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
199 xd->xid, xd->version); 199 xd->xid, xd->version);
200 200
201 return 0; 201 return 0;
@@ -1266,7 +1266,6 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1266 if (rc) { 1266 if (rc) {
1267 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", 1267 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1268 __func__, rc, totlen); 1268 __func__, rc, totlen);
1269 rc = rc ? rc : -EBADFD;
1270 goto out; 1269 goto out;
1271 } 1270 }
1272 rc = save_xattr_ref(c, ref); 1271 rc = save_xattr_ref(c, ref);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 10815f8dfd8b..ae46788b9723 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -151,8 +151,6 @@ const struct inode_operations jfs_file_inode_operations = {
151const struct file_operations jfs_file_operations = { 151const struct file_operations jfs_file_operations = {
152 .open = jfs_open, 152 .open = jfs_open,
153 .llseek = generic_file_llseek, 153 .llseek = generic_file_llseek,
154 .write = new_sync_write,
155 .read = new_sync_read,
156 .read_iter = generic_file_read_iter, 154 .read_iter = generic_file_read_iter,
157 .write_iter = generic_file_write_iter, 155 .write_iter = generic_file_write_iter,
158 .mmap = generic_file_mmap, 156 .mmap = generic_file_mmap,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1ca3c9b..070dc4b33544 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/quotaops.h> 24#include <linux/quotaops.h>
25#include <linux/uio.h>
25#include <linux/writeback.h> 26#include <linux/writeback.h>
26#include <linux/aio.h>
27#include "jfs_incore.h" 27#include "jfs_incore.h"
28#include "jfs_inode.h" 28#include "jfs_inode.h"
29#include "jfs_filsys.h" 29#include "jfs_filsys.h"
@@ -330,8 +330,8 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
330 return generic_block_bmap(mapping, block, jfs_get_block); 330 return generic_block_bmap(mapping, block, jfs_get_block);
331} 331}
332 332
333static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, 333static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
334 struct iov_iter *iter, loff_t offset) 334 loff_t offset)
335{ 335{
336 struct file *file = iocb->ki_filp; 336 struct file *file = iocb->ki_filp;
337 struct address_space *mapping = file->f_mapping; 337 struct address_space *mapping = file->f_mapping;
@@ -339,13 +339,13 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
339 size_t count = iov_iter_count(iter); 339 size_t count = iov_iter_count(iter);
340 ssize_t ret; 340 ssize_t ret;
341 341
342 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); 342 ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block);
343 343
344 /* 344 /*
345 * In case of error extending write may have instantiated a few 345 * In case of error extending write may have instantiated a few
346 * blocks outside i_size. Trim these off again. 346 * blocks outside i_size. Trim these off again.
347 */ 347 */
348 if (unlikely((rw & WRITE) && ret < 0)) { 348 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
349 loff_t isize = i_size_read(inode); 349 loff_t isize = i_size_read(inode);
350 loff_t end = offset + count; 350 loff_t end = offset + count;
351 351
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 49ba7ff1bbb9..16a0922beb59 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
183 183
184#endif 184#endif
185 185
186static void init_once(void *foo)
187{
188 struct metapage *mp = (struct metapage *)foo;
189
190 mp->lid = 0;
191 mp->lsn = 0;
192 mp->flag = 0;
193 mp->data = NULL;
194 mp->clsn = 0;
195 mp->log = NULL;
196 set_bit(META_free, &mp->flag);
197 init_waitqueue_head(&mp->wait);
198}
199
200static inline struct metapage *alloc_metapage(gfp_t gfp_mask) 186static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
201{ 187{
202 return mempool_alloc(metapage_mempool, gfp_mask); 188 struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);
189
190 if (mp) {
191 mp->lid = 0;
192 mp->lsn = 0;
193 mp->data = NULL;
194 mp->clsn = 0;
195 mp->log = NULL;
196 init_waitqueue_head(&mp->wait);
197 }
198 return mp;
203} 199}
204 200
205static inline void free_metapage(struct metapage *mp) 201static inline void free_metapage(struct metapage *mp)
206{ 202{
207 mp->flag = 0;
208 set_bit(META_free, &mp->flag);
209
210 mempool_free(mp, metapage_mempool); 203 mempool_free(mp, metapage_mempool);
211} 204}
212 205
@@ -216,7 +209,7 @@ int __init metapage_init(void)
216 * Allocate the metapage structures 209 * Allocate the metapage structures
217 */ 210 */
218 metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), 211 metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
219 0, 0, init_once); 212 0, 0, NULL);
220 if (metapage_cache == NULL) 213 if (metapage_cache == NULL)
221 return -ENOMEM; 214 return -ENOMEM;
222 215
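The jfs_metapage change drops the slab constructor and the META_free reuse tracking; every metapage is now fully (re)initialized at mempool_alloc() time, so no free-state flag is needed. A toy model of that shift, using plain malloc in place of the mempool:

#include <stdio.h>
#include <stdlib.h>

struct metapage_like {
    unsigned long flag;
    void *data;
    int lsn, clsn;
};

/* Before: a constructor ran once per slab object and a META_free bit
 * tracked reuse. After: each allocation is (re)initialized right here. */
static struct metapage_like *alloc_mp(void)
{
    struct metapage_like *mp = malloc(sizeof(*mp));

    if (mp) {
        mp->flag = 0;
        mp->data = NULL;
        mp->lsn = mp->clsn = 0;
    }
    return mp;
}

int main(void)
{
    struct metapage_like *mp = alloc_mp();

    if (!mp)
        return 1;
    printf("flag=%lu data=%p\n", mp->flag, mp->data);
    free(mp);
    return 0;
}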
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index a78beda85f68..337e9e51ac06 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -48,7 +48,6 @@ struct metapage {
48 48
49/* metapage flag */ 49/* metapage flag */
50#define META_locked 0 50#define META_locked 0
51#define META_free 1
52#define META_dirty 2 51#define META_dirty 2
53#define META_sync 3 52#define META_sync 3
54#define META_discard 4 53#define META_discard 4
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
102 vaf.fmt = fmt; 102 vaf.fmt = fmt;
103 vaf.va = &args; 103 vaf.va = &args;
104 104
105 pr_err("ERROR: (device %s): %pf: %pV\n", 105 pr_err("ERROR: (device %s): %ps: %pV\n",
106 sb->s_id, __builtin_return_address(0), &vaf); 106 sb->s_id, __builtin_return_address(0), &vaf);
107 107
108 va_end(args); 108 va_end(args);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b684e8a132e6..2bacb9988566 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
207 goto out_free; 207 goto out_free;
208 } 208 }
209 209
210 of->event = atomic_read(&of->kn->attr.open->event);
210 ops = kernfs_ops(of->kn); 211 ops = kernfs_ops(of->kn);
211 if (ops->read) 212 if (ops->read)
212 len = ops->read(of, buf, len, *ppos); 213 len = ops->read(of, buf, len, *ppos);
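The kernfs hunk snapshots of->event before the data is generated rather than after, presumably so that a notification arriving while the buffer is being filled still makes a subsequent poll report the file as changed. A small C11 sketch of the snapshot-before-read idea (all names invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int event;            /* bumped by the "notify" side   */
static int snapshot;                /* what the reader last consumed */

static void do_read(void)
{
    snapshot = atomic_load(&event); /* snapshot BEFORE reading data  */
    /* ... produce file contents here; a notify landing now makes
     *     the counters differ once the read completes ... */
}

static bool poll_changed(void)
{
    return atomic_load(&event) != snapshot;
}

int main(void)
{
    do_read();
    atomic_fetch_add(&event, 1);    /* notify arriving mid-read */
    printf("changed: %d\n", poll_changed());
    return 0;
}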
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 665ef5a05183..a563ddbc19e6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -31,7 +31,7 @@
31static struct hlist_head nlm_files[FILE_NRHASH]; 31static struct hlist_head nlm_files[FILE_NRHASH];
32static DEFINE_MUTEX(nlm_file_mutex); 32static DEFINE_MUTEX(nlm_file_mutex);
33 33
34#ifdef NFSD_DEBUG 34#ifdef CONFIG_SUNRPC_DEBUG
35static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f) 35static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
36{ 36{
37 u32 *fhp = (u32*)f->data; 37 u32 *fhp = (u32*)f->data;
diff --git a/fs/locks.c b/fs/locks.c
index 365c82e1b3a9..653faabb07f4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -203,11 +203,11 @@ static struct kmem_cache *flctx_cache __read_mostly;
203static struct kmem_cache *filelock_cache __read_mostly; 203static struct kmem_cache *filelock_cache __read_mostly;
204 204
205static struct file_lock_context * 205static struct file_lock_context *
206locks_get_lock_context(struct inode *inode) 206locks_get_lock_context(struct inode *inode, int type)
207{ 207{
208 struct file_lock_context *new; 208 struct file_lock_context *new;
209 209
210 if (likely(inode->i_flctx)) 210 if (likely(inode->i_flctx) || type == F_UNLCK)
211 goto out; 211 goto out;
212 212
213 new = kmem_cache_alloc(flctx_cache, GFP_KERNEL); 213 new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
@@ -223,14 +223,7 @@ locks_get_lock_context(struct inode *inode)
223 * Assign the pointer if it's not already assigned. If it is, then 223 * Assign the pointer if it's not already assigned. If it is, then
224 * free the context we just allocated. 224 * free the context we just allocated.
225 */ 225 */
226 spin_lock(&inode->i_lock); 226 if (cmpxchg(&inode->i_flctx, NULL, new))
227 if (likely(!inode->i_flctx)) {
228 inode->i_flctx = new;
229 new = NULL;
230 }
231 spin_unlock(&inode->i_lock);
232
233 if (new)
234 kmem_cache_free(flctx_cache, new); 227 kmem_cache_free(flctx_cache, new);
235out: 228out:
236 return inode->i_flctx; 229 return inode->i_flctx;
@@ -276,8 +269,10 @@ void locks_release_private(struct file_lock *fl)
276 } 269 }
277 270
278 if (fl->fl_lmops) { 271 if (fl->fl_lmops) {
279 if (fl->fl_lmops->lm_put_owner) 272 if (fl->fl_lmops->lm_put_owner) {
280 fl->fl_lmops->lm_put_owner(fl); 273 fl->fl_lmops->lm_put_owner(fl->fl_owner);
274 fl->fl_owner = NULL;
275 }
281 fl->fl_lmops = NULL; 276 fl->fl_lmops = NULL;
282 } 277 }
283} 278}
@@ -333,7 +328,7 @@ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
333 328
334 if (fl->fl_lmops) { 329 if (fl->fl_lmops) {
335 if (fl->fl_lmops->lm_get_owner) 330 if (fl->fl_lmops->lm_get_owner)
336 fl->fl_lmops->lm_get_owner(new, fl); 331 fl->fl_lmops->lm_get_owner(fl->fl_owner);
337 } 332 }
338} 333}
339EXPORT_SYMBOL(locks_copy_conflock); 334EXPORT_SYMBOL(locks_copy_conflock);
@@ -592,11 +587,15 @@ posix_owner_key(struct file_lock *fl)
592 587
593static void locks_insert_global_blocked(struct file_lock *waiter) 588static void locks_insert_global_blocked(struct file_lock *waiter)
594{ 589{
590 lockdep_assert_held(&blocked_lock_lock);
591
595 hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); 592 hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
596} 593}
597 594
598static void locks_delete_global_blocked(struct file_lock *waiter) 595static void locks_delete_global_blocked(struct file_lock *waiter)
599{ 596{
597 lockdep_assert_held(&blocked_lock_lock);
598
600 hash_del(&waiter->fl_link); 599 hash_del(&waiter->fl_link);
601} 600}
602 601
@@ -730,7 +729,7 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
730 /* POSIX locks owned by the same process do not conflict with 729 /* POSIX locks owned by the same process do not conflict with
731 * each other. 730 * each other.
732 */ 731 */
733 if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) 732 if (posix_same_owner(caller_fl, sys_fl))
734 return (0); 733 return (0);
735 734
736 /* Check whether they overlap */ 735 /* Check whether they overlap */
@@ -748,7 +747,7 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
748 /* FLOCK locks referring to the same filp do not conflict with 747 /* FLOCK locks referring to the same filp do not conflict with
749 * each other. 748 * each other.
750 */ 749 */
751 if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) 750 if (caller_fl->fl_file == sys_fl->fl_file)
752 return (0); 751 return (0);
753 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) 752 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
754 return 0; 753 return 0;
@@ -838,6 +837,8 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
838{ 837{
839 int i = 0; 838 int i = 0;
840 839
840 lockdep_assert_held(&blocked_lock_lock);
841
841 /* 842 /*
842 * This deadlock detector can't reasonably detect deadlocks with 843 * This deadlock detector can't reasonably detect deadlocks with
843 * FL_OFDLCK locks, since they aren't owned by a process, per-se. 844 * FL_OFDLCK locks, since they aren't owned by a process, per-se.
@@ -871,9 +872,12 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
871 bool found = false; 872 bool found = false;
872 LIST_HEAD(dispose); 873 LIST_HEAD(dispose);
873 874
874 ctx = locks_get_lock_context(inode); 875 ctx = locks_get_lock_context(inode, request->fl_type);
875 if (!ctx) 876 if (!ctx) {
876 return -ENOMEM; 877 if (request->fl_type != F_UNLCK)
878 return -ENOMEM;
879 return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
880 }
877 881
878 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 882 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
879 new_fl = locks_alloc_lock(); 883 new_fl = locks_alloc_lock();
@@ -939,9 +943,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
939 bool added = false; 943 bool added = false;
940 LIST_HEAD(dispose); 944 LIST_HEAD(dispose);
941 945
942 ctx = locks_get_lock_context(inode); 946 ctx = locks_get_lock_context(inode, request->fl_type);
943 if (!ctx) 947 if (!ctx)
944 return -ENOMEM; 948 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
945 949
946 /* 950 /*
947 * We may need two file_lock structures for this operation, 951 * We may need two file_lock structures for this operation,
@@ -964,8 +968,6 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
964 */ 968 */
965 if (request->fl_type != F_UNLCK) { 969 if (request->fl_type != F_UNLCK) {
966 list_for_each_entry(fl, &ctx->flc_posix, fl_list) { 970 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
967 if (!IS_POSIX(fl))
968 continue;
969 if (!posix_locks_conflict(request, fl)) 971 if (!posix_locks_conflict(request, fl))
970 continue; 972 continue;
971 if (conflock) 973 if (conflock)
@@ -1388,9 +1390,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1388int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) 1390int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1389{ 1391{
1390 int error = 0; 1392 int error = 0;
1391 struct file_lock *new_fl;
1392 struct file_lock_context *ctx = inode->i_flctx; 1393 struct file_lock_context *ctx = inode->i_flctx;
1393 struct file_lock *fl; 1394 struct file_lock *new_fl, *fl, *tmp;
1394 unsigned long break_time; 1395 unsigned long break_time;
1395 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1396 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1396 LIST_HEAD(dispose); 1397 LIST_HEAD(dispose);
@@ -1420,7 +1421,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1420 break_time++; /* so that 0 means no break time */ 1421 break_time++; /* so that 0 means no break time */
1421 } 1422 }
1422 1423
1423 list_for_each_entry(fl, &ctx->flc_lease, fl_list) { 1424 list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1424 if (!leases_conflict(fl, new_fl)) 1425 if (!leases_conflict(fl, new_fl))
1425 continue; 1426 continue;
1426 if (want_write) { 1427 if (want_write) {
@@ -1606,7 +1607,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1606 lease = *flp; 1607 lease = *flp;
1607 trace_generic_add_lease(inode, lease); 1608 trace_generic_add_lease(inode, lease);
1608 1609
1609 ctx = locks_get_lock_context(inode); 1610 /* Note that arg is never F_UNLCK here */
1611 ctx = locks_get_lock_context(inode, arg);
1610 if (!ctx) 1612 if (!ctx)
1611 return -ENOMEM; 1613 return -ENOMEM;
1612 1614
@@ -1665,7 +1667,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1665 } 1667 }
1666 1668
1667 if (my_fl != NULL) { 1669 if (my_fl != NULL) {
1668 error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); 1670 lease = my_fl;
1671 error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1669 if (error) 1672 if (error)
1670 goto out; 1673 goto out;
1671 goto out_setup; 1674 goto out_setup;
@@ -1727,7 +1730,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
1727 break; 1730 break;
1728 } 1731 }
1729 } 1732 }
1730 trace_generic_delete_lease(inode, fl); 1733 trace_generic_delete_lease(inode, victim);
1731 if (victim) 1734 if (victim)
1732 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); 1735 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1733 spin_unlock(&ctx->flc_lock); 1736 spin_unlock(&ctx->flc_lock);
@@ -2555,15 +2558,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2555 : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); 2558 : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ ");
2556 } 2559 }
2557 if (inode) { 2560 if (inode) {
2558#ifdef WE_CAN_BREAK_LSLK_NOW 2561 /* userspace relies on this representation of dev_t */
2559 seq_printf(f, "%d %s:%ld ", fl_pid,
2560 inode->i_sb->s_id, inode->i_ino);
2561#else
2562 /* userspace relies on this representation of dev_t ;-( */
2563 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, 2562 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
2564 MAJOR(inode->i_sb->s_dev), 2563 MAJOR(inode->i_sb->s_dev),
2565 MINOR(inode->i_sb->s_dev), inode->i_ino); 2564 MINOR(inode->i_sb->s_dev), inode->i_ino);
2566#endif
2567 } else { 2565 } else {
2568 seq_printf(f, "%d <none>:0 ", fl_pid); 2566 seq_printf(f, "%d <none>:0 ", fl_pid);
2569 } 2567 }
@@ -2592,6 +2590,44 @@ static int locks_show(struct seq_file *f, void *v)
2592 return 0; 2590 return 0;
2593} 2591}
2594 2592
2593static void __show_fd_locks(struct seq_file *f,
2594 struct list_head *head, int *id,
2595 struct file *filp, struct files_struct *files)
2596{
2597 struct file_lock *fl;
2598
2599 list_for_each_entry(fl, head, fl_list) {
2600
2601 if (filp != fl->fl_file)
2602 continue;
2603 if (fl->fl_owner != files &&
2604 fl->fl_owner != filp)
2605 continue;
2606
2607 (*id)++;
2608 seq_puts(f, "lock:\t");
2609 lock_get_status(f, fl, *id, "");
2610 }
2611}
2612
2613void show_fd_locks(struct seq_file *f,
2614 struct file *filp, struct files_struct *files)
2615{
2616 struct inode *inode = file_inode(filp);
2617 struct file_lock_context *ctx;
2618 int id = 0;
2619
2620 ctx = inode->i_flctx;
2621 if (!ctx)
2622 return;
2623
2624 spin_lock(&ctx->flc_lock);
2625 __show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
2626 __show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
2627 __show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
2628 spin_unlock(&ctx->flc_lock);
2629}
2630
2595static void *locks_start(struct seq_file *f, loff_t *pos) 2631static void *locks_start(struct seq_file *f, loff_t *pos)
2596 __acquires(&blocked_lock_lock) 2632 __acquires(&blocked_lock_lock)
2597{ 2633{
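The locks_get_lock_context() hunk near the top of this file replaces the i_lock-protected assignment with a single cmpxchg(): allocate optimistically, publish the pointer only if it is still NULL, and free the allocation on a lost race. The same pattern in userspace C11 atomics, as a minimal sketch:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { int dummy; };

static _Atomic(struct ctx *) shared_ctx;

/* Lock-free version of the "assign if still NULL, else free ours"
 * dance that locks_get_lock_context() now does with cmpxchg(). */
static struct ctx *get_ctx(void)
{
    struct ctx *c = atomic_load(&shared_ctx);
    struct ctx *expected = NULL;

    if (c)
        return c;                    /* fast path: already set up */

    c = calloc(1, sizeof(*c));
    if (!c)
        return NULL;

    if (!atomic_compare_exchange_strong(&shared_ctx, &expected, c)) {
        free(c);                     /* we lost the race */
        c = expected;                /* winner's pointer  */
    }
    return c;
}

int main(void)
{
    printf("ctx=%p (stable: %d)\n", (void *)get_ctx(),
           get_ctx() == get_ctx());
    return 0;
}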
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 8538752df2f6..b2c13f739ffa 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -271,8 +271,6 @@ const struct file_operations logfs_reg_fops = {
271 .llseek = generic_file_llseek, 271 .llseek = generic_file_llseek,
272 .mmap = generic_file_readonly_mmap, 272 .mmap = generic_file_readonly_mmap,
273 .open = generic_file_open, 273 .open = generic_file_open,
274 .read = new_sync_read,
275 .write = new_sync_write,
276}; 274};
277 275
278const struct address_space_operations logfs_reg_aops = { 276const struct address_space_operations logfs_reg_aops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index a967de085ac0..6d63e27ec961 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -14,9 +14,7 @@
14 */ 14 */
15const struct file_operations minix_file_operations = { 15const struct file_operations minix_file_operations = {
16 .llseek = generic_file_llseek, 16 .llseek = generic_file_llseek,
17 .read = new_sync_read,
18 .read_iter = generic_file_read_iter, 17 .read_iter = generic_file_read_iter,
19 .write = new_sync_write,
20 .write_iter = generic_file_write_iter, 18 .write_iter = generic_file_write_iter,
21 .mmap = generic_file_mmap, 19 .mmap = generic_file_mmap,
22 .fsync = generic_file_fsync, 20 .fsync = generic_file_fsync,
diff --git a/fs/namei.c b/fs/namei.c
index c83145af4bfc..ffab2e06e147 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
119 * PATH_MAX includes the nul terminator --RR. 119 * PATH_MAX includes the nul terminator --RR.
120 */ 120 */
121 121
122#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) 122#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
123 123
124struct filename * 124struct filename *
125getname_flags(const char __user *filename, int flags, int *empty) 125getname_flags(const char __user *filename, int flags, int *empty)
126{ 126{
127 struct filename *result, *err; 127 struct filename *result;
128 int len;
129 long max;
130 char *kname; 128 char *kname;
129 int len;
131 130
132 result = audit_reusename(filename); 131 result = audit_reusename(filename);
133 if (result) 132 if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
136 result = __getname(); 135 result = __getname();
137 if (unlikely(!result)) 136 if (unlikely(!result))
138 return ERR_PTR(-ENOMEM); 137 return ERR_PTR(-ENOMEM);
139 result->refcnt = 1;
140 138
141 /* 139 /*
142 * First, try to embed the struct filename inside the names_cache 140 * First, try to embed the struct filename inside the names_cache
143 * allocation 141 * allocation
144 */ 142 */
145 kname = (char *)result + sizeof(*result); 143 kname = (char *)result->iname;
146 result->name = kname; 144 result->name = kname;
147 result->separate = false;
148 max = EMBEDDED_NAME_MAX;
149 145
150recopy: 146 len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
151 len = strncpy_from_user(kname, filename, max);
152 if (unlikely(len < 0)) { 147 if (unlikely(len < 0)) {
153 err = ERR_PTR(len); 148 __putname(result);
154 goto error; 149 return ERR_PTR(len);
155 } 150 }
156 151
157 /* 152 /*
@@ -160,43 +155,49 @@ recopy:
160 * names_cache allocation for the pathname, and re-do the copy from 155 * names_cache allocation for the pathname, and re-do the copy from
161 * userland. 156 * userland.
162 */ 157 */
163 if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { 158 if (unlikely(len == EMBEDDED_NAME_MAX)) {
159 const size_t size = offsetof(struct filename, iname[1]);
164 kname = (char *)result; 160 kname = (char *)result;
165 161
166 result = kzalloc(sizeof(*result), GFP_KERNEL); 162 /*
167 if (!result) { 163 * size is chosen that way we to guarantee that
168 err = ERR_PTR(-ENOMEM); 164 * result->iname[0] is within the same object and that
169 result = (struct filename *)kname; 165 * kname can't be equal to result->iname, no matter what.
170 goto error; 166 */
167 result = kzalloc(size, GFP_KERNEL);
168 if (unlikely(!result)) {
169 __putname(kname);
170 return ERR_PTR(-ENOMEM);
171 } 171 }
172 result->name = kname; 172 result->name = kname;
173 result->separate = true; 173 len = strncpy_from_user(kname, filename, PATH_MAX);
174 result->refcnt = 1; 174 if (unlikely(len < 0)) {
175 max = PATH_MAX; 175 __putname(kname);
176 goto recopy; 176 kfree(result);
177 return ERR_PTR(len);
178 }
179 if (unlikely(len == PATH_MAX)) {
180 __putname(kname);
181 kfree(result);
182 return ERR_PTR(-ENAMETOOLONG);
183 }
177 } 184 }
178 185
186 result->refcnt = 1;
179 /* The empty path is special. */ 187 /* The empty path is special. */
180 if (unlikely(!len)) { 188 if (unlikely(!len)) {
181 if (empty) 189 if (empty)
182 *empty = 1; 190 *empty = 1;
183 err = ERR_PTR(-ENOENT); 191 if (!(flags & LOOKUP_EMPTY)) {
184 if (!(flags & LOOKUP_EMPTY)) 192 putname(result);
185 goto error; 193 return ERR_PTR(-ENOENT);
194 }
186 } 195 }
187 196
188 err = ERR_PTR(-ENAMETOOLONG);
189 if (unlikely(len >= PATH_MAX))
190 goto error;
191
192 result->uptr = filename; 197 result->uptr = filename;
193 result->aname = NULL; 198 result->aname = NULL;
194 audit_getname(result); 199 audit_getname(result);
195 return result; 200 return result;
196
197error:
198 putname(result);
199 return err;
200} 201}
201 202
202struct filename * 203struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
216 return ERR_PTR(-ENOMEM); 217 return ERR_PTR(-ENOMEM);
217 218
218 if (len <= EMBEDDED_NAME_MAX) { 219 if (len <= EMBEDDED_NAME_MAX) {
219 result->name = (char *)(result) + sizeof(*result); 220 result->name = (char *)result->iname;
220 result->separate = false;
221 } else if (len <= PATH_MAX) { 221 } else if (len <= PATH_MAX) {
222 struct filename *tmp; 222 struct filename *tmp;
223 223
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
227 return ERR_PTR(-ENOMEM); 227 return ERR_PTR(-ENOMEM);
228 } 228 }
229 tmp->name = (char *)result; 229 tmp->name = (char *)result;
230 tmp->separate = true;
231 result = tmp; 230 result = tmp;
232 } else { 231 } else {
233 __putname(result); 232 __putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
249 if (--name->refcnt > 0) 248 if (--name->refcnt > 0)
250 return; 249 return;
251 250
252 if (name->separate) { 251 if (name->name != name->iname) {
253 __putname(name->name); 252 __putname(name->name);
254 kfree(name); 253 kfree(name);
255 } else 254 } else
@@ -1586,7 +1585,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1586 inode = path->dentry->d_inode; 1585 inode = path->dentry->d_inode;
1587 } 1586 }
1588 err = -ENOENT; 1587 err = -ENOENT;
1589 if (!inode || d_is_negative(path->dentry)) 1588 if (d_is_negative(path->dentry))
1590 goto out_path_put; 1589 goto out_path_put;
1591 1590
1592 if (should_follow_link(path->dentry, follow)) { 1591 if (should_follow_link(path->dentry, follow)) {
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1851 return err; 1850 return err;
1852} 1851}
1853 1852
1854static int path_init(int dfd, const char *name, unsigned int flags, 1853static int path_init(int dfd, const struct filename *name, unsigned int flags,
1855 struct nameidata *nd) 1854 struct nameidata *nd)
1856{ 1855{
1857 int retval = 0; 1856 int retval = 0;
1857 const char *s = name->name;
1858 1858
1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; 1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1863 if (flags & LOOKUP_ROOT) { 1863 if (flags & LOOKUP_ROOT) {
1864 struct dentry *root = nd->root.dentry; 1864 struct dentry *root = nd->root.dentry;
1865 struct inode *inode = root->d_inode; 1865 struct inode *inode = root->d_inode;
1866 if (*name) { 1866 if (*s) {
1867 if (!d_can_lookup(root)) 1867 if (!d_can_lookup(root))
1868 return -ENOTDIR; 1868 return -ENOTDIR;
1869 retval = inode_permission(inode, MAY_EXEC); 1869 retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1885 nd->root.mnt = NULL; 1885 nd->root.mnt = NULL;
1886 1886
1887 nd->m_seq = read_seqbegin(&mount_lock); 1887 nd->m_seq = read_seqbegin(&mount_lock);
1888 if (*name=='/') { 1888 if (*s == '/') {
1889 if (flags & LOOKUP_RCU) { 1889 if (flags & LOOKUP_RCU) {
1890 rcu_read_lock(); 1890 rcu_read_lock();
1891 nd->seq = set_root_rcu(nd); 1891 nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1919 1919
1920 dentry = f.file->f_path.dentry; 1920 dentry = f.file->f_path.dentry;
1921 1921
1922 if (*name) { 1922 if (*s) {
1923 if (!d_can_lookup(dentry)) { 1923 if (!d_can_lookup(dentry)) {
1924 fdput(f); 1924 fdput(f);
1925 return -ENOTDIR; 1925 return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1949 return -ECHILD; 1949 return -ECHILD;
1950done: 1950done:
1951 current->total_link_count = 0; 1951 current->total_link_count = 0;
1952 return link_path_walk(name, nd); 1952 return link_path_walk(s, nd);
1953} 1953}
1954 1954
1955static void path_cleanup(struct nameidata *nd) 1955static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
1972} 1972}
1973 1973
1974/* Returns 0 and nd will be valid on success; Returns error, otherwise. */ 1974
1975static int path_lookupat(int dfd, const char *name, 1975static int path_lookupat(int dfd, const struct filename *name,
1976 unsigned int flags, struct nameidata *nd) 1976 unsigned int flags, struct nameidata *nd)
1977{ 1977{
1978 struct path path; 1978 struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
2027static int filename_lookup(int dfd, struct filename *name, 2027static int filename_lookup(int dfd, struct filename *name,
2028 unsigned int flags, struct nameidata *nd) 2028 unsigned int flags, struct nameidata *nd)
2029{ 2029{
2030 int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); 2030 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
2031 if (unlikely(retval == -ECHILD)) 2031 if (unlikely(retval == -ECHILD))
2032 retval = path_lookupat(dfd, name->name, flags, nd); 2032 retval = path_lookupat(dfd, name, flags, nd);
2033 if (unlikely(retval == -ESTALE)) 2033 if (unlikely(retval == -ESTALE))
2034 retval = path_lookupat(dfd, name->name, 2034 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
2035 flags | LOOKUP_REVAL, nd);
2036 2035
2037 if (likely(!retval)) 2036 if (likely(!retval))
2038 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); 2037 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
2039 return retval; 2038 return retval;
2040} 2039}
2041 2040
2042static int do_path_lookup(int dfd, const char *name,
2043 unsigned int flags, struct nameidata *nd)
2044{
2045 struct filename *filename = getname_kernel(name);
2046 int retval = PTR_ERR(filename);
2047
2048 if (!IS_ERR(filename)) {
2049 retval = filename_lookup(dfd, filename, flags, nd);
2050 putname(filename);
2051 }
2052 return retval;
2053}
2054
2055/* does lookup, returns the object with parent locked */ 2041/* does lookup, returns the object with parent locked */
2056struct dentry *kern_path_locked(const char *name, struct path *path) 2042struct dentry *kern_path_locked(const char *name, struct path *path)
2057{ 2043{
@@ -2089,9 +2075,15 @@ out:
2089int kern_path(const char *name, unsigned int flags, struct path *path) 2075int kern_path(const char *name, unsigned int flags, struct path *path)
2090{ 2076{
2091 struct nameidata nd; 2077 struct nameidata nd;
2092 int res = do_path_lookup(AT_FDCWD, name, flags, &nd); 2078 struct filename *filename = getname_kernel(name);
2093 if (!res) 2079 int res = PTR_ERR(filename);
2094 *path = nd.path; 2080
2081 if (!IS_ERR(filename)) {
2082 res = filename_lookup(AT_FDCWD, filename, flags, &nd);
2083 putname(filename);
2084 if (!res)
2085 *path = nd.path;
2086 }
2095 return res; 2087 return res;
2096} 2088}
2097EXPORT_SYMBOL(kern_path); 2089EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2108 const char *name, unsigned int flags, 2100 const char *name, unsigned int flags,
2109 struct path *path) 2101 struct path *path)
2110{ 2102{
2111 struct nameidata nd; 2103 struct filename *filename = getname_kernel(name);
2112 int err; 2104 int err = PTR_ERR(filename);
2113 nd.root.dentry = dentry; 2105
2114 nd.root.mnt = mnt;
2115 BUG_ON(flags & LOOKUP_PARENT); 2106 BUG_ON(flags & LOOKUP_PARENT);
2116 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 2107
2117 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); 2108 /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
2118 if (!err) 2109 if (!IS_ERR(filename)) {
2119 *path = nd.path; 2110 struct nameidata nd;
2111 nd.root.dentry = dentry;
2112 nd.root.mnt = mnt;
2113 err = filename_lookup(AT_FDCWD, filename,
2114 flags | LOOKUP_ROOT, &nd);
2115 if (!err)
2116 *path = nd.path;
2117 putname(filename);
2118 }
2120 return err; 2119 return err;
2121} 2120}
2122EXPORT_SYMBOL(vfs_path_lookup); 2121EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
2138 * @len: maximum length @len should be interpreted to 2137 * @len: maximum length @len should be interpreted to
2139 * 2138 *
2140 * Note that this routine is purely a helper for filesystem usage and should 2139 * Note that this routine is purely a helper for filesystem usage and should
2141 * not be called by generic code. Also note that by using this function the 2140 * not be called by generic code.
2142 * nameidata argument is passed to the filesystem methods and a filesystem
2143 * using this helper needs to be prepared for that.
2144 */ 2141 */
2145struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 2142struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2146{ 2143{
@@ -2313,7 +2310,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2313 mutex_unlock(&dir->d_inode->i_mutex); 2310 mutex_unlock(&dir->d_inode->i_mutex);
2314 2311
2315done: 2312done:
2316 if (!dentry->d_inode || d_is_negative(dentry)) { 2313 if (d_is_negative(dentry)) {
2317 error = -ENOENT; 2314 error = -ENOENT;
2318 dput(dentry); 2315 dput(dentry);
2319 goto out; 2316 goto out;
@@ -2341,7 +2338,8 @@ out:
2341 * Returns 0 and "path" will be valid on success; Returns error otherwise. 2338 * Returns 0 and "path" will be valid on success; Returns error otherwise.
2342 */ 2339 */
2343static int 2340static int
2344path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) 2341path_mountpoint(int dfd, const struct filename *name, struct path *path,
2342 unsigned int flags)
2345{ 2343{
2346 struct nameidata nd; 2344 struct nameidata nd;
2347 int err; 2345 int err;
@@ -2370,20 +2368,20 @@ out:
2370} 2368}
2371 2369
2372static int 2370static int
2373filename_mountpoint(int dfd, struct filename *s, struct path *path, 2371filename_mountpoint(int dfd, struct filename *name, struct path *path,
2374 unsigned int flags) 2372 unsigned int flags)
2375{ 2373{
2376 int error; 2374 int error;
2377 if (IS_ERR(s)) 2375 if (IS_ERR(name))
2378 return PTR_ERR(s); 2376 return PTR_ERR(name);
2379 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); 2377 error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
2380 if (unlikely(error == -ECHILD)) 2378 if (unlikely(error == -ECHILD))
2381 error = path_mountpoint(dfd, s->name, path, flags); 2379 error = path_mountpoint(dfd, name, path, flags);
2382 if (unlikely(error == -ESTALE)) 2380 if (unlikely(error == -ESTALE))
2383 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); 2381 error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
2384 if (likely(!error)) 2382 if (likely(!error))
2385 audit_inode(s, path->dentry, 0); 2383 audit_inode(name, path->dentry, 0);
2386 putname(s); 2384 putname(name);
2387 return error; 2385 return error;
2388} 2386}
2389 2387
@@ -3040,7 +3038,7 @@ retry_lookup:
3040finish_lookup: 3038finish_lookup:
3041 /* we _can_ be in RCU mode here */ 3039 /* we _can_ be in RCU mode here */
3042 error = -ENOENT; 3040 error = -ENOENT;
3043 if (!inode || d_is_negative(path->dentry)) { 3041 if (d_is_negative(path->dentry)) {
3044 path_to_nameidata(path, nd); 3042 path_to_nameidata(path, nd);
3045 goto out; 3043 goto out;
3046 } 3044 }
@@ -3079,7 +3077,7 @@ finish_open:
3079 error = -ENOTDIR; 3077 error = -ENOTDIR;
3080 if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) 3078 if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
3081 goto out; 3079 goto out;
3082 if (!S_ISREG(nd->inode->i_mode)) 3080 if (!d_is_reg(nd->path.dentry))
3083 will_truncate = false; 3081 will_truncate = false;
3084 3082
3085 if (will_truncate) { 3083 if (will_truncate) {
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
3156 static const struct qstr name = QSTR_INIT("/", 1); 3154 static const struct qstr name = QSTR_INIT("/", 1);
3157 struct dentry *dentry, *child; 3155 struct dentry *dentry, *child;
3158 struct inode *dir; 3156 struct inode *dir;
3159 int error = path_lookupat(dfd, pathname->name, 3157 int error = path_lookupat(dfd, pathname,
3160 flags | LOOKUP_DIRECTORY, nd); 3158 flags | LOOKUP_DIRECTORY, nd);
3161 if (unlikely(error)) 3159 if (unlikely(error))
3162 return error; 3160 return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3229 goto out; 3227 goto out;
3230 } 3228 }
3231 3229
3232 error = path_init(dfd, pathname->name, flags, nd); 3230 error = path_init(dfd, pathname, flags, nd);
3233 if (unlikely(error)) 3231 if (unlikely(error))
3234 goto out; 3232 goto out;
3235 3233
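Two details of the getname_flags()/getname_kernel() rework are easy to miss: EMBEDDED_NAME_MAX is now computed with offsetof(struct filename, iname), so the embedded name can also use the struct's tail padding, and the old ->separate flag is replaced by the name->name != name->iname test. A stand-in struct demonstrating both (sizes invented):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define ALLOC_SIZE 64                 /* stand-in for the names_cache object */

struct fname {
    const char *name;                 /* points at iname or an external buf */
    int refcnt;
    char iname[];                     /* flexible tail inside the object    */
};

/* offsetof() lets the name reuse any tail padding of the struct;
 * ALLOC_SIZE - sizeof(struct fname) would waste those bytes. */
#define EMBEDDED_MAX (ALLOC_SIZE - offsetof(struct fname, iname))

int main(void)
{
    struct fname *f = malloc(ALLOC_SIZE);

    if (!f)
        return 1;
    snprintf(f->iname, EMBEDDED_MAX, "%s", "/tmp/example");
    f->name = f->iname;

    /* "is the name embedded?" without a separate bool: */
    printf("embedded=%d max=%zu\n", f->name == f->iname, EMBEDDED_MAX);
    free(f);
    return 0;
}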
diff --git a/fs/namespace.c b/fs/namespace.c
index 82ef1405260e..1f4f9dac6e5a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
632 */ 632 */
633struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) 633struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
634{ 634{
635 struct mount *p, *res; 635 struct mount *p, *res = NULL;
636 res = p = __lookup_mnt(mnt, dentry); 636 p = __lookup_mnt(mnt, dentry);
637 if (!p) 637 if (!p)
638 goto out; 638 goto out;
639 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
640 res = p;
639 hlist_for_each_entry_continue(p, mnt_hash) { 641 hlist_for_each_entry_continue(p, mnt_hash) {
640 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) 642 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
641 break; 643 break;
642 res = p; 644 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
645 res = p;
643 } 646 }
644out: 647out:
645 return res; 648 return res;
@@ -795,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
795/* 798/*
796 * vfsmount lock must be held for write 799 * vfsmount lock must be held for write
797 */ 800 */
798static void detach_mnt(struct mount *mnt, struct path *old_path) 801static void unhash_mnt(struct mount *mnt)
799{ 802{
800 old_path->dentry = mnt->mnt_mountpoint;
801 old_path->mnt = &mnt->mnt_parent->mnt;
802 mnt->mnt_parent = mnt; 803 mnt->mnt_parent = mnt;
803 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 804 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
804 list_del_init(&mnt->mnt_child); 805 list_del_init(&mnt->mnt_child);
@@ -811,6 +812,26 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
811/* 812/*
812 * vfsmount lock must be held for write 813 * vfsmount lock must be held for write
813 */ 814 */
815static void detach_mnt(struct mount *mnt, struct path *old_path)
816{
817 old_path->dentry = mnt->mnt_mountpoint;
818 old_path->mnt = &mnt->mnt_parent->mnt;
819 unhash_mnt(mnt);
820}
821
822/*
823 * vfsmount lock must be held for write
824 */
825static void umount_mnt(struct mount *mnt)
826{
827 /* old mountpoint will be dropped when we can do that */
828 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
829 unhash_mnt(mnt);
830}
831
832/*
833 * vfsmount lock must be held for write
834 */
814void mnt_set_mountpoint(struct mount *mnt, 835void mnt_set_mountpoint(struct mount *mnt,
815 struct mountpoint *mp, 836 struct mountpoint *mp,
816 struct mount *child_mnt) 837 struct mount *child_mnt)
@@ -1078,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt)
1078 rcu_read_unlock(); 1099 rcu_read_unlock();
1079 1100
1080 list_del(&mnt->mnt_instance); 1101 list_del(&mnt->mnt_instance);
1102
1103 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1104 struct mount *p, *tmp;
1105 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1106 umount_mnt(p);
1107 }
1108 }
1081 unlock_mount_hash(); 1109 unlock_mount_hash();
1082 1110
1083 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { 1111 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
@@ -1298,17 +1326,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
1298 1326
1299static void namespace_unlock(void) 1327static void namespace_unlock(void)
1300{ 1328{
1301 struct hlist_head head = unmounted; 1329 struct hlist_head head;
1302 1330
1303 if (likely(hlist_empty(&head))) { 1331 hlist_move_list(&unmounted, &head);
1304 up_write(&namespace_sem);
1305 return;
1306 }
1307 1332
1308 head.first->pprev = &head.first;
1309 INIT_HLIST_HEAD(&unmounted);
1310 up_write(&namespace_sem); 1333 up_write(&namespace_sem);
1311 1334
1335 if (likely(hlist_empty(&head)))
1336 return;
1337
1312 synchronize_rcu(); 1338 synchronize_rcu();
1313 1339
1314 group_pin_kill(&head); 1340 group_pin_kill(&head);
@@ -1319,49 +1345,63 @@ static inline void namespace_lock(void)
1319 down_write(&namespace_sem); 1345 down_write(&namespace_sem);
1320} 1346}
1321 1347
1348enum umount_tree_flags {
1349 UMOUNT_SYNC = 1,
1350 UMOUNT_PROPAGATE = 2,
1351 UMOUNT_CONNECTED = 4,
1352};
1322/* 1353/*
1323 * mount_lock must be held 1354 * mount_lock must be held
1324 * namespace_sem must be held for write 1355 * namespace_sem must be held for write
1325 * how = 0 => just this tree, don't propagate
1326 * how = 1 => propagate; we know that nobody else has reference to any victims
1327 * how = 2 => lazy umount
1328 */ 1356 */
1329void umount_tree(struct mount *mnt, int how) 1357static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1330{ 1358{
1331 HLIST_HEAD(tmp_list); 1359 LIST_HEAD(tmp_list);
1332 struct mount *p; 1360 struct mount *p;
1333 1361
1362 if (how & UMOUNT_PROPAGATE)
1363 propagate_mount_unlock(mnt);
1364
1365 /* Gather the mounts to umount */
1334 for (p = mnt; p; p = next_mnt(p, mnt)) { 1366 for (p = mnt; p; p = next_mnt(p, mnt)) {
1335 hlist_del_init_rcu(&p->mnt_hash); 1367 p->mnt.mnt_flags |= MNT_UMOUNT;
1336 hlist_add_head(&p->mnt_hash, &tmp_list); 1368 list_move(&p->mnt_list, &tmp_list);
1337 } 1369 }
1338 1370
1339 hlist_for_each_entry(p, &tmp_list, mnt_hash) 1371 /* Hide the mounts from mnt_mounts */
1372 list_for_each_entry(p, &tmp_list, mnt_list) {
1340 list_del_init(&p->mnt_child); 1373 list_del_init(&p->mnt_child);
1374 }
1341 1375
1342 if (how) 1376 /* Add propagated mounts to the tmp_list */
1377 if (how & UMOUNT_PROPAGATE)
1343 propagate_umount(&tmp_list); 1378 propagate_umount(&tmp_list);
1344 1379
1345 while (!hlist_empty(&tmp_list)) { 1380 while (!list_empty(&tmp_list)) {
1346 p = hlist_entry(tmp_list.first, struct mount, mnt_hash); 1381 bool disconnect;
1347 hlist_del_init_rcu(&p->mnt_hash); 1382 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1348 list_del_init(&p->mnt_expire); 1383 list_del_init(&p->mnt_expire);
1349 list_del_init(&p->mnt_list); 1384 list_del_init(&p->mnt_list);
1350 __touch_mnt_namespace(p->mnt_ns); 1385 __touch_mnt_namespace(p->mnt_ns);
1351 p->mnt_ns = NULL; 1386 p->mnt_ns = NULL;
1352 if (how < 2) 1387 if (how & UMOUNT_SYNC)
1353 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1388 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1354 1389
1355 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); 1390 disconnect = !(((how & UMOUNT_CONNECTED) &&
1391 mnt_has_parent(p) &&
1392 (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) ||
1393 IS_MNT_LOCKED_AND_LAZY(p));
1394
1395 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1396 disconnect ? &unmounted : NULL);
1356 if (mnt_has_parent(p)) { 1397 if (mnt_has_parent(p)) {
1357 hlist_del_init(&p->mnt_mp_list);
1358 put_mountpoint(p->mnt_mp);
1359 mnt_add_count(p->mnt_parent, -1); 1398 mnt_add_count(p->mnt_parent, -1);
1360 /* old mountpoint will be dropped when we can do that */ 1399 if (!disconnect) {
1361 p->mnt_ex_mountpoint = p->mnt_mountpoint; 1400 /* Don't forget about p */
1362 p->mnt_mountpoint = p->mnt.mnt_root; 1401 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1363 p->mnt_parent = p; 1402 } else {
1364 p->mnt_mp = NULL; 1403 umount_mnt(p);
1404 }
1365 } 1405 }
1366 change_mnt_propagation(p, MS_PRIVATE); 1406 change_mnt_propagation(p, MS_PRIVATE);
1367 } 1407 }
@@ -1447,14 +1487,14 @@ static int do_umount(struct mount *mnt, int flags)
1447 1487
1448 if (flags & MNT_DETACH) { 1488 if (flags & MNT_DETACH) {
1449 if (!list_empty(&mnt->mnt_list)) 1489 if (!list_empty(&mnt->mnt_list))
1450 umount_tree(mnt, 2); 1490 umount_tree(mnt, UMOUNT_PROPAGATE);
1451 retval = 0; 1491 retval = 0;
1452 } else { 1492 } else {
1453 shrink_submounts(mnt); 1493 shrink_submounts(mnt);
1454 retval = -EBUSY; 1494 retval = -EBUSY;
1455 if (!propagate_mount_busy(mnt, 2)) { 1495 if (!propagate_mount_busy(mnt, 2)) {
1456 if (!list_empty(&mnt->mnt_list)) 1496 if (!list_empty(&mnt->mnt_list))
1457 umount_tree(mnt, 1); 1497 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1458 retval = 0; 1498 retval = 0;
1459 } 1499 }
1460 } 1500 }
@@ -1480,13 +1520,20 @@ void __detach_mounts(struct dentry *dentry)
1480 1520
1481 namespace_lock(); 1521 namespace_lock();
1482 mp = lookup_mountpoint(dentry); 1522 mp = lookup_mountpoint(dentry);
1483 if (!mp) 1523 if (IS_ERR_OR_NULL(mp))
1484 goto out_unlock; 1524 goto out_unlock;
1485 1525
1486 lock_mount_hash(); 1526 lock_mount_hash();
1487 while (!hlist_empty(&mp->m_list)) { 1527 while (!hlist_empty(&mp->m_list)) {
1488 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); 1528 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1489 umount_tree(mnt, 2); 1529 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1530 struct mount *p, *tmp;
1531 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1532 hlist_add_head(&p->mnt_umount.s_list, &unmounted);
1533 umount_mnt(p);
1534 }
1535 }
1536 else umount_tree(mnt, UMOUNT_CONNECTED);
1490 } 1537 }
1491 unlock_mount_hash(); 1538 unlock_mount_hash();
1492 put_mountpoint(mp); 1539 put_mountpoint(mp);
@@ -1648,7 +1695,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1648out: 1695out:
1649 if (res) { 1696 if (res) {
1650 lock_mount_hash(); 1697 lock_mount_hash();
1651 umount_tree(res, 0); 1698 umount_tree(res, UMOUNT_SYNC);
1652 unlock_mount_hash(); 1699 unlock_mount_hash();
1653 } 1700 }
1654 return q; 1701 return q;
@@ -1660,8 +1707,11 @@ struct vfsmount *collect_mounts(struct path *path)
1660{ 1707{
1661 struct mount *tree; 1708 struct mount *tree;
1662 namespace_lock(); 1709 namespace_lock();
1663 tree = copy_tree(real_mount(path->mnt), path->dentry, 1710 if (!check_mnt(real_mount(path->mnt)))
1664 CL_COPY_ALL | CL_PRIVATE); 1711 tree = ERR_PTR(-EINVAL);
1712 else
1713 tree = copy_tree(real_mount(path->mnt), path->dentry,
1714 CL_COPY_ALL | CL_PRIVATE);
1665 namespace_unlock(); 1715 namespace_unlock();
1666 if (IS_ERR(tree)) 1716 if (IS_ERR(tree))
1667 return ERR_CAST(tree); 1717 return ERR_CAST(tree);
@@ -1672,7 +1722,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
1672{ 1722{
1673 namespace_lock(); 1723 namespace_lock();
1674 lock_mount_hash(); 1724 lock_mount_hash();
1675 umount_tree(real_mount(mnt), 0); 1725 umount_tree(real_mount(mnt), UMOUNT_SYNC);
1676 unlock_mount_hash(); 1726 unlock_mount_hash();
1677 namespace_unlock(); 1727 namespace_unlock();
1678} 1728}
@@ -1855,7 +1905,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1855 out_cleanup_ids: 1905 out_cleanup_ids:
1856 while (!hlist_empty(&tree_list)) { 1906 while (!hlist_empty(&tree_list)) {
1857 child = hlist_entry(tree_list.first, struct mount, mnt_hash); 1907 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
1858 umount_tree(child, 0); 1908 umount_tree(child, UMOUNT_SYNC);
1859 } 1909 }
1860 unlock_mount_hash(); 1910 unlock_mount_hash();
1861 cleanup_group_ids(source_mnt, NULL); 1911 cleanup_group_ids(source_mnt, NULL);
@@ -2035,7 +2085,7 @@ static int do_loopback(struct path *path, const char *old_name,
2035 err = graft_tree(mnt, parent, mp); 2085 err = graft_tree(mnt, parent, mp);
2036 if (err) { 2086 if (err) {
2037 lock_mount_hash(); 2087 lock_mount_hash();
2038 umount_tree(mnt, 0); 2088 umount_tree(mnt, UMOUNT_SYNC);
2039 unlock_mount_hash(); 2089 unlock_mount_hash();
2040 } 2090 }
2041out2: 2091out2:
@@ -2406,7 +2456,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2406 while (!list_empty(&graveyard)) { 2456 while (!list_empty(&graveyard)) {
2407 mnt = list_first_entry(&graveyard, struct mount, mnt_expire); 2457 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2408 touch_mnt_namespace(mnt->mnt_ns); 2458 touch_mnt_namespace(mnt->mnt_ns);
2409 umount_tree(mnt, 1); 2459 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2410 } 2460 }
2411 unlock_mount_hash(); 2461 unlock_mount_hash();
2412 namespace_unlock(); 2462 namespace_unlock();
@@ -2477,7 +2527,7 @@ static void shrink_submounts(struct mount *mnt)
2477 m = list_first_entry(&graveyard, struct mount, 2527 m = list_first_entry(&graveyard, struct mount,
2478 mnt_expire); 2528 mnt_expire);
2479 touch_mnt_namespace(m->mnt_ns); 2529 touch_mnt_namespace(m->mnt_ns);
2480 umount_tree(m, 1); 2530 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2481 } 2531 }
2482 } 2532 }
2483} 2533}
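umount_tree()'s old how argument packed three behaviours into the ordinals 0/1/2, so tests like how < 2 carried hidden meaning. The new enum makes each behaviour an independent bit that callers can combine, e.g. UMOUNT_PROPAGATE|UMOUNT_SYNC. A trivial sketch of the difference (the printed actions are invented placeholders):

#include <stdio.h>

enum umount_flags {
    UMOUNT_SYNC      = 1 << 0,
    UMOUNT_PROPAGATE = 1 << 1,
    UMOUNT_CONNECTED = 1 << 2,
};

static void umount_tree_demo(const char *who, int how)
{
    /* Each behaviour is tested independently, unlike "how < 2". */
    if (how & UMOUNT_PROPAGATE)
        printf("%s: propagate to peers\n", who);
    if (how & UMOUNT_SYNC)
        printf("%s: mark MNT_SYNC_UMOUNT\n", who);
    if (how & UMOUNT_CONNECTED)
        printf("%s: keep connected children\n", who);
}

int main(void)
{
    umount_tree_demo("do_umount", UMOUNT_PROPAGATE | UMOUNT_SYNC);
    umount_tree_demo("MNT_DETACH", UMOUNT_PROPAGATE);
    return 0;
}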
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 1dd7007f974d..011324ce9df2 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -98,30 +98,24 @@ out:
 }
 
 static ssize_t
-ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
+	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	size_t already_read = 0;
-	off_t pos;
+	off_t pos = iocb->ki_pos;
 	size_t bufsize;
 	int error;
-	void* freepage;
+	void *freepage;
 	size_t freelen;
 
 	ncp_dbg(1, "enter %pD2\n", file);
 
-	pos = *ppos;
-
-	if ((ssize_t) count < 0) {
-		return -EINVAL;
-	}
-	if (!count)
+	if (!iov_iter_count(to))
 		return 0;
 	if (pos > inode->i_sb->s_maxbytes)
 		return 0;
-	if (pos + count > inode->i_sb->s_maxbytes) {
-		count = inode->i_sb->s_maxbytes - pos;
-	}
+	iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos);
 
 	error = ncp_make_open(inode, O_RDONLY);
 	if (error) {
@@ -138,31 +132,29 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 		goto outrel;
 	error = 0;
 	/* First read in as much as possible for each bufsize. */
-	while (already_read < count) {
+	while (iov_iter_count(to)) {
 		int read_this_time;
-		size_t to_read = min_t(unsigned int,
+		size_t to_read = min_t(size_t,
 				bufsize - (pos % bufsize),
-				count - already_read);
+				iov_iter_count(to));
 
 		error = ncp_read_bounce(NCP_SERVER(inode),
 				NCP_FINFO(inode)->file_handle,
-				pos, to_read, buf, &read_this_time,
+				pos, to_read, to, &read_this_time,
 				freepage, freelen);
 		if (error) {
 			error = -EIO;	/* NW errno -> Linux errno */
 			break;
 		}
 		pos += read_this_time;
-		buf += read_this_time;
 		already_read += read_this_time;
 
-		if (read_this_time != to_read) {
+		if (read_this_time != to_read)
 			break;
-		}
 	}
 	vfree(freepage);
 
-	*ppos = pos;
+	iocb->ki_pos = pos;
 
 	file_accessed(file);
 
@@ -173,42 +165,21 @@ outrel:
 }
 
 static ssize_t
-ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
+	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	size_t already_written = 0;
-	off_t pos;
 	size_t bufsize;
 	int errno;
-	void* bouncebuffer;
+	void *bouncebuffer;
+	off_t pos;
 
 	ncp_dbg(1, "enter %pD2\n", file);
-	if ((ssize_t) count < 0)
-		return -EINVAL;
-	pos = *ppos;
-	if (file->f_flags & O_APPEND) {
-		pos = i_size_read(inode);
-	}
+	errno = generic_write_checks(iocb, from);
+	if (errno <= 0)
+		return errno;
 
-	if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
-		if (pos >= MAX_NON_LFS) {
-			return -EFBIG;
-		}
-		if (count > MAX_NON_LFS - (u32)pos) {
-			count = MAX_NON_LFS - (u32)pos;
-		}
-	}
-	if (pos >= inode->i_sb->s_maxbytes) {
-		if (count || pos > inode->i_sb->s_maxbytes) {
-			return -EFBIG;
-		}
-	}
-	if (pos + count > inode->i_sb->s_maxbytes) {
-		count = inode->i_sb->s_maxbytes - pos;
-	}
-
-	if (!count)
-		return 0;
 	errno = ncp_make_open(inode, O_WRONLY);
 	if (errno) {
 		ncp_dbg(1, "open failed, error=%d\n", errno);
@@ -216,8 +187,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 	}
 	bufsize = NCP_SERVER(inode)->buffer_size;
 
-	already_written = 0;
-
 	errno = file_update_time(file);
 	if (errno)
 		goto outrel;
@@ -227,13 +196,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 		errno = -EIO;	/* -ENOMEM */
 		goto outrel;
 	}
-	while (already_written < count) {
+	pos = iocb->ki_pos;
+	while (iov_iter_count(from)) {
 		int written_this_time;
-		size_t to_write = min_t(unsigned int,
+		size_t to_write = min_t(size_t,
 				bufsize - (pos % bufsize),
-				count - already_written);
+				iov_iter_count(from));
 
-		if (copy_from_user(bouncebuffer, buf, to_write)) {
+		if (copy_from_iter(bouncebuffer, to_write, from) != to_write) {
 			errno = -EFAULT;
 			break;
 		}
@@ -244,16 +214,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 			break;
 		}
 		pos += written_this_time;
-		buf += written_this_time;
 		already_written += written_this_time;
 
-		if (written_this_time != to_write) {
+		if (written_this_time != to_write)
 			break;
-		}
 	}
 	vfree(bouncebuffer);
 
-	*ppos = pos;
+	iocb->ki_pos = pos;
 
 	if (pos > i_size_read(inode)) {
 		mutex_lock(&inode->i_mutex);
@@ -277,8 +245,8 @@ static int ncp_release(struct inode *inode, struct file *file) {
 const struct file_operations ncp_file_operations =
 {
 	.llseek		= generic_file_llseek,
-	.read		= ncp_file_read,
-	.write		= ncp_file_write,
+	.read_iter	= ncp_file_read_iter,
+	.write_iter	= ncp_file_write_iter,
 	.unlocked_ioctl	= ncp_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ncp_compat_ioctl,
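The ncpfs conversion above is the standard buf/count/ppos to read_iter/write_iter rewrite: the file position moves into iocb->ki_pos, the flat user buffer becomes a struct iov_iter, and copy_to_iter()/copy_from_iter() replace the copy_*_user() calls while also advancing the iterator for the caller. A minimal userspace sketch of the read loop's shape, with toy stand-ins for the iterator; all names below are illustrative, not kernel APIs:

    #include <stddef.h>
    #include <string.h>

    /* Toy stand-in for struct iov_iter: one flat destination buffer. */
    struct toy_iter {
        char   *buf;
        size_t  count;  /* bytes remaining in the iterator */
    };

    static size_t toy_iter_count(const struct toy_iter *it)
    {
        return it->count;
    }

    /* copy_to_iter() analogue: copies and advances the iterator. */
    static size_t toy_copy_to_iter(const void *src, size_t len, struct toy_iter *it)
    {
        if (len > it->count)
            len = it->count;
        memcpy(it->buf, src, len);
        it->buf += len;
        it->count -= len;
        return len;
    }

    /*
     * Read from a flat backing store in bufsize-aligned chunks, mirroring
     * the shape of ncp_file_read_iter(): the iterator replaces buf/count,
     * and the caller-held position replaces *ppos.
     */
    static size_t toy_read_iter(const char *store, size_t store_len,
                                size_t bufsize, size_t *pos, struct toy_iter *it)
    {
        size_t already_read = 0;

        while (toy_iter_count(it) && *pos < store_len) {
            size_t to_read = bufsize - (*pos % bufsize);
            size_t got;

            if (to_read > toy_iter_count(it))
                to_read = toy_iter_count(it);
            if (to_read > store_len - *pos)
                to_read = store_len - *pos;

            got = toy_copy_to_iter(store + *pos, to_read, it);
            *pos += got;
            already_read += got;
            if (got != to_read)   /* short transfer: stop early */
                break;
        }
        return already_read;
    }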
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 482387532f54..2b502a0d7941 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -1001,8 +1001,8 @@ out:
  */
 int
 ncp_read_bounce(struct ncp_server *server, const char *file_id,
-		__u32 offset, __u16 to_read, char __user *target, int *bytes_read,
-		void* bounce, __u32 bufsize)
+		__u32 offset, __u16 to_read, struct iov_iter *to,
+		int *bytes_read, void *bounce, __u32 bufsize)
 {
 	int result;
 
@@ -1025,7 +1025,7 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
 				(offset & 1);
 			*bytes_read = len;
 			result = 0;
-			if (copy_to_user(target, source, len))
+			if (copy_to_iter(source, len, to) != len)
 				result = -EFAULT;
 		}
 	}
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 250e443a07f3..5233fbc1747a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -53,7 +53,7 @@ static inline int ncp_read_bounce_size(__u32 size) {
 	return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8;
 };
 int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16,
-		char __user *, int *, void* bounce, __u32 bouncelen);
+		struct iov_iter *, int *, void *bounce, __u32 bouncelen);
 int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16,
 		char *, int *);
 int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16,
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index c7abc10279af..f31fd0dd92c6 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,6 +1,6 @@
 config NFS_FS
 	tristate "NFS client support"
-	depends on INET && FILE_LOCKING
+	depends on INET && FILE_LOCKING && MULTIUSER
 	select LOCKD
 	select SUNRPC
 	select NFS_ACL_SUPPORT if NFS_V3_ACL
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f9f4845db989..19874151e95c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 
 static bool nfs_client_init_is_complete(const struct nfs_client *clp)
 {
-	return clp->cl_cons_state != NFS_CS_INITING;
+	return clp->cl_cons_state <= NFS_CS_READY;
 }
 
 int nfs_wait_client_init_complete(const struct nfs_client *clp)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a1f0685b42ff..a6ad68865880 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
 			clear_bit(NFS_DELEGATION_NEED_RECLAIM,
 					&delegation->flags);
 			spin_unlock(&delegation->lock);
-			put_rpccred(oldcred);
 			rcu_read_unlock();
+			put_rpccred(oldcred);
 			trace_nfs4_reclaim_delegation(inode, res->delegation_type);
 	} else {
 		/* We appear to have raced with a delegation return. */
@@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			delegation = NULL;
 			goto out;
 		}
-		freeme = nfs_detach_delegation_locked(nfsi,
+		if (test_and_set_bit(NFS_DELEGATION_RETURNING,
+					&old_delegation->flags))
+			goto out;
+		freeme = nfs_detach_delegation_locked(nfsi,
 				old_delegation, clp);
 		if (freeme == NULL)
 			goto out;
@@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
 {
 	bool ret = false;
 
+	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+		goto out;
 	if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
 		ret = true;
 	if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
 		ret = true;
 		spin_unlock(&delegation->lock);
 	}
+out:
 	return ret;
 }
 
@@ -471,14 +477,20 @@ restart:
 				super_list) {
 		if (!nfs_delegation_need_return(delegation))
 			continue;
-		inode = nfs_delegation_grab_inode(delegation);
-		if (inode == NULL)
+		if (!nfs_sb_active(server->super))
 			continue;
+		inode = nfs_delegation_grab_inode(delegation);
+		if (inode == NULL) {
+			rcu_read_unlock();
+			nfs_sb_deactive(server->super);
+			goto restart;
+		}
 		delegation = nfs_start_delegation_return_locked(NFS_I(inode));
 		rcu_read_unlock();
 
 		err = nfs_end_delegation_return(inode, delegation, 0);
 		iput(inode);
+		nfs_sb_deactive(server->super);
 		if (!err)
 			goto restart;
 		set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
@@ -809,19 +821,30 @@ restart:
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 		list_for_each_entry_rcu(delegation, &server->delegations,
 				super_list) {
+			if (test_bit(NFS_DELEGATION_RETURNING,
+						&delegation->flags))
+				continue;
 			if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
 						&delegation->flags) == 0)
 				continue;
-			inode = nfs_delegation_grab_inode(delegation);
-			if (inode == NULL)
+			if (!nfs_sb_active(server->super))
 				continue;
-			delegation = nfs_detach_delegation(NFS_I(inode),
-					delegation, server);
+			inode = nfs_delegation_grab_inode(delegation);
+			if (inode == NULL) {
+				rcu_read_unlock();
+				nfs_sb_deactive(server->super);
+				goto restart;
+			}
+			delegation = nfs_start_delegation_return_locked(NFS_I(inode));
 			rcu_read_unlock();
-
-			if (delegation != NULL)
-				nfs_free_delegation(delegation);
+			if (delegation != NULL) {
+				delegation = nfs_detach_delegation(NFS_I(inode),
+						delegation, server);
+				if (delegation != NULL)
+					nfs_free_delegation(delegation);
+			}
 			iput(inode);
+			nfs_sb_deactive(server->super);
 			goto restart;
 		}
 	}
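The delegation.c changes above all follow one pattern: before dropping rcu_read_lock() to do anything that can block on a list entry, pin the entry's superblock with nfs_sb_active(), and restart the scan afterwards because the list may have changed while unlocked. A hedged userspace analogue of that pattern, with a rwlock standing in for RCU and an atomic refcount for the superblock reference (illustrative names only, not kernel APIs):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct sb {
        atomic_int active;   /* nfs_sb_active()/nfs_sb_deactive() analogue */
    };

    struct delegation {
        struct delegation *next;
        struct sb *super;
        bool need_return;
    };

    static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct delegation *delegations;

    /* Refuse to pin a superblock whose refcount already hit zero. */
    static bool sb_active(struct sb *s)
    {
        int old = atomic_load(&s->active);

        do {
            if (old == 0)
                return false;
        } while (!atomic_compare_exchange_weak(&s->active, &old, old + 1));
        return true;
    }

    static void sb_deactive(struct sb *s)
    {
        atomic_fetch_sub(&s->active, 1);
    }

    /* Stand-in for the blocking delegreturn RPC. */
    static void return_delegation_blocking(struct delegation *d)
    {
        d->need_return = false;
    }

    static void return_marked_delegations(void)
    {
    restart:
        pthread_rwlock_rdlock(&list_lock);
        for (struct delegation *d = delegations; d != NULL; d = d->next) {
            if (!d->need_return)
                continue;
            if (!sb_active(d->super))
                continue;    /* umount in progress: skip this entry */
            /* Drop the list lock before doing anything that blocks. */
            pthread_rwlock_unlock(&list_lock);
            return_delegation_blocking(d);
            sb_deactive(d->super);
            /* The list may have changed while unlocked: rescan. */
            goto restart;
        }
        pthread_rwlock_unlock(&list_lock);
    }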
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b0c55cb2a2e..c19e16f0b2d0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
 	return 0;
 }
 
+/* Match file and dirent using either filehandle or fileid
+ * Note: caller is responsible for checking the fsid
+ */
 static
 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
+	struct nfs_inode *nfsi;
+
 	if (dentry->d_inode == NULL)
 		goto different;
-	if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
-		goto different;
-	return 1;
+
+	nfsi = NFS_I(dentry->d_inode);
+	if (entry->fattr->fileid == nfsi->fileid)
+		return 1;
+	if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
+		return 1;
 different:
 	return 0;
 }
@@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 	struct inode *inode;
 	int status;
 
+	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
+		return;
+	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
+		return;
 	if (filename.name[0] == '.') {
 		if (filename.len == 1)
 			return;
@@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 
 	dentry = d_lookup(parent, &filename);
 	if (dentry != NULL) {
+		/* Is there a mountpoint here? If so, just exit */
+		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
+					&entry->fattr->fsid))
+			goto out;
 		if (nfs_same_file(dentry, entry)) {
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 			status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8cf732e..682f65fe09b5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -240,7 +240,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
 
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
- * @rw: direction (read or write)
  * @iocb: target I/O control block
  * @iov: array of vectors that define I/O buffer
  * @pos: offset in file to begin the operation
@@ -251,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
  * shunt off direct read and write requests before the VFS gets them,
  * so this method is only ever called for swap.
  */
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
+ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 
@@ -265,11 +264,11 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 
 	return -EINVAL;
 #else
-	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
-	if (rw == READ)
+	if (iov_iter_rw(iter) == READ)
 		return nfs_file_direct_read(iocb, iter, pos);
-	return nfs_file_direct_write(iocb, iter, pos);
+	return nfs_file_direct_write(iocb, iter);
 #endif /* CONFIG_NFS_SWAP */
 }
 
@@ -393,7 +392,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 		long res = (long) dreq->error;
 		if (!res)
 			res = (long) dreq->count;
-		aio_complete(dreq->iocb, res, 0);
+		dreq->iocb->ki_complete(dreq->iocb, res, 0);
 	}
 
 	complete_all(&dreq->completion);
@@ -960,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
-				loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 {
 	ssize_t result = -EINVAL;
 	struct file *file = iocb->ki_filp;
@@ -969,25 +967,16 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	struct inode *inode = mapping->host;
 	struct nfs_direct_req *dreq;
 	struct nfs_lock_context *l_ctx;
-	loff_t end;
-	size_t count = iov_iter_count(iter);
-	end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
-
-	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
+	loff_t pos, end;
 
 	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
-		file, count, (long long) pos);
+		file, iov_iter_count(iter), (long long) iocb->ki_pos);
 
-	result = generic_write_checks(file, &pos, &count, 0);
-	if (result)
-		goto out;
+	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES,
+		      iov_iter_count(iter));
 
-	result = -EINVAL;
-	if ((ssize_t) count < 0)
-		goto out;
-	result = 0;
-	if (!count)
-		goto out;
+	pos = iocb->ki_pos;
+	end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
 
 	mutex_lock(&inode->i_mutex);
 
@@ -1002,7 +991,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 		goto out_unlock;
 	}
 
-	task_io_account_write(count);
+	task_io_account_write(iov_iter_count(iter));
 
 	result = -ENOMEM;
 	dreq = nfs_direct_req_alloc();
@@ -1010,7 +999,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 		goto out_unlock;
 
 	dreq->inode = inode;
-	dreq->bytes_left = count;
+	dreq->bytes_left = iov_iter_count(iter);
 	dreq->io_start = pos;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -1050,7 +1039,6 @@ out_release:
 	nfs_direct_req_release(dreq);
 out_unlock:
 	mutex_unlock(&inode->i_mutex);
-out:
 	return result;
 }
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94712fc781fa..c40e4363e746 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/swap.h>
 
@@ -171,14 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t result;
 
-	if (iocb->ki_filp->f_flags & O_DIRECT)
+	if (iocb->ki_flags & IOCB_DIRECT)
 		return nfs_file_direct_read(iocb, to, iocb->ki_pos);
 
 	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
 		iov_iter_count(to), (unsigned long) iocb->ki_pos);
 
-	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
 		result = generic_file_read_iter(iocb, to);
 		if (result > 0)
@@ -199,7 +198,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
 	dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
 		filp, (unsigned long) count, (unsigned long long) *ppos);
 
-	res = nfs_revalidate_mapping(inode, filp->f_mapping);
+	res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
 	if (!res) {
 		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
 		if (res > 0)
@@ -372,6 +371,10 @@ start:
 				 nfs_wait_bit_killable, TASK_KILLABLE);
 	if (ret)
 		return ret;
+	/*
+	 * Wait for O_DIRECT to complete
+	 */
+	nfs_inode_dio_wait(mapping->host);
 
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
@@ -619,6 +622,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* make sure the cache has finished storing the page */
 	nfs_fscache_wait_on_page_write(NFS_I(inode), page);
 
+	wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
+			nfs_wait_bit_killable, TASK_KILLABLE);
+
 	lock_page(page);
 	mapping = page_file_mapping(page);
 	if (mapping != inode->i_mapping)
@@ -668,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	unsigned long written = 0;
 	ssize_t result;
 	size_t count = iov_iter_count(from);
-	loff_t pos = iocb->ki_pos;
 
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
 
-	if (file->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, from, pos);
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		result = generic_write_checks(iocb, from);
+		if (result <= 0)
+			return result;
+		return nfs_file_direct_write(iocb, from);
+	}
 
 	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
-		file, count, (long long) pos);
+		file, count, (long long) iocb->ki_pos);
 
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
@@ -686,7 +695,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	/*
 	 * O_APPEND implies that we must revalidate the file length.
 	 */
-	if (file->f_flags & O_APPEND) {
+	if (iocb->ki_flags & IOCB_APPEND) {
 		result = nfs_revalidate_file_size(inode, file);
 		if (result)
 			goto out;
@@ -920,8 +929,6 @@ EXPORT_SYMBOL_GPL(nfs_flock);
 
 const struct file_operations nfs_file_operations = {
 	.llseek		= nfs_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= nfs_file_read,
 	.write_iter	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
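With the iocb-based API above, generic_write_checks() takes the kiocb and the iov_iter directly, so the O_DIRECT branch in nfs_file_write() has to run the checks itself before handing off, and O_APPEND/O_DIRECT are read from iocb->ki_flags rather than file->f_flags. A toy sketch of that dispatch shape, with stand-in types and a made-up error value rather than the kernel's:

    #include <stddef.h>

    enum { TOY_IOCB_DIRECT = 0x01, TOY_IOCB_APPEND = 0x02 };

    struct toy_iocb {
        int flags;
        long long pos;
    };

    struct toy_iter {
        size_t count;
    };

    /*
     * generic_write_checks() analogue: returns <= 0 for "error or nothing
     * to write", otherwise the (possibly truncated) byte count.
     */
    static long long toy_write_checks(struct toy_iocb *iocb, struct toy_iter *from,
                                      long long maxbytes, long long isize)
    {
        if (iocb->flags & TOY_IOCB_APPEND)
            iocb->pos = isize;
        if (iocb->pos >= maxbytes)
            return from->count ? -1 /* stand-in for -EFBIG */ : 0;
        if ((long long)from->count > maxbytes - iocb->pos)
            from->count = maxbytes - iocb->pos;  /* like iov_iter_truncate() */
        return (long long)from->count;
    }

    static long long toy_direct_write(struct toy_iocb *iocb, struct toy_iter *from)
    {
        /* A real implementation would issue the uncached I/O here. */
        iocb->pos += from->count;
        return (long long)from->count;
    }

    static long long toy_file_write_iter(struct toy_iocb *iocb, struct toy_iter *from,
                                         long long maxbytes, long long isize)
    {
        if (iocb->flags & TOY_IOCB_DIRECT) {
            long long ret = toy_write_checks(iocb, from, maxbytes, isize);

            if (ret <= 0)
                return ret;
            return toy_direct_write(iocb, from);
        }
        /* The buffered path would go through the page cache instead. */
        return 0;
    }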
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..d42dff6d5e98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
  * This is a copy of the common vmtruncate, but with the locking
  * corrected to take into account the fact that NFS requires
  * inode->i_size to be updated under the inode->i_lock.
+ * Note: must be called with inode->i_lock held!
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 	if (err)
 		goto out;
 
-	spin_lock(&inode->i_lock);
 	i_size_write(inode, offset);
 	/* Optimisation */
 	if (offset == 0)
 		NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
-	spin_unlock(&inode->i_lock);
 
+	spin_unlock(&inode->i_lock);
 	truncate_pagecache(inode, offset);
+	spin_lock(&inode->i_lock);
 out:
 	return err;
 }
@@ -585,10 +586,15 @@ out:
  * Note: we do this in the *proc.c in order to ensure that
  * it works for things like exclusive creates too.
  */
-void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+		struct nfs_fattr *fattr)
 {
+	/* Barrier: bump the attribute generation count. */
+	nfs_fattr_set_barrier(fattr);
+
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->attr_gencount = fattr->gencount;
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
-		spin_lock(&inode->i_lock);
 		if ((attr->ia_valid & ATTR_MODE) != 0) {
 			int mode = attr->ia_mode & S_IALLUGO;
 			mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 			inode->i_gid = attr->ia_gid;
 		nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL);
-		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
+	nfs_update_inode(inode, fattr);
+	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
 
@@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 
 	if (mapping->nrpages != 0) {
 		if (S_ISREG(inode->i_mode)) {
+			unmap_mapping_range(mapping, 0, 0, 0);
 			ret = nfs_sync_mapping(mapping);
 			if (ret < 0)
 				return ret;
@@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 }
 
 /**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * __nfs_revalidate_mapping - Revalidate the pagecache
  * @inode - pointer to host inode
  * @mapping - pointer to mapping
+ * @may_lock - take inode->i_mutex?
  */
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+static int __nfs_revalidate_mapping(struct inode *inode,
+		struct address_space *mapping,
+		bool may_lock)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	unsigned long *bitlock = &nfsi->flags;
@@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
 	spin_unlock(&inode->i_lock);
 	trace_nfs_invalidate_mapping_enter(inode);
-	ret = nfs_invalidate_mapping(inode, mapping);
+	if (may_lock) {
+		mutex_lock(&inode->i_mutex);
+		ret = nfs_invalidate_mapping(inode, mapping);
+		mutex_unlock(&inode->i_mutex);
+	} else
+		ret = nfs_invalidate_mapping(inode, mapping);
 	trace_nfs_invalidate_mapping_exit(inode, ret);
 
 	clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1123,6 +1139,29 @@ out:
 	return ret;
 }
 
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+	return __nfs_revalidate_mapping(inode, mapping, false);
+}
+
+/**
+ * nfs_revalidate_mapping_protected - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
+ * while invalidating the mapping.
+ */
+int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
+{
+	return __nfs_revalidate_mapping(inode, mapping, true);
+}
+
 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat
 	return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
 }
 
-static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
-{
-	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
-		return 0;
-	return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
-}
-
 static atomic_long_t nfs_attr_generation_counter;
 
 static unsigned long nfs_read_attr_generation_counter(void)
@@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void)
 {
 	return atomic_long_inc_return(&nfs_attr_generation_counter);
 }
+EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter);
 
 void nfs_fattr_init(struct nfs_fattr *fattr)
 {
@@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 }
 EXPORT_SYMBOL_GPL(nfs_fattr_init);
 
+/**
+ * nfs_fattr_set_barrier
+ * @fattr: attributes
+ *
+ * Used to set a barrier after an attribute was updated. This
+ * barrier ensures that older attributes from RPC calls that may
+ * have raced with our update cannot clobber these new values.
+ * Note that you are still responsible for ensuring that other
+ * operations which change the attribute on the server do not
+ * collide.
+ */
+void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
+{
+	fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
 struct nfs_fattr *nfs_alloc_fattr(void)
 {
 	struct nfs_fattr *fattr;
@@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
 
 	return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
 		nfs_ctime_need_update(inode, fattr) ||
-		nfs_size_need_update(inode, fattr) ||
 		((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
 }
 
@@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	int status;
 
 	spin_lock(&inode->i_lock);
+	nfs_fattr_set_barrier(fattr);
 	status = nfs_post_op_update_inode_locked(inode, fattr);
 	spin_unlock(&inode->i_lock);
 
@@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 
 /**
- * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
  *
@@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 *
  * This function is mainly designed to be used by the ->write_done() functions.
  */
-int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
 	int status;
 
-	spin_lock(&inode->i_lock);
 	/* Don't do a WCC update if these attributes are already stale */
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
 			!nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 	}
 out_noforce:
 	status = nfs_post_op_update_inode_locked(inode, fattr);
+	return status;
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+	int status;
+
+	spin_lock(&inode->i_lock);
+	nfs_fattr_set_barrier(fattr);
+	status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
 	spin_unlock(&inode->i_lock);
 	return status;
 }
@@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = now;
+		/* Set barrier to be more recent than all outstanding updates */
 		nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	} else {
 		if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
@@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
 			nfsi->attrtimeo_timestamp = now;
 		}
+		/* Set the barrier to be more recent than this fattr */
+		if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+			nfsi->attr_gencount = fattr->gencount;
 	}
 	invalid &= ~NFS_INO_INVALID_ATTR;
 	/* Don't invalidate the data if we were to blame */
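The attribute barrier machinery above hangs off a single global generation counter: every fattr is stamped with the generation at which it was obtained, nfs_fattr_set_barrier() re-stamps a reply so it outranks any RPC that raced with it, and an update is accepted only when the signed difference of generations is positive, which keeps the comparison correct across counter wraparound. A compact userspace sketch of the scheme, with illustrative names rather than the kernel's:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Global counter, as in nfs_inc_attr_generation_counter(). */
    static atomic_long attr_generation;

    static long inc_attr_generation(void)
    {
        return atomic_fetch_add(&attr_generation, 1) + 1;
    }

    struct toy_fattr {
        long gencount;        /* when these attributes were obtained */
        /* ... decoded attributes ... */
    };

    struct toy_inode {
        long attr_gencount;   /* generation of the cached attributes */
        /* ... cached attributes ... */
    };

    /* nfs_fattr_set_barrier() analogue: stamp a reply newer than all so far. */
    static void fattr_set_barrier(struct toy_fattr *fattr)
    {
        fattr->gencount = inc_attr_generation();
    }

    /*
     * Accept an update only if its generation is newer than the cache;
     * the signed subtraction survives wraparound of the counter.
     */
    static bool attrs_need_update(const struct toy_inode *inode,
                                  const struct toy_fattr *fattr)
    {
        return (long)(fattr->gencount - inode->attr_gencount) > 0;
    }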
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
 			    struct nfs_commit_info *cinfo,
 			    u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
 			    int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+		nfs_writeback_update_inode(hdr);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 2a932fdc57cb..53852a4bd88b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
 		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
 
+	if (entry->fattr->fileid != entry->ino) {
+		entry->fattr->mounted_on_fileid = entry->ino;
+		entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
+	}
+
 	/* In fact, a post_op_fh3: */
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8646af9b11d2..86d6214ea022 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new,
 	spin_lock(&nn->nfs_client_lock);
 	list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
 
+		if (pos == new)
+			goto found;
+
 		if (pos->rpc_ops != new->rpc_ops)
 			continue;
 
@@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		prev = pos;
 
 		status = nfs_wait_client_init_complete(pos);
-		if (pos->cl_cons_state == NFS_CS_SESSION_INITING) {
-			nfs4_schedule_lease_recovery(pos);
-			status = nfs4_wait_clnt_recover(pos);
-		}
 		spin_lock(&nn->nfs_client_lock);
 		if (status < 0)
 			break;
@@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 		 */
 		if (!nfs4_match_client_owner_id(pos, new))
 			continue;
-
+found:
 		atomic_inc(&pos->cl_count);
 		*result = pos;
 		status = 0;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8b46389c4c5b..0181cde1d102 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -170,8 +170,6 @@ const struct file_operations nfs4_file_operations = {
 #else
 	.llseek		= nfs_file_llseek,
 #endif
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= nfs_file_read,
 	.write_iter	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 88180ac5ea0e..627f37c44456 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 	if (!cinfo->atomic || cinfo->before != dir->i_version)
 		nfs_force_lookup_revalidate(dir);
 	dir->i_version = cinfo->after;
+	nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	nfs_fscache_invalidate(dir);
 	spin_unlock(&dir->i_lock);
 }
@@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
 
 	opendata->o_arg.open_flags = 0;
 	opendata->o_arg.fmode = fmode;
+	opendata->o_arg.share_access = nfs4_map_atomic_open_share(
+			NFS_SB(opendata->dentry->d_sb),
+			fmode, 0);
 	memset(&opendata->o_res, 0, sizeof(opendata->o_res));
 	memset(&opendata->c_res, 0, sizeof(opendata->c_res));
 	nfs4_init_opendata_res(opendata);
@@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir,
 				opendata->o_res.f_attr, sattr,
 				state, label, olabel);
 		if (status == 0) {
-			nfs_setattr_update_inode(state->inode, sattr);
-			nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+			nfs_setattr_update_inode(state->inode, sattr,
+					opendata->o_res.f_attr);
 			nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
 		}
 	}
@@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	case -NFS4ERR_BAD_STATEID:
 	case -NFS4ERR_EXPIRED:
 		if (!nfs4_stateid_match(&calldata->arg.stateid,
-					&state->stateid)) {
+					&state->open_stateid)) {
 			rpc_restart_call_prepare(task);
 			goto out_release;
 		}
@@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
 	is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
 	is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
-	nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid);
+	nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
 	/* Calculate the change in open mode */
 	calldata->arg.fmode = 0;
 	if (state->n_rdwr == 0) {
@@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
 	if (status == 0) {
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 		nfs_setsecurity(inode, fattr, label);
 	}
 	nfs4_label_free(label);
@@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), hdr->timestamp);
-		nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
+		nfs_writeback_update_inode(hdr);
 	}
 	return 0;
 }
@@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 
 	if (status == 0) {
 		clp->cl_clientid = res.clientid;
-		clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
-		if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+		clp->cl_exchange_flags = res.flags;
+		/* Client ID is not confirmed */
+		if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+			clear_bit(NFS4_SESSION_ESTABLISHED,
+					&clp->cl_session->session_state);
 			clp->cl_seqid = res.seqid;
+		}
 
 		kfree(clp->cl_serverowner);
 		clp->cl_serverowner = res.server_owner;
@@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session,
 		struct nfs41_create_session_res *res)
 {
 	nfs4_copy_sessionid(&session->sess_id, &res->sessionid);
+	/* Mark client id and session as being confirmed */
+	session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
+	set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state);
 	session->flags = res->flags;
 	memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs));
 	if (res->flags & SESSION4_BACK_CHAN)
@@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
 	dprintk("--> nfs4_proc_destroy_session\n");
 
 	/* session is still being setup */
-	if (session->clp->cl_cons_state != NFS_CS_READY)
-		return status;
+	if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state))
+		return 0;
 
 	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	trace_nfs4_destroy_session(session->clp, status);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index fc46c7455898..e3ea2c5324d6 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -70,6 +70,7 @@ struct nfs4_session {
 
 enum nfs4_session_state {
 	NFS4_SESSION_INITING,
+	NFS4_SESSION_ESTABLISHED,
 };
 
 extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ad908e9ce9c..f95e3b58bbc3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
 	status = nfs4_proc_exchange_id(clp, cred);
 	if (status != NFS4_OK)
 		return status;
-	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
 
-	return nfs41_walk_client_list(clp, result, cred);
+	status = nfs41_walk_client_list(clp, result, cred);
+	if (status < 0)
+		return status;
+	if (clp != *result)
+		return 0;
+
+	/* Purge state if the client id was established in a prior instance */
+	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
+		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+	else
+		set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+	nfs4_schedule_state_manager(clp);
+	status = nfs_wait_client_init_complete(clp);
+	if (status < 0)
+		nfs_put_client(clp);
+	return status;
 }
 
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
-		nfs_setattr_update_inode(inode, sattr);
+		nfs_setattr_update_inode(inode, sattr, fattr);
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-	struct inode *inode = hdr->inode;
-
 	if (task->tk_status >= 0)
-		nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+		nfs_writeback_update_inode(hdr);
 	return 0;
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 568ecf0a880f..b8f5c63f77b2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -117,15 +117,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 
 static void nfs_readpage_release(struct nfs_page *req)
 {
-	struct inode *d_inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = req->wb_context->dentry->d_inode;
 
-	dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+	dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(inode), req->wb_bytes,
 		(long long)req_offset(req));
 
 	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
 		if (PageUptodate(req->wb_page))
-			nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+			nfs_readpage_to_fscache(inode, req->wb_page, 0);
 
 		unlock_page(req->wb_page);
 	}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..759931088094 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
 	return 0;
 }
 
+static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
+		struct nfs_fattr *fattr)
+{
+	struct nfs_pgio_args *argp = &hdr->args;
+	struct nfs_pgio_res *resp = &hdr->res;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+		return;
+	if (argp->offset + resp->count != fattr->size)
+		return;
+	if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+		return;
+	/* Set attribute barrier */
+	nfs_fattr_set_barrier(fattr);
+}
+
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
+{
+	struct nfs_fattr *fattr = hdr->res.fattr;
+	struct inode *inode = hdr->inode;
+
+	if (fattr == NULL)
+		return;
+	spin_lock(&inode->i_lock);
+	nfs_writeback_check_extend(hdr, fattr);
+	nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
+	spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
+
 /*
  * This function is called when the WRITE call is complete.
  */
@@ -1846,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
 		 * request from the inode / page_private pointer and
 		 * release it */
 		nfs_inode_remove_request(req);
-		/*
-		 * In case nfs_inode_remove_request has marked the
-		 * page as being dirty
-		 */
-		cancel_dirty_page(page, PAGE_CACHE_SIZE);
 		nfs_unlock_and_release_request(req);
 	}
 
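nfs_writeback_check_extend() above replaces the removed nfs_size_need_update() heuristic: instead of trusting any larger post-op size from the server, a WRITE reply may only extend the file when offset + count lands exactly on the reported size and the result does not shrink the locally cached i_size; in that case the fattr gets a barrier so the new size wins over racing replies. A small sketch of that check under the same assumptions, with toy types rather than the kernel's:

    #include <stdbool.h>

    static long attr_generation_stub;

    static long inc_attr_generation(void)
    {
        return ++attr_generation_stub;
    }

    struct wb_reply {
        bool size_valid;            /* NFS_ATTR_FATTR_SIZE analogue */
        unsigned long long size;    /* post-op file size from the server */
        long gencount;
    };

    /*
     * Promote the reply's post-op size only when this very WRITE produced
     * it and it does not shrink the local idea of the file size.
     */
    static void writeback_check_extend(unsigned long long write_offset,
                                       unsigned int write_count,
                                       unsigned long long cached_isize,
                                       struct wb_reply *reply)
    {
        if (!reply->size_valid)
            return;
        if (write_offset + write_count != reply->size)
            return;                 /* size was not set by this write */
        if (reply->size < cached_isize)
            return;                 /* would shrink i_size: ignore it */
        reply->gencount = inc_attr_generation();  /* barrier: let it win */
    }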
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 683bf718aead..a0b77fc1bd39 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -6,6 +6,7 @@ config NFSD
 	select SUNRPC
 	select EXPORTFS
 	select NFS_ACL_SUPPORT if NFSD_V2_ACL
+	depends on MULTIUSER
 	help
 	  Choose Y here if you want to allow other computers to access
 	  files residing on this system using Sun's Network File System
@@ -107,7 +108,7 @@ config NFSD_V4_SECURITY_LABEL
 
 config NFSD_FAULT_INJECTION
 	bool "NFS server manual fault injection"
-	depends on NFSD_V4 && DEBUG_KERNEL
+	depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
 	help
 	  This option enables support for manually injecting faults
 	  into the NFS server. This is intended to be used for
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdbc78c72542..03d647bf195d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
 	seg->offset = iomap.offset;
 	seg->length = iomap.length;
 
-	dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es);
+	dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
 	return 0;
 
 out_error:
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 9da89fddab33..9aa2796da90d 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
122 122
123 p = xdr_decode_hyper(p, &bex.foff); 123 p = xdr_decode_hyper(p, &bex.foff);
124 if (bex.foff & (block_size - 1)) { 124 if (bex.foff & (block_size - 1)) {
125 dprintk("%s: unaligned offset %lld\n", 125 dprintk("%s: unaligned offset 0x%llx\n",
126 __func__, bex.foff); 126 __func__, bex.foff);
127 goto fail; 127 goto fail;
128 } 128 }
129 p = xdr_decode_hyper(p, &bex.len); 129 p = xdr_decode_hyper(p, &bex.len);
130 if (bex.len & (block_size - 1)) { 130 if (bex.len & (block_size - 1)) {
131 dprintk("%s: unaligned length %lld\n", 131 dprintk("%s: unaligned length 0x%llx\n",
132 __func__, bex.foff); 132 __func__, bex.foff);
133 goto fail; 133 goto fail;
134 } 134 }
135 p = xdr_decode_hyper(p, &bex.soff); 135 p = xdr_decode_hyper(p, &bex.soff);
136 if (bex.soff & (block_size - 1)) { 136 if (bex.soff & (block_size - 1)) {
137 dprintk("%s: unaligned disk offset %lld\n", 137 dprintk("%s: unaligned disk offset 0x%llx\n",
138 __func__, bex.soff); 138 __func__, bex.soff);
139 goto fail; 139 goto fail;
140 } 140 }
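
The three rejects above rely on the usual power-of-two trick: when block_size is a power of two, x & (block_size - 1) extracts the misaligned remainder. A minimal standalone sketch (is_block_aligned is a hypothetical name):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* For a power-of-two block size, (x & (block_size - 1)) is nonzero
     * iff x is misaligned -- the same check the decoder applies to
     * foff, len and soff. */
    static int is_block_aligned(uint64_t x, uint32_t block_size)
    {
        /* Only valid when block_size is a power of two. */
        assert(block_size && (block_size & (block_size - 1)) == 0);
        return (x & (block_size - 1)) == 0;
    }

    int main(void)
    {
        printf("%d\n", is_block_aligned(8192, 4096));  /* 1: aligned   */
        printf("%d\n", is_block_aligned(6144, 4096));  /* 0: unaligned */
        return 0;
    }
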
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c3e3b6e55ae2..900c3ae94adc 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -691,8 +691,7 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b)
691 struct svc_export *orig = container_of(a, struct svc_export, h); 691 struct svc_export *orig = container_of(a, struct svc_export, h);
692 struct svc_export *new = container_of(b, struct svc_export, h); 692 struct svc_export *new = container_of(b, struct svc_export, h);
693 return orig->ex_client == new->ex_client && 693 return orig->ex_client == new->ex_client &&
694 orig->ex_path.dentry == new->ex_path.dentry && 694 path_equal(&orig->ex_path, &new->ex_path);
695 orig->ex_path.mnt == new->ex_path.mnt;
696} 695}
697 696
698static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) 697static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
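
path_equal() collapses the open-coded two-field comparison. For reference, the kernel helper in include/linux/path.h compares exactly the same members; a reduced stand-in with the structs left opaque:

    #include <stdbool.h>

    /* Reduced stand-in for the kernel's struct path and path_equal();
     * vfsmount and dentry are treated as opaque types here. */
    struct vfsmount;
    struct dentry;

    struct path {
        struct vfsmount *mnt;
        struct dentry *dentry;
    };

    static inline bool path_equal(const struct path *p1, const struct path *p2)
    {
        return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
    }

    int main(void)
    {
        struct path a = { 0, 0 }, b = a;

        return !path_equal(&a, &b);   /* exits 0: identical paths */
    }

The same substitution appears later in this series in nfsd4_encode_path().
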
@@ -1159,6 +1158,7 @@ static struct flags {
1159 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, 1158 { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
1160 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, 1159 { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
1161 { NFSEXP_V4ROOT, {"v4root", ""}}, 1160 { NFSEXP_V4ROOT, {"v4root", ""}},
1161 { NFSEXP_PNFS, {"pnfs", ""}},
1162 { 0, {"", ""}} 1162 { 0, {"", ""}}
1163}; 1163};
1164 1164
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 59fd76651781..eaf4605a4b9e 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -499,43 +499,13 @@ static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_s
499 state->mask.allow |= astate->allow; 499 state->mask.allow |= astate->allow;
500} 500}
501 501
502/*
503 * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
504 * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
505 * to traditional read/write/execute permissions.
506 *
507 * It's problematic to reject acls that use certain mode bits, because it
508 * places the burden on users to learn the rules about which bits one
509 * particular server sets, without giving the user a lot of help--we return an
510 * error that could mean any number of different things. To make matters
511 * worse, the problematic bits might be introduced by some application that's
512 * automatically mapping from some other acl model.
513 *
514 * So wherever possible we accept anything, possibly erring on the side of
515 * denying more permissions than necessary.
516 *
517 * However we do reject *explicit* DENY's of a few bits representing
518 * permissions we could never deny:
519 */
520
521static inline int check_deny(u32 mask, int isowner)
522{
523 if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
524 return -EINVAL;
525 if (!isowner)
526 return 0;
527 if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
528 return -EINVAL;
529 return 0;
530}
531
532static struct posix_acl * 502static struct posix_acl *
533posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) 503posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
534{ 504{
535 struct posix_acl_entry *pace; 505 struct posix_acl_entry *pace;
536 struct posix_acl *pacl; 506 struct posix_acl *pacl;
537 int nace; 507 int nace;
538 int i, error = 0; 508 int i;
539 509
540 /* 510 /*
541 * ACLs with no ACEs are treated differently in the inheritable 511 * ACLs with no ACEs are treated differently in the inheritable
@@ -560,17 +530,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
560 530
561 pace = pacl->a_entries; 531 pace = pacl->a_entries;
562 pace->e_tag = ACL_USER_OBJ; 532 pace->e_tag = ACL_USER_OBJ;
563 error = check_deny(state->owner.deny, 1);
564 if (error)
565 goto out_err;
566 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); 533 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
567 534
568 for (i=0; i < state->users->n; i++) { 535 for (i=0; i < state->users->n; i++) {
569 pace++; 536 pace++;
570 pace->e_tag = ACL_USER; 537 pace->e_tag = ACL_USER;
571 error = check_deny(state->users->aces[i].perms.deny, 0);
572 if (error)
573 goto out_err;
574 low_mode_from_nfs4(state->users->aces[i].perms.allow, 538 low_mode_from_nfs4(state->users->aces[i].perms.allow,
575 &pace->e_perm, flags); 539 &pace->e_perm, flags);
576 pace->e_uid = state->users->aces[i].uid; 540 pace->e_uid = state->users->aces[i].uid;
@@ -579,18 +543,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
579 543
580 pace++; 544 pace++;
581 pace->e_tag = ACL_GROUP_OBJ; 545 pace->e_tag = ACL_GROUP_OBJ;
582 error = check_deny(state->group.deny, 0);
583 if (error)
584 goto out_err;
585 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); 546 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
586 add_to_mask(state, &state->group); 547 add_to_mask(state, &state->group);
587 548
588 for (i=0; i < state->groups->n; i++) { 549 for (i=0; i < state->groups->n; i++) {
589 pace++; 550 pace++;
590 pace->e_tag = ACL_GROUP; 551 pace->e_tag = ACL_GROUP;
591 error = check_deny(state->groups->aces[i].perms.deny, 0);
592 if (error)
593 goto out_err;
594 low_mode_from_nfs4(state->groups->aces[i].perms.allow, 552 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
595 &pace->e_perm, flags); 553 &pace->e_perm, flags);
596 pace->e_gid = state->groups->aces[i].gid; 554 pace->e_gid = state->groups->aces[i].gid;
@@ -605,15 +563,9 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
605 563
606 pace++; 564 pace++;
607 pace->e_tag = ACL_OTHER; 565 pace->e_tag = ACL_OTHER;
608 error = check_deny(state->other.deny, 0);
609 if (error)
610 goto out_err;
611 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); 566 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
612 567
613 return pacl; 568 return pacl;
614out_err:
615 posix_acl_release(pacl);
616 return ERR_PTR(error);
617} 569}
618 570
619static inline void allow_bits(struct posix_ace_state *astate, u32 mask) 571static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3c1bfa155571..6904213a4363 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
118{ 118{
119 struct super_block *sb = exp->ex_path.mnt->mnt_sb; 119 struct super_block *sb = exp->ex_path.mnt->mnt_sb;
120 120
121 if (exp->ex_flags & NFSEXP_NOPNFS) 121 if (!(exp->ex_flags & NFSEXP_PNFS))
122 return; 122 return;
123 123
124 if (sb->s_export_op->get_uuid && 124 if (sb->s_export_op->get_uuid &&
@@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
440 list_move_tail(&lp->lo_perstate, reaplist); 440 list_move_tail(&lp->lo_perstate, reaplist);
441 return; 441 return;
442 } 442 }
443 end = seg->offset; 443 lo->offset = layout_end(seg);
444 } else { 444 } else {
445 /* retain the whole layout segment on a split. */ 445 /* retain the whole layout segment on a split. */
446 if (layout_end(seg) < end) { 446 if (layout_end(seg) < end) {
447 dprintk("%s: split not supported\n", __func__); 447 dprintk("%s: split not supported\n", __func__);
448 return; 448 return;
449 } 449 }
450 450 end = seg->offset;
451 lo->offset = layout_end(seg);
452 } 451 }
453 452
454 layout_update_len(lo, end); 453 layout_update_len(lo, end);
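
The swap above puts each assignment in its proper branch: returning the head of a layout advances lo->offset past the returned range, while returning the tail truncates end at the start of the returned range. A userspace sketch of the corrected trimming, assuming layout_end() is offset + length (the real helper also clamps on overflow) and ignoring the full-containment and unsupported-split cases:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified model of a pNFS layout segment: [offset, offset + length). */
    struct seg {
        uint64_t offset;
        uint64_t length;
    };

    static uint64_t seg_end(const struct seg *s)
    {
        return s->offset + s->length;
    }

    /* Trim the returned range off one end of *lo, mirroring the
     * corrected nfsd4_return_file_layout() logic. */
    static void trim_layout(struct seg *lo, const struct seg *ret)
    {
        uint64_t end = seg_end(lo);

        if (ret->offset <= lo->offset) {
            /* Head of the layout returned: advance the start. */
            lo->offset = seg_end(ret);
        } else {
            /* Tail returned: keep [lo->offset, ret->offset). */
            end = ret->offset;
        }
        lo->length = end - lo->offset;
    }

    int main(void)
    {
        struct seg lo = { 0, 100 }, head = { 0, 30 }, tail = { 70, 30 };

        trim_layout(&lo, &head);
        printf("%llu +%llu\n", (unsigned long long)lo.offset,
               (unsigned long long)lo.length);      /* 30 +70 */

        lo = (struct seg){ 0, 100 };
        trim_layout(&lo, &tail);
        printf("%llu +%llu\n", (unsigned long long)lo.offset,
               (unsigned long long)lo.length);      /* 0 +70 */
        return 0;
    }
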
@@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
513 512
514 spin_lock(&clp->cl_lock); 513 spin_lock(&clp->cl_lock);
515 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { 514 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
515 if (ls->ls_layout_type != lrp->lr_layout_type)
516 continue;
517
516 if (lrp->lr_return_type == RETURN_FSID && 518 if (lrp->lr_return_type == RETURN_FSID &&
517 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, 519 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
518 &cstate->current_fh.fh_handle)) 520 &cstate->current_fh.fh_handle))
@@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
587 589
588 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); 590 rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
589 591
590 nfsd4_cb_layout_fail(ls); 592 trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
591 593
592 printk(KERN_WARNING 594 printk(KERN_WARNING
593 "nfsd: client %s failed to respond to layout recall. " 595 "nfsd: client %s failed to respond to layout recall. "
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d30bea8d0277..4a8314f08a0e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -470,7 +470,7 @@ out:
470 fh_put(resfh); 470 fh_put(resfh);
471 kfree(resfh); 471 kfree(resfh);
472 } 472 }
473 nfsd4_cleanup_open_state(cstate, open, status); 473 nfsd4_cleanup_open_state(cstate, open);
474 nfsd4_bump_seqid(cstate, status); 474 nfsd4_bump_seqid(cstate, status);
475 return status; 475 return status;
476} 476}
@@ -1030,6 +1030,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1030 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); 1030 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
1031 return status; 1031 return status;
1032 } 1032 }
1033 if (!file)
1034 return nfserr_bad_stateid;
1033 1035
1034 status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, 1036 status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
1035 fallocate->falloc_offset, 1037 fallocate->falloc_offset,
@@ -1069,6 +1071,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1069 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); 1071 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
1070 return status; 1072 return status;
1071 } 1073 }
1074 if (!file)
1075 return nfserr_bad_stateid;
1072 1076
1073 switch (seek->seek_whence) { 1077 switch (seek->seek_whence) {
1074 case NFS4_CONTENT_DATA: 1078 case NFS4_CONTENT_DATA:
@@ -1237,8 +1241,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
1237 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); 1241 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
1238 1242
1239 gdp->gd_notify_types &= ops->notify_types; 1243 gdp->gd_notify_types &= ops->notify_types;
1240 exp_put(exp);
1241out: 1244out:
1245 exp_put(exp);
1242 return nfserr; 1246 return nfserr;
1243} 1247}
1244 1248
@@ -1815,7 +1819,7 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
1815 bmap0 &= ~FATTR4_WORD0_FILEHANDLE; 1819 bmap0 &= ~FATTR4_WORD0_FILEHANDLE;
1816 } 1820 }
1817 if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { 1821 if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) {
1818 ret += NFSD4_MAX_SEC_LABEL_LEN + 12; 1822 ret += NFS4_MAXLABELLEN + 12;
1819 bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; 1823 bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL;
1820 } 1824 }
1821 /* 1825 /*
@@ -2282,13 +2286,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
2282 .op_func = (nfsd4op_func)nfsd4_allocate, 2286 .op_func = (nfsd4op_func)nfsd4_allocate,
2283 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 2287 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
2284 .op_name = "OP_ALLOCATE", 2288 .op_name = "OP_ALLOCATE",
2285 .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, 2289 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
2286 }, 2290 },
2287 [OP_DEALLOCATE] = { 2291 [OP_DEALLOCATE] = {
2288 .op_func = (nfsd4op_func)nfsd4_deallocate, 2292 .op_func = (nfsd4op_func)nfsd4_deallocate,
2289 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, 2293 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
2290 .op_name = "OP_DEALLOCATE", 2294 .op_name = "OP_DEALLOCATE",
2291 .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, 2295 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
2292 }, 2296 },
2293 [OP_SEEK] = { 2297 [OP_SEEK] = {
2294 .op_func = (nfsd4op_func)nfsd4_seek, 2298 .op_func = (nfsd4op_func)nfsd4_seek,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6b2a09f793f..d42786ee39af 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1139,7 +1139,7 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
1139 return sid->sequence % SESSION_HASH_SIZE; 1139 return sid->sequence % SESSION_HASH_SIZE;
1140} 1140}
1141 1141
1142#ifdef NFSD_DEBUG 1142#ifdef CONFIG_SUNRPC_DEBUG
1143static inline void 1143static inline void
1144dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) 1144dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
1145{ 1145{
@@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp)
1638 nfs4_put_stid(&dp->dl_stid); 1638 nfs4_put_stid(&dp->dl_stid);
1639 } 1639 }
1640 while (!list_empty(&clp->cl_revoked)) { 1640 while (!list_empty(&clp->cl_revoked)) {
1641 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1641 dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
1642 list_del_init(&dp->dl_recall_lru); 1642 list_del_init(&dp->dl_recall_lru);
1643 nfs4_put_stid(&dp->dl_stid); 1643 nfs4_put_stid(&dp->dl_stid);
1644 } 1644 }
@@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
3221 } else 3221 } else
3222 nfs4_free_openowner(&oo->oo_owner); 3222 nfs4_free_openowner(&oo->oo_owner);
3223 spin_unlock(&clp->cl_lock); 3223 spin_unlock(&clp->cl_lock);
3224 return oo; 3224 return ret;
3225} 3225}
3226 3226
3227static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 3227static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
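
The one-line fix above matters because, under cl_lock, the freshly allocated openowner is either hashed or discarded in favour of an existing duplicate, in which case the original pointer has already been freed; the caller must receive whatever actually ended up in the hash. A toy single-threaded model of the insert-or-reuse pattern (names are illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct owner { char name[16]; struct owner *next; };
    static struct owner *hash_head;

    static struct owner *find_locked(const char *name)
    {
        for (struct owner *o = hash_head; o; o = o->next)
            if (!strcmp(o->name, name))
                return o;
        return NULL;
    }

    /* Returns the owner actually in the hash: the freshly inserted one,
     * or a pre-existing duplicate (in which case `new` is freed). */
    static struct owner *insert_or_reuse(struct owner *new)
    {
        struct owner *ret = find_locked(new->name);

        if (!ret) {
            new->next = hash_head;
            hash_head = new;
            ret = new;
        } else {
            free(new);  /* returning `new` here would be a use-after-free */
        }
        return ret;     /* the bug fixed above: code returned `new` */
    }

    int main(void)
    {
        struct owner *a = calloc(1, sizeof(*a));
        struct owner *b = calloc(1, sizeof(*b));

        strcpy(a->name, "lo");
        strcpy(b->name, "lo");
        insert_or_reuse(a);
        printf("%p %p\n", (void *)a, (void *)insert_or_reuse(b));
        return 0;
    }

The matching fix for alloc_init_lock_stateowner() appears further down in this diff.
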
@@ -4049,7 +4049,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
4049 status = nfserr_bad_stateid; 4049 status = nfserr_bad_stateid;
4050 if (nfsd4_is_deleg_cur(open)) 4050 if (nfsd4_is_deleg_cur(open))
4051 goto out; 4051 goto out;
4052 status = nfserr_jukebox;
4053 } 4052 }
4054 4053
4055 /* 4054 /*
@@ -4118,7 +4117,7 @@ out:
4118} 4117}
4119 4118
4120void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, 4119void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
4121 struct nfsd4_open *open, __be32 status) 4120 struct nfsd4_open *open)
4122{ 4121{
4123 if (open->op_openowner) { 4122 if (open->op_openowner) {
4124 struct nfs4_stateowner *so = &open->op_openowner->oo_owner; 4123 struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
@@ -4932,20 +4931,22 @@ nfs4_transform_lock_offset(struct file_lock *lock)
4932 lock->fl_end = OFFSET_MAX; 4931 lock->fl_end = OFFSET_MAX;
4933} 4932}
4934 4933
4935static void nfsd4_fl_get_owner(struct file_lock *dst, struct file_lock *src) 4934static fl_owner_t
4935nfsd4_fl_get_owner(fl_owner_t owner)
4936{ 4936{
4937 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)src->fl_owner; 4937 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
4938 dst->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lo->lo_owner)); 4938
4939 nfs4_get_stateowner(&lo->lo_owner);
4940 return owner;
4939} 4941}
4940 4942
4941static void nfsd4_fl_put_owner(struct file_lock *fl) 4943static void
4944nfsd4_fl_put_owner(fl_owner_t owner)
4942{ 4945{
4943 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; 4946 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
4944 4947
4945 if (lo) { 4948 if (lo)
4946 nfs4_put_stateowner(&lo->lo_owner); 4949 nfs4_put_stateowner(&lo->lo_owner);
4947 fl->fl_owner = NULL;
4948 }
4949} 4950}
4950 4951
4951static const struct lock_manager_operations nfsd_posix_mng_ops = { 4952static const struct lock_manager_operations nfsd_posix_mng_ops = {
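
With this rework the lock-manager owner callbacks deal in fl_owner_t directly: the get side takes a reference and hands back the same owner rather than copying it into a destination file_lock, and the put side no longer NULLs out a field it doesn't own. A sketch of the new shape (fl_owner_t really is void * in the kernel; struct owner and its plain counter are stand-ins for the nfs4 stateowner and its refcount):

    typedef void *fl_owner_t;

    struct owner {
        int refcount;   /* stand-in for the stateowner's atomic counter */
    };

    static fl_owner_t my_fl_get_owner(fl_owner_t o)
    {
        ((struct owner *)o)->refcount++;  /* take a reference... */
        return o;                         /* ...and return the same owner */
    }

    static void my_fl_put_owner(fl_owner_t o)
    {
        if (o)
            ((struct owner *)o)->refcount--;  /* drop the reference */
    }

    int main(void)
    {
        struct owner lo = { .refcount = 1 };
        fl_owner_t copy = my_fl_get_owner(&lo);  /* refcount -> 2 */

        my_fl_put_owner(copy);                   /* refcount -> 1 */
        return lo.refcount != 1;
    }
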
@@ -5062,7 +5063,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
5062 } else 5063 } else
5063 nfs4_free_lockowner(&lo->lo_owner); 5064 nfs4_free_lockowner(&lo->lo_owner);
5064 spin_unlock(&clp->cl_lock); 5065 spin_unlock(&clp->cl_lock);
5065 return lo; 5066 return ret;
5066} 5067}
5067 5068
5068static void 5069static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index df5e66caf100..a45032ce7b80 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -424,7 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
424 len += 4; 424 len += 4;
425 dummy32 = be32_to_cpup(p++); 425 dummy32 = be32_to_cpup(p++);
426 READ_BUF(dummy32); 426 READ_BUF(dummy32);
427 if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) 427 if (dummy32 > NFS4_MAXLABELLEN)
428 return nfserr_badlabel; 428 return nfserr_badlabel;
429 len += (XDR_QUADLEN(dummy32) << 2); 429 len += (XDR_QUADLEN(dummy32) << 2);
430 READMEM(buf, dummy32); 430 READMEM(buf, dummy32);
@@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset); 1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length); 1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length);
1564 p = xdr_decode_hyper(p, &lgp->lg_minlength); 1564 p = xdr_decode_hyper(p, &lgp->lg_minlength);
1565 nfsd4_decode_stateid(argp, &lgp->lg_sid); 1565
1566 status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
1567 if (status)
1568 return status;
1569
1566 READ_BUF(4); 1570 READ_BUF(4);
1567 lgp->lg_maxcount = be32_to_cpup(p++); 1571 lgp->lg_maxcount = be32_to_cpup(p++);
1568 1572
@@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
1580 p = xdr_decode_hyper(p, &lcp->lc_seg.offset); 1584 p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
1581 p = xdr_decode_hyper(p, &lcp->lc_seg.length); 1585 p = xdr_decode_hyper(p, &lcp->lc_seg.length);
1582 lcp->lc_reclaim = be32_to_cpup(p++); 1586 lcp->lc_reclaim = be32_to_cpup(p++);
1583 nfsd4_decode_stateid(argp, &lcp->lc_sid); 1587
1588 status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
1589 if (status)
1590 return status;
1591
1584 READ_BUF(4); 1592 READ_BUF(4);
1585 lcp->lc_newoffset = be32_to_cpup(p++); 1593 lcp->lc_newoffset = be32_to_cpup(p++);
1586 if (lcp->lc_newoffset) { 1594 if (lcp->lc_newoffset) {
@@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
1628 READ_BUF(16); 1636 READ_BUF(16);
1629 p = xdr_decode_hyper(p, &lrp->lr_seg.offset); 1637 p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
1630 p = xdr_decode_hyper(p, &lrp->lr_seg.length); 1638 p = xdr_decode_hyper(p, &lrp->lr_seg.length);
1631 nfsd4_decode_stateid(argp, &lrp->lr_sid); 1639
1640 status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
1641 if (status)
1642 return status;
1643
1632 READ_BUF(4); 1644 READ_BUF(4);
1633 lrp->lrf_body_len = be32_to_cpup(p++); 1645 lrp->lrf_body_len = be32_to_cpup(p++);
1634 if (lrp->lrf_body_len > 0) { 1646 if (lrp->lrf_body_len > 0) {
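
All three hunks above fix the same omission: nfsd4_decode_stateid()'s return value was silently dropped, so a short or malformed stateid could not abort decoding. A condensed sketch of the corrected pattern (decode_stateid and decode_layoutget are hypothetical stand-ins):

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical decoder standing in for nfsd4_decode_stateid(). */
    static int decode_stateid(const unsigned char *buf, unsigned int *sid)
    {
        if (!buf)
            return -EINVAL;  /* malformed input */
        *sid = buf[0];
        return 0;
    }

    static int decode_layoutget(const unsigned char *buf, unsigned int *sid)
    {
        int status = decode_stateid(buf, sid);

        if (status)          /* before the fix: status was ignored */
            return status;
        /* ... continue decoding the remaining arguments ... */
        return 0;
    }

    int main(void)
    {
        unsigned int sid;

        printf("%d\n", decode_layoutget(NULL, &sid));  /* -22 (-EINVAL) */
        return 0;
    }
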
@@ -2008,7 +2020,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
2008 * dentries/path components in an array. 2020 * dentries/path components in an array.
2009 */ 2021 */
2010 for (;;) { 2022 for (;;) {
2011 if (cur.dentry == root->dentry && cur.mnt == root->mnt) 2023 if (path_equal(&cur, root))
2012 break; 2024 break;
2013 if (cur.dentry == cur.mnt->mnt_root) { 2025 if (cur.dentry == cur.mnt->mnt_root) {
2014 if (follow_up(&cur)) 2026 if (follow_up(&cur))
@@ -3410,6 +3422,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3410 unsigned long maxcount; 3422 unsigned long maxcount;
3411 struct xdr_stream *xdr = &resp->xdr; 3423 struct xdr_stream *xdr = &resp->xdr;
3412 struct file *file = read->rd_filp; 3424 struct file *file = read->rd_filp;
3425 struct svc_fh *fhp = read->rd_fhp;
3413 int starting_len = xdr->buf->len; 3426 int starting_len = xdr->buf->len;
3414 struct raparms *ra; 3427 struct raparms *ra;
3415 __be32 *p; 3428 __be32 *p;
@@ -3433,12 +3446,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3433 maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); 3446 maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
3434 maxcount = min_t(unsigned long, maxcount, read->rd_length); 3447 maxcount = min_t(unsigned long, maxcount, read->rd_length);
3435 3448
3436 if (!read->rd_filp) { 3449 if (read->rd_filp)
3450 err = nfsd_permission(resp->rqstp, fhp->fh_export,
3451 fhp->fh_dentry,
3452 NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
3453 else
3437 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, 3454 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
3438 &file, &ra); 3455 &file, &ra);
3439 if (err) 3456 if (err)
3440 goto err_truncate; 3457 goto err_truncate;
3441 }
3442 3458
3443 if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) 3459 if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
3444 err = nfsd4_encode_splice_read(resp, read, file, maxcount); 3460 err = nfsd4_encode_splice_read(resp, read, file, maxcount);
@@ -4123,7 +4139,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4123 return nfserr_resource; 4139 return nfserr_resource;
4124 *p++ = cpu_to_be32(lrp->lrs_present); 4140 *p++ = cpu_to_be32(lrp->lrs_present);
4125 if (lrp->lrs_present) 4141 if (lrp->lrs_present)
4126 nfsd4_encode_stateid(xdr, &lrp->lr_sid); 4142 return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
4127 return nfs_ok; 4143 return nfs_ok;
4128} 4144}
4129#endif /* CONFIG_NFSD_PNFS */ 4145#endif /* CONFIG_NFSD_PNFS */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 83a9694ec485..46ec934f5dee 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void)
165{ 165{
166 unsigned int hashsize; 166 unsigned int hashsize;
167 unsigned int i; 167 unsigned int i;
168 int status = 0;
168 169
169 max_drc_entries = nfsd_cache_size_limit(); 170 max_drc_entries = nfsd_cache_size_limit();
170 atomic_set(&num_drc_entries, 0); 171 atomic_set(&num_drc_entries, 0);
171 hashsize = nfsd_hashsize(max_drc_entries); 172 hashsize = nfsd_hashsize(max_drc_entries);
172 maskbits = ilog2(hashsize); 173 maskbits = ilog2(hashsize);
173 174
174 register_shrinker(&nfsd_reply_cache_shrinker); 175 status = register_shrinker(&nfsd_reply_cache_shrinker);
176 if (status)
177 return status;
178
175 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 179 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
176 0, 0, NULL); 180 0, 0, NULL);
177 if (!drc_slab) 181 if (!drc_slab)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index aa47d75ddb26..9690cb4dd588 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1250,15 +1250,15 @@ static int __init init_nfsd(void)
1250 int retval; 1250 int retval;
1251 printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); 1251 printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
1252 1252
1253 retval = register_cld_notifier();
1254 if (retval)
1255 return retval;
1256 retval = register_pernet_subsys(&nfsd_net_ops); 1253 retval = register_pernet_subsys(&nfsd_net_ops);
1257 if (retval < 0) 1254 if (retval < 0)
1258 goto out_unregister_notifier; 1255 return retval;
1259 retval = nfsd4_init_slabs(); 1256 retval = register_cld_notifier();
1260 if (retval) 1257 if (retval)
1261 goto out_unregister_pernet; 1258 goto out_unregister_pernet;
1259 retval = nfsd4_init_slabs();
1260 if (retval)
1261 goto out_unregister_notifier;
1262 retval = nfsd4_init_pnfs(); 1262 retval = nfsd4_init_pnfs();
1263 if (retval) 1263 if (retval)
1264 goto out_free_slabs; 1264 goto out_free_slabs;
@@ -1290,10 +1290,10 @@ out_exit_pnfs:
1290 nfsd4_exit_pnfs(); 1290 nfsd4_exit_pnfs();
1291out_free_slabs: 1291out_free_slabs:
1292 nfsd4_free_slabs(); 1292 nfsd4_free_slabs();
1293out_unregister_pernet:
1294 unregister_pernet_subsys(&nfsd_net_ops);
1295out_unregister_notifier: 1293out_unregister_notifier:
1296 unregister_cld_notifier(); 1294 unregister_cld_notifier();
1295out_unregister_pernet:
1296 unregister_pernet_subsys(&nfsd_net_ops);
1297 return retval; 1297 return retval;
1298} 1298}
1299 1299
@@ -1308,8 +1308,8 @@ static void __exit exit_nfsd(void)
1308 nfsd4_exit_pnfs(); 1308 nfsd4_exit_pnfs();
1309 nfsd_fault_inject_cleanup(); 1309 nfsd_fault_inject_cleanup();
1310 unregister_filesystem(&nfsd_fs_type); 1310 unregister_filesystem(&nfsd_fs_type);
1311 unregister_pernet_subsys(&nfsd_net_ops);
1312 unregister_cld_notifier(); 1311 unregister_cld_notifier();
1312 unregister_pernet_subsys(&nfsd_net_ops);
1313} 1313}
1314 1314
1315MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); 1315MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
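
The reshuffle above restores the invariant that teardown runs in the exact reverse of bring-up, both on the init_nfsd() error path and in exit_nfsd(). A minimal sketch of the idiom with two hypothetical subsystems:

    #include <stdio.h>

    static int a_up, b_up;

    static int register_a(void) { a_up = 1; return 0; }
    static void unregister_a(void) { a_up = 0; }
    static int register_b(void) { b_up = 1; return 0; }
    static void unregister_b(void) { b_up = 0; }

    static int init(void)
    {
        int err;

        err = register_a();        /* first in  */
        if (err)
            return err;
        err = register_b();        /* second in */
        if (err)
            goto out_unregister_a;
        return 0;

    out_unregister_a:
        unregister_a();            /* unwind in reverse order */
        return err;
    }

    static void exit_(void)
    {
        unregister_b();            /* last in, first out */
        unregister_a();
    }

    int main(void)
    {
        if (!init()) {
            printf("up: a=%d b=%d\n", a_up, b_up);
            exit_();
        }
        printf("down: a=%d b=%d\n", a_up, b_up);
        return 0;
    }
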
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 565c4da1a9eb..cf980523898b 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -24,7 +24,7 @@
24#include "export.h" 24#include "export.h"
25 25
26#undef ifdebug 26#undef ifdebug
27#ifdef NFSD_DEBUG 27#ifdef CONFIG_SUNRPC_DEBUG
28# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag) 28# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
29#else 29#else
30# define ifdebug(flag) if (0) 30# define ifdebug(flag) if (0)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0bda93e58e1b..556ce2e47555 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -40,7 +40,6 @@
40#include "state.h" 40#include "state.h"
41#include "nfsd.h" 41#include "nfsd.h"
42 42
43#define NFSD4_MAX_SEC_LABEL_LEN 2048
44#define NFSD4_MAX_TAGLEN 128 43#define NFSD4_MAX_TAGLEN 128
45#define XDR_LEN(n) (((n) + 3) & ~3) 44#define XDR_LEN(n) (((n) + 3) & ~3)
46 45
@@ -683,7 +682,7 @@ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
683 struct svc_fh *current_fh, struct nfsd4_open *open); 682 struct svc_fh *current_fh, struct nfsd4_open *open);
684extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate); 683extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
685extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, 684extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
686 struct nfsd4_open *open, __be32 status); 685 struct nfsd4_open *open);
687extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, 686extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
688 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); 687 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
689extern __be32 nfsd4_close(struct svc_rqst *rqstp, 688extern __be32 nfsd4_close(struct svc_rqst *rqstp,
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 741fd02e0444..8df0f3b7839b 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -405,13 +405,14 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
405static int nilfs_palloc_count_desc_blocks(struct inode *inode, 405static int nilfs_palloc_count_desc_blocks(struct inode *inode,
406 unsigned long *desc_blocks) 406 unsigned long *desc_blocks)
407{ 407{
408 unsigned long blknum; 408 __u64 blknum;
409 int ret; 409 int ret;
410 410
411 ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); 411 ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
412 if (likely(!ret)) 412 if (likely(!ret))
413 *desc_blocks = DIV_ROUND_UP( 413 *desc_blocks = DIV_ROUND_UP(
414 blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); 414 (unsigned long)blknum,
415 NILFS_MDT(inode)->mi_blocks_per_desc_block);
415 return ret; 416 return ret;
416} 417}
417 418
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index aadbd0b5e3e8..27f75bcbeb30 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -152,9 +152,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
152 * 152 *
153 * %-EEXIST - A record associated with @key already exists. 153
154 */ 154 */
155int nilfs_bmap_insert(struct nilfs_bmap *bmap, 155int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec)
156 unsigned long key,
157 unsigned long rec)
158{ 156{
159 int ret; 157 int ret;
160 158
@@ -191,19 +189,47 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
191 return bmap->b_ops->bop_delete(bmap, key); 189 return bmap->b_ops->bop_delete(bmap, key);
192} 190}
193 191
194int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) 192/**
193 * nilfs_bmap_seek_key - seek a valid entry and return its key
194 * @bmap: bmap struct
195 * @start: start key number
196 * @keyp: place to store valid key
197 *
198 * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap
199 * starting from @start, and stores it to @keyp if found.
200 *
201 * Return Value: On success, 0 is returned. On error, one of the following
202 * negative error codes is returned.
203 *
204 * %-EIO - I/O error.
205 *
206 * %-ENOMEM - Insufficient amount of memory available.
207 *
208 * %-ENOENT - No valid entry was found.
209 */
210int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp)
195{ 211{
196 __u64 lastkey;
197 int ret; 212 int ret;
198 213
199 down_read(&bmap->b_sem); 214 down_read(&bmap->b_sem);
200 ret = bmap->b_ops->bop_last_key(bmap, &lastkey); 215 ret = bmap->b_ops->bop_seek_key(bmap, start, keyp);
216 up_read(&bmap->b_sem);
217
218 if (ret < 0)
219 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
220 return ret;
221}
222
223int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp)
224{
225 int ret;
226
227 down_read(&bmap->b_sem);
228 ret = bmap->b_ops->bop_last_key(bmap, keyp);
201 up_read(&bmap->b_sem); 229 up_read(&bmap->b_sem);
202 230
203 if (ret < 0) 231 if (ret < 0)
204 ret = nilfs_bmap_convert_error(bmap, __func__, ret); 232 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
205 else
206 *key = lastkey;
207 return ret; 233 return ret;
208} 234}
209 235
@@ -224,7 +250,7 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
224 * 250 *
225 * %-ENOENT - A record associated with @key does not exist. 251 * %-ENOENT - A record associated with @key does not exist.
226 */ 252 */
227int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) 253int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key)
228{ 254{
229 int ret; 255 int ret;
230 256
@@ -235,7 +261,7 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
235 return nilfs_bmap_convert_error(bmap, __func__, ret); 261 return nilfs_bmap_convert_error(bmap, __func__, ret);
236} 262}
237 263
238static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) 264static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key)
239{ 265{
240 __u64 lastkey; 266 __u64 lastkey;
241 int ret; 267 int ret;
@@ -276,7 +302,7 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
276 * 302 *
277 * %-ENOMEM - Insufficient amount of memory available. 303 * %-ENOMEM - Insufficient amount of memory available.
278 */ 304 */
279int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) 305int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key)
280{ 306{
281 int ret; 307 int ret;
282 308
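
nilfs_bmap_seek_key() gives callers a way to jump over holes: it returns the first valid key at or after @start, or -ENOENT if none exists. A toy analogue of that contract over a sparse array:

    #include <errno.h>
    #include <stdio.h>

    #define NKEYS 16

    /* Toy sparse map: valid[k] mirrors "key k has an entry". */
    static int valid[NKEYS] = { [3] = 1, [7] = 1, [12] = 1 };

    static int seek_key(unsigned long start, unsigned long *keyp)
    {
        for (unsigned long k = start; k < NKEYS; k++) {
            if (valid[k]) {
                *keyp = k;      /* first valid key >= start */
                return 0;
            }
        }
        return -ENOENT;
    }

    int main(void)
    {
        unsigned long key;

        if (!seek_key(4, &key))
            printf("next valid key >= 4 is %lu\n", key);  /* 7 */
        return 0;
    }

nilfs_mdt_find_block(), added later in this diff, is the first user: it seeks the next mapped block instead of probing every offset in turn.
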
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b89e68076adc..bfa817ce40b3 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -76,8 +76,10 @@ struct nilfs_bmap_operations {
76 union nilfs_binfo *); 76 union nilfs_binfo *);
77 int (*bop_mark)(struct nilfs_bmap *, __u64, int); 77 int (*bop_mark)(struct nilfs_bmap *, __u64, int);
78 78
79 /* The following functions are internal use only. */ 79 int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *);
80 int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); 80 int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
81
82 /* The following functions are internal use only. */
81 int (*bop_check_insert)(const struct nilfs_bmap *, __u64); 83 int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
82 int (*bop_check_delete)(struct nilfs_bmap *, __u64); 84 int (*bop_check_delete)(struct nilfs_bmap *, __u64);
83 int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); 85 int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
@@ -153,10 +155,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
153int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); 155int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
154void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); 156void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
155int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); 157int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
156int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); 158int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec);
157int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); 159int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key);
158int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); 160int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp);
159int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long); 161int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp);
162int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key);
160void nilfs_bmap_clear(struct nilfs_bmap *); 163void nilfs_bmap_clear(struct nilfs_bmap *);
161int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *); 164int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *);
162void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *); 165void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b2e3ff347620..059f37137f9a 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,6 +31,8 @@
31#include "alloc.h" 31#include "alloc.h"
32#include "dat.h" 32#include "dat.h"
33 33
34static void __nilfs_btree_init(struct nilfs_bmap *bmap);
35
34static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 36static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35{ 37{
36 struct nilfs_btree_path *path; 38 struct nilfs_btree_path *path;
@@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
368 return ret; 370 return ret;
369} 371}
370 372
373/**
374 * nilfs_btree_root_broken - verify consistency of btree root node
375 * @node: btree root node to be examined
376 * @ino: inode number
377 *
 378 * Return Value: If the node is broken, 1 is returned. Otherwise, 0 is returned.
379 */
380static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
381 unsigned long ino)
382{
383 int level, flags, nchildren;
384 int ret = 0;
385
386 level = nilfs_btree_node_get_level(node);
387 flags = nilfs_btree_node_get_flags(node);
388 nchildren = nilfs_btree_node_get_nchildren(node);
389
390 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
391 level > NILFS_BTREE_LEVEL_MAX ||
392 nchildren < 0 ||
393 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
394 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
395 ino, level, flags, nchildren);
396 ret = 1;
397 }
398 return ret;
399}
400
371int nilfs_btree_broken_node_block(struct buffer_head *bh) 401int nilfs_btree_broken_node_block(struct buffer_head *bh)
372{ 402{
373 int ret; 403 int ret;
@@ -603,6 +633,44 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
603 return 0; 633 return 0;
604} 634}
605 635
636/**
637 * nilfs_btree_get_next_key - get next valid key from btree path array
638 * @btree: bmap struct of btree
639 * @path: array of nilfs_btree_path struct
640 * @minlevel: start level
641 * @nextkey: place to store the next valid key
642 *
643 * Return Value: If a next key was found, 0 is returned. Otherwise,
644 * -ENOENT is returned.
645 */
646static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
647 const struct nilfs_btree_path *path,
648 int minlevel, __u64 *nextkey)
649{
650 struct nilfs_btree_node *node;
651 int maxlevel = nilfs_btree_height(btree) - 1;
652 int index, next_adj, level;
653
654 /* Next index is already set to bp_index for leaf nodes. */
655 next_adj = 0;
656 for (level = minlevel; level <= maxlevel; level++) {
657 if (level == maxlevel)
658 node = nilfs_btree_get_root(btree);
659 else
660 node = nilfs_btree_get_nonroot_node(path, level);
661
662 index = path[level].bp_index + next_adj;
663 if (index < nilfs_btree_node_get_nchildren(node)) {
664 /* Next key is in this node */
665 *nextkey = nilfs_btree_node_get_key(node, index);
666 return 0;
667 }
668 /* For non-leaf nodes, next index is stored at bp_index + 1. */
669 next_adj = 1;
670 }
671 return -ENOENT;
672}
673
606static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 674static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
607 __u64 key, int level, __u64 *ptrp) 675 __u64 key, int level, __u64 *ptrp)
608{ 676{
@@ -1533,6 +1601,27 @@ out:
1533 return ret; 1601 return ret;
1534} 1602}
1535 1603
1604static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1605 __u64 *keyp)
1606{
1607 struct nilfs_btree_path *path;
1608 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1609 int ret;
1610
1611 path = nilfs_btree_alloc_path();
1612 if (!path)
1613 return -ENOMEM;
1614
1615 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1616 if (!ret)
1617 *keyp = start;
1618 else if (ret == -ENOENT)
1619 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1620
1621 nilfs_btree_free_path(path);
1622 return ret;
1623}
1624
1536static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1625static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1537{ 1626{
1538 struct nilfs_btree_path *path; 1627 struct nilfs_btree_path *path;
@@ -1713,7 +1802,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1713 1802
1714 /* convert and insert */ 1803 /* convert and insert */
1715 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1804 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1716 nilfs_btree_init(btree); 1805 __nilfs_btree_init(btree);
1717 if (nreq != NULL) { 1806 if (nreq != NULL) {
1718 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1807 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1719 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1808 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
@@ -2268,7 +2357,9 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = {
2268 .bop_assign = nilfs_btree_assign, 2357 .bop_assign = nilfs_btree_assign,
2269 .bop_mark = nilfs_btree_mark, 2358 .bop_mark = nilfs_btree_mark,
2270 2359
2360 .bop_seek_key = nilfs_btree_seek_key,
2271 .bop_last_key = nilfs_btree_last_key, 2361 .bop_last_key = nilfs_btree_last_key,
2362
2272 .bop_check_insert = NULL, 2363 .bop_check_insert = NULL,
2273 .bop_check_delete = nilfs_btree_check_delete, 2364 .bop_check_delete = nilfs_btree_check_delete,
2274 .bop_gather_data = nilfs_btree_gather_data, 2365 .bop_gather_data = nilfs_btree_gather_data,
@@ -2288,18 +2379,31 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2288 .bop_assign = nilfs_btree_assign_gc, 2379 .bop_assign = nilfs_btree_assign_gc,
2289 .bop_mark = NULL, 2380 .bop_mark = NULL,
2290 2381
2382 .bop_seek_key = NULL,
2291 .bop_last_key = NULL, 2383 .bop_last_key = NULL,
2384
2292 .bop_check_insert = NULL, 2385 .bop_check_insert = NULL,
2293 .bop_check_delete = NULL, 2386 .bop_check_delete = NULL,
2294 .bop_gather_data = NULL, 2387 .bop_gather_data = NULL,
2295}; 2388};
2296 2389
2297int nilfs_btree_init(struct nilfs_bmap *bmap) 2390static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2298{ 2391{
2299 bmap->b_ops = &nilfs_btree_ops; 2392 bmap->b_ops = &nilfs_btree_ops;
2300 bmap->b_nchildren_per_block = 2393 bmap->b_nchildren_per_block =
2301 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2394 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2302 return 0; 2395}
2396
2397int nilfs_btree_init(struct nilfs_bmap *bmap)
2398{
2399 int ret = 0;
2400
2401 __nilfs_btree_init(bmap);
2402
2403 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
2404 bmap->b_inode->i_ino))
2405 ret = -EIO;
2406 return ret;
2303} 2407}
2304 2408
2305void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2409void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 0d58075f34e2..b6596cab9e99 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -53,6 +53,13 @@ nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
53 return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); 53 return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
54} 54}
55 55
56static __u64 nilfs_cpfile_first_checkpoint_in_block(const struct inode *cpfile,
57 unsigned long blkoff)
58{
59 return (__u64)nilfs_cpfile_checkpoints_per_block(cpfile) * blkoff
60 + 1 - NILFS_MDT(cpfile)->mi_first_entry_offset;
61}
62
56static unsigned long 63static unsigned long
57nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile, 64nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
58 __u64 curr, 65 __u64 curr,
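
The inverse mapping added above recovers the first checkpoint number stored in a given cpfile block; block 0 is special because its first mi_first_entry_offset slots hold the header. A quick consistency check of the arithmetic, assuming the forward mapping blkoff(cno) = (cno + off - 1) / cpb used elsewhere in cpfile.c:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t blkoff_of_cno(uint64_t cno, uint64_t cpb, uint64_t off)
    {
        return (cno + off - 1) / cpb;       /* forward mapping */
    }

    static uint64_t first_cno_in_block(uint64_t blkoff, uint64_t cpb,
                                       uint64_t off)
    {
        return cpb * blkoff + 1 - off;      /* inverse, as in the patch */
    }

    int main(void)
    {
        const uint64_t cpb = 8, off = 2;    /* illustrative geometry */

        for (uint64_t b = 1; b < 5; b++) {
            uint64_t cno = first_cno_in_block(b, cpb, off);

            /* cno really is the first checkpoint in block b... */
            assert(blkoff_of_cno(cno, cpb, off) == b);
            /* ...and its predecessor lives in the previous block. */
            assert(blkoff_of_cno(cno - 1, cpb, off) == b - 1);
        }
        return 0;
    }
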
@@ -146,6 +153,44 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
146 create, nilfs_cpfile_block_init, bhp); 153 create, nilfs_cpfile_block_init, bhp);
147} 154}
148 155
156/**
157 * nilfs_cpfile_find_checkpoint_block - find and get a buffer on cpfile
158 * @cpfile: inode of cpfile
159 * @start_cno: start checkpoint number (inclusive)
160 * @end_cno: end checkpoint number (inclusive)
161 * @cnop: place to store the next checkpoint number
162 * @bhp: place to store a pointer to buffer_head struct
163 *
164 * Return Value: On success, it returns 0. On error, the following negative
165 * error code is returned.
166 *
167 * %-ENOMEM - Insufficient memory available.
168 *
 169 * %-EIO - I/O error.
 170 *
 171 * %-ENOENT - No block exists in the range.
172 */
173static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile,
174 __u64 start_cno, __u64 end_cno,
175 __u64 *cnop,
176 struct buffer_head **bhp)
177{
178 unsigned long start, end, blkoff;
179 int ret;
180
181 if (unlikely(start_cno > end_cno))
182 return -ENOENT;
183
184 start = nilfs_cpfile_get_blkoff(cpfile, start_cno);
185 end = nilfs_cpfile_get_blkoff(cpfile, end_cno);
186
187 ret = nilfs_mdt_find_block(cpfile, start, end, &blkoff, bhp);
188 if (!ret)
189 *cnop = (blkoff == start) ? start_cno :
190 nilfs_cpfile_first_checkpoint_in_block(cpfile, blkoff);
191 return ret;
192}
193
149static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, 194static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
150 __u64 cno) 195 __u64 cno)
151{ 196{
@@ -403,14 +448,15 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
403 return -ENOENT; /* checkpoint number 0 is invalid */ 448 return -ENOENT; /* checkpoint number 0 is invalid */
404 down_read(&NILFS_MDT(cpfile)->mi_sem); 449 down_read(&NILFS_MDT(cpfile)->mi_sem);
405 450
406 for (n = 0; cno < cur_cno && n < nci; cno += ncps) { 451 for (n = 0; n < nci; cno += ncps) {
407 ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); 452 ret = nilfs_cpfile_find_checkpoint_block(
408 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 453 cpfile, cno, cur_cno - 1, &cno, &bh);
409 if (ret < 0) { 454 if (ret < 0) {
410 if (ret != -ENOENT) 455 if (likely(ret == -ENOENT))
411 goto out; 456 break;
412 continue; /* skip hole */ 457 goto out;
413 } 458 }
459 ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
414 460
415 kaddr = kmap_atomic(bh->b_page); 461 kaddr = kmap_atomic(bh->b_page);
416 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 462 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 82f4865e86dd..ebf89fd8ac1a 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -173,6 +173,21 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
173 return ret; 173 return ret;
174} 174}
175 175
176static int nilfs_direct_seek_key(const struct nilfs_bmap *direct, __u64 start,
177 __u64 *keyp)
178{
179 __u64 key;
180
181 for (key = start; key <= NILFS_DIRECT_KEY_MAX; key++) {
182 if (nilfs_direct_get_ptr(direct, key) !=
183 NILFS_BMAP_INVALID_PTR) {
184 *keyp = key;
185 return 0;
186 }
187 }
188 return -ENOENT;
189}
190
176static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) 191static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
177{ 192{
178 __u64 key, lastkey; 193 __u64 key, lastkey;
@@ -355,7 +370,9 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = {
355 .bop_assign = nilfs_direct_assign, 370 .bop_assign = nilfs_direct_assign,
356 .bop_mark = NULL, 371 .bop_mark = NULL,
357 372
373 .bop_seek_key = nilfs_direct_seek_key,
358 .bop_last_key = nilfs_direct_last_key, 374 .bop_last_key = nilfs_direct_last_key,
375
359 .bop_check_insert = nilfs_direct_check_insert, 376 .bop_check_insert = nilfs_direct_check_insert,
360 .bop_check_delete = NULL, 377 .bop_check_delete = NULL,
361 .bop_gather_data = nilfs_direct_gather_data, 378 .bop_gather_data = nilfs_direct_gather_data,
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a8c728acb7a8..54575e3cc1a2 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -143,8 +143,6 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
143 */ 143 */
144const struct file_operations nilfs_file_operations = { 144const struct file_operations nilfs_file_operations = {
145 .llseek = generic_file_llseek, 145 .llseek = generic_file_llseek,
146 .read = new_sync_read,
147 .write = new_sync_write,
148 .read_iter = generic_file_read_iter, 146 .read_iter = generic_file_read_iter,
149 .write_iter = generic_file_write_iter, 147 .write_iter = generic_file_write_iter,
150 .unlocked_ioctl = nilfs_ioctl, 148 .unlocked_ioctl = nilfs_ioctl,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b5969538f39..be936df4ba73 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/aio.h> 29#include <linux/uio.h>
30#include "nilfs.h" 30#include "nilfs.h"
31#include "btnode.h" 31#include "btnode.h"
32#include "segment.h" 32#include "segment.h"
@@ -106,7 +106,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
106 err = nilfs_transaction_begin(inode->i_sb, &ti, 1); 106 err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
107 if (unlikely(err)) 107 if (unlikely(err))
108 goto out; 108 goto out;
109 err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, 109 err = nilfs_bmap_insert(ii->i_bmap, blkoff,
110 (unsigned long)bh_result); 110 (unsigned long)bh_result);
111 if (unlikely(err != 0)) { 111 if (unlikely(err != 0)) {
112 if (err == -EEXIST) { 112 if (err == -EEXIST) {
@@ -305,8 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
305} 305}
306 306
307static ssize_t 307static ssize_t
308nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 308nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
309 loff_t offset)
310{ 309{
311 struct file *file = iocb->ki_filp; 310 struct file *file = iocb->ki_filp;
312 struct address_space *mapping = file->f_mapping; 311 struct address_space *mapping = file->f_mapping;
@@ -314,18 +313,17 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
314 size_t count = iov_iter_count(iter); 313 size_t count = iov_iter_count(iter);
315 ssize_t size; 314 ssize_t size;
316 315
317 if (rw == WRITE) 316 if (iov_iter_rw(iter) == WRITE)
318 return 0; 317 return 0;
319 318
320 /* Needs synchronization with the cleaner */ 319 /* Needs synchronization with the cleaner */
321 size = blockdev_direct_IO(rw, iocb, inode, iter, offset, 320 size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block);
322 nilfs_get_block);
323 321
324 /* 322 /*
325 * In case of error extending write may have instantiated a few 323 * In case of error extending write may have instantiated a few
326 * blocks outside i_size. Trim these off again. 324 * blocks outside i_size. Trim these off again.
327 */ 325 */
328 if (unlikely((rw & WRITE) && size < 0)) { 326 if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) {
329 loff_t isize = i_size_read(inode); 327 loff_t isize = i_size_read(inode);
330 loff_t end = offset + count; 328 loff_t end = offset + count;
331 329
@@ -443,21 +441,20 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
443void nilfs_set_inode_flags(struct inode *inode) 441void nilfs_set_inode_flags(struct inode *inode)
444{ 442{
445 unsigned int flags = NILFS_I(inode)->i_flags; 443 unsigned int flags = NILFS_I(inode)->i_flags;
444 unsigned int new_fl = 0;
446 445
447 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
448 S_DIRSYNC);
449 if (flags & FS_SYNC_FL) 446 if (flags & FS_SYNC_FL)
450 inode->i_flags |= S_SYNC; 447 new_fl |= S_SYNC;
451 if (flags & FS_APPEND_FL) 448 if (flags & FS_APPEND_FL)
452 inode->i_flags |= S_APPEND; 449 new_fl |= S_APPEND;
453 if (flags & FS_IMMUTABLE_FL) 450 if (flags & FS_IMMUTABLE_FL)
454 inode->i_flags |= S_IMMUTABLE; 451 new_fl |= S_IMMUTABLE;
455 if (flags & FS_NOATIME_FL) 452 if (flags & FS_NOATIME_FL)
456 inode->i_flags |= S_NOATIME; 453 new_fl |= S_NOATIME;
457 if (flags & FS_DIRSYNC_FL) 454 if (flags & FS_DIRSYNC_FL)
458 inode->i_flags |= S_DIRSYNC; 455 new_fl |= S_DIRSYNC;
459 mapping_set_gfp_mask(inode->i_mapping, 456 inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
460 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 457 S_NOATIME | S_DIRSYNC);
461} 458}
462 459
463int nilfs_read_inode_common(struct inode *inode, 460int nilfs_read_inode_common(struct inode *inode,
@@ -542,6 +539,8 @@ static int __nilfs_read_inode(struct super_block *sb,
542 brelse(bh); 539 brelse(bh);
543 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 540 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
544 nilfs_set_inode_flags(inode); 541 nilfs_set_inode_flags(inode);
542 mapping_set_gfp_mask(inode->i_mapping,
543 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
545 return 0; 544 return 0;
546 545
547 failed_unmap: 546 failed_unmap:
@@ -714,7 +713,7 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
714static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, 713static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
715 unsigned long from) 714 unsigned long from)
716{ 715{
717 unsigned long b; 716 __u64 b;
718 int ret; 717 int ret;
719 718
720 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 719 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
@@ -729,7 +728,7 @@ repeat:
729 if (b < from) 728 if (b < from)
730 return; 729 return;
731 730
732 b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from); 731 b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
733 ret = nilfs_bmap_truncate(ii->i_bmap, b); 732 ret = nilfs_bmap_truncate(ii->i_bmap, b);
734 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); 733 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
735 if (!ret || (ret == -ENOMEM && 734 if (!ret || (ret == -ENOMEM &&
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 892cf5ffdb8e..dee34d990281 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -261,6 +261,60 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
261} 261}
262 262
263/** 263/**
264 * nilfs_mdt_find_block - find and get a buffer on meta data file.
265 * @inode: inode of the meta data file
266 * @start: start block offset (inclusive)
267 * @end: end block offset (inclusive)
268 * @blkoff: block offset
269 * @out_bh: place to store a pointer to buffer_head struct
270 *
 271 * nilfs_mdt_find_block() looks up an existing block in the range
 272 * [@start, @end] and stores a pointer to the block's buffer head in
 273 * @out_bh and its block offset in @blkoff. @out_bh and @blkoff are
 274 * set only when zero is returned.
275 *
276 * Return Value: On success, it returns 0. On error, the following negative
277 * error code is returned.
278 *
279 * %-ENOMEM - Insufficient memory available.
280 *
 281 * %-EIO - I/O error.
 282 *
 283 * %-ENOENT - No block was found in the range.
284 */
285int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
286 unsigned long end, unsigned long *blkoff,
287 struct buffer_head **out_bh)
288{
289 __u64 next;
290 int ret;
291
292 if (unlikely(start > end))
293 return -ENOENT;
294
295 ret = nilfs_mdt_read_block(inode, start, true, out_bh);
296 if (!ret) {
297 *blkoff = start;
298 goto out;
299 }
300 if (unlikely(ret != -ENOENT || start == ULONG_MAX))
301 goto out;
302
303 ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
304 if (!ret) {
305 if (next <= end) {
306 ret = nilfs_mdt_read_block(inode, next, true, out_bh);
307 if (!ret)
308 *blkoff = next;
309 } else {
310 ret = -ENOENT;
311 }
312 }
313out:
314 return ret;
315}
316
317/**
264 * nilfs_mdt_delete_block - make a hole on the meta data file. 318 * nilfs_mdt_delete_block - make a hole on the meta data file.
265 * @inode: inode of the meta data file 319 * @inode: inode of the meta data file
266 * @block: block offset 320 * @block: block offset
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ab172e8549c5..fe529a87a208 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -78,6 +78,9 @@ int nilfs_mdt_get_block(struct inode *, unsigned long, int,
78 void (*init_block)(struct inode *, 78 void (*init_block)(struct inode *,
79 struct buffer_head *, void *), 79 struct buffer_head *, void *),
80 struct buffer_head **); 80 struct buffer_head **);
81int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
82 unsigned long end, unsigned long *blkoff,
83 struct buffer_head **out_bh);
81int nilfs_mdt_delete_block(struct inode *, unsigned long); 84int nilfs_mdt_delete_block(struct inode *, unsigned long);
82int nilfs_mdt_forget_block(struct inode *, unsigned long); 85int nilfs_mdt_forget_block(struct inode *, unsigned long);
83int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); 86int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
@@ -111,7 +114,10 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
111 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; 114 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 115}
113 116
114#define nilfs_mdt_bgl_lock(inode, bg) \ 117static inline spinlock_t *
115 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) 118nilfs_mdt_bgl_lock(struct inode *inode, unsigned int block_group)
119{
120 return bgl_lock_ptr(NILFS_MDT(inode)->mi_bgl, block_group);
121}
116 122
117#endif /* _NILFS_MDT_H */ 123#endif /* _NILFS_MDT_H */
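
Replacing the nilfs_mdt_bgl_lock() macro with a static inline wrapping bgl_lock_ptr() buys type checking and single evaluation of the block-group argument. A standalone illustration of the difference (toy lock table of plain ints; the kernel version returns a spinlock_t * from the blockgroup lock table):

    #include <stdio.h>

    #define NR_LOCKS 4      /* power of two, like NR_BG_LOCKS */

    struct lock { int l; };
    static struct lock locks[NR_LOCKS];

    /* macro version: no type checking, `bg` is textually substituted
     * and would be evaluated as many times as it appears */
    #define bgl_lock_macro(bg) (&locks[(bg) & (NR_LOCKS - 1)].l)

    /* inline version: `bg` must be an integer, evaluated exactly once */
    static inline int *bgl_lock_inline(unsigned int bg)
    {
        return &locks[bg & (NR_LOCKS - 1)].l;
    }

    int main(void)
    {
        printf("%p %p\n", (void *)bgl_lock_macro(5),
               (void *)bgl_lock_inline(5));    /* same slot: 5 & 3 == 1 */
        return 0;
    }
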
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 700ecbcca55d..45d650addd56 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -89,18 +89,16 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
89void nilfs_forget_buffer(struct buffer_head *bh) 89void nilfs_forget_buffer(struct buffer_head *bh)
90{ 90{
91 struct page *page = bh->b_page; 91 struct page *page = bh->b_page;
92 const unsigned long clear_bits =
93 (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
94 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
95 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
92 96
93 lock_buffer(bh); 97 lock_buffer(bh);
94 clear_buffer_nilfs_volatile(bh); 98 set_mask_bits(&bh->b_state, clear_bits, 0);
95 clear_buffer_nilfs_checked(bh);
96 clear_buffer_nilfs_redirected(bh);
97 clear_buffer_async_write(bh);
98 clear_buffer_dirty(bh);
99 if (nilfs_page_buffers_clean(page)) 99 if (nilfs_page_buffers_clean(page))
100 __nilfs_clear_page_dirty(page); 100 __nilfs_clear_page_dirty(page);
101 101
102 clear_buffer_uptodate(bh);
103 clear_buffer_mapped(bh);
104 bh->b_blocknr = -1; 102 bh->b_blocknr = -1;
105 ClearPageUptodate(page); 103 ClearPageUptodate(page);
106 ClearPageMappedToDisk(page); 104 ClearPageMappedToDisk(page);
@@ -421,6 +419,10 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
421 419
422 if (page_has_buffers(page)) { 420 if (page_has_buffers(page)) {
423 struct buffer_head *bh, *head; 421 struct buffer_head *bh, *head;
422 const unsigned long clear_bits =
423 (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
424 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
425 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
424 426
425 bh = head = page_buffers(page); 427 bh = head = page_buffers(page);
426 do { 428 do {
@@ -430,13 +432,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
430 "discard block %llu, size %zu", 432 "discard block %llu, size %zu",
431 (u64)bh->b_blocknr, bh->b_size); 433 (u64)bh->b_blocknr, bh->b_size);
432 } 434 }
433 clear_buffer_async_write(bh); 435 set_mask_bits(&bh->b_state, clear_bits, 0);
434 clear_buffer_dirty(bh);
435 clear_buffer_nilfs_volatile(bh);
436 clear_buffer_nilfs_checked(bh);
437 clear_buffer_nilfs_redirected(bh);
438 clear_buffer_uptodate(bh);
439 clear_buffer_mapped(bh);
440 unlock_buffer(bh); 436 unlock_buffer(bh);
441 } while (bh = bh->b_this_page, bh != head); 437 } while (bh = bh->b_this_page, bh != head);
442 } 438 }
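Both page.c hunks above batch what used to be a series of clear_buffer_*() calls, each a separate atomic bit operation, into one set_mask_bits() read-modify-write. A userspace model of the semantics, assuming C11 atomics (an approximation, not the <linux/bitops.h> implementation):

        #include <stdatomic.h>

        /* Model of set_mask_bits(&word, clear, set): atomically replace
         * word with (word & ~clear) | set in a single RMW loop. */
        static unsigned long set_mask_bits_model(_Atomic unsigned long *word,
                                                 unsigned long clear,
                                                 unsigned long set)
        {
                unsigned long old = atomic_load(word), new;

                do {
                        new = (old & ~clear) | set;
                } while (!atomic_compare_exchange_weak(word, &old, new));
                return old;     /* previous value, as a convenience */
        }
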
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 469086b9f99b..c6abbad9b8e3 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -24,6 +24,7 @@
24#include <linux/pagemap.h> 24#include <linux/pagemap.h>
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include <linux/writeback.h> 26#include <linux/writeback.h>
27#include <linux/bitops.h>
27#include <linux/bio.h> 28#include <linux/bio.h>
28#include <linux/completion.h> 29#include <linux/completion.h>
29#include <linux/blkdev.h> 30#include <linux/blkdev.h>
@@ -1588,7 +1589,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1588 1589
1589 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1590 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1590 b_assoc_buffers) { 1591 b_assoc_buffers) {
1591 set_buffer_async_write(bh);
1592 if (bh->b_page != bd_page) { 1592 if (bh->b_page != bd_page) {
1593 if (bd_page) { 1593 if (bd_page) {
1594 lock_page(bd_page); 1594 lock_page(bd_page);
@@ -1688,7 +1688,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
1688 list_for_each_entry(segbuf, logs, sb_list) { 1688 list_for_each_entry(segbuf, logs, sb_list) {
1689 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1689 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1690 b_assoc_buffers) { 1690 b_assoc_buffers) {
1691 clear_buffer_async_write(bh);
1692 if (bh->b_page != bd_page) { 1691 if (bh->b_page != bd_page) {
1693 if (bd_page) 1692 if (bd_page)
1694 end_page_writeback(bd_page); 1693 end_page_writeback(bd_page);
@@ -1768,7 +1767,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1768 b_assoc_buffers) { 1767 b_assoc_buffers) {
1769 set_buffer_uptodate(bh); 1768 set_buffer_uptodate(bh);
1770 clear_buffer_dirty(bh); 1769 clear_buffer_dirty(bh);
1771 clear_buffer_async_write(bh);
1772 if (bh->b_page != bd_page) { 1770 if (bh->b_page != bd_page) {
1773 if (bd_page) 1771 if (bd_page)
1774 end_page_writeback(bd_page); 1772 end_page_writeback(bd_page);
@@ -1788,12 +1786,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1788 */ 1786 */
1789 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1787 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1790 b_assoc_buffers) { 1788 b_assoc_buffers) {
1791 set_buffer_uptodate(bh); 1789 const unsigned long set_bits = (1 << BH_Uptodate);
1792 clear_buffer_dirty(bh); 1790 const unsigned long clear_bits =
1793 clear_buffer_async_write(bh); 1791 (1 << BH_Dirty | 1 << BH_Async_Write |
1794 clear_buffer_delay(bh); 1792 1 << BH_Delay | 1 << BH_NILFS_Volatile |
1795 clear_buffer_nilfs_volatile(bh); 1793 1 << BH_NILFS_Redirected);
1796 clear_buffer_nilfs_redirected(bh); 1794
1795 set_mask_bits(&bh->b_state, clear_bits, set_bits);
1797 if (bh == segbuf->sb_super_root) { 1796 if (bh == segbuf->sb_super_root) {
1798 if (bh->b_page != bd_page) { 1797 if (bh->b_page != bd_page) {
1799 end_page_writeback(bd_page); 1798 end_page_writeback(bd_page);
@@ -1907,6 +1906,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1907 struct the_nilfs *nilfs) 1906 struct the_nilfs *nilfs)
1908{ 1907{
1909 struct nilfs_inode_info *ii, *n; 1908 struct nilfs_inode_info *ii, *n;
1909 int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
1910 int defer_iput = false; 1910 int defer_iput = false;
1911 1911
1912 spin_lock(&nilfs->ns_inode_lock); 1912 spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1919,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
1919 brelse(ii->i_bh); 1919 brelse(ii->i_bh);
1920 ii->i_bh = NULL; 1920 ii->i_bh = NULL;
1921 list_del_init(&ii->i_dirty); 1921 list_del_init(&ii->i_dirty);
1922 if (!ii->vfs_inode.i_nlink) { 1922 if (!ii->vfs_inode.i_nlink || during_mount) {
1923 /* 1923 /*
1924 * Defer calling iput() to avoid a deadlock 1924 * Defer calling iput() to avoid deadlocks if
1925 * over I_SYNC flag for inodes with i_nlink == 0 1925 * i_nlink == 0 or mount is not yet finished.
1926 */ 1926 */
1927 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); 1927 list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
1928 defer_iput = true; 1928 defer_iput = true;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5bc2a1cf73c3..c1725f20a9d1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1020,7 +1020,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
1020 struct dentry *dentry; 1020 struct dentry *dentry;
1021 int ret; 1021 int ret;
1022 1022
1023 if (cno < 0 || cno > nilfs->ns_cno) 1023 if (cno > nilfs->ns_cno)
1024 return false; 1024 return false;
1025 1025
1026 if (cno >= nilfs_last_cno(nilfs)) 1026 if (cno >= nilfs_last_cno(nilfs))
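The dropped half of the super.c check was dead code: cno is __u64, and an unsigned value can never be negative. A one-line illustration (hypothetical wrapper):

        static bool cno_in_range(__u64 cno, __u64 ns_cno)
        {
                /* "cno < 0" is always false for an unsigned type and is
                 * flagged by gcc -Wtype-limits; only the upper bound matters. */
                return cno <= ns_cno;
        }
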
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9a66ff79ff27..d2f97ecca6a5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) 143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
144 return false; 144 return false;
145 145
146 if (event_mask & marks_mask & ~marks_ignored_mask) 146 if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
147 ~marks_ignored_mask)
147 return true; 148 return true;
148 149
149 return false; 150 return false;
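The extra mask confines the test to bits fanotify can actually deliver, so event flags outside FAN_ALL_OUTGOING_EVENTS can never satisfy the subscription check on their own. A condensed sketch of the resulting logic (a restatement of the hunk, not new behavior):

        u32 candidate = event_mask & FAN_ALL_OUTGOING_EVENTS;

        return (candidate & marks_mask & ~marks_ignored_mask) != 0;
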
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c4..2ff263e6d363 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
8 8
9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ccflags-y := -DNTFS_VERSION=\"2.1.31\" 11ccflags-y := -DNTFS_VERSION=\"2.1.32\"
12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d184dc..7bb487e663b4 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
28#include <linux/swap.h> 28#include <linux/swap.h>
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/writeback.h> 30#include <linux/writeback.h>
31#include <linux/aio.h>
32 31
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -329,62 +328,166 @@ err_out:
329 return err; 328 return err;
330} 329}
331 330
332/** 331static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
333 * ntfs_fault_in_pages_readable - 332 struct iov_iter *from)
334 *
335 * Fault a number of userspace pages into pagetables.
336 *
337 * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
338 * with more than two userspace pages as well as handling the single page case
339 * elegantly.
340 *
341 * If you find this difficult to understand, then think of the while loop being
342 * the following code, except that we do without the integer variable ret:
343 *
344 * do {
345 * ret = __get_user(c, uaddr);
346 * uaddr += PAGE_SIZE;
347 * } while (!ret && uaddr < end);
348 *
349 * Note, the final __get_user() may well run out-of-bounds of the user buffer,
350 * but _not_ out-of-bounds of the page the user buffer belongs to, and since
351 * this is only a read and not a write, and since it is still in the same page,
352 * it should not matter and this makes the code much simpler.
353 */
354static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
355 int bytes)
356{
357 const char __user *end;
358 volatile char c;
359
360 /* Set @end to the first byte outside the last page we care about. */
361 end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
362
363 while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
364 ;
365}
366
367/**
368 * ntfs_fault_in_pages_readable_iovec -
369 *
370 * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
371 */
372static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
373 size_t iov_ofs, int bytes)
374{ 333{
375 do { 334 loff_t pos;
376 const char __user *buf; 335 s64 end, ll;
377 unsigned len; 336 ssize_t err;
337 unsigned long flags;
338 struct file *file = iocb->ki_filp;
339 struct inode *vi = file_inode(file);
340 ntfs_inode *base_ni, *ni = NTFS_I(vi);
341 ntfs_volume *vol = ni->vol;
378 342
379 buf = iov->iov_base + iov_ofs; 343 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
380 len = iov->iov_len - iov_ofs; 344 "0x%llx, count 0x%zx.", vi->i_ino,
381 if (len > bytes) 345 (unsigned)le32_to_cpu(ni->type),
382 len = bytes; 346 (unsigned long long)iocb->ki_pos,
383 ntfs_fault_in_pages_readable(buf, len); 347 iov_iter_count(from));
384 bytes -= len; 348 err = generic_write_checks(iocb, from);
385 iov++; 349 if (unlikely(err <= 0))
386 iov_ofs = 0; 350 goto out;
387 } while (bytes); 351 /*
352 * All checks have passed. Before we start doing any writing we want
353 * to abort any totally illegal writes.
354 */
355 BUG_ON(NInoMstProtected(ni));
356 BUG_ON(ni->type != AT_DATA);
357 /* If file is encrypted, deny access, just like NT4. */
358 if (NInoEncrypted(ni)) {
359 /* Only $DATA attributes can be encrypted. */
360 /*
361 * Reminder for later: Encrypted files are _always_
362 * non-resident so that the content can always be encrypted.
363 */
364 ntfs_debug("Denying write access to encrypted file.");
365 err = -EACCES;
366 goto out;
367 }
368 if (NInoCompressed(ni)) {
369 /* Only unnamed $DATA attribute can be compressed. */
370 BUG_ON(ni->name_len);
371 /*
372 * Reminder for later: If resident, the data is not actually
373 * compressed. Only on the switch to non-resident does
374 * compression kick in. This is in contrast to encrypted files
375 * (see above).
376 */
377 ntfs_error(vi->i_sb, "Writing to compressed files is not "
378 "implemented yet. Sorry.");
379 err = -EOPNOTSUPP;
380 goto out;
381 }
382 base_ni = ni;
383 if (NInoAttr(ni))
384 base_ni = ni->ext.base_ntfs_ino;
385 err = file_remove_suid(file);
386 if (unlikely(err))
387 goto out;
388 /*
389 * Our ->update_time method always succeeds thus file_update_time()
390 * cannot fail either so there is no need to check the return code.
391 */
392 file_update_time(file);
393 pos = iocb->ki_pos;
394 /* The first byte after the last cluster being written to. */
395 end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
396 ~(u64)vol->cluster_size_mask;
397 /*
398 * If the write goes beyond the allocated size, extend the allocation
399 * to cover the whole of the write, rounded up to the nearest cluster.
400 */
401 read_lock_irqsave(&ni->size_lock, flags);
402 ll = ni->allocated_size;
403 read_unlock_irqrestore(&ni->size_lock, flags);
404 if (end > ll) {
405 /*
406 * Extend the allocation without changing the data size.
407 *
408 * Note we ensure the allocation is big enough to at least
409 * write some data but we do not require the allocation to be
410 * complete, i.e. it may be partial.
411 */
412 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
413 if (likely(ll >= 0)) {
414 BUG_ON(pos >= ll);
415 /* If the extension was partial truncate the write. */
416 if (end > ll) {
417 ntfs_debug("Truncating write to inode 0x%lx, "
418 "attribute type 0x%x, because "
419 "the allocation was only "
420 "partially extended.",
421 vi->i_ino, (unsigned)
422 le32_to_cpu(ni->type));
423 iov_iter_truncate(from, ll - pos);
424 }
425 } else {
426 err = ll;
427 read_lock_irqsave(&ni->size_lock, flags);
428 ll = ni->allocated_size;
429 read_unlock_irqrestore(&ni->size_lock, flags);
430 /* Perform a partial write if possible or fail. */
431 if (pos < ll) {
432 ntfs_debug("Truncating write to inode 0x%lx "
433 "attribute type 0x%x, because "
434 "extending the allocation "
435 "failed (error %d).",
436 vi->i_ino, (unsigned)
437 le32_to_cpu(ni->type),
438 (int)-err);
439 iov_iter_truncate(from, ll - pos);
440 } else {
441 if (err != -ENOSPC)
442 ntfs_error(vi->i_sb, "Cannot perform "
443 "write to inode "
444 "0x%lx, attribute "
445 "type 0x%x, because "
446 "extending the "
447 "allocation failed "
448 "(error %ld).",
449 vi->i_ino, (unsigned)
450 le32_to_cpu(ni->type),
451 (long)-err);
452 else
453 ntfs_debug("Cannot perform write to "
454 "inode 0x%lx, "
455 "attribute type 0x%x, "
456 "because there is not "
457 "space left.",
458 vi->i_ino, (unsigned)
459 le32_to_cpu(ni->type));
460 goto out;
461 }
462 }
463 }
464 /*
465 * If the write starts beyond the initialized size, extend it up to the
466 * beginning of the write and initialize all non-sparse space between
467 * the old initialized size and the new one. This automatically also
468 * increments the vfs inode->i_size to keep it above or equal to the
469 * initialized_size.
470 */
471 read_lock_irqsave(&ni->size_lock, flags);
472 ll = ni->initialized_size;
473 read_unlock_irqrestore(&ni->size_lock, flags);
474 if (pos > ll) {
475 /*
476 * Wait for ongoing direct i/o to complete before proceeding.
477 * New direct i/o cannot start as we hold i_mutex.
478 */
479 inode_dio_wait(vi);
480 err = ntfs_attr_extend_initialized(ni, pos);
481 if (unlikely(err < 0))
482 ntfs_error(vi->i_sb, "Cannot perform write to inode "
483 "0x%lx, attribute type 0x%x, because "
484 "extending the initialized size "
485 "failed (error %d).", vi->i_ino,
486 (unsigned)le32_to_cpu(ni->type),
487 (int)-err);
488 }
489out:
490 return err;
388} 491}
389 492
390/** 493/**
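The cluster rounding in ntfs_prepare_file_for_write() above relies on the usual power-of-two trick: cluster_size_mask is cluster_size - 1, so adding the mask and then clearing it rounds up. A standalone sketch (the helper name is illustrative):

        /* Round end up to the next cluster boundary; valid only because
         * cluster_size is a power of two. */
        static inline u64 round_up_to_cluster(u64 end, u64 cluster_size)
        {
                u64 mask = cluster_size - 1;

                return (end + mask) & ~mask;
        }

        /* round_up_to_cluster(4097, 4096) == 8192,
         * round_up_to_cluster(4096, 4096) == 4096. */
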
@@ -421,8 +524,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
421 goto err_out; 524 goto err_out;
422 } 525 }
423 } 526 }
424 err = add_to_page_cache_lru(*cached_page, mapping, index, 527 err = add_to_page_cache_lru(*cached_page, mapping,
425 GFP_KERNEL); 528 index, GFP_KERNEL);
426 if (unlikely(err)) { 529 if (unlikely(err)) {
427 if (err == -EEXIST) 530 if (err == -EEXIST)
428 continue; 531 continue;
@@ -1268,180 +1371,6 @@ rl_not_mapped_enoent:
1268 return err; 1371 return err;
1269} 1372}
1270 1373
1271/*
1272 * Copy as much as we can into the pages and return the number of bytes which
1273 * were successfully copied. If a fault is encountered then clear the pages
1274 * out to (ofs + bytes) and return the number of bytes which were copied.
1275 */
1276static inline size_t ntfs_copy_from_user(struct page **pages,
1277 unsigned nr_pages, unsigned ofs, const char __user *buf,
1278 size_t bytes)
1279{
1280 struct page **last_page = pages + nr_pages;
1281 char *addr;
1282 size_t total = 0;
1283 unsigned len;
1284 int left;
1285
1286 do {
1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes)
1289 len = bytes;
1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr);
1293 if (unlikely(left)) {
1294 /* Do it the slow way. */
1295 addr = kmap(*pages);
1296 left = __copy_from_user(addr + ofs, buf, len);
1297 kunmap(*pages);
1298 if (unlikely(left))
1299 goto err_out;
1300 }
1301 total += len;
1302 bytes -= len;
1303 if (!bytes)
1304 break;
1305 buf += len;
1306 ofs = 0;
1307 } while (++pages < last_page);
1308out:
1309 return total;
1310err_out:
1311 total += len - left;
1312 /* Zero the rest of the target like __copy_from_user(). */
1313 while (++pages < last_page) {
1314 bytes -= len;
1315 if (!bytes)
1316 break;
1317 len = PAGE_CACHE_SIZE;
1318 if (len > bytes)
1319 len = bytes;
1320 zero_user(*pages, 0, len);
1321 }
1322 goto out;
1323}
1324
1325static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
1326 const struct iovec *iov, size_t iov_ofs, size_t bytes)
1327{
1328 size_t total = 0;
1329
1330 while (1) {
1331 const char __user *buf = iov->iov_base + iov_ofs;
1332 unsigned len;
1333 size_t left;
1334
1335 len = iov->iov_len - iov_ofs;
1336 if (len > bytes)
1337 len = bytes;
1338 left = __copy_from_user_inatomic(vaddr, buf, len);
1339 total += len;
1340 bytes -= len;
1341 vaddr += len;
1342 if (unlikely(left)) {
1343 total -= left;
1344 break;
1345 }
1346 if (!bytes)
1347 break;
1348 iov++;
1349 iov_ofs = 0;
1350 }
1351 return total;
1352}
1353
1354static inline void ntfs_set_next_iovec(const struct iovec **iovp,
1355 size_t *iov_ofsp, size_t bytes)
1356{
1357 const struct iovec *iov = *iovp;
1358 size_t iov_ofs = *iov_ofsp;
1359
1360 while (bytes) {
1361 unsigned len;
1362
1363 len = iov->iov_len - iov_ofs;
1364 if (len > bytes)
1365 len = bytes;
1366 bytes -= len;
1367 iov_ofs += len;
1368 if (iov->iov_len == iov_ofs) {
1369 iov++;
1370 iov_ofs = 0;
1371 }
1372 }
1373 *iovp = iov;
1374 *iov_ofsp = iov_ofs;
1375}
1376
1377/*
1378 * This has the same side-effects and return value as ntfs_copy_from_user().
1379 * The difference is that on a fault we need to memset the remainder of the
1380 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
1381 * single-segment behaviour.
1382 *
1383 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
1384 * atomic and when not atomic. This is ok because it calls
1385 * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
1386 * fact, the only difference between __copy_from_user_inatomic() and
1387 * __copy_from_user() is that the latter calls might_sleep() and the former
1388 * should not zero the tail of the buffer on error. And on many architectures
1389 * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
1390 * makes no difference at all on those architectures.
1391 */
1392static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1393 unsigned nr_pages, unsigned ofs, const struct iovec **iov,
1394 size_t *iov_ofs, size_t bytes)
1395{
1396 struct page **last_page = pages + nr_pages;
1397 char *addr;
1398 size_t copied, len, total = 0;
1399
1400 do {
1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes)
1403 len = bytes;
1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */
1410 addr = kmap(*pages);
1411 copied = __ntfs_copy_from_user_iovec_inatomic(addr +
1412 ofs, *iov, *iov_ofs, len);
1413 if (unlikely(copied != len))
1414 goto err_out;
1415 kunmap(*pages);
1416 }
1417 total += len;
1418 ntfs_set_next_iovec(iov, iov_ofs, len);
1419 bytes -= len;
1420 if (!bytes)
1421 break;
1422 ofs = 0;
1423 } while (++pages < last_page);
1424out:
1425 return total;
1426err_out:
1427 BUG_ON(copied > len);
1428 /* Zero the rest of the target like __copy_from_user(). */
1429 memset(addr + ofs + copied, 0, len - copied);
1430 kunmap(*pages);
1431 total += copied;
1432 ntfs_set_next_iovec(iov, iov_ofs, copied);
1433 while (++pages < last_page) {
1434 bytes -= len;
1435 if (!bytes)
1436 break;
1437 len = PAGE_CACHE_SIZE;
1438 if (len > bytes)
1439 len = bytes;
1440 zero_user(*pages, 0, len);
1441 }
1442 goto out;
1443}
1444
1445static inline void ntfs_flush_dcache_pages(struct page **pages, 1374static inline void ntfs_flush_dcache_pages(struct page **pages,
1446 unsigned nr_pages) 1375 unsigned nr_pages)
1447{ 1376{
@@ -1762,86 +1691,83 @@ err_out:
1762 return err; 1691 return err;
1763} 1692}
1764 1693
1765static void ntfs_write_failed(struct address_space *mapping, loff_t to) 1694/*
1695 * Copy as much as we can into the pages and return the number of bytes which
1696 * were successfully copied. If a fault is encountered then clear the pages
1697 * out to (ofs + bytes) and return the number of bytes which were copied.
1698 */
1699static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
1700 unsigned ofs, struct iov_iter *i, size_t bytes)
1766{ 1701{
1767 struct inode *inode = mapping->host; 1702 struct page **last_page = pages + nr_pages;
1703 size_t total = 0;
1704 struct iov_iter data = *i;
1705 unsigned len, copied;
1768 1706
1769 if (to > inode->i_size) { 1707 do {
1770 truncate_pagecache(inode, inode->i_size); 1708 len = PAGE_CACHE_SIZE - ofs;
1771 ntfs_truncate_vfs(inode); 1709 if (len > bytes)
1772 } 1710 len = bytes;
1711 copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
1712 len);
1713 total += copied;
1714 bytes -= copied;
1715 if (!bytes)
1716 break;
1717 iov_iter_advance(&data, copied);
1718 if (copied < len)
1719 goto err;
1720 ofs = 0;
1721 } while (++pages < last_page);
1722out:
1723 return total;
1724err:
1725 /* Zero the rest of the target like __copy_from_user(). */
1726 len = PAGE_CACHE_SIZE - copied;
1727 do {
1728 if (len > bytes)
1729 len = bytes;
1730 zero_user(*pages, copied, len);
1731 bytes -= len;
1732 copied = 0;
1733 len = PAGE_CACHE_SIZE;
1734 } while (++pages < last_page);
1735 goto out;
1773} 1736}
1774 1737
1775/** 1738/**
1776 * ntfs_file_buffered_write - 1739 * ntfs_perform_write - perform buffered write to a file
1777 * 1740 * @file: file to write to
1778 * Locking: The vfs is holding ->i_mutex on the inode. 1741 * @i: iov_iter with data to write
1742 * @pos: byte offset in file at which to begin writing to
1779 */ 1743 */
1780static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, 1744static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
1781 const struct iovec *iov, unsigned long nr_segs, 1745 loff_t pos)
1782 loff_t pos, loff_t *ppos, size_t count)
1783{ 1746{
1784 struct file *file = iocb->ki_filp;
1785 struct address_space *mapping = file->f_mapping; 1747 struct address_space *mapping = file->f_mapping;
1786 struct inode *vi = mapping->host; 1748 struct inode *vi = mapping->host;
1787 ntfs_inode *ni = NTFS_I(vi); 1749 ntfs_inode *ni = NTFS_I(vi);
1788 ntfs_volume *vol = ni->vol; 1750 ntfs_volume *vol = ni->vol;
1789 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1751 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
1790 struct page *cached_page = NULL; 1752 struct page *cached_page = NULL;
1791 char __user *buf = NULL;
1792 s64 end, ll;
1793 VCN last_vcn; 1753 VCN last_vcn;
1794 LCN lcn; 1754 LCN lcn;
1795 unsigned long flags; 1755 size_t bytes;
1796 size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ 1756 ssize_t status, written = 0;
1797 ssize_t status, written;
1798 unsigned nr_pages; 1757 unsigned nr_pages;
1799 int err;
1800 1758
1801 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " 1759 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
1802 "pos 0x%llx, count 0x%lx.", 1760 "0x%llx, count 0x%lx.", vi->i_ino,
1803 vi->i_ino, (unsigned)le32_to_cpu(ni->type), 1761 (unsigned)le32_to_cpu(ni->type),
1804 (unsigned long long)pos, (unsigned long)count); 1762 (unsigned long long)pos,
1805 if (unlikely(!count)) 1763 (unsigned long)iov_iter_count(i));
1806 return 0;
1807 BUG_ON(NInoMstProtected(ni));
1808 /*
1809 * If the attribute is not an index root and it is encrypted or
1810 * compressed, we cannot write to it yet. Note we need to check for
1811 * AT_INDEX_ALLOCATION since this is the type of both directory and
1812 * index inodes.
1813 */
1814 if (ni->type != AT_INDEX_ALLOCATION) {
1815 /* If file is encrypted, deny access, just like NT4. */
1816 if (NInoEncrypted(ni)) {
1817 /*
1818 * Reminder for later: Encrypted files are _always_
1819 * non-resident so that the content can always be
1820 * encrypted.
1821 */
1822 ntfs_debug("Denying write access to encrypted file.");
1823 return -EACCES;
1824 }
1825 if (NInoCompressed(ni)) {
1826 /* Only unnamed $DATA attribute can be compressed. */
1827 BUG_ON(ni->type != AT_DATA);
1828 BUG_ON(ni->name_len);
1829 /*
1830 * Reminder for later: If resident, the data is not
1831 * actually compressed. Only on the switch to non-
1832 * resident does compression kick in. This is in
1833 * contrast to encrypted files (see above).
1834 */
1835 ntfs_error(vi->i_sb, "Writing to compressed files is "
1836 "not implemented yet. Sorry.");
1837 return -EOPNOTSUPP;
1838 }
1839 }
1840 /* 1764 /*
1841 * If a previous ntfs_truncate() failed, repeat it and abort if it 1765 * If a previous ntfs_truncate() failed, repeat it and abort if it
1842 * fails again. 1766 * fails again.
1843 */ 1767 */
1844 if (unlikely(NInoTruncateFailed(ni))) { 1768 if (unlikely(NInoTruncateFailed(ni))) {
1769 int err;
1770
1845 inode_dio_wait(vi); 1771 inode_dio_wait(vi);
1846 err = ntfs_truncate(vi); 1772 err = ntfs_truncate(vi);
1847 if (err || NInoTruncateFailed(ni)) { 1773 if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1781,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1855 return err; 1781 return err;
1856 } 1782 }
1857 } 1783 }
1858 /* The first byte after the write. */
1859 end = pos + count;
1860 /*
1861 * If the write goes beyond the allocated size, extend the allocation
1862 * to cover the whole of the write, rounded up to the nearest cluster.
1863 */
1864 read_lock_irqsave(&ni->size_lock, flags);
1865 ll = ni->allocated_size;
1866 read_unlock_irqrestore(&ni->size_lock, flags);
1867 if (end > ll) {
1868 /* Extend the allocation without changing the data size. */
1869 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
1870 if (likely(ll >= 0)) {
1871 BUG_ON(pos >= ll);
1872 /* If the extension was partial truncate the write. */
1873 if (end > ll) {
1874 ntfs_debug("Truncating write to inode 0x%lx, "
1875 "attribute type 0x%x, because "
1876 "the allocation was only "
1877 "partially extended.",
1878 vi->i_ino, (unsigned)
1879 le32_to_cpu(ni->type));
1880 end = ll;
1881 count = ll - pos;
1882 }
1883 } else {
1884 err = ll;
1885 read_lock_irqsave(&ni->size_lock, flags);
1886 ll = ni->allocated_size;
1887 read_unlock_irqrestore(&ni->size_lock, flags);
1888 /* Perform a partial write if possible or fail. */
1889 if (pos < ll) {
1890 ntfs_debug("Truncating write to inode 0x%lx, "
1891 "attribute type 0x%x, because "
1892 "extending the allocation "
1893 "failed (error code %i).",
1894 vi->i_ino, (unsigned)
1895 le32_to_cpu(ni->type), err);
1896 end = ll;
1897 count = ll - pos;
1898 } else {
1899 ntfs_error(vol->sb, "Cannot perform write to "
1900 "inode 0x%lx, attribute type "
1901 "0x%x, because extending the "
1902 "allocation failed (error "
1903 "code %i).", vi->i_ino,
1904 (unsigned)
1905 le32_to_cpu(ni->type), err);
1906 return err;
1907 }
1908 }
1909 }
1910 written = 0;
1911 /*
1912 * If the write starts beyond the initialized size, extend it up to the
1913 * beginning of the write and initialize all non-sparse space between
1914 * the old initialized size and the new one. This automatically also
1915 * increments the vfs inode->i_size to keep it above or equal to the
1916 * initialized_size.
1917 */
1918 read_lock_irqsave(&ni->size_lock, flags);
1919 ll = ni->initialized_size;
1920 read_unlock_irqrestore(&ni->size_lock, flags);
1921 if (pos > ll) {
1922 err = ntfs_attr_extend_initialized(ni, pos);
1923 if (err < 0) {
1924 ntfs_error(vol->sb, "Cannot perform write to inode "
1925 "0x%lx, attribute type 0x%x, because "
1926 "extending the initialized size "
1927 "failed (error code %i).", vi->i_ino,
1928 (unsigned)le32_to_cpu(ni->type), err);
1929 status = err;
1930 goto err_out;
1931 }
1932 }
1933 /* 1784 /*
1934 * Determine the number of pages per cluster for non-resident 1785 * Determine the number of pages per cluster for non-resident
1935 * attributes. 1786 * attributes.
@@ -1937,10 +1788,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1937 nr_pages = 1; 1788 nr_pages = 1;
1938 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) 1789 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
1939 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; 1790 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
1940 /* Finally, perform the actual write. */
1941 last_vcn = -1; 1791 last_vcn = -1;
1942 if (likely(nr_segs == 1))
1943 buf = iov->iov_base;
1944 do { 1792 do {
1945 VCN vcn; 1793 VCN vcn;
1946 pgoff_t idx, start_idx; 1794 pgoff_t idx, start_idx;
@@ -1965,10 +1813,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1965 vol->cluster_size_bits, false); 1813 vol->cluster_size_bits, false);
1966 up_read(&ni->runlist.lock); 1814 up_read(&ni->runlist.lock);
1967 if (unlikely(lcn < LCN_HOLE)) { 1815 if (unlikely(lcn < LCN_HOLE)) {
1968 status = -EIO;
1969 if (lcn == LCN_ENOMEM) 1816 if (lcn == LCN_ENOMEM)
1970 status = -ENOMEM; 1817 status = -ENOMEM;
1971 else 1818 else {
1819 status = -EIO;
1972 ntfs_error(vol->sb, "Cannot " 1820 ntfs_error(vol->sb, "Cannot "
1973 "perform write to " 1821 "perform write to "
1974 "inode 0x%lx, " 1822 "inode 0x%lx, "
@@ -1977,6 +1825,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1977 "is corrupt.", 1825 "is corrupt.",
1978 vi->i_ino, (unsigned) 1826 vi->i_ino, (unsigned)
1979 le32_to_cpu(ni->type)); 1827 le32_to_cpu(ni->type));
1828 }
1980 break; 1829 break;
1981 } 1830 }
1982 if (lcn == LCN_HOLE) { 1831 if (lcn == LCN_HOLE) {
@@ -1989,8 +1838,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1989 } 1838 }
1990 } 1839 }
1991 } 1840 }
1992 if (bytes > count) 1841 if (bytes > iov_iter_count(i))
1993 bytes = count; 1842 bytes = iov_iter_count(i);
1843again:
1994 /* 1844 /*
1995 * Bring in the user page(s) that we will copy from _first_. 1845 * Bring in the user page(s) that we will copy from _first_.
1996 * Otherwise there is a nasty deadlock on copying from the same 1846 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1849,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1999 * pages being swapped out between us bringing them into memory 1849 * pages being swapped out between us bringing them into memory
2000 * and doing the actual copying. 1850 * and doing the actual copying.
2001 */ 1851 */
2002 if (likely(nr_segs == 1)) 1852 if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
2003 ntfs_fault_in_pages_readable(buf, bytes); 1853 status = -EFAULT;
2004 else 1854 break;
2005 ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); 1855 }
2006 /* Get and lock @do_pages starting at index @start_idx. */ 1856 /* Get and lock @do_pages starting at index @start_idx. */
2007 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, 1857 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
2008 pages, &cached_page); 1858 pages, &cached_page);
@@ -2018,56 +1868,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
2018 status = ntfs_prepare_pages_for_non_resident_write( 1868 status = ntfs_prepare_pages_for_non_resident_write(
2019 pages, do_pages, pos, bytes); 1869 pages, do_pages, pos, bytes);
2020 if (unlikely(status)) { 1870 if (unlikely(status)) {
2021 loff_t i_size;
2022
2023 do { 1871 do {
2024 unlock_page(pages[--do_pages]); 1872 unlock_page(pages[--do_pages]);
2025 page_cache_release(pages[do_pages]); 1873 page_cache_release(pages[do_pages]);
2026 } while (do_pages); 1874 } while (do_pages);
2027 /*
2028 * The write preparation may have instantiated
2029 * allocated space outside i_size. Trim this
2030 * off again. We can ignore any errors in this
2031 * case as we will just be wasting a bit of
2032 * allocated space, which is not a disaster.
2033 */
2034 i_size = i_size_read(vi);
2035 if (pos + bytes > i_size) {
2036 ntfs_write_failed(mapping, pos + bytes);
2037 }
2038 break; 1875 break;
2039 } 1876 }
2040 } 1877 }
2041 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; 1878 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
2042 if (likely(nr_segs == 1)) { 1879 copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
2043 copied = ntfs_copy_from_user(pages + u, do_pages - u, 1880 i, bytes);
2044 ofs, buf, bytes);
2045 buf += copied;
2046 } else
2047 copied = ntfs_copy_from_user_iovec(pages + u,
2048 do_pages - u, ofs, &iov, &iov_ofs,
2049 bytes);
2050 ntfs_flush_dcache_pages(pages + u, do_pages - u); 1881 ntfs_flush_dcache_pages(pages + u, do_pages - u);
2051 status = ntfs_commit_pages_after_write(pages, do_pages, pos, 1882 status = 0;
2052 bytes); 1883 if (likely(copied == bytes)) {
2053 if (likely(!status)) { 1884 status = ntfs_commit_pages_after_write(pages, do_pages,
2054 written += copied; 1885 pos, bytes);
2055 count -= copied; 1886 if (!status)
2056 pos += copied; 1887 status = bytes;
2057 if (unlikely(copied != bytes))
2058 status = -EFAULT;
2059 } 1888 }
2060 do { 1889 do {
2061 unlock_page(pages[--do_pages]); 1890 unlock_page(pages[--do_pages]);
2062 page_cache_release(pages[do_pages]); 1891 page_cache_release(pages[do_pages]);
2063 } while (do_pages); 1892 } while (do_pages);
2064 if (unlikely(status)) 1893 if (unlikely(status < 0))
2065 break; 1894 break;
2066 balance_dirty_pages_ratelimited(mapping); 1895 copied = status;
2067 cond_resched(); 1896 cond_resched();
2068 } while (count); 1897 if (unlikely(!copied)) {
2069err_out: 1898 size_t sc;
2070 *ppos = pos; 1899
1900 /*
1901 * We failed to copy anything. Fall back to single
1902 * segment length write.
1903 *
1904 * This is needed to avoid possible livelock in the
1905 * case that all segments in the iov cannot be copied
1906 * at once without a pagefault.
1907 */
1908 sc = iov_iter_single_seg_count(i);
1909 if (bytes > sc)
1910 bytes = sc;
1911 goto again;
1912 }
1913 iov_iter_advance(i, copied);
1914 pos += copied;
1915 written += copied;
1916 balance_dirty_pages_ratelimited(mapping);
1917 if (fatal_signal_pending(current)) {
1918 status = -EINTR;
1919 break;
1920 }
1921 } while (iov_iter_count(i));
2071 if (cached_page) 1922 if (cached_page)
2072 page_cache_release(cached_page); 1923 page_cache_release(cached_page);
2073 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 1924 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
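Condensing the loop above: the iter-based write keeps retrying with smaller chunks whenever no bytes could be copied, which is what prevents a livelock when no iovec segment can be faulted in and copied in one pass. A stripped-down sketch of that control flow (identifiers from the hunk; the page grabbing, preparation, and commit steps are elided):

        do {
                bytes = min(bytes, iov_iter_count(i));
again:
                if (iov_iter_fault_in_multipages_readable(i, bytes)) {
                        status = -EFAULT;       /* user buffer unreadable */
                        break;
                }
                copied = ntfs_copy_from_user_iter(pages, nr_pages, ofs, i, bytes);
                if (unlikely(!copied)) {
                        /* No progress: shrink to one iovec segment so the
                         * fault-in above can succeed on the next attempt. */
                        bytes = min(bytes, iov_iter_single_seg_count(i));
                        goto again;
                }
                iov_iter_advance(i, copied);
                pos += copied;
                written += copied;
        } while (iov_iter_count(i));
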
@@ -2077,63 +1928,36 @@ err_out:
2077} 1928}
2078 1929
2079/** 1930/**
2080 * ntfs_file_aio_write_nolock - 1931 * ntfs_file_write_iter - write data to a file
1932 * @iocb: IO state structure
1933 * @from: iov_iter with data to write
1934 *
1935 * Basically the same as generic_file_write_iter() except that it ends up
1936 * calling ntfs_perform_write() instead of generic_perform_write() and that
1937 * O_DIRECT is not implemented.
2081 */ 1938 */
2082static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, 1939static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2083 const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
2084{ 1940{
2085 struct file *file = iocb->ki_filp; 1941 struct file *file = iocb->ki_filp;
2086 struct address_space *mapping = file->f_mapping; 1942 struct inode *vi = file_inode(file);
2087 struct inode *inode = mapping->host; 1943 ssize_t written = 0;
2088 loff_t pos; 1944 ssize_t err;
2089 size_t count; /* after file limit checks */
2090 ssize_t written, err;
2091 1945
2092 count = iov_length(iov, nr_segs); 1946 mutex_lock(&vi->i_mutex);
2093 pos = *ppos;
2094 /* We can write back this queue in page reclaim. */ 1947 /* We can write back this queue in page reclaim. */
2095 current->backing_dev_info = inode_to_bdi(inode); 1948 current->backing_dev_info = inode_to_bdi(vi);
2096 written = 0; 1949 err = ntfs_prepare_file_for_write(iocb, from);
2097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1950 if (iov_iter_count(from) && !err)
2098 if (err) 1951 written = ntfs_perform_write(file, from, iocb->ki_pos);
2099 goto out;
2100 if (!count)
2101 goto out;
2102 err = file_remove_suid(file);
2103 if (err)
2104 goto out;
2105 err = file_update_time(file);
2106 if (err)
2107 goto out;
2108 written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
2109 count);
2110out:
2111 current->backing_dev_info = NULL; 1952 current->backing_dev_info = NULL;
2112 return written ? written : err; 1953 mutex_unlock(&vi->i_mutex);
2113} 1954 if (likely(written > 0)) {
2114 1955 err = generic_write_sync(file, iocb->ki_pos, written);
2115/**
2116 * ntfs_file_aio_write -
2117 */
2118static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2119 unsigned long nr_segs, loff_t pos)
2120{
2121 struct file *file = iocb->ki_filp;
2122 struct address_space *mapping = file->f_mapping;
2123 struct inode *inode = mapping->host;
2124 ssize_t ret;
2125
2126 BUG_ON(iocb->ki_pos != pos);
2127
2128 mutex_lock(&inode->i_mutex);
2129 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2130 mutex_unlock(&inode->i_mutex);
2131 if (ret > 0) {
2132 int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
2133 if (err < 0) 1956 if (err < 0)
2134 ret = err; 1957 written = 0;
2135 } 1958 }
2136 return ret; 1959 iocb->ki_pos += written;
1960 return written ? written : err;
2137} 1961}
2138 1962
2139/** 1963/**
@@ -2197,37 +2021,15 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2197#endif /* NTFS_RW */ 2021#endif /* NTFS_RW */
2198 2022
2199const struct file_operations ntfs_file_ops = { 2023const struct file_operations ntfs_file_ops = {
2200 .llseek = generic_file_llseek, /* Seek inside file. */ 2024 .llseek = generic_file_llseek,
2201 .read = new_sync_read, /* Read from file. */ 2025 .read_iter = generic_file_read_iter,
2202 .read_iter = generic_file_read_iter, /* Async read from file. */
2203#ifdef NTFS_RW 2026#ifdef NTFS_RW
2204 .write = do_sync_write, /* Write to file. */ 2027 .write_iter = ntfs_file_write_iter,
2205 .aio_write = ntfs_file_aio_write, /* Async write to file. */ 2028 .fsync = ntfs_file_fsync,
2206 /*.release = ,*/ /* Last file is closed. See
2207 fs/ext2/file.c::
2208 ext2_release_file() for
2209 how to use this to discard
2210 preallocated space for
2211 write opened files. */
2212 .fsync = ntfs_file_fsync, /* Sync a file to disk. */
2213 /*.aio_fsync = ,*/ /* Sync all outstanding async
2214 i/o operations on a
2215 kiocb. */
2216#endif /* NTFS_RW */ 2029#endif /* NTFS_RW */
2217 /*.ioctl = ,*/ /* Perform function on the 2030 .mmap = generic_file_mmap,
2218 mounted filesystem. */ 2031 .open = ntfs_file_open,
2219 .mmap = generic_file_mmap, /* Mmap file. */ 2032 .splice_read = generic_file_splice_read,
2220 .open = ntfs_file_open, /* Open file. */
2221 .splice_read = generic_file_splice_read /* Zero-copy data send with
2222 the data source being on
2223 the ntfs partition. We do
2224 not need to care about the
2225 data destination. */
2226 /*.sendpage = ,*/ /* Zero-copy data send with
2227 the data destination being
2228 on the ntfs partition. We
2229 do not need to care about
2230 the data source. */
2231}; 2033};
2232 2034
2233const struct inode_operations ntfs_file_inode_ops = { 2035const struct inode_operations ntfs_file_inode_ops = {
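For context on the file_operations changes: the patch migrates from the aio_write prototype to the iov_iter-based write_iter, which moves all segment bookkeeping (the buf/iov_ofs juggling deleted above) into the core iterator. Side by side, the two method prototypes from <linux/fs.h> of this era:

        /* removed */
        ssize_t (*aio_write)(struct kiocb *, const struct iovec *,
                             unsigned long nr_segs, loff_t pos);
        /* added */
        ssize_t (*write_iter)(struct kiocb *, struct iov_iter *);
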
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b9949d363..1d0c21df0d80 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/aio.h>
32 31
33#include "aops.h" 32#include "aops.h"
34#include "attrib.h" 33#include "attrib.h"
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 044158bd22be..2d7f76e52c37 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3370 ret = ocfs2_get_right_path(et, left_path, &right_path); 3370 ret = ocfs2_get_right_path(et, left_path, &right_path);
3371 if (ret) { 3371 if (ret) {
3372 mlog_errno(ret); 3372 mlog_errno(ret);
3373 goto out; 3373 return ret;
3374 } 3374 }
3375 3375
3376 right_el = path_leaf_el(right_path); 3376 right_el = path_leaf_el(right_path);
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3453 subtree_index); 3453 subtree_index);
3454 } 3454 }
3455out: 3455out:
3456 if (right_path) 3456 ocfs2_free_path(right_path);
3457 ocfs2_free_path(right_path);
3458 return ret; 3457 return ret;
3459} 3458}
3460 3459
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3536 ret = ocfs2_get_left_path(et, right_path, &left_path); 3535 ret = ocfs2_get_left_path(et, right_path, &left_path);
3537 if (ret) { 3536 if (ret) {
3538 mlog_errno(ret); 3537 mlog_errno(ret);
3539 goto out; 3538 return ret;
3540 } 3539 }
3541 3540
3542 left_el = path_leaf_el(left_path); 3541 left_el = path_leaf_el(left_path);
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3647 right_path, subtree_index); 3646 right_path, subtree_index);
3648 } 3647 }
3649out: 3648out:
3650 if (left_path) 3649 ocfs2_free_path(left_path);
3651 ocfs2_free_path(left_path);
3652 return ret; 3650 return ret;
3653} 3651}
3654 3652
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4334 } else if (path->p_tree_depth > 0) { 4332 } else if (path->p_tree_depth > 0) {
4335 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); 4333 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
4336 if (status) 4334 if (status)
4337 goto out; 4335 goto exit;
4338 4336
4339 if (left_cpos != 0) { 4337 if (left_cpos != 0) {
4340 left_path = ocfs2_new_path_from_path(path); 4338 left_path = ocfs2_new_path_from_path(path);
4341 if (!left_path) 4339 if (!left_path)
4342 goto out; 4340 goto exit;
4343 4341
4344 status = ocfs2_find_path(et->et_ci, left_path, 4342 status = ocfs2_find_path(et->et_ci, left_path,
4345 left_cpos); 4343 left_cpos);
4346 if (status) 4344 if (status)
4347 goto out; 4345 goto free_left_path;
4348 4346
4349 new_el = path_leaf_el(left_path); 4347 new_el = path_leaf_el(left_path);
4350 4348
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4361 le16_to_cpu(new_el->l_next_free_rec), 4359 le16_to_cpu(new_el->l_next_free_rec),
4362 le16_to_cpu(new_el->l_count)); 4360 le16_to_cpu(new_el->l_count));
4363 status = -EINVAL; 4361 status = -EINVAL;
4364 goto out; 4362 goto free_left_path;
4365 } 4363 }
4366 rec = &new_el->l_recs[ 4364 rec = &new_el->l_recs[
4367 le16_to_cpu(new_el->l_next_free_rec) - 1]; 4365 le16_to_cpu(new_el->l_next_free_rec) - 1];
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4388 path->p_tree_depth > 0) { 4386 path->p_tree_depth > 0) {
4389 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); 4387 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
4390 if (status) 4388 if (status)
4391 goto out; 4389 goto free_left_path;
4392 4390
4393 if (right_cpos == 0) 4391 if (right_cpos == 0)
4394 goto out; 4392 goto free_left_path;
4395 4393
4396 right_path = ocfs2_new_path_from_path(path); 4394 right_path = ocfs2_new_path_from_path(path);
4397 if (!right_path) 4395 if (!right_path)
4398 goto out; 4396 goto free_left_path;
4399 4397
4400 status = ocfs2_find_path(et->et_ci, right_path, right_cpos); 4398 status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
4401 if (status) 4399 if (status)
4402 goto out; 4400 goto free_right_path;
4403 4401
4404 new_el = path_leaf_el(right_path); 4402 new_el = path_leaf_el(right_path);
4405 rec = &new_el->l_recs[0]; 4403 rec = &new_el->l_recs[0];
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4413 (unsigned long long)le64_to_cpu(eb->h_blkno), 4411 (unsigned long long)le64_to_cpu(eb->h_blkno),
4414 le16_to_cpu(new_el->l_next_free_rec)); 4412 le16_to_cpu(new_el->l_next_free_rec));
4415 status = -EINVAL; 4413 status = -EINVAL;
4416 goto out; 4414 goto free_right_path;
4417 } 4415 }
4418 rec = &new_el->l_recs[1]; 4416 rec = &new_el->l_recs[1];
4419 } 4417 }
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4430 ret = contig_type; 4428 ret = contig_type;
4431 } 4429 }
4432 4430
4433out: 4431free_right_path:
4434 if (left_path) 4432 ocfs2_free_path(right_path);
4435 ocfs2_free_path(left_path); 4433free_left_path:
4436 if (right_path) 4434 ocfs2_free_path(left_path);
4437 ocfs2_free_path(right_path); 4435exit:
4438
4439 return ret; 4436 return ret;
4440} 4437}
4441 4438
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6858 if (pages == NULL) { 6855 if (pages == NULL) {
6859 ret = -ENOMEM; 6856 ret = -ENOMEM;
6860 mlog_errno(ret); 6857 mlog_errno(ret);
6861 goto out; 6858 return ret;
6862 } 6859 }
6863 6860
6864 ret = ocfs2_reserve_clusters(osb, 1, &data_ac); 6861 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
6865 if (ret) { 6862 if (ret) {
6866 mlog_errno(ret); 6863 mlog_errno(ret);
6867 goto out; 6864 goto free_pages;
6868 } 6865 }
6869 } 6866 }
6870 6867
@@ -6996,9 +6993,8 @@ out_commit:
6996out: 6993out:
6997 if (data_ac) 6994 if (data_ac)
6998 ocfs2_free_alloc_context(data_ac); 6995 ocfs2_free_alloc_context(data_ac);
6999 if (pages) 6996free_pages:
7000 kfree(pages); 6997 kfree(pages);
7001
7002 return ret; 6998 return ret;
7003} 6999}
7004 7000
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 44db1808cdb5..f906a250da6a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/uio.h>
32 33
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
34 35
@@ -663,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb,
663 return 0; 664 return 0;
664} 665}
665 666
667static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
668 struct inode *inode, loff_t offset,
669 u64 zero_len, int cluster_align)
670{
671 u32 p_cpos = 0;
672 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
673 unsigned int num_clusters = 0;
674 unsigned int ext_flags = 0;
675 int ret = 0;
676
677 if (offset <= i_size_read(inode) || cluster_align)
678 return 0;
679
680 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
681 &ext_flags);
682 if (ret < 0) {
683 mlog_errno(ret);
684 return ret;
685 }
686
687 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
688 u64 s = i_size_read(inode);
689 sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
690 (do_div(s, osb->s_clustersize) >> 9);
691
692 ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
693 zero_len >> 9, GFP_NOFS, false);
694 if (ret < 0)
695 mlog_errno(ret);
696 }
697
698 return ret;
699}
700
701static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
702 struct inode *inode, loff_t offset)
703{
704 u64 zero_start, zero_len, total_zero_len;
705 u32 p_cpos = 0, clusters_to_add;
706 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
707 unsigned int num_clusters = 0;
708 unsigned int ext_flags = 0;
709 u32 size_div, offset_div;
710 int ret = 0;
711
712 {
713 u64 o = offset;
714 u64 s = i_size_read(inode);
715
716 offset_div = do_div(o, osb->s_clustersize);
717 size_div = do_div(s, osb->s_clustersize);
718 }
719
720 if (offset <= i_size_read(inode))
721 return 0;
722
723 clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
724 ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
725 total_zero_len = offset - i_size_read(inode);
726 if (clusters_to_add)
727 total_zero_len -= offset_div;
728
729 /* Allocate clusters to fill out holes; this is only needed
730 * when we add more than one cluster. Otherwise the cluster will
731 * be allocated during direct IO */
732 if (clusters_to_add > 1) {
733 ret = ocfs2_extend_allocation(inode,
734 OCFS2_I(inode)->ip_clusters,
735 clusters_to_add - 1, 0);
736 if (ret) {
737 mlog_errno(ret);
738 goto out;
739 }
740 }
741
742 while (total_zero_len) {
743 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
744 &ext_flags);
745 if (ret < 0) {
746 mlog_errno(ret);
747 goto out;
748 }
749
750 zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
751 size_div;
752 zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
753 size_div;
754 zero_len = min(total_zero_len, zero_len);
755
756 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
757 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
758 zero_start >> 9, zero_len >> 9,
759 GFP_NOFS, false);
760 if (ret < 0) {
761 mlog_errno(ret);
762 goto out;
763 }
764 }
765
766 total_zero_len -= zero_len;
767 v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
768
769 /* Only the first iteration can be cluster-unaligned,
770 * so set size_div to 0 for the rest */
771 size_div = 0;
772 }
773
774out:
775 return ret;
776}
777
666static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, 778static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
667 struct iov_iter *iter, 779 struct iov_iter *iter,
668 loff_t offset) 780 loff_t offset)
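A note on the do_div() blocks added above: do_div(n, base) divides the 64-bit lvalue n in place, leaving the quotient in n and yielding the remainder as the macro's value; that is why offset and i_size are first copied into scratch variables. A userspace model (an assumption, not the asm-generic implementation):

        /* Model of do_div(): n becomes n / base, expression yields n % base. */
        #define do_div_model(n, base) ({                \
                unsigned int __rem = (n) % (base);      \
                (n) /= (base);                          \
                __rem;                                  \
        })

        /* u64 o = offset; offset_div = do_div(o, clustersize);
         * leaves o == offset / clustersize and offset_div == the remainder. */
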
@@ -677,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
677 struct buffer_head *di_bh = NULL; 789 struct buffer_head *di_bh = NULL;
678 size_t count = iter->count; 790 size_t count = iter->count;
679 journal_t *journal = osb->journal->j_journal; 791 journal_t *journal = osb->journal->j_journal;
680 u32 zero_len; 792 u64 zero_len_head, zero_len_tail;
681 int cluster_align; 793 int cluster_align_head, cluster_align_tail;
682 loff_t final_size = offset + count; 794 loff_t final_size = offset + count;
683 int append_write = offset >= i_size_read(inode) ? 1 : 0; 795 int append_write = offset >= i_size_read(inode) ? 1 : 0;
684 unsigned int num_clusters = 0; 796 unsigned int num_clusters = 0;
@@ -686,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
686 798
687 { 799 {
688 u64 o = offset; 800 u64 o = offset;
801 u64 s = i_size_read(inode);
802
803 zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
804 cluster_align_head = !zero_len_head;
689 805
690 zero_len = do_div(o, 1 << osb->s_clustersize_bits); 806 zero_len_tail = osb->s_clustersize -
691 cluster_align = !zero_len; 807 do_div(s, osb->s_clustersize);
808 if ((offset - i_size_read(inode)) < zero_len_tail)
809 zero_len_tail = offset - i_size_read(inode);
810 cluster_align_tail = !zero_len_tail;
692 } 811 }
693 812
694 /* 813 /*
@@ -706,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
706 } 825 }
707 826
708 if (append_write) { 827 if (append_write) {
709 ret = ocfs2_inode_lock(inode, &di_bh, 1); 828 ret = ocfs2_inode_lock(inode, NULL, 1);
710 if (ret < 0) { 829 if (ret < 0) {
711 mlog_errno(ret); 830 mlog_errno(ret);
712 goto clean_orphan; 831 goto clean_orphan;
713 } 832 }
714 833
834 /* zero out the tail of the previously allocated cluster
835 * that has not been zeroed yet */
715 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 836 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
716 ret = ocfs2_zero_extend(inode, di_bh, offset); 837 ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
838 zero_len_tail, cluster_align_tail);
717 else 839 else
718 ret = ocfs2_extend_no_holes(inode, di_bh, offset, 840 ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
719 offset); 841 offset);
720 if (ret < 0) { 842 if (ret < 0) {
721 mlog_errno(ret); 843 mlog_errno(ret);
722 ocfs2_inode_unlock(inode, 1); 844 ocfs2_inode_unlock(inode, 1);
723 brelse(di_bh);
724 goto clean_orphan; 845 goto clean_orphan;
725 } 846 }
726 847
@@ -728,19 +849,15 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
728 if (is_overwrite < 0) { 849 if (is_overwrite < 0) {
729 mlog_errno(is_overwrite); 850 mlog_errno(is_overwrite);
730 ocfs2_inode_unlock(inode, 1); 851 ocfs2_inode_unlock(inode, 1);
731 brelse(di_bh);
732 goto clean_orphan; 852 goto clean_orphan;
733 } 853 }
734 854
735 ocfs2_inode_unlock(inode, 1); 855 ocfs2_inode_unlock(inode, 1);
736 brelse(di_bh);
737 di_bh = NULL;
738 } 856 }
739 857
740 written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, 858 written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
741 iter, offset, 859 offset, ocfs2_direct_IO_get_blocks,
742 ocfs2_direct_IO_get_blocks, 860 ocfs2_dio_end_io, NULL, 0);
743 ocfs2_dio_end_io, NULL, 0);
744 if (unlikely(written < 0)) { 861 if (unlikely(written < 0)) {
745 loff_t i_size = i_size_read(inode); 862 loff_t i_size = i_size_read(inode);
746 863
@@ -771,15 +888,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
771 if (ret < 0) 888 if (ret < 0)
772 mlog_errno(ret); 889 mlog_errno(ret);
773 } 890 }
774 } else if (written < 0 && append_write && !is_overwrite && 891 } else if (written > 0 && append_write && !is_overwrite &&
775 !cluster_align) { 892 !cluster_align_head) {
893 /* zeroing out the allocated cluster head */
776 u32 p_cpos = 0; 894 u32 p_cpos = 0;
777 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); 895 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
778 896
897 ret = ocfs2_inode_lock(inode, NULL, 0);
898 if (ret < 0) {
899 mlog_errno(ret);
900 goto clean_orphan;
901 }
902
779 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, 903 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
780 &num_clusters, &ext_flags); 904 &num_clusters, &ext_flags);
781 if (ret < 0) { 905 if (ret < 0) {
782 mlog_errno(ret); 906 mlog_errno(ret);
907 ocfs2_inode_unlock(inode, 0);
783 goto clean_orphan; 908 goto clean_orphan;
784 } 909 }
785 910
@@ -787,9 +912,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
787 912
788 ret = blkdev_issue_zeroout(osb->sb->s_bdev, 913 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
789 p_cpos << (osb->s_clustersize_bits - 9), 914 p_cpos << (osb->s_clustersize_bits - 9),
790 zero_len >> 9, GFP_KERNEL, false); 915 zero_len_head >> 9, GFP_NOFS, false);
791 if (ret < 0) 916 if (ret < 0)
792 mlog_errno(ret); 917 mlog_errno(ret);
918
919 ocfs2_inode_unlock(inode, 0);
793 } 920 }
794 921
795clean_orphan: 922clean_orphan:
@@ -818,9 +945,7 @@ out:
818 return ret; 945 return ret;
819} 946}
820 947
821static ssize_t ocfs2_direct_IO(int rw, 948static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
822 struct kiocb *iocb,
823 struct iov_iter *iter,
824 loff_t offset) 949 loff_t offset)
825{ 950{
826 struct file *file = iocb->ki_filp; 951 struct file *file = iocb->ki_filp;
@@ -842,12 +967,11 @@ static ssize_t ocfs2_direct_IO(int rw,
842 if (i_size_read(inode) <= offset && !full_coherency) 967 if (i_size_read(inode) <= offset && !full_coherency)
843 return 0; 968 return 0;
844 969
845 if (rw == READ) 970 if (iov_iter_rw(iter) == READ)
846 return __blockdev_direct_IO(rw, iocb, inode, 971 return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
847 inode->i_sb->s_bdev, 972 iter, offset,
848 iter, offset, 973 ocfs2_direct_IO_get_blocks,
849 ocfs2_direct_IO_get_blocks, 974 ocfs2_dio_end_io, NULL, 0);
850 ocfs2_dio_end_io, NULL, 0);
851 else 975 else
852 return ocfs2_direct_IO_write(iocb, iter, offset); 976 return ocfs2_direct_IO_write(iocb, iter, offset);
853} 977}
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155d54df..dd59599b022d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25#include <linux/aio.h> 25#include <linux/fs.h>
26 26
27handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 27handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
28 struct page *page, 28 struct page *page,
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9e6508..7fdc25a4d8c0 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
 	}							\
 } while (0)
 
-#define mlog_errno(st) do {					\
+#define mlog_errno(st) ({					\
 	int _st = (st);						\
 	if (_st != -ERESTARTSYS && _st != -EINTR &&		\
 	    _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC &&	\
 	    _st != -EDQUOT)					\
 		mlog(ML_ERROR, "status = %lld\n", (long long)_st);	\
-} while (0)
+	_st;							\
+})
 
 #define mlog_bug_on_msg(cond, fmt, args...) do {		\
 	if (cond) {						\
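The do/while → ({ ... }) rewrite turns mlog_errno() into a GCC statement expression whose value is the status it just logged, so callers can log and propagate in a single expression. The idiom in isolation (the helper name here is invented for the example):

    /* A statement expression evaluates to its final expression (_err). */
    #define log_and_keep_errno(err) ({              \
            int _err = (err);                       \
            if (_err)                               \
                    pr_err("status = %d\n", _err);  \
            _err;                                   \
    })

    /* Usage: the macro now works on the right-hand side of an assignment. */
    ret = log_and_keep_errno(do_something());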
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b08050bd3f2e..ccd4dcfc3645 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -18,7 +18,7 @@
  *
  *  linux/fs/minix/dir.c
  *
- *  Copyright (C) 1991, 1992 Linux Torvalds
+ *  Copyright (C) 1991, 1992 Linus Torvalds
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
 				  const char *name,
 				  int namelen)
 {
-	int ret;
+	int ret = 0;
 	struct ocfs2_dir_lookup_result lookup = { NULL, };
 
 	trace_ocfs2_check_dir_for_entry(
 		(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
 
-	ret = -EEXIST;
-	if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
-		goto bail;
+	if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
+		ret = -EEXIST;
+		mlog_errno(ret);
+	}
 
-	ret = 0;
-bail:
 	ocfs2_free_dir_lookup_result(&lookup);
 
-	if (ret)
-		mlog_errno(ret);
 	return ret;
 }
 
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 11849a44dc5a..8b23aa2f52dd 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 	int noqueue_attempted = 0;
 	int dlm_locked = 0;
 
+	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
+		mlog_errno(-EINVAL);
+		return -EINVAL;
+	}
+
 	ocfs2_init_mask_waiter(&mw);
 
 	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 29651167190d..540dc4bdd042 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 	}
 
 	status = ocfs2_test_inode_bit(osb, blkno, &set);
-	trace_ocfs2_get_dentry_test_bit(status, set);
 	if (status < 0) {
 		if (status == -EINVAL) {
 			/*
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 		goto unlock_nfs_sync;
 	}
 
+	trace_ocfs2_get_dentry_test_bit(status, set);
 	/* If the inode allocator bit is clear, this inode must be stale */
 	if (!set) {
 		status = -ESTALE;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46e0d4e857c7..913fc250d85a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2106,7 +2106,7 @@ out:
 }
 
 static int ocfs2_prepare_inode_for_write(struct file *file,
-					 loff_t *ppos,
+					 loff_t pos,
 					 size_t count,
 					 int appending,
 					 int *direct_io,
@@ -2115,7 +2115,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 	int ret = 0, meta_level = 0;
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	loff_t saved_pos = 0, end;
+	loff_t end;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int full_coherency = !(osb->s_mount_opt &
 		OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2155,23 +2155,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 		}
 	}
 
-	/* work on a copy of ppos until we're sure that we won't have
-	 * to recalculate it due to relocking. */
-	if (appending)
-		saved_pos = i_size_read(inode);
-	else
-		saved_pos = *ppos;
-
-	end = saved_pos + count;
+	end = pos + count;
 
-	ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
+	ret = ocfs2_check_range_for_refcount(inode, pos, count);
 	if (ret == 1) {
 		ocfs2_inode_unlock(inode, meta_level);
 		meta_level = -1;
 
 		ret = ocfs2_prepare_inode_for_refcount(inode,
 						       file,
-						       saved_pos,
+						       pos,
 						       count,
 						       &meta_level);
 		if (has_refcount)
@@ -2227,7 +2220,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 			 * caller will have to retake some cluster
 			 * locks and initiate the io as buffered.
 			 */
-			ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
+			ret = ocfs2_check_range_for_holes(inode, pos, count);
 			if (ret == 1) {
 				/*
 				 * Fallback to old way if the feature bit is not set.
@@ -2242,12 +2235,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 		break;
 	}
 
-	if (appending)
-		*ppos = saved_pos;
-
 out_unlock:
 	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
-					    saved_pos, appending, count,
+					    pos, appending, count,
 					    direct_io, has_refcount);
 
 	if (meta_level >= 0)
@@ -2260,19 +2250,20 @@ out:
 static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 				     struct iov_iter *from)
 {
-	int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
+	int direct_io, appending, rw_level, have_alloc_sem  = 0;
 	int can_do_direct, has_refcount = 0;
 	ssize_t written = 0;
-	size_t count = iov_iter_count(from);
-	loff_t old_size, *ppos = &iocb->ki_pos;
+	ssize_t ret;
+	size_t count = iov_iter_count(from), orig_count;
+	loff_t old_size;
 	u32 old_clusters;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
-	struct address_space *mapping = file->f_mapping;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	int full_coherency = !(osb->s_mount_opt &
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
 	int unaligned_dio = 0;
+	int dropped_dio = 0;
 
 	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2280,11 +2271,11 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 		file->f_path.dentry->d_name.name,
 		(unsigned int)from->nr_segs);	/* GRRRRR */
 
-	if (iocb->ki_nbytes == 0)
+	if (count == 0)
 		return 0;
 
-	appending = file->f_flags & O_APPEND ? 1 : 0;
-	direct_io = file->f_flags & O_DIRECT ? 1 : 0;
+	appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
+	direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
 
 	mutex_lock(&inode->i_mutex);
 
@@ -2329,9 +2320,17 @@ relock:
 		ocfs2_inode_unlock(inode, 1);
 	}
 
+	orig_count = iov_iter_count(from);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0) {
+		if (ret)
+			mlog_errno(ret);
+		goto out;
+	}
+	count = ret;
+
 	can_do_direct = direct_io;
-	ret = ocfs2_prepare_inode_for_write(file, ppos,
-					    iocb->ki_nbytes, appending,
+	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
 					    &can_do_direct, &has_refcount);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
 	}
 
 	if (direct_io && !is_sync_kiocb(iocb))
-		unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
-						      *ppos);
+		unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
 
 	/*
 	 * We can't complete the direct I/O as requested, fall back to
@@ -2353,6 +2351,9 @@ relock:
 		rw_level = -1;
 
 		direct_io = 0;
+		iocb->ki_flags &= ~IOCB_DIRECT;
+		iov_iter_reexpand(from, orig_count);
+		dropped_dio = 1;
 		goto relock;
 	}
 
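The fallback path above needs the iov_iter restored because generic_write_checks() in this API generation truncates the iter to the allowed length; retrying as buffered I/O must start from the original request size. A hedged sketch of the save/restore pairing (need_buffered_fallback is a hypothetical condition standing in for ocfs2's checks):

    /* Sketch: remember the full length before the checks, restore on fallback. */
    size_t orig_count = iov_iter_count(from);

    ret = generic_write_checks(iocb, from);   /* may shorten 'from' */
    if (ret <= 0)
            return ret;

    if (need_buffered_fallback) {
            iocb->ki_flags &= ~IOCB_DIRECT;
            iov_iter_reexpand(from, orig_count);
            goto relock;                      /* retry the locking dance */
    }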
@@ -2376,74 +2377,18 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	ret = generic_write_checks(file, ppos, &count,
-				   S_ISBLK(inode->i_mode));
-	if (ret)
-		goto out_dio;
-
-	iov_iter_truncate(from, count);
-	if (direct_io) {
-		loff_t endbyte;
-		ssize_t written_buffered;
-		written = generic_file_direct_write(iocb, from, *ppos);
-		if (written < 0 || written == count) {
-			ret = written;
-			goto out_dio;
-		}
-
-		/*
-		 * for completing the rest of the request.
-		 */
-		*ppos += written;
-		count -= written;
-		written_buffered = generic_perform_write(file, from, *ppos);
-		/*
-		 * If generic_file_buffered_write() returned a synchronous error
-		 * then we want to return the number of bytes which were
-		 * direct-written, or the error code if that was zero. Note
-		 * that this differs from normal direct-io semantics, which
-		 * will return -EFOO even if some bytes were written.
-		 */
-		if (written_buffered < 0) {
-			ret = written_buffered;
-			goto out_dio;
-		}
-
-		iocb->ki_pos = *ppos + written_buffered;
-		/* We need to ensure that the page cache pages are written to
-		 * disk and invalidated to preserve the expected O_DIRECT
-		 * semantics.
-		 */
-		endbyte = *ppos + written_buffered - 1;
-		ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
-						   endbyte);
-		if (ret == 0) {
-			written += written_buffered;
-			invalidate_mapping_pages(mapping,
-					*ppos >> PAGE_CACHE_SHIFT,
-					endbyte >> PAGE_CACHE_SHIFT);
-		} else {
-			/*
-			 * We don't know how much we wrote, so just return
-			 * the number of bytes which were direct-written
-			 */
-		}
-	} else {
-		current->backing_dev_info = inode_to_bdi(inode);
-		written = generic_perform_write(file, from, *ppos);
-		if (likely(written >= 0))
-			iocb->ki_pos = *ppos + written;
-		current->backing_dev_info = NULL;
-	}
-
-out_dio:
+	written = __generic_file_write_iter(iocb, from);
 	/* buffered aio wouldn't have proper lock coverage today */
-	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
+	BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+
+	if (unlikely(written <= 0))
+		goto no_sync;
 
-	if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
-	    ((file->f_flags & O_DIRECT) && !direct_io)) {
-		ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
-					       *ppos + count - 1);
+	if (((file->f_flags & O_DSYNC) && !direct_io) ||
+	    IS_SYNC(inode) || dropped_dio) {
+		ret = filemap_fdatawrite_range(file->f_mapping,
+					       iocb->ki_pos - written,
+					       iocb->ki_pos - 1);
 		if (ret < 0)
 			written = ret;
 
@@ -2454,10 +2399,12 @@ out_dio:
 	}
 
 	if (!ret)
-		ret = filemap_fdatawait_range(file->f_mapping, *ppos,
-					      *ppos + count - 1);
+		ret = filemap_fdatawait_range(file->f_mapping,
+					      iocb->ki_pos - written,
+					      iocb->ki_pos - 1);
 	}
 
+no_sync:
 	/*
 	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
 	 * function pointer which is called when o_direct io completes so that
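The new sync ranges follow from __generic_file_write_iter() having already advanced iocb->ki_pos past the written bytes, so the range needing a flush is [ki_pos - written, ki_pos - 1]. Sketched in isolation:

    /* Sketch: derive the just-written byte range from the advanced ki_pos. */
    loff_t start = iocb->ki_pos - written;   /* first byte written */
    loff_t end   = iocb->ki_pos - 1;         /* last byte written */

    ret = filemap_fdatawrite_range(file->f_mapping, start, end);
    if (!ret)
            ret = filemap_fdatawait_range(file->f_mapping, start, end);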
@@ -2549,7 +2496,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
 	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
 	 * need locks to protect pending reads from racing with truncate.
 	 */
-	if (filp->f_flags & O_DIRECT) {
+	if (iocb->ki_flags & IOCB_DIRECT) {
 		have_alloc_sem = 1;
 		ocfs2_iocb_set_sem_locked(iocb);
 
@@ -2583,7 +2530,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
 	trace_generic_file_aio_read_ret(ret);
 
 	/* buffered aio wouldn't have proper lock coverage today */
-	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
+	BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
 
 	/* see ocfs2_file_write_iter */
 	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
@@ -2678,8 +2625,6 @@ const struct inode_operations ocfs2_special_file_iops = {
  */
 const struct file_operations ocfs2_fops = {
 	.llseek		= ocfs2_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.mmap		= ocfs2_mmap,
 	.fsync		= ocfs2_sync_file,
 	.release	= ocfs2_file_release,
@@ -2726,8 +2671,6 @@ const struct file_operations ocfs2_dops = {
  */
 const struct file_operations ocfs2_fops_no_plocks = {
 	.llseek		= ocfs2_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.mmap		= ocfs2_mmap,
 	.fsync		= ocfs2_sync_file,
 	.release	= ocfs2_file_release,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3025c0da6b8a..be71ca0937f7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode,
 		ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
 			le16_to_cpu(di->i_suballoc_slot));
 	if (!inode_alloc_inode) {
-		status = -EEXIST;
+		status = -ENOENT;
 		mlog_errno(status);
 		goto bail;
 	}
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
 						       ORPHAN_DIR_SYSTEM_INODE,
 						       orphaned_slot);
 	if (!orphan_dir_inode) {
-		status = -EEXIST;
+		status = -ENOENT;
 		mlog_errno(status);
 		goto bail;
 	}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 044013455621..857bbbcd39f3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
 	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
 	    ocfs2_local_alloc_count_bits(alloc)) {
 		ocfs2_error(osb->sb, "local alloc inode %llu says it has "
-			    "%u free bits, but a count shows %u",
+			    "%u used bits, but a count shows %u",
 			    (unsigned long long)le64_to_cpu(alloc->i_blkno),
 			    le32_to_cpu(alloc->id1.bitmap1.i_used),
 			    ocfs2_local_alloc_count_bits(alloc));
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
 					     u32 *numbits,
 					     struct ocfs2_alloc_reservation *resv)
 {
-	int numfound, bitoff, left, startoff, lastzero;
+	int numfound = 0, bitoff, left, startoff, lastzero;
 	int local_resv = 0;
 	struct ocfs2_alloc_reservation r;
 	void *bitmap = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b5c3a5ea3ee6..09f90cbf0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 
 	trace_ocfs2_orphan_del(
 	     (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
-	     name, namelen);
+	     name, strlen(name));
 
 	/* find it's spot in the orphan directory */
-	status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
+	status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
 				  &lookup);
 	if (status) {
 		mlog_errno(status);
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
 						       ORPHAN_DIR_SYSTEM_INODE,
 						       osb->slot_num);
 	if (!orphan_dir_inode) {
-		status = -EEXIST;
+		status = -ENOENT;
 		mlog_errno(status);
 		goto leave;
 	}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8490c64d34fe..460c6c37e683 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
 
 static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
 {
-	if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
 		return 1;
 	return 0;
 }
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 20e37a3ed26f..db64ce2d4667 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -102,11 +102,11 @@
 	 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS	\
 	 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE	\
 	 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG	\
-	 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
+	 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO	\
+	 | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
 					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
-					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
-					 | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
 
 /*
  * Heartbeat-only devices are missing journals and other files.  The
@@ -179,6 +179,11 @@
 #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO	0x4000
 
 /*
+ * Append Direct IO support
+ */
+#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO	0x8000
+
+/*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
  */
@@ -200,10 +205,6 @@
 #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA	0x0002
 #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA	0x0004
 
-/*
- * Append Direct IO support
- */
-#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO	0x0008
 
 /* The byte offset of the first backup block will be 1G.
  * The following will be 4G, 16G, 64G, 256G and 1T.
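Reclassifying APPEND_DIO from ro_compat to incompat changes what an older kernel does with such a volume: an unknown ro_compat bit still permits a read-only mount, while an unknown incompat bit refuses the mount outright. The classic mount-time gate, sketched with the masks from this header (the helper name is illustrative):

    /* Sketch of the feature-bit gate at mount time. */
    static int example_check_features(struct ocfs2_super *osb, int readonly)
    {
            if (osb->s_feature_incompat & ~OCFS2_FEATURE_INCOMPAT_SUPP)
                    return -EINVAL;   /* cannot mount at all */
            if (!readonly &&
                (osb->s_feature_ro_compat & ~OCFS2_FEATURE_RO_COMPAT_SUPP))
                    return -EROFS;    /* read-only mount still allowed */
            return 0;
    }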
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee541f92dab4..df3a500789c7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 	error = posix_acl_create(dir, &mode, &default_acl, &acl);
 	if (error) {
 		mlog_errno(error);
-		goto out;
+		return error;
 	}
 
 	error = ocfs2_create_inode_in_orphan(dir, mode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d5493e361a38..e78a203d44c8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 	if (!si) {
 		status = -ENOMEM;
 		mlog_errno(status);
-		goto bail;
+		return status;
 	}
 
 	si->si_extended = ocfs2_uses_extended_slot_map(osb);
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
 
 	osb->slot_info = (struct ocfs2_slot_info *)si;
 bail:
-	if (status < 0 && si)
+	if (status < 0)
 		__ocfs2_free_slot_info(si);
 
 	return status;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 1724d43d3da1..220cae7bbdbc 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void)
 		set_bit(node_num, netmap);
 		if (!memcmp(hbmap, netmap, sizeof(hbmap)))
 			return 0;
-		if (i < O2CB_MAP_STABILIZE_COUNT)
+		if (i < O2CB_MAP_STABILIZE_COUNT - 1)
 			msleep(1000);
 	}
 
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 720aa389e0ea..2768eb1da2b8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 	BUG_ON(conn == NULL);
 
 	lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
-	if (!lc) {
-		rc = -ENOMEM;
-		goto out;
-	}
+	if (!lc)
+		return -ENOMEM;
 
 	init_waitqueue_head(&lc->oc_wait);
 	init_completion(&lc->oc_sync_wait);
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 	}
 
 out:
-	if (rc && lc)
+	if (rc)
 		kfree(lc);
 	return rc;
 }
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0cb889a17ae1..4479029630bb 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
 					 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
+		ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
+					   start_bit, count);
 		goto bail;
 	}
 
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26675185b886..403c5660b306 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2069,6 +2069,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
 	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
 	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
+	memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+	       sizeof(di->id2.i_super.s_uuid));
 
 	osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
 
@@ -2333,7 +2335,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		mlog_errno(status);
 		goto bail;
 	}
-	cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb);
+	cleancache_init_shared_fs(sb);
 
 bail:
 	return status;
@@ -2563,22 +2565,22 @@ static void ocfs2_handle_error(struct super_block *sb)
 	ocfs2_set_ro_flag(osb, 0);
 }
 
-static char error_buf[1024];
-
-void __ocfs2_error(struct super_block *sb,
-		   const char *function,
-		   const char *fmt, ...)
+void __ocfs2_error(struct super_block *sb, const char *function,
+		  const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
-	va_end(args);
+	vaf.fmt = fmt;
+	vaf.va = &args;
 
 	/* Not using mlog here because we want to show the actual
 	 * function the error came from. */
-	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
-	       sb->s_id, function, error_buf);
+	printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
+	       sb->s_id, function, &vaf);
+
+	va_end(args);
 
 	ocfs2_handle_error(sb);
 }
@@ -2586,18 +2588,21 @@ void __ocfs2_error(struct super_block *sb,
 /* Handle critical errors. This is intentionally more drastic than
  * ocfs2_handle_error, so we only use for things like journal errors,
  * etc. */
-void __ocfs2_abort(struct super_block* sb,
-		   const char *function,
+void __ocfs2_abort(struct super_block *sb, const char *function,
 		   const char *fmt, ...)
 {
+	struct va_format vaf;
 	va_list args;
 
 	va_start(args, fmt);
-	vsnprintf(error_buf, sizeof(error_buf), fmt, args);
-	va_end(args);
 
-	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
-	       sb->s_id, function, error_buf);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
+	       sb->s_id, function, &vaf);
+
+	va_end(args);
 
 	/* We don't have the cluster support yet to go straight to
 	 * hard readonly in here. Until then, we want to keep
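Both helpers now format through struct va_format and printk's %pV extension, which hands the format string plus va_list straight to printk; this removes the shared static error_buf and the race between concurrent reporters. The core of the idiom, reduced to a sketch (wrapper name invented):

    /* Sketch of the %pV idiom: no fixed-size buffer, no shared state. */
    static void example_report(const char *prefix, const char *fmt, ...)
    {
            struct va_format vaf;
            va_list args;

            va_start(args, fmt);
            vaf.fmt = fmt;
            vaf.va = &args;
            printk(KERN_CRIT "%s: %pV\n", prefix, &vaf);
            va_end(args);   /* args must stay live while %pV is consumed */
    }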
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 85b190dc132f..4ca7533be479 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode,
 						i,
 						&block_off,
 						&name_offset);
+		if (ret) {
+			mlog_errno(ret);
+			goto cleanup;
+		}
 		xs->base = bucket_block(xs->bucket, block_off);
 	}
 	if (ocfs2_xattr_is_local(xs->here)) {
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
 
 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
 						      i, &xv, NULL);
+		if (ret) {
+			mlog_errno(ret);
+			break;
+		}
 
 		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
 							 args->ref_ci,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 902e88527fce..f993be7f2156 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -337,8 +337,6 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
 
 const struct file_operations omfs_file_operations = {
 	.llseek = generic_file_llseek,
-	.read = new_sync_read,
-	.write = new_sync_write,
 	.read_iter = generic_file_read_iter,
 	.write_iter = generic_file_write_iter,
 	.mmap = generic_file_mmap,
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..98e5a52dc68c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
 		return -EOPNOTSUPP;
 
 	/* Punch hole and zero range are mutually exclusive */
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
 		return -EINVAL;
 
+	/* Insert range should only be used exclusively. */
+	if ((mode & FALLOC_FL_INSERT_RANGE) &&
+	    (mode & ~FALLOC_FL_INSERT_RANGE))
+		return -EINVAL;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
 
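FALLOC_FL_INSERT_RANGE follows the same exclusivity pattern used for FALLOC_FL_COLLAPSE_RANGE just above: the mode must contain that bit and nothing else. The `(mode & FLAG) && (mode & ~FLAG)` test rejects any combination, e.g.:

    /* Sketch: FLAG must be used alone, so any other set bit is an error. */
    if ((mode & FALLOC_FL_INSERT_RANGE) && (mode & ~FALLOC_FL_INSERT_RANGE))
            return -EINVAL;   /* e.g. INSERT_RANGE | KEEP_SIZE is rejected */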
@@ -570,6 +574,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	uid = make_kuid(current_user_ns(), user);
 	gid = make_kgid(current_user_ns(), group);
 
+retry_deleg:
 	newattrs.ia_valid =  ATTR_CTIME;
 	if (user != (uid_t) -1) {
 		if (!uid_valid(uid))
@@ -586,7 +591,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
 	if (!S_ISDIR(inode->i_mode))
 		newattrs.ia_valid |=
 			ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
 	mutex_lock(&inode->i_mutex);
 	error = security_path_chown(path, uid, gid);
 	if (!error)
@@ -734,10 +738,10 @@ static int do_dentry_open(struct file *f,
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
 	if ((f->f_mode & FMODE_READ) &&
-	     likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
+	     likely(f->f_op->read || f->f_op->read_iter))
 		f->f_mode |= FMODE_CAN_READ;
 	if ((f->f_mode & FMODE_WRITE) &&
-	     likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
+	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -988,9 +992,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 		return ERR_PTR(err);
 	if (flags & O_CREAT)
 		return ERR_PTR(-EINVAL);
-	if (!filename && (flags & O_DIRECTORY))
-		if (!dentry->d_inode->i_op->lookup)
-			return ERR_PTR(-ENOTDIR);
 	return do_file_open_root(dentry, mnt, filename, &op);
 }
 EXPORT_SYMBOL(file_open_root);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b90952f528b1..5f0d1993e6e3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct ovl_fs *ufs = sb->s_fs_info;
 
-	if (!(*flags & MS_RDONLY) &&
-	    (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
+	if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
 		return -EROFS;
 
 	return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			break;
 
 		default:
+			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 			return -EINVAL;
 		}
 	}
+
+	/* Workdir is useless in non-upper mount */
+	if (!config->upperdir && config->workdir) {
+		pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+			config->workdir);
+		kfree(config->workdir);
+		config->workdir = NULL;
+	}
+
 	return 0;
 }
 
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_stack_depth = 0;
 	if (ufs->config.upperdir) {
-		/* FIXME: workdir is not needed for a R/O mount */
 		if (!ufs->config.workdir) {
 			pr_err("overlayfs: missing 'workdir'\n");
 			goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		if (err)
 			goto out_free_config;
 
+		/* Upper fs should not be r/o */
+		if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
+			pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+			err = -EINVAL;
+			goto out_put_upperpath;
+		}
+
 		err = ovl_mount_dir(ufs->config.workdir, &workpath);
 		if (err)
 			goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	err = -EINVAL;
 	stacklen = ovl_split_lowerdirs(lowertmp);
-	if (stacklen > OVL_MAX_STACK)
+	if (stacklen > OVL_MAX_STACK) {
+		pr_err("overlayfs: too many lower directories, limit is %d\n",
+		       OVL_MAX_STACK);
 		goto out_free_lowertmp;
+	} else if (!ufs->config.upperdir && stacklen == 1) {
+		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+		goto out_free_lowertmp;
+	}
 
 	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
 	if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 		ufs->numlower++;
 	}
 
-	/* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */
-	if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))
+	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
+	if (!ufs->upper_mnt)
 		sb->s_flags |= MS_RDONLY;
 
 	sb->s_d_op = &ovl_dentry_operations;
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e58e2a6..822da5b7cff0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
 #include <linux/audit.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -947,9 +946,7 @@ err:
 const struct file_operations pipefifo_fops = {
 	.open		= fifo_open,
 	.llseek		= no_llseek,
-	.read		= new_sync_read,
 	.read_iter	= pipe_read,
-	.write		= new_sync_write,
 	.write_iter	= pipe_write,
 	.poll		= pipe_poll,
 	.unlocked_ioctl	= pipe_ioctl,
diff --git a/fs/pnode.c b/fs/pnode.c
index 260ac8f898a4..6367e1e435c6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -362,6 +362,46 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 }
 
 /*
+ * Clear MNT_LOCKED when it can be shown to be safe.
+ *
+ * mount_lock lock must be held for write
+ */
+void propagate_mount_unlock(struct mount *mnt)
+{
+	struct mount *parent = mnt->mnt_parent;
+	struct mount *m, *child;
+
+	BUG_ON(parent == mnt);
+
+	for (m = propagation_next(parent, parent); m;
+			m = propagation_next(m, parent)) {
+		child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+		if (child)
+			child->mnt.mnt_flags &= ~MNT_LOCKED;
+	}
+}
+
+/*
+ * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
+ */
+static void mark_umount_candidates(struct mount *mnt)
+{
+	struct mount *parent = mnt->mnt_parent;
+	struct mount *m;
+
+	BUG_ON(parent == mnt);
+
+	for (m = propagation_next(parent, parent); m;
+			m = propagation_next(m, parent)) {
+		struct mount *child = __lookup_mnt_last(&m->mnt,
+						mnt->mnt_mountpoint);
+		if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
+			SET_MNT_MARK(child);
+		}
+	}
+}
+
+/*
  * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
  * parent propagates to.
  */
@@ -378,13 +418,16 @@ static void __propagate_umount(struct mount *mnt)
 		struct mount *child = __lookup_mnt_last(&m->mnt,
 						mnt->mnt_mountpoint);
 		/*
-		 * umount the child only if the child has no
-		 * other children
+		 * umount the child only if the child has no children
+		 * and the child is marked safe to unmount.
 		 */
-		if (child && list_empty(&child->mnt_mounts)) {
+		if (!child || !IS_MNT_MARKED(child))
+			continue;
+		CLEAR_MNT_MARK(child);
+		if (list_empty(&child->mnt_mounts)) {
 			list_del_init(&child->mnt_child);
-			hlist_del_init_rcu(&child->mnt_hash);
-			hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
+			child->mnt.mnt_flags |= MNT_UMOUNT;
+			list_move_tail(&child->mnt_list, &mnt->mnt_list);
 		}
 	}
 }
@@ -396,11 +439,14 @@ static void __propagate_umount(struct mount *mnt)
  *
  * vfsmount lock must be held for write
  */
-int propagate_umount(struct hlist_head *list)
+int propagate_umount(struct list_head *list)
 {
 	struct mount *mnt;
 
-	hlist_for_each_entry(mnt, list, mnt_hash)
+	list_for_each_entry_reverse(mnt, list, mnt_list)
+		mark_umount_candidates(mnt);
+
+	list_for_each_entry(mnt, list, mnt_list)
 		__propagate_umount(mnt);
 	return 0;
 }
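propagate_umount() is now two passes over the umount list: a reverse walk that marks every propagated child the MNT_LOCKED rules allow to go, then a forward walk that detaches only the marked children. Separating the decision from the action keeps a mount that is locked in one propagation peer from being torn down through another. The two-pass skeleton, for reference:

    /* Sketch of the mark-then-sweep structure; walk order matters. */
    list_for_each_entry_reverse(mnt, list, mnt_list)
            mark_umount_candidates(mnt);      /* pass 1: decide */

    list_for_each_entry(mnt, list, mnt_list)
            __propagate_umount(mnt);          /* pass 2: detach the marked */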
diff --git a/fs/pnode.h b/fs/pnode.h
index 4a246358b031..7114ce6e6b9e 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -19,6 +19,9 @@
 #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
 #define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
 #define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
+#define IS_MNT_LOCKED_AND_LAZY(m) \
+	(((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED)
 
 #define CL_EXPIRE		0x01
 #define CL_SLAVE		0x02
@@ -40,14 +43,14 @@ static inline void set_mnt_shared(struct mount *mnt)
 void change_mnt_propagation(struct mount *, int);
 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
 		struct hlist_head *);
-int propagate_umount(struct hlist_head *);
+int propagate_umount(struct list_head *);
 int propagate_mount_busy(struct mount *, int);
+void propagate_mount_unlock(struct mount *);
 void mnt_release_group_id(struct mount *);
 int get_dominating_id(struct mount *mnt, const struct path *root);
 unsigned int mnt_get_count(struct mount *mnt);
 void mnt_set_mountpoint(struct mount *, struct mountpoint *,
 			struct mount *);
-void umount_tree(struct mount *, int);
 struct mount *copy_tree(struct mount *, struct dentry *, int);
 bool is_path_reachable(struct mount *, struct dentry *,
 			const struct path *root);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1295a00ca316..fd02a9ebfc30 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -99,8 +99,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
 	buf = m->buf + m->count;
 
 	/* Ignore error for now */
-	string_escape_str(tcomm, &buf, m->size - m->count,
-			  ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
+	buf += string_escape_str(tcomm, buf, m->size - m->count,
+				 ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
 
 	m->count = buf - m->buf;
 	seq_putc(m, '\n');
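In this generation of the API, string_escape_str() takes a plain destination pointer and returns the escaped length rather than advancing the pointer through an out-parameter, so the caller must advance buf itself; the old call discarded that length and left m->count stale. The corrected calling convention:

    /* Sketch: the return value is the escaped length, so advance by it. */
    char *buf = m->buf + m->count;

    buf += string_escape_str(tcomm, buf, m->size - m->count,
                             ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
    m->count = buf - m->buf;   /* now reflects the escaped name */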
@@ -188,6 +188,24 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
 	put_cred(cred);
 
+#ifdef CONFIG_PID_NS
+	seq_puts(m, "\nNStgid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_tgid_nr_ns(p, pid->numbers[g].ns));
+	seq_puts(m, "\nNSpid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_pid_nr_ns(p, pid->numbers[g].ns));
+	seq_puts(m, "\nNSpgid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_pgrp_nr_ns(p, pid->numbers[g].ns));
+	seq_puts(m, "\nNSsid:");
+	for (g = ns->level; g <= pid->level; g++)
+		seq_printf(m, "\t%d",
+			task_session_nr_ns(p, pid->numbers[g].ns));
+#endif
 	seq_putc(m, '\n');
 }
 
@@ -614,7 +632,9 @@ static int children_seq_show(struct seq_file *seq, void *v)
 	pid_t pid;
 
 	pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
-	return seq_printf(seq, "%d ", pid);
+	seq_printf(seq, "%d ", pid);
+
+	return 0;
 }
 
 static void *children_seq_start(struct seq_file *seq, loff_t *pos)
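This and the fs/proc hunks that follow are all the same conversion: callers stop returning seq_printf()'s value (a preparation for that return value going away entirely) and return 0 unconditionally, since buffer overflow is tracked by the seq_file core and can be queried separately with seq_has_overflowed(). The resulting convention, sketched with an invented field name:

    /* Sketch: a seq_file ->show() under the no-return-value convention. */
    static int example_seq_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "%d ", some_value);  /* nothing to propagate */
            return 0;  /* the core rechecks overflow and retries if needed */
    }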
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3f3d7aeb0712..7a3b82f986dd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -238,13 +238,15 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
 
 	wchan = get_wchan(task);
 
-	if (lookup_symbol_name(wchan, symname) < 0)
+	if (lookup_symbol_name(wchan, symname) < 0) {
 		if (!ptrace_may_access(task, PTRACE_MODE_READ))
 			return 0;
-		else
-			return seq_printf(m, "%lu", wchan);
-	else
-		return seq_printf(m, "%s", symname);
+		seq_printf(m, "%lu", wchan);
+	} else {
+		seq_printf(m, "%s", symname);
+	}
+
+	return 0;
 }
 #endif /* CONFIG_KALLSYMS */
 
@@ -309,10 +311,12 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
 			      struct pid *pid, struct task_struct *task)
 {
-	return seq_printf(m, "%llu %llu %lu\n",
-			  (unsigned long long)task->se.sum_exec_runtime,
-			  (unsigned long long)task->sched_info.run_delay,
-			  task->sched_info.pcount);
+	seq_printf(m, "%llu %llu %lu\n",
+		   (unsigned long long)task->se.sum_exec_runtime,
+		   (unsigned long long)task->sched_info.run_delay,
+		   task->sched_info.pcount);
+
+	return 0;
 }
 #endif
 
@@ -387,7 +391,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
 	points = oom_badness(task, NULL, NULL, totalpages) *
 					1000 / totalpages;
 	read_unlock(&tasklist_lock);
-	return seq_printf(m, "%lu\n", points);
+	seq_printf(m, "%lu\n", points);
+
+	return 0;
 }
 
 struct limit_names {
@@ -432,15 +438,15 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
 	 * print the file header
 	 */
 	seq_printf(m, "%-25s %-20s %-20s %-10s\n",
 		   "Limit", "Soft Limit", "Hard Limit", "Units");
 
 	for (i = 0; i < RLIM_NLIMITS; i++) {
 		if (rlim[i].rlim_cur == RLIM_INFINITY)
 			seq_printf(m, "%-25s %-20s ",
 				   lnames[i].name, "unlimited");
 		else
 			seq_printf(m, "%-25s %-20lu ",
 				   lnames[i].name, rlim[i].rlim_cur);
 
 		if (rlim[i].rlim_max == RLIM_INFINITY)
 			seq_printf(m, "%-20s ", "unlimited");
@@ -462,7 +468,9 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
 {
 	long nr;
 	unsigned long args[6], sp, pc;
-	int res = lock_trace(task);
+	int res;
+
+	res = lock_trace(task);
 	if (res)
 		return res;
 
@@ -477,7 +485,8 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
 		   args[0], args[1], args[2], args[3], args[4], args[5],
 		   sp, pc);
 	unlock_trace(task);
-	return res;
+
+	return 0;
 }
 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
 
@@ -2002,12 +2011,13 @@ static int show_timer(struct seq_file *m, void *v)
 	notify = timer->it_sigev_notify;
 
 	seq_printf(m, "ID: %d\n", timer->it_id);
-	seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
-			timer->sigq->info.si_value.sival_ptr);
+	seq_printf(m, "signal: %d/%p\n",
+		   timer->sigq->info.si_signo,
+		   timer->sigq->info.si_value.sival_ptr);
 	seq_printf(m, "notify: %s/%s.%d\n",
 		   nstr[notify & ~SIGEV_THREAD_ID],
 		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
 		   pid_nr_ns(timer->it_pid, tp->ns));
 	seq_printf(m, "ClockID: %d\n", timer->it_clock);
 
 	return 0;
@@ -2352,21 +2362,23 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
 
 		unlock_task_sighand(task, &flags);
 	}
-	result = seq_printf(m,
+	seq_printf(m,
 		   "rchar: %llu\n"
 		   "wchar: %llu\n"
 		   "syscr: %llu\n"
 		   "syscw: %llu\n"
 		   "read_bytes: %llu\n"
 		   "write_bytes: %llu\n"
 		   "cancelled_write_bytes: %llu\n",
 		   (unsigned long long)acct.rchar,
 		   (unsigned long long)acct.wchar,
 		   (unsigned long long)acct.syscr,
 		   (unsigned long long)acct.syscw,
 		   (unsigned long long)acct.read_bytes,
 		   (unsigned long long)acct.write_bytes,
 		   (unsigned long long)acct.cancelled_write_bytes);
+	result = 0;
+
 out_unlock:
 	mutex_unlock(&task->signal->cred_guard_mutex);
 	return result;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 8e5ad83b629a..af84ad04df77 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -8,6 +8,7 @@
 #include <linux/security.h>
 #include <linux/file.h>
 #include <linux/seq_file.h>
+#include <linux/fs.h>
 
 #include <linux/proc_fs.h>
 
@@ -48,17 +49,23 @@ static int seq_show(struct seq_file *m, void *v)
 		put_files_struct(files);
 	}
 
-	if (!ret) {
-		seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
-			   (long long)file->f_pos, f_flags,
-			   real_mount(file->f_path.mnt)->mnt_id);
-		if (file->f_op->show_fdinfo)
-			file->f_op->show_fdinfo(m, file);
-		ret = seq_has_overflowed(m);
-		fput(file);
-	}
+	if (ret)
+		return ret;
 
-	return ret;
+	seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
+		   (long long)file->f_pos, f_flags,
+		   real_mount(file->f_path.mnt)->mnt_id);
+
+	show_fd_locks(m, file, files);
+	if (seq_has_overflowed(m))
+		goto out;
+
+	if (file->f_op->show_fdinfo)
+		file->f_op->show_fdinfo(m, file);
+
+out:
+	fput(file);
+	return 0;
 }
 
 static int seq_fdinfo_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
 
 static int pagemap_open(struct inode *inode, struct file *file)
 {
+	/* do not disclose physical addresses: attack vector */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
 	pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
 			"to stop being page-shift some time soon. See the "
 			"linux/Documentation/vm/pagemap.txt for details.\n");
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b32ce53d24ee..56e1ffda4d89 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -364,6 +364,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
 	case PSTORE_TYPE_PMSG:
 		scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id);
 		break;
+	case PSTORE_TYPE_PPC_OPAL:
+		sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+		break;
 	case PSTORE_TYPE_UNKNOWN:
 		scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id);
 		break;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
 	mem_address = pdata->mem_address;
 	record_size = pdata->record_size;
 	dump_oops = pdata->dump_oops;
+	ramoops_console_size = pdata->console_size;
+	ramoops_pmsg_size = pdata->pmsg_size;
+	ramoops_ftrace_size = pdata->ftrace_size;
 
 	pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
 		cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0ccd4ba3a246..ecc25cf0ee6e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -900,14 +900,17 @@ static inline struct dquot **i_dquot(struct inode *inode)
900 900
901static int dqinit_needed(struct inode *inode, int type) 901static int dqinit_needed(struct inode *inode, int type)
902{ 902{
903 struct dquot * const *dquots;
903 int cnt; 904 int cnt;
904 905
905 if (IS_NOQUOTA(inode)) 906 if (IS_NOQUOTA(inode))
906 return 0; 907 return 0;
908
909 dquots = i_dquot(inode);
907 if (type != -1) 910 if (type != -1)
908 return !i_dquot(inode)[type]; 911 return !dquots[type];
909 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 912 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
910 if (!i_dquot(inode)[cnt]) 913 if (!dquots[cnt])
911 return 1; 914 return 1;
912 return 0; 915 return 0;
913} 916}
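
Caching i_dquot() in a local is not just style: since the 3.19 rework the helper resolves through a superblock callback, so hoisting it saves an indirect call per access. The accessor being cached looks roughly like this (a sketch of that rework's shape, not a quote):

static struct dquot **i_dquot(struct inode *inode)
{
	return inode->i_sb->s_op->get_dquots(inode);
}
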
@@ -970,12 +973,13 @@ static void add_dquot_ref(struct super_block *sb, int type)
970static void remove_inode_dquot_ref(struct inode *inode, int type, 973static void remove_inode_dquot_ref(struct inode *inode, int type,
971 struct list_head *tofree_head) 974 struct list_head *tofree_head)
972{ 975{
973 struct dquot *dquot = i_dquot(inode)[type]; 976 struct dquot **dquots = i_dquot(inode);
977 struct dquot *dquot = dquots[type];
974 978
975 i_dquot(inode)[type] = NULL;
976 if (!dquot) 979 if (!dquot)
977 return; 980 return;
978 981
982 dquots[type] = NULL;
979 if (list_empty(&dquot->dq_free)) { 983 if (list_empty(&dquot->dq_free)) {
980 /* 984 /*
981 * The inode still has reference to dquot so it can't be in the 985 * The inode still has reference to dquot so it can't be in the
@@ -1159,8 +1163,8 @@ static int need_print_warning(struct dquot_warn *warn)
1159 return uid_eq(current_fsuid(), warn->w_dq_id.uid); 1163 return uid_eq(current_fsuid(), warn->w_dq_id.uid);
1160 case GRPQUOTA: 1164 case GRPQUOTA:
1161 return in_group_p(warn->w_dq_id.gid); 1165 return in_group_p(warn->w_dq_id.gid);
1162 case PRJQUOTA: /* Never taken... Just make gcc happy */ 1166 case PRJQUOTA:
1163 return 0; 1167 return 1;
1164 } 1168 }
1165 return 0; 1169 return 0;
1166} 1170}
@@ -1389,16 +1393,21 @@ static int dquot_active(const struct inode *inode)
1389static void __dquot_initialize(struct inode *inode, int type) 1393static void __dquot_initialize(struct inode *inode, int type)
1390{ 1394{
1391 int cnt, init_needed = 0; 1395 int cnt, init_needed = 0;
1392 struct dquot *got[MAXQUOTAS]; 1396 struct dquot **dquots, *got[MAXQUOTAS];
1393 struct super_block *sb = inode->i_sb; 1397 struct super_block *sb = inode->i_sb;
1394 qsize_t rsv; 1398 qsize_t rsv;
1395 1399
1396 if (!dquot_active(inode)) 1400 if (!dquot_active(inode))
1397 return; 1401 return;
1398 1402
1403 dquots = i_dquot(inode);
1404
1399 /* First get references to structures we might need. */ 1405 /* First get references to structures we might need. */
1400 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1406 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1401 struct kqid qid; 1407 struct kqid qid;
1408 kprojid_t projid;
1409 int rc;
1410
1402 got[cnt] = NULL; 1411 got[cnt] = NULL;
1403 if (type != -1 && cnt != type) 1412 if (type != -1 && cnt != type)
1404 continue; 1413 continue;
@@ -1407,8 +1416,12 @@ static void __dquot_initialize(struct inode *inode, int type)
1407 * we check it without locking here to avoid unnecessary 1416 * we check it without locking here to avoid unnecessary
1408 * dqget()/dqput() calls. 1417 * dqget()/dqput() calls.
1409 */ 1418 */
1410 if (i_dquot(inode)[cnt]) 1419 if (dquots[cnt])
1420 continue;
1421
1422 if (!sb_has_quota_active(sb, cnt))
1411 continue; 1423 continue;
1424
1412 init_needed = 1; 1425 init_needed = 1;
1413 1426
1414 switch (cnt) { 1427 switch (cnt) {
@@ -1418,6 +1431,12 @@ static void __dquot_initialize(struct inode *inode, int type)
1418 case GRPQUOTA: 1431 case GRPQUOTA:
1419 qid = make_kqid_gid(inode->i_gid); 1432 qid = make_kqid_gid(inode->i_gid);
1420 break; 1433 break;
1434 case PRJQUOTA:
1435 rc = inode->i_sb->dq_op->get_projid(inode, &projid);
1436 if (rc)
1437 continue;
1438 qid = make_kqid_projid(projid);
1439 break;
1421 } 1440 }
1422 got[cnt] = dqget(sb, qid); 1441 got[cnt] = dqget(sb, qid);
1423 } 1442 }
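
The new PRJQUOTA arm depends on the filesystem exporting get_projid in its dquot_operations (vfs_load_quota_inode below refuses project quota without it). The filesystem-side wiring, as a sketch with hypothetical myfs_* names:

struct myfs_inode_info {
	struct inode	vfs_inode;
	kprojid_t	i_projid;
};

static int myfs_get_projid(struct inode *inode, kprojid_t *projid)
{
	*projid = container_of(inode, struct myfs_inode_info,
			       vfs_inode)->i_projid;
	return 0;
}

static const struct dquot_operations myfs_quota_ops = {
	.get_projid	= myfs_get_projid,
	/* .write_dquot, .acquire_dquot, ... */
};
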
@@ -1438,8 +1457,8 @@ static void __dquot_initialize(struct inode *inode, int type)
1438 /* We could race with quotaon or dqget() could have failed */ 1457 /* We could race with quotaon or dqget() could have failed */
1439 if (!got[cnt]) 1458 if (!got[cnt])
1440 continue; 1459 continue;
1441 if (!i_dquot(inode)[cnt]) { 1460 if (!dquots[cnt]) {
1442 i_dquot(inode)[cnt] = got[cnt]; 1461 dquots[cnt] = got[cnt];
1443 got[cnt] = NULL; 1462 got[cnt] = NULL;
1444 /* 1463 /*
1445 * Make quota reservation system happy if someone 1464 * Make quota reservation system happy if someone
@@ -1447,7 +1466,7 @@ static void __dquot_initialize(struct inode *inode, int type)
1447 */ 1466 */
1448 rsv = inode_get_rsv_space(inode); 1467 rsv = inode_get_rsv_space(inode);
1449 if (unlikely(rsv)) 1468 if (unlikely(rsv))
1450 dquot_resv_space(i_dquot(inode)[cnt], rsv); 1469 dquot_resv_space(dquots[cnt], rsv);
1451 } 1470 }
1452 } 1471 }
1453out_err: 1472out_err:
@@ -1473,12 +1492,13 @@ EXPORT_SYMBOL(dquot_initialize);
1473static void __dquot_drop(struct inode *inode) 1492static void __dquot_drop(struct inode *inode)
1474{ 1493{
1475 int cnt; 1494 int cnt;
1495 struct dquot **dquots = i_dquot(inode);
1476 struct dquot *put[MAXQUOTAS]; 1496 struct dquot *put[MAXQUOTAS];
1477 1497
1478 spin_lock(&dq_data_lock); 1498 spin_lock(&dq_data_lock);
1479 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1499 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1480 put[cnt] = i_dquot(inode)[cnt]; 1500 put[cnt] = dquots[cnt];
1481 i_dquot(inode)[cnt] = NULL; 1501 dquots[cnt] = NULL;
1482 } 1502 }
1483 spin_unlock(&dq_data_lock); 1503 spin_unlock(&dq_data_lock);
1484 dqput_all(put); 1504 dqput_all(put);
@@ -1486,6 +1506,7 @@ static void __dquot_drop(struct inode *inode)
1486 1506
1487void dquot_drop(struct inode *inode) 1507void dquot_drop(struct inode *inode)
1488{ 1508{
1509 struct dquot * const *dquots;
1489 int cnt; 1510 int cnt;
1490 1511
1491 if (IS_NOQUOTA(inode)) 1512 if (IS_NOQUOTA(inode))
@@ -1498,8 +1519,9 @@ void dquot_drop(struct inode *inode)
1498 * must assure that nobody can come after the DQUOT_DROP and 1519 * must assure that nobody can come after the DQUOT_DROP and
1499 * add quota pointers back anyway. 1520 * add quota pointers back anyway.
1500 */ 1521 */
1522 dquots = i_dquot(inode);
1501 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1523 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1502 if (i_dquot(inode)[cnt]) 1524 if (dquots[cnt])
1503 break; 1525 break;
1504 } 1526 }
1505 1527
@@ -1600,8 +1622,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1600{ 1622{
1601 int cnt, ret = 0, index; 1623 int cnt, ret = 0, index;
1602 struct dquot_warn warn[MAXQUOTAS]; 1624 struct dquot_warn warn[MAXQUOTAS];
1603 struct dquot **dquots = i_dquot(inode);
1604 int reserve = flags & DQUOT_SPACE_RESERVE; 1625 int reserve = flags & DQUOT_SPACE_RESERVE;
1626 struct dquot **dquots;
1605 1627
1606 if (!dquot_active(inode)) { 1628 if (!dquot_active(inode)) {
1607 inode_incr_space(inode, number, reserve); 1629 inode_incr_space(inode, number, reserve);
@@ -1611,6 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1611 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1633 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1612 warn[cnt].w_type = QUOTA_NL_NOWARN; 1634 warn[cnt].w_type = QUOTA_NL_NOWARN;
1613 1635
1636 dquots = i_dquot(inode);
1614 index = srcu_read_lock(&dquot_srcu); 1637 index = srcu_read_lock(&dquot_srcu);
1615 spin_lock(&dq_data_lock); 1638 spin_lock(&dq_data_lock);
1616 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1639 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1652,13 +1675,14 @@ int dquot_alloc_inode(struct inode *inode)
1652{ 1675{
1653 int cnt, ret = 0, index; 1676 int cnt, ret = 0, index;
1654 struct dquot_warn warn[MAXQUOTAS]; 1677 struct dquot_warn warn[MAXQUOTAS];
1655 struct dquot * const *dquots = i_dquot(inode); 1678 struct dquot * const *dquots;
1656 1679
1657 if (!dquot_active(inode)) 1680 if (!dquot_active(inode))
1658 return 0; 1681 return 0;
1659 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1682 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1660 warn[cnt].w_type = QUOTA_NL_NOWARN; 1683 warn[cnt].w_type = QUOTA_NL_NOWARN;
1661 1684
1685 dquots = i_dquot(inode);
1662 index = srcu_read_lock(&dquot_srcu); 1686 index = srcu_read_lock(&dquot_srcu);
1663 spin_lock(&dq_data_lock); 1687 spin_lock(&dq_data_lock);
1664 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1688 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1690,6 +1714,7 @@ EXPORT_SYMBOL(dquot_alloc_inode);
1690 */ 1714 */
1691int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) 1715int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1692{ 1716{
1717 struct dquot **dquots;
1693 int cnt, index; 1718 int cnt, index;
1694 1719
1695 if (!dquot_active(inode)) { 1720 if (!dquot_active(inode)) {
@@ -1697,18 +1722,18 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1697 return 0; 1722 return 0;
1698 } 1723 }
1699 1724
1725 dquots = i_dquot(inode);
1700 index = srcu_read_lock(&dquot_srcu); 1726 index = srcu_read_lock(&dquot_srcu);
1701 spin_lock(&dq_data_lock); 1727 spin_lock(&dq_data_lock);
1702 /* Claim reserved quotas to allocated quotas */ 1728 /* Claim reserved quotas to allocated quotas */
1703 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1729 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1704 if (i_dquot(inode)[cnt]) 1730 if (dquots[cnt])
1705 dquot_claim_reserved_space(i_dquot(inode)[cnt], 1731 dquot_claim_reserved_space(dquots[cnt], number);
1706 number);
1707 } 1732 }
1708 /* Update inode bytes */ 1733 /* Update inode bytes */
1709 inode_claim_rsv_space(inode, number); 1734 inode_claim_rsv_space(inode, number);
1710 spin_unlock(&dq_data_lock); 1735 spin_unlock(&dq_data_lock);
1711 mark_all_dquot_dirty(i_dquot(inode)); 1736 mark_all_dquot_dirty(dquots);
1712 srcu_read_unlock(&dquot_srcu, index); 1737 srcu_read_unlock(&dquot_srcu, index);
1713 return 0; 1738 return 0;
1714} 1739}
@@ -1719,6 +1744,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
1719 */ 1744 */
1720void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) 1745void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1721{ 1746{
1747 struct dquot **dquots;
1722 int cnt, index; 1748 int cnt, index;
1723 1749
1724 if (!dquot_active(inode)) { 1750 if (!dquot_active(inode)) {
@@ -1726,18 +1752,18 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1726 return; 1752 return;
1727 } 1753 }
1728 1754
1755 dquots = i_dquot(inode);
1729 index = srcu_read_lock(&dquot_srcu); 1756 index = srcu_read_lock(&dquot_srcu);
1730 spin_lock(&dq_data_lock); 1757 spin_lock(&dq_data_lock);
1731 /* Claim reserved quotas to allocated quotas */ 1758 /* Claim reserved quotas to allocated quotas */
1732 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1759 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1733 if (i_dquot(inode)[cnt]) 1760 if (dquots[cnt])
1734 dquot_reclaim_reserved_space(i_dquot(inode)[cnt], 1761 dquot_reclaim_reserved_space(dquots[cnt], number);
1735 number);
1736 } 1762 }
1737 /* Update inode bytes */ 1763 /* Update inode bytes */
1738 inode_reclaim_rsv_space(inode, number); 1764 inode_reclaim_rsv_space(inode, number);
1739 spin_unlock(&dq_data_lock); 1765 spin_unlock(&dq_data_lock);
1740 mark_all_dquot_dirty(i_dquot(inode)); 1766 mark_all_dquot_dirty(dquots);
1741 srcu_read_unlock(&dquot_srcu, index); 1767 srcu_read_unlock(&dquot_srcu, index);
1742 return; 1768 return;
1743} 1769}
@@ -1750,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1750{ 1776{
1751 unsigned int cnt; 1777 unsigned int cnt;
1752 struct dquot_warn warn[MAXQUOTAS]; 1778 struct dquot_warn warn[MAXQUOTAS];
1753 struct dquot **dquots = i_dquot(inode); 1779 struct dquot **dquots;
1754 int reserve = flags & DQUOT_SPACE_RESERVE, index; 1780 int reserve = flags & DQUOT_SPACE_RESERVE, index;
1755 1781
1756 if (!dquot_active(inode)) { 1782 if (!dquot_active(inode)) {
@@ -1758,6 +1784,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1758 return; 1784 return;
1759 } 1785 }
1760 1786
1787 dquots = i_dquot(inode);
1761 index = srcu_read_lock(&dquot_srcu); 1788 index = srcu_read_lock(&dquot_srcu);
1762 spin_lock(&dq_data_lock); 1789 spin_lock(&dq_data_lock);
1763 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1790 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1793,12 +1820,13 @@ void dquot_free_inode(struct inode *inode)
1793{ 1820{
1794 unsigned int cnt; 1821 unsigned int cnt;
1795 struct dquot_warn warn[MAXQUOTAS]; 1822 struct dquot_warn warn[MAXQUOTAS];
1796 struct dquot * const *dquots = i_dquot(inode); 1823 struct dquot * const *dquots;
1797 int index; 1824 int index;
1798 1825
1799 if (!dquot_active(inode)) 1826 if (!dquot_active(inode))
1800 return; 1827 return;
1801 1828
1829 dquots = i_dquot(inode);
1802 index = srcu_read_lock(&dquot_srcu); 1830 index = srcu_read_lock(&dquot_srcu);
1803 spin_lock(&dq_data_lock); 1831 spin_lock(&dq_data_lock);
1804 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1832 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -2161,7 +2189,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2161 error = -EROFS; 2189 error = -EROFS;
2162 goto out_fmt; 2190 goto out_fmt;
2163 } 2191 }
2164 if (!sb->s_op->quota_write || !sb->s_op->quota_read) { 2192 if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
2193 (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
2165 error = -EINVAL; 2194 error = -EINVAL;
2166 goto out_fmt; 2195 goto out_fmt;
2167 } 2196 }
@@ -2614,55 +2643,73 @@ out:
2614EXPORT_SYMBOL(dquot_set_dqblk); 2643EXPORT_SYMBOL(dquot_set_dqblk);
2615 2644
2616/* Generic routine for getting common part of quota file information */ 2645/* Generic routine for getting common part of quota file information */
2617int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) 2646int dquot_get_state(struct super_block *sb, struct qc_state *state)
2618{ 2647{
2619 struct mem_dqinfo *mi; 2648 struct mem_dqinfo *mi;
2649 struct qc_type_state *tstate;
2650 struct quota_info *dqopt = sb_dqopt(sb);
2651 int type;
2620 2652
2621 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 2653 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
2622 if (!sb_has_quota_active(sb, type)) { 2654 memset(state, 0, sizeof(*state));
2623 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 2655 for (type = 0; type < MAXQUOTAS; type++) {
2624 return -ESRCH; 2656 if (!sb_has_quota_active(sb, type))
2657 continue;
2658 tstate = state->s_state + type;
2659 mi = sb_dqopt(sb)->info + type;
2660 tstate->flags = QCI_ACCT_ENABLED;
2661 spin_lock(&dq_data_lock);
2662 if (mi->dqi_flags & DQF_SYS_FILE)
2663 tstate->flags |= QCI_SYSFILE;
2664 if (mi->dqi_flags & DQF_ROOT_SQUASH)
2665 tstate->flags |= QCI_ROOT_SQUASH;
2666 if (sb_has_quota_limits_enabled(sb, type))
2667 tstate->flags |= QCI_LIMITS_ENFORCED;
2668 tstate->spc_timelimit = mi->dqi_bgrace;
2669 tstate->ino_timelimit = mi->dqi_igrace;
2670 tstate->ino = dqopt->files[type]->i_ino;
2671 tstate->blocks = dqopt->files[type]->i_blocks;
2672 tstate->nextents = 1; /* We don't know... */
2673 spin_unlock(&dq_data_lock);
2625 } 2674 }
2626 mi = sb_dqopt(sb)->info + type;
2627 spin_lock(&dq_data_lock);
2628 ii->dqi_bgrace = mi->dqi_bgrace;
2629 ii->dqi_igrace = mi->dqi_igrace;
2630 ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK;
2631 ii->dqi_valid = IIF_ALL;
2632 spin_unlock(&dq_data_lock);
2633 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 2675 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
2634 return 0; 2676 return 0;
2635} 2677}
2636EXPORT_SYMBOL(dquot_get_dqinfo); 2678EXPORT_SYMBOL(dquot_get_state);
2637 2679
2638/* Generic routine for setting common part of quota file information */ 2680/* Generic routine for setting common part of quota file information */
2639int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) 2681int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
2640{ 2682{
2641 struct mem_dqinfo *mi; 2683 struct mem_dqinfo *mi;
2642 int err = 0; 2684 int err = 0;
2643 2685
2686 if ((ii->i_fieldmask & QC_WARNS_MASK) ||
2687 (ii->i_fieldmask & QC_RT_SPC_TIMER))
2688 return -EINVAL;
2644 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 2689 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
2645 if (!sb_has_quota_active(sb, type)) { 2690 if (!sb_has_quota_active(sb, type)) {
2646 err = -ESRCH; 2691 err = -ESRCH;
2647 goto out; 2692 goto out;
2648 } 2693 }
2649 mi = sb_dqopt(sb)->info + type; 2694 mi = sb_dqopt(sb)->info + type;
2650 if (ii->dqi_valid & IIF_FLAGS) { 2695 if (ii->i_fieldmask & QC_FLAGS) {
2651 if (ii->dqi_flags & ~DQF_SETINFO_MASK || 2696 if ((ii->i_flags & QCI_ROOT_SQUASH &&
2652 (ii->dqi_flags & DQF_ROOT_SQUASH &&
2653 mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) { 2697 mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) {
2654 err = -EINVAL; 2698 err = -EINVAL;
2655 goto out; 2699 goto out;
2656 } 2700 }
2657 } 2701 }
2658 spin_lock(&dq_data_lock); 2702 spin_lock(&dq_data_lock);
2659 if (ii->dqi_valid & IIF_BGRACE) 2703 if (ii->i_fieldmask & QC_SPC_TIMER)
2660 mi->dqi_bgrace = ii->dqi_bgrace; 2704 mi->dqi_bgrace = ii->i_spc_timelimit;
2661 if (ii->dqi_valid & IIF_IGRACE) 2705 if (ii->i_fieldmask & QC_INO_TIMER)
2662 mi->dqi_igrace = ii->dqi_igrace; 2706 mi->dqi_igrace = ii->i_ino_timelimit;
2663 if (ii->dqi_valid & IIF_FLAGS) 2707 if (ii->i_fieldmask & QC_FLAGS) {
2664 mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) | 2708 if (ii->i_flags & QCI_ROOT_SQUASH)
2665 (ii->dqi_flags & DQF_SETINFO_MASK); 2709 mi->dqi_flags |= DQF_ROOT_SQUASH;
2710 else
2711 mi->dqi_flags &= ~DQF_ROOT_SQUASH;
2712 }
2666 spin_unlock(&dq_data_lock); 2713 spin_unlock(&dq_data_lock);
2667 mark_info_dirty(sb, type); 2714 mark_info_dirty(sb, type);
2668 /* Force write to disk */ 2715 /* Force write to disk */
@@ -2677,7 +2724,7 @@ const struct quotactl_ops dquot_quotactl_ops = {
2677 .quota_on = dquot_quota_on, 2724 .quota_on = dquot_quota_on,
2678 .quota_off = dquot_quota_off, 2725 .quota_off = dquot_quota_off,
2679 .quota_sync = dquot_quota_sync, 2726 .quota_sync = dquot_quota_sync,
2680 .get_info = dquot_get_dqinfo, 2727 .get_state = dquot_get_state,
2681 .set_info = dquot_set_dqinfo, 2728 .set_info = dquot_set_dqinfo,
2682 .get_dqblk = dquot_get_dqblk, 2729 .get_dqblk = dquot_get_dqblk,
2683 .set_dqblk = dquot_set_dqblk 2730 .set_dqblk = dquot_set_dqblk
@@ -2688,7 +2735,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = {
2688 .quota_enable = dquot_quota_enable, 2735 .quota_enable = dquot_quota_enable,
2689 .quota_disable = dquot_quota_disable, 2736 .quota_disable = dquot_quota_disable,
2690 .quota_sync = dquot_quota_sync, 2737 .quota_sync = dquot_quota_sync,
2691 .get_info = dquot_get_dqinfo, 2738 .get_state = dquot_get_state,
2692 .set_info = dquot_set_dqinfo, 2739 .set_info = dquot_set_dqinfo,
2693 .get_dqblk = dquot_get_dqblk, 2740 .get_dqblk = dquot_get_dqblk,
2694 .set_dqblk = dquot_set_dqblk 2741 .set_dqblk = dquot_set_dqblk
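
With ->get_info replaced by ->get_state, a caller fetches the state of every quota type in one shot and picks out what it needs. A minimal in-kernel consumer, sketched under the assumption of a dquot-backed superblock:

static void dump_quota_state(struct super_block *sb)
{
	struct qc_state state;
	int type;

	if (dquot_get_state(sb, &state))
		return;

	for (type = 0; type < MAXQUOTAS; type++) {
		struct qc_type_state *ts = state.s_state + type;

		if (!(ts->flags & QCI_ACCT_ENABLED))
			continue;
		pr_info("type %d: grace %u/%u, quota file ino %llu\n",
			type, ts->spc_timelimit, ts->ino_timelimit,
			(unsigned long long)ts->ino);
	}
}
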
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index d14a799c7785..86ded7375c21 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -118,13 +118,30 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
118 118
119static int quota_getinfo(struct super_block *sb, int type, void __user *addr) 119static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
120{ 120{
121 struct if_dqinfo info; 121 struct qc_state state;
122 struct qc_type_state *tstate;
123 struct if_dqinfo uinfo;
122 int ret; 124 int ret;
123 125
124 if (!sb->s_qcop->get_info) 126 /* This checks whether qc_state has enough entries... */
127 BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS);
128 if (!sb->s_qcop->get_state)
125 return -ENOSYS; 129 return -ENOSYS;
126 ret = sb->s_qcop->get_info(sb, type, &info); 130 ret = sb->s_qcop->get_state(sb, &state);
127 if (!ret && copy_to_user(addr, &info, sizeof(info))) 131 if (ret)
132 return ret;
133 tstate = state.s_state + type;
134 if (!(tstate->flags & QCI_ACCT_ENABLED))
135 return -ESRCH;
136 memset(&uinfo, 0, sizeof(uinfo));
137 uinfo.dqi_bgrace = tstate->spc_timelimit;
138 uinfo.dqi_igrace = tstate->ino_timelimit;
139 if (tstate->flags & QCI_SYSFILE)
140 uinfo.dqi_flags |= DQF_SYS_FILE;
141 if (tstate->flags & QCI_ROOT_SQUASH)
142 uinfo.dqi_flags |= DQF_ROOT_SQUASH;
143 uinfo.dqi_valid = IIF_ALL;
144 if (!ret && copy_to_user(addr, &uinfo, sizeof(uinfo)))
128 return -EFAULT; 145 return -EFAULT;
129 return ret; 146 return ret;
130} 147}
@@ -132,12 +149,31 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
132static int quota_setinfo(struct super_block *sb, int type, void __user *addr) 149static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
133{ 150{
134 struct if_dqinfo info; 151 struct if_dqinfo info;
152 struct qc_info qinfo;
135 153
136 if (copy_from_user(&info, addr, sizeof(info))) 154 if (copy_from_user(&info, addr, sizeof(info)))
137 return -EFAULT; 155 return -EFAULT;
138 if (!sb->s_qcop->set_info) 156 if (!sb->s_qcop->set_info)
139 return -ENOSYS; 157 return -ENOSYS;
140 return sb->s_qcop->set_info(sb, type, &info); 158 if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE))
159 return -EINVAL;
160 memset(&qinfo, 0, sizeof(qinfo));
161 if (info.dqi_valid & IIF_FLAGS) {
162 if (info.dqi_flags & ~DQF_SETINFO_MASK)
163 return -EINVAL;
164 if (info.dqi_flags & DQF_ROOT_SQUASH)
165 qinfo.i_flags |= QCI_ROOT_SQUASH;
166 qinfo.i_fieldmask |= QC_FLAGS;
167 }
168 if (info.dqi_valid & IIF_BGRACE) {
169 qinfo.i_spc_timelimit = info.dqi_bgrace;
170 qinfo.i_fieldmask |= QC_SPC_TIMER;
171 }
172 if (info.dqi_valid & IIF_IGRACE) {
173 qinfo.i_ino_timelimit = info.dqi_igrace;
174 qinfo.i_fieldmask |= QC_INO_TIMER;
175 }
176 return sb->s_qcop->set_info(sb, type, &qinfo);
141} 177}
142 178
143static inline qsize_t qbtos(qsize_t blocks) 179static inline qsize_t qbtos(qsize_t blocks)
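
The userspace contract is untouched: Q_GETINFO/Q_SETINFO still speak if_dqinfo, and the shims above translate to and from the new qc_* structures in the kernel. A quick userspace check (sketch; header layout varies slightly across libc versions):

#include <stdio.h>
#include <sys/quota.h>

int print_grace(const char *dev)
{
	struct if_dqinfo info;

	if (quotactl(QCMD(Q_GETINFO, USRQUOTA), dev, 0, (caddr_t)&info))
		return -1;
	printf("block grace %llu s, inode grace %llu s\n",
	       (unsigned long long)info.dqi_bgrace,
	       (unsigned long long)info.dqi_igrace);
	return 0;
}
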
@@ -252,25 +288,149 @@ static int quota_disable(struct super_block *sb, void __user *addr)
252 return sb->s_qcop->quota_disable(sb, flags); 288 return sb->s_qcop->quota_disable(sb, flags);
253} 289}
254 290
291static int quota_state_to_flags(struct qc_state *state)
292{
293 int flags = 0;
294
295 if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED)
296 flags |= FS_QUOTA_UDQ_ACCT;
297 if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED)
298 flags |= FS_QUOTA_UDQ_ENFD;
299 if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)
300 flags |= FS_QUOTA_GDQ_ACCT;
301 if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED)
302 flags |= FS_QUOTA_GDQ_ENFD;
303 if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED)
304 flags |= FS_QUOTA_PDQ_ACCT;
305 if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED)
306 flags |= FS_QUOTA_PDQ_ENFD;
307 return flags;
308}
309
310static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
311{
312 int type;
313 struct qc_state state;
314 int ret;
315
316 ret = sb->s_qcop->get_state(sb, &state);
317 if (ret < 0)
318 return ret;
319
320 memset(fqs, 0, sizeof(*fqs));
321 fqs->qs_version = FS_QSTAT_VERSION;
322 fqs->qs_flags = quota_state_to_flags(&state);
323 /* No quota enabled? */
324 if (!fqs->qs_flags)
325 return -ENOSYS;
326 fqs->qs_incoredqs = state.s_incoredqs;
327 /*
328 * GETXSTATE quotactl has space for just one set of time limits so
329 * report them for the first enabled quota type
330 */
331 for (type = 0; type < XQM_MAXQUOTAS; type++)
332 if (state.s_state[type].flags & QCI_ACCT_ENABLED)
333 break;
334 BUG_ON(type == XQM_MAXQUOTAS);
335 fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
336 fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
337 fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
338 fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
339 fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
340 if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
341 fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
342 fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
343 fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
344 }
345 if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
346 fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
347 fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
348 fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
349 }
350 if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
351 /*
352 * Q_XGETQSTAT doesn't have room for both group and project
353 * quotas. So, allow the project quota values to be copied out
354 * only if there is no group quota information available.
355 */
356 if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) {
357 fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino;
358 fqs->qs_gquota.qfs_nblks =
359 state.s_state[PRJQUOTA].blocks;
360 fqs->qs_gquota.qfs_nextents =
361 state.s_state[PRJQUOTA].nextents;
362 }
363 }
364 return 0;
365}
366
255static int quota_getxstate(struct super_block *sb, void __user *addr) 367static int quota_getxstate(struct super_block *sb, void __user *addr)
256{ 368{
257 struct fs_quota_stat fqs; 369 struct fs_quota_stat fqs;
258 int ret; 370 int ret;
259 371
260 if (!sb->s_qcop->get_xstate) 372 if (!sb->s_qcop->get_state)
261 return -ENOSYS; 373 return -ENOSYS;
262 ret = sb->s_qcop->get_xstate(sb, &fqs); 374 ret = quota_getstate(sb, &fqs);
263 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) 375 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
264 return -EFAULT; 376 return -EFAULT;
265 return ret; 377 return ret;
266} 378}
267 379
380static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
381{
382 int type;
383 struct qc_state state;
384 int ret;
385
386 ret = sb->s_qcop->get_state(sb, &state);
387 if (ret < 0)
388 return ret;
389
390 memset(fqs, 0, sizeof(*fqs));
391 fqs->qs_version = FS_QSTAT_VERSION;
392 fqs->qs_flags = quota_state_to_flags(&state);
393 /* No quota enabled? */
394 if (!fqs->qs_flags)
395 return -ENOSYS;
396 fqs->qs_incoredqs = state.s_incoredqs;
397 /*
398 * GETXSTATV quotactl has space for just one set of time limits so
399 * report them for the first enabled quota type
400 */
401 for (type = 0; type < XQM_MAXQUOTAS; type++)
402 if (state.s_state[type].flags & QCI_ACCT_ENABLED)
403 break;
404 BUG_ON(type == XQM_MAXQUOTAS);
405 fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
406 fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
407 fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
408 fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
409 fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
410 if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
411 fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
412 fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
413 fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
414 }
415 if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
416 fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
417 fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
418 fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
419 }
420 if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
421 fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino;
422 fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks;
423 fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents;
424 }
425 return 0;
426}
427
268static int quota_getxstatev(struct super_block *sb, void __user *addr) 428static int quota_getxstatev(struct super_block *sb, void __user *addr)
269{ 429{
270 struct fs_quota_statv fqs; 430 struct fs_quota_statv fqs;
271 int ret; 431 int ret;
272 432
273 if (!sb->s_qcop->get_xstatev) 433 if (!sb->s_qcop->get_state)
274 return -ENOSYS; 434 return -ENOSYS;
275 435
276 memset(&fqs, 0, sizeof(fqs)); 436 memset(&fqs, 0, sizeof(fqs));
@@ -284,7 +444,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
284 default: 444 default:
285 return -EINVAL; 445 return -EINVAL;
286 } 446 }
287 ret = sb->s_qcop->get_xstatev(sb, &fqs); 447 ret = quota_getstatev(sb, &fqs);
288 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) 448 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
289 return -EFAULT; 449 return -EFAULT;
290 return ret; 450 return ret;
@@ -357,6 +517,30 @@ static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
357 dst->d_fieldmask |= QC_RT_SPACE; 517 dst->d_fieldmask |= QC_RT_SPACE;
358} 518}
359 519
520static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst,
521 struct fs_disk_quota *src)
522{
523 memset(dst, 0, sizeof(*dst));
524 dst->i_spc_timelimit = src->d_btimer;
525 dst->i_ino_timelimit = src->d_itimer;
526 dst->i_rt_spc_timelimit = src->d_rtbtimer;
527 dst->i_ino_warnlimit = src->d_iwarns;
528 dst->i_spc_warnlimit = src->d_bwarns;
529 dst->i_rt_spc_warnlimit = src->d_rtbwarns;
530 if (src->d_fieldmask & FS_DQ_BWARNS)
531 dst->i_fieldmask |= QC_SPC_WARNS;
532 if (src->d_fieldmask & FS_DQ_IWARNS)
533 dst->i_fieldmask |= QC_INO_WARNS;
534 if (src->d_fieldmask & FS_DQ_RTBWARNS)
535 dst->i_fieldmask |= QC_RT_SPC_WARNS;
536 if (src->d_fieldmask & FS_DQ_BTIMER)
537 dst->i_fieldmask |= QC_SPC_TIMER;
538 if (src->d_fieldmask & FS_DQ_ITIMER)
539 dst->i_fieldmask |= QC_INO_TIMER;
540 if (src->d_fieldmask & FS_DQ_RTBTIMER)
541 dst->i_fieldmask |= QC_RT_SPC_TIMER;
542}
543
360static int quota_setxquota(struct super_block *sb, int type, qid_t id, 544static int quota_setxquota(struct super_block *sb, int type, qid_t id,
361 void __user *addr) 545 void __user *addr)
362{ 546{
@@ -371,6 +555,21 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
371 qid = make_kqid(current_user_ns(), type, id); 555 qid = make_kqid(current_user_ns(), type, id);
372 if (!qid_valid(qid)) 556 if (!qid_valid(qid))
373 return -EINVAL; 557 return -EINVAL;
558 /* Are we actually setting timer / warning limits for all users? */
559 if (from_kqid(&init_user_ns, qid) == 0 &&
560 fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
561 struct qc_info qinfo;
562 int ret;
563
564 if (!sb->s_qcop->set_info)
565 return -EINVAL;
566 copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq);
567 ret = sb->s_qcop->set_info(sb, type, &qinfo);
568 if (ret)
569 return ret;
570 /* These are already done */
571 fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK);
572 }
374 copy_from_xfs_dqblk(&qdq, &fdq); 573 copy_from_xfs_dqblk(&qdq, &fdq);
375 return sb->s_qcop->set_dqblk(sb, qid, &qdq); 574 return sb->s_qcop->set_dqblk(sb, qid, &qdq);
376} 575}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index d65877fbe8f4..58efb83dec1c 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -349,6 +349,13 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
349 struct dquot *dquot) 349 struct dquot *dquot)
350{ 350{
351 int tmp = QT_TREEOFF; 351 int tmp = QT_TREEOFF;
352
353#ifdef __QUOTA_QT_PARANOIA
354 if (info->dqi_blocks <= QT_TREEOFF) {
355 quota_error(dquot->dq_sb, "Quota tree root isn't allocated!");
356 return -EIO;
357 }
358#endif
352 return do_insert_tree(info, dquot, &tmp, 0); 359 return do_insert_tree(info, dquot, &tmp, 0);
353} 360}
354 361
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 9cb10d7197f7..2aa012a68e90 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -117,12 +117,16 @@ static int v2_read_file_info(struct super_block *sb, int type)
117 qinfo = info->dqi_priv; 117 qinfo = info->dqi_priv;
118 if (version == 0) { 118 if (version == 0) {
119 /* limits are stored as unsigned 32-bit data */ 119 /* limits are stored as unsigned 32-bit data */
120 info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; 120 info->dqi_max_spc_limit = 0xffffffffLL << QUOTABLOCK_BITS;
121 info->dqi_max_ino_limit = 0xffffffff; 121 info->dqi_max_ino_limit = 0xffffffff;
122 } else { 122 } else {
123 /* used space is stored as unsigned 64-bit value in bytes */ 123 /*
124 info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */ 124 * Used space is stored as unsigned 64-bit value in bytes but
125 info->dqi_max_ino_limit = 0xffffffffffffffffULL; 125 * quota core supports only signed 64-bit values so use that
126 * as a limit
127 */
128 info->dqi_max_spc_limit = 0x7fffffffffffffffLL; /* 2^63-1 */
129 info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
126 } 130 }
127 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 131 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
128 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 132 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index f1966b42c2fd..4e95430093d9 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -13,12 +13,14 @@
13 */ 13 */
14#define V2_INITQMAGICS {\ 14#define V2_INITQMAGICS {\
15 0xd9c01f11, /* USRQUOTA */\ 15 0xd9c01f11, /* USRQUOTA */\
16 0xd9c01927 /* GRPQUOTA */\ 16 0xd9c01927, /* GRPQUOTA */\
17 0xd9c03f14, /* PRJQUOTA */\
17} 18}
18 19
19#define V2_INITQVERSIONS {\ 20#define V2_INITQVERSIONS {\
20 1, /* USRQUOTA */\ 21 1, /* USRQUOTA */\
21 1 /* GRPQUOTA */\ 22 1, /* GRPQUOTA */\
23 1, /* PRJQUOTA */\
22} 24}
23 25
24/* First generic header */ 26/* First generic header */
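
Each quota type gets its own on-disk magic, checked against the file header when quota is turned on; the new PRJQUOTA entries slot into the arrays quota_v2.c already indexes by type. Roughly (a simplified sketch of the existing check, from memory):

static const unsigned int quota_magics[] = V2_INITQMAGICS;
static const unsigned int quota_versions[] = V2_INITQVERSIONS;

static bool v2_header_ok(struct v2_disk_dqheader *dqhead, int type)
{
	return le32_to_cpu(dqhead->dqh_magic) == quota_magics[type] &&
	       le32_to_cpu(dqhead->dqh_version) <= quota_versions[type];
}
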
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 4f56de822d2f..183a212694bf 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -31,9 +31,7 @@
31#include "internal.h" 31#include "internal.h"
32 32
33const struct file_operations ramfs_file_operations = { 33const struct file_operations ramfs_file_operations = {
34 .read = new_sync_read,
35 .read_iter = generic_file_read_iter, 34 .read_iter = generic_file_read_iter,
36 .write = new_sync_write,
37 .write_iter = generic_file_write_iter, 35 .write_iter = generic_file_write_iter,
38 .mmap = generic_file_mmap, 36 .mmap = generic_file_mmap,
39 .fsync = noop_fsync, 37 .fsync = noop_fsync,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index f6ab41b39612..0b38befa69f3 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -44,9 +44,7 @@ const struct file_operations ramfs_file_operations = {
44 .mmap_capabilities = ramfs_mmap_capabilities, 44 .mmap_capabilities = ramfs_mmap_capabilities,
45 .mmap = ramfs_nommu_mmap, 45 .mmap = ramfs_nommu_mmap,
46 .get_unmapped_area = ramfs_nommu_get_unmapped_area, 46 .get_unmapped_area = ramfs_nommu_get_unmapped_area,
47 .read = new_sync_read,
48 .read_iter = generic_file_read_iter, 47 .read_iter = generic_file_read_iter,
49 .write = new_sync_write,
50 .write_iter = generic_file_write_iter, 48 .write_iter = generic_file_write_iter,
51 .fsync = noop_fsync, 49 .fsync = noop_fsync,
52 .splice_read = generic_file_splice_read, 50 .splice_read = generic_file_splice_read,
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b68786d66..819ef3faf1bb 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
9#include <linux/fcntl.h> 9#include <linux/fcntl.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/aio.h>
13#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
14#include <linux/security.h> 13#include <linux/security.h>
15#include <linux/export.h> 14#include <linux/export.h>
@@ -23,13 +22,10 @@
23#include <asm/unistd.h> 22#include <asm/unistd.h>
24 23
25typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 24typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
26typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
27 unsigned long, loff_t);
28typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); 25typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
29 26
30const struct file_operations generic_ro_fops = { 27const struct file_operations generic_ro_fops = {
31 .llseek = generic_file_llseek, 28 .llseek = generic_file_llseek,
32 .read = new_sync_read,
33 .read_iter = generic_file_read_iter, 29 .read_iter = generic_file_read_iter,
34 .mmap = generic_file_readonly_mmap, 30 .mmap = generic_file_readonly_mmap,
35 .splice_read = generic_file_splice_read, 31 .splice_read = generic_file_splice_read,
@@ -343,13 +339,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
343 339
344 init_sync_kiocb(&kiocb, file); 340 init_sync_kiocb(&kiocb, file);
345 kiocb.ki_pos = *ppos; 341 kiocb.ki_pos = *ppos;
346 kiocb.ki_nbytes = iov_iter_count(iter);
347 342
348 iter->type |= READ; 343 iter->type |= READ;
349 ret = file->f_op->read_iter(&kiocb, iter); 344 ret = file->f_op->read_iter(&kiocb, iter);
350 if (ret == -EIOCBQUEUED) 345 BUG_ON(ret == -EIOCBQUEUED);
351 ret = wait_on_sync_kiocb(&kiocb);
352
353 if (ret > 0) 346 if (ret > 0)
354 *ppos = kiocb.ki_pos; 347 *ppos = kiocb.ki_pos;
355 return ret; 348 return ret;
@@ -366,13 +359,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
366 359
367 init_sync_kiocb(&kiocb, file); 360 init_sync_kiocb(&kiocb, file);
368 kiocb.ki_pos = *ppos; 361 kiocb.ki_pos = *ppos;
369 kiocb.ki_nbytes = iov_iter_count(iter);
370 362
371 iter->type |= WRITE; 363 iter->type |= WRITE;
372 ret = file->f_op->write_iter(&kiocb, iter); 364 ret = file->f_op->write_iter(&kiocb, iter);
373 if (ret == -EIOCBQUEUED) 365 BUG_ON(ret == -EIOCBQUEUED);
374 ret = wait_on_sync_kiocb(&kiocb);
375
376 if (ret > 0) 366 if (ret > 0)
377 *ppos = kiocb.ki_pos; 367 *ppos = kiocb.ki_pos;
378 return ret; 368 return ret;
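
The BUG_ON(ret == -EIOCBQUEUED) spells out the new contract: with wait_on_sync_kiocb() gone there is nothing left to wait on, so ->read_iter/->write_iter must complete inline for a synchronous kiocb. The distinction lives in the kiocb itself, roughly (a sketch mirroring the slimmed-down struct of this series):

static inline bool kiocb_is_sync(const struct kiocb *kiocb)
{
	/* cf. is_sync_kiocb(): a sync kiocb has no completion callback */
	return kiocb->ki_complete == NULL;
}
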
@@ -418,26 +408,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
418 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 408 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
419} 409}
420 410
421ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 411static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
422{
423 struct iovec iov = { .iov_base = buf, .iov_len = len };
424 struct kiocb kiocb;
425 ssize_t ret;
426
427 init_sync_kiocb(&kiocb, filp);
428 kiocb.ki_pos = *ppos;
429 kiocb.ki_nbytes = len;
430
431 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
432 if (-EIOCBQUEUED == ret)
433 ret = wait_on_sync_kiocb(&kiocb);
434 *ppos = kiocb.ki_pos;
435 return ret;
436}
437
438EXPORT_SYMBOL(do_sync_read);
439
440ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
441{ 412{
442 struct iovec iov = { .iov_base = buf, .iov_len = len }; 413 struct iovec iov = { .iov_base = buf, .iov_len = len };
443 struct kiocb kiocb; 414 struct kiocb kiocb;
@@ -446,34 +417,25 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
446 417
447 init_sync_kiocb(&kiocb, filp); 418 init_sync_kiocb(&kiocb, filp);
448 kiocb.ki_pos = *ppos; 419 kiocb.ki_pos = *ppos;
449 kiocb.ki_nbytes = len;
450 iov_iter_init(&iter, READ, &iov, 1, len); 420 iov_iter_init(&iter, READ, &iov, 1, len);
451 421
452 ret = filp->f_op->read_iter(&kiocb, &iter); 422 ret = filp->f_op->read_iter(&kiocb, &iter);
453 if (-EIOCBQUEUED == ret) 423 BUG_ON(ret == -EIOCBQUEUED);
454 ret = wait_on_sync_kiocb(&kiocb);
455 *ppos = kiocb.ki_pos; 424 *ppos = kiocb.ki_pos;
456 return ret; 425 return ret;
457} 426}
458 427
459EXPORT_SYMBOL(new_sync_read);
460
461ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, 428ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
462 loff_t *pos) 429 loff_t *pos)
463{ 430{
464 ssize_t ret;
465
466 if (file->f_op->read) 431 if (file->f_op->read)
467 ret = file->f_op->read(file, buf, count, pos); 432 return file->f_op->read(file, buf, count, pos);
468 else if (file->f_op->aio_read)
469 ret = do_sync_read(file, buf, count, pos);
470 else if (file->f_op->read_iter) 433 else if (file->f_op->read_iter)
471 ret = new_sync_read(file, buf, count, pos); 434 return new_sync_read(file, buf, count, pos);
472 else 435 else
473 ret = -EINVAL; 436 return -EINVAL;
474
475 return ret;
476} 437}
438EXPORT_SYMBOL(__vfs_read);
477 439
478ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 440ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
479{ 441{
@@ -502,26 +464,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
502 464
503EXPORT_SYMBOL(vfs_read); 465EXPORT_SYMBOL(vfs_read);
504 466
505ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 467static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
506{
507 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
508 struct kiocb kiocb;
509 ssize_t ret;
510
511 init_sync_kiocb(&kiocb, filp);
512 kiocb.ki_pos = *ppos;
513 kiocb.ki_nbytes = len;
514
515 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
516 if (-EIOCBQUEUED == ret)
517 ret = wait_on_sync_kiocb(&kiocb);
518 *ppos = kiocb.ki_pos;
519 return ret;
520}
521
522EXPORT_SYMBOL(do_sync_write);
523
524ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
525{ 468{
526 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 469 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
527 struct kiocb kiocb; 470 struct kiocb kiocb;
@@ -530,17 +473,26 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
530 473
531 init_sync_kiocb(&kiocb, filp); 474 init_sync_kiocb(&kiocb, filp);
532 kiocb.ki_pos = *ppos; 475 kiocb.ki_pos = *ppos;
533 kiocb.ki_nbytes = len;
534 iov_iter_init(&iter, WRITE, &iov, 1, len); 476 iov_iter_init(&iter, WRITE, &iov, 1, len);
535 477
536 ret = filp->f_op->write_iter(&kiocb, &iter); 478 ret = filp->f_op->write_iter(&kiocb, &iter);
537 if (-EIOCBQUEUED == ret) 479 BUG_ON(ret == -EIOCBQUEUED);
538 ret = wait_on_sync_kiocb(&kiocb); 480 if (ret > 0)
539 *ppos = kiocb.ki_pos; 481 *ppos = kiocb.ki_pos;
540 return ret; 482 return ret;
541} 483}
542 484
543EXPORT_SYMBOL(new_sync_write); 485ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
486 loff_t *pos)
487{
488 if (file->f_op->write)
489 return file->f_op->write(file, p, count, pos);
490 else if (file->f_op->write_iter)
491 return new_sync_write(file, p, count, pos);
492 else
493 return -EINVAL;
494}
495EXPORT_SYMBOL(__vfs_write);
544 496
545ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 497ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
546{ 498{
@@ -556,12 +508,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
556 p = (__force const char __user *)buf; 508 p = (__force const char __user *)buf;
557 if (count > MAX_RW_COUNT) 509 if (count > MAX_RW_COUNT)
558 count = MAX_RW_COUNT; 510 count = MAX_RW_COUNT;
559 if (file->f_op->write) 511 ret = __vfs_write(file, p, count, pos);
560 ret = file->f_op->write(file, p, count, pos);
561 else if (file->f_op->aio_write)
562 ret = do_sync_write(file, p, count, pos);
563 else
564 ret = new_sync_write(file, p, count, pos);
565 set_fs(old_fs); 512 set_fs(old_fs);
566 if (ret > 0) { 513 if (ret > 0) {
567 fsnotify_modify(file); 514 fsnotify_modify(file);
@@ -588,12 +535,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
588 if (ret >= 0) { 535 if (ret >= 0) {
589 count = ret; 536 count = ret;
590 file_start_write(file); 537 file_start_write(file);
591 if (file->f_op->write) 538 ret = __vfs_write(file, buf, count, pos);
592 ret = file->f_op->write(file, buf, count, pos);
593 else if (file->f_op->aio_write)
594 ret = do_sync_write(file, buf, count, pos);
595 else
596 ret = new_sync_write(file, buf, count, pos);
597 if (ret > 0) { 539 if (ret > 0) {
598 fsnotify_modify(file); 540 fsnotify_modify(file);
599 add_wchar(current, ret); 541 add_wchar(current, ret);
@@ -710,60 +652,32 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
710} 652}
711EXPORT_SYMBOL(iov_shorten); 653EXPORT_SYMBOL(iov_shorten);
712 654
713static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, 655static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
714 unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) 656 loff_t *ppos, iter_fn_t fn)
715{
716 struct kiocb kiocb;
717 struct iov_iter iter;
718 ssize_t ret;
719
720 init_sync_kiocb(&kiocb, filp);
721 kiocb.ki_pos = *ppos;
722 kiocb.ki_nbytes = len;
723
724 iov_iter_init(&iter, rw, iov, nr_segs, len);
725 ret = fn(&kiocb, &iter);
726 if (ret == -EIOCBQUEUED)
727 ret = wait_on_sync_kiocb(&kiocb);
728 *ppos = kiocb.ki_pos;
729 return ret;
730}
731
732static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
733 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
734{ 657{
735 struct kiocb kiocb; 658 struct kiocb kiocb;
736 ssize_t ret; 659 ssize_t ret;
737 660
738 init_sync_kiocb(&kiocb, filp); 661 init_sync_kiocb(&kiocb, filp);
739 kiocb.ki_pos = *ppos; 662 kiocb.ki_pos = *ppos;
740 kiocb.ki_nbytes = len;
741 663
742 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 664 ret = fn(&kiocb, iter);
743 if (ret == -EIOCBQUEUED) 665 BUG_ON(ret == -EIOCBQUEUED);
744 ret = wait_on_sync_kiocb(&kiocb);
745 *ppos = kiocb.ki_pos; 666 *ppos = kiocb.ki_pos;
746 return ret; 667 return ret;
747} 668}
748 669
749/* Do it by hand, with file-ops */ 670/* Do it by hand, with file-ops */
750static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 671static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
751 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 672 loff_t *ppos, io_fn_t fn)
752{ 673{
753 struct iovec *vector = iov;
754 ssize_t ret = 0; 674 ssize_t ret = 0;
755 675
756 while (nr_segs > 0) { 676 while (iov_iter_count(iter)) {
757 void __user *base; 677 struct iovec iovec = iov_iter_iovec(iter);
758 size_t len;
759 ssize_t nr; 678 ssize_t nr;
760 679
761 base = vector->iov_base; 680 nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
762 len = vector->iov_len;
763 vector++;
764 nr_segs--;
765
766 nr = fn(filp, base, len, ppos);
767 681
768 if (nr < 0) { 682 if (nr < 0) {
769 if (!ret) 683 if (!ret)
@@ -771,8 +685,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
771 break; 685 break;
772 } 686 }
773 ret += nr; 687 ret += nr;
774 if (nr != len) 688 if (nr != iovec.iov_len)
775 break; 689 break;
690 iov_iter_advance(iter, nr);
776 } 691 }
777 692
778 return ret; 693 return ret;
@@ -863,48 +778,42 @@ static ssize_t do_readv_writev(int type, struct file *file,
863 size_t tot_len; 778 size_t tot_len;
864 struct iovec iovstack[UIO_FASTIOV]; 779 struct iovec iovstack[UIO_FASTIOV];
865 struct iovec *iov = iovstack; 780 struct iovec *iov = iovstack;
781 struct iov_iter iter;
866 ssize_t ret; 782 ssize_t ret;
867 io_fn_t fn; 783 io_fn_t fn;
868 iov_fn_t fnv;
869 iter_fn_t iter_fn; 784 iter_fn_t iter_fn;
870 785
871 ret = rw_copy_check_uvector(type, uvector, nr_segs, 786 ret = import_iovec(type, uvector, nr_segs,
872 ARRAY_SIZE(iovstack), iovstack, &iov); 787 ARRAY_SIZE(iovstack), &iov, &iter);
873 if (ret <= 0) 788 if (ret < 0)
874 goto out; 789 return ret;
875 790
876 tot_len = ret; 791 tot_len = iov_iter_count(&iter);
792 if (!tot_len)
793 goto out;
877 ret = rw_verify_area(type, file, pos, tot_len); 794 ret = rw_verify_area(type, file, pos, tot_len);
878 if (ret < 0) 795 if (ret < 0)
879 goto out; 796 goto out;
880 797
881 fnv = NULL;
882 if (type == READ) { 798 if (type == READ) {
883 fn = file->f_op->read; 799 fn = file->f_op->read;
884 fnv = file->f_op->aio_read;
885 iter_fn = file->f_op->read_iter; 800 iter_fn = file->f_op->read_iter;
886 } else { 801 } else {
887 fn = (io_fn_t)file->f_op->write; 802 fn = (io_fn_t)file->f_op->write;
888 fnv = file->f_op->aio_write;
889 iter_fn = file->f_op->write_iter; 803 iter_fn = file->f_op->write_iter;
890 file_start_write(file); 804 file_start_write(file);
891 } 805 }
892 806
893 if (iter_fn) 807 if (iter_fn)
894 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 808 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
895 pos, iter_fn);
896 else if (fnv)
897 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
898 pos, fnv);
899 else 809 else
900 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 810 ret = do_loop_readv_writev(file, &iter, pos, fn);
901 811
902 if (type != READ) 812 if (type != READ)
903 file_end_write(file); 813 file_end_write(file);
904 814
905out: 815out:
906 if (iov != iovstack) 816 kfree(iov);
907 kfree(iov);
908 if ((ret + (type == READ)) > 0) { 817 if ((ret + (type == READ)) > 0) {
909 if (type == READ) 818 if (type == READ)
910 fsnotify_access(file); 819 fsnotify_access(file);
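
import_iovec() folds the old rw_copy_check_uvector()/iov_iter_init() pair into one call and, when the on-stack array sufficed, leaves *iov set to NULL — which is why the unconditional kfree(iov) above is safe. The calling pattern in isolation (sketch; my_readv is hypothetical):

static ssize_t my_readv(struct file *file, const struct iovec __user *uvec,
			unsigned long nr_segs, loff_t *pos)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	ret = vfs_iter_read(file, &iter, pos);
	kfree(iov);	/* NULL when iovstack was used, so always safe */
	return ret;
}
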
@@ -1043,48 +952,42 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1043 compat_ssize_t tot_len; 952 compat_ssize_t tot_len;
1044 struct iovec iovstack[UIO_FASTIOV]; 953 struct iovec iovstack[UIO_FASTIOV];
1045 struct iovec *iov = iovstack; 954 struct iovec *iov = iovstack;
955 struct iov_iter iter;
1046 ssize_t ret; 956 ssize_t ret;
1047 io_fn_t fn; 957 io_fn_t fn;
1048 iov_fn_t fnv;
1049 iter_fn_t iter_fn; 958 iter_fn_t iter_fn;
1050 959
1051 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 960 ret = compat_import_iovec(type, uvector, nr_segs,
1052 UIO_FASTIOV, iovstack, &iov); 961 UIO_FASTIOV, &iov, &iter);
1053 if (ret <= 0) 962 if (ret < 0)
1054 goto out; 963 return ret;
1055 964
1056 tot_len = ret; 965 tot_len = iov_iter_count(&iter);
966 if (!tot_len)
967 goto out;
1057 ret = rw_verify_area(type, file, pos, tot_len); 968 ret = rw_verify_area(type, file, pos, tot_len);
1058 if (ret < 0) 969 if (ret < 0)
1059 goto out; 970 goto out;
1060 971
1061 fnv = NULL;
1062 if (type == READ) { 972 if (type == READ) {
1063 fn = file->f_op->read; 973 fn = file->f_op->read;
1064 fnv = file->f_op->aio_read;
1065 iter_fn = file->f_op->read_iter; 974 iter_fn = file->f_op->read_iter;
1066 } else { 975 } else {
1067 fn = (io_fn_t)file->f_op->write; 976 fn = (io_fn_t)file->f_op->write;
1068 fnv = file->f_op->aio_write;
1069 iter_fn = file->f_op->write_iter; 977 iter_fn = file->f_op->write_iter;
1070 file_start_write(file); 978 file_start_write(file);
1071 } 979 }
1072 980
1073 if (iter_fn) 981 if (iter_fn)
1074 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 982 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
1075 pos, iter_fn);
1076 else if (fnv)
1077 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
1078 pos, fnv);
1079 else 983 else
1080 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 984 ret = do_loop_readv_writev(file, &iter, pos, fn);
1081 985
1082 if (type != READ) 986 if (type != READ)
1083 file_end_write(file); 987 file_end_write(file);
1084 988
1085out: 989out:
1086 if (iov != iovstack) 990 kfree(iov);
1087 kfree(iov);
1088 if ((ret + (type == READ)) > 0) { 991 if ((ret + (type == READ)) > 0) {
1089 if (type == READ) 992 if (type == READ)
1090 fsnotify_access(file); 993 fsnotify_access(file);
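
do_sync_read()/do_sync_write() are gone and new_sync_read()/new_sync_write() are now static, so outside the VFS the synchronous entry points are vfs_read()/vfs_write() (or __vfs_read()/__vfs_write() once checks are done). Kernel code that read files through the old helpers ends up with the familiar pattern (sketch, roughly what fs/exec.c's kernel_read() does):

static ssize_t my_kernel_read(struct file *file, void *buf, size_t count,
			      loff_t *pos)
{
	mm_segment_t old_fs = get_fs();
	ssize_t ret;

	set_fs(KERNEL_DS);
	ret = vfs_read(file, (__force char __user *)buf, count, pos);
	set_fs(old_fs);
	return ret;
}
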
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 751dd3f4346b..96a1bcf33db4 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -243,8 +243,6 @@ drop_write_lock:
243} 243}
244 244
245const struct file_operations reiserfs_file_operations = { 245const struct file_operations reiserfs_file_operations = {
246 .read = new_sync_read,
247 .write = new_sync_write,
248 .unlocked_ioctl = reiserfs_ioctl, 246 .unlocked_ioctl = reiserfs_ioctl,
249#ifdef CONFIG_COMPAT 247#ifdef CONFIG_COMPAT
250 .compat_ioctl = reiserfs_compat_ioctl, 248 .compat_ioctl = reiserfs_compat_ioctl,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e1f995..742242b60972 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/aio.h> 21#include <linux/uio.h>
22 22
23int reiserfs_commit_write(struct file *f, struct page *page, 23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
@@ -3278,22 +3278,22 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3278 * We thank Mingming Cao for helping us understand in great detail what 3278 * We thank Mingming Cao for helping us understand in great detail what
3279 * to do in this section of the code. 3279 * to do in this section of the code.
3280 */ 3280 */
3281static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 3281static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3282 struct iov_iter *iter, loff_t offset) 3282 loff_t offset)
3283{ 3283{
3284 struct file *file = iocb->ki_filp; 3284 struct file *file = iocb->ki_filp;
3285 struct inode *inode = file->f_mapping->host; 3285 struct inode *inode = file->f_mapping->host;
3286 size_t count = iov_iter_count(iter); 3286 size_t count = iov_iter_count(iter);
3287 ssize_t ret; 3287 ssize_t ret;
3288 3288
3289 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 3289 ret = blockdev_direct_IO(iocb, inode, iter, offset,
3290 reiserfs_get_blocks_direct_io); 3290 reiserfs_get_blocks_direct_io);
3291 3291
3292 /* 3292 /*
3293 * In case of error extending write may have instantiated a few 3293 * In case of error extending write may have instantiated a few
3294 * blocks outside i_size. Trim these off again. 3294 * blocks outside i_size. Trim these off again.
3295 */ 3295 */
3296 if (unlikely((rw & WRITE) && ret < 0)) { 3296 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
3297 loff_t isize = i_size_read(inode); 3297 loff_t isize = i_size_read(inode);
3298 loff_t end = offset + count; 3298 loff_t end = offset + count;
3299 3299
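
The rw argument has been dropped from ->direct_IO and blockdev_direct_IO() alike; when the direction matters, iov_iter_rw(iter) answers it. The new prototype from the filesystem side (sketch; myfs_get_block is hypothetical):

static ssize_t myfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			      loff_t offset)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	/* iov_iter_rw(iter) distinguishes READ from WRITE if needed. */
	return blockdev_direct_IO(iocb, inode, iter, offset,
				  myfs_get_block);
}
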
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index bb79cddf0a1f..2adcde137c3f 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -910,7 +910,6 @@ do { \
910 if (!(cond)) \ 910 if (!(cond)) \
911 reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ 911 reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
912 __FILE__ ":%i:%s: " format "\n", \ 912 __FILE__ ":%i:%s: " format "\n", \
913 in_interrupt() ? -1 : task_pid_nr(current), \
914 __LINE__, __func__ , ##args); \ 913 __LINE__, __func__ , ##args); \
915} while (0) 914} while (0)
916 915
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 71fbbe3e2dab..68b5f182984e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -805,7 +805,7 @@ static const struct quotactl_ops reiserfs_qctl_operations = {
805 .quota_on = reiserfs_quota_on, 805 .quota_on = reiserfs_quota_on,
806 .quota_off = dquot_quota_off, 806 .quota_off = dquot_quota_off,
807 .quota_sync = dquot_quota_sync, 807 .quota_sync = dquot_quota_sync,
808 .get_info = dquot_get_dqinfo, 808 .get_state = dquot_get_state,
809 .set_info = dquot_set_dqinfo, 809 .set_info = dquot_set_dqinfo,
810 .get_dqblk = dquot_get_dqblk, 810 .get_dqblk = dquot_get_dqblk,
811 .set_dqblk = dquot_set_dqblk, 811 .set_dqblk = dquot_set_dqblk,
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index 7da9e2153953..1118a0dc6b45 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -81,7 +81,6 @@ static unsigned romfs_mmap_capabilities(struct file *file)
81 81
82const struct file_operations romfs_ro_fops = { 82const struct file_operations romfs_ro_fops = {
83 .llseek = generic_file_llseek, 83 .llseek = generic_file_llseek,
84 .read = new_sync_read,
85 .read_iter = generic_file_read_iter, 84 .read_iter = generic_file_read_iter,
86 .splice_read = generic_file_splice_read, 85 .splice_read = generic_file_splice_read,
87 .mmap = romfs_mmap, 86 .mmap = romfs_mmap,
diff --git a/fs/splice.c b/fs/splice.c
index 7968da96bebb..476024bb6546 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include "internal.h" 35#include "internal.h"
37 36
38/* 37/*
@@ -524,6 +523,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
524 loff_t isize, left; 523 loff_t isize, left;
525 int ret; 524 int ret;
526 525
526 if (IS_DAX(in->f_mapping->host))
527 return default_file_splice_read(in, ppos, pipe, len, flags);
528
527 isize = i_size_read(in->f_mapping->host); 529 isize = i_size_read(in->f_mapping->host);
528 if (unlikely(*ppos >= isize)) 530 if (unlikely(*ppos >= isize))
529 return 0; 531 return 0;
@@ -1534,34 +1536,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1534 struct iovec iovstack[UIO_FASTIOV]; 1536 struct iovec iovstack[UIO_FASTIOV];
1535 struct iovec *iov = iovstack; 1537 struct iovec *iov = iovstack;
1536 struct iov_iter iter; 1538 struct iov_iter iter;
1537 ssize_t count;
1538 1539
1539 pipe = get_pipe_info(file); 1540 pipe = get_pipe_info(file);
1540 if (!pipe) 1541 if (!pipe)
1541 return -EBADF; 1542 return -EBADF;
1542 1543
1543 ret = rw_copy_check_uvector(READ, uiov, nr_segs, 1544 ret = import_iovec(READ, uiov, nr_segs,
1544 ARRAY_SIZE(iovstack), iovstack, &iov); 1545 ARRAY_SIZE(iovstack), &iov, &iter);
1545 if (ret <= 0) 1546 if (ret < 0)
1546 goto out; 1547 return ret;
1547
1548 count = ret;
1549 iov_iter_init(&iter, READ, iov, nr_segs, count);
1550 1548
1549 sd.total_len = iov_iter_count(&iter);
1551 sd.len = 0; 1550 sd.len = 0;
1552 sd.total_len = count;
1553 sd.flags = flags; 1551 sd.flags = flags;
1554 sd.u.data = &iter; 1552 sd.u.data = &iter;
1555 sd.pos = 0; 1553 sd.pos = 0;
1556 1554
1557 pipe_lock(pipe); 1555 if (sd.total_len) {
1558 ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1556 pipe_lock(pipe);
1559 pipe_unlock(pipe); 1557 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
1560 1558 pipe_unlock(pipe);
1561out: 1559 }
1562 if (iov != iovstack)
1563 kfree(iov);
1564 1560
1561 kfree(iov);
1565 return ret; 1562 return ret;
1566} 1563}
1567 1564
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3cef9927..19636af5e75c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
66{ 66{
67 int retval; 67 int retval;
68 68
69 retval = security_inode_getattr(path->mnt, path->dentry); 69 retval = security_inode_getattr(path);
70 if (retval) 70 if (retval)
71 return retval; 71 return retval;
72 return vfs_getattr_nosec(path, stat); 72 return vfs_getattr_nosec(path, stat);
diff --git a/fs/super.c b/fs/super.c
index 2b7dc90ccdbb..928c20f47af9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
224 s->s_maxbytes = MAX_NON_LFS; 224 s->s_maxbytes = MAX_NON_LFS;
225 s->s_op = &default_op; 225 s->s_op = &default_op;
226 s->s_time_gran = 1000000000; 226 s->s_time_gran = 1000000000;
227 s->cleancache_poolid = -1; 227 s->cleancache_poolid = CLEANCACHE_NO_POOL;
228 228
229 s->s_shrink.seeks = DEFAULT_SEEKS; 229 s->s_shrink.seeks = DEFAULT_SEEKS;
230 s->s_shrink.scan_objects = super_cache_scan; 230 s->s_shrink.scan_objects = super_cache_scan;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2554d8835b48..b400c04371f0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
41 41
42 if (grp->attrs) { 42 if (grp->attrs) {
43 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { 43 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
44 umode_t mode = 0; 44 umode_t mode = (*attr)->mode;
45 45
46 /* 46 /*
47 * In update mode, we're changing the permissions or 47 * In update mode, we're changing the permissions or
@@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
55 if (!mode) 55 if (!mode)
56 continue; 56 continue;
57 } 57 }
58
59 WARN(mode & ~(SYSFS_PREALLOC | 0664),
60 "Attribute %s: Invalid permissions 0%o\n",
61 (*attr)->name, mode);
62
63 mode &= SYSFS_PREALLOC | 0664;
58 error = sysfs_add_file_mode_ns(parent, *attr, false, 64 error = sysfs_add_file_mode_ns(parent, *attr, false,
59 (*attr)->mode | mode, 65 mode, NULL);
60 NULL);
61 if (unlikely(error)) 66 if (unlikely(error))
62 break; 67 break;
63 } 68 }
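/*
 * Illustrative sketch (hypothetical attribute, not part of the patch) of
 * what the new WARN catches: a group attribute declared with execute
 * bits -- meaningless on sysfs files -- is flagged and masked down to
 * SYSFS_PREALLOC | 0664 before the file is created.
 */
static ssize_t example_show(struct kobject *kobj, struct kobj_attribute *attr,
			    char *buf)
{
	return sprintf(buf, "example\n");
}

static ssize_t example_store(struct kobject *kobj, struct kobj_attribute *attr,
			     const char *buf, size_t count)
{
	return count;
}

/* 0755 & ~(SYSFS_PREALLOC | 0664) != 0, so create_files() warns and the
 * file is created 0644 instead. */
static struct kobj_attribute example_attr =
	__ATTR(example, 0755, example_show, example_store);

static struct attribute *example_attrs[] = {
	&example_attr.attr,
	NULL,
};

static const struct attribute_group example_group = {
	.attrs = example_attrs,
};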
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index b00811c75b24..a48e30410ad1 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -21,9 +21,7 @@
21 */ 21 */
22const struct file_operations sysv_file_operations = { 22const struct file_operations sysv_file_operations = {
23 .llseek = generic_file_llseek, 23 .llseek = generic_file_llseek,
24 .read = new_sync_read,
25 .read_iter = generic_file_read_iter, 24 .read_iter = generic_file_read_iter,
26 .write = new_sync_write,
27 .write_iter = generic_file_write_iter, 25 .write_iter = generic_file_write_iter,
28 .mmap = generic_file_mmap, 26 .mmap = generic_file_mmap,
29 .fsync = generic_file_fsync, 27 .fsync = generic_file_fsync,
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
1tracefs-objs := inode.o
2
3obj-$(CONFIG_TRACING) += tracefs.o
4
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
1/*
2 * inode.c - part of tracefs, a pseudo file system for activating tracing
3 *
4 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
5 *
6 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * tracefs is the file system that is used by the tracing infrastructure.
13 *
14 */
15
16#include <linux/module.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/kobject.h>
20#include <linux/namei.h>
21#include <linux/tracefs.h>
22#include <linux/fsnotify.h>
23#include <linux/seq_file.h>
24#include <linux/parser.h>
25#include <linux/magic.h>
26#include <linux/slab.h>
27
28#define TRACEFS_DEFAULT_MODE 0700
29
30static struct vfsmount *tracefs_mount;
31static int tracefs_mount_count;
32static bool tracefs_registered;
33
34static ssize_t default_read_file(struct file *file, char __user *buf,
35 size_t count, loff_t *ppos)
36{
37 return 0;
38}
39
40static ssize_t default_write_file(struct file *file, const char __user *buf,
41 size_t count, loff_t *ppos)
42{
43 return count;
44}
45
46static const struct file_operations tracefs_file_operations = {
47 .read = default_read_file,
48 .write = default_write_file,
49 .open = simple_open,
50 .llseek = noop_llseek,
51};
52
53static struct tracefs_dir_ops {
54 int (*mkdir)(const char *name);
55 int (*rmdir)(const char *name);
56} tracefs_ops;
57
58static char *get_dname(struct dentry *dentry)
59{
60 const char *dname;
61 char *name;
62 int len = dentry->d_name.len;
63
64 dname = dentry->d_name.name;
65 name = kmalloc(len + 1, GFP_KERNEL);
66 if (!name)
67 return NULL;
68 memcpy(name, dname, len);
69 name[len] = 0;
70 return name;
71}
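/*
 * Aside (not part of the patch): get_dname() is an open-coded equivalent
 * of the stock helper
 *
 *	name = kstrndup(dentry->d_name.name, dentry->d_name.len, GFP_KERNEL);
 *
 * kept local here, presumably to mirror the debugfs code it derives from.
 */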
72
73static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
74{
75 char *name;
76 int ret;
77
78 name = get_dname(dentry);
79 if (!name)
80 return -ENOMEM;
81
82 /*
83 * The mkdir call can call the generic functions that create
84 * the files within the tracefs system. It is up to the individual
85 * mkdir routine to handle races.
86 */
87 mutex_unlock(&inode->i_mutex);
88 ret = tracefs_ops.mkdir(name);
89 mutex_lock(&inode->i_mutex);
90
91 kfree(name);
92
93 return ret;
94}
95
96static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
97{
98 char *name;
99 int ret;
100
101 name = get_dname(dentry);
102 if (!name)
103 return -ENOMEM;
104
105 /*
106 * The rmdir call can call the generic functions that create
107 * the files within the tracefs system. It is up to the individual
108 * rmdir routine to handle races.
109 * This time we need to unlock not only the parent (inode) but
110 * also the directory that is being deleted.
111 */
112 mutex_unlock(&inode->i_mutex);
113 mutex_unlock(&dentry->d_inode->i_mutex);
114
115 ret = tracefs_ops.rmdir(name);
116
117 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
118 mutex_lock(&dentry->d_inode->i_mutex);
119
120 kfree(name);
121
122 return ret;
123}
124
125static const struct inode_operations tracefs_dir_inode_operations = {
126 .lookup = simple_lookup,
127 .mkdir = tracefs_syscall_mkdir,
128 .rmdir = tracefs_syscall_rmdir,
129};
130
131static struct inode *tracefs_get_inode(struct super_block *sb)
132{
133 struct inode *inode = new_inode(sb);
134 if (inode) {
135 inode->i_ino = get_next_ino();
136 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
137 }
138 return inode;
139}
140
141struct tracefs_mount_opts {
142 kuid_t uid;
143 kgid_t gid;
144 umode_t mode;
145};
146
147enum {
148 Opt_uid,
149 Opt_gid,
150 Opt_mode,
151 Opt_err
152};
153
154static const match_table_t tokens = {
155 {Opt_uid, "uid=%u"},
156 {Opt_gid, "gid=%u"},
157 {Opt_mode, "mode=%o"},
158 {Opt_err, NULL}
159};
160
161struct tracefs_fs_info {
162 struct tracefs_mount_opts mount_opts;
163};
164
165static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
166{
167 substring_t args[MAX_OPT_ARGS];
168 int option;
169 int token;
170 kuid_t uid;
171 kgid_t gid;
172 char *p;
173
174 opts->mode = TRACEFS_DEFAULT_MODE;
175
176 while ((p = strsep(&data, ",")) != NULL) {
177 if (!*p)
178 continue;
179
180 token = match_token(p, tokens, args);
181 switch (token) {
182 case Opt_uid:
183 if (match_int(&args[0], &option))
184 return -EINVAL;
185 uid = make_kuid(current_user_ns(), option);
186 if (!uid_valid(uid))
187 return -EINVAL;
188 opts->uid = uid;
189 break;
190 case Opt_gid:
191 if (match_int(&args[0], &option))
192 return -EINVAL;
193 gid = make_kgid(current_user_ns(), option);
194 if (!gid_valid(gid))
195 return -EINVAL;
196 opts->gid = gid;
197 break;
198 case Opt_mode:
199 if (match_octal(&args[0], &option))
200 return -EINVAL;
201 opts->mode = option & S_IALLUGO;
202 break;
203 /*
204 * We might like to report bad mount options here;
205 * but traditionally tracefs has ignored all mount options
206 */
207 }
208 }
209
210 return 0;
211}
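/*
 * Illustrative sketch (hypothetical helper, not part of the patch) of how
 * the parser above consumes a mount data string; userspace supplies the
 * same options via e.g.
 *
 *	mount -t tracefs -o uid=1000,gid=1000,mode=0750 nodev /sys/kernel/tracing
 */
static int __maybe_unused tracefs_parse_options_example(void)
{
	struct tracefs_mount_opts opts = { };
	char data[] = "uid=1000,gid=1000,mode=0750";

	/* strsep() consumes its argument, hence the writable array */
	return tracefs_parse_options(data, &opts);
}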
212
213static int tracefs_apply_options(struct super_block *sb)
214{
215 struct tracefs_fs_info *fsi = sb->s_fs_info;
216 struct inode *inode = sb->s_root->d_inode;
217 struct tracefs_mount_opts *opts = &fsi->mount_opts;
218
219 inode->i_mode &= ~S_IALLUGO;
220 inode->i_mode |= opts->mode;
221
222 inode->i_uid = opts->uid;
223 inode->i_gid = opts->gid;
224
225 return 0;
226}
227
228static int tracefs_remount(struct super_block *sb, int *flags, char *data)
229{
230 int err;
231 struct tracefs_fs_info *fsi = sb->s_fs_info;
232
233 sync_filesystem(sb);
234 err = tracefs_parse_options(data, &fsi->mount_opts);
235 if (err)
236 goto fail;
237
238 tracefs_apply_options(sb);
239
240fail:
241 return err;
242}
243
244static int tracefs_show_options(struct seq_file *m, struct dentry *root)
245{
246 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
247 struct tracefs_mount_opts *opts = &fsi->mount_opts;
248
249 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
250 seq_printf(m, ",uid=%u",
251 from_kuid_munged(&init_user_ns, opts->uid));
252 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
253 seq_printf(m, ",gid=%u",
254 from_kgid_munged(&init_user_ns, opts->gid));
255 if (opts->mode != TRACEFS_DEFAULT_MODE)
256 seq_printf(m, ",mode=%o", opts->mode);
257
258 return 0;
259}
260
261static const struct super_operations tracefs_super_operations = {
262 .statfs = simple_statfs,
263 .remount_fs = tracefs_remount,
264 .show_options = tracefs_show_options,
265};
266
267static int trace_fill_super(struct super_block *sb, void *data, int silent)
268{
269 static struct tree_descr trace_files[] = {{""}};
270 struct tracefs_fs_info *fsi;
271 int err;
272
273 save_mount_options(sb, data);
274
275 fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
276 sb->s_fs_info = fsi;
277 if (!fsi) {
278 err = -ENOMEM;
279 goto fail;
280 }
281
282 err = tracefs_parse_options(data, &fsi->mount_opts);
283 if (err)
284 goto fail;
285
286 err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
287 if (err)
288 goto fail;
289
290 sb->s_op = &tracefs_super_operations;
291
292 tracefs_apply_options(sb);
293
294 return 0;
295
296fail:
297 kfree(fsi);
298 sb->s_fs_info = NULL;
299 return err;
300}
301
302static struct dentry *trace_mount(struct file_system_type *fs_type,
303 int flags, const char *dev_name,
304 void *data)
305{
306 return mount_single(fs_type, flags, data, trace_fill_super);
307}
308
309static struct file_system_type trace_fs_type = {
310 .owner = THIS_MODULE,
311 .name = "tracefs",
312 .mount = trace_mount,
313 .kill_sb = kill_litter_super,
314};
315MODULE_ALIAS_FS("tracefs");
316
317static struct dentry *start_creating(const char *name, struct dentry *parent)
318{
319 struct dentry *dentry;
320 int error;
321
 322 pr_debug("tracefs: creating file '%s'\n", name);
323
324 error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
325 &tracefs_mount_count);
326 if (error)
327 return ERR_PTR(error);
328
329 /* If the parent is not specified, we create it in the root.
330 * We need the root dentry to do this, which is in the super
331 * block. A pointer to that is in the struct vfsmount that we
332 * have around.
333 */
334 if (!parent)
335 parent = tracefs_mount->mnt_root;
336
337 mutex_lock(&parent->d_inode->i_mutex);
338 dentry = lookup_one_len(name, parent, strlen(name));
339 if (!IS_ERR(dentry) && dentry->d_inode) {
340 dput(dentry);
341 dentry = ERR_PTR(-EEXIST);
342 }
343 if (IS_ERR(dentry))
344 mutex_unlock(&parent->d_inode->i_mutex);
345 return dentry;
346}
347
348static struct dentry *failed_creating(struct dentry *dentry)
349{
350 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
351 dput(dentry);
352 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
353 return NULL;
354}
355
356static struct dentry *end_creating(struct dentry *dentry)
357{
358 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
359 return dentry;
360}
361
362/**
363 * tracefs_create_file - create a file in the tracefs filesystem
364 * @name: a pointer to a string containing the name of the file to create.
365 * @mode: the permission that the file should have.
366 * @parent: a pointer to the parent dentry for this file. This should be a
367 * directory dentry if set. If this parameter is NULL, then the
368 * file will be created in the root of the tracefs filesystem.
369 * @data: a pointer to something that the caller will want to get to later
370 * on. The inode.i_private pointer will point to this value on
371 * the open() call.
372 * @fops: a pointer to a struct file_operations that should be used for
373 * this file.
374 *
 375 * This is the basic "create a file" function for tracefs. It allows for a
 376 * wide range of flexibility in creating a file. (If you want to create a
 377 * directory, use the tracefs_create_dir() function
 378 * instead.)
379 *
380 * This function will return a pointer to a dentry if it succeeds. This
381 * pointer must be passed to the tracefs_remove() function when the file is
 382 * to be removed (no automatic cleanup happens if your module is unloaded;
383 * you are responsible here.) If an error occurs, %NULL will be returned.
384 *
385 * If tracefs is not enabled in the kernel, the value -%ENODEV will be
386 * returned.
387 */
388struct dentry *tracefs_create_file(const char *name, umode_t mode,
389 struct dentry *parent, void *data,
390 const struct file_operations *fops)
391{
392 struct dentry *dentry;
393 struct inode *inode;
394
395 if (!(mode & S_IFMT))
396 mode |= S_IFREG;
397 BUG_ON(!S_ISREG(mode));
398 dentry = start_creating(name, parent);
399
400 if (IS_ERR(dentry))
401 return NULL;
402
403 inode = tracefs_get_inode(dentry->d_sb);
404 if (unlikely(!inode))
405 return failed_creating(dentry);
406
407 inode->i_mode = mode;
408 inode->i_fop = fops ? fops : &tracefs_file_operations;
409 inode->i_private = data;
410 d_instantiate(dentry, inode);
411 fsnotify_create(dentry->d_parent->d_inode, dentry);
412 return end_creating(dentry);
413}
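/*
 * Illustrative usage sketch (hypothetical module, not part of the patch),
 * assuming <linux/tracefs.h> and CONFIG_TRACING: create a directory, put
 * one read-only file in it, and tear both down on exit.
 */
static struct dentry *example_dir;
static struct dentry *example_file;

static ssize_t example_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	static const char msg[] = "hello from tracefs\n";

	return simple_read_from_buffer(buf, count, ppos, msg, sizeof(msg) - 1);
}

static const struct file_operations example_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.read	= example_read,
	.llseek	= default_llseek,
};

static int __init example_init(void)
{
	example_dir = tracefs_create_dir("example", NULL);
	example_file = tracefs_create_file("greeting", 0444, example_dir,
					   NULL, &example_fops);
	/* both helpers return NULL on failure, per the documentation above */
	return example_file ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	tracefs_remove(example_file);
	tracefs_remove(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");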
414
415static struct dentry *__create_dir(const char *name, struct dentry *parent,
416 const struct inode_operations *ops)
417{
418 struct dentry *dentry = start_creating(name, parent);
419 struct inode *inode;
420
421 if (IS_ERR(dentry))
422 return NULL;
423
424 inode = tracefs_get_inode(dentry->d_sb);
425 if (unlikely(!inode))
426 return failed_creating(dentry);
427
428 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
429 inode->i_op = ops;
430 inode->i_fop = &simple_dir_operations;
431
432 /* directory inodes start off with i_nlink == 2 (for "." entry) */
433 inc_nlink(inode);
434 d_instantiate(dentry, inode);
435 inc_nlink(dentry->d_parent->d_inode);
436 fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
437 return end_creating(dentry);
438}
439
440/**
441 * tracefs_create_dir - create a directory in the tracefs filesystem
442 * @name: a pointer to a string containing the name of the directory to
443 * create.
444 * @parent: a pointer to the parent dentry for this file. This should be a
445 * directory dentry if set. If this parameter is NULL, then the
446 * directory will be created in the root of the tracefs filesystem.
447 *
448 * This function creates a directory in tracefs with the given name.
449 *
450 * This function will return a pointer to a dentry if it succeeds. This
451 * pointer must be passed to the tracefs_remove() function when the file is
452 * to be removed. If an error occurs, %NULL will be returned.
453 *
454 * If tracing is not enabled in the kernel, the value -%ENODEV will be
455 * returned.
456 */
457struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
458{
459 return __create_dir(name, parent, &simple_dir_inode_operations);
460}
461
462/**
463 * tracefs_create_instance_dir - create the tracing instances directory
464 * @name: The name of the instances directory to create
 465 * @parent: The parent directory that the instances directory will exist in.
466 * @mkdir: The function to call when a mkdir is performed.
467 * @rmdir: The function to call when a rmdir is performed.
468 *
469 * Only one instances directory is allowed.
470 *
 471 * The instances directory is special as it allows for mkdir and rmdir
 472 * to be done by userspace. When a mkdir or rmdir is performed, the inode
 473 * locks are released and the methods passed in (@mkdir and @rmdir) are
474 * called without locks and with the name of the directory being created
475 * within the instances directory.
476 *
477 * Returns the dentry of the instances directory.
478 */
479struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
480 int (*mkdir)(const char *name),
481 int (*rmdir)(const char *name))
482{
483 struct dentry *dentry;
484
485 /* Only allow one instance of the instances directory. */
486 if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
487 return NULL;
488
489 dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
490 if (!dentry)
491 return NULL;
492
493 tracefs_ops.mkdir = mkdir;
494 tracefs_ops.rmdir = rmdir;
495
496 return dentry;
497}
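/*
 * Illustrative sketch (hypothetical callbacks, not part of the patch) of
 * the intended client of the hook above. The callbacks run with the inode
 * locks dropped, as the comment explains, and see only the name of the
 * directory being created or removed:
 */
static int example_instance_mkdir(const char *name)
{
	pr_info("instance '%s' created\n", name);
	return 0;		/* nonzero would fail the mkdir(2) call */
}

static int example_instance_rmdir(const char *name)
{
	pr_info("instance '%s' removed\n", name);
	return 0;
}

/* Only one instances directory may exist; a second call returns NULL. */
static struct dentry * __maybe_unused example_register(struct dentry *parent)
{
	return tracefs_create_instance_dir("instances", parent,
					   example_instance_mkdir,
					   example_instance_rmdir);
}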
498
499static inline int tracefs_positive(struct dentry *dentry)
500{
501 return dentry->d_inode && !d_unhashed(dentry);
502}
503
504static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
505{
506 int ret = 0;
507
508 if (tracefs_positive(dentry)) {
509 if (dentry->d_inode) {
510 dget(dentry);
511 switch (dentry->d_inode->i_mode & S_IFMT) {
512 case S_IFDIR:
513 ret = simple_rmdir(parent->d_inode, dentry);
514 break;
515 default:
516 simple_unlink(parent->d_inode, dentry);
517 break;
518 }
519 if (!ret)
520 d_delete(dentry);
521 dput(dentry);
522 }
523 }
524 return ret;
525}
526
527/**
528 * tracefs_remove - removes a file or directory from the tracefs filesystem
 529 * @dentry: a pointer to the dentry of the file or directory to be
530 * removed.
531 *
532 * This function removes a file or directory in tracefs that was previously
533 * created with a call to another tracefs function (like
534 * tracefs_create_file() or variants thereof.)
535 */
536void tracefs_remove(struct dentry *dentry)
537{
538 struct dentry *parent;
539 int ret;
540
541 if (IS_ERR_OR_NULL(dentry))
542 return;
543
544 parent = dentry->d_parent;
545 if (!parent || !parent->d_inode)
546 return;
547
548 mutex_lock(&parent->d_inode->i_mutex);
549 ret = __tracefs_remove(dentry, parent);
550 mutex_unlock(&parent->d_inode->i_mutex);
551 if (!ret)
552 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
553}
554
555/**
556 * tracefs_remove_recursive - recursively removes a directory
 557 * @dentry: a pointer to the dentry of the directory to be removed.
558 *
559 * This function recursively removes a directory tree in tracefs that
560 * was previously created with a call to another tracefs function
561 * (like tracefs_create_file() or variants thereof.)
562 */
563void tracefs_remove_recursive(struct dentry *dentry)
564{
565 struct dentry *child, *parent;
566
567 if (IS_ERR_OR_NULL(dentry))
568 return;
569
570 parent = dentry->d_parent;
571 if (!parent || !parent->d_inode)
572 return;
573
574 parent = dentry;
575 down:
576 mutex_lock(&parent->d_inode->i_mutex);
577 loop:
578 /*
579 * The parent->d_subdirs is protected by the d_lock. Outside that
 580 * lock, the child can be unlinked and set to be freed, which can
 581 * use the d_u.d_child as the RCU head and corrupt this list.
582 */
583 spin_lock(&parent->d_lock);
584 list_for_each_entry(child, &parent->d_subdirs, d_child) {
585 if (!tracefs_positive(child))
586 continue;
587
588 /* perhaps simple_empty(child) makes more sense */
589 if (!list_empty(&child->d_subdirs)) {
590 spin_unlock(&parent->d_lock);
591 mutex_unlock(&parent->d_inode->i_mutex);
592 parent = child;
593 goto down;
594 }
595
596 spin_unlock(&parent->d_lock);
597
598 if (!__tracefs_remove(child, parent))
599 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
600
601 /*
 602 * The parent->d_lock protects against the child being unlinked
 603 * from d_subdirs. When releasing the parent->d_lock we can
604 * no longer trust that the next pointer is valid.
605 * Restart the loop. We'll skip this one with the
606 * tracefs_positive() check.
607 */
608 goto loop;
609 }
610 spin_unlock(&parent->d_lock);
611
612 mutex_unlock(&parent->d_inode->i_mutex);
613 child = parent;
614 parent = parent->d_parent;
615 mutex_lock(&parent->d_inode->i_mutex);
616
617 if (child != dentry)
618 /* go up */
619 goto loop;
620
621 if (!__tracefs_remove(child, parent))
622 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
623 mutex_unlock(&parent->d_inode->i_mutex);
624}
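/*
 * Usage sketch (hypothetical tree, not part of the patch): where
 * tracefs_remove() takes out a single dentry, this helper tears down an
 * entire subtree in one call.
 */
static void __maybe_unused example_teardown(struct dentry *subsystem_root)
{
	/* removes subsystem_root and everything created beneath it */
	tracefs_remove_recursive(subsystem_root);
}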
625
626/**
627 * tracefs_initialized - Tells whether tracefs has been registered
628 */
629bool tracefs_initialized(void)
630{
631 return tracefs_registered;
632}
633
634static struct kobject *trace_kobj;
635
636static int __init tracefs_init(void)
637{
638 int retval;
639
640 trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
641 if (!trace_kobj)
642 return -EINVAL;
643
644 retval = register_filesystem(&trace_fs_type);
645 if (!retval)
646 tracefs_registered = true;
647
648 return retval;
649}
650core_initcall(tracefs_init);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eb997e9c4ab0..11a11b32a2a9 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -509,7 +509,7 @@ again:
509 c->bi.nospace_rp = 1; 509 c->bi.nospace_rp = 1;
510 smp_wmb(); 510 smp_wmb();
511 } else 511 } else
512 ubifs_err("cannot budget space, error %d", err); 512 ubifs_err(c, "cannot budget space, error %d", err);
513 return err; 513 return err;
514} 514}
515 515
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 26b69b2d4a45..63f56619991d 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -225,7 +225,7 @@ out_cancel:
225out_up: 225out_up:
226 up_write(&c->commit_sem); 226 up_write(&c->commit_sem);
227out: 227out:
228 ubifs_err("commit failed, error %d", err); 228 ubifs_err(c, "commit failed, error %d", err);
229 spin_lock(&c->cs_lock); 229 spin_lock(&c->cs_lock);
230 c->cmt_state = COMMIT_BROKEN; 230 c->cmt_state = COMMIT_BROKEN;
231 wake_up(&c->cmt_wq); 231 wake_up(&c->cmt_wq);
@@ -289,7 +289,7 @@ int ubifs_bg_thread(void *info)
289 int err; 289 int err;
290 struct ubifs_info *c = info; 290 struct ubifs_info *c = info;
291 291
292 ubifs_msg("background thread \"%s\" started, PID %d", 292 ubifs_msg(c, "background thread \"%s\" started, PID %d",
293 c->bgt_name, current->pid); 293 c->bgt_name, current->pid);
294 set_freezable(); 294 set_freezable();
295 295
@@ -324,7 +324,7 @@ int ubifs_bg_thread(void *info)
324 cond_resched(); 324 cond_resched();
325 } 325 }
326 326
327 ubifs_msg("background thread \"%s\" stops", c->bgt_name); 327 ubifs_msg(c, "background thread \"%s\" stops", c->bgt_name);
328 return 0; 328 return 0;
329} 329}
330 330
@@ -712,13 +712,13 @@ out:
712 return 0; 712 return 0;
713 713
714out_dump: 714out_dump:
715 ubifs_err("dumping index node (iip=%d)", i->iip); 715 ubifs_err(c, "dumping index node (iip=%d)", i->iip);
716 ubifs_dump_node(c, idx); 716 ubifs_dump_node(c, idx);
717 list_del(&i->list); 717 list_del(&i->list);
718 kfree(i); 718 kfree(i);
719 if (!list_empty(&list)) { 719 if (!list_empty(&list)) {
720 i = list_entry(list.prev, struct idx_node, list); 720 i = list_entry(list.prev, struct idx_node, list);
721 ubifs_err("dumping parent index node"); 721 ubifs_err(c, "dumping parent index node");
722 ubifs_dump_node(c, &i->idx); 722 ubifs_dump_node(c, &i->idx);
723 } 723 }
724out_free: 724out_free:
@@ -727,7 +727,7 @@ out_free:
727 list_del(&i->list); 727 list_del(&i->list);
728 kfree(i); 728 kfree(i);
729 } 729 }
730 ubifs_err("failed, error %d", err); 730 ubifs_err(c, "failed, error %d", err);
731 if (err > 0) 731 if (err > 0)
732 err = -EINVAL; 732 err = -EINVAL;
733 return err; 733 return err;
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 2bfa0953335d..565cb56d7225 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -92,8 +92,8 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
92 * Note, if the input buffer was not compressed, it is copied to the output 92 * Note, if the input buffer was not compressed, it is copied to the output
93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. 93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
94 */ 94 */
95void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 95void ubifs_compress(const struct ubifs_info *c, const void *in_buf,
96 int *compr_type) 96 int in_len, void *out_buf, int *out_len, int *compr_type)
97{ 97{
98 int err; 98 int err;
99 struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; 99 struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
@@ -112,9 +112,9 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
112 if (compr->comp_mutex) 112 if (compr->comp_mutex)
113 mutex_unlock(compr->comp_mutex); 113 mutex_unlock(compr->comp_mutex);
114 if (unlikely(err)) { 114 if (unlikely(err)) {
115 ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", 115 ubifs_warn(c, "cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
116 in_len, compr->name, err); 116 in_len, compr->name, err);
117 goto no_compr; 117 goto no_compr;
118 } 118 }
119 119
120 /* 120 /*
@@ -144,21 +144,21 @@ no_compr:
144 * The length of the uncompressed data is returned in @out_len. This functions 144 * The length of the uncompressed data is returned in @out_len. This functions
145 * returns %0 on success or a negative error code on failure. 145 * returns %0 on success or a negative error code on failure.
146 */ 146 */
147int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, 147int ubifs_decompress(const struct ubifs_info *c, const void *in_buf,
148 int *out_len, int compr_type) 148 int in_len, void *out_buf, int *out_len, int compr_type)
149{ 149{
150 int err; 150 int err;
151 struct ubifs_compressor *compr; 151 struct ubifs_compressor *compr;
152 152
153 if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { 153 if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
154 ubifs_err("invalid compression type %d", compr_type); 154 ubifs_err(c, "invalid compression type %d", compr_type);
155 return -EINVAL; 155 return -EINVAL;
156 } 156 }
157 157
158 compr = ubifs_compressors[compr_type]; 158 compr = ubifs_compressors[compr_type];
159 159
160 if (unlikely(!compr->capi_name)) { 160 if (unlikely(!compr->capi_name)) {
161 ubifs_err("%s compression is not compiled in", compr->name); 161 ubifs_err(c, "%s compression is not compiled in", compr->name);
162 return -EINVAL; 162 return -EINVAL;
163 } 163 }
164 164
@@ -175,7 +175,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
175 if (compr->decomp_mutex) 175 if (compr->decomp_mutex)
176 mutex_unlock(compr->decomp_mutex); 176 mutex_unlock(compr->decomp_mutex);
177 if (err) 177 if (err)
178 ubifs_err("cannot decompress %d bytes, compressor %s, error %d", 178 ubifs_err(c, "cannot decompress %d bytes, compressor %s, error %d",
179 in_len, compr->name, err); 179 in_len, compr->name, err);
180 180
181 return err; 181 return err;
@@ -193,8 +193,8 @@ static int __init compr_init(struct ubifs_compressor *compr)
193 if (compr->capi_name) { 193 if (compr->capi_name) {
194 compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); 194 compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
195 if (IS_ERR(compr->cc)) { 195 if (IS_ERR(compr->cc)) {
196 ubifs_err("cannot initialize compressor %s, error %ld", 196 pr_err("UBIFS error (pid %d): cannot initialize compressor %s, error %ld",
197 compr->name, PTR_ERR(compr->cc)); 197 current->pid, compr->name, PTR_ERR(compr->cc));
198 return PTR_ERR(compr->cc); 198 return PTR_ERR(compr->cc);
199 } 199 }
200 } 200 }
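/*
 * Call-site sketch (hypothetical buffers, not part of the patch) for the
 * new signatures above; both helpers now take the ubifs_info context
 * first so their diagnostics can identify the affected filesystem:
 */
static int __maybe_unused example_roundtrip(const struct ubifs_info *c,
					    void *buf, int len,
					    void *tmp, int tmp_len)
{
	int compr_type = UBIFS_COMPR_LZO;

	/* falls back to UBIFS_COMPR_NONE internally if compression fails */
	ubifs_compress(c, buf, len, tmp, &tmp_len, &compr_type);
	return ubifs_decompress(c, tmp, tmp_len, buf, &len, compr_type);
}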
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4cfb3e82c56f..4c46a9865fa7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -746,7 +746,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
746 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { 746 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
747 err = ubifs_read_one_lp(c, lnum, &lp); 747 err = ubifs_read_one_lp(c, lnum, &lp);
748 if (err) { 748 if (err) {
749 ubifs_err("cannot read lprops for LEB %d", lnum); 749 ubifs_err(c, "cannot read lprops for LEB %d", lnum);
750 continue; 750 continue;
751 } 751 }
752 752
@@ -819,13 +819,13 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
819 819
820 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 820 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
821 if (!buf) { 821 if (!buf) {
822 ubifs_err("cannot allocate memory for dumping LEB %d", lnum); 822 ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum);
823 return; 823 return;
824 } 824 }
825 825
826 sleb = ubifs_scan(c, lnum, 0, buf, 0); 826 sleb = ubifs_scan(c, lnum, 0, buf, 0);
827 if (IS_ERR(sleb)) { 827 if (IS_ERR(sleb)) {
828 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 828 ubifs_err(c, "scan error %d", (int)PTR_ERR(sleb));
829 goto out; 829 goto out;
830 } 830 }
831 831
@@ -1032,7 +1032,7 @@ int dbg_check_space_info(struct ubifs_info *c)
1032 spin_unlock(&c->space_lock); 1032 spin_unlock(&c->space_lock);
1033 1033
1034 if (free != d->saved_free) { 1034 if (free != d->saved_free) {
1035 ubifs_err("free space changed from %lld to %lld", 1035 ubifs_err(c, "free space changed from %lld to %lld",
1036 d->saved_free, free); 1036 d->saved_free, free);
1037 goto out; 1037 goto out;
1038 } 1038 }
@@ -1040,15 +1040,15 @@ int dbg_check_space_info(struct ubifs_info *c)
1040 return 0; 1040 return 0;
1041 1041
1042out: 1042out:
1043 ubifs_msg("saved lprops statistics dump"); 1043 ubifs_msg(c, "saved lprops statistics dump");
1044 ubifs_dump_lstats(&d->saved_lst); 1044 ubifs_dump_lstats(&d->saved_lst);
1045 ubifs_msg("saved budgeting info dump"); 1045 ubifs_msg(c, "saved budgeting info dump");
1046 ubifs_dump_budg(c, &d->saved_bi); 1046 ubifs_dump_budg(c, &d->saved_bi);
1047 ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); 1047 ubifs_msg(c, "saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
1048 ubifs_msg("current lprops statistics dump"); 1048 ubifs_msg(c, "current lprops statistics dump");
1049 ubifs_get_lp_stats(c, &lst); 1049 ubifs_get_lp_stats(c, &lst);
1050 ubifs_dump_lstats(&lst); 1050 ubifs_dump_lstats(&lst);
1051 ubifs_msg("current budgeting info dump"); 1051 ubifs_msg(c, "current budgeting info dump");
1052 ubifs_dump_budg(c, &c->bi); 1052 ubifs_dump_budg(c, &c->bi);
1053 dump_stack(); 1053 dump_stack();
1054 return -EINVAL; 1054 return -EINVAL;
@@ -1077,9 +1077,9 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
1077 mutex_lock(&ui->ui_mutex); 1077 mutex_lock(&ui->ui_mutex);
1078 spin_lock(&ui->ui_lock); 1078 spin_lock(&ui->ui_lock);
1079 if (ui->ui_size != ui->synced_i_size && !ui->dirty) { 1079 if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
1080 ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", 1080 ubifs_err(c, "ui_size is %lld, synced_i_size is %lld, but inode is clean",
1081 ui->ui_size, ui->synced_i_size); 1081 ui->ui_size, ui->synced_i_size);
1082 ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, 1082 ubifs_err(c, "i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
1083 inode->i_mode, i_size_read(inode)); 1083 inode->i_mode, i_size_read(inode));
1084 dump_stack(); 1084 dump_stack();
1085 err = -EINVAL; 1085 err = -EINVAL;
@@ -1140,7 +1140,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1140 kfree(pdent); 1140 kfree(pdent);
1141 1141
1142 if (i_size_read(dir) != size) { 1142 if (i_size_read(dir) != size) {
1143 ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", 1143 ubifs_err(c, "directory inode %lu has size %llu, but calculated size is %llu",
1144 dir->i_ino, (unsigned long long)i_size_read(dir), 1144 dir->i_ino, (unsigned long long)i_size_read(dir),
1145 (unsigned long long)size); 1145 (unsigned long long)size);
1146 ubifs_dump_inode(c, dir); 1146 ubifs_dump_inode(c, dir);
@@ -1148,7 +1148,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1148 return -EINVAL; 1148 return -EINVAL;
1149 } 1149 }
1150 if (dir->i_nlink != nlink) { 1150 if (dir->i_nlink != nlink) {
1151 ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", 1151 ubifs_err(c, "directory inode %lu has nlink %u, but calculated nlink is %u",
1152 dir->i_ino, dir->i_nlink, nlink); 1152 dir->i_ino, dir->i_nlink, nlink);
1153 ubifs_dump_inode(c, dir); 1153 ubifs_dump_inode(c, dir);
1154 dump_stack(); 1154 dump_stack();
@@ -1207,10 +1207,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1207 err = 1; 1207 err = 1;
1208 key_read(c, &dent1->key, &key); 1208 key_read(c, &dent1->key, &key);
1209 if (keys_cmp(c, &zbr1->key, &key)) { 1209 if (keys_cmp(c, &zbr1->key, &key)) {
1210 ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum, 1210 ubifs_err(c, "1st entry at %d:%d has key %s", zbr1->lnum,
1211 zbr1->offs, dbg_snprintf_key(c, &key, key_buf, 1211 zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
1212 DBG_KEY_BUF_LEN)); 1212 DBG_KEY_BUF_LEN));
1213 ubifs_err("but it should have key %s according to tnc", 1213 ubifs_err(c, "but it should have key %s according to tnc",
1214 dbg_snprintf_key(c, &zbr1->key, key_buf, 1214 dbg_snprintf_key(c, &zbr1->key, key_buf,
1215 DBG_KEY_BUF_LEN)); 1215 DBG_KEY_BUF_LEN));
1216 ubifs_dump_node(c, dent1); 1216 ubifs_dump_node(c, dent1);
@@ -1219,10 +1219,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1219 1219
1220 key_read(c, &dent2->key, &key); 1220 key_read(c, &dent2->key, &key);
1221 if (keys_cmp(c, &zbr2->key, &key)) { 1221 if (keys_cmp(c, &zbr2->key, &key)) {
1222 ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum, 1222 ubifs_err(c, "2nd entry at %d:%d has key %s", zbr1->lnum,
1223 zbr1->offs, dbg_snprintf_key(c, &key, key_buf, 1223 zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
1224 DBG_KEY_BUF_LEN)); 1224 DBG_KEY_BUF_LEN));
1225 ubifs_err("but it should have key %s according to tnc", 1225 ubifs_err(c, "but it should have key %s according to tnc",
1226 dbg_snprintf_key(c, &zbr2->key, key_buf, 1226 dbg_snprintf_key(c, &zbr2->key, key_buf,
1227 DBG_KEY_BUF_LEN)); 1227 DBG_KEY_BUF_LEN));
1228 ubifs_dump_node(c, dent2); 1228 ubifs_dump_node(c, dent2);
@@ -1238,14 +1238,14 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1238 goto out_free; 1238 goto out_free;
1239 } 1239 }
1240 if (cmp == 0 && nlen1 == nlen2) 1240 if (cmp == 0 && nlen1 == nlen2)
1241 ubifs_err("2 xent/dent nodes with the same name"); 1241 ubifs_err(c, "2 xent/dent nodes with the same name");
1242 else 1242 else
1243 ubifs_err("bad order of colliding key %s", 1243 ubifs_err(c, "bad order of colliding key %s",
1244 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 1244 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
1245 1245
1246 ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); 1246 ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs);
1247 ubifs_dump_node(c, dent1); 1247 ubifs_dump_node(c, dent1);
1248 ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); 1248 ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs);
1249 ubifs_dump_node(c, dent2); 1249 ubifs_dump_node(c, dent2);
1250 1250
1251out_free: 1251out_free:
@@ -1447,11 +1447,11 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
1447 return 0; 1447 return 0;
1448 1448
1449out: 1449out:
1450 ubifs_err("failed, error %d", err); 1450 ubifs_err(c, "failed, error %d", err);
1451 ubifs_msg("dump of the znode"); 1451 ubifs_msg(c, "dump of the znode");
1452 ubifs_dump_znode(c, znode); 1452 ubifs_dump_znode(c, znode);
1453 if (zp) { 1453 if (zp) {
1454 ubifs_msg("dump of the parent znode"); 1454 ubifs_msg(c, "dump of the parent znode");
1455 ubifs_dump_znode(c, zp); 1455 ubifs_dump_znode(c, zp);
1456 } 1456 }
1457 dump_stack(); 1457 dump_stack();
@@ -1518,9 +1518,9 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1518 if (err < 0) 1518 if (err < 0)
1519 return err; 1519 return err;
1520 if (err) { 1520 if (err) {
1521 ubifs_msg("first znode"); 1521 ubifs_msg(c, "first znode");
1522 ubifs_dump_znode(c, prev); 1522 ubifs_dump_znode(c, prev);
1523 ubifs_msg("second znode"); 1523 ubifs_msg(c, "second znode");
1524 ubifs_dump_znode(c, znode); 1524 ubifs_dump_znode(c, znode);
1525 return -EINVAL; 1525 return -EINVAL;
1526 } 1526 }
@@ -1529,13 +1529,13 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1529 1529
1530 if (extra) { 1530 if (extra) {
1531 if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { 1531 if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
1532 ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", 1532 ubifs_err(c, "incorrect clean_zn_cnt %ld, calculated %ld",
1533 atomic_long_read(&c->clean_zn_cnt), 1533 atomic_long_read(&c->clean_zn_cnt),
1534 clean_cnt); 1534 clean_cnt);
1535 return -EINVAL; 1535 return -EINVAL;
1536 } 1536 }
1537 if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { 1537 if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
1538 ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", 1538 ubifs_err(c, "incorrect dirty_zn_cnt %ld, calculated %ld",
1539 atomic_long_read(&c->dirty_zn_cnt), 1539 atomic_long_read(&c->dirty_zn_cnt),
1540 dirty_cnt); 1540 dirty_cnt);
1541 return -EINVAL; 1541 return -EINVAL;
@@ -1608,7 +1608,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1608 if (znode_cb) { 1608 if (znode_cb) {
1609 err = znode_cb(c, znode, priv); 1609 err = znode_cb(c, znode, priv);
1610 if (err) { 1610 if (err) {
1611 ubifs_err("znode checking function returned error %d", 1611 ubifs_err(c, "znode checking function returned error %d",
1612 err); 1612 err);
1613 ubifs_dump_znode(c, znode); 1613 ubifs_dump_znode(c, znode);
1614 goto out_dump; 1614 goto out_dump;
@@ -1619,7 +1619,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1619 zbr = &znode->zbranch[idx]; 1619 zbr = &znode->zbranch[idx];
1620 err = leaf_cb(c, zbr, priv); 1620 err = leaf_cb(c, zbr, priv);
1621 if (err) { 1621 if (err) {
1622 ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", 1622 ubifs_err(c, "leaf checking function returned error %d, for leaf at LEB %d:%d",
1623 err, zbr->lnum, zbr->offs); 1623 err, zbr->lnum, zbr->offs);
1624 goto out_dump; 1624 goto out_dump;
1625 } 1625 }
@@ -1675,7 +1675,7 @@ out_dump:
1675 zbr = &znode->parent->zbranch[znode->iip]; 1675 zbr = &znode->parent->zbranch[znode->iip];
1676 else 1676 else
1677 zbr = &c->zroot; 1677 zbr = &c->zroot;
1678 ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); 1678 ubifs_msg(c, "dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
1679 ubifs_dump_znode(c, znode); 1679 ubifs_dump_znode(c, znode);
1680out_unlock: 1680out_unlock:
1681 mutex_unlock(&c->tnc_mutex); 1681 mutex_unlock(&c->tnc_mutex);
@@ -1722,12 +1722,12 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1722 1722
1723 err = dbg_walk_index(c, NULL, add_size, &calc); 1723 err = dbg_walk_index(c, NULL, add_size, &calc);
1724 if (err) { 1724 if (err) {
1725 ubifs_err("error %d while walking the index", err); 1725 ubifs_err(c, "error %d while walking the index", err);
1726 return err; 1726 return err;
1727 } 1727 }
1728 1728
1729 if (calc != idx_size) { 1729 if (calc != idx_size) {
1730 ubifs_err("index size check failed: calculated size is %lld, should be %lld", 1730 ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld",
1731 calc, idx_size); 1731 calc, idx_size);
1732 dump_stack(); 1732 dump_stack();
1733 return -EINVAL; 1733 return -EINVAL;
@@ -1814,7 +1814,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1814 } 1814 }
1815 1815
1816 if (inum > c->highest_inum) { 1816 if (inum > c->highest_inum) {
1817 ubifs_err("too high inode number, max. is %lu", 1817 ubifs_err(c, "too high inode number, max. is %lu",
1818 (unsigned long)c->highest_inum); 1818 (unsigned long)c->highest_inum);
1819 return ERR_PTR(-EINVAL); 1819 return ERR_PTR(-EINVAL);
1820 } 1820 }
@@ -1921,17 +1921,17 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1921 ino_key_init(c, &key, inum); 1921 ino_key_init(c, &key, inum);
1922 err = ubifs_lookup_level0(c, &key, &znode, &n); 1922 err = ubifs_lookup_level0(c, &key, &znode, &n);
1923 if (!err) { 1923 if (!err) {
1924 ubifs_err("inode %lu not found in index", (unsigned long)inum); 1924 ubifs_err(c, "inode %lu not found in index", (unsigned long)inum);
1925 return ERR_PTR(-ENOENT); 1925 return ERR_PTR(-ENOENT);
1926 } else if (err < 0) { 1926 } else if (err < 0) {
1927 ubifs_err("error %d while looking up inode %lu", 1927 ubifs_err(c, "error %d while looking up inode %lu",
1928 err, (unsigned long)inum); 1928 err, (unsigned long)inum);
1929 return ERR_PTR(err); 1929 return ERR_PTR(err);
1930 } 1930 }
1931 1931
1932 zbr = &znode->zbranch[n]; 1932 zbr = &znode->zbranch[n];
1933 if (zbr->len < UBIFS_INO_NODE_SZ) { 1933 if (zbr->len < UBIFS_INO_NODE_SZ) {
1934 ubifs_err("bad node %lu node length %d", 1934 ubifs_err(c, "bad node %lu node length %d",
1935 (unsigned long)inum, zbr->len); 1935 (unsigned long)inum, zbr->len);
1936 return ERR_PTR(-EINVAL); 1936 return ERR_PTR(-EINVAL);
1937 } 1937 }
@@ -1942,7 +1942,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1942 1942
1943 err = ubifs_tnc_read_node(c, zbr, ino); 1943 err = ubifs_tnc_read_node(c, zbr, ino);
1944 if (err) { 1944 if (err) {
1945 ubifs_err("cannot read inode node at LEB %d:%d, error %d", 1945 ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d",
1946 zbr->lnum, zbr->offs, err); 1946 zbr->lnum, zbr->offs, err);
1947 kfree(ino); 1947 kfree(ino);
1948 return ERR_PTR(err); 1948 return ERR_PTR(err);
@@ -1951,7 +1951,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1951 fscki = add_inode(c, fsckd, ino); 1951 fscki = add_inode(c, fsckd, ino);
1952 kfree(ino); 1952 kfree(ino);
1953 if (IS_ERR(fscki)) { 1953 if (IS_ERR(fscki)) {
1954 ubifs_err("error %ld while adding inode %lu node", 1954 ubifs_err(c, "error %ld while adding inode %lu node",
1955 PTR_ERR(fscki), (unsigned long)inum); 1955 PTR_ERR(fscki), (unsigned long)inum);
1956 return fscki; 1956 return fscki;
1957 } 1957 }
@@ -1985,7 +1985,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1985 struct fsck_inode *fscki; 1985 struct fsck_inode *fscki;
1986 1986
1987 if (zbr->len < UBIFS_CH_SZ) { 1987 if (zbr->len < UBIFS_CH_SZ) {
1988 ubifs_err("bad leaf length %d (LEB %d:%d)", 1988 ubifs_err(c, "bad leaf length %d (LEB %d:%d)",
1989 zbr->len, zbr->lnum, zbr->offs); 1989 zbr->len, zbr->lnum, zbr->offs);
1990 return -EINVAL; 1990 return -EINVAL;
1991 } 1991 }
@@ -1996,7 +1996,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1996 1996
1997 err = ubifs_tnc_read_node(c, zbr, node); 1997 err = ubifs_tnc_read_node(c, zbr, node);
1998 if (err) { 1998 if (err) {
1999 ubifs_err("cannot read leaf node at LEB %d:%d, error %d", 1999 ubifs_err(c, "cannot read leaf node at LEB %d:%d, error %d",
2000 zbr->lnum, zbr->offs, err); 2000 zbr->lnum, zbr->offs, err);
2001 goto out_free; 2001 goto out_free;
2002 } 2002 }
@@ -2006,7 +2006,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2006 fscki = add_inode(c, priv, node); 2006 fscki = add_inode(c, priv, node);
2007 if (IS_ERR(fscki)) { 2007 if (IS_ERR(fscki)) {
2008 err = PTR_ERR(fscki); 2008 err = PTR_ERR(fscki);
2009 ubifs_err("error %d while adding inode node", err); 2009 ubifs_err(c, "error %d while adding inode node", err);
2010 goto out_dump; 2010 goto out_dump;
2011 } 2011 }
2012 goto out; 2012 goto out;
@@ -2014,7 +2014,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2014 2014
2015 if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && 2015 if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY &&
2016 type != UBIFS_DATA_KEY) { 2016 type != UBIFS_DATA_KEY) {
2017 ubifs_err("unexpected node type %d at LEB %d:%d", 2017 ubifs_err(c, "unexpected node type %d at LEB %d:%d",
2018 type, zbr->lnum, zbr->offs); 2018 type, zbr->lnum, zbr->offs);
2019 err = -EINVAL; 2019 err = -EINVAL;
2020 goto out_free; 2020 goto out_free;
@@ -2022,7 +2022,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2022 2022
2023 ch = node; 2023 ch = node;
2024 if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { 2024 if (le64_to_cpu(ch->sqnum) > c->max_sqnum) {
2025 ubifs_err("too high sequence number, max. is %llu", 2025 ubifs_err(c, "too high sequence number, max. is %llu",
2026 c->max_sqnum); 2026 c->max_sqnum);
2027 err = -EINVAL; 2027 err = -EINVAL;
2028 goto out_dump; 2028 goto out_dump;
@@ -2042,7 +2042,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2042 fscki = read_add_inode(c, priv, inum); 2042 fscki = read_add_inode(c, priv, inum);
2043 if (IS_ERR(fscki)) { 2043 if (IS_ERR(fscki)) {
2044 err = PTR_ERR(fscki); 2044 err = PTR_ERR(fscki);
2045 ubifs_err("error %d while processing data node and trying to find inode node %lu", 2045 ubifs_err(c, "error %d while processing data node and trying to find inode node %lu",
2046 err, (unsigned long)inum); 2046 err, (unsigned long)inum);
2047 goto out_dump; 2047 goto out_dump;
2048 } 2048 }
@@ -2052,7 +2052,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2052 blk_offs <<= UBIFS_BLOCK_SHIFT; 2052 blk_offs <<= UBIFS_BLOCK_SHIFT;
2053 blk_offs += le32_to_cpu(dn->size); 2053 blk_offs += le32_to_cpu(dn->size);
2054 if (blk_offs > fscki->size) { 2054 if (blk_offs > fscki->size) {
2055 ubifs_err("data node at LEB %d:%d is not within inode size %lld", 2055 ubifs_err(c, "data node at LEB %d:%d is not within inode size %lld",
2056 zbr->lnum, zbr->offs, fscki->size); 2056 zbr->lnum, zbr->offs, fscki->size);
2057 err = -EINVAL; 2057 err = -EINVAL;
2058 goto out_dump; 2058 goto out_dump;
@@ -2076,7 +2076,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2076 fscki = read_add_inode(c, priv, inum); 2076 fscki = read_add_inode(c, priv, inum);
2077 if (IS_ERR(fscki)) { 2077 if (IS_ERR(fscki)) {
2078 err = PTR_ERR(fscki); 2078 err = PTR_ERR(fscki);
2079 ubifs_err("error %d while processing entry node and trying to find inode node %lu", 2079 ubifs_err(c, "error %d while processing entry node and trying to find inode node %lu",
2080 err, (unsigned long)inum); 2080 err, (unsigned long)inum);
2081 goto out_dump; 2081 goto out_dump;
2082 } 2082 }
@@ -2088,7 +2088,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2088 fscki1 = read_add_inode(c, priv, inum); 2088 fscki1 = read_add_inode(c, priv, inum);
2089 if (IS_ERR(fscki1)) { 2089 if (IS_ERR(fscki1)) {
2090 err = PTR_ERR(fscki1); 2090 err = PTR_ERR(fscki1);
2091 ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", 2091 ubifs_err(c, "error %d while processing entry node and trying to find parent inode node %lu",
2092 err, (unsigned long)inum); 2092 err, (unsigned long)inum);
2093 goto out_dump; 2093 goto out_dump;
2094 } 2094 }
@@ -2111,7 +2111,7 @@ out:
2111 return 0; 2111 return 0;
2112 2112
2113out_dump: 2113out_dump:
2114 ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); 2114 ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
2115 ubifs_dump_node(c, node); 2115 ubifs_dump_node(c, node);
2116out_free: 2116out_free:
2117 kfree(node); 2117 kfree(node);
@@ -2162,52 +2162,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
2162 */ 2162 */
2163 if (fscki->inum != UBIFS_ROOT_INO && 2163 if (fscki->inum != UBIFS_ROOT_INO &&
2164 fscki->references != 1) { 2164 fscki->references != 1) {
2165 ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1", 2165 ubifs_err(c, "directory inode %lu has %d direntries which refer it, but should be 1",
2166 (unsigned long)fscki->inum, 2166 (unsigned long)fscki->inum,
2167 fscki->references); 2167 fscki->references);
2168 goto out_dump; 2168 goto out_dump;
2169 } 2169 }
2170 if (fscki->inum == UBIFS_ROOT_INO && 2170 if (fscki->inum == UBIFS_ROOT_INO &&
2171 fscki->references != 0) { 2171 fscki->references != 0) {
2172 ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", 2172 ubifs_err(c, "root inode %lu has non-zero (%d) direntries which refer it",
2173 (unsigned long)fscki->inum, 2173 (unsigned long)fscki->inum,
2174 fscki->references); 2174 fscki->references);
2175 goto out_dump; 2175 goto out_dump;
2176 } 2176 }
2177 if (fscki->calc_sz != fscki->size) { 2177 if (fscki->calc_sz != fscki->size) {
2178 ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", 2178 ubifs_err(c, "directory inode %lu size is %lld, but calculated size is %lld",
2179 (unsigned long)fscki->inum, 2179 (unsigned long)fscki->inum,
2180 fscki->size, fscki->calc_sz); 2180 fscki->size, fscki->calc_sz);
2181 goto out_dump; 2181 goto out_dump;
2182 } 2182 }
2183 if (fscki->calc_cnt != fscki->nlink) { 2183 if (fscki->calc_cnt != fscki->nlink) {
2184 ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", 2184 ubifs_err(c, "directory inode %lu nlink is %d, but calculated nlink is %d",
2185 (unsigned long)fscki->inum, 2185 (unsigned long)fscki->inum,
2186 fscki->nlink, fscki->calc_cnt); 2186 fscki->nlink, fscki->calc_cnt);
2187 goto out_dump; 2187 goto out_dump;
2188 } 2188 }
2189 } else { 2189 } else {
2190 if (fscki->references != fscki->nlink) { 2190 if (fscki->references != fscki->nlink) {
2191 ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", 2191 ubifs_err(c, "inode %lu nlink is %d, but calculated nlink is %d",
2192 (unsigned long)fscki->inum, 2192 (unsigned long)fscki->inum,
2193 fscki->nlink, fscki->references); 2193 fscki->nlink, fscki->references);
2194 goto out_dump; 2194 goto out_dump;
2195 } 2195 }
2196 } 2196 }
2197 if (fscki->xattr_sz != fscki->calc_xsz) { 2197 if (fscki->xattr_sz != fscki->calc_xsz) {
2198 ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", 2198 ubifs_err(c, "inode %lu has xattr size %u, but calculated size is %lld",
2199 (unsigned long)fscki->inum, fscki->xattr_sz, 2199 (unsigned long)fscki->inum, fscki->xattr_sz,
2200 fscki->calc_xsz); 2200 fscki->calc_xsz);
2201 goto out_dump; 2201 goto out_dump;
2202 } 2202 }
2203 if (fscki->xattr_cnt != fscki->calc_xcnt) { 2203 if (fscki->xattr_cnt != fscki->calc_xcnt) {
2204 ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", 2204 ubifs_err(c, "inode %lu has %u xattrs, but calculated count is %lld",
2205 (unsigned long)fscki->inum, 2205 (unsigned long)fscki->inum,
2206 fscki->xattr_cnt, fscki->calc_xcnt); 2206 fscki->xattr_cnt, fscki->calc_xcnt);
2207 goto out_dump; 2207 goto out_dump;
2208 } 2208 }
2209 if (fscki->xattr_nms != fscki->calc_xnms) { 2209 if (fscki->xattr_nms != fscki->calc_xnms) {
2210 ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", 2210 ubifs_err(c, "inode %lu has xattr names' size %u, but calculated names' size is %lld",
2211 (unsigned long)fscki->inum, fscki->xattr_nms, 2211 (unsigned long)fscki->inum, fscki->xattr_nms,
2212 fscki->calc_xnms); 2212 fscki->calc_xnms);
2213 goto out_dump; 2213 goto out_dump;
@@ -2221,11 +2221,11 @@ out_dump:
2221 ino_key_init(c, &key, fscki->inum); 2221 ino_key_init(c, &key, fscki->inum);
 	err = ubifs_lookup_level0(c, &key, &znode, &n);
 	if (!err) {
-		ubifs_err("inode %lu not found in index",
+		ubifs_err(c, "inode %lu not found in index",
 			  (unsigned long)fscki->inum);
 		return -ENOENT;
 	} else if (err < 0) {
-		ubifs_err("error %d while looking up inode %lu",
+		ubifs_err(c, "error %d while looking up inode %lu",
 			  err, (unsigned long)fscki->inum);
 		return err;
 	}
@@ -2237,13 +2237,13 @@ out_dump:
 
 	err = ubifs_tnc_read_node(c, zbr, ino);
 	if (err) {
-		ubifs_err("cannot read inode node at LEB %d:%d, error %d",
+		ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d",
 			  zbr->lnum, zbr->offs, err);
 		kfree(ino);
 		return err;
 	}
 
-	ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
+	ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d",
 		  (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
 	ubifs_dump_node(c, ino);
 	kfree(ino);
@@ -2284,7 +2284,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
 	return 0;
 
 out_free:
-	ubifs_err("file-system check failed with error %d", err);
+	ubifs_err(c, "file-system check failed with error %d", err);
 	dump_stack();
 	free_inodes(&fsckd);
 	return err;
@@ -2315,12 +2315,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 		sb = container_of(cur->next, struct ubifs_scan_node, list);
 
 		if (sa->type != UBIFS_DATA_NODE) {
-			ubifs_err("bad node type %d", sa->type);
+			ubifs_err(c, "bad node type %d", sa->type);
 			ubifs_dump_node(c, sa->node);
 			return -EINVAL;
 		}
 		if (sb->type != UBIFS_DATA_NODE) {
-			ubifs_err("bad node type %d", sb->type);
+			ubifs_err(c, "bad node type %d", sb->type);
 			ubifs_dump_node(c, sb->node);
 			return -EINVAL;
 		}
@@ -2331,7 +2331,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 		if (inuma < inumb)
 			continue;
 		if (inuma > inumb) {
-			ubifs_err("larger inum %lu goes before inum %lu",
+			ubifs_err(c, "larger inum %lu goes before inum %lu",
 				  (unsigned long)inuma, (unsigned long)inumb);
 			goto error_dump;
 		}
@@ -2340,11 +2340,11 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 		blkb = key_block(c, &sb->key);
 
 		if (blka > blkb) {
-			ubifs_err("larger block %u goes before %u", blka, blkb);
+			ubifs_err(c, "larger block %u goes before %u", blka, blkb);
 			goto error_dump;
 		}
 		if (blka == blkb) {
-			ubifs_err("two data nodes for the same block");
+			ubifs_err(c, "two data nodes for the same block");
 			goto error_dump;
 		}
 	}
@@ -2383,19 +2383,19 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 
 		if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
 		    sa->type != UBIFS_XENT_NODE) {
-			ubifs_err("bad node type %d", sa->type);
+			ubifs_err(c, "bad node type %d", sa->type);
 			ubifs_dump_node(c, sa->node);
 			return -EINVAL;
 		}
 		if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
 		    sa->type != UBIFS_XENT_NODE) {
-			ubifs_err("bad node type %d", sb->type);
+			ubifs_err(c, "bad node type %d", sb->type);
 			ubifs_dump_node(c, sb->node);
 			return -EINVAL;
 		}
 
 		if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
-			ubifs_err("non-inode node goes before inode node");
+			ubifs_err(c, "non-inode node goes before inode node");
 			goto error_dump;
 		}
 
@@ -2405,7 +2405,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 		if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
 			/* Inode nodes are sorted in descending size order */
 			if (sa->len < sb->len) {
-				ubifs_err("smaller inode node goes first");
+				ubifs_err(c, "smaller inode node goes first");
 				goto error_dump;
 			}
 			continue;
@@ -2421,7 +2421,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 		if (inuma < inumb)
 			continue;
 		if (inuma > inumb) {
-			ubifs_err("larger inum %lu goes before inum %lu",
+			ubifs_err(c, "larger inum %lu goes before inum %lu",
 				  (unsigned long)inuma, (unsigned long)inumb);
 			goto error_dump;
 		}
@@ -2430,7 +2430,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 		hashb = key_block(c, &sb->key);
 
 		if (hasha > hashb) {
-			ubifs_err("larger hash %u goes before %u",
+			ubifs_err(c, "larger hash %u goes before %u",
 				  hasha, hashb);
 			goto error_dump;
 		}
@@ -2439,9 +2439,9 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 	return 0;
 
 error_dump:
-	ubifs_msg("dumping first node");
+	ubifs_msg(c, "dumping first node");
 	ubifs_dump_node(c, sa->node);
-	ubifs_msg("dumping second node");
+	ubifs_msg(c, "dumping second node");
 	ubifs_dump_node(c, sb->node);
 	return -EINVAL;
 	return 0;
@@ -2470,13 +2470,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
 			delay = prandom_u32() % 60000;
 			d->pc_timeout = jiffies;
 			d->pc_timeout += msecs_to_jiffies(delay);
-			ubifs_warn("failing after %lums", delay);
+			ubifs_warn(c, "failing after %lums", delay);
 		} else {
 			d->pc_delay = 2;
 			delay = prandom_u32() % 10000;
 			/* Fail within 10000 operations */
 			d->pc_cnt_max = delay;
-			ubifs_warn("failing after %lu calls", delay);
+			ubifs_warn(c, "failing after %lu calls", delay);
 		}
 	}
 
@@ -2494,55 +2494,55 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
 			return 0;
 		if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in super block LEB %d", lnum);
+		ubifs_warn(c, "failing in super block LEB %d", lnum);
 	} else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
 		if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in master LEB %d", lnum);
+		ubifs_warn(c, "failing in master LEB %d", lnum);
 	} else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
 		if (write && chance(99, 100))
 			return 0;
 		if (chance(399, 400))
 			return 0;
-		ubifs_warn("failing in log LEB %d", lnum);
+		ubifs_warn(c, "failing in log LEB %d", lnum);
 	} else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
 		if (write && chance(7, 8))
 			return 0;
 		if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in LPT LEB %d", lnum);
+		ubifs_warn(c, "failing in LPT LEB %d", lnum);
 	} else if (lnum >= c->orph_first && lnum <= c->orph_last) {
 		if (write && chance(1, 2))
 			return 0;
 		if (chance(9, 10))
 			return 0;
-		ubifs_warn("failing in orphan LEB %d", lnum);
+		ubifs_warn(c, "failing in orphan LEB %d", lnum);
 	} else if (lnum == c->ihead_lnum) {
 		if (chance(99, 100))
 			return 0;
-		ubifs_warn("failing in index head LEB %d", lnum);
+		ubifs_warn(c, "failing in index head LEB %d", lnum);
 	} else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
 		if (chance(9, 10))
 			return 0;
-		ubifs_warn("failing in GC head LEB %d", lnum);
+		ubifs_warn(c, "failing in GC head LEB %d", lnum);
 	} else if (write && !RB_EMPTY_ROOT(&c->buds) &&
 		   !ubifs_search_bud(c, lnum)) {
 		if (chance(19, 20))
 			return 0;
-		ubifs_warn("failing in non-bud LEB %d", lnum);
+		ubifs_warn(c, "failing in non-bud LEB %d", lnum);
 	} else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
 		   c->cmt_state == COMMIT_RUNNING_REQUIRED) {
 		if (chance(999, 1000))
 			return 0;
-		ubifs_warn("failing in bud LEB %d commit running", lnum);
+		ubifs_warn(c, "failing in bud LEB %d commit running", lnum);
 	} else {
 		if (chance(9999, 10000))
 			return 0;
-		ubifs_warn("failing in bud LEB %d commit not running", lnum);
+		ubifs_warn(c, "failing in bud LEB %d commit not running", lnum);
 	}
 
 	d->pc_happened = 1;
-	ubifs_warn("========== Power cut emulated ==========");
+	ubifs_warn(c, "========== Power cut emulated ==========");
 	dump_stack();
 	return 1;
 }
@@ -2557,7 +2557,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
 	/* Corruption span max to end of write unit */
 	to = min(len, ALIGN(from + 1, c->max_write_size));
 
-	ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
+	ubifs_warn(c, "filled bytes %u-%u with %s", from, to - 1,
 		   ffs ? "0xFFs" : "random data");
 
 	if (ffs)
@@ -2579,7 +2579,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
 	failing = power_cut_emulated(c, lnum, 1);
 	if (failing) {
 		len = corrupt_data(c, buf, len);
-		ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
+		ubifs_warn(c, "actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
 			   len, lnum, offs);
 	}
 	err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
@@ -2909,7 +2909,7 @@ out_remove:
 	debugfs_remove_recursive(d->dfs_dir);
 out:
 	err = dent ? PTR_ERR(dent) : -ENODEV;
-	ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
+	ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n",
 		  fname, err);
 	return err;
 }
@@ -3063,8 +3063,8 @@ out_remove:
 	debugfs_remove_recursive(dfs_rootdir);
 out:
 	err = dent ? PTR_ERR(dent) : -ENODEV;
-	ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
-		  fname, err);
+	pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n",
+	       current->pid, fname, err);
 	return err;
 }
 
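The change running through every hunk above is mechanical: ubifs_err(), ubifs_warn() and ubifs_msg() each gain a struct ubifs_info argument so a message can say which mounted UBIFS volume it belongs to, instead of printing an anonymous line into a log shared by several volumes. A minimal sketch of what such a helper can look like, assuming the volume identifiers live in c->vi (the field names here are illustrative, not taken from this diff):

	/* Sketch only: a volume-aware error printer, not part of this patch. */
	void ubifs_err(const struct ubifs_info *c, const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		/* Prefix the message with the UBI device/volume and caller pid. */
		pr_err("UBIFS error (ubi%d:%d pid %d): %pV\n",
		       c->vi.ubi_num, c->vi.vol_id, current->pid, &vaf);
		va_end(args);
	}

This also explains the one hunk above that breaks the pattern: the debugfs root directory is global rather than per-volume, so there is no c to pass, and that call is downgraded to a plain pr_err() carrying only the pid.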
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0fa6c803992e..02d1ee778df0 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -146,12 +146,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
 	if (c->highest_inum >= INUM_WARN_WATERMARK) {
 		if (c->highest_inum >= INUM_WATERMARK) {
 			spin_unlock(&c->cnt_lock);
-			ubifs_err("out of inode numbers");
+			ubifs_err(c, "out of inode numbers");
 			make_bad_inode(inode);
 			iput(inode);
 			return ERR_PTR(-EINVAL);
 		}
-		ubifs_warn("running out of inode numbers (current %lu, max %d)",
+		ubifs_warn(c, "running out of inode numbers (current %lu, max %u)",
 			   (unsigned long)c->highest_inum, INUM_WATERMARK);
 	}
 
@@ -222,7 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 			 * checking.
 			 */
 			err = PTR_ERR(inode);
-			ubifs_err("dead directory entry '%pd', error %d",
+			ubifs_err(c, "dead directory entry '%pd', error %d",
 				  dentry, err);
 			ubifs_ro_mode(c, err);
 			goto out;
@@ -272,7 +272,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	err = ubifs_init_security(dir, inode, &dentry->d_name);
 	if (err)
-		goto out_cancel;
+		goto out_inode;
 
 	mutex_lock(&dir_ui->ui_mutex);
 	dir->i_size += sz_change;
@@ -292,11 +292,12 @@ out_cancel:
 	dir->i_size -= sz_change;
 	dir_ui->ui_size = dir->i_size;
 	mutex_unlock(&dir_ui->ui_mutex);
+out_inode:
 	make_bad_inode(inode);
 	iput(inode);
 out_budg:
 	ubifs_release_budget(c, &req);
-	ubifs_err("cannot create regular file, error %d", err);
+	ubifs_err(c, "cannot create regular file, error %d", err);
 	return err;
 }
 
@@ -449,7 +450,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
 
 out:
 	if (err != -ENOENT) {
-		ubifs_err("cannot find next direntry, error %d", err);
+		ubifs_err(c, "cannot find next direntry, error %d", err);
 		return err;
 	}
 
@@ -732,7 +733,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	err = ubifs_init_security(dir, inode, &dentry->d_name);
 	if (err)
-		goto out_cancel;
+		goto out_inode;
 
 	mutex_lock(&dir_ui->ui_mutex);
 	insert_inode_hash(inode);
@@ -743,7 +744,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	dir->i_mtime = dir->i_ctime = inode->i_ctime;
 	err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
 	if (err) {
-		ubifs_err("cannot create directory, error %d", err);
+		ubifs_err(c, "cannot create directory, error %d", err);
 		goto out_cancel;
 	}
 	mutex_unlock(&dir_ui->ui_mutex);
@@ -757,6 +758,7 @@ out_cancel:
 	dir_ui->ui_size = dir->i_size;
 	drop_nlink(dir);
 	mutex_unlock(&dir_ui->ui_mutex);
+out_inode:
 	make_bad_inode(inode);
 	iput(inode);
 out_budg:
@@ -816,7 +818,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
 
 	err = ubifs_init_security(dir, inode, &dentry->d_name);
 	if (err)
-		goto out_cancel;
+		goto out_inode;
 
 	mutex_lock(&dir_ui->ui_mutex);
 	dir->i_size += sz_change;
@@ -836,6 +838,7 @@ out_cancel:
 	dir->i_size -= sz_change;
 	dir_ui->ui_size = dir->i_size;
 	mutex_unlock(&dir_ui->ui_mutex);
+out_inode:
 	make_bad_inode(inode);
 	iput(inode);
 out_budg:
@@ -896,7 +899,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
 
 	err = ubifs_init_security(dir, inode, &dentry->d_name);
 	if (err)
-		goto out_cancel;
+		goto out_inode;
 
 	mutex_lock(&dir_ui->ui_mutex);
 	dir->i_size += sz_change;
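A second, independent fix travels with these dir.c hunks: when ubifs_init_security() fails, the old code jumped to out_cancel, which rolls back a directory-size update and unlocks a mutex that at that point has not been taken. The new out_inode label enters the error ladder one rung lower, so only the inode and the budget are released. The toy userspace program below mimics the ladder to show why the entry point matters; it is an illustration only, none of it is UBIFS code:

	#include <stdio.h>
	#include <stdlib.h>

	static int size_accounted;

	/* Each label undoes exactly one earlier step; an early failure must
	 * enter the ladder below the rollback of steps it never performed. */
	static int create_object(int fail_early)
	{
		int err = -1;
		void *budget = malloc(16);		/* step 1: reserve budget */
		if (!budget)
			return err;
		void *inode = malloc(16);		/* step 2: allocate inode */
		if (!inode)
			goto out_budg;

		if (fail_early)				/* "security init" failed */
			goto out_inode;

		size_accounted = 1;			/* step 3: account dir size */
		goto out_cancel;			/* "journal update" failed */

	out_cancel:
		size_accounted = 0;			/* undo step 3 only */
	out_inode:
		free(inode);				/* undo step 2 */
	out_budg:
		free(budget);				/* undo step 1 */
		return err;
	}

	int main(void)
	{
		printf("early failure: %d\n", create_object(1));
		printf("late failure: %d\n", create_object(0));
		return 0;
	}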
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0acf626..3ba3fef64e9e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
  */
 
 #include "ubifs.h"
-#include <linux/aio.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
@@ -80,7 +79,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
 
 	dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
 	out_len = UBIFS_BLOCK_SIZE;
-	err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+	err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
 			       le16_to_cpu(dn->compr_type));
 	if (err || len != out_len)
 		goto dump;
@@ -96,7 +95,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
 	return 0;
 
 dump:
-	ubifs_err("bad data node (block %u, inode %lu)",
+	ubifs_err(c, "bad data node (block %u, inode %lu)",
 		  block, inode->i_ino);
 	ubifs_dump_node(c, dn);
 	return -EINVAL;
@@ -161,13 +160,14 @@ static int do_readpage(struct page *page)
 		addr += UBIFS_BLOCK_SIZE;
 	}
 	if (err) {
+		struct ubifs_info *c = inode->i_sb->s_fs_info;
 		if (err == -ENOENT) {
 			/* Not found, so it must be a hole */
 			SetPageChecked(page);
 			dbg_gen("hole");
 			goto out_free;
 		}
-		ubifs_err("cannot read page %lu of inode %lu, error %d",
+		ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
 			  page->index, inode->i_ino, err);
 		goto error;
 	}
@@ -650,7 +650,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
 
 		dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
 		out_len = UBIFS_BLOCK_SIZE;
-		err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+		err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
 				       le16_to_cpu(dn->compr_type));
 		if (err || len != out_len)
 			goto out_err;
@@ -698,7 +698,7 @@ out_err:
 	SetPageError(page);
 	flush_dcache_page(page);
 	kunmap(page);
-	ubifs_err("bad data node (block %u, inode %lu)",
+	ubifs_err(c, "bad data node (block %u, inode %lu)",
 		  page_block, inode->i_ino);
 	return -EINVAL;
 }
@@ -802,7 +802,7 @@ out_free:
 	return ret;
 
 out_warn:
-	ubifs_warn("ignoring error %d and skipping bulk-read", err);
+	ubifs_warn(c, "ignoring error %d and skipping bulk-read", err);
 	goto out_free;
 
 out_bu_off:
@@ -930,7 +930,7 @@ static int do_writepage(struct page *page, int len)
 	}
 	if (err) {
 		SetPageError(page);
-		ubifs_err("cannot write page %lu of inode %lu, error %d",
+		ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
 			  page->index, inode->i_ino, err);
 		ubifs_ro_mode(c, err);
 	}
@@ -1485,7 +1485,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
 	err = ubifs_budget_space(c, &req);
 	if (unlikely(err)) {
 		if (err == -ENOSPC)
-			ubifs_warn("out of space for mmapped file (inode number %lu)",
+			ubifs_warn(c, "out of space for mmapped file (inode number %lu)",
 				   inode->i_ino);
 		return VM_FAULT_SIGBUS;
 	}
@@ -1581,8 +1581,6 @@ const struct inode_operations ubifs_symlink_inode_operations = {
 
 const struct file_operations ubifs_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read		= new_sync_read,
-	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= ubifs_write_iter,
 	.mmap		= ubifs_file_mmap,
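The last file.c hunk removes the .read/.write entries along with the linux/aio.h include. This tracks the VFS-wide move to iterator-based I/O: once the generic layer routes plain read(2)/write(2) through ->read_iter/->write_iter itself, the new_sync_read/new_sync_write shims become dead weight and a filesystem only declares the _iter entry points. A sketch of the resulting minimal pattern (the exact set of fields a filesystem fills in is its own choice):

	/* Sketch: iterator-only file_operations; the field set is illustrative. */
	const struct file_operations example_file_operations = {
		.llseek		= generic_file_llseek,
		.read_iter	= generic_file_read_iter,	/* also serves read(2) */
		.write_iter	= generic_file_write_iter,	/* also serves write(2) */
		.mmap		= generic_file_mmap,
	};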
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index fb08b0c514b6..97be41215332 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -85,7 +85,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
 		c->ro_error = 1;
 		c->no_chk_data_crc = 0;
 		c->vfs_sb->s_flags |= MS_RDONLY;
-		ubifs_warn("switched to read-only mode, error %d", err);
+		ubifs_warn(c, "switched to read-only mode, error %d", err);
 		dump_stack();
 	}
 }
@@ -107,7 +107,7 @@ int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
 	 * @even_ebadmsg is true.
 	 */
 	if (err && (err != -EBADMSG || even_ebadmsg)) {
-		ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
+		ubifs_err(c, "reading %d bytes from LEB %d:%d failed, error %d",
 			  len, lnum, offs, err);
 		dump_stack();
 	}
@@ -127,7 +127,7 @@ int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
 	else
 		err = dbg_leb_write(c, lnum, buf, offs, len);
 	if (err) {
-		ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
+		ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d",
 			  len, lnum, offs, err);
 		ubifs_ro_mode(c, err);
 		dump_stack();
@@ -147,7 +147,7 @@ int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
 	else
 		err = dbg_leb_change(c, lnum, buf, len);
 	if (err) {
-		ubifs_err("changing %d bytes in LEB %d failed, error %d",
+		ubifs_err(c, "changing %d bytes in LEB %d failed, error %d",
 			  len, lnum, err);
 		ubifs_ro_mode(c, err);
 		dump_stack();
@@ -167,7 +167,7 @@ int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
 	else
 		err = dbg_leb_unmap(c, lnum);
 	if (err) {
-		ubifs_err("unmap LEB %d failed, error %d", lnum, err);
+		ubifs_err(c, "unmap LEB %d failed, error %d", lnum, err);
 		ubifs_ro_mode(c, err);
 		dump_stack();
 	}
@@ -186,7 +186,7 @@ int ubifs_leb_map(struct ubifs_info *c, int lnum)
 	else
 		err = dbg_leb_map(c, lnum);
 	if (err) {
-		ubifs_err("mapping LEB %d failed, error %d", lnum, err);
+		ubifs_err(c, "mapping LEB %d failed, error %d", lnum, err);
 		ubifs_ro_mode(c, err);
 		dump_stack();
 	}
@@ -199,7 +199,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
 
 	err = ubi_is_mapped(c->ubi, lnum);
 	if (err < 0) {
-		ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
+		ubifs_err(c, "ubi_is_mapped failed for LEB %d, error %d",
 			  lnum, err);
 		dump_stack();
 	}
@@ -247,7 +247,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 	magic = le32_to_cpu(ch->magic);
 	if (magic != UBIFS_NODE_MAGIC) {
 		if (!quiet)
-			ubifs_err("bad magic %#08x, expected %#08x",
+			ubifs_err(c, "bad magic %#08x, expected %#08x",
 				  magic, UBIFS_NODE_MAGIC);
 		err = -EUCLEAN;
 		goto out;
@@ -256,7 +256,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 	type = ch->node_type;
 	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
 		if (!quiet)
-			ubifs_err("bad node type %d", type);
+			ubifs_err(c, "bad node type %d", type);
 		goto out;
 	}
 
@@ -279,7 +279,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 	node_crc = le32_to_cpu(ch->crc);
 	if (crc != node_crc) {
 		if (!quiet)
-			ubifs_err("bad CRC: calculated %#08x, read %#08x",
+			ubifs_err(c, "bad CRC: calculated %#08x, read %#08x",
 				  crc, node_crc);
 		err = -EUCLEAN;
 		goto out;
@@ -289,10 +289,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 
 out_len:
 	if (!quiet)
-		ubifs_err("bad node length %d", node_len);
+		ubifs_err(c, "bad node length %d", node_len);
 out:
 	if (!quiet) {
-		ubifs_err("bad node at LEB %d:%d", lnum, offs);
+		ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
 		ubifs_dump_node(c, buf);
 		dump_stack();
 	}
@@ -355,11 +355,11 @@ static unsigned long long next_sqnum(struct ubifs_info *c)
 
 	if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
 		if (sqnum >= SQNUM_WATERMARK) {
-			ubifs_err("sequence number overflow %llu, end of life",
+			ubifs_err(c, "sequence number overflow %llu, end of life",
 				  sqnum);
 			ubifs_ro_mode(c, -EINVAL);
 		}
-		ubifs_warn("running out of sequence numbers, end of life soon");
+		ubifs_warn(c, "running out of sequence numbers, end of life soon");
 	}
 
 	return sqnum;
@@ -636,7 +636,7 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
 		err = ubifs_wbuf_sync_nolock(wbuf);
 		mutex_unlock(&wbuf->io_mutex);
 		if (err) {
-			ubifs_err("cannot sync write-buffer, error %d", err);
+			ubifs_err(c, "cannot sync write-buffer, error %d", err);
 			ubifs_ro_mode(c, err);
 			goto out_timers;
 		}
@@ -833,7 +833,7 @@ exit:
 	return 0;
 
 out:
-	ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
+	ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d",
 		  len, wbuf->lnum, wbuf->offs, err);
 	ubifs_dump_node(c, buf);
 	dump_stack();
@@ -932,27 +932,27 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 	}
 
 	if (type != ch->node_type) {
-		ubifs_err("bad node type (%d but expected %d)",
+		ubifs_err(c, "bad node type (%d but expected %d)",
 			  ch->node_type, type);
 		goto out;
 	}
 
 	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 	if (err) {
-		ubifs_err("expected node type %d", type);
+		ubifs_err(c, "expected node type %d", type);
 		return err;
 	}
 
 	rlen = le32_to_cpu(ch->len);
 	if (rlen != len) {
-		ubifs_err("bad node length %d, expected %d", rlen, len);
+		ubifs_err(c, "bad node length %d, expected %d", rlen, len);
 		goto out;
 	}
 
 	return 0;
 
 out:
-	ubifs_err("bad node at LEB %d:%d", lnum, offs);
+	ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
 	ubifs_dump_node(c, buf);
 	dump_stack();
 	return -EINVAL;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 648b143606cc..3c7b29de0ca7 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -138,7 +138,7 @@ static int setflags(struct inode *inode, int flags)
 	return err;
 
 out_unlock:
-	ubifs_err("can't modify inode %lu attributes", inode->i_ino);
+	ubifs_err(c, "can't modify inode %lu attributes", inode->i_ino);
 	mutex_unlock(&ui->ui_mutex);
 	ubifs_release_budget(c, &req);
 	return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f6ac3f29323c..90ae1a8439d9 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -363,11 +363,11 @@ again:
 		 * This should not happen unless the journal size limitations
 		 * are too tough.
 		 */
-		ubifs_err("stuck in space allocation");
+		ubifs_err(c, "stuck in space allocation");
 		err = -ENOSPC;
 		goto out;
 	} else if (cmt_retries > 32)
-		ubifs_warn("too many space allocation re-tries (%d)",
+		ubifs_warn(c, "too many space allocation re-tries (%d)",
 			   cmt_retries);
 
 	dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
@@ -380,7 +380,7 @@ again:
 	goto again;
 
 out:
-	ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
+	ubifs_err(c, "cannot reserve %d bytes in jhead %d, error %d",
 		  len, jhead, err);
 	if (err == -ENOSPC) {
 		/* This are some budgeting problems, print useful information */
@@ -731,7 +731,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 		compr_type = ui->compr_type;
 
 	out_len = dlen - UBIFS_DATA_NODE_SZ;
-	ubifs_compress(buf, len, &data->data, &out_len, &compr_type);
+	ubifs_compress(c, buf, len, &data->data, &out_len, &compr_type);
 	ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
 
 	dlen = UBIFS_DATA_NODE_SZ + out_len;
@@ -1100,7 +1100,8 @@ out_free:
  * This function is used when an inode is truncated and the last data node of
  * the inode has to be re-compressed and re-written.
  */
-static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
+static int recomp_data_node(const struct ubifs_info *c,
+			    struct ubifs_data_node *dn, int *new_len)
 {
 	void *buf;
 	int err, len, compr_type, out_len;
@@ -1112,11 +1113,11 @@ static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
 
 	len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
 	compr_type = le16_to_cpu(dn->compr_type);
-	err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type);
+	err = ubifs_decompress(c, &dn->data, len, buf, &out_len, compr_type);
 	if (err)
 		goto out;
 
-	ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type);
+	ubifs_compress(c, buf, *new_len, &dn->data, &out_len, &compr_type);
 	ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
 	dn->compr_type = cpu_to_le16(compr_type);
 	dn->size = cpu_to_le32(*new_len);
@@ -1191,7 +1192,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 		int compr_type = le16_to_cpu(dn->compr_type);
 
 		if (compr_type != UBIFS_COMPR_NONE) {
-			err = recomp_data_node(dn, &dlen);
+			err = recomp_data_node(c, dn, &dlen);
 			if (err)
 				goto out_free;
 		} else {
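The recomp_data_node() hunks are the one place in journal.c where the new c argument is needed for more than messages: ubifs_compress() and ubifs_decompress() now take the volume too, and truncation exercises both in one round trip, inflating the old tail node, cutting it to the new length, and compressing it again. A hedged sketch of that round trip, with the scratch-buffer sizing simplified relative to whatever the real function uses:

	/* Sketch of the truncate-time recompression shown above. */
	static int example_recomp(const struct ubifs_info *c,
				  struct ubifs_data_node *dn, int new_len)
	{
		int err, len, compr_type, out_len = UBIFS_BLOCK_SIZE;
		void *buf = kmalloc(UBIFS_BLOCK_SIZE, GFP_NOFS);

		if (!buf)
			return -ENOMEM;
		len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
		compr_type = le16_to_cpu(dn->compr_type);
		err = ubifs_decompress(c, &dn->data, len, buf, &out_len, compr_type);
		if (!err) {
			/* Recompress the shortened payload; compr_type may change. */
			ubifs_compress(c, buf, new_len, &dn->data, &out_len, &compr_type);
			dn->compr_type = cpu_to_le16(compr_type);
			dn->size = cpu_to_le32(new_len);
		}
		kfree(buf);
		return err;
	}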
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index c14628fbeee2..8c795e6392b1 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -696,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
 	destroy_done_tree(&done_tree);
 	vfree(buf);
 	if (write_lnum == c->lhead_lnum) {
-		ubifs_err("log is too full");
+		ubifs_err(c, "log is too full");
 		return -EINVAL;
 	}
 	/* Unmap remaining LEBs */
@@ -743,7 +743,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
 			bud_bytes += c->leb_size - bud->start;
 
 	if (c->bud_bytes != bud_bytes) {
-		ubifs_err("bad bud_bytes %lld, calculated %lld",
+		ubifs_err(c, "bad bud_bytes %lld, calculated %lld",
 			  c->bud_bytes, bud_bytes);
 		err = -EINVAL;
 	}
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 46190a7c42a6..a0011aa3a779 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -682,7 +682,7 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
 out:
 	ubifs_release_lprops(c);
 	if (err)
-		ubifs_err("cannot change properties of LEB %d, error %d",
+		ubifs_err(c, "cannot change properties of LEB %d, error %d",
 			  lnum, err);
 	return err;
 }
@@ -721,7 +721,7 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
 out:
 	ubifs_release_lprops(c);
 	if (err)
-		ubifs_err("cannot update properties of LEB %d, error %d",
+		ubifs_err(c, "cannot update properties of LEB %d, error %d",
 			  lnum, err);
 	return err;
 }
@@ -746,7 +746,7 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
 	lpp = ubifs_lpt_lookup(c, lnum);
 	if (IS_ERR(lpp)) {
 		err = PTR_ERR(lpp);
-		ubifs_err("cannot read properties of LEB %d, error %d",
+		ubifs_err(c, "cannot read properties of LEB %d, error %d",
 			  lnum, err);
 		goto out;
 	}
@@ -873,13 +873,13 @@ int dbg_check_cats(struct ubifs_info *c)
 
 	list_for_each_entry(lprops, &c->empty_list, list) {
 		if (lprops->free != c->leb_size) {
-			ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)",
+			ubifs_err(c, "non-empty LEB %d on empty list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
-			ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)",
+			ubifs_err(c, "taken LEB %d on empty list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
@@ -889,13 +889,13 @@ int dbg_check_cats(struct ubifs_info *c)
 	i = 0;
 	list_for_each_entry(lprops, &c->freeable_list, list) {
 		if (lprops->free + lprops->dirty != c->leb_size) {
-			ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
+			ubifs_err(c, "non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
-			ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)",
+			ubifs_err(c, "taken LEB %d on freeable list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
@@ -903,7 +903,7 @@ int dbg_check_cats(struct ubifs_info *c)
 		i += 1;
 	}
 	if (i != c->freeable_cnt) {
-		ubifs_err("freeable list count %d expected %d", i,
+		ubifs_err(c, "freeable list count %d expected %d", i,
 			  c->freeable_cnt);
 		return -EINVAL;
 	}
@@ -912,26 +912,26 @@ int dbg_check_cats(struct ubifs_info *c)
 	list_for_each(pos, &c->idx_gc)
 		i += 1;
 	if (i != c->idx_gc_cnt) {
-		ubifs_err("idx_gc list count %d expected %d", i,
+		ubifs_err(c, "idx_gc list count %d expected %d", i,
 			  c->idx_gc_cnt);
 		return -EINVAL;
 	}
 
 	list_for_each_entry(lprops, &c->frdi_idx_list, list) {
 		if (lprops->free + lprops->dirty != c->leb_size) {
-			ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+			ubifs_err(c, "non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
-			ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+			ubifs_err(c, "taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
 		}
 		if (!(lprops->flags & LPROPS_INDEX)) {
-			ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+			ubifs_err(c, "non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
 				  lprops->lnum, lprops->free, lprops->dirty,
 				  lprops->flags);
 			return -EINVAL;
@@ -944,15 +944,15 @@ int dbg_check_cats(struct ubifs_info *c)
 		for (i = 0; i < heap->cnt; i++) {
 			lprops = heap->arr[i];
 			if (!lprops) {
-				ubifs_err("null ptr in LPT heap cat %d", cat);
+				ubifs_err(c, "null ptr in LPT heap cat %d", cat);
 				return -EINVAL;
 			}
 			if (lprops->hpos != i) {
-				ubifs_err("bad ptr in LPT heap cat %d", cat);
+				ubifs_err(c, "bad ptr in LPT heap cat %d", cat);
 				return -EINVAL;
 			}
 			if (lprops->flags & LPROPS_TAKEN) {
-				ubifs_err("taken LEB in LPT heap cat %d", cat);
+				ubifs_err(c, "taken LEB in LPT heap cat %d", cat);
 				return -EINVAL;
 			}
 		}
@@ -988,7 +988,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 			goto out;
 		}
 		if (lprops != lp) {
-			ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
+			ubifs_err(c, "lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
 				  (size_t)lprops, (size_t)lp, lprops->lnum,
 				  lp->lnum);
 			err = 4;
@@ -1008,7 +1008,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 	}
 out:
 	if (err) {
-		ubifs_err("failed cat %d hpos %d err %d", cat, i, err);
+		ubifs_err(c, "failed cat %d hpos %d err %d", cat, i, err);
 		dump_stack();
 		ubifs_dump_heap(c, heap, cat);
 	}
@@ -1039,7 +1039,7 @@ static int scan_check_cb(struct ubifs_info *c,
 	if (cat != LPROPS_UNCAT) {
 		cat = ubifs_categorize_lprops(c, lp);
 		if (cat != (lp->flags & LPROPS_CAT_MASK)) {
-			ubifs_err("bad LEB category %d expected %d",
+			ubifs_err(c, "bad LEB category %d expected %d",
 				  (lp->flags & LPROPS_CAT_MASK), cat);
 			return -EINVAL;
 		}
@@ -1074,7 +1074,7 @@ static int scan_check_cb(struct ubifs_info *c,
 			}
 		}
 		if (!found) {
-			ubifs_err("bad LPT list (category %d)", cat);
+			ubifs_err(c, "bad LPT list (category %d)", cat);
 			return -EINVAL;
 		}
 	}
@@ -1086,7 +1086,7 @@ static int scan_check_cb(struct ubifs_info *c,
 
 		if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
 		    lp != heap->arr[lp->hpos]) {
-			ubifs_err("bad LPT heap (category %d)", cat);
+			ubifs_err(c, "bad LPT heap (category %d)", cat);
 			return -EINVAL;
 		}
 	}
@@ -1133,7 +1133,7 @@ static int scan_check_cb(struct ubifs_info *c,
 		is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;
 
 		if (is_idx && snod->type != UBIFS_IDX_NODE) {
-			ubifs_err("indexing node in data LEB %d:%d",
+			ubifs_err(c, "indexing node in data LEB %d:%d",
 				  lnum, snod->offs);
 			goto out_destroy;
 		}
@@ -1159,7 +1159,7 @@ static int scan_check_cb(struct ubifs_info *c,
 
 	if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
 	    dirty < 0) {
-		ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d",
+		ubifs_err(c, "bad calculated accounting for LEB %d: free %d, dirty %d",
 			  lnum, free, dirty);
 		goto out_destroy;
 	}
@@ -1206,13 +1206,13 @@ static int scan_check_cb(struct ubifs_info *c,
 			/* Free but not unmapped LEB, it's fine */
 			is_idx = 0;
 		else {
-			ubifs_err("indexing node without indexing flag");
+			ubifs_err(c, "indexing node without indexing flag");
 			goto out_print;
 		}
 	}
 
 	if (!is_idx && (lp->flags & LPROPS_INDEX)) {
-		ubifs_err("data node with indexing flag");
+		ubifs_err(c, "data node with indexing flag");
 		goto out_print;
 	}
 
@@ -1241,7 +1241,7 @@ static int scan_check_cb(struct ubifs_info *c,
 	return LPT_SCAN_CONTINUE;
 
 out_print:
-	ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
+	ubifs_err(c, "bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
 		  lnum, lp->free, lp->dirty, lp->flags, free, dirty);
 	ubifs_dump_leb(c, lnum);
 out_destroy:
@@ -1293,11 +1293,11 @@ int dbg_check_lprops(struct ubifs_info *c)
 	    lst.total_free != c->lst.total_free ||
 	    lst.total_dirty != c->lst.total_dirty ||
 	    lst.total_used != c->lst.total_used) {
-		ubifs_err("bad overall accounting");
-		ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
+		ubifs_err(c, "bad overall accounting");
+		ubifs_err(c, "calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
			  lst.empty_lebs, lst.idx_lebs, lst.total_free,
			  lst.total_dirty, lst.total_used);
-		ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
+		ubifs_err(c, "read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
 			  c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
 			  c->lst.total_dirty, c->lst.total_used);
 		err = -EINVAL;
@@ -1306,10 +1306,10 @@ int dbg_check_lprops(struct ubifs_info *c)
 
 	if (lst.total_dead != c->lst.total_dead ||
 	    lst.total_dark != c->lst.total_dark) {
-		ubifs_err("bad dead/dark space accounting");
-		ubifs_err("calculated: total_dead %lld, total_dark %lld",
+		ubifs_err(c, "bad dead/dark space accounting");
+		ubifs_err(c, "calculated: total_dead %lld, total_dark %lld",
 			  lst.total_dead, lst.total_dark);
-		ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
+		ubifs_err(c, "read from lprops: total_dead %lld, total_dark %lld",
 			  c->lst.total_dead, c->lst.total_dark);
 		err = -EINVAL;
 		goto out;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 421bd0a80424..dc9f27e9d61b 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -145,13 +145,13 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
145 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ 145 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
146 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); 146 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
147 if (lebs_needed > c->lpt_lebs) { 147 if (lebs_needed > c->lpt_lebs) {
148 ubifs_err("too few LPT LEBs"); 148 ubifs_err(c, "too few LPT LEBs");
149 return -EINVAL; 149 return -EINVAL;
150 } 150 }
151 151
152 /* Verify that ltab fits in a single LEB (since ltab is a single node */ 152 /* Verify that ltab fits in a single LEB (since ltab is a single node */
153 if (c->ltab_sz > c->leb_size) { 153 if (c->ltab_sz > c->leb_size) {
154 ubifs_err("LPT ltab too big"); 154 ubifs_err(c, "LPT ltab too big");
155 return -EINVAL; 155 return -EINVAL;
156 } 156 }
157 157
@@ -213,7 +213,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
213 continue; 213 continue;
214 } 214 }
215 if (c->ltab_sz > c->leb_size) { 215 if (c->ltab_sz > c->leb_size) {
216 ubifs_err("LPT ltab too big"); 216 ubifs_err(c, "LPT ltab too big");
217 return -EINVAL; 217 return -EINVAL;
218 } 218 }
219 *main_lebs = c->main_lebs; 219 *main_lebs = c->main_lebs;
@@ -911,7 +911,7 @@ static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode,
911 * 911 *
912 * This function returns %0 on success and a negative error code on failure. 912 * This function returns %0 on success and a negative error code on failure.
913 */ 913 */
914static int check_lpt_crc(void *buf, int len) 914static int check_lpt_crc(const struct ubifs_info *c, void *buf, int len)
915{ 915{
916 int pos = 0; 916 int pos = 0;
917 uint8_t *addr = buf; 917 uint8_t *addr = buf;
@@ -921,8 +921,8 @@ static int check_lpt_crc(void *buf, int len)
921 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, 921 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
922 len - UBIFS_LPT_CRC_BYTES); 922 len - UBIFS_LPT_CRC_BYTES);
923 if (crc != calc_crc) { 923 if (crc != calc_crc) {
924 ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, 924 ubifs_err(c, "invalid crc in LPT node: crc %hx calc %hx",
925 calc_crc); 925 crc, calc_crc);
926 dump_stack(); 926 dump_stack();
927 return -EINVAL; 927 return -EINVAL;
928 } 928 }
@@ -938,14 +938,15 @@ static int check_lpt_crc(void *buf, int len)
938 * 938 *
939 * This function returns %0 on success and a negative error code on failure. 939 * This function returns %0 on success and a negative error code on failure.
940 */ 940 */
941static int check_lpt_type(uint8_t **addr, int *pos, int type) 941static int check_lpt_type(const struct ubifs_info *c, uint8_t **addr,
942 int *pos, int type)
942{ 943{
943 int node_type; 944 int node_type;
944 945
945 node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); 946 node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
946 if (node_type != type) { 947 if (node_type != type) {
947 ubifs_err("invalid type (%d) in LPT node type %d", node_type, 948 ubifs_err(c, "invalid type (%d) in LPT node type %d",
948 type); 949 node_type, type);
949 dump_stack(); 950 dump_stack();
950 return -EINVAL; 951 return -EINVAL;
951 } 952 }
@@ -966,7 +967,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
966 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 967 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
967 int i, pos = 0, err; 968 int i, pos = 0, err;
968 969
969 err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); 970 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_PNODE);
970 if (err) 971 if (err)
971 return err; 972 return err;
972 if (c->big_lpt) 973 if (c->big_lpt)
@@ -985,7 +986,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
985 lprops->flags = 0; 986 lprops->flags = 0;
986 lprops->flags |= ubifs_categorize_lprops(c, lprops); 987 lprops->flags |= ubifs_categorize_lprops(c, lprops);
987 } 988 }
988 err = check_lpt_crc(buf, c->pnode_sz); 989 err = check_lpt_crc(c, buf, c->pnode_sz);
989 return err; 990 return err;
990} 991}
991 992
@@ -1003,7 +1004,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1003 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1004 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1004 int i, pos = 0, err; 1005 int i, pos = 0, err;
1005 1006
1006 err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); 1007 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_NNODE);
1007 if (err) 1008 if (err)
1008 return err; 1009 return err;
1009 if (c->big_lpt) 1010 if (c->big_lpt)
@@ -1019,7 +1020,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1019 nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, 1020 nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos,
1020 c->lpt_offs_bits); 1021 c->lpt_offs_bits);
1021 } 1022 }
1022 err = check_lpt_crc(buf, c->nnode_sz); 1023 err = check_lpt_crc(c, buf, c->nnode_sz);
1023 return err; 1024 return err;
1024} 1025}
1025 1026
@@ -1035,7 +1036,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
1035 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1036 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1036 int i, pos = 0, err; 1037 int i, pos = 0, err;
1037 1038
1038 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); 1039 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LTAB);
1039 if (err) 1040 if (err)
1040 return err; 1041 return err;
1041 for (i = 0; i < c->lpt_lebs; i++) { 1042 for (i = 0; i < c->lpt_lebs; i++) {
@@ -1051,7 +1052,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
1051 c->ltab[i].tgc = 0; 1052 c->ltab[i].tgc = 0;
1052 c->ltab[i].cmt = 0; 1053 c->ltab[i].cmt = 0;
1053 } 1054 }
1054 err = check_lpt_crc(buf, c->ltab_sz); 1055 err = check_lpt_crc(c, buf, c->ltab_sz);
1055 return err; 1056 return err;
1056} 1057}
1057 1058
@@ -1067,7 +1068,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
 	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
 	int i, pos = 0, err;
 
-	err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE);
+	err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LSAVE);
 	if (err)
 		return err;
 	for (i = 0; i < c->lsave_cnt; i++) {
@@ -1077,7 +1078,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
 			return -EINVAL;
 		c->lsave[i] = lnum;
 	}
-	err = check_lpt_crc(buf, c->lsave_sz);
+	err = check_lpt_crc(c, buf, c->lsave_sz);
 	return err;
 }
 
@@ -1243,7 +1244,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 	return 0;
 
 out:
-	ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
+	ubifs_err(c, "error %d reading nnode at %d:%d", err, lnum, offs);
 	dump_stack();
 	kfree(nnode);
 	return err;
@@ -1308,10 +1309,10 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 	return 0;
 
 out:
-	ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
+	ubifs_err(c, "error %d reading pnode at %d:%d", err, lnum, offs);
 	ubifs_dump_pnode(c, pnode, parent, iip);
 	dump_stack();
-	ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
+	ubifs_err(c, "calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
 	kfree(pnode);
 	return err;
 }
@@ -2095,7 +2096,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 	int i;
 
 	if (pnode->num != col) {
-		ubifs_err("pnode num %d expected %d parent num %d iip %d",
+		ubifs_err(c, "pnode num %d expected %d parent num %d iip %d",
 			  pnode->num, col, pnode->parent->num, pnode->iip);
 		return -EINVAL;
 	}
@@ -2110,13 +2111,13 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 		if (lnum >= c->leb_cnt)
 			continue;
 		if (lprops->lnum != lnum) {
-			ubifs_err("bad LEB number %d expected %d",
+			ubifs_err(c, "bad LEB number %d expected %d",
 				  lprops->lnum, lnum);
 			return -EINVAL;
 		}
 		if (lprops->flags & LPROPS_TAKEN) {
 			if (cat != LPROPS_UNCAT) {
-				ubifs_err("LEB %d taken but not uncat %d",
+				ubifs_err(c, "LEB %d taken but not uncat %d",
 					  lprops->lnum, cat);
 				return -EINVAL;
 			}
@@ -2129,7 +2130,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 		case LPROPS_FRDI_IDX:
 			break;
 		default:
-			ubifs_err("LEB %d index but cat %d",
+			ubifs_err(c, "LEB %d index but cat %d",
 				  lprops->lnum, cat);
 			return -EINVAL;
 		}
@@ -2142,7 +2143,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 		case LPROPS_FREEABLE:
 			break;
 		default:
-			ubifs_err("LEB %d not index but cat %d",
+			ubifs_err(c, "LEB %d not index but cat %d",
 				  lprops->lnum, cat);
 			return -EINVAL;
 		}
@@ -2183,14 +2184,14 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 			break;
 	}
 	if (!found) {
-		ubifs_err("LEB %d cat %d not found in cat heap/list",
+		ubifs_err(c, "LEB %d cat %d not found in cat heap/list",
 			  lprops->lnum, cat);
 		return -EINVAL;
 	}
 	switch (cat) {
 	case LPROPS_EMPTY:
 		if (lprops->free != c->leb_size) {
-			ubifs_err("LEB %d cat %d free %d dirty %d",
+			ubifs_err(c, "LEB %d cat %d free %d dirty %d",
 				  lprops->lnum, cat, lprops->free,
 				  lprops->dirty);
 			return -EINVAL;
@@ -2199,7 +2200,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
 	case LPROPS_FREEABLE:
 	case LPROPS_FRDI_IDX:
 		if (lprops->free + lprops->dirty != c->leb_size) {
-			ubifs_err("LEB %d cat %d free %d dirty %d",
+			ubifs_err(c, "LEB %d cat %d free %d dirty %d",
 				  lprops->lnum, cat, lprops->free,
 				  lprops->dirty);
 			return -EINVAL;
@@ -2236,7 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
 		/* cnode is a nnode */
 		num = calc_nnode_num(row, col);
 		if (cnode->num != num) {
-			ubifs_err("nnode num %d expected %d parent num %d iip %d",
+			ubifs_err(c, "nnode num %d expected %d parent num %d iip %d",
 				  cnode->num, num,
 				  (nnode ? nnode->num : 0), cnode->iip);
 			return -EINVAL;
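
All of the hunks in this file follow one pattern: ubifs_err(), ubifs_warn() and ubifs_msg() now take the struct ubifs_info pointer as their first argument, so each message can identify the UBI volume it concerns. A rough sketch of what the reworked helpers presumably look like (the real definitions live in fs/ubifs/ubifs.h and are not part of this diff; the exact prefix format is an assumption):

/* Sketch only -- assumed shape of the context-aware message helpers. */
#define ubifs_msg(c, fmt, ...)                                        \
	pr_notice("UBIFS (ubi%d:%d): " fmt "\n",                      \
		  (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
#define ubifs_err(c, fmt, ...)                                        \
	pr_err("UBIFS error (ubi%d:%d): " fmt "\n",                   \
	       (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
#define ubifs_warn(c, fmt, ...)                                       \
	pr_warn("UBIFS warning (ubi%d:%d): " fmt "\n",                \
		(c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)

With several UBIFS volumes mounted at once, messages from different volumes can then be told apart in the log, which is the point of threading c through every call site below.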
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index d9c02928e992..ce89bdc3eb02 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -319,7 +319,7 @@ static int layout_cnodes(struct ubifs_info *c)
 	return 0;
 
 no_space:
-	ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+	ubifs_err(c, "LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
 		  lnum, offs, len, done_ltab, done_lsave);
 	ubifs_dump_lpt_info(c);
 	ubifs_dump_lpt_lebs(c);
@@ -543,7 +543,7 @@ static int write_cnodes(struct ubifs_info *c)
 	return 0;
 
 no_space:
-	ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+	ubifs_err(c, "LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
 		  lnum, offs, len, done_ltab, done_lsave);
 	ubifs_dump_lpt_info(c);
 	ubifs_dump_lpt_lebs(c);
@@ -1638,7 +1638,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 
 	buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
 	if (!buf) {
-		ubifs_err("cannot allocate memory for ltab checking");
+		ubifs_err(c, "cannot allocate memory for ltab checking");
 		return 0;
 	}
 
@@ -1660,18 +1660,18 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
 			continue;
 		}
 		if (!dbg_is_all_ff(p, len)) {
-			ubifs_err("invalid empty space in LEB %d at %d",
+			ubifs_err(c, "invalid empty space in LEB %d at %d",
 				  lnum, c->leb_size - len);
 			err = -EINVAL;
 		}
 		i = lnum - c->lpt_first;
 		if (len != c->ltab[i].free) {
-			ubifs_err("invalid free space in LEB %d (free %d, expected %d)",
+			ubifs_err(c, "invalid free space in LEB %d (free %d, expected %d)",
 				  lnum, len, c->ltab[i].free);
 			err = -EINVAL;
 		}
 		if (dirty != c->ltab[i].dirty) {
-			ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)",
+			ubifs_err(c, "invalid dirty space in LEB %d (dirty %d, expected %d)",
 				  lnum, dirty, c->ltab[i].dirty);
 			err = -EINVAL;
 		}
@@ -1725,7 +1725,7 @@ int dbg_check_ltab(struct ubifs_info *c)
 	for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
 		err = dbg_check_ltab_lnum(c, lnum);
 		if (err) {
-			ubifs_err("failed at LEB %d", lnum);
+			ubifs_err(c, "failed at LEB %d", lnum);
 			return err;
 		}
 	}
@@ -1757,7 +1757,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
 			free += c->leb_size;
 	}
 	if (free < c->lpt_sz) {
-		ubifs_err("LPT space error: free %lld lpt_sz %lld",
+		ubifs_err(c, "LPT space error: free %lld lpt_sz %lld",
 			  free, c->lpt_sz);
 		ubifs_dump_lpt_info(c);
 		ubifs_dump_lpt_lebs(c);
@@ -1797,12 +1797,12 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 		d->chk_lpt_lebs = 0;
 		d->chk_lpt_wastage = 0;
 		if (c->dirty_pn_cnt > c->pnode_cnt) {
-			ubifs_err("dirty pnodes %d exceed max %d",
+			ubifs_err(c, "dirty pnodes %d exceed max %d",
 				  c->dirty_pn_cnt, c->pnode_cnt);
 			err = -EINVAL;
 		}
 		if (c->dirty_nn_cnt > c->nnode_cnt) {
-			ubifs_err("dirty nnodes %d exceed max %d",
+			ubifs_err(c, "dirty nnodes %d exceed max %d",
 				  c->dirty_nn_cnt, c->nnode_cnt);
 			err = -EINVAL;
 		}
@@ -1820,22 +1820,22 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 		chk_lpt_sz *= d->chk_lpt_lebs;
 		chk_lpt_sz += len - c->nhead_offs;
 		if (d->chk_lpt_sz != chk_lpt_sz) {
-			ubifs_err("LPT wrote %lld but space used was %lld",
+			ubifs_err(c, "LPT wrote %lld but space used was %lld",
 				  d->chk_lpt_sz, chk_lpt_sz);
 			err = -EINVAL;
 		}
 		if (d->chk_lpt_sz > c->lpt_sz) {
-			ubifs_err("LPT wrote %lld but lpt_sz is %lld",
+			ubifs_err(c, "LPT wrote %lld but lpt_sz is %lld",
 				  d->chk_lpt_sz, c->lpt_sz);
 			err = -EINVAL;
 		}
 		if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
-			ubifs_err("LPT layout size %lld but wrote %lld",
+			ubifs_err(c, "LPT layout size %lld but wrote %lld",
 				  d->chk_lpt_sz, d->chk_lpt_sz2);
 			err = -EINVAL;
 		}
 		if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
-			ubifs_err("LPT new nhead offs: expected %d was %d",
+			ubifs_err(c, "LPT new nhead offs: expected %d was %d",
 				  d->new_nhead_offs, len);
 			err = -EINVAL;
 		}
@@ -1845,7 +1845,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 		if (c->big_lpt)
 			lpt_sz += c->lsave_sz;
 		if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
-			ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
+			ubifs_err(c, "LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
 				  d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
 			err = -EINVAL;
 		}
@@ -1887,7 +1887,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 	pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
 	buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
 	if (!buf) {
-		ubifs_err("cannot allocate memory to dump LPT");
+		ubifs_err(c, "cannot allocate memory to dump LPT");
 		return;
 	}
 
@@ -1962,7 +1962,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
 			pr_err("LEB %d:%d, lsave len\n", lnum, offs);
 			break;
 		default:
-			ubifs_err("LPT node type %d not recognized", node_type);
+			ubifs_err(c, "LPT node type %d not recognized", node_type);
 			goto out;
 		}
 
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 1a4bb9e8b3b8..c6a5e39e2ba5 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -82,7 +82,7 @@ out:
 	return -EUCLEAN;
 
 out_dump:
-	ubifs_err("unexpected node type %d master LEB %d:%d",
+	ubifs_err(c, "unexpected node type %d master LEB %d:%d",
 		  snod->type, lnum, snod->offs);
 	ubifs_scan_destroy(sleb);
 	return -EINVAL;
@@ -240,7 +240,7 @@ static int validate_master(const struct ubifs_info *c)
 	return 0;
 
 out:
-	ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
+	ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err);
 	ubifs_dump_node(c, c->mst_node);
 	return -EINVAL;
 }
@@ -316,7 +316,7 @@ int ubifs_read_master(struct ubifs_info *c)
 
 	if (c->leb_cnt < old_leb_cnt ||
 	    c->leb_cnt < UBIFS_MIN_LEB_CNT) {
-		ubifs_err("bad leb_cnt on master node");
+		ubifs_err(c, "bad leb_cnt on master node");
 		ubifs_dump_node(c, c->mst_node);
 		return -EINVAL;
 	}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 4409f486ecef..caf2d123e9ee 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -88,7 +88,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
 		else if (inum > o->inum)
 			p = &(*p)->rb_right;
 		else {
-			ubifs_err("orphaned twice");
+			ubifs_err(c, "orphaned twice");
 			spin_unlock(&c->orphan_lock);
 			kfree(orphan);
 			return 0;
@@ -155,7 +155,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
 		}
 	}
 	spin_unlock(&c->orphan_lock);
-	ubifs_err("missing orphan ino %lu", (unsigned long)inum);
+	ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
 	dump_stack();
 }
 
@@ -287,7 +287,7 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
 			 * We limit the number of orphans so that this should
 			 * never happen.
 			 */
-			ubifs_err("out of space in orphan area");
+			ubifs_err(c, "out of space in orphan area");
 			return -EINVAL;
 		}
 	}
@@ -397,7 +397,7 @@ static int consolidate(struct ubifs_info *c)
 			 * We limit the number of orphans so that this should
 			 * never happen.
 			 */
-			ubifs_err("out of space in orphan area");
+			ubifs_err(c, "out of space in orphan area");
 			err = -EINVAL;
 		}
 	spin_unlock(&c->orphan_lock);
@@ -569,7 +569,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 
 	list_for_each_entry(snod, &sleb->nodes, list) {
 		if (snod->type != UBIFS_ORPH_NODE) {
-			ubifs_err("invalid node type %d in orphan area at %d:%d",
+			ubifs_err(c, "invalid node type %d in orphan area at %d:%d",
 				  snod->type, sleb->lnum, snod->offs);
 			ubifs_dump_node(c, snod->node);
 			return -EINVAL;
@@ -596,7 +596,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 			 * number. That makes this orphan node, out of date.
 			 */
 			if (!first) {
-				ubifs_err("out of order commit number %llu in orphan node at %d:%d",
+				ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d",
 					  cmt_no, sleb->lnum, snod->offs);
 				ubifs_dump_node(c, snod->node);
 				return -EINVAL;
@@ -831,20 +831,20 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 	if (inum != ci->last_ino) {
 		/* Lowest node type is the inode node, so it comes first */
 		if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
-			ubifs_err("found orphan node ino %lu, type %d",
+			ubifs_err(c, "found orphan node ino %lu, type %d",
 				  (unsigned long)inum, key_type(c, &zbr->key));
 		ci->last_ino = inum;
 		ci->tot_inos += 1;
 		err = ubifs_tnc_read_node(c, zbr, ci->node);
 		if (err) {
-			ubifs_err("node read failed, error %d", err);
+			ubifs_err(c, "node read failed, error %d", err);
 			return err;
 		}
 		if (ci->node->nlink == 0)
 			/* Must be recorded as an orphan */
 			if (!dbg_find_check_orphan(&ci->root, inum) &&
 			    !dbg_find_orphan(c, inum)) {
-				ubifs_err("missing orphan, ino %lu",
+				ubifs_err(c, "missing orphan, ino %lu",
 					  (unsigned long)inum);
 				ci->missing += 1;
 			}
@@ -887,7 +887,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
 
 	buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
 	if (!buf) {
-		ubifs_err("cannot allocate memory to check orphans");
+		ubifs_err(c, "cannot allocate memory to check orphans");
 		return 0;
 	}
 
@@ -925,7 +925,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
 	ci.root = RB_ROOT;
 	ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
 	if (!ci.node) {
-		ubifs_err("out of memory");
+		ubifs_err(c, "out of memory");
 		return -ENOMEM;
 	}
 
@@ -935,12 +935,12 @@ static int dbg_check_orphans(struct ubifs_info *c)
 
 	err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
 	if (err) {
-		ubifs_err("cannot scan TNC, error %d", err);
+		ubifs_err(c, "cannot scan TNC, error %d", err);
 		goto out;
 	}
 
 	if (ci.missing) {
-		ubifs_err("%lu missing orphan(s)", ci.missing);
+		ubifs_err(c, "%lu missing orphan(s)", ci.missing);
 		err = -EINVAL;
 		goto out;
 	}
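
The ubifs_add_orphan() hunk above walks a red-black tree keyed by inode number before linking the new node, reporting "orphaned twice" on a duplicate key. For readers unfamiliar with the kernel rbtree API, a stripped-down sketch of that insert pattern (a hypothetical standalone helper, not the full UBIFS function):

#include <linux/rbtree.h>
#include <linux/types.h>

struct orphan {
	struct rb_node rb;
	ino_t inum;
};

/* Insert 'orphan' into 'root' keyed by inum; false means duplicate. */
static bool orphan_insert(struct rb_root *root, struct orphan *orphan)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct orphan *o = rb_entry(*p, struct orphan, rb);

		parent = *p;
		if (orphan->inum < o->inum)
			p = &(*p)->rb_left;
		else if (orphan->inum > o->inum)
			p = &(*p)->rb_right;
		else
			return false;	/* already present: "orphaned twice" */
	}
	rb_link_node(&orphan->rb, parent, p);
	rb_insert_color(&orphan->rb, root);
	return true;
}

rb_link_node() splices the node in at the found leaf position and rb_insert_color() rebalances; the real code additionally holds c->orphan_lock around the walk, as the hunk shows.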
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c640938f62f0..695fc71d5244 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -305,7 +305,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
 		mst = mst2;
 	}
 
-	ubifs_msg("recovered master node from LEB %d",
+	ubifs_msg(c, "recovered master node from LEB %d",
 		  (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
 
 	memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
@@ -360,13 +360,13 @@ int ubifs_recover_master_node(struct ubifs_info *c)
 out_err:
 	err = -EINVAL;
 out_free:
-	ubifs_err("failed to recover master node");
+	ubifs_err(c, "failed to recover master node");
 	if (mst1) {
-		ubifs_err("dumping first master node");
+		ubifs_err(c, "dumping first master node");
 		ubifs_dump_node(c, mst1);
 	}
 	if (mst2) {
-		ubifs_err("dumping second master node");
+		ubifs_err(c, "dumping second master node");
 		ubifs_dump_node(c, mst2);
 	}
 	vfree(buf2);
@@ -682,7 +682,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 				  ret, lnum, offs);
 			break;
 		} else {
-			ubifs_err("unexpected return value %d", ret);
+			ubifs_err(c, "unexpected return value %d", ret);
 			err = -EINVAL;
 			goto error;
 		}
@@ -702,7 +702,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 		 * See header comment for this file for more
 		 * explanations about the reasons we have this check.
 		 */
-		ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d",
+		ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
 			  lnum, offs, corruption);
 		/* Make sure we dump interesting non-0xFF data */
 		offs += corruption;
@@ -788,13 +788,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
 
 corrupted_rescan:
 	/* Re-scan the corrupted data with verbose messages */
-	ubifs_err("corruption %d", ret);
+	ubifs_err(c, "corruption %d", ret);
 	ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
 corrupted:
 	ubifs_scanned_corruption(c, lnum, offs, buf);
 	err = -EUCLEAN;
 error:
-	ubifs_err("LEB %d scanning failed", lnum);
+	ubifs_err(c, "LEB %d scanning failed", lnum);
 	ubifs_scan_destroy(sleb);
 	return ERR_PTR(err);
 }
@@ -826,15 +826,15 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 		goto out_free;
 	ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
 	if (ret != SCANNED_A_NODE) {
-		ubifs_err("Not a valid node");
+		ubifs_err(c, "Not a valid node");
 		goto out_err;
 	}
 	if (cs_node->ch.node_type != UBIFS_CS_NODE) {
-		ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type);
+		ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type);
 		goto out_err;
 	}
 	if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
-		ubifs_err("CS node cmt_no %llu != current cmt_no %llu",
+		ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu",
 			  (unsigned long long)le64_to_cpu(cs_node->cmt_no),
 			  c->cmt_no);
 		goto out_err;
@@ -847,7 +847,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
 out_err:
 	err = -EINVAL;
 out_free:
-	ubifs_err("failed to get CS sqnum");
+	ubifs_err(c, "failed to get CS sqnum");
 	kfree(cs_node);
 	return err;
 }
@@ -899,7 +899,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
 		}
 	}
 	if (snod->sqnum > cs_sqnum) {
-		ubifs_err("unrecoverable log corruption in LEB %d",
+		ubifs_err(c, "unrecoverable log corruption in LEB %d",
 			  lnum);
 		ubifs_scan_destroy(sleb);
 		return ERR_PTR(-EUCLEAN);
@@ -975,11 +975,8 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
 		return err;
 
 	dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
-	err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
-	if (err)
-		return err;
 
-	return 0;
+	return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
 }
 
 /**
@@ -1004,10 +1001,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
 
 	if (len == 0) {
 		/* Nothing to read, just unmap it */
-		err = ubifs_leb_unmap(c, lnum);
-		if (err)
-			return err;
-		return 0;
+		return ubifs_leb_unmap(c, lnum);
 	}
 
 	err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
@@ -1043,7 +1037,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
 	}
 
 	if (ret == SCANNED_EMPTY_SPACE) {
-		ubifs_err("unexpected empty space at %d:%d",
+		ubifs_err(c, "unexpected empty space at %d:%d",
 			  lnum, offs);
 		return -EUCLEAN;
 	}
@@ -1137,7 +1131,7 @@ static int grab_empty_leb(struct ubifs_info *c)
 	 */
 	lnum = ubifs_find_free_leb_for_idx(c);
 	if (lnum < 0) {
-		ubifs_err("could not find an empty LEB");
+		ubifs_err(c, "could not find an empty LEB");
 		ubifs_dump_lprops(c);
 		ubifs_dump_budg(c, &c->bi);
 		return lnum;
@@ -1217,7 +1211,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
 	}
 	mutex_unlock(&wbuf->io_mutex);
 	if (err < 0) {
-		ubifs_err("GC failed, error %d", err);
+		ubifs_err(c, "GC failed, error %d", err);
 		if (err == -EAGAIN)
 			err = -EINVAL;
 		return err;
@@ -1464,7 +1458,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
 	return 0;
 
 out:
-	ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
+	ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d",
 		   (unsigned long)e->inum, e->i_size, e->d_size, err);
 	return err;
 }
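
Besides the message-context changes, two recovery.c hunks above fold a trailing error check into a direct return. The transformation, shown in general form (an illustrative sketch of the pattern, not new kernel code):

/* Before: the final call's result is checked, then success is returned. */
err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
if (err)
	return err;

return 0;

/* After: equivalent behavior in one statement; the intermediate
 * variable and the separate success path disappear.
 */
return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);

This is safe whenever the function's return convention matches the callee's (0 on success, negative errno on failure), which holds for both sites changed here.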
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 9b40a1c5e160..3ca4540130b5 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -458,13 +458,13 @@ int ubifs_validate_entry(struct ubifs_info *c,
 	    nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
 	    strnlen(dent->name, nlen) != nlen ||
 	    le64_to_cpu(dent->inum) > MAX_INUM) {
-		ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
+		ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ?
 			  "directory entry" : "extended attribute entry");
 		return -EINVAL;
 	}
 
 	if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
-		ubifs_err("bad key type %d", key_type);
+		ubifs_err(c, "bad key type %d", key_type);
 		return -EINVAL;
 	}
 
@@ -589,7 +589,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 		cond_resched();
 
 		if (snod->sqnum >= SQNUM_WATERMARK) {
-			ubifs_err("file system's life ended");
+			ubifs_err(c, "file system's life ended");
 			goto out_dump;
 		}
 
@@ -647,7 +647,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 			if (old_size < 0 || old_size > c->max_inode_sz ||
 			    new_size < 0 || new_size > c->max_inode_sz ||
 			    old_size <= new_size) {
-				ubifs_err("bad truncation node");
+				ubifs_err(c, "bad truncation node");
 				goto out_dump;
 			}
 
@@ -662,7 +662,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 			break;
 		}
 		default:
-			ubifs_err("unexpected node type %d in bud LEB %d:%d",
+			ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
 				  snod->type, lnum, snod->offs);
 			err = -EINVAL;
 			goto out_dump;
@@ -685,7 +685,7 @@ out:
 	return err;
 
 out_dump:
-	ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
+	ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
 	ubifs_dump_node(c, snod->node);
 	ubifs_scan_destroy(sleb);
 	return -EINVAL;
@@ -805,7 +805,7 @@ static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
 	if (bud) {
 		if (bud->jhead == jhead && bud->start <= offs)
 			return 1;
-		ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
+		ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs);
 		return -EINVAL;
 	}
 
@@ -861,12 +861,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 	 * numbers.
 	 */
 	if (snod->type != UBIFS_CS_NODE) {
-		ubifs_err("first log node at LEB %d:%d is not CS node",
+		ubifs_err(c, "first log node at LEB %d:%d is not CS node",
 			  lnum, offs);
 		goto out_dump;
 	}
 	if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
-		ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
+		ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
 			  lnum, offs,
 			  (unsigned long long)le64_to_cpu(node->cmt_no),
 			  c->cmt_no);
@@ -891,7 +891,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 
 	/* Make sure the first node sits at offset zero of the LEB */
 	if (snod->offs != 0) {
-		ubifs_err("first node is not at zero offset");
+		ubifs_err(c, "first node is not at zero offset");
 		goto out_dump;
 	}
 
@@ -899,12 +899,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 		cond_resched();
 
 		if (snod->sqnum >= SQNUM_WATERMARK) {
-			ubifs_err("file system's life ended");
+			ubifs_err(c, "file system's life ended");
 			goto out_dump;
 		}
 
 		if (snod->sqnum < c->cs_sqnum) {
-			ubifs_err("bad sqnum %llu, commit sqnum %llu",
+			ubifs_err(c, "bad sqnum %llu, commit sqnum %llu",
 				  snod->sqnum, c->cs_sqnum);
 			goto out_dump;
 		}
@@ -934,12 +934,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 		case UBIFS_CS_NODE:
 			/* Make sure it sits at the beginning of LEB */
 			if (snod->offs != 0) {
-				ubifs_err("unexpected node in log");
+				ubifs_err(c, "unexpected node in log");
 				goto out_dump;
 			}
 			break;
 		default:
-			ubifs_err("unexpected node in log");
+			ubifs_err(c, "unexpected node in log");
 			goto out_dump;
 		}
 	}
@@ -955,7 +955,7 @@ out:
 	return err;
 
 out_dump:
-	ubifs_err("log error detected while replaying the log at LEB %d:%d",
+	ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
 		  lnum, offs + snod->offs);
 	ubifs_dump_node(c, snod->node);
 	ubifs_scan_destroy(sleb);
@@ -1017,7 +1017,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
 		return free; /* Error code */
 
 	if (c->ihead_offs != c->leb_size - free) {
-		ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
+		ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum,
 			  c->ihead_offs);
 		return -EINVAL;
 	}
@@ -1040,7 +1040,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
 			 * something went wrong and we cannot proceed mounting
 			 * the file-system.
 			 */
-			ubifs_err("no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
+			ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
 				  lnum, 0);
 			err = -EINVAL;
 		}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 79c6dbbc0e04..f4fbc7b6b794 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -335,7 +335,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	if (err)
 		return err;
 
-	ubifs_msg("default file-system created");
+	ubifs_msg(c, "default file-system created");
 	return 0;
 }
 
@@ -365,13 +365,13 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 	}
 
 	if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
-		ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
+		ubifs_err(c, "min. I/O unit mismatch: %d in superblock, %d real",
 			  le32_to_cpu(sup->min_io_size), c->min_io_size);
 		goto failed;
 	}
 
 	if (le32_to_cpu(sup->leb_size) != c->leb_size) {
-		ubifs_err("LEB size mismatch: %d in superblock, %d real",
+		ubifs_err(c, "LEB size mismatch: %d in superblock, %d real",
 			  le32_to_cpu(sup->leb_size), c->leb_size);
 		goto failed;
 	}
@@ -393,33 +393,33 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 	min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
 
 	if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
-		ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
+		ubifs_err(c, "bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
 			  c->leb_cnt, c->vi.size, min_leb_cnt);
 		goto failed;
 	}
 
 	if (c->max_leb_cnt < c->leb_cnt) {
-		ubifs_err("max. LEB count %d less than LEB count %d",
+		ubifs_err(c, "max. LEB count %d less than LEB count %d",
 			  c->max_leb_cnt, c->leb_cnt);
 		goto failed;
 	}
 
 	if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
-		ubifs_err("too few main LEBs count %d, must be at least %d",
+		ubifs_err(c, "too few main LEBs count %d, must be at least %d",
 			  c->main_lebs, UBIFS_MIN_MAIN_LEBS);
 		goto failed;
 	}
 
 	max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
 	if (c->max_bud_bytes < max_bytes) {
-		ubifs_err("too small journal (%lld bytes), must be at least %lld bytes",
+		ubifs_err(c, "too small journal (%lld bytes), must be at least %lld bytes",
 			  c->max_bud_bytes, max_bytes);
 		goto failed;
 	}
 
 	max_bytes = (long long)c->leb_size * c->main_lebs;
 	if (c->max_bud_bytes > max_bytes) {
-		ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area",
+		ubifs_err(c, "too large journal size (%lld bytes), only %lld bytes available in the main area",
 			  c->max_bud_bytes, max_bytes);
 		goto failed;
 	}
@@ -468,7 +468,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 	return 0;
 
 failed:
-	ubifs_err("bad superblock, error %d", err);
+	ubifs_err(c, "bad superblock, error %d", err);
 	ubifs_dump_node(c, sup);
 	return -EINVAL;
 }
@@ -549,12 +549,12 @@ int ubifs_read_superblock(struct ubifs_info *c)
 	ubifs_assert(!c->ro_media || c->ro_mount);
 	if (!c->ro_mount ||
 	    c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
-		ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+		ubifs_err(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
 			  c->fmt_version, c->ro_compat_version,
 			  UBIFS_FORMAT_VERSION,
 			  UBIFS_RO_COMPAT_VERSION);
 		if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
-			ubifs_msg("only R/O mounting is possible");
+			ubifs_msg(c, "only R/O mounting is possible");
 			err = -EROFS;
 		} else
 			err = -EINVAL;
@@ -570,7 +570,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
 	}
 
 	if (c->fmt_version < 3) {
-		ubifs_err("on-flash format version %d is not supported",
+		ubifs_err(c, "on-flash format version %d is not supported",
 			  c->fmt_version);
 		err = -EINVAL;
 		goto out;
@@ -595,7 +595,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
 		c->key_len = UBIFS_SK_LEN;
 		break;
 	default:
-		ubifs_err("unsupported key format");
+		ubifs_err(c, "unsupported key format");
 		err = -EINVAL;
 		goto out;
 	}
@@ -785,7 +785,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
 	ubifs_assert(c->space_fixup);
 	ubifs_assert(!c->ro_mount);
 
-	ubifs_msg("start fixing up free space");
+	ubifs_msg(c, "start fixing up free space");
 
 	err = fixup_free_space(c);
 	if (err)
@@ -804,6 +804,6 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
 	if (err)
 		return err;
 
-	ubifs_msg("free space fixup complete");
+	ubifs_msg(c, "free space fixup complete");
 	return err;
 }
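
For a sense of the journal bounds checked in the validate_sb() hunks above (illustrative numbers, not taken from this diff): assuming UBIFS_MIN_BUD_LEBS is 3 and a 128 KiB (131072-byte) LEB, max_bud_bytes must be at least 3 * 131072 = 393216 bytes, and with, say, 2000 main LEBs it may not exceed 2000 * 131072 = 262144000 bytes, since buds have to fit inside the main area. A max_bud_bytes value outside that window fails the mount with -EINVAL.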
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 89adbc4d08ac..aab87340d3de 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -100,7 +100,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 		if (pad_len < 0 ||
 		    offs + node_len + pad_len > c->leb_size) {
 			if (!quiet) {
-				ubifs_err("bad pad node at LEB %d:%d",
+				ubifs_err(c, "bad pad node at LEB %d:%d",
 					  lnum, offs);
 				ubifs_dump_node(c, pad);
 			}
@@ -110,7 +110,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 		/* Make the node pads to 8-byte boundary */
 		if ((node_len + pad_len) & 7) {
 			if (!quiet)
-				ubifs_err("bad padding length %d - %d",
+				ubifs_err(c, "bad padding length %d - %d",
 					  offs, offs + node_len + pad_len);
 			return SCANNED_A_BAD_PAD_NODE;
 		}
@@ -152,7 +152,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
 
 	err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
 	if (err && err != -EBADMSG) {
-		ubifs_err("cannot read %d bytes from LEB %d:%d, error %d",
+		ubifs_err(c, "cannot read %d bytes from LEB %d:%d, error %d",
 			  c->leb_size - offs, lnum, offs, err);
 		kfree(sleb);
 		return ERR_PTR(err);
@@ -240,11 +240,11 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
 {
 	int len;
 
-	ubifs_err("corruption at LEB %d:%d", lnum, offs);
+	ubifs_err(c, "corruption at LEB %d:%d", lnum, offs);
 	len = c->leb_size - offs;
 	if (len > 8192)
 		len = 8192;
-	ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs);
+	ubifs_err(c, "first %d bytes from LEB %d:%d", len, lnum, offs);
 	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
 }
 
@@ -299,16 +299,16 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 
 		switch (ret) {
 		case SCANNED_GARBAGE:
-			ubifs_err("garbage");
+			ubifs_err(c, "garbage");
 			goto corrupted;
 		case SCANNED_A_NODE:
 			break;
 		case SCANNED_A_CORRUPT_NODE:
 		case SCANNED_A_BAD_PAD_NODE:
-			ubifs_err("bad node");
+			ubifs_err(c, "bad node");
 			goto corrupted;
 		default:
-			ubifs_err("unknown");
+			ubifs_err(c, "unknown");
 			err = -EINVAL;
 			goto error;
 		}
@@ -325,7 +325,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 
 	if (offs % c->min_io_size) {
 		if (!quiet)
-			ubifs_err("empty space starts at non-aligned offset %d",
+			ubifs_err(c, "empty space starts at non-aligned offset %d",
 				  offs);
 		goto corrupted;
 	}
@@ -338,7 +338,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 	for (; len; offs++, buf++, len--)
 		if (*(uint8_t *)buf != 0xff) {
 			if (!quiet)
-				ubifs_err("corrupt empty space at LEB %d:%d",
+				ubifs_err(c, "corrupt empty space at LEB %d:%d",
 					  lnum, offs);
 			goto corrupted;
 		}
@@ -348,14 +348,14 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
 corrupted:
 	if (!quiet) {
 		ubifs_scanned_corruption(c, lnum, offs, buf);
-		ubifs_err("LEB %d scanning failed", lnum);
+		ubifs_err(c, "LEB %d scanning failed", lnum);
 	}
 	err = -EUCLEAN;
 	ubifs_scan_destroy(sleb);
 	return ERR_PTR(err);
 
 error:
-	ubifs_err("LEB %d scanning failed, error %d", lnum, err);
+	ubifs_err(c, "LEB %d scanning failed, error %d", lnum, err);
 	ubifs_scan_destroy(sleb);
 	return ERR_PTR(err);
 }
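
The ubifs_scan() hunk above verifies that the unused tail of a LEB is still erased flash, i.e. all 0xFF bytes; anything else means "corrupt empty space". A minimal standalone sketch of that check (a hypothetical helper for illustration, not the kernel code, which walks the buffer inline):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Return true if all 'len' bytes are 0xFF (the erased NAND/NOR state). */
static bool is_all_ff(const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--)
		if (*p++ != 0xff)
			return false;	/* corrupt empty space */
	return true;
}

The kernel version also remembers the offset of the first non-0xFF byte so the error message can report exactly where the corruption starts.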
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 93e946561c5c..75e6f04bb795 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -70,13 +70,13 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
 	const struct ubifs_inode *ui = ubifs_inode(inode);
 
 	if (inode->i_size > c->max_inode_sz) {
-		ubifs_err("inode is too large (%lld)",
+		ubifs_err(c, "inode is too large (%lld)",
 			  (long long)inode->i_size);
 		return 1;
 	}
 
 	if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
-		ubifs_err("unknown compression type %d", ui->compr_type);
+		ubifs_err(c, "unknown compression type %d", ui->compr_type);
 		return 2;
 	}
 
@@ -90,7 +90,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
 		return 5;
 
 	if (!ubifs_compr_present(ui->compr_type)) {
-		ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in",
+		ubifs_warn(c, "inode %lu uses '%s' compression, but it was not compiled in",
 			   inode->i_ino, ubifs_compr_name(ui->compr_type));
 	}
 
@@ -242,14 +242,14 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
 	return inode;
 
 out_invalid:
-	ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err);
+	ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err);
 	ubifs_dump_node(c, ino);
 	ubifs_dump_inode(c, inode);
 	err = -EINVAL;
 out_ino:
 	kfree(ino);
 out:
-	ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err);
+	ubifs_err(c, "failed to read inode %lu, error %d", inode->i_ino, err);
 	iget_failed(inode);
 	return ERR_PTR(err);
 }
@@ -319,7 +319,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	if (inode->i_nlink) {
 		err = ubifs_jnl_write_inode(c, inode);
 		if (err)
-			ubifs_err("can't write inode %lu, error %d",
+			ubifs_err(c, "can't write inode %lu, error %d",
 				  inode->i_ino, err);
 		else
 			err = dbg_check_inode_size(c, inode, ui->ui_size);
@@ -363,7 +363,7 @@ static void ubifs_evict_inode(struct inode *inode)
 	 * Worst case we have a lost orphan inode wasting space, so a
 	 * simple error message is OK here.
 	 */
-	ubifs_err("can't delete inode %lu, error %d",
+	ubifs_err(c, "can't delete inode %lu, error %d",
 		  inode->i_ino, err);
 
 out:
@@ -492,17 +492,17 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
 static int init_constants_early(struct ubifs_info *c)
 {
 	if (c->vi.corrupted) {
-		ubifs_warn("UBI volume is corrupted - read-only mode");
+		ubifs_warn(c, "UBI volume is corrupted - read-only mode");
 		c->ro_media = 1;
 	}
 
 	if (c->di.ro_mode) {
-		ubifs_msg("read-only UBI device");
+		ubifs_msg(c, "read-only UBI device");
 		c->ro_media = 1;
 	}
 
 	if (c->vi.vol_type == UBI_STATIC_VOLUME) {
-		ubifs_msg("static UBI volume - read-only mode");
+		ubifs_msg(c, "static UBI volume - read-only mode");
 		c->ro_media = 1;
 	}
 
@@ -516,19 +516,19 @@ static int init_constants_early(struct ubifs_info *c)
 	c->max_write_shift = fls(c->max_write_size) - 1;
 
 	if (c->leb_size < UBIFS_MIN_LEB_SZ) {
-		ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
+		ubifs_err(c, "too small LEBs (%d bytes), min. is %d bytes",
 			  c->leb_size, UBIFS_MIN_LEB_SZ);
 		return -EINVAL;
 	}
 
 	if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
-		ubifs_err("too few LEBs (%d), min. is %d",
+		ubifs_err(c, "too few LEBs (%d), min. is %d",
 			  c->leb_cnt, UBIFS_MIN_LEB_CNT);
 		return -EINVAL;
 	}
 
 	if (!is_power_of_2(c->min_io_size)) {
-		ubifs_err("bad min. I/O size %d", c->min_io_size);
+		ubifs_err(c, "bad min. I/O size %d", c->min_io_size);
 		return -EINVAL;
 	}
 
@@ -539,7 +539,7 @@ static int init_constants_early(struct ubifs_info *c)
 	if (c->max_write_size < c->min_io_size ||
 	    c->max_write_size % c->min_io_size ||
 	    !is_power_of_2(c->max_write_size)) {
-		ubifs_err("bad write buffer size %d for %d min. I/O unit",
+		ubifs_err(c, "bad write buffer size %d for %d min. I/O unit",
 			  c->max_write_size, c->min_io_size);
 		return -EINVAL;
 	}
@@ -665,7 +665,7 @@ static int init_constants_sb(struct ubifs_info *c)
 	tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
 	tmp = ALIGN(tmp, c->min_io_size);
 	if (tmp > c->leb_size) {
-		ubifs_err("too small LEB size %d, at least %d needed",
+		ubifs_err(c, "too small LEB size %d, at least %d needed",
 			  c->leb_size, tmp);
 		return -EINVAL;
 	}
@@ -680,7 +680,7 @@ static int init_constants_sb(struct ubifs_info *c)
 	tmp /= c->leb_size;
 	tmp += 1;
 	if (c->log_lebs < tmp) {
-		ubifs_err("too small log %d LEBs, required min. %d LEBs",
+		ubifs_err(c, "too small log %d LEBs, required min. %d LEBs",
 			  c->log_lebs, tmp);
 		return -EINVAL;
 	}
@@ -772,7 +772,7 @@ static int take_gc_lnum(struct ubifs_info *c)
 	int err;
 
 	if (c->gc_lnum == -1) {
-		ubifs_err("no LEB for GC");
+		ubifs_err(c, "no LEB for GC");
 		return -EINVAL;
 	}
 
@@ -857,7 +857,7 @@ static void free_orphans(struct ubifs_info *c)
 		orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
 		list_del(&orph->list);
 		kfree(orph);
-		ubifs_err("orphan list not empty at unmount");
+		ubifs_err(c, "orphan list not empty at unmount");
 	}
 
 	vfree(c->orph_buf);
@@ -954,7 +954,8 @@ static const match_table_t tokens = {
  */
 static int parse_standard_option(const char *option)
 {
-	ubifs_msg("parse %s", option);
+
+	pr_notice("UBIFS: parse %s\n", option);
 	if (!strcmp(option, "sync"))
 		return MS_SYNCHRONOUS;
 	return 0;
@@ -1026,7 +1027,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			else if (!strcmp(name, "zlib"))
 				c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
 			else {
-				ubifs_err("unknown compressor \"%s\"", name);
+				ubifs_err(c, "unknown compressor \"%s\"", name);
 				kfree(name);
 				return -EINVAL;
 			}
@@ -1042,7 +1043,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 
 			flag = parse_standard_option(p);
 			if (!flag) {
-				ubifs_err("unrecognized mount option \"%s\" or missing value",
+				ubifs_err(c, "unrecognized mount option \"%s\" or missing value",
 					  p);
 				return -EINVAL;
 			}
@@ -1105,7 +1106,7 @@ again:
 	}
 
 	/* Just disable bulk-read */
-	ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it",
+	ubifs_warn(c, "cannot allocate %d bytes of memory for bulk-read, disabling it",
 		   c->max_bu_buf_len);
 	c->mount_opts.bulk_read = 1;
 	c->bulk_read = 0;
@@ -1124,7 +1125,7 @@ static int check_free_space(struct ubifs_info *c)
 {
 	ubifs_assert(c->dark_wm > 0);
 	if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
-		ubifs_err("insufficient free space to mount in R/W mode");
+		ubifs_err(c, "insufficient free space to mount in R/W mode");
 		ubifs_dump_budg(c, &c->bi);
 		ubifs_dump_lprops(c);
 		return -ENOSPC;
@@ -1166,14 +1167,14 @@ static int mount_ubifs(struct ubifs_info *c)
 		 * This UBI volume is empty, and read-only, or the file system
 		 * is mounted read-only - we cannot format it.
 		 */
-		ubifs_err("can't format empty UBI volume: read-only %s",
+		ubifs_err(c, "can't format empty UBI volume: read-only %s",
 			  c->ro_media ? "UBI volume" : "mount");
 		err = -EROFS;
 		goto out_free;
 	}
 
 	if (c->ro_media && !c->ro_mount) {
-		ubifs_err("cannot mount read-write - read-only media");
+		ubifs_err(c, "cannot mount read-write - read-only media");
 		err = -EROFS;
 		goto out_free;
 	}
@@ -1221,7 +1222,7 @@ static int mount_ubifs(struct ubifs_info *c)
 	 * or overridden by mount options is actually compiled in.
 	 */
 	if (!ubifs_compr_present(c->default_compr)) {
-		ubifs_err("'compressor \"%s\" is not compiled in",
+		ubifs_err(c, "compressor \"%s\" is not compiled in",
 			  ubifs_compr_name(c->default_compr));
 		err = -ENOTSUPP;
 		goto out_free;
@@ -1250,7 +1251,7 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (IS_ERR(c->bgt)) {
 		err = PTR_ERR(c->bgt);
 		c->bgt = NULL;
-		ubifs_err("cannot spawn \"%s\", error %d",
+		ubifs_err(c, "cannot spawn \"%s\", error %d",
 			  c->bgt_name, err);
 		goto out_wbufs;
 	}
@@ -1264,7 +1265,7 @@ static int mount_ubifs(struct ubifs_info *c)
 	init_constants_master(c);
 
 	if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
-		ubifs_msg("recovery needed");
+		ubifs_msg(c, "recovery needed");
 		c->need_recovery = 1;
 	}
 
@@ -1284,7 +1285,7 @@ static int mount_ubifs(struct ubifs_info *c)
 		goto out_lpt;
 	}
 
-	if (!c->ro_mount) {
+	if (!c->ro_mount && !c->need_recovery) {
 		/*
 		 * Set the "dirty" flag so that if we reboot uncleanly we
 		 * will notice this immediately on the next mount.
@@ -1373,10 +1374,10 @@ static int mount_ubifs(struct ubifs_info *c)
 
 	if (c->need_recovery) {
 		if (c->ro_mount)
-			ubifs_msg("recovery deferred");
+			ubifs_msg(c, "recovery deferred");
 		else {
 			c->need_recovery = 0;
-			ubifs_msg("recovery completed");
+			ubifs_msg(c, "recovery completed");
 			/*
 			 * GC LEB has to be empty and taken at this point. But
 			 * the journal head LEBs may also be accounted as
@@ -1397,20 +1398,20 @@ static int mount_ubifs(struct ubifs_info *c)
 
 	c->mounting = 0;
 
-	ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s",
+	ubifs_msg(c, "mounted UBI device %d, volume %d, name \"%s\"%s",
 		  c->vi.ubi_num, c->vi.vol_id, c->vi.name,
 		  c->ro_mount ? ", R/O mode" : "");
 	x = (long long)c->main_lebs * c->leb_size;
 	y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
-	ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
+	ubifs_msg(c, "LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
 		  c->leb_size, c->leb_size >> 10, c->min_io_size,
 		  c->max_write_size);
-	ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
+	ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
 		  x, x >> 20, c->main_lebs,
 		  y, y >> 20, c->log_lebs + c->max_bud_cnt);
-	ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
+	ubifs_msg(c, "reserved for root: %llu bytes (%llu KiB)",
 		  c->report_rp_size, c->report_rp_size >> 10);
-	ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
+	ubifs_msg(c, "media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
 		  c->fmt_version, c->ro_compat_version,
 		  UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
 		  c->big_lpt ? ", big LPT model" : ", small LPT model");
@@ -1543,8 +1544,8 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1543 int err, lnum; 1544 int err, lnum;
1544 1545
1545 if (c->rw_incompat) { 1546 if (c->rw_incompat) {
1546 ubifs_err("the file-system is not R/W-compatible"); 1547 ubifs_err(c, "the file-system is not R/W-compatible");
1547 ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", 1548 ubifs_msg(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
1548 c->fmt_version, c->ro_compat_version, 1549 c->fmt_version, c->ro_compat_version,
1549 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); 1550 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
1550 return -EROFS; 1551 return -EROFS;
@@ -1581,7 +1582,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1581 } 1582 }
1582 1583
1583 if (c->need_recovery) { 1584 if (c->need_recovery) {
1584 ubifs_msg("completing deferred recovery"); 1585 ubifs_msg(c, "completing deferred recovery");
1585 err = ubifs_write_rcvrd_mst_node(c); 1586 err = ubifs_write_rcvrd_mst_node(c);
1586 if (err) 1587 if (err)
1587 goto out; 1588 goto out;
@@ -1630,7 +1631,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1630 if (IS_ERR(c->bgt)) { 1631 if (IS_ERR(c->bgt)) {
1631 err = PTR_ERR(c->bgt); 1632 err = PTR_ERR(c->bgt);
1632 c->bgt = NULL; 1633 c->bgt = NULL;
1633 ubifs_err("cannot spawn \"%s\", error %d", 1634 ubifs_err(c, "cannot spawn \"%s\", error %d",
1634 c->bgt_name, err); 1635 c->bgt_name, err);
1635 goto out; 1636 goto out;
1636 } 1637 }
@@ -1664,7 +1665,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1664 1665
1665 if (c->need_recovery) { 1666 if (c->need_recovery) {
1666 c->need_recovery = 0; 1667 c->need_recovery = 0;
1667 ubifs_msg("deferred recovery completed"); 1668 ubifs_msg(c, "deferred recovery completed");
1668 } else { 1669 } else {
1669 /* 1670 /*
1670 * Do not run the debugging space check if we were doing 1671
@@ -1752,8 +1753,7 @@ static void ubifs_put_super(struct super_block *sb)
1752 int i; 1753 int i;
1753 struct ubifs_info *c = sb->s_fs_info; 1754 struct ubifs_info *c = sb->s_fs_info;
1754 1755
1755 ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, 1756 ubifs_msg(c, "un-mount UBI device %d", c->vi.ubi_num);
1756 c->vi.vol_id);
1757 1757
1758 /* 1758 /*
1759 * The following asserts are only valid if there has not been a failure 1759 * The following asserts are only valid if there has not been a failure
@@ -1809,7 +1809,7 @@ static void ubifs_put_super(struct super_block *sb)
1809 * next mount, so we just print a message and 1809 * next mount, so we just print a message and
1810 * continue to unmount normally. 1810 * continue to unmount normally.
1811 */ 1811 */
1812 ubifs_err("failed to write master node, error %d", 1812 ubifs_err(c, "failed to write master node, error %d",
1813 err); 1813 err);
1814 } else { 1814 } else {
1815 for (i = 0; i < c->jhead_cnt; i++) 1815 for (i = 0; i < c->jhead_cnt; i++)
@@ -1834,17 +1834,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1834 1834
1835 err = ubifs_parse_options(c, data, 1); 1835 err = ubifs_parse_options(c, data, 1);
1836 if (err) { 1836 if (err) {
1837 ubifs_err("invalid or unknown remount parameter"); 1837 ubifs_err(c, "invalid or unknown remount parameter");
1838 return err; 1838 return err;
1839 } 1839 }
1840 1840
1841 if (c->ro_mount && !(*flags & MS_RDONLY)) { 1841 if (c->ro_mount && !(*flags & MS_RDONLY)) {
1842 if (c->ro_error) { 1842 if (c->ro_error) {
1843 ubifs_msg("cannot re-mount R/W due to prior errors"); 1843 ubifs_msg(c, "cannot re-mount R/W due to prior errors");
1844 return -EROFS; 1844 return -EROFS;
1845 } 1845 }
1846 if (c->ro_media) { 1846 if (c->ro_media) {
1847 ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); 1847 ubifs_msg(c, "cannot re-mount R/W - UBI volume is R/O");
1848 return -EROFS; 1848 return -EROFS;
1849 } 1849 }
1850 err = ubifs_remount_rw(c); 1850 err = ubifs_remount_rw(c);
@@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1852 return err; 1852 return err;
1853 } else if (!c->ro_mount && (*flags & MS_RDONLY)) { 1853 } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
1854 if (c->ro_error) { 1854 if (c->ro_error) {
1855 ubifs_msg("cannot re-mount R/O due to prior errors"); 1855 ubifs_msg(c, "cannot re-mount R/O due to prior errors");
1856 return -EROFS; 1856 return -EROFS;
1857 } 1857 }
1858 ubifs_remount_ro(c); 1858 ubifs_remount_ro(c);
@@ -2104,8 +2104,8 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2104 */ 2104 */
2105 ubi = open_ubi(name, UBI_READONLY); 2105 ubi = open_ubi(name, UBI_READONLY);
2106 if (IS_ERR(ubi)) { 2106 if (IS_ERR(ubi)) {
2107 ubifs_err("cannot open \"%s\", error %d", 2107 pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
2108 name, (int)PTR_ERR(ubi)); 2108 current->pid, name, (int)PTR_ERR(ubi));
2109 return ERR_CAST(ubi); 2109 return ERR_CAST(ubi);
2110 } 2110 }
2111 2111
@@ -2233,8 +2233,8 @@ static int __init ubifs_init(void)
2233 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 2233 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
2234 */ 2234 */
2235 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { 2235 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
2236 ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", 2236 pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
2237 (unsigned int)PAGE_CACHE_SIZE); 2237 current->pid, (unsigned int)PAGE_CACHE_SIZE);
2238 return -EINVAL; 2238 return -EINVAL;
2239 } 2239 }
2240 2240
@@ -2257,7 +2257,8 @@ static int __init ubifs_init(void)
2257 2257
2258 err = register_filesystem(&ubifs_fs_type); 2258 err = register_filesystem(&ubifs_fs_type);
2259 if (err) { 2259 if (err) {
2260 ubifs_err("cannot register file system, error %d", err); 2260 pr_err("UBIFS error (pid %d): cannot register file system, error %d",
2261 current->pid, err);
2261 goto out_dbg; 2262 goto out_dbg;
2262 } 2263 }
2263 return 0; 2264 return 0;
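Note on the two pr_err() conversions above: ubifs_err() now requires a struct ubifs_info *, which does not exist yet in ubifs_init() or in the early part of ubifs_mount(), so those call sites print the prefix by hand. The pattern, as a self-contained sketch with a hypothetical function name:

#include <linux/printk.h>
#include <linux/sched.h>	/* for current->pid */

/* Hypothetical early-failure path: no ubifs_info is available, so the
 * "ubi%d:%d" volume prefix cannot be printed and pr_err() is used raw. */
static int report_early_error(int err)
{
	pr_err("UBIFS error (pid %d): cannot register file system, error %d",
	       current->pid, err);
	return err;
}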
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 6793db0754f6..957f5757f374 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -98,7 +98,7 @@ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
98 else if (offs > o->offs) 98 else if (offs > o->offs)
99 p = &(*p)->rb_right; 99 p = &(*p)->rb_right;
100 else { 100 else {
101 ubifs_err("old idx added twice!"); 101 ubifs_err(c, "old idx added twice!");
102 kfree(old_idx); 102 kfree(old_idx);
103 return 0; 103 return 0;
104 } 104 }
@@ -447,7 +447,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
447 447
448 err = ubifs_leb_read(c, lnum, buf, offs, len, 1); 448 err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
449 if (err) { 449 if (err) {
450 ubifs_err("cannot read node type %d from LEB %d:%d, error %d", 450 ubifs_err(c, "cannot read node type %d from LEB %d:%d, error %d",
451 type, lnum, offs, err); 451 type, lnum, offs, err);
452 return err; 452 return err;
453 } 453 }
@@ -1684,27 +1684,27 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
1684 int err, len; 1684 int err, len;
1685 1685
1686 if (ch->node_type != UBIFS_DATA_NODE) { 1686 if (ch->node_type != UBIFS_DATA_NODE) {
1687 ubifs_err("bad node type (%d but expected %d)", 1687 ubifs_err(c, "bad node type (%d but expected %d)",
1688 ch->node_type, UBIFS_DATA_NODE); 1688 ch->node_type, UBIFS_DATA_NODE);
1689 goto out_err; 1689 goto out_err;
1690 } 1690 }
1691 1691
1692 err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); 1692 err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
1693 if (err) { 1693 if (err) {
1694 ubifs_err("expected node type %d", UBIFS_DATA_NODE); 1694 ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE);
1695 goto out; 1695 goto out;
1696 } 1696 }
1697 1697
1698 len = le32_to_cpu(ch->len); 1698 len = le32_to_cpu(ch->len);
1699 if (len != zbr->len) { 1699 if (len != zbr->len) {
1700 ubifs_err("bad node length %d, expected %d", len, zbr->len); 1700 ubifs_err(c, "bad node length %d, expected %d", len, zbr->len);
1701 goto out_err; 1701 goto out_err;
1702 } 1702 }
1703 1703
1704 /* Make sure the key of the read node is correct */ 1704 /* Make sure the key of the read node is correct */
1705 key_read(c, buf + UBIFS_KEY_OFFSET, &key1); 1705 key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
1706 if (!keys_eq(c, &zbr->key, &key1)) { 1706 if (!keys_eq(c, &zbr->key, &key1)) {
1707 ubifs_err("bad key in node at LEB %d:%d", 1707 ubifs_err(c, "bad key in node at LEB %d:%d",
1708 zbr->lnum, zbr->offs); 1708 zbr->lnum, zbr->offs);
1709 dbg_tnck(&zbr->key, "looked for key "); 1709 dbg_tnck(&zbr->key, "looked for key ");
1710 dbg_tnck(&key1, "found node's key "); 1710 dbg_tnck(&key1, "found node's key ");
@@ -1716,7 +1716,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
1716out_err: 1716out_err:
1717 err = -EINVAL; 1717 err = -EINVAL;
1718out: 1718out:
1719 ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); 1719 ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs);
1720 ubifs_dump_node(c, buf); 1720 ubifs_dump_node(c, buf);
1721 dump_stack(); 1721 dump_stack();
1722 return err; 1722 return err;
@@ -1741,7 +1741,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1741 len = bu->zbranch[bu->cnt - 1].offs; 1741 len = bu->zbranch[bu->cnt - 1].offs;
1742 len += bu->zbranch[bu->cnt - 1].len - offs; 1742 len += bu->zbranch[bu->cnt - 1].len - offs;
1743 if (len > bu->buf_len) { 1743 if (len > bu->buf_len) {
1744 ubifs_err("buffer too small %d vs %d", bu->buf_len, len); 1744 ubifs_err(c, "buffer too small %d vs %d", bu->buf_len, len);
1745 return -EINVAL; 1745 return -EINVAL;
1746 } 1746 }
1747 1747
@@ -1757,7 +1757,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1757 return -EAGAIN; 1757 return -EAGAIN;
1758 1758
1759 if (err && err != -EBADMSG) { 1759 if (err && err != -EBADMSG) {
1760 ubifs_err("failed to read from LEB %d:%d, error %d", 1760 ubifs_err(c, "failed to read from LEB %d:%d, error %d",
1761 lnum, offs, err); 1761 lnum, offs, err);
1762 dump_stack(); 1762 dump_stack();
1763 dbg_tnck(&bu->key, "key "); 1763 dbg_tnck(&bu->key, "key ");
@@ -3313,7 +3313,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3313 3313
3314out_dump: 3314out_dump:
3315 block = key_block(c, key); 3315 block = key_block(c, key);
3316 ubifs_err("inode %lu has size %lld, but there are data at offset %lld", 3316 ubifs_err(c, "inode %lu has size %lld, but there are data at offset %lld",
3317 (unsigned long)inode->i_ino, size, 3317 (unsigned long)inode->i_ino, size,
3318 ((loff_t)block) << UBIFS_BLOCK_SHIFT); 3318 ((loff_t)block) << UBIFS_BLOCK_SHIFT);
3319 mutex_unlock(&c->tnc_mutex); 3319 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 7a205e046776..b45345d701e7 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -53,7 +53,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
53 br->offs = cpu_to_le32(zbr->offs); 53 br->offs = cpu_to_le32(zbr->offs);
54 br->len = cpu_to_le32(zbr->len); 54 br->len = cpu_to_le32(zbr->len);
55 if (!zbr->lnum || !zbr->len) { 55 if (!zbr->lnum || !zbr->len) {
56 ubifs_err("bad ref in znode"); 56 ubifs_err(c, "bad ref in znode");
57 ubifs_dump_znode(c, znode); 57 ubifs_dump_znode(c, znode);
58 if (zbr->znode) 58 if (zbr->znode)
59 ubifs_dump_znode(c, zbr->znode); 59 ubifs_dump_znode(c, zbr->znode);
@@ -384,7 +384,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
384 * Do not print scary warnings if the debugging 384 * Do not print scary warnings if the debugging
385 * option which forces in-the-gaps is enabled. 385 * option which forces in-the-gaps is enabled.
386 */ 386 */
387 ubifs_warn("out of space"); 387 ubifs_warn(c, "out of space");
388 ubifs_dump_budg(c, &c->bi); 388 ubifs_dump_budg(c, &c->bi);
389 ubifs_dump_lprops(c); 389 ubifs_dump_lprops(c);
390 } 390 }
@@ -441,7 +441,7 @@ static int layout_in_empty_space(struct ubifs_info *c)
441 /* Determine the index node position */ 441 /* Determine the index node position */
442 if (lnum == -1) { 442 if (lnum == -1) {
443 if (c->ileb_nxt >= c->ileb_cnt) { 443 if (c->ileb_nxt >= c->ileb_cnt) {
444 ubifs_err("out of space"); 444 ubifs_err(c, "out of space");
445 return -ENOSPC; 445 return -ENOSPC;
446 } 446 }
447 lnum = c->ilebs[c->ileb_nxt++]; 447 lnum = c->ilebs[c->ileb_nxt++];
@@ -855,7 +855,7 @@ static int write_index(struct ubifs_info *c)
855 br->offs = cpu_to_le32(zbr->offs); 855 br->offs = cpu_to_le32(zbr->offs);
856 br->len = cpu_to_le32(zbr->len); 856 br->len = cpu_to_le32(zbr->len);
857 if (!zbr->lnum || !zbr->len) { 857 if (!zbr->lnum || !zbr->len) {
858 ubifs_err("bad ref in znode"); 858 ubifs_err(c, "bad ref in znode");
859 ubifs_dump_znode(c, znode); 859 ubifs_dump_znode(c, znode);
860 if (zbr->znode) 860 if (zbr->znode)
861 ubifs_dump_znode(c, zbr->znode); 861 ubifs_dump_znode(c, zbr->znode);
@@ -875,7 +875,7 @@ static int write_index(struct ubifs_info *c)
875 875
876 if (lnum != znode->lnum || offs != znode->offs || 876 if (lnum != znode->lnum || offs != znode->offs ||
877 len != znode->len) { 877 len != znode->len) {
878 ubifs_err("inconsistent znode posn"); 878 ubifs_err(c, "inconsistent znode posn");
879 return -EINVAL; 879 return -EINVAL;
880 } 880 }
881 881
@@ -973,7 +973,7 @@ static int write_index(struct ubifs_info *c)
973 973
974 if (lnum != c->dbg->new_ihead_lnum || 974 if (lnum != c->dbg->new_ihead_lnum ||
975 buf_offs != c->dbg->new_ihead_offs) { 975 buf_offs != c->dbg->new_ihead_offs) {
976 ubifs_err("inconsistent ihead"); 976 ubifs_err(c, "inconsistent ihead");
977 return -EINVAL; 977 return -EINVAL;
978 } 978 }
979 979
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index f6bf8995c7b1..93f5b7859e6f 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -293,9 +293,9 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
293 lnum, offs, znode->level, znode->child_cnt); 293 lnum, offs, znode->level, znode->child_cnt);
294 294
295 if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { 295 if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
296 ubifs_err("current fanout %d, branch count %d", 296 ubifs_err(c, "current fanout %d, branch count %d",
297 c->fanout, znode->child_cnt); 297 c->fanout, znode->child_cnt);
298 ubifs_err("max levels %d, znode level %d", 298 ubifs_err(c, "max levels %d, znode level %d",
299 UBIFS_MAX_LEVELS, znode->level); 299 UBIFS_MAX_LEVELS, znode->level);
300 err = 1; 300 err = 1;
301 goto out_dump; 301 goto out_dump;
@@ -316,7 +316,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
316 if (zbr->lnum < c->main_first || 316 if (zbr->lnum < c->main_first ||
317 zbr->lnum >= c->leb_cnt || zbr->offs < 0 || 317 zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
318 zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { 318 zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
319 ubifs_err("bad branch %d", i); 319 ubifs_err(c, "bad branch %d", i);
320 err = 2; 320 err = 2;
321 goto out_dump; 321 goto out_dump;
322 } 322 }
@@ -328,7 +328,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
328 case UBIFS_XENT_KEY: 328 case UBIFS_XENT_KEY:
329 break; 329 break;
330 default: 330 default:
331 ubifs_err("bad key type at slot %d: %d", 331 ubifs_err(c, "bad key type at slot %d: %d",
332 i, key_type(c, &zbr->key)); 332 i, key_type(c, &zbr->key));
333 err = 3; 333 err = 3;
334 goto out_dump; 334 goto out_dump;
@@ -340,17 +340,17 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
340 type = key_type(c, &zbr->key); 340 type = key_type(c, &zbr->key);
341 if (c->ranges[type].max_len == 0) { 341 if (c->ranges[type].max_len == 0) {
342 if (zbr->len != c->ranges[type].len) { 342 if (zbr->len != c->ranges[type].len) {
343 ubifs_err("bad target node (type %d) length (%d)", 343 ubifs_err(c, "bad target node (type %d) length (%d)",
344 type, zbr->len); 344 type, zbr->len);
345 ubifs_err("have to be %d", c->ranges[type].len); 345 ubifs_err(c, "have to be %d", c->ranges[type].len);
346 err = 4; 346 err = 4;
347 goto out_dump; 347 goto out_dump;
348 } 348 }
349 } else if (zbr->len < c->ranges[type].min_len || 349 } else if (zbr->len < c->ranges[type].min_len ||
350 zbr->len > c->ranges[type].max_len) { 350 zbr->len > c->ranges[type].max_len) {
351 ubifs_err("bad target node (type %d) length (%d)", 351 ubifs_err(c, "bad target node (type %d) length (%d)",
352 type, zbr->len); 352 type, zbr->len);
353 ubifs_err("have to be in range of %d-%d", 353 ubifs_err(c, "have to be in range of %d-%d",
354 c->ranges[type].min_len, 354 c->ranges[type].min_len,
355 c->ranges[type].max_len); 355 c->ranges[type].max_len);
356 err = 5; 356 err = 5;
@@ -370,12 +370,12 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
370 370
371 cmp = keys_cmp(c, key1, key2); 371 cmp = keys_cmp(c, key1, key2);
372 if (cmp > 0) { 372 if (cmp > 0) {
373 ubifs_err("bad key order (keys %d and %d)", i, i + 1); 373 ubifs_err(c, "bad key order (keys %d and %d)", i, i + 1);
374 err = 6; 374 err = 6;
375 goto out_dump; 375 goto out_dump;
376 } else if (cmp == 0 && !is_hash_key(c, key1)) { 376 } else if (cmp == 0 && !is_hash_key(c, key1)) {
377 /* These can only be keys with colliding hash */ 377 /* These can only be keys with colliding hash */
378 ubifs_err("keys %d and %d are not hashed but equivalent", 378 ubifs_err(c, "keys %d and %d are not hashed but equivalent",
379 i, i + 1); 379 i, i + 1);
380 err = 7; 380 err = 7;
381 goto out_dump; 381 goto out_dump;
@@ -386,7 +386,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
386 return 0; 386 return 0;
387 387
388out_dump: 388out_dump:
389 ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); 389 ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
390 ubifs_dump_node(c, idx); 390 ubifs_dump_node(c, idx);
391 kfree(idx); 391 kfree(idx);
392 return -EINVAL; 392 return -EINVAL;
@@ -482,7 +482,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
482 /* Make sure the key of the read node is correct */ 482 /* Make sure the key of the read node is correct */
483 key_read(c, node + UBIFS_KEY_OFFSET, &key1); 483 key_read(c, node + UBIFS_KEY_OFFSET, &key1);
484 if (!keys_eq(c, key, &key1)) { 484 if (!keys_eq(c, key, &key1)) {
485 ubifs_err("bad key in node at LEB %d:%d", 485 ubifs_err(c, "bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs); 486 zbr->lnum, zbr->offs);
487 dbg_tnck(key, "looked for key "); 487 dbg_tnck(key, "looked for key ");
488 dbg_tnck(&key1, "but found node's key "); 488 dbg_tnck(&key1, "but found node's key ");
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index bc04b9c69891..de759022f3d6 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -43,15 +43,19 @@
43#define UBIFS_VERSION 1 43#define UBIFS_VERSION 1
44 44
45/* Normal UBIFS messages */ 45/* Normal UBIFS messages */
46#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) 46#define ubifs_msg(c, fmt, ...) \
47 pr_notice("UBIFS (ubi%d:%d): " fmt "\n", \
48 (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
47/* UBIFS error messages */ 49/* UBIFS error messages */
48#define ubifs_err(fmt, ...) \ 50#define ubifs_err(c, fmt, ...) \
49 pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ 51 pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n", \
52 (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
50 __func__, ##__VA_ARGS__) 53 __func__, ##__VA_ARGS__)
51/* UBIFS warning messages */ 54/* UBIFS warning messages */
52#define ubifs_warn(fmt, ...) \ 55#define ubifs_warn(c, fmt, ...) \
53 pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ 56 pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n", \
54 current->pid, __func__, ##__VA_ARGS__) 57 (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
58 __func__, ##__VA_ARGS__)
55/* 59/*
56 * A variant of 'ubifs_err()' which takes the UBIFS file-system description 60
57 * object as an argument. 61 * object as an argument.
@@ -59,7 +63,7 @@
59#define ubifs_errc(c, fmt, ...) \ 63#define ubifs_errc(c, fmt, ...) \
60 do { \ 64 do { \
61 if (!(c)->probing) \ 65 if (!(c)->probing) \
62 ubifs_err(fmt, ##__VA_ARGS__); \ 66 ubifs_err(c, fmt, ##__VA_ARGS__); \
63 } while (0) 67 } while (0)
64 68
65/* UBIFS file system VFS magic number */ 69/* UBIFS file system VFS magic number */
@@ -158,7 +162,7 @@
158#define WORST_COMPR_FACTOR 2 162#define WORST_COMPR_FACTOR 2
159 163
160/* 164/*
161 * How much memory is needed for a buffer where we comress a data node. 165 * How much memory is needed for a buffer where we compress a data node.
162 */ 166 */
163#define COMPRESSED_DATA_NODE_BUF_SZ \ 167#define COMPRESSED_DATA_NODE_BUF_SZ \
164 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) 168 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
@@ -664,7 +668,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
664 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes 668 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
665 * fields 669 * fields
666 * @softlimit: soft write-buffer timeout interval 670 * @softlimit: soft write-buffer timeout interval
667 * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit 671 * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit
668 * and @softlimit + @delta) 672 * and @softlimit + @delta)
669 * @timer: write-buffer timer 673 * @timer: write-buffer timer
670 * @no_timer: non-zero if this write-buffer does not have a timer 674 * @no_timer: non-zero if this write-buffer does not have a timer
@@ -930,9 +934,9 @@ struct ubifs_orphan {
930/** 934/**
931 * struct ubifs_mount_opts - UBIFS-specific mount options information. 935 * struct ubifs_mount_opts - UBIFS-specific mount options information.
932 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) 936 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
933 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) 937 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable)
934 * @chk_data_crc: enable/disable CRC data checking when reading data nodes 938 * @chk_data_crc: enable/disable CRC data checking when reading data nodes
935 * (%0 default, %1 disabe, %2 enable) 939 * (%0 default, %1 disable, %2 enable)
936 * @override_compr: override default compressor (%0 - do not override and use 940 * @override_compr: override default compressor (%0 - do not override and use
937 * superblock compressor, %1 - override and use compressor 941 * superblock compressor, %1 - override and use compressor
938 * specified in @compr_type) 942 * specified in @compr_type)
@@ -962,9 +966,9 @@ struct ubifs_mount_opts {
962 * optimization) 966 * optimization)
963 * @nospace_rp: the same as @nospace, but additionally means that even reserved 967 * @nospace_rp: the same as @nospace, but additionally means that even reserved
964 * pool is full 968 * pool is full
965 * @page_budget: budget for a page (constant, nenver changed after mount) 969 * @page_budget: budget for a page (constant, never changed after mount)
966 * @inode_budget: budget for an inode (constant, nenver changed after mount) 970 * @inode_budget: budget for an inode (constant, never changed after mount)
967 * @dent_budget: budget for a directory entry (constant, nenver changed after 971 * @dent_budget: budget for a directory entry (constant, never changed after
968 * mount) 972 * mount)
969 */ 973 */
970struct ubifs_budg_info { 974struct ubifs_budg_info {
@@ -1787,10 +1791,10 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1787/* compressor.c */ 1791/* compressor.c */
1788int __init ubifs_compressors_init(void); 1792int __init ubifs_compressors_init(void);
1789void ubifs_compressors_exit(void); 1793void ubifs_compressors_exit(void);
1790void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 1794void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len,
1791 int *compr_type); 1795 void *out_buf, int *out_len, int *compr_type);
1792int ubifs_decompress(const void *buf, int len, void *out, int *out_len, 1796int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
1793 int compr_type); 1797 void *out, int *out_len, int compr_type);
1794 1798
1795#include "debug.h" 1799#include "debug.h"
1796#include "misc.h" 1800#include "misc.h"
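The macro change above is the root of every ubifs_msg()/ubifs_err()/ubifs_warn() hunk in this patch: each message helper gains a struct ubifs_info * so the log line can identify the volume it concerns. A before/after sketch of one call site, with values taken from the tnc.c hunks earlier:

	/* Before: the log line cannot say which UBIFS volume failed. */
	ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
		  type, lnum, offs, err);

	/* After: the macro expands c->vi.ubi_num and c->vi.vol_id into an
	 * "ubi%d:%d" prefix, so concurrent mounts are distinguishable. */
	ubifs_err(c, "cannot read node type %d from LEB %d:%d, error %d",
		  type, lnum, offs, err);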
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index a92be244a6fb..3659b1934500 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -108,7 +108,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
108 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; 108 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
109 109
110 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) { 110 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) {
111 ubifs_err("inode %lu already has too many xattrs (%d), cannot create more", 111 ubifs_err(c, "inode %lu already has too many xattrs (%d), cannot create more",
112 host->i_ino, host_ui->xattr_cnt); 112 host->i_ino, host_ui->xattr_cnt);
113 return -ENOSPC; 113 return -ENOSPC;
114 } 114 }
@@ -120,7 +120,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
120 */ 120 */
121 names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1; 121 names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1;
122 if (names_len > XATTR_LIST_MAX) { 122 if (names_len > XATTR_LIST_MAX) {
123 ubifs_err("cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d", 123 ubifs_err(c, "cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d",
124 host->i_ino, names_len, XATTR_LIST_MAX); 124 host->i_ino, names_len, XATTR_LIST_MAX);
125 return -ENOSPC; 125 return -ENOSPC;
126 } 126 }
@@ -288,13 +288,13 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
288 288
289 inode = ubifs_iget(c->vfs_sb, inum); 289 inode = ubifs_iget(c->vfs_sb, inum);
290 if (IS_ERR(inode)) { 290 if (IS_ERR(inode)) {
291 ubifs_err("dead extended attribute entry, error %d", 291 ubifs_err(c, "dead extended attribute entry, error %d",
292 (int)PTR_ERR(inode)); 292 (int)PTR_ERR(inode));
293 return inode; 293 return inode;
294 } 294 }
295 if (ubifs_inode(inode)->xattr) 295 if (ubifs_inode(inode)->xattr)
296 return inode; 296 return inode;
297 ubifs_err("corrupt extended attribute entry"); 297 ubifs_err(c, "corrupt extended attribute entry");
298 iput(inode); 298 iput(inode);
299 return ERR_PTR(-EINVAL); 299 return ERR_PTR(-EINVAL);
300} 300}
@@ -412,7 +412,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
412 if (buf) { 412 if (buf) {
413 /* If @buf is %NULL we are supposed to return the length */ 413 /* If @buf is %NULL we are supposed to return the length */
414 if (ui->data_len > size) { 414 if (ui->data_len > size) {
415 ubifs_err("buffer size %zd, xattr len %d", 415 ubifs_err(c, "buffer size %zd, xattr len %d",
416 size, ui->data_len); 416 size, ui->data_len);
417 err = -ERANGE; 417 err = -ERANGE;
418 goto out_iput; 418 goto out_iput;
@@ -485,7 +485,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
485 485
486 kfree(pxent); 486 kfree(pxent);
487 if (err != -ENOENT) { 487 if (err != -ENOENT) {
488 ubifs_err("cannot find next direntry, error %d", err); 488 ubifs_err(c, "cannot find next direntry, error %d", err);
489 return err; 489 return err;
490 } 490 }
491 491
@@ -657,8 +657,10 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
657 &init_xattrs, 0); 657 &init_xattrs, 0);
658 mutex_unlock(&inode->i_mutex); 658 mutex_unlock(&inode->i_mutex);
659 659
660 if (err) 660 if (err) {
661 ubifs_err("cannot initialize security for inode %lu, error %d", 661 struct ubifs_info *c = dentry->i_sb->s_fs_info;
662 ubifs_err(c, "cannot initialize security for inode %lu, error %d",
662 inode->i_ino, err); 663 inode->i_ino, err);
664 }
663 return err; 665 return err;
664} 666}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1ba2baaf4367..6d6a96b4e73f 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -21,7 +21,6 @@
21 21
22#include "udfdecl.h" 22#include "udfdecl.h"
23 23
24#include <linux/buffer_head.h>
25#include <linux/bitops.h> 24#include <linux/bitops.h>
26 25
27#include "udf_i.h" 26#include "udf_i.h"
@@ -63,15 +62,14 @@ static int __load_block_bitmap(struct super_block *sb,
63 block_group, nr_groups); 62 block_group, nr_groups);
64 } 63 }
65 64
66 if (bitmap->s_block_bitmap[block_group]) { 65 if (bitmap->s_block_bitmap[block_group])
67 return block_group; 66 return block_group;
68 } else { 67
69 retval = read_block_bitmap(sb, bitmap, block_group, 68 retval = read_block_bitmap(sb, bitmap, block_group, block_group);
70 block_group); 69 if (retval < 0)
71 if (retval < 0) 70 return retval;
72 return retval; 71
73 return block_group; 72 return block_group;
74 }
75} 73}
76 74
77static inline int load_block_bitmap(struct super_block *sb, 75static inline int load_block_bitmap(struct super_block *sb,
@@ -358,7 +356,6 @@ static void udf_table_free_blocks(struct super_block *sb,
358 struct kernel_lb_addr eloc; 356 struct kernel_lb_addr eloc;
359 struct extent_position oepos, epos; 357 struct extent_position oepos, epos;
360 int8_t etype; 358 int8_t etype;
361 int i;
362 struct udf_inode_info *iinfo; 359 struct udf_inode_info *iinfo;
363 360
364 mutex_lock(&sbi->s_alloc_mutex); 361 mutex_lock(&sbi->s_alloc_mutex);
@@ -425,7 +422,6 @@ static void udf_table_free_blocks(struct super_block *sb,
425 } 422 }
426 423
427 if (epos.bh != oepos.bh) { 424 if (epos.bh != oepos.bh) {
428 i = -1;
429 oepos.block = epos.block; 425 oepos.block = epos.block;
430 brelse(oepos.bh); 426 brelse(oepos.bh);
431 get_bh(epos.bh); 427 get_bh(epos.bh);
@@ -762,7 +758,7 @@ inline int udf_prealloc_blocks(struct super_block *sb,
762 uint32_t block_count) 758 uint32_t block_count)
763{ 759{
764 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; 760 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
765 sector_t allocated; 761 int allocated;
766 762
767 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) 763 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
768 allocated = udf_bitmap_prealloc_blocks(sb, 764 allocated = udf_bitmap_prealloc_blocks(sb,
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 05e90edd1992..541a12b5792d 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
30#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/buffer_head.h>
34 33
35#include "udf_i.h" 34#include "udf_i.h"
36#include "udf_sb.h" 35#include "udf_sb.h"
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 3e44f575fb9c..c763fda257bf 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -16,7 +16,6 @@
16 16
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/buffer_head.h>
20 19
21struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, 20struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
22 struct udf_fileident_bh *fibh, 21 struct udf_fileident_bh *fibh,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555fbeac..5dadad9960b9 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -33,8 +33,7 @@
33#include <linux/capability.h> 33#include <linux/capability.h>
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/buffer_head.h> 36#include <linux/uio.h>
37#include <linux/aio.h>
38 37
39#include "udf_i.h" 38#include "udf_i.h"
40#include "udf_sb.h" 39#include "udf_sb.h"
@@ -100,8 +99,7 @@ static int udf_adinicb_write_begin(struct file *file,
100 return 0; 99 return 0;
101} 100}
102 101
103static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, 102static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
104 struct iov_iter *iter,
105 loff_t offset) 103 loff_t offset)
106{ 104{
107 /* Fallback to buffered I/O. */ 105 /* Fallback to buffered I/O. */
@@ -121,21 +119,21 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
121 ssize_t retval; 119 ssize_t retval;
122 struct file *file = iocb->ki_filp; 120 struct file *file = iocb->ki_filp;
123 struct inode *inode = file_inode(file); 121 struct inode *inode = file_inode(file);
124 int err, pos;
125 size_t count = iocb->ki_nbytes;
126 struct udf_inode_info *iinfo = UDF_I(inode); 122 struct udf_inode_info *iinfo = UDF_I(inode);
123 int err;
127 124
128 mutex_lock(&inode->i_mutex); 125 mutex_lock(&inode->i_mutex);
126
127 retval = generic_write_checks(iocb, from);
128 if (retval <= 0)
129 goto out;
130
129 down_write(&iinfo->i_data_sem); 131 down_write(&iinfo->i_data_sem);
130 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 132 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
131 if (file->f_flags & O_APPEND) 133 loff_t end = iocb->ki_pos + iov_iter_count(from);
132 pos = inode->i_size;
133 else
134 pos = iocb->ki_pos;
135 134
136 if (inode->i_sb->s_blocksize < 135 if (inode->i_sb->s_blocksize <
137 (udf_file_entry_alloc_offset(inode) + 136 (udf_file_entry_alloc_offset(inode) + end)) {
138 pos + count)) {
139 err = udf_expand_file_adinicb(inode); 137 err = udf_expand_file_adinicb(inode);
140 if (err) { 138 if (err) {
141 mutex_unlock(&inode->i_mutex); 139 mutex_unlock(&inode->i_mutex);
@@ -143,16 +141,14 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
143 return err; 141 return err;
144 } 142 }
145 } else { 143 } else {
146 if (pos + count > inode->i_size) 144 iinfo->i_lenAlloc = max(end, inode->i_size);
147 iinfo->i_lenAlloc = pos + count;
148 else
149 iinfo->i_lenAlloc = inode->i_size;
150 up_write(&iinfo->i_data_sem); 145 up_write(&iinfo->i_data_sem);
151 } 146 }
152 } else 147 } else
153 up_write(&iinfo->i_data_sem); 148 up_write(&iinfo->i_data_sem);
154 149
155 retval = __generic_file_write_iter(iocb, from); 150 retval = __generic_file_write_iter(iocb, from);
151out:
156 mutex_unlock(&inode->i_mutex); 152 mutex_unlock(&inode->i_mutex);
157 153
158 if (retval > 0) { 154 if (retval > 0) {
@@ -240,12 +236,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
240} 236}
241 237
242const struct file_operations udf_file_operations = { 238const struct file_operations udf_file_operations = {
243 .read = new_sync_read,
244 .read_iter = generic_file_read_iter, 239 .read_iter = generic_file_read_iter,
245 .unlocked_ioctl = udf_ioctl, 240 .unlocked_ioctl = udf_ioctl,
246 .open = generic_file_open, 241 .open = generic_file_open,
247 .mmap = generic_file_mmap, 242 .mmap = generic_file_mmap,
248 .write = new_sync_write,
249 .write_iter = udf_file_write_iter, 243 .write_iter = udf_file_write_iter,
250 .release = udf_release_file, 244 .release = udf_release_file,
251 .fsync = generic_file_fsync, 245 .fsync = generic_file_fsync,
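The udf_file_write_iter() rework above leans on generic_write_checks(), which in this kernel validates the request, clamps the iov_iter, and applies O_APPEND by updating iocb->ki_pos, so the hand-rolled pos/count bookkeeping disappears. A minimal self-contained sketch of the shape, with locking and the UDF in-ICB handling elided and a hypothetical function name:

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t example_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	loff_t end;
	ssize_t ret;

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		return ret;	/* error, or nothing left to write */

	/* End of the prospective write; the UDF hunk compares this
	 * against the space available inside the in-ICB area. */
	end = iocb->ki_pos + iov_iter_count(from);
	(void)end;

	return __generic_file_write_iter(iocb, from);
}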
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d599098d..6afac3d561ac 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -33,12 +33,11 @@
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/buffer_head.h>
37#include <linux/writeback.h> 36#include <linux/writeback.h>
38#include <linux/slab.h> 37#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 38#include <linux/crc-itu-t.h>
40#include <linux/mpage.h> 39#include <linux/mpage.h>
41#include <linux/aio.h> 40#include <linux/uio.h>
42 41
43#include "udf_i.h" 42#include "udf_i.h"
44#include "udf_sb.h" 43#include "udf_sb.h"
@@ -215,8 +214,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
215 return ret; 214 return ret;
216} 215}
217 216
218static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, 217static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
219 struct iov_iter *iter,
220 loff_t offset) 218 loff_t offset)
221{ 219{
222 struct file *file = iocb->ki_filp; 220 struct file *file = iocb->ki_filp;
@@ -225,8 +223,8 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
225 size_t count = iov_iter_count(iter); 223 size_t count = iov_iter_count(iter);
226 ssize_t ret; 224 ssize_t ret;
227 225
228 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); 226 ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block);
229 if (unlikely(ret < 0 && (rw & WRITE))) 227 if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE))
230 udf_write_failed(mapping, offset + count); 228 udf_write_failed(mapping, offset + count);
231 return ret; 229 return ret;
232} 230}
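As this hunk shows, blockdev_direct_IO() no longer takes an rw argument in this series; the direction now travels inside the iov_iter and is queried with iov_iter_rw(). A sketch of the converted handler, meant to sit inside fs/udf/inode.c since udf_get_block and udf_write_failed are static helpers from that file:

static ssize_t udf_direct_IO_sketch(struct kiocb *iocb, struct iov_iter *iter,
				    loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	ssize_t ret;

	ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block);
	/* Only a failed *write* leaves blocks to trim; reads need no undo. */
	if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE))
		udf_write_failed(mapping, offset + count);
	return ret;
}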
@@ -1637,7 +1635,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1637 udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0)); 1635 udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0));
1638 if (!bh) { 1636 if (!bh) {
1639 udf_debug("getblk failure\n"); 1637 udf_debug("getblk failure\n");
1640 return -ENOMEM; 1638 return -EIO;
1641 } 1639 }
1642 1640
1643 lock_buffer(bh); 1641 lock_buffer(bh);
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index c175b4dabc14..71d1c25f360d 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -23,7 +23,6 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/buffer_head.h>
27#include <linux/crc-itu-t.h> 26#include <linux/crc-itu-t.h>
28 27
29#include "udf_i.h" 28#include "udf_i.h"
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 33b246b82c98..39661977c89c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
27#include <linux/errno.h> 27#include <linux/errno.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/buffer_head.h>
31#include <linux/sched.h> 30#include <linux/sched.h>
32#include <linux/crc-itu-t.h> 31#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 32#include <linux/exportfs.h>
@@ -569,8 +568,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
569 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 568 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
570 cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); 569 cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
571 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 570 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
572 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 571 dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
573 mark_inode_dirty(dir); 572 mark_inode_dirty(dir);
574 if (fibh.sbh != fibh.ebh) 573 if (fibh.sbh != fibh.ebh)
575 brelse(fibh.ebh); 574 brelse(fibh.ebh);
576 brelse(fibh.sbh); 575 brelse(fibh.sbh);
@@ -683,6 +682,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
683 cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; 682 cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY;
684 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 683 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
685 inc_nlink(dir); 684 inc_nlink(dir);
685 dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
686 mark_inode_dirty(dir); 686 mark_inode_dirty(dir);
687 unlock_new_inode(inode); 687 unlock_new_inode(inode);
688 d_instantiate(dentry, inode); 688 d_instantiate(dentry, inode);
@@ -1024,6 +1024,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1024 inc_nlink(inode); 1024 inc_nlink(inode);
1025 inode->i_ctime = current_fs_time(inode->i_sb); 1025 inode->i_ctime = current_fs_time(inode->i_sb);
1026 mark_inode_dirty(inode); 1026 mark_inode_dirty(inode);
1027 dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
1028 mark_inode_dirty(dir);
1027 ihold(inode); 1029 ihold(inode);
1028 d_instantiate(dentry, inode); 1030 d_instantiate(dentry, inode);
1029 1031
@@ -1127,7 +1129,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1127 inode_dec_link_count(new_inode); 1129 inode_dec_link_count(new_inode);
1128 } 1130 }
1129 old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb); 1131 old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb);
1132 new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb);
1130 mark_inode_dirty(old_dir); 1133 mark_inode_dirty(old_dir);
1134 mark_inode_dirty(new_dir);
1131 1135
1132 if (dir_fi) { 1136 if (dir_fi) {
1133 dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location); 1137 dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index d6caf01a2097..5f861ed287c3 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -24,7 +24,6 @@
24 24
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h>
28#include <linux/mutex.h> 27#include <linux/mutex.h>
29 28
30uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, 29uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f169411c4ea0..6299f341967b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
48#include <linux/stat.h> 48#include <linux/stat.h>
49#include <linux/cdrom.h> 49#include <linux/cdrom.h>
50#include <linux/nls.h> 50#include <linux/nls.h>
51#include <linux/buffer_head.h>
52#include <linux/vfs.h> 51#include <linux/vfs.h>
53#include <linux/vmalloc.h> 52#include <linux/vmalloc.h>
54#include <linux/errno.h> 53#include <linux/errno.h>
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index ac10ca939f26..8dfbc4025e2f 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/buffer_head.h>
31#include "udf_i.h" 30#include "udf_i.h"
32 31
33static int udf_pc_to_char(struct super_block *sb, unsigned char *from, 32static int udf_pc_to_char(struct super_block *sb, unsigned char *from,
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8a9657d7f7c6..42b8c57795cb 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -22,7 +22,6 @@
22#include "udfdecl.h" 22#include "udfdecl.h"
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/buffer_head.h>
26 25
27#include "udf_i.h" 26#include "udf_i.h"
28#include "udf_sb.h" 27#include "udf_sb.h"
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index c84ec010a676..042ddbf110cc 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -35,9 +35,7 @@
35 35
36const struct file_operations ufs_file_operations = { 36const struct file_operations ufs_file_operations = {
37 .llseek = generic_file_llseek, 37 .llseek = generic_file_llseek,
38 .read = new_sync_read,
39 .read_iter = generic_file_read_iter, 38 .read_iter = generic_file_read_iter,
40 .write = new_sync_write,
41 .write_iter = generic_file_write_iter, 39 .write_iter = generic_file_write_iter,
42 .mmap = generic_file_mmap, 40 .mmap = generic_file_mmap,
43 .open = generic_file_open, 41 .open = generic_file_open,
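The deleted .read/.write lines above reflect a VFS-wide change in this series: plain read()/write() are now synthesized from ->read_iter/->write_iter, so new_sync_read/new_sync_write no longer need to be wired up per filesystem. A converted file_operations then looks roughly like the following, with a hypothetical name:

const struct file_operations example_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,	/* read() derived */
	.write_iter	= generic_file_write_iter,	/* write() derived */
	.mmap		= generic_file_mmap,
	.open		= generic_file_open,
};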
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a6fbf4472017..516162be1398 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -260,6 +260,7 @@ xfs_alloc_fix_len(
260 rlen = rlen - (k - args->mod); 260 rlen = rlen - (k - args->mod);
261 else 261 else
262 rlen = rlen - args->prod + (args->mod - k); 262 rlen = rlen - args->prod + (args->mod - k);
263 /* casts to (int) catch length underflows */
263 if ((int)rlen < (int)args->minlen) 264 if ((int)rlen < (int)args->minlen)
264 return; 265 return;
265 ASSERT(rlen >= args->minlen && rlen <= args->maxlen); 266 ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
@@ -286,7 +287,8 @@ xfs_alloc_fix_minleft(
286 if (diff >= 0) 287 if (diff >= 0)
287 return 1; 288 return 1;
288 args->len += diff; /* shrink the allocated space */ 289 args->len += diff; /* shrink the allocated space */
289 if (args->len >= args->minlen) 290 /* casts to (int) catch length underflows */
291 if ((int)args->len >= (int)args->minlen)
290 return 1; 292 return 1;
291 args->agbno = NULLAGBLOCK; 293 args->agbno = NULLAGBLOCK;
292 return 0; 294 return 0;
@@ -315,6 +317,9 @@ xfs_alloc_fixup_trees(
315 xfs_agblock_t nfbno2; /* second new free startblock */ 317 xfs_agblock_t nfbno2; /* second new free startblock */
316 xfs_extlen_t nflen1=0; /* first new free length */ 318 xfs_extlen_t nflen1=0; /* first new free length */
317 xfs_extlen_t nflen2=0; /* second new free length */ 319 xfs_extlen_t nflen2=0; /* second new free length */
320 struct xfs_mount *mp;
321
322 mp = cnt_cur->bc_mp;
318 323
319 /* 324 /*
320 * Look up the record in the by-size tree if necessary. 325 * Look up the record in the by-size tree if necessary.
@@ -323,13 +328,13 @@ xfs_alloc_fixup_trees(
323#ifdef DEBUG 328#ifdef DEBUG
324 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 329 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
325 return error; 330 return error;
326 XFS_WANT_CORRUPTED_RETURN( 331 XFS_WANT_CORRUPTED_RETURN(mp,
327 i == 1 && nfbno1 == fbno && nflen1 == flen); 332 i == 1 && nfbno1 == fbno && nflen1 == flen);
328#endif 333#endif
329 } else { 334 } else {
330 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 335 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
331 return error; 336 return error;
332 XFS_WANT_CORRUPTED_RETURN(i == 1); 337 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
333 } 338 }
334 /* 339 /*
335 * Look up the record in the by-block tree if necessary. 340 * Look up the record in the by-block tree if necessary.
@@ -338,13 +343,13 @@ xfs_alloc_fixup_trees(
338#ifdef DEBUG 343#ifdef DEBUG
339 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 344 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
340 return error; 345 return error;
341 XFS_WANT_CORRUPTED_RETURN( 346 XFS_WANT_CORRUPTED_RETURN(mp,
342 i == 1 && nfbno1 == fbno && nflen1 == flen); 347 i == 1 && nfbno1 == fbno && nflen1 == flen);
343#endif 348#endif
344 } else { 349 } else {
345 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 350 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
346 return error; 351 return error;
347 XFS_WANT_CORRUPTED_RETURN(i == 1); 352 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
348 } 353 }
349 354
350#ifdef DEBUG 355#ifdef DEBUG
@@ -355,7 +360,7 @@ xfs_alloc_fixup_trees(
355 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); 360 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
356 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); 361 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
357 362
358 XFS_WANT_CORRUPTED_RETURN( 363 XFS_WANT_CORRUPTED_RETURN(mp,
359 bnoblock->bb_numrecs == cntblock->bb_numrecs); 364 bnoblock->bb_numrecs == cntblock->bb_numrecs);
360 } 365 }
361#endif 366#endif
@@ -386,25 +391,25 @@ xfs_alloc_fixup_trees(
386 */ 391 */
387 if ((error = xfs_btree_delete(cnt_cur, &i))) 392 if ((error = xfs_btree_delete(cnt_cur, &i)))
388 return error; 393 return error;
389 XFS_WANT_CORRUPTED_RETURN(i == 1); 394 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
390 /* 395 /*
391 * Add new by-size btree entry(s). 396 * Add new by-size btree entry(s).
392 */ 397 */
393 if (nfbno1 != NULLAGBLOCK) { 398 if (nfbno1 != NULLAGBLOCK) {
394 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 399 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
395 return error; 400 return error;
396 XFS_WANT_CORRUPTED_RETURN(i == 0); 401 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
397 if ((error = xfs_btree_insert(cnt_cur, &i))) 402 if ((error = xfs_btree_insert(cnt_cur, &i)))
398 return error; 403 return error;
399 XFS_WANT_CORRUPTED_RETURN(i == 1); 404 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
400 } 405 }
401 if (nfbno2 != NULLAGBLOCK) { 406 if (nfbno2 != NULLAGBLOCK) {
402 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 407 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
403 return error; 408 return error;
404 XFS_WANT_CORRUPTED_RETURN(i == 0); 409 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
405 if ((error = xfs_btree_insert(cnt_cur, &i))) 410 if ((error = xfs_btree_insert(cnt_cur, &i)))
406 return error; 411 return error;
407 XFS_WANT_CORRUPTED_RETURN(i == 1); 412 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
408 } 413 }
409 /* 414 /*
410 * Fix up the by-block btree entry(s). 415 * Fix up the by-block btree entry(s).
@@ -415,7 +420,7 @@ xfs_alloc_fixup_trees(
415 */ 420 */
416 if ((error = xfs_btree_delete(bno_cur, &i))) 421 if ((error = xfs_btree_delete(bno_cur, &i)))
417 return error; 422 return error;
418 XFS_WANT_CORRUPTED_RETURN(i == 1); 423 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
419 } else { 424 } else {
420 /* 425 /*
421 * Update the by-block entry to start later|be shorter. 426 * Update the by-block entry to start later|be shorter.
@@ -429,10 +434,10 @@ xfs_alloc_fixup_trees(
429 */ 434 */
430 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 435 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
431 return error; 436 return error;
432 XFS_WANT_CORRUPTED_RETURN(i == 0); 437 XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
433 if ((error = xfs_btree_insert(bno_cur, &i))) 438 if ((error = xfs_btree_insert(bno_cur, &i)))
434 return error; 439 return error;
435 XFS_WANT_CORRUPTED_RETURN(i == 1); 440 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
436 } 441 }
437 return 0; 442 return 0;
438} 443}
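Every XFS hunk in this file threads a struct xfs_mount * into XFS_WANT_CORRUPTED_RETURN()/XFS_WANT_CORRUPTED_GOTO(), pulled either from the btree cursor (as the new local above does) or from args->mp, presumably so the corruption report can be attributed to a specific filesystem. The converted call pattern, as a fragment-level sketch:

	struct xfs_mount *mp = cnt_cur->bc_mp;	/* cursor carries the mount */

	error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i);
	if (error)
		return error;
	/* If the lookup did not find exactly one record, report corruption
	 * against mp and bail out with an EFSCORRUPTED-style error. */
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);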
@@ -682,7 +687,7 @@ xfs_alloc_ag_vextent_exact(
682 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); 687 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
683 if (error) 688 if (error)
684 goto error0; 689 goto error0;
685 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 690 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
686 ASSERT(fbno <= args->agbno); 691 ASSERT(fbno <= args->agbno);
687 692
688 /* 693 /*
@@ -783,7 +788,7 @@ xfs_alloc_find_best_extent(
783 error = xfs_alloc_get_rec(*scur, sbno, slen, &i); 788 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
784 if (error) 789 if (error)
785 goto error0; 790 goto error0;
786 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 791 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
787 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); 792 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
788 793
789 /* 794 /*
@@ -946,7 +951,7 @@ restart:
946 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, 951 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
947 &ltlen, &i))) 952 &ltlen, &i)))
948 goto error0; 953 goto error0;
949 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 954 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
950 if (ltlen >= args->minlen) 955 if (ltlen >= args->minlen)
951 break; 956 break;
 			if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
@@ -966,7 +971,7 @@ restart:
 			 */
 			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			xfs_alloc_compute_aligned(args, ltbno, ltlen,
 						  &ltbnoa, &ltlena);
 			if (ltlena < args->minlen)
@@ -999,7 +1004,7 @@ restart:
 			cnt_cur->bc_ptrs[0] = besti;
 			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 			args->len = blen;
 			if (!xfs_alloc_fix_minleft(args)) {
@@ -1088,7 +1093,7 @@ restart:
 		if (bno_cur_lt) {
 			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			xfs_alloc_compute_aligned(args, ltbno, ltlen,
 						  &ltbnoa, &ltlena);
 			if (ltlena >= args->minlen)
@@ -1104,7 +1109,7 @@ restart:
 		if (bno_cur_gt) {
 			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			xfs_alloc_compute_aligned(args, gtbno, gtlen,
 						  &gtbnoa, &gtlena);
 			if (gtlena >= args->minlen)
@@ -1303,7 +1308,7 @@ restart:
 		error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
 		if (error)
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 
 		xfs_alloc_compute_aligned(args, fbno, flen,
 					  &rbno, &rlen);
@@ -1342,7 +1347,7 @@ restart:
 	 * This can't happen in the second case above.
 	 */
 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
 			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
 	if (rlen < args->maxlen) {
 		xfs_agblock_t	bestfbno;
@@ -1362,13 +1367,13 @@ restart:
 			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
 					&i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			if (flen < bestrlen)
 				break;
 			xfs_alloc_compute_aligned(args, fbno, flen,
 						  &rbno, &rlen);
 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
 				(rlen <= flen && rbno + rlen <= fbno + flen),
 				error0);
 			if (rlen > bestrlen) {
@@ -1383,7 +1388,7 @@ restart:
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
 				&i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		rlen = bestrlen;
 		rbno = bestrbno;
 		flen = bestflen;
@@ -1408,7 +1413,7 @@ restart:
 	if (!xfs_alloc_fix_minleft(args))
 		goto out_nominleft;
 	rlen = args->len;
-	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
 	/*
 	 * Allocate and initialize a cursor for the by-block tree.
 	 */
@@ -1422,7 +1427,7 @@ restart:
 	cnt_cur = bno_cur = NULL;
 	args->len = rlen;
 	args->agbno = rbno;
-	XFS_WANT_CORRUPTED_GOTO(
+	XFS_WANT_CORRUPTED_GOTO(args->mp,
 		args->agbno + args->len <=
 			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 		error0);
@@ -1467,7 +1472,7 @@ xfs_alloc_ag_vextent_small(
 		if (i) {
 			if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		}
 	/*
 	 * Nothing in the btree, try the freelist.  Make sure
@@ -1493,7 +1498,7 @@ xfs_alloc_ag_vextent_small(
 		}
 		args->len = 1;
 		args->agbno = fbno;
-		XFS_WANT_CORRUPTED_GOTO(
+		XFS_WANT_CORRUPTED_GOTO(args->mp,
 			args->agbno + args->len <=
 				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 			error0);
@@ -1579,7 +1584,7 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * It's not contiguous, though.
 		 */
@@ -1591,7 +1596,8 @@ xfs_free_ag_extent(
 			 * space was invalid, it's (partly) already free.
 			 * Very bad.
 			 */
-			XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+			XFS_WANT_CORRUPTED_GOTO(mp,
+						ltbno + ltlen <= bno, error0);
 		}
 	}
 	/*
@@ -1606,7 +1612,7 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * It's not contiguous, though.
 		 */
@@ -1618,7 +1624,7 @@ xfs_free_ag_extent(
 			 * space was invalid, it's (partly) already free.
 			 * Very bad.
 			 */
-			XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
 		}
 	}
 	/*
@@ -1635,31 +1641,31 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Delete the old by-size entry on the right.
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Delete the old by-block entry for the right block.
 		 */
 		if ((error = xfs_btree_delete(bno_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Move the by-block cursor back to the left neighbor.
 		 */
 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 #ifdef DEBUG
 		/*
 		 * Check that this is the right record: delete didn't
@@ -1672,7 +1678,7 @@ xfs_free_ag_extent(
 			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
 					&i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(
+			XFS_WANT_CORRUPTED_GOTO(mp,
 				i == 1 && xxbno == ltbno && xxlen == ltlen,
 				error0);
 		}
@@ -1695,17 +1701,17 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Back up the by-block cursor to the left neighbor, and
 		 * update its length.
 		 */
 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		nbno = ltbno;
 		nlen = len + ltlen;
 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
@@ -1721,10 +1727,10 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Update the starting block and length of the right
 		 * neighbor in the by-block tree.
@@ -1743,7 +1749,7 @@ xfs_free_ag_extent(
 		nlen = len;
 		if ((error = xfs_btree_insert(bno_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 	}
 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 	bno_cur = NULL;
@@ -1752,10 +1758,10 @@ xfs_free_ag_extent(
 	 */
 	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
 		goto error0;
-	XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
 	if ((error = xfs_btree_insert(cnt_cur, &i)))
 		goto error0;
-	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 	cnt_cur = NULL;
 
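The recurring change in the xfs_alloc.c hunks above threads a struct xfs_mount
into XFS_WANT_CORRUPTED_GOTO so a failed invariant can be reported against a
specific filesystem. A minimal sketch of the two-argument form implied by these
call sites follows; the real definition lives outside this diff, so the report
helper and error level named here are assumptions, not quoted kernel code:

	/*
	 * Sketch only, inferred from the call sites above: the substance is
	 * that 'mp' now reaches the corruption report before bailing out.
	 * XFS_ERROR_REPORT() and XFS_ERRLEVEL_LOW are assumed names.
	 */
	#define XFS_WANT_CORRUPTED_GOTO(mp, expr, l)			\
	{								\
		int	fs_is_ok = (expr);				\
		ASSERT(fs_is_ok);					\
		if (!fs_is_ok) {					\
			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO",	\
					 XFS_ERRLEVEL_LOW, (mp));	\
			error = -EFSCORRUPTED;				\
			goto l;						\
		}							\
	}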
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 15105dbc9e28..04e79d57bca6 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -86,8 +86,83 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
 			int move_count);
 STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
 
+/*
+ * attr3 block 'firstused' conversion helpers.
+ *
+ * firstused refers to the offset of the first used byte of the nameval region
+ * of an attr leaf block. The region starts at the tail of the block and expands
+ * backwards towards the middle. As such, firstused is initialized to the block
+ * size for an empty leaf block and is reduced from there.
+ *
+ * The attr3 block size is pegged to the fsb size and the maximum fsb is 64k.
+ * The in-core firstused field is 32-bit and thus supports the maximum fsb size.
+ * The on-disk field is only 16-bit, however, and overflows at 64k. Since this
+ * only occurs at exactly 64k, we use zero as a magic on-disk value to represent
+ * the attr block size. The following helpers manage the conversion between the
+ * in-core and on-disk formats.
+ */
+
+static void
+xfs_attr3_leaf_firstused_from_disk(
+	struct xfs_da_geometry		*geo,
+	struct xfs_attr3_icleaf_hdr	*to,
+	struct xfs_attr_leafblock	*from)
+{
+	struct xfs_attr3_leaf_hdr	*hdr3;
+
+	if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
+		hdr3 = (struct xfs_attr3_leaf_hdr *) from;
+		to->firstused = be16_to_cpu(hdr3->firstused);
+	} else {
+		to->firstused = be16_to_cpu(from->hdr.firstused);
+	}
+
+	/*
+	 * Convert from the magic fsb size value to actual blocksize. This
+	 * should only occur for empty blocks when the block size overflows
+	 * 16-bits.
+	 */
+	if (to->firstused == XFS_ATTR3_LEAF_NULLOFF) {
+		ASSERT(!to->count && !to->usedbytes);
+		ASSERT(geo->blksize > USHRT_MAX);
+		to->firstused = geo->blksize;
+	}
+}
+
+static void
+xfs_attr3_leaf_firstused_to_disk(
+	struct xfs_da_geometry		*geo,
+	struct xfs_attr_leafblock	*to,
+	struct xfs_attr3_icleaf_hdr	*from)
+{
+	struct xfs_attr3_leaf_hdr	*hdr3;
+	uint32_t			firstused;
+
+	/* magic value should only be seen on disk */
+	ASSERT(from->firstused != XFS_ATTR3_LEAF_NULLOFF);
+
+	/*
+	 * Scale down the 32-bit in-core firstused value to the 16-bit on-disk
+	 * value. This only overflows at the max supported value of 64k. Use the
+	 * magic on-disk value to represent block size in this case.
+	 */
+	firstused = from->firstused;
+	if (firstused > USHRT_MAX) {
+		ASSERT(from->firstused == geo->blksize);
+		firstused = XFS_ATTR3_LEAF_NULLOFF;
+	}
+
+	if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
+		hdr3 = (struct xfs_attr3_leaf_hdr *) to;
+		hdr3->firstused = cpu_to_be16(firstused);
+	} else {
+		to->hdr.firstused = cpu_to_be16(firstused);
+	}
+}
+
 void
 xfs_attr3_leaf_hdr_from_disk(
+	struct xfs_da_geometry		*geo,
 	struct xfs_attr3_icleaf_hdr	*to,
 	struct xfs_attr_leafblock	*from)
 {
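The conversion scheme described in the new helpers above only hits the magic
value at exactly a 64k attr block, where firstused == blksize no longer fits in
the 16-bit on-disk field. A standalone sketch of the round-trip arithmetic,
runnable in userspace (the NULLOFF constant is taken to be zero per the comment
above, and the helper names here are invented for illustration):

	#include <assert.h>
	#include <limits.h>
	#include <stdint.h>

	#define ATTR3_LEAF_NULLOFF	0	/* assumed on-disk magic value */

	/* scale the 32-bit in-core firstused down to the 16-bit disk field */
	static uint16_t firstused_to_disk(uint32_t firstused, uint32_t blksize)
	{
		if (firstused > USHRT_MAX) {
			/* only an empty 64k block can legally overflow */
			assert(firstused == blksize);
			return ATTR3_LEAF_NULLOFF;
		}
		return (uint16_t)firstused;
	}

	/* expand the on-disk field back to the in-core value */
	static uint32_t firstused_from_disk(uint16_t ondisk, uint32_t blksize)
	{
		if (ondisk == ATTR3_LEAF_NULLOFF)
			return blksize;		/* empty 64k block */
		return ondisk;
	}

	int main(void)
	{
		uint32_t blksize = 65536;	/* the one overflowing geometry */

		assert(firstused_to_disk(blksize, blksize) == ATTR3_LEAF_NULLOFF);
		assert(firstused_from_disk(ATTR3_LEAF_NULLOFF, blksize) == blksize);
		assert(firstused_to_disk(4096, blksize) == 4096);	/* normal case */
		return 0;
	}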
@@ -104,7 +179,7 @@ xfs_attr3_leaf_hdr_from_disk(
 		to->magic = be16_to_cpu(hdr3->info.hdr.magic);
 		to->count = be16_to_cpu(hdr3->count);
 		to->usedbytes = be16_to_cpu(hdr3->usedbytes);
-		to->firstused = be16_to_cpu(hdr3->firstused);
+		xfs_attr3_leaf_firstused_from_disk(geo, to, from);
 		to->holes = hdr3->holes;
 
 		for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -118,7 +193,7 @@ xfs_attr3_leaf_hdr_from_disk(
 	to->magic = be16_to_cpu(from->hdr.info.magic);
 	to->count = be16_to_cpu(from->hdr.count);
 	to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
-	to->firstused = be16_to_cpu(from->hdr.firstused);
+	xfs_attr3_leaf_firstused_from_disk(geo, to, from);
 	to->holes = from->hdr.holes;
 
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -129,10 +204,11 @@ xfs_attr3_leaf_hdr_from_disk(
 
 void
 xfs_attr3_leaf_hdr_to_disk(
+	struct xfs_da_geometry		*geo,
 	struct xfs_attr_leafblock	*to,
 	struct xfs_attr3_icleaf_hdr	*from)
 {
 	int	i;
 
 	ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
 	       from->magic == XFS_ATTR3_LEAF_MAGIC);
@@ -145,7 +221,7 @@ xfs_attr3_leaf_hdr_to_disk(
 		hdr3->info.hdr.magic = cpu_to_be16(from->magic);
 		hdr3->count = cpu_to_be16(from->count);
 		hdr3->usedbytes = cpu_to_be16(from->usedbytes);
-		hdr3->firstused = cpu_to_be16(from->firstused);
+		xfs_attr3_leaf_firstused_to_disk(geo, to, from);
 		hdr3->holes = from->holes;
 		hdr3->pad1 = 0;
 
@@ -160,7 +236,7 @@ xfs_attr3_leaf_hdr_to_disk(
 	to->hdr.info.magic = cpu_to_be16(from->magic);
 	to->hdr.count = cpu_to_be16(from->count);
 	to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
-	to->hdr.firstused = cpu_to_be16(from->firstused);
+	xfs_attr3_leaf_firstused_to_disk(geo, to, from);
 	to->hdr.holes = from->holes;
 	to->hdr.pad1 = 0;
 
@@ -178,7 +254,7 @@ xfs_attr3_leaf_verify(
 	struct xfs_attr_leafblock *leaf = bp->b_addr;
 	struct xfs_attr3_icleaf_hdr ichdr;
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
@@ -757,9 +833,10 @@ xfs_attr_shortform_allfit(
 	struct xfs_attr3_icleaf_hdr leafhdr;
 	int			bytes;
 	int			i;
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
 	entry = xfs_attr3_leaf_entryp(leaf);
 
 	bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -812,7 +889,7 @@ xfs_attr3_leaf_to_shortform(
 	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	entry = xfs_attr3_leaf_entryp(leaf);
 
 	/* XXX (dgc): buffer is about to be marked stale - why zero it? */
@@ -923,7 +1000,7 @@ xfs_attr3_leaf_to_node(
 	btree = dp->d_ops->node_tree_p(node);
 
 	leaf = bp2->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf);
 	entries = xfs_attr3_leaf_entryp(leaf);
 
 	/* both on-disk, don't endian-flip twice */
@@ -988,7 +1065,7 @@ xfs_attr3_leaf_create(
 	}
 	ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
 
-	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
 	xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
 
 	*bpp = bp;
@@ -1073,7 +1150,7 @@ xfs_attr3_leaf_add(
 	trace_xfs_attr_leaf_add(args);
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(args->index >= 0 && args->index <= ichdr.count);
 	entsize = xfs_attr_leaf_newentsize(args, NULL);
 
@@ -1126,7 +1203,7 @@ xfs_attr3_leaf_add(
 	tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
 
 out_log_hdr:
-	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
 	xfs_trans_log_buf(args->trans, bp,
 		XFS_DA_LOGRANGE(leaf, &leaf->hdr,
 				xfs_attr3_leaf_hdr_size(leaf)));
@@ -1294,7 +1371,7 @@ xfs_attr3_leaf_compact(
 			ichdr_dst->freemap[0].base;
 
 	/* write the header back to initialise the underlying buffer */
-	xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
 
 	/*
 	 * Copy all entry's in the same (sorted) order,
@@ -1344,9 +1421,10 @@ xfs_attr_leaf_order(
 {
 	struct xfs_attr3_icleaf_hdr ichdr1;
 	struct xfs_attr3_icleaf_hdr ichdr2;
+	struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
-	xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
 	return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
 }
 
@@ -1388,8 +1466,8 @@ xfs_attr3_leaf_rebalance(
 	ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
 	leaf1 = blk1->bp->b_addr;
 	leaf2 = blk2->bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
-	xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr1, leaf1);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, leaf2);
 	ASSERT(ichdr2.count == 0);
 	args = state->args;
 
@@ -1490,8 +1568,8 @@ xfs_attr3_leaf_rebalance(
 						ichdr1.count, count);
 	}
 
-	xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
-	xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
+	xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf1, &ichdr1);
+	xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf2, &ichdr2);
 	xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
 	xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
 
@@ -1684,7 +1762,7 @@ xfs_attr3_leaf_toosmall(
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	leaf = blk->bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
 	bytes = xfs_attr3_leaf_hdr_size(leaf) +
 		ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
 		ichdr.usedbytes;
@@ -1740,7 +1818,7 @@ xfs_attr3_leaf_toosmall(
 		if (error)
 			return error;
 
-		xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
+		xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, bp->b_addr);
 
 		bytes = state->args->geo->blksize -
 			(state->args->geo->blksize >> 2) -
@@ -1805,7 +1883,7 @@ xfs_attr3_leaf_remove(
 	trace_xfs_attr_leaf_remove(args);
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 
 	ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
 	ASSERT(args->index >= 0 && args->index < ichdr.count);
@@ -1918,12 +1996,11 @@ xfs_attr3_leaf_remove(
 				tmp = be16_to_cpu(entry->nameidx);
 		}
 		ichdr.firstused = tmp;
-		if (!ichdr.firstused)
-			ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
+		ASSERT(ichdr.firstused != 0);
 	} else {
 		ichdr.holes = 1;	/* mark as needing compaction */
 	}
-	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
 	xfs_trans_log_buf(args->trans, bp,
 			  XFS_DA_LOGRANGE(leaf, &leaf->hdr,
 					  xfs_attr3_leaf_hdr_size(leaf)));
@@ -1957,8 +2034,8 @@ xfs_attr3_leaf_unbalance(
 
 	drop_leaf = drop_blk->bp->b_addr;
 	save_leaf = save_blk->bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
-	xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
 	entry = xfs_attr3_leaf_entryp(drop_leaf);
 
 	/*
@@ -2012,7 +2089,7 @@ xfs_attr3_leaf_unbalance(
 		tmphdr.firstused = state->args->geo->blksize;
 
 		/* write the header to the temp buffer to initialise it */
-		xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
+		xfs_attr3_leaf_hdr_to_disk(state->args->geo, tmp_leaf, &tmphdr);
 
 		if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
 					 drop_blk->bp, &drophdr)) {
@@ -2039,7 +2116,7 @@ xfs_attr3_leaf_unbalance(
 		kmem_free(tmp_leaf);
 	}
 
-	xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
+	xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
 	xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
 			  state->args->geo->blksize - 1);
 
@@ -2085,7 +2162,7 @@ xfs_attr3_leaf_lookup_int(
 	trace_xfs_attr_leaf_lookup(args);
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	entries = xfs_attr3_leaf_entryp(leaf);
 	ASSERT(ichdr.count < args->geo->blksize / 8);
 
@@ -2190,7 +2267,7 @@ xfs_attr3_leaf_getvalue(
 	int		valuelen;
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(ichdr.count < args->geo->blksize / 8);
 	ASSERT(args->index < ichdr.count);
 
@@ -2391,8 +2468,9 @@ xfs_attr_leaf_lasthash(
 {
 	struct xfs_attr3_icleaf_hdr ichdr;
 	struct xfs_attr_leaf_entry *entries;
+	struct xfs_mount *mp = bp->b_target->bt_mount;
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
 	entries = xfs_attr3_leaf_entryp(bp->b_addr);
 	if (count)
 		*count = ichdr.count;
@@ -2486,7 +2564,7 @@ xfs_attr3_leaf_clearflag(
 	ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
 
 #ifdef DEBUG
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(args->index < ichdr.count);
 	ASSERT(args->index >= 0);
 
@@ -2550,7 +2628,7 @@ xfs_attr3_leaf_setflag(
 
 	leaf = bp->b_addr;
#ifdef DEBUG
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(args->index < ichdr.count);
 	ASSERT(args->index >= 0);
 #endif
@@ -2629,11 +2707,11 @@ xfs_attr3_leaf_flipflags(
 	entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
 
 #ifdef DEBUG
-	xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr1, leaf1);
 	ASSERT(args->index < ichdr1.count);
 	ASSERT(args->index >= 0);
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr2, leaf2);
 	ASSERT(args->index2 < ichdr2.count);
 	ASSERT(args->index2 >= 0);
 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index e2929da7c3ba..025c4b820c03 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -100,9 +100,11 @@ int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
 int	xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
 			xfs_dablk_t bno, xfs_daddr_t mappedbno,
 			struct xfs_buf **bpp);
-void	xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
+void	xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
+				     struct xfs_attr3_icleaf_hdr *to,
 				     struct xfs_attr_leafblock *from);
-void	xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
+void	xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
+				   struct xfs_attr_leafblock *to,
 				   struct xfs_attr3_icleaf_hdr *from);
 
 #endif	/* __XFS_ATTR_LEAF_H__ */
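With the prototypes above, every header conversion now carries the da_geometry,
since expanding the magic on-disk firstused value requires the block size.
Callers holding an xfs_da_args pass args->geo (or state->args->geo); callers
that only hold a buffer, such as xfs_attr_leaf_order() and
xfs_attr_leaf_lasthash(), recover it through the buffer target. A sketch of the
two call-site patterns seen in this diff (the pointer chain is taken from those
call sites, not from the headers):

	/* with a da_args in hand */
	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);

	/* with only a buffer: walk bp->b_target->bt_mount to the geometry */
	struct xfs_mount *mp = bp->b_target->bt_mount;
	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);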
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 61ec015dca16..aeffeaaac0ec 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
 	}
 }
 
-/*
- * Debug/sanity checking code
- */
-
-STATIC int
-xfs_bmap_sanity_check(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp,
-	int			level)
-{
-	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-
-	if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
-	    block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
-		return 0;
-
-	if (be16_to_cpu(block->bb_level) != level ||
-	    be16_to_cpu(block->bb_numrecs) == 0 ||
-	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-		return 0;
-
-	return 1;
-}
-
 #ifdef DEBUG
 STATIC struct xfs_buf *
 xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
 			goto error_norelse;
 		}
 		block = XFS_BUF_TO_BLOCK(bp);
-		XFS_WANT_CORRUPTED_GOTO(
-			xfs_bmap_sanity_check(mp, bp, level),
-			error0);
 		if (level == 0)
 			break;
 
@@ -424,7 +397,8 @@ xfs_bmap_check_leaf_extents(
 		xfs_check_block(block, mp, 0, 0);
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
-		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+					XFS_FSB_SANITY_CHECK(mp, bno), error0);
 		if (bp_release) {
 			bp_release = 0;
 			xfs_trans_brelse(NULL, bp);
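XFS_FSB_SANITY_CHECK(), which the hunk above now routes through the mp-aware
macro, guards the child-block pointers read out of the bmbt. Roughly, it
verifies that a 64-bit filesystem block number decodes to a valid allocation
group and a valid block within that group; a sketch under that assumption (the
exact macro is defined elsewhere and is not quoted here):

	/* assumed shape of the check, not the verbatim kernel definition */
	#define XFS_FSB_SANITY_CHECK(mp, fsb)				\
		(XFS_FSB_TO_AGNO(mp, fsb) < (mp)->m_sb.sb_agcount &&	\
		 XFS_FSB_TO_AGBNO(mp, fsb) < (mp)->m_sb.sb_agblocks)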
@@ -1029,7 +1003,7 @@ xfs_bmap_add_attrfork_btree(
 		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
 			goto error0;
 		/* must be at least one entry */
-		XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
 		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 			goto error0;
 		if (stat == 0) {
@@ -1311,14 +1285,12 @@ xfs_bmap_read_extents(
 		if (error)
 			return error;
 		block = XFS_BUF_TO_BLOCK(bp);
-		XFS_WANT_CORRUPTED_GOTO(
-			xfs_bmap_sanity_check(mp, bp, level),
-			error0);
 		if (level == 0)
 			break;
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
-		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+					XFS_FSB_SANITY_CHECK(mp, bno), error0);
 		xfs_trans_brelse(tp, bp);
 	}
 	/*
@@ -1345,9 +1317,6 @@ xfs_bmap_read_extents(
 				XFS_ERRLEVEL_LOW, ip->i_mount, block);
 			goto error0;
 		}
-		XFS_WANT_CORRUPTED_GOTO(
-			xfs_bmap_sanity_check(mp, bp, 0),
-			error0);
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
@@ -1755,7 +1724,9 @@ xfs_bmap_add_extent_delay_real(
 	xfs_filblks_t		temp=0;	/* value for da_new calculations */
 	xfs_filblks_t		temp2=0;/* value for da_new calculations */
 	int			tmp_rval;	/* partial logging flags */
+	struct xfs_mount	*mp;
 
+	mp = bma->tp ? bma->tp->t_mountp : NULL;
 	ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
 
 	ASSERT(bma->idx >= 0);
@@ -1866,15 +1837,15 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_blockcount, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_btree_delete(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_btree_decrement(bma->cur, 0, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -1907,7 +1878,7 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -1938,7 +1909,7 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_blockcount, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
 					new->br_startblock,
 					PREV.br_blockcount +
@@ -1968,12 +1939,12 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 		break;
 
@@ -2001,7 +1972,7 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -2038,12 +2009,12 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 
 		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2084,7 +2055,7 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_blockcount, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, new->br_startoff,
 					new->br_startblock,
 					new->br_blockcount +
@@ -2122,12 +2093,12 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 
 		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2191,12 +2162,12 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 
 		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2212,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
 		diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
 			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
 		if (diff > 0) {
-			error = xfs_icsb_modify_counters(bma->ip->i_mount,
-					XFS_SBS_FDBLOCKS,
-					-((int64_t)diff), 0);
+			error = xfs_mod_fdblocks(bma->ip->i_mount,
+						 -((int64_t)diff), false);
 			ASSERT(!error);
 			if (error)
 				goto done;
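From this hunk on, the open-coded per-cpu superblock counter calls are replaced
by dedicated helpers. Judging only from the converted call sites in this diff,
the new interfaces look like the sketch below; the xfs_mount internals behind
them are not shown here and the comment reflects an inference, not quoted
documentation:

	/*
	 * Assumed prototypes, inferred from the call sites: a signed delta is
	 * applied to the free-data-block or free-rt-extent counter, and
	 * 'rsvd' selects whether the reserved pool may be dipped into (every
	 * caller converted in this diff passes false).
	 */
	int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool rsvd);
	int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);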
@@ -2265,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
 		temp += bma->cur->bc_private.b.allocated;
 		ASSERT(temp <= da_old);
 		if (temp < da_old)
-			xfs_icsb_modify_counters(bma->ip->i_mount,
-					XFS_SBS_FDBLOCKS,
-					(int64_t)(da_old - temp), 0);
+			xfs_mod_fdblocks(bma->ip->i_mount,
+					(int64_t)(da_old - temp), false);
 	}
 
 	/* clear out the allocated field, done with it now in any case. */
@@ -2309,6 +2278,7 @@ xfs_bmap_add_extent_unwritten_real(
 			/* left is 0, right is 1, prev is 2 */
 	int			rval=0;	/* return value (logging flags) */
 	int			state = 0;/* state bits, accessed thru macros */
+	struct xfs_mount	*mp = tp->t_mountp;
 
 	*logflagsp = 0;
 
@@ -2421,19 +2391,19 @@ xfs_bmap_add_extent_unwritten_real(
 				RIGHT.br_startblock,
 				RIGHT.br_blockcount, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_delete(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_decrement(cur, 0, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_delete(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_decrement(cur, 0, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 				LEFT.br_startblock,
 				LEFT.br_blockcount + PREV.br_blockcount +
@@ -2464,13 +2434,13 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_startblock, PREV.br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_delete(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_decrement(cur, 0, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
 				LEFT.br_startblock,
 				LEFT.br_blockcount + PREV.br_blockcount,
@@ -2499,13 +2469,13 @@ xfs_bmap_add_extent_unwritten_real(
 				RIGHT.br_startblock,
 				RIGHT.br_blockcount, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_delete(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_btree_decrement(cur, 0, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur, new->br_startoff,
 				new->br_startblock,
 				new->br_blockcount + RIGHT.br_blockcount,
@@ -2532,7 +2502,7 @@ xfs_bmap_add_extent_unwritten_real(
 				new->br_startblock, new->br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur, new->br_startoff,
 				new->br_startblock, new->br_blockcount,
 				newext)))
@@ -2569,7 +2539,7 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_startblock, PREV.br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur,
 				PREV.br_startoff + new->br_blockcount,
 				PREV.br_startblock + new->br_blockcount,
@@ -2611,7 +2581,7 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_startblock, PREV.br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur,
 				PREV.br_startoff + new->br_blockcount,
 				PREV.br_startblock + new->br_blockcount,
@@ -2621,7 +2591,7 @@ xfs_bmap_add_extent_unwritten_real(
 			cur->bc_rec.b = *new;
 			if ((error = xfs_btree_insert(cur, &i)))
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 		break;
 
@@ -2651,7 +2621,7 @@ xfs_bmap_add_extent_unwritten_real(
 					PREV.br_startblock,
 					PREV.br_blockcount, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
 				PREV.br_startblock,
 				PREV.br_blockcount - new->br_blockcount,
@@ -2689,7 +2659,7 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_startblock, PREV.br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
 				PREV.br_startblock,
 				PREV.br_blockcount - new->br_blockcount,
@@ -2699,11 +2669,11 @@ xfs_bmap_add_extent_unwritten_real(
 				new->br_startblock, new->br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 		cur->bc_rec.b.br_state = XFS_EXT_NORM;
 		if ((error = xfs_btree_insert(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 	}
 	break;
 
@@ -2737,7 +2707,7 @@ xfs_bmap_add_extent_unwritten_real(
 				PREV.br_startblock, PREV.br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		/* new right extent - oldext */
 		if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
 				r[1].br_startblock, r[1].br_blockcount,
@@ -2749,7 +2719,7 @@ xfs_bmap_add_extent_unwritten_real(
 				new->br_startoff - PREV.br_startoff;
 		if ((error = xfs_btree_insert(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		/*
 		 * Reset the cursor to the position of the new extent
 		 * we are about to insert as we can't trust it after
@@ -2759,12 +2729,12 @@ xfs_bmap_add_extent_unwritten_real(
 				new->br_startblock, new->br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 		/* new middle extent - newext */
 		cur->bc_rec.b.br_state = new->br_state;
 		if ((error = xfs_btree_insert(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 	}
 	break;
 
@@ -2944,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
 	}
 	if (oldlen != newlen) {
 		ASSERT(oldlen > newlen);
-		xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-			(int64_t)(oldlen - newlen), 0);
+		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
+				 false);
 		/*
 		 * Nothing to do for disk quota accounting here.
 		 */
@@ -2968,7 +2938,9 @@ xfs_bmap_add_extent_hole_real(
 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
 	int			rval=0;	/* return value (logging flags) */
 	int			state;	/* state bits, accessed thru macros */
+	struct xfs_mount	*mp;
 
+	mp = bma->tp ? bma->tp->t_mountp : NULL;
 	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 
 	ASSERT(bma->idx >= 0);
@@ -3056,15 +3028,15 @@ xfs_bmap_add_extent_hole_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_btree_delete(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_btree_decrement(bma->cur, 0, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, left.br_startoff,
 					left.br_startblock,
 					left.br_blockcount +
@@ -3097,7 +3069,7 @@ xfs_bmap_add_extent_hole_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, left.br_startoff,
 					left.br_startblock,
 					left.br_blockcount +
@@ -3131,7 +3103,7 @@ xfs_bmap_add_extent_hole_real(
 					right.br_blockcount, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, new->br_startoff,
 					new->br_startblock,
 					new->br_blockcount +
@@ -3161,12 +3133,12 @@ xfs_bmap_add_extent_hole_real(
 					new->br_blockcount, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = new->br_state;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 		break;
 	}
@@ -4160,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
 	ASSERT(indlen > 0);
 
 	if (rt) {
-		error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-					  -((int64_t)extsz), 0);
+		error = xfs_mod_frextents(mp, -((int64_t)extsz));
 	} else {
-		error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-						 -((int64_t)alen), 0);
+		error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
 	}
 
 	if (error)
 		goto out_unreserve_quota;
 
-	error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-					 -((int64_t)indlen), 0);
+	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
 	if (error)
 		goto out_unreserve_blocks;
 
@@ -4198,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
 
 out_unreserve_blocks:
 	if (rt)
-		xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+		xfs_mod_frextents(mp, extsz);
 	else
-		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+		xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
 	if (XFS_IS_QUOTA_ON(mp))
 		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -4801,7 +4770,7 @@ xfs_bmap_del_extent(
 				got.br_startblock, got.br_blockcount,
 				&i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 	}
 	da_old = da_new = 0;
 	} else {
@@ -4835,7 +4804,7 @@ xfs_bmap_del_extent(
 		}
 		if ((error = xfs_btree_delete(cur, &i)))
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		break;
 
 	case 2:
@@ -4935,7 +4904,8 @@ xfs_bmap_del_extent(
 						got.br_startblock,
 						temp, &i)))
 					goto done;
-				XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+				XFS_WANT_CORRUPTED_GOTO(mp,
+							i == 1, done);
 				/*
 				 * Update the btree record back
 				 * to the original value.
@@ -4956,7 +4926,7 @@ xfs_bmap_del_extent(
 				error = -ENOSPC;
 				goto done;
 			}
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		} else
 			flags |= xfs_ilog_fext(whichfork);
 		XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5012,10 +4982,8 @@ xfs_bmap_del_extent(
 	 * Nothing to do for disk quota accounting here.
 	 */
 	ASSERT(da_old >= da_new);
-	if (da_old > da_new) {
-		xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-			(int64_t)(da_old - da_new), 0);
-	}
+	if (da_old > da_new)
+		xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
done:
 	*logflagsp = flags;
 	return error;
@@ -5284,14 +5252,13 @@ xfs_bunmapi(
 
 				rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
 				do_div(rtexts, mp->m_sb.sb_rextsize);
-				xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-						(int64_t)rtexts, 0);
+				xfs_mod_frextents(mp, (int64_t)rtexts);
 				(void)xfs_trans_reserve_quota_nblks(NULL,
 					ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_RTBLKS);
 			} else {
-				xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-						(int64_t)del.br_blockcount, 0);
+				xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
+						 false);
 				(void)xfs_trans_reserve_quota_nblks(NULL,
 					ip, -((long)del.br_blockcount), 0,
 					XFS_QMOPT_RES_REGBLKS);
@@ -5453,6 +5420,7 @@ xfs_bmse_merge(
 	struct xfs_bmbt_irec		left;
 	xfs_filblks_t			blockcount;
 	int				error, i;
+	struct xfs_mount		*mp = ip->i_mount;
 
 	xfs_bmbt_get_all(gotp, &got);
 	xfs_bmbt_get_all(leftp, &left);
@@ -5487,19 +5455,19 @@ xfs_bmse_merge(
 					got.br_blockcount, &i);
 	if (error)
5489 return error; 5457 return error;
5490 XFS_WANT_CORRUPTED_RETURN(i == 1); 5458 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5491 5459
5492 error = xfs_btree_delete(cur, &i); 5460 error = xfs_btree_delete(cur, &i);
5493 if (error) 5461 if (error)
5494 return error; 5462 return error;
5495 XFS_WANT_CORRUPTED_RETURN(i == 1); 5463 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5496 5464
5497 /* lookup and update size of the previous extent */ 5465 /* lookup and update size of the previous extent */
5498 error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, 5466 error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
5499 left.br_blockcount, &i); 5467 left.br_blockcount, &i);
5500 if (error) 5468 if (error)
5501 return error; 5469 return error;
5502 XFS_WANT_CORRUPTED_RETURN(i == 1); 5470 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5503 5471
5504 left.br_blockcount = blockcount; 5472 left.br_blockcount = blockcount;
5505 5473
@@ -5518,50 +5486,92 @@ xfs_bmse_shift_one(
5518 int *current_ext, 5486 int *current_ext,
5519 struct xfs_bmbt_rec_host *gotp, 5487 struct xfs_bmbt_rec_host *gotp,
5520 struct xfs_btree_cur *cur, 5488 struct xfs_btree_cur *cur,
5521 int *logflags) 5489 int *logflags,
5490 enum shift_direction direction)
5522{ 5491{
5523 struct xfs_ifork *ifp; 5492 struct xfs_ifork *ifp;
5493 struct xfs_mount *mp;
5524 xfs_fileoff_t startoff; 5494 xfs_fileoff_t startoff;
5525 struct xfs_bmbt_rec_host *leftp; 5495 struct xfs_bmbt_rec_host *adj_irecp;
5526 struct xfs_bmbt_irec got; 5496 struct xfs_bmbt_irec got;
5527 struct xfs_bmbt_irec left; 5497 struct xfs_bmbt_irec adj_irec;
5528 int error; 5498 int error;
5529 int i; 5499 int i;
5500 int total_extents;
5530 5501
5502 mp = ip->i_mount;
5531 ifp = XFS_IFORK_PTR(ip, whichfork); 5503 ifp = XFS_IFORK_PTR(ip, whichfork);
5504 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5532 5505
5533 xfs_bmbt_get_all(gotp, &got); 5506 xfs_bmbt_get_all(gotp, &got);
5534 startoff = got.br_startoff - offset_shift_fsb;
5535 5507
5536 /* delalloc extents should be prevented by caller */ 5508 /* delalloc extents should be prevented by caller */
5537 XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock)); 5509 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5538 5510
5539 /* 5511 if (direction == SHIFT_LEFT) {
5540 * Check for merge if we've got an extent to the left, otherwise make 5512 startoff = got.br_startoff - offset_shift_fsb;
5541 * sure there's enough room at the start of the file for the shift. 5513
5542 */ 5514 /*
5543 if (*current_ext) { 5515 * Check for merge if we've got an extent to the left,
5544 /* grab the left extent and check for a large enough hole */ 5516 * otherwise make sure there's enough room at the start
5545 leftp = xfs_iext_get_ext(ifp, *current_ext - 1); 5517 * of the file for the shift.
5546 xfs_bmbt_get_all(leftp, &left); 5518 */
5519 if (!*current_ext) {
5520 if (got.br_startoff < offset_shift_fsb)
5521 return -EINVAL;
5522 goto update_current_ext;
5523 }
5524 /*
5525 * grab the left extent and check for a large
5526 * enough hole.
5527 */
5528 adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
5529 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5547 5530
5548 if (startoff < left.br_startoff + left.br_blockcount) 5531 if (startoff <
5532 adj_irec.br_startoff + adj_irec.br_blockcount)
5549 return -EINVAL; 5533 return -EINVAL;
5550 5534
5551 /* check whether to merge the extent or shift it down */ 5535 /* check whether to merge the extent or shift it down */
5552 if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { 5536 if (xfs_bmse_can_merge(&adj_irec, &got,
5537 offset_shift_fsb)) {
5553 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5538 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5554 *current_ext, gotp, leftp, cur, 5539 *current_ext, gotp, adj_irecp,
5555 logflags); 5540 cur, logflags);
5556 } 5541 }
5557 } else if (got.br_startoff < offset_shift_fsb) 5542 } else {
5558 return -EINVAL; 5543 startoff = got.br_startoff + offset_shift_fsb;
5559 5544 /* nothing to move if this is the last extent */
5545 if (*current_ext >= (total_extents - 1))
5546 goto update_current_ext;
5547 /*
5548 * If this is not the last extent in the file, make sure there
5549 * is enough room between the current extent and the next extent to
5550 * accommodate the shift.
5551 */
5552 adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
5553 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5554 if (startoff + got.br_blockcount > adj_irec.br_startoff)
5555 return -EINVAL;
5556 /*
5557 * Unlike a left shift (which involves a hole punch),
5558 * a right shift does not modify extent neighbors
5559 * in any way. We should never find mergeable extents
5560 * in this scenario. Check anyway and warn if we
5561 * encounter two extents that could be one.
5562 */
5563 if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
5564 WARN_ON_ONCE(1);
5565 }
5560 /* 5566 /*
5561 * Increment the extent index for the next iteration, update the start 5567 * Increment the extent index for the next iteration, update the start
5562 * offset of the in-core extent and update the btree if applicable. 5568 * offset of the in-core extent and update the btree if applicable.
5563 */ 5569 */
5564 (*current_ext)++; 5570update_current_ext:
5571 if (direction == SHIFT_LEFT)
5572 (*current_ext)++;
5573 else
5574 (*current_ext)--;
5565 xfs_bmbt_set_startoff(gotp, startoff); 5575 xfs_bmbt_set_startoff(gotp, startoff);
5566 *logflags |= XFS_ILOG_CORE; 5576 *logflags |= XFS_ILOG_CORE;
5567 if (!cur) { 5577 if (!cur) {
@@ -5573,18 +5583,18 @@ xfs_bmse_shift_one(
5573 got.br_blockcount, &i); 5583 got.br_blockcount, &i);
5574 if (error) 5584 if (error)
5575 return error; 5585 return error;
5576 XFS_WANT_CORRUPTED_RETURN(i == 1); 5586 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5577 5587
5578 got.br_startoff = startoff; 5588 got.br_startoff = startoff;
5579 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5589 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5580 got.br_blockcount, got.br_state); 5590 got.br_blockcount, got.br_state);
5581} 5591}
5582 5592
5583/* 5593/*
5584 * Shift extent records to the left to cover a hole. 5594 * Shift extent records to the left/right to cover/create a hole.
5585 * 5595 *
5586 * The maximum number of extents to be shifted in a single operation is 5596 * The maximum number of extents to be shifted in a single operation is
5587 * @num_exts. @start_fsb specifies the file offset to start the shift and the 5597 * @num_exts. @stop_fsb specifies the file offset at which to stop the shift and the
5588 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb 5598 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5589 * is the length by which each extent is shifted. If there is no hole to shift 5599 * is the length by which each extent is shifted. If there is no hole to shift
5590 * the extents into, this will be considered an invalid operation and we abort 5600 * the extents into, this will be considered an invalid operation and we abort
@@ -5594,12 +5604,13 @@ int
5594xfs_bmap_shift_extents( 5604xfs_bmap_shift_extents(
5595 struct xfs_trans *tp, 5605 struct xfs_trans *tp,
5596 struct xfs_inode *ip, 5606 struct xfs_inode *ip,
5597 xfs_fileoff_t start_fsb, 5607 xfs_fileoff_t *next_fsb,
5598 xfs_fileoff_t offset_shift_fsb, 5608 xfs_fileoff_t offset_shift_fsb,
5599 int *done, 5609 int *done,
5600 xfs_fileoff_t *next_fsb, 5610 xfs_fileoff_t stop_fsb,
5601 xfs_fsblock_t *firstblock, 5611 xfs_fsblock_t *firstblock,
5602 struct xfs_bmap_free *flist, 5612 struct xfs_bmap_free *flist,
5613 enum shift_direction direction,
5603 int num_exts) 5614 int num_exts)
5604{ 5615{
5605 struct xfs_btree_cur *cur = NULL; 5616 struct xfs_btree_cur *cur = NULL;
@@ -5609,10 +5620,11 @@ xfs_bmap_shift_extents(
5609 struct xfs_ifork *ifp; 5620 struct xfs_ifork *ifp;
5610 xfs_extnum_t nexts = 0; 5621 xfs_extnum_t nexts = 0;
5611 xfs_extnum_t current_ext; 5622 xfs_extnum_t current_ext;
5623 xfs_extnum_t total_extents;
5624 xfs_extnum_t stop_extent;
5612 int error = 0; 5625 int error = 0;
5613 int whichfork = XFS_DATA_FORK; 5626 int whichfork = XFS_DATA_FORK;
5614 int logflags = 0; 5627 int logflags = 0;
5615 int total_extents;
5616 5628
5617 if (unlikely(XFS_TEST_ERROR( 5629 if (unlikely(XFS_TEST_ERROR(
5618 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5630 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5628,6 +5640,8 @@ xfs_bmap_shift_extents(
5628 5640
5629 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5641 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5630 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5642 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5643 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
5644 ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
5631 5645
5632 ifp = XFS_IFORK_PTR(ip, whichfork); 5646 ifp = XFS_IFORK_PTR(ip, whichfork);
5633 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5647 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5645,43 +5659,83 @@ xfs_bmap_shift_extents(
5645 } 5659 }
5646 5660
5647 /* 5661 /*
5662 * There may be delalloc extents in the data fork before the range we
5663 * are collapsing out, so we cannot use the count of real extents here.
5664 * Instead we have to calculate it from the incore fork.
5665 */
5666 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5667 if (total_extents == 0) {
5668 *done = 1;
5669 goto del_cursor;
5670 }
5671
5672 /*
5673 * In case of the first right shift, we need to initialize next_fsb
5674 */
5675 if (*next_fsb == NULLFSBLOCK) {
5676 gotp = xfs_iext_get_ext(ifp, total_extents - 1);
5677 xfs_bmbt_get_all(gotp, &got);
5678 *next_fsb = got.br_startoff;
5679 if (stop_fsb > *next_fsb) {
5680 *done = 1;
5681 goto del_cursor;
5682 }
5683 }
5684
5685 /* Lookup the extent index at which we have to stop */
5686 if (direction == SHIFT_RIGHT) {
5687 gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
5688 /* Make stop_extent exclusive of shift range */
5689 stop_extent--;
5690 } else
5691 stop_extent = total_extents;
5692
5693 /*
5648 * Look up the extent index for the fsb where we start shifting. We can 5694 * Look up the extent index for the fsb where we start shifting. We can
5649 * henceforth iterate with current_ext as extent list changes are locked 5695 * henceforth iterate with current_ext as extent list changes are locked
5650 * out via ilock. 5696 * out via ilock.
5651 * 5697 *
5652 * gotp can be null in 2 cases: 1) if there are no extents or 2) 5698 * gotp can be null in 2 cases: 1) if there are no extents or 2)
5653 * start_fsb lies in a hole beyond which there are no extents. Either 5699 * *next_fsb lies in a hole beyond which there are no extents. Either
5654 * way, we are done. 5700 * way, we are done.
5655 */ 5701 */
5656 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext); 5702 gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
5657 if (!gotp) { 5703 if (!gotp) {
5658 *done = 1; 5704 *done = 1;
5659 goto del_cursor; 5705 goto del_cursor;
5660 } 5706 }
5661 5707
5662 /* 5708 /* some sanity checking before we finally start shifting extents */
5663 * There may be delalloc extents in the data fork before the range we 5709 if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
5664 * are collapsing out, so we cannot use the count of real extents here. 5710 (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
5665 * Instead we have to calculate it from the incore fork. 5711 error = -EIO;
5666 */ 5712 goto del_cursor;
5667 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5713 }
5668 while (nexts++ < num_exts && current_ext < total_extents) { 5714
5715 while (nexts++ < num_exts) {
5669 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5716 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5670 &current_ext, gotp, cur, &logflags); 5717 &current_ext, gotp, cur, &logflags,
5718 direction);
5671 if (error) 5719 if (error)
5672 goto del_cursor; 5720 goto del_cursor;
5721 /*
5722 * If there was an extent merge during the shift, the extent
5723 * count can change. Update the total and grab the next record.
5724 */
5725 if (direction == SHIFT_LEFT) {
5726 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5727 stop_extent = total_extents;
5728 }
5673 5729
5674 /* update total extent count and grab the next record */ 5730 if (current_ext == stop_extent) {
5675 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5731 *done = 1;
5676 if (current_ext >= total_extents) 5732 *next_fsb = NULLFSBLOCK;
5677 break; 5733 break;
5734 }
5678 gotp = xfs_iext_get_ext(ifp, current_ext); 5735 gotp = xfs_iext_get_ext(ifp, current_ext);
5679 } 5736 }
5680 5737
5681 /* Check if we are done */ 5738 if (!*done) {
5682 if (current_ext == total_extents) {
5683 *done = 1;
5684 } else if (next_fsb) {
5685 xfs_bmbt_get_all(gotp, &got); 5739 xfs_bmbt_get_all(gotp, &got);
5686 *next_fsb = got.br_startoff; 5740 *next_fsb = got.br_startoff;
5687 } 5741 }
@@ -5696,3 +5750,189 @@ del_cursor:
5696 5750
5697 return error; 5751 return error;
5698} 5752}
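
With the new interface, xfs_bmap_shift_extents walks the in-core extent list toward the fork's end for a left shift and toward its start for a right shift, stopping at the exclusive stop_extent index; after each left shift the extent count is re-read because a merge can shrink it. A simplified userspace model of just the loop control, with a plain array standing in for the extent fork and merges omitted:

#include <stdio.h>

enum shift_direction { SHIFT_LEFT, SHIFT_RIGHT };

/* Toy extent list holding start offsets only. */
static long ext[] = { 10, 20, 30, 40 };
static const int total_extents = 4;

static void shift_extents(enum shift_direction dir, long delta,
			  int current_ext, int stop_extent)
{
	while (current_ext != stop_extent) {
		ext[current_ext] += (dir == SHIFT_LEFT) ? -delta : delta;
		if (dir == SHIFT_LEFT)
			current_ext++;	/* walk toward the end of the fork */
		else
			current_ext--;	/* walk toward the start of the fork */
	}
}

int main(void)
{
	int i;

	/* Left shift from extent 1; stop_extent is the (exclusive) count. */
	shift_extents(SHIFT_LEFT, 5, 1, total_extents);
	/* Right shift from the last extent; stop before touching extent 0. */
	shift_extents(SHIFT_RIGHT, 5, total_extents - 1, 0);
	for (i = 0; i < total_extents; i++)
		printf("%ld ", ext[i]);
	printf("\n");
	return 0;
}
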
5753
5754/*
5755 * Splits an extent into two extents at split_fsb block such that it
5756 * becomes the first block of the new extent. @current_ext is the target
5757 * extent to be split. @split_fsb is the block where the extent is split.
5758 * If split_fsb lies in a hole or at the first block of an extent, just return 0.
5759 */
5760STATIC int
5761xfs_bmap_split_extent_at(
5762 struct xfs_trans *tp,
5763 struct xfs_inode *ip,
5764 xfs_fileoff_t split_fsb,
5765 xfs_fsblock_t *firstfsb,
5766 struct xfs_bmap_free *free_list)
5767{
5768 int whichfork = XFS_DATA_FORK;
5769 struct xfs_btree_cur *cur = NULL;
5770 struct xfs_bmbt_rec_host *gotp;
5771 struct xfs_bmbt_irec got;
5772 struct xfs_bmbt_irec new; /* split extent */
5773 struct xfs_mount *mp = ip->i_mount;
5774 struct xfs_ifork *ifp;
5775 xfs_fsblock_t gotblkcnt; /* new block count for got */
5776 xfs_extnum_t current_ext;
5777 int error = 0;
5778 int logflags = 0;
5779 int i = 0;
5780
5781 if (unlikely(XFS_TEST_ERROR(
5782 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5783 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5784 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5785 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5786 XFS_ERRLEVEL_LOW, mp);
5787 return -EFSCORRUPTED;
5788 }
5789
5790 if (XFS_FORCED_SHUTDOWN(mp))
5791 return -EIO;
5792
5793 ifp = XFS_IFORK_PTR(ip, whichfork);
5794 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5795 /* Read in all the extents */
5796 error = xfs_iread_extents(tp, ip, whichfork);
5797 if (error)
5798 return error;
5799 }
5800
5801 /*
5802 * gotp can be null in 2 cases: 1) if there are no extents
5803 * or 2) split_fsb lies in a hole beyond which there are
5804 * no extents. Either way, we are done.
5805 */
5806 gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
5807 if (!gotp)
5808 return 0;
5809
5810 xfs_bmbt_get_all(gotp, &got);
5811
5812 /*
5813 * Check whether split_fsb lies in a hole or at the start boundary
5814 * offset of the extent.
5815 */
5816 if (got.br_startoff >= split_fsb)
5817 return 0;
5818
5819 gotblkcnt = split_fsb - got.br_startoff;
5820 new.br_startoff = split_fsb;
5821 new.br_startblock = got.br_startblock + gotblkcnt;
5822 new.br_blockcount = got.br_blockcount - gotblkcnt;
5823 new.br_state = got.br_state;
5824
5825 if (ifp->if_flags & XFS_IFBROOT) {
5826 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5827 cur->bc_private.b.firstblock = *firstfsb;
5828 cur->bc_private.b.flist = free_list;
5829 cur->bc_private.b.flags = 0;
5830 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5831 got.br_startblock,
5832 got.br_blockcount,
5833 &i);
5834 if (error)
5835 goto del_cursor;
5836 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5837 }
5838
5839 xfs_bmbt_set_blockcount(gotp, gotblkcnt);
5840 got.br_blockcount = gotblkcnt;
5841
5842 logflags = XFS_ILOG_CORE;
5843 if (cur) {
5844 error = xfs_bmbt_update(cur, got.br_startoff,
5845 got.br_startblock,
5846 got.br_blockcount,
5847 got.br_state);
5848 if (error)
5849 goto del_cursor;
5850 } else
5851 logflags |= XFS_ILOG_DEXT;
5852
5853 /* Add new extent */
5854 current_ext++;
5855 xfs_iext_insert(ip, current_ext, 1, &new, 0);
5856 XFS_IFORK_NEXT_SET(ip, whichfork,
5857 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5858
5859 if (cur) {
5860 error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
5861 new.br_startblock, new.br_blockcount,
5862 &i);
5863 if (error)
5864 goto del_cursor;
5865 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5866 cur->bc_rec.b.br_state = new.br_state;
5867
5868 error = xfs_btree_insert(cur, &i);
5869 if (error)
5870 goto del_cursor;
5871 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5872 }
5873
5874 /*
5875 * Convert to a btree if necessary.
5876 */
5877 if (xfs_bmap_needs_btree(ip, whichfork)) {
5878 int tmp_logflags; /* partial log flag return val */
5879
5880 ASSERT(cur == NULL);
5881 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
5882 &cur, 0, &tmp_logflags, whichfork);
5883 logflags |= tmp_logflags;
5884 }
5885
5886del_cursor:
5887 if (cur) {
5888 cur->bc_private.b.allocated = 0;
5889 xfs_btree_del_cursor(cur,
5890 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5891 }
5892
5893 if (logflags)
5894 xfs_trans_log_inode(tp, ip, logflags);
5895 return error;
5896}
5897
5898int
5899xfs_bmap_split_extent(
5900 struct xfs_inode *ip,
5901 xfs_fileoff_t split_fsb)
5902{
5903 struct xfs_mount *mp = ip->i_mount;
5904 struct xfs_trans *tp;
5905 struct xfs_bmap_free free_list;
5906 xfs_fsblock_t firstfsb;
5907 int committed;
5908 int error;
5909
5910 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
5911 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5912 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5913 if (error) {
5914 xfs_trans_cancel(tp, 0);
5915 return error;
5916 }
5917
5918 xfs_ilock(ip, XFS_ILOCK_EXCL);
5919 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5920
5921 xfs_bmap_init(&free_list, &firstfsb);
5922
5923 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5924 &firstfsb, &free_list);
5925 if (error)
5926 goto out;
5927
5928 error = xfs_bmap_finish(&tp, &free_list, &committed);
5929 if (error)
5930 goto out;
5931
5932 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
5933
5934
5935out:
5936 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
5937 return error;
5938}
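
Stripped of the btree and transaction plumbing, the split in xfs_bmap_split_extent_at is plain arithmetic on the incore record: the original extent keeps the blocks below split_fsb and the new extent takes the rest, with its start block advanced by the same count. A self-contained sketch of just that calculation:

#include <stdio.h>

struct irec {		/* toy version of struct xfs_bmbt_irec */
	long startoff;	/* file offset, in filesystem blocks */
	long startblock;/* starting disk block */
	long blockcount;
};

/* Split @got at file offset @split_fsb; fills @new and returns 0 on split. */
static int split_extent_at(struct irec *got, long split_fsb, struct irec *new)
{
	long gotblkcnt;

	/* split point in a hole or at/before the extent start: nothing to do */
	if (got->startoff >= split_fsb ||
	    split_fsb >= got->startoff + got->blockcount)
		return -1;

	gotblkcnt = split_fsb - got->startoff;
	new->startoff = split_fsb;
	new->startblock = got->startblock + gotblkcnt;
	new->blockcount = got->blockcount - gotblkcnt;
	got->blockcount = gotblkcnt;
	return 0;
}

int main(void)
{
	struct irec got = { 100, 5000, 40 }, new;

	if (!split_extent_at(&got, 110, &new))
		printf("left [%ld,+%ld@%ld] right [%ld,+%ld@%ld]\n",
		       got.startoff, got.blockcount, got.startblock,
		       new.startoff, new.blockcount, new.startblock);
	return 0;
}
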
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b9d8a499d2c4..6aaa0c1c7200 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
166 */ 166 */
167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
168 168
169enum shift_direction {
170 SHIFT_LEFT = 0,
171 SHIFT_RIGHT,
172};
173
169#ifdef DEBUG 174#ifdef DEBUG
170void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 175void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
171 int whichfork, unsigned long caller_ip); 176 int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
211 xfs_extnum_t num); 216 xfs_extnum_t num);
212uint xfs_default_attroffset(struct xfs_inode *ip); 217uint xfs_default_attroffset(struct xfs_inode *ip);
213int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 218int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
214 xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, 219 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
215 int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, 220 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
216 struct xfs_bmap_free *flist, int num_exts); 221 struct xfs_bmap_free *flist, enum shift_direction direction,
222 int num_exts);
223int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
217 224
218#endif /* __XFS_BMAP_H__ */ 225#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 81cad433df85..c72283dd8d44 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -168,7 +168,7 @@ xfs_btree_check_lptr(
168 xfs_fsblock_t bno, /* btree block disk address */ 168 xfs_fsblock_t bno, /* btree block disk address */
169 int level) /* btree block level */ 169 int level) /* btree block level */
170{ 170{
171 XFS_WANT_CORRUPTED_RETURN( 171 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
172 level > 0 && 172 level > 0 &&
173 bno != NULLFSBLOCK && 173 bno != NULLFSBLOCK &&
174 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); 174 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
@@ -187,7 +187,7 @@ xfs_btree_check_sptr(
187{ 187{
188 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; 188 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
189 189
190 XFS_WANT_CORRUPTED_RETURN( 190 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
191 level > 0 && 191 level > 0 &&
192 bno != NULLAGBLOCK && 192 bno != NULLAGBLOCK &&
193 bno != 0 && 193 bno != 0 &&
@@ -1825,7 +1825,7 @@ xfs_btree_lookup(
1825 error = xfs_btree_increment(cur, 0, &i); 1825 error = xfs_btree_increment(cur, 0, &i);
1826 if (error) 1826 if (error)
1827 goto error0; 1827 goto error0;
1828 XFS_WANT_CORRUPTED_RETURN(i == 1); 1828 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1829 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1829 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1830 *stat = 1; 1830 *stat = 1;
1831 return 0; 1831 return 0;
@@ -2285,7 +2285,7 @@ xfs_btree_rshift(
2285 if (error) 2285 if (error)
2286 goto error0; 2286 goto error0;
2287 i = xfs_btree_lastrec(tcur, level); 2287 i = xfs_btree_lastrec(tcur, level);
2288 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2288 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
2289 2289
2290 error = xfs_btree_increment(tcur, level, &i); 2290 error = xfs_btree_increment(tcur, level, &i);
2291 if (error) 2291 if (error)
@@ -3138,7 +3138,7 @@ xfs_btree_insert(
3138 goto error0; 3138 goto error0;
3139 } 3139 }
3140 3140
3141 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3141 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3142 level++; 3142 level++;
3143 3143
3144 /* 3144 /*
@@ -3582,15 +3582,15 @@ xfs_btree_delrec(
3582 * Actually any entry but the first would suffice. 3582 * Actually any entry but the first would suffice.
3583 */ 3583 */
3584 i = xfs_btree_lastrec(tcur, level); 3584 i = xfs_btree_lastrec(tcur, level);
3585 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3585 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3586 3586
3587 error = xfs_btree_increment(tcur, level, &i); 3587 error = xfs_btree_increment(tcur, level, &i);
3588 if (error) 3588 if (error)
3589 goto error0; 3589 goto error0;
3590 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3590 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3591 3591
3592 i = xfs_btree_lastrec(tcur, level); 3592 i = xfs_btree_lastrec(tcur, level);
3593 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3593 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3594 3594
3595 /* Grab a pointer to the block. */ 3595 /* Grab a pointer to the block. */
3596 right = xfs_btree_get_block(tcur, level, &rbp); 3596 right = xfs_btree_get_block(tcur, level, &rbp);
@@ -3634,12 +3634,12 @@ xfs_btree_delrec(
3634 rrecs = xfs_btree_get_numrecs(right); 3634 rrecs = xfs_btree_get_numrecs(right);
3635 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3635 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3636 i = xfs_btree_firstrec(tcur, level); 3636 i = xfs_btree_firstrec(tcur, level);
3637 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3637 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3638 3638
3639 error = xfs_btree_decrement(tcur, level, &i); 3639 error = xfs_btree_decrement(tcur, level, &i);
3640 if (error) 3640 if (error)
3641 goto error0; 3641 goto error0;
3642 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3642 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3643 } 3643 }
3644 } 3644 }
3645 3645
@@ -3653,13 +3653,13 @@ xfs_btree_delrec(
3653 * previous block. 3653 * previous block.
3654 */ 3654 */
3655 i = xfs_btree_firstrec(tcur, level); 3655 i = xfs_btree_firstrec(tcur, level);
3656 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3656 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3657 3657
3658 error = xfs_btree_decrement(tcur, level, &i); 3658 error = xfs_btree_decrement(tcur, level, &i);
3659 if (error) 3659 if (error)
3660 goto error0; 3660 goto error0;
3661 i = xfs_btree_firstrec(tcur, level); 3661 i = xfs_btree_firstrec(tcur, level);
3662 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3662 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3663 3663
3664 /* Grab a pointer to the block. */ 3664 /* Grab a pointer to the block. */
3665 left = xfs_btree_get_block(tcur, level, &lbp); 3665 left = xfs_btree_get_block(tcur, level, &lbp);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 9cb0115c6bd1..2385f8cd08ab 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -538,12 +538,12 @@ xfs_da3_root_split(
538 oldroot = blk1->bp->b_addr; 538 oldroot = blk1->bp->b_addr;
539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { 540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
541 struct xfs_da3_icnode_hdr nodehdr; 541 struct xfs_da3_icnode_hdr icnodehdr;
542 542
543 dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); 543 dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
544 btree = dp->d_ops->node_tree_p(oldroot); 544 btree = dp->d_ops->node_tree_p(oldroot);
545 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); 545 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
546 level = nodehdr.level; 546 level = icnodehdr.level;
547 547
548 /* 548 /*
549 * we are about to copy oldroot to bp, so set up the type 549 * we are about to copy oldroot to bp, so set up the type
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 0a49b0286372..74bcbabfa523 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -725,7 +725,13 @@ struct xfs_attr3_icleaf_hdr {
725 __uint16_t magic; 725 __uint16_t magic;
726 __uint16_t count; 726 __uint16_t count;
727 __uint16_t usedbytes; 727 __uint16_t usedbytes;
728 __uint16_t firstused; 728 /*
729 * firstused is 32-bit here instead of 16-bit like the on-disk variant
730 * to support the maximum fsb size of 64k without overflow issues throughout
731 * the attr code. Instead, the overflow condition is handled on
732 * conversion to/from disk.
733 */
734 __uint32_t firstused;
729 __u8 holes; 735 __u8 holes;
730 struct { 736 struct {
731 __uint16_t base; 737 __uint16_t base;
@@ -734,6 +740,12 @@ struct xfs_attr3_icleaf_hdr {
734}; 740};
735 741
736/* 742/*
743 * Special value to represent fs block size in the leaf header firstused field.
744 * Only used when the block size overflows the 2 bytes available on disk.
745 */
746#define XFS_ATTR3_LEAF_NULLOFF 0
747
748/*
737 * Flags used in the leaf_entry[i].flags field. 749 * Flags used in the leaf_entry[i].flags field.
738 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified 750 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
739 * on the system call, they are "or"ed together for various operations. 751 * on the system call, they are "or"ed together for various operations.
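
Because the on-disk field stays 16 bits wide, the 32-bit incore firstused has to be folded down when the header is written, with XFS_ATTR3_LEAF_NULLOFF (0) standing in for a firstused equal to a 64k block size. A hedged sketch of that to/from-disk conversion; the real helpers sit in the attr leaf code and may differ in detail:

#include <stdint.h>
#include <stdio.h>

#define XFS_ATTR3_LEAF_NULLOFF	0	/* sentinel: firstused == blocksize */

/* Incore (32-bit) -> on-disk (16-bit). Only a 64k block size can overflow. */
static uint16_t firstused_to_disk(uint32_t firstused, uint32_t blocksize)
{
	if (firstused == blocksize && blocksize == 65536)
		return XFS_ATTR3_LEAF_NULLOFF;
	return (uint16_t)firstused;
}

/* On-disk (16-bit) -> incore (32-bit). */
static uint32_t firstused_from_disk(uint16_t disk, uint32_t blocksize)
{
	if (disk == XFS_ATTR3_LEAF_NULLOFF)
		return blocksize;	/* empty leaf on a 64k-block fs */
	return disk;
}

int main(void)
{
	uint32_t bsize = 65536;
	uint16_t d = firstused_to_disk(65536, bsize);

	printf("disk=%u incore=%u\n", d, firstused_from_disk(d, bsize));
	return 0;
}
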
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 5ff31be9b1cd..de1ea16f5748 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -89,7 +89,7 @@ __xfs_dir3_data_check(
89 * so just ensure that the count falls somewhere inside the 89 * so just ensure that the count falls somewhere inside the
90 * block right now. 90 * block right now.
91 */ 91 */
92 XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < 92 XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
93 ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); 93 ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
94 break; 94 break;
95 case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 95 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
@@ -107,21 +107,21 @@ __xfs_dir3_data_check(
107 bf = ops->data_bestfree_p(hdr); 107 bf = ops->data_bestfree_p(hdr);
108 count = lastfree = freeseen = 0; 108 count = lastfree = freeseen = 0;
109 if (!bf[0].length) { 109 if (!bf[0].length) {
110 XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); 110 XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
111 freeseen |= 1 << 0; 111 freeseen |= 1 << 0;
112 } 112 }
113 if (!bf[1].length) { 113 if (!bf[1].length) {
114 XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); 114 XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
115 freeseen |= 1 << 1; 115 freeseen |= 1 << 1;
116 } 116 }
117 if (!bf[2].length) { 117 if (!bf[2].length) {
118 XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); 118 XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
119 freeseen |= 1 << 2; 119 freeseen |= 1 << 2;
120 } 120 }
121 121
122 XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= 122 XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
123 be16_to_cpu(bf[1].length)); 123 be16_to_cpu(bf[1].length));
124 XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= 124 XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
125 be16_to_cpu(bf[2].length)); 125 be16_to_cpu(bf[2].length));
126 /* 126 /*
127 * Loop over the data/unused entries. 127 * Loop over the data/unused entries.
@@ -134,18 +134,18 @@ __xfs_dir3_data_check(
134 * doesn't need to be there. 134 * doesn't need to be there.
135 */ 135 */
136 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 136 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
137 XFS_WANT_CORRUPTED_RETURN(lastfree == 0); 137 XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
138 XFS_WANT_CORRUPTED_RETURN( 138 XFS_WANT_CORRUPTED_RETURN(mp,
139 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 139 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
140 (char *)dup - (char *)hdr); 140 (char *)dup - (char *)hdr);
141 dfp = xfs_dir2_data_freefind(hdr, bf, dup); 141 dfp = xfs_dir2_data_freefind(hdr, bf, dup);
142 if (dfp) { 142 if (dfp) {
143 i = (int)(dfp - bf); 143 i = (int)(dfp - bf);
144 XFS_WANT_CORRUPTED_RETURN( 144 XFS_WANT_CORRUPTED_RETURN(mp,
145 (freeseen & (1 << i)) == 0); 145 (freeseen & (1 << i)) == 0);
146 freeseen |= 1 << i; 146 freeseen |= 1 << i;
147 } else { 147 } else {
148 XFS_WANT_CORRUPTED_RETURN( 148 XFS_WANT_CORRUPTED_RETURN(mp,
149 be16_to_cpu(dup->length) <= 149 be16_to_cpu(dup->length) <=
150 be16_to_cpu(bf[2].length)); 150 be16_to_cpu(bf[2].length));
151 } 151 }
@@ -160,13 +160,13 @@ __xfs_dir3_data_check(
160 * The linear search is crude but this is DEBUG code. 160 * The linear search is crude but this is DEBUG code.
161 */ 161 */
162 dep = (xfs_dir2_data_entry_t *)p; 162 dep = (xfs_dir2_data_entry_t *)p;
163 XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); 163 XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
164 XFS_WANT_CORRUPTED_RETURN( 164 XFS_WANT_CORRUPTED_RETURN(mp,
165 !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); 165 !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
166 XFS_WANT_CORRUPTED_RETURN( 166 XFS_WANT_CORRUPTED_RETURN(mp,
167 be16_to_cpu(*ops->data_entry_tag_p(dep)) == 167 be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
168 (char *)dep - (char *)hdr); 168 (char *)dep - (char *)hdr);
169 XFS_WANT_CORRUPTED_RETURN( 169 XFS_WANT_CORRUPTED_RETURN(mp,
170 ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); 170 ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
171 count++; 171 count++;
172 lastfree = 0; 172 lastfree = 0;
@@ -183,14 +183,15 @@ __xfs_dir3_data_check(
183 be32_to_cpu(lep[i].hashval) == hash) 183 be32_to_cpu(lep[i].hashval) == hash)
184 break; 184 break;
185 } 185 }
186 XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); 186 XFS_WANT_CORRUPTED_RETURN(mp,
187 i < be32_to_cpu(btp->count));
187 } 188 }
188 p += ops->data_entsize(dep->namelen); 189 p += ops->data_entsize(dep->namelen);
189 } 190 }
190 /* 191 /*
191 * Need to have seen all the entries and all the bestfree slots. 192 * Need to have seen all the entries and all the bestfree slots.
192 */ 193 */
193 XFS_WANT_CORRUPTED_RETURN(freeseen == 7); 194 XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
194 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 195 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
195 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 196 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
196 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 197 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
@@ -198,13 +199,13 @@ __xfs_dir3_data_check(
198 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 199 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
199 stale++; 200 stale++;
200 if (i > 0) 201 if (i > 0)
201 XFS_WANT_CORRUPTED_RETURN( 202 XFS_WANT_CORRUPTED_RETURN(mp,
202 be32_to_cpu(lep[i].hashval) >= 203 be32_to_cpu(lep[i].hashval) >=
203 be32_to_cpu(lep[i - 1].hashval)); 204 be32_to_cpu(lep[i - 1].hashval));
204 } 205 }
205 XFS_WANT_CORRUPTED_RETURN(count == 206 XFS_WANT_CORRUPTED_RETURN(mp, count ==
206 be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); 207 be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
207 XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); 208 XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
208 } 209 }
209 return 0; 210 return 0;
210} 211}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8eb718979383..4daaa662337b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
264 /* must be padded to 64 bit alignment */ 264 /* must be padded to 64 bit alignment */
265} xfs_dsb_t; 265} xfs_dsb_t;
266 266
267/*
268 * Sequence number values for the fields.
269 */
270typedef enum {
271 XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
272 XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
273 XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
274 XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
275 XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
276 XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
277 XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
278 XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
279 XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
280 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
281 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
282 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
283 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
284 XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
285 XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
286 XFS_SBS_PQUOTINO, XFS_SBS_LSN,
287 XFS_SBS_FIELDCOUNT
288} xfs_sb_field_t;
289
290/*
291 * Mask values, defined based on the xfs_sb_field_t values.
292 * Only define the ones we're using.
293 */
294#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
295#define XFS_SB_UUID XFS_SB_MVAL(UUID)
296#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
297#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
298#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
299#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
300#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
301#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
302#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
303#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
304#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
305#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
306#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
307#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
308#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
309#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
310#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
311 XFS_SB_MVAL(BAD_FEATURES2))
312#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
313#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
314#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
315#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
316#define XFS_SB_CRC XFS_SB_MVAL(CRC)
317#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
318#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
319#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
320#define XFS_SB_MOD_BITS \
321 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
322 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
323 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
324 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
325 XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
326 XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
327 XFS_SB_PQUOTINO)
328
329 267
330/* 268/*
331 * Misc. Flags - warning - these will be cleared by xfs_repair unless 269 * Misc. Flags - warning - these will be cleared by xfs_repair unless
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 116ef1ddb3e3..07349a183a11 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
376 */ 376 */
377 newlen = args.mp->m_ialloc_inos; 377 newlen = args.mp->m_ialloc_inos;
378 if (args.mp->m_maxicount && 378 if (args.mp->m_maxicount &&
379 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 379 percpu_counter_read(&args.mp->m_icount) + newlen >
380 args.mp->m_maxicount)
380 return -ENOSPC; 381 return -ENOSPC;
381 args.minlen = args.maxlen = args.mp->m_ialloc_blks; 382 args.minlen = args.maxlen = args.mp->m_ialloc_blks;
382 /* 383 /*
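
The switch from reading m_sb.sb_icount to percpu_counter_read(&mp->m_icount) trades exactness for speed: the fast read returns only the central value, which can lag the true count by roughly the per-CPU batch on each CPU, and that slack is acceptable for a heuristic ceiling check like the maxicount test above. A toy model of the two read paths:

#include <stdio.h>

#define NCPUS	4
#define BATCH	32

struct pcp_counter {
	long count;		/* central value, cheap to read */
	long pcp[NCPUS];	/* per-CPU deltas, folded in lazily */
};

static void pcp_add(struct pcp_counter *c, int cpu, long amount)
{
	c->pcp[cpu] += amount;
	if (c->pcp[cpu] >= BATCH || c->pcp[cpu] <= -BATCH) {
		c->count += c->pcp[cpu];	/* fold the delta in */
		c->pcp[cpu] = 0;
	}
}

static long pcp_read(const struct pcp_counter *c)	/* fast, approximate */
{
	return c->count;
}

static long pcp_sum(const struct pcp_counter *c)	/* slow, exact */
{
	long sum = c->count;
	int i;

	for (i = 0; i < NCPUS; i++)
		sum += c->pcp[i];
	return sum;
}

int main(void)
{
	struct pcp_counter icount = { 0 };
	int i;

	for (i = 0; i < 10; i++)
		pcp_add(&icount, i % NCPUS, 5);
	/* read lags sum because no per-CPU delta reached the batch */
	printf("read=%ld sum=%ld\n", pcp_read(&icount), pcp_sum(&icount));
	return 0;
}
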
@@ -700,7 +701,7 @@ xfs_ialloc_next_rec(
700 error = xfs_inobt_get_rec(cur, rec, &i); 701 error = xfs_inobt_get_rec(cur, rec, &i);
701 if (error) 702 if (error)
702 return error; 703 return error;
703 XFS_WANT_CORRUPTED_RETURN(i == 1); 704 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
704 } 705 }
705 706
706 return 0; 707 return 0;
@@ -724,7 +725,7 @@ xfs_ialloc_get_rec(
724 error = xfs_inobt_get_rec(cur, rec, &i); 725 error = xfs_inobt_get_rec(cur, rec, &i);
725 if (error) 726 if (error)
726 return error; 727 return error;
727 XFS_WANT_CORRUPTED_RETURN(i == 1); 728 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
728 } 729 }
729 730
730 return 0; 731 return 0;
@@ -783,12 +784,12 @@ xfs_dialloc_ag_inobt(
783 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); 784 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
784 if (error) 785 if (error)
785 goto error0; 786 goto error0;
786 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 787 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
787 788
788 error = xfs_inobt_get_rec(cur, &rec, &j); 789 error = xfs_inobt_get_rec(cur, &rec, &j);
789 if (error) 790 if (error)
790 goto error0; 791 goto error0;
791 XFS_WANT_CORRUPTED_GOTO(j == 1, error0); 792 XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0);
792 793
793 if (rec.ir_freecount > 0) { 794 if (rec.ir_freecount > 0) {
794 /* 795 /*
@@ -944,19 +945,19 @@ newino:
944 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 945 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
945 if (error) 946 if (error)
946 goto error0; 947 goto error0;
947 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 948 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
948 949
949 for (;;) { 950 for (;;) {
950 error = xfs_inobt_get_rec(cur, &rec, &i); 951 error = xfs_inobt_get_rec(cur, &rec, &i);
951 if (error) 952 if (error)
952 goto error0; 953 goto error0;
953 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 954 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
954 if (rec.ir_freecount > 0) 955 if (rec.ir_freecount > 0)
955 break; 956 break;
956 error = xfs_btree_increment(cur, 0, &i); 957 error = xfs_btree_increment(cur, 0, &i);
957 if (error) 958 if (error)
958 goto error0; 959 goto error0;
959 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 960 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
960 } 961 }
961 962
962alloc_inode: 963alloc_inode:
@@ -1016,7 +1017,7 @@ xfs_dialloc_ag_finobt_near(
1016 error = xfs_inobt_get_rec(lcur, rec, &i); 1017 error = xfs_inobt_get_rec(lcur, rec, &i);
1017 if (error) 1018 if (error)
1018 return error; 1019 return error;
1019 XFS_WANT_CORRUPTED_RETURN(i == 1); 1020 XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1);
1020 1021
1021 /* 1022 /*
1022 * See if we've landed in the parent inode record. The finobt 1023 * See if we've landed in the parent inode record. The finobt
@@ -1039,10 +1040,10 @@ xfs_dialloc_ag_finobt_near(
1039 error = xfs_inobt_get_rec(rcur, &rrec, &j); 1040 error = xfs_inobt_get_rec(rcur, &rrec, &j);
1040 if (error) 1041 if (error)
1041 goto error_rcur; 1042 goto error_rcur;
1042 XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); 1043 XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur);
1043 } 1044 }
1044 1045
1045 XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); 1046 XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur);
1046 if (i == 1 && j == 1) { 1047 if (i == 1 && j == 1) {
1047 /* 1048 /*
1048 * Both the left and right records are valid. Choose the closer 1049 * Both the left and right records are valid. Choose the closer
@@ -1095,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino(
1095 error = xfs_inobt_get_rec(cur, rec, &i); 1096 error = xfs_inobt_get_rec(cur, rec, &i);
1096 if (error) 1097 if (error)
1097 return error; 1098 return error;
1098 XFS_WANT_CORRUPTED_RETURN(i == 1); 1099 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1099 return 0; 1100 return 0;
1100 } 1101 }
1101 } 1102 }
@@ -1106,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino(
1106 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 1107 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
1107 if (error) 1108 if (error)
1108 return error; 1109 return error;
1109 XFS_WANT_CORRUPTED_RETURN(i == 1); 1110 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1110 1111
1111 error = xfs_inobt_get_rec(cur, rec, &i); 1112 error = xfs_inobt_get_rec(cur, rec, &i);
1112 if (error) 1113 if (error)
1113 return error; 1114 return error;
1114 XFS_WANT_CORRUPTED_RETURN(i == 1); 1115 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1115 1116
1116 return 0; 1117 return 0;
1117} 1118}
@@ -1133,19 +1134,19 @@ xfs_dialloc_ag_update_inobt(
1133 error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); 1134 error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
1134 if (error) 1135 if (error)
1135 return error; 1136 return error;
1136 XFS_WANT_CORRUPTED_RETURN(i == 1); 1137 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1137 1138
1138 error = xfs_inobt_get_rec(cur, &rec, &i); 1139 error = xfs_inobt_get_rec(cur, &rec, &i);
1139 if (error) 1140 if (error)
1140 return error; 1141 return error;
1141 XFS_WANT_CORRUPTED_RETURN(i == 1); 1142 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1142 ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % 1143 ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
1143 XFS_INODES_PER_CHUNK) == 0); 1144 XFS_INODES_PER_CHUNK) == 0);
1144 1145
1145 rec.ir_free &= ~XFS_INOBT_MASK(offset); 1146 rec.ir_free &= ~XFS_INOBT_MASK(offset);
1146 rec.ir_freecount--; 1147 rec.ir_freecount--;
1147 1148
1148 XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && 1149 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) &&
1149 (rec.ir_freecount == frec->ir_freecount)); 1150 (rec.ir_freecount == frec->ir_freecount));
1150 1151
1151 return xfs_inobt_update(cur, &rec); 1152 return xfs_inobt_update(cur, &rec);
@@ -1340,7 +1341,8 @@ xfs_dialloc(
1340 * inode. 1341 * inode.
1341 */ 1342 */
1342 if (mp->m_maxicount && 1343 if (mp->m_maxicount &&
1343 mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { 1344 percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
1345 mp->m_maxicount) {
1344 noroom = 1; 1346 noroom = 1;
1345 okalloc = 0; 1347 okalloc = 0;
1346 } 1348 }
@@ -1475,14 +1477,14 @@ xfs_difree_inobt(
1475 __func__, error); 1477 __func__, error);
1476 goto error0; 1478 goto error0;
1477 } 1479 }
1478 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1480 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1479 error = xfs_inobt_get_rec(cur, &rec, &i); 1481 error = xfs_inobt_get_rec(cur, &rec, &i);
1480 if (error) { 1482 if (error) {
1481 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", 1483 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
1482 __func__, error); 1484 __func__, error);
1483 goto error0; 1485 goto error0;
1484 } 1486 }
1485 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1487 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1486 /* 1488 /*
1487 * Get the offset in the inode chunk. 1489 * Get the offset in the inode chunk.
1488 */ 1490 */
@@ -1592,7 +1594,7 @@ xfs_difree_finobt(
1592 * freed an inode in a previously fully allocated chunk. If not, 1594 * freed an inode in a previously fully allocated chunk. If not,
1593 * something is out of sync. 1595 * something is out of sync.
1594 */ 1596 */
1595 XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); 1597 XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
1596 1598
1597 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, 1599 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
1598 ibtrec->ir_free, &i); 1600 ibtrec->ir_free, &i);
@@ -1613,12 +1615,12 @@ xfs_difree_finobt(
1613 error = xfs_inobt_get_rec(cur, &rec, &i); 1615 error = xfs_inobt_get_rec(cur, &rec, &i);
1614 if (error) 1616 if (error)
1615 goto error; 1617 goto error;
1616 XFS_WANT_CORRUPTED_GOTO(i == 1, error); 1618 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
1617 1619
1618 rec.ir_free |= XFS_INOBT_MASK(offset); 1620 rec.ir_free |= XFS_INOBT_MASK(offset);
1619 rec.ir_freecount++; 1621 rec.ir_freecount++;
1620 1622
1621 XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && 1623 XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) &&
1622 (rec.ir_freecount == ibtrec->ir_freecount), 1624 (rec.ir_freecount == ibtrec->ir_freecount),
1623 error); 1625 error);
1624 1626
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index b0a5fe95a3e2..dc4bfc5d88fc 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -111,14 +111,6 @@ xfs_mount_validate_sb(
111 bool check_inprogress, 111 bool check_inprogress,
112 bool check_version) 112 bool check_version)
113{ 113{
114
115 /*
116 * If the log device and data device have the
117 * same device number, the log is internal.
118 * Consequently, the sb_logstart should be non-zero. If
119 * we have a zero sb_logstart in this case, we may be trying to mount
120 * a volume filesystem in a non-volume manner.
121 */
122 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 114 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
123 xfs_warn(mp, "bad magic number"); 115 xfs_warn(mp, "bad magic number");
124 return -EWRONGFS; 116 return -EWRONGFS;
@@ -743,17 +735,15 @@ xfs_initialize_perag_data(
743 btree += pag->pagf_btreeblks; 735 btree += pag->pagf_btreeblks;
744 xfs_perag_put(pag); 736 xfs_perag_put(pag);
745 } 737 }
746 /* 738
747 * Overwrite incore superblock counters with just-read data 739 /* Overwrite incore superblock counters with just-read data */
748 */
749 spin_lock(&mp->m_sb_lock); 740 spin_lock(&mp->m_sb_lock);
750 sbp->sb_ifree = ifree; 741 sbp->sb_ifree = ifree;
751 sbp->sb_icount = ialloc; 742 sbp->sb_icount = ialloc;
752 sbp->sb_fdblocks = bfree + bfreelst + btree; 743 sbp->sb_fdblocks = bfree + bfreelst + btree;
753 spin_unlock(&mp->m_sb_lock); 744 spin_unlock(&mp->m_sb_lock);
754 745
755 /* Fixup the per-cpu counters as well. */ 746 xfs_reinit_percpu_counters(mp);
756 xfs_icsb_reinit_counters(mp);
757 747
758 return 0; 748 return 0;
759} 749}
@@ -771,6 +761,10 @@ xfs_log_sb(
771 struct xfs_mount *mp = tp->t_mountp; 761 struct xfs_mount *mp = tp->t_mountp;
772 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 762 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
773 763
764 mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
765 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
766 mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
767
774 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); 768 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
775 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); 769 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
776 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); 770 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..a56960dd1684 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
31#include "xfs_bmap.h" 31#include "xfs_bmap.h"
32#include "xfs_bmap_util.h" 32#include "xfs_bmap_util.h"
33#include "xfs_bmap_btree.h" 33#include "xfs_bmap_btree.h"
34#include <linux/aio.h>
35#include <linux/gfp.h> 34#include <linux/gfp.h>
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/pagevec.h> 36#include <linux/pagevec.h>
@@ -1233,6 +1232,117 @@ xfs_vm_releasepage(
1233 return try_to_free_buffers(page); 1232 return try_to_free_buffers(page);
1234} 1233}
1235 1234
1235/*
1236 * When we map a DIO buffer, we may need to attach an ioend that describes the
1237 * type of write IO we are doing. This passes to the completion function the
1238 * operations it needs to perform. If the mapping is for an overwrite wholly
1239 * within the EOF then we don't need an ioend and so we don't allocate one.
1240 * This avoids the unnecessary overhead of allocating and freeing ioends for
1241 * workloads that don't require transactions on IO completion.
1242 *
1243 * If we get multiple mappings in a single IO, we might be mapping different
1244 * types. But because the direct IO can only have a single private pointer, we
1245 * need to ensure that:
1246 *
1247 * a) i) the ioend spans the entire region of unwritten mappings; or
1248 * ii) the ioend spans all the mappings that cross or are beyond EOF; and
1249 * b) if it contains unwritten extents, it is *permanently* marked as such
1250 *
1251 * We could do this by chaining ioends like buffered IO does, but we only
1252 * actually get one IO completion callback from the direct IO, and that spans
1253 * the entire IO regardless of how many mappings and IOs are needed to complete
1254 * the DIO. There is only going to be one reference to the ioend and its life
1255 * cycle is constrained by the DIO completion code. Hence we don't need
1256 * reference counting here.
1257 */
1258static void
1259xfs_map_direct(
1260 struct inode *inode,
1261 struct buffer_head *bh_result,
1262 struct xfs_bmbt_irec *imap,
1263 xfs_off_t offset)
1264{
1265 struct xfs_ioend *ioend;
1266 xfs_off_t size = bh_result->b_size;
1267 int type;
1268
1269 if (ISUNWRITTEN(imap))
1270 type = XFS_IO_UNWRITTEN;
1271 else
1272 type = XFS_IO_OVERWRITE;
1273
1274 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
1275
1276 if (bh_result->b_private) {
1277 ioend = bh_result->b_private;
1278 ASSERT(ioend->io_size > 0);
1279 ASSERT(offset >= ioend->io_offset);
1280 if (offset + size > ioend->io_offset + ioend->io_size)
1281 ioend->io_size = offset - ioend->io_offset + size;
1282
1283 if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
1284 ioend->io_type = XFS_IO_UNWRITTEN;
1285
1286 trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
1287 ioend->io_size, ioend->io_type,
1288 imap);
1289 } else if (type == XFS_IO_UNWRITTEN ||
1290 offset + size > i_size_read(inode)) {
1291 ioend = xfs_alloc_ioend(inode, type);
1292 ioend->io_offset = offset;
1293 ioend->io_size = size;
1294
1295 bh_result->b_private = ioend;
1296 set_buffer_defer_completion(bh_result);
1297
1298 trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
1299 imap);
1300 } else {
1301 trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
1302 imap);
1303 }
1304}
1305
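
The update branch of xfs_map_direct is easiest to see in isolation: the single ioend grows so it always covers the union of the mappings seen so far, and once any mapping is unwritten the ioend type latches to unwritten. A simplified model of that bookkeeping (types and names condensed):

#include <stdio.h>

enum io_type { IO_OVERWRITE, IO_UNWRITTEN };

struct ioend {
	long offset;
	long size;
	enum io_type type;
};

/* Extend @io to cover [offset, offset + size); unwritten is sticky. */
static void ioend_update(struct ioend *io, long offset, long size,
			 enum io_type type)
{
	if (offset + size > io->offset + io->size)
		io->size = offset - io->offset + size;
	if (type == IO_UNWRITTEN)
		io->type = IO_UNWRITTEN;
}

int main(void)
{
	struct ioend io = { 0, 4096, IO_OVERWRITE };

	ioend_update(&io, 4096, 8192, IO_UNWRITTEN);
	ioend_update(&io, 12288, 4096, IO_OVERWRITE);
	printf("offset=%ld size=%ld unwritten=%d\n",
	       io.offset, io.size, io.type == IO_UNWRITTEN);
	return 0;
}
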
1306/*
1307 * If this is O_DIRECT or the mpage code calling, tell them how large the mapping
1308 * is, so that we can avoid repeated get_blocks calls.
1309 *
1310 * If the mapping spans EOF, then we have to break the mapping up as the mapping
1311 * for blocks beyond EOF must be marked new so that sub block regions can be
1312 * correctly zeroed. We can't do this for mappings within EOF unless the mapping
1313 * was just allocated or is unwritten, otherwise the callers would overwrite
1314 * existing data with zeros. Hence we have to split the mapping into a range up
1315 * to and including EOF, and a second mapping for beyond EOF.
1316 */
1317static void
1318xfs_map_trim_size(
1319 struct inode *inode,
1320 sector_t iblock,
1321 struct buffer_head *bh_result,
1322 struct xfs_bmbt_irec *imap,
1323 xfs_off_t offset,
1324 ssize_t size)
1325{
1326 xfs_off_t mapping_size;
1327
1328 mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
1329 mapping_size <<= inode->i_blkbits;
1330
1331 ASSERT(mapping_size > 0);
1332 if (mapping_size > size)
1333 mapping_size = size;
1334 if (offset < i_size_read(inode) &&
1335 offset + mapping_size >= i_size_read(inode)) {
1336 /* limit mapping to block that spans EOF */
1337 mapping_size = roundup_64(i_size_read(inode) - offset,
1338 1 << inode->i_blkbits);
1339 }
1340 if (mapping_size > LONG_MAX)
1341 mapping_size = LONG_MAX;
1342
1343 bh_result->b_size = mapping_size;
1344}
1345
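
Concretely, xfs_map_trim_size applies three clamps to the byte count implied by the extent: cap it at the caller's requested size, round it up to a block boundary when the mapping straddles EOF, and cap it at LONG_MAX for the buffer_head field. A worked standalone version with sample numbers (4k blocks, EOF at byte 43000):

#include <limits.h>
#include <stdio.h>

static long long roundup64(long long x, long long y)
{
	return ((x + y - 1) / y) * y;
}

/* Model of the trim: extent end (in blocks), current block, block size
 * log2, byte offset, requested size, and the inode size (EOF) in bytes. */
static long trim_mapping(long long ext_end_blk, long long iblock, int blkbits,
			 long long offset, long long size, long long isize)
{
	long long mapping_size = (ext_end_blk - iblock) << blkbits;

	if (mapping_size > size)
		mapping_size = size;
	if (offset < isize && offset + mapping_size >= isize)
		/* limit the mapping to the block that spans EOF */
		mapping_size = roundup64(isize - offset, 1LL << blkbits);
	if (mapping_size > LONG_MAX)	/* b_size is long-sized in the kernel */
		mapping_size = LONG_MAX;
	return (long)mapping_size;
}

int main(void)
{
	/* extent ends at block 100, we sit at block 10, request 1 MiB from
	 * byte 40960 with EOF at 43000: trimmed to one 4096-byte block */
	printf("%ld\n", trim_mapping(100, 10, 12, 40960, 1 << 20, 43000));
	return 0;
}
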
1236STATIC int 1346STATIC int
1237__xfs_get_blocks( 1347__xfs_get_blocks(
1238 struct inode *inode, 1348 struct inode *inode,
@@ -1321,31 +1431,37 @@ __xfs_get_blocks(
1321 1431
1322 xfs_iunlock(ip, lockmode); 1432 xfs_iunlock(ip, lockmode);
1323 } 1433 }
1324 1434 trace_xfs_get_blocks_alloc(ip, offset, size,
1325 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); 1435 ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1436 : XFS_IO_DELALLOC, &imap);
1326 } else if (nimaps) { 1437 } else if (nimaps) {
1327 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); 1438 trace_xfs_get_blocks_found(ip, offset, size,
1439 ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1440 : XFS_IO_OVERWRITE, &imap);
1328 xfs_iunlock(ip, lockmode); 1441 xfs_iunlock(ip, lockmode);
1329 } else { 1442 } else {
1330 trace_xfs_get_blocks_notfound(ip, offset, size); 1443 trace_xfs_get_blocks_notfound(ip, offset, size);
1331 goto out_unlock; 1444 goto out_unlock;
1332 } 1445 }
1333 1446
1447 /* trim mapping down to size requested */
1448 if (direct || size > (1 << inode->i_blkbits))
1449 xfs_map_trim_size(inode, iblock, bh_result,
1450 &imap, offset, size);
1451
1452 /*
1453 * For unwritten extents do not report a disk address in the buffered
1454 * read case (treat as if we're reading into a hole).
1455 */
1334 if (imap.br_startblock != HOLESTARTBLOCK && 1456 if (imap.br_startblock != HOLESTARTBLOCK &&
1335 imap.br_startblock != DELAYSTARTBLOCK) { 1457 imap.br_startblock != DELAYSTARTBLOCK &&
1336 /* 1458 (create || !ISUNWRITTEN(&imap))) {
1337 * For unwritten extents do not report a disk address on 1459 xfs_map_buffer(inode, bh_result, &imap, offset);
1338 * the read case (treat as if we're reading into a hole). 1460 if (ISUNWRITTEN(&imap))
1339 */
1340 if (create || !ISUNWRITTEN(&imap))
1341 xfs_map_buffer(inode, bh_result, &imap, offset);
1342 if (create && ISUNWRITTEN(&imap)) {
1343 if (direct) {
1344 bh_result->b_private = inode;
1345 set_buffer_defer_completion(bh_result);
1346 }
1347 set_buffer_unwritten(bh_result); 1461 set_buffer_unwritten(bh_result);
1348 } 1462 /* direct IO needs special help */
1463 if (create && direct)
1464 xfs_map_direct(inode, bh_result, &imap, offset);
1349 } 1465 }
1350 1466
1351 /* 1467 /*
@@ -1378,39 +1494,6 @@ __xfs_get_blocks(
1378 } 1494 }
1379 } 1495 }
1380 1496
1381 /*
1382 * If this is O_DIRECT or the mpage code calling tell them how large
1383 * the mapping is, so that we can avoid repeated get_blocks calls.
1384 *
1385 * If the mapping spans EOF, then we have to break the mapping up as the
1386 * mapping for blocks beyond EOF must be marked new so that sub block
1387 * regions can be correctly zeroed. We can't do this for mappings within
1388 * EOF unless the mapping was just allocated or is unwritten, otherwise
1389 * the callers would overwrite existing data with zeros. Hence we have
1390 * to split the mapping into a range up to and including EOF, and a
1391 * second mapping for beyond EOF.
1392 */
1393 if (direct || size > (1 << inode->i_blkbits)) {
1394 xfs_off_t mapping_size;
1395
1396 mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
1397 mapping_size <<= inode->i_blkbits;
1398
1399 ASSERT(mapping_size > 0);
1400 if (mapping_size > size)
1401 mapping_size = size;
1402 if (offset < i_size_read(inode) &&
1403 offset + mapping_size >= i_size_read(inode)) {
1404 /* limit mapping to block that spans EOF */
1405 mapping_size = roundup_64(i_size_read(inode) - offset,
1406 1 << inode->i_blkbits);
1407 }
1408 if (mapping_size > LONG_MAX)
1409 mapping_size = LONG_MAX;
1410
1411 bh_result->b_size = mapping_size;
1412 }
1413
1414 return 0; 1497 return 0;
1415 1498
1416out_unlock: 1499out_unlock:
@@ -1441,9 +1524,11 @@ xfs_get_blocks_direct(
1441/* 1524/*
1442 * Complete a direct I/O write request. 1525 * Complete a direct I/O write request.
1443 * 1526 *
1444 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1527 * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
1445 * need to issue a transaction to convert the range from unwritten to written 1528 * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
1446 * extents. 1529 * wholly within the EOF and so there is nothing for us to do. Note that in this
1530 * case the completion can be called in interrupt context, whereas if we have an
1531 * ioend we will always be called in task context (i.e. from a workqueue).
1447 */ 1532 */
1448STATIC void 1533STATIC void
1449xfs_end_io_direct_write( 1534xfs_end_io_direct_write(
@@ -1455,48 +1540,75 @@ xfs_end_io_direct_write(
1455 struct inode *inode = file_inode(iocb->ki_filp); 1540 struct inode *inode = file_inode(iocb->ki_filp);
1456 struct xfs_inode *ip = XFS_I(inode); 1541 struct xfs_inode *ip = XFS_I(inode);
1457 struct xfs_mount *mp = ip->i_mount; 1542 struct xfs_mount *mp = ip->i_mount;
1543 struct xfs_ioend *ioend = private;
1458 1544
1459 if (XFS_FORCED_SHUTDOWN(mp)) 1545 trace_xfs_gbmap_direct_endio(ip, offset, size,
1546 ioend ? ioend->io_type : 0, NULL);
1547
1548 if (!ioend) {
1549 ASSERT(offset + size <= i_size_read(inode));
1460 return; 1550 return;
1551 }
1552
1553 if (XFS_FORCED_SHUTDOWN(mp))
1554 goto out_end_io;
1461 1555
1462 /* 1556 /*
1463 * While the generic direct I/O code updates the inode size, it does 1557 * dio completion end_io functions are only called on writes if more
1464 * so only after the end_io handler is called, which means our 1558 * than 0 bytes were written.
1465 * end_io handler thinks the on-disk size is outside the in-core
1466 * size. To prevent this just update it a little bit earlier here.
1467 */ 1559 */
1560 ASSERT(size > 0);
1561
1562 /*
1563 * The ioend only maps whole blocks, while the IO may be sector aligned.
1564 * Hence the ioend offset/size may not match the IO offset/size exactly.
1565 * Because we don't map overwrites within EOF into the ioend, the offset
1566 * may not match, but only if the endio spans EOF. Either way, write
1567 * the IO sizes into the ioend so that completion processing does the
1568 * right thing.
1569 */
1570 ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
1571 ioend->io_size = size;
1572 ioend->io_offset = offset;
1573
1574 /*
1575 * The ioend tells us whether we are doing unwritten extent conversion
1576 * or an append transaction that updates the on-disk file size. These
1577 * are the only cases where we should *potentially* need
1578 * to update the VFS inode size.
1579 *
1580 * We need to update the in-core inode size here so that we don't end up
1581 * with the on-disk inode size being outside the in-core inode size. We
1582 * have no other method of updating EOF for AIO, so always do it here
1583 * if necessary.
1584 *
1585 * We need to lock the test/set EOF update as we can be racing with
1586 * other IO completions here to update the EOF. Failing to serialise
1587 * here can result in EOF moving backwards and Bad Things Happen when
1588 * that occurs.
1589 */
1590 spin_lock(&ip->i_flags_lock);
1468 if (offset + size > i_size_read(inode)) 1591 if (offset + size > i_size_read(inode))
1469 i_size_write(inode, offset + size); 1592 i_size_write(inode, offset + size);
1593 spin_unlock(&ip->i_flags_lock);
1470 1594
1471 /* 1595 /*
1472 * For direct I/O we do not know if we need to allocate blocks or not, 1596 * If we are doing an append IO that needs to update the EOF on disk,
1473 * so we can't preallocate an append transaction, as that results in 1597 * do the transaction reserve now so we can use common end io
1474 * nested reservations and log space deadlocks. Hence allocate the 1598 * processing. Stashing the error (if there is one) in the ioend will
1475 * transaction here. While this is sub-optimal and can block IO 1599 * result in the ioend processing passing on the error if it is
1476 * completion for some time, we're stuck with doing it this way until 1600 * possible as we can't return it from here.
1477 * we can pass the ioend to the direct IO allocation callbacks and
1478 * avoid nesting that way.
1479 */ 1601 */
1480 if (private && size > 0) { 1602 if (ioend->io_type == XFS_IO_OVERWRITE)
1481 xfs_iomap_write_unwritten(ip, offset, size); 1603 ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
1482 } else if (offset + size > ip->i_d.di_size) {
1483 struct xfs_trans *tp;
1484 int error;
1485
1486 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1487 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
1488 if (error) {
1489 xfs_trans_cancel(tp, 0);
1490 return;
1491 }
1492 1604
1493 xfs_setfilesize(ip, tp, offset, size); 1605out_end_io:
1494 } 1606 xfs_end_io(&ioend->io_work);
1607 return;
1495} 1608}
1496 1609
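The race described in the comment block above (EOF moving backwards) comes down to making the compare and the store atomic with respect to other completions. A userspace sketch of the same pattern, substituting a pthread spinlock for the kernel's i_flags_lock; the names and values are illustrative only.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_spinlock_t eof_lock;
static uint64_t in_core_size;	/* stands in for i_size_read/i_size_write */

static void complete_write(uint64_t offset, uint64_t size)
{
	pthread_spin_lock(&eof_lock);
	if (offset + size > in_core_size)
		in_core_size = offset + size;	/* EOF only ever advances */
	pthread_spin_unlock(&eof_lock);
}

int main(void)
{
	pthread_spin_init(&eof_lock, PTHREAD_PROCESS_PRIVATE);
	complete_write(0, 8192);	/* EOF -> 8192 */
	complete_write(0, 4096);	/* racing smaller IO: EOF stays 8192 */
	printf("EOF %llu\n", (unsigned long long)in_core_size);
	pthread_spin_destroy(&eof_lock);
	return 0;
}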
1497STATIC ssize_t 1610STATIC ssize_t
1498xfs_vm_direct_IO( 1611xfs_vm_direct_IO(
1499 int rw,
1500 struct kiocb *iocb, 1612 struct kiocb *iocb,
1501 struct iov_iter *iter, 1613 struct iov_iter *iter,
1502 loff_t offset) 1614 loff_t offset)
@@ -1504,15 +1616,14 @@ xfs_vm_direct_IO(
1504 struct inode *inode = iocb->ki_filp->f_mapping->host; 1616 struct inode *inode = iocb->ki_filp->f_mapping->host;
1505 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1617 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1506 1618
1507 if (rw & WRITE) { 1619 if (iov_iter_rw(iter) == WRITE) {
1508 return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, 1620 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
1509 offset, xfs_get_blocks_direct, 1621 xfs_get_blocks_direct,
1510 xfs_end_io_direct_write, NULL, 1622 xfs_end_io_direct_write, NULL,
1511 DIO_ASYNC_EXTEND); 1623 DIO_ASYNC_EXTEND);
1512 } 1624 }
1513 return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, 1625 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
1514 offset, xfs_get_blocks_direct, 1626 xfs_get_blocks_direct, NULL, NULL, 0);
1515 NULL, NULL, 0);
1516} 1627}
1517 1628
1518/* 1629/*
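The signature change above reflects the 4.1-era iov_iter rework: the read/write direction now travels inside the iterator rather than as a separate rw argument. A minimal kernel-context sketch of the new direction check follows; example_is_dio_write is a made-up name, and only iov_iter_rw() itself comes from the kernel API.

#include <linux/uio.h>

static bool example_is_dio_write(struct iov_iter *iter)
{
	/* iov_iter_rw() extracts READ or WRITE from the iterator */
	return iov_iter_rw(iter) == WRITE;
}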
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 83af4c149635..f9c1c64782d3 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -132,9 +132,10 @@ xfs_attr3_leaf_inactive(
132 int size; 132 int size;
133 int tmp; 133 int tmp;
134 int i; 134 int i;
135 struct xfs_mount *mp = bp->b_target->bt_mount;
135 136
136 leaf = bp->b_addr; 137 leaf = bp->b_addr;
137 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); 138 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
138 139
139 /* 140 /*
140 * Count the number of "remote" value extents. 141 * Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index a43d370d2c58..65fb37a18e92 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -225,6 +225,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
225 int error, i; 225 int error, i;
226 struct xfs_buf *bp; 226 struct xfs_buf *bp;
227 struct xfs_inode *dp = context->dp; 227 struct xfs_inode *dp = context->dp;
228 struct xfs_mount *mp = dp->i_mount;
228 229
229 trace_xfs_attr_node_list(context); 230 trace_xfs_attr_node_list(context);
230 231
@@ -256,7 +257,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
256 case XFS_ATTR_LEAF_MAGIC: 257 case XFS_ATTR_LEAF_MAGIC:
257 case XFS_ATTR3_LEAF_MAGIC: 258 case XFS_ATTR3_LEAF_MAGIC:
258 leaf = bp->b_addr; 259 leaf = bp->b_addr;
259 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); 260 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo,
261 &leafhdr, leaf);
260 entries = xfs_attr3_leaf_entryp(leaf); 262 entries = xfs_attr3_leaf_entryp(leaf);
261 if (cursor->hashval > be32_to_cpu( 263 if (cursor->hashval > be32_to_cpu(
262 entries[leafhdr.count - 1].hashval)) { 264 entries[leafhdr.count - 1].hashval)) {
@@ -340,7 +342,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
340 xfs_trans_brelse(NULL, bp); 342 xfs_trans_brelse(NULL, bp);
341 return error; 343 return error;
342 } 344 }
343 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); 345 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
344 if (context->seen_enough || leafhdr.forw == 0) 346 if (context->seen_enough || leafhdr.forw == 0)
345 break; 347 break;
346 cursor->blkno = leafhdr.forw; 348 cursor->blkno = leafhdr.forw;
@@ -368,11 +370,12 @@ xfs_attr3_leaf_list_int(
368 struct xfs_attr_leaf_entry *entry; 370 struct xfs_attr_leaf_entry *entry;
369 int retval; 371 int retval;
370 int i; 372 int i;
373 struct xfs_mount *mp = context->dp->i_mount;
371 374
372 trace_xfs_attr_list_leaf(context); 375 trace_xfs_attr_list_leaf(context);
373 376
374 leaf = bp->b_addr; 377 leaf = bp->b_addr;
375 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); 378 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
376 entries = xfs_attr3_leaf_entryp(leaf); 379 entries = xfs_attr3_leaf_entryp(leaf);
377 380
378 cursor = context->cursor; 381 cursor = context->cursor;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 22a5dcb70b32..a52bbd3abc7d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1376,22 +1376,19 @@ out:
1376} 1376}
1377 1377
1378/* 1378/*
1379 * xfs_collapse_file_space() 1379 * @next_fsb will keep track of the extent currently undergoing shift.
1380 * This routine frees disk space and shift extent for the given file. 1380 * @stop_fsb will keep track of the extent at which we have to stop.
1381 * The first thing we do is to free data blocks in the specified range 1381 * If we are shifting left, we will start with block (offset + len) and
1382 * by calling xfs_free_file_space(). It would also sync dirty data 1382 * shift each extent till the last extent.
1383 * and invalidate page cache over the region on which collapse range 1383 * If we are shifting right, we will start with the last extent inside file space
1384 * is working. And Shift extent records to the left to cover a hole. 1384 * and continue until we reach the block corresponding to offset.
1385 * RETURNS:
1386 * 0 on success
1387 * errno on error
1388 *
1389 */ 1385 */
1390int 1386static int
1391xfs_collapse_file_space( 1387xfs_shift_file_space(
1392 struct xfs_inode *ip, 1388 struct xfs_inode *ip,
1393 xfs_off_t offset, 1389 xfs_off_t offset,
1394 xfs_off_t len) 1390 xfs_off_t len,
1391 enum shift_direction direction)
1395{ 1392{
1396 int done = 0; 1393 int done = 0;
1397 struct xfs_mount *mp = ip->i_mount; 1394 struct xfs_mount *mp = ip->i_mount;
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
1400 struct xfs_bmap_free free_list; 1397 struct xfs_bmap_free free_list;
1401 xfs_fsblock_t first_block; 1398 xfs_fsblock_t first_block;
1402 int committed; 1399 int committed;
1403 xfs_fileoff_t start_fsb; 1400 xfs_fileoff_t stop_fsb;
1404 xfs_fileoff_t next_fsb; 1401 xfs_fileoff_t next_fsb;
1405 xfs_fileoff_t shift_fsb; 1402 xfs_fileoff_t shift_fsb;
1406 1403
1407 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1404 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1408 1405
1409 trace_xfs_collapse_file_space(ip); 1406 if (direction == SHIFT_LEFT) {
1407 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1408 stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1409 } else {
1410 /*
1411 * For a right shift, delegate the initialization of next_fsb
1412 * to xfs_bmap_shift_extents as it runs with the ilock held.
1413 */
1414 next_fsb = NULLFSBLOCK;
1415 stop_fsb = XFS_B_TO_FSB(mp, offset);
1416 }
1410 1417
1411 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1412 shift_fsb = XFS_B_TO_FSB(mp, len); 1418 shift_fsb = XFS_B_TO_FSB(mp, len);
1413 1419
1414 error = xfs_free_file_space(ip, offset, len);
1415 if (error)
1416 return error;
1417
1418 /* 1420 /*
1419 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation 1421 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1420 * into the accessible region of the file. 1422 * into the accessible region of the file.
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
1427 1429
1428 /* 1430 /*
1429 * Writeback and invalidate cache for the remainder of the file as we're 1431 * Writeback and invalidate cache for the remainder of the file as we're
1430 * about to shift down every extent from the collapse range to EOF. The 1432 * about to shift down every extent from offset to EOF.
1431 * free of the collapse range above might have already done some of
1432 * this, but we shouldn't rely on it to do anything outside of the range
1433 * that was freed.
1434 */ 1433 */
1435 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1434 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1436 offset + len, -1); 1435 offset, -1);
1437 if (error) 1436 if (error)
1438 return error; 1437 return error;
1439 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 1438 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
1440 (offset + len) >> PAGE_CACHE_SHIFT, -1); 1439 offset >> PAGE_CACHE_SHIFT, -1);
1441 if (error) 1440 if (error)
1442 return error; 1441 return error;
1443 1442
1443 /*
1444 * The extent shifting code works on extent granularity. So, if
1445 * stop_fsb is not the starting block of an extent, we need to split
1446 * the extent at stop_fsb.
1447 */
1448 if (direction == SHIFT_RIGHT) {
1449 error = xfs_bmap_split_extent(ip, stop_fsb);
1450 if (error)
1451 return error;
1452 }
1453
1444 while (!error && !done) { 1454 while (!error && !done) {
1445 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1455 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1446 /* 1456 /*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
1464 if (error) 1474 if (error)
1465 goto out; 1475 goto out;
1466 1476
1467 xfs_trans_ijoin(tp, ip, 0); 1477 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1468 1478
1469 xfs_bmap_init(&free_list, &first_block); 1479 xfs_bmap_init(&free_list, &first_block);
1470 1480
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
1472 * We are using the write transaction in which max 2 bmbt 1482 * We are using the write transaction in which max 2 bmbt
1473 * updates are allowed 1483 * updates are allowed
1474 */ 1484 */
1475 start_fsb = next_fsb; 1485 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1476 error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, 1486 &done, stop_fsb, &first_block, &free_list,
1477 &done, &next_fsb, &first_block, &free_list, 1487 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1478 XFS_BMAP_MAX_SHIFT_EXTENTS);
1479 if (error) 1488 if (error)
1480 goto out; 1489 goto out;
1481 1490
@@ -1484,18 +1493,70 @@ xfs_collapse_file_space(
1484 goto out; 1493 goto out;
1485 1494
1486 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1495 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1487 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1488 } 1496 }
1489 1497
1490 return error; 1498 return error;
1491 1499
1492out: 1500out:
1493 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1501 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1494 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1495 return error; 1502 return error;
1496} 1503}
1497 1504
1498/* 1505/*
1506 * xfs_collapse_file_space()
1507 * This routine frees disk space and shifts extents for the given file.
1508 * The first thing we do is free the data blocks in the specified range
1509 * by calling xfs_free_file_space(), which also syncs dirty data
1510 * and invalidates the page cache over the region on which the collapse
1511 * range is working. Extent records are then shifted left to cover the hole.
1512 * RETURNS:
1513 * 0 on success
1514 * errno on error
1515 *
1516 */
1517int
1518xfs_collapse_file_space(
1519 struct xfs_inode *ip,
1520 xfs_off_t offset,
1521 xfs_off_t len)
1522{
1523 int error;
1524
1525 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1526 trace_xfs_collapse_file_space(ip);
1527
1528 error = xfs_free_file_space(ip, offset, len);
1529 if (error)
1530 return error;
1531
1532 return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1533}
1534
1535/*
1536 * xfs_insert_file_space()
1537 * This routine creates hole space by shifting extents for the given file.
1538 * The first thing we do is sync dirty data and invalidate the page cache
1539 * over the region on which the insert range is working. We then split the
1540 * extent at the given offset into two by calling xfs_bmap_split_extent,
1541 * and shift all extent records lying between [offset,
1542 * last allocated extent] to the right to make room for the hole.
1543 * RETURNS:
1544 * 0 on success
1545 * errno on error
1546 */
1547int
1548xfs_insert_file_space(
1549 struct xfs_inode *ip,
1550 loff_t offset,
1551 loff_t len)
1552{
1553 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1554 trace_xfs_insert_file_space(ip);
1555
1556 return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
1557}
1558
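Both helpers are reached from userspace through fallocate(2). A small illustrative program, assuming an XFS file at a made-up path and a block-aligned 64k offset and length; unaligned values fail with EINVAL, and the insert case additionally requires offset to be below the current file size.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/xfs/testfile", O_RDWR);	/* path is illustrative */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* remove 64k at offset 64k; later data shifts left to cover it */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 65536, 65536) < 0)
		perror("collapse");

	/* open a 64k hole at offset 64k; data from there shifts right */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 65536, 65536) < 0)
		perror("insert");

	close(fd);
	return 0;
}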
1559/*
1499 * We need to check that the format of the data fork in the temporary inode is 1560 * We need to check that the format of the data fork in the temporary inode is
1500 * valid for the target inode before doing the swap. This is not a problem with 1561 * valid for the target inode before doing the swap. This is not a problem with
1501 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1562 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
@@ -1599,13 +1660,6 @@ xfs_swap_extent_flush(
1599 /* Verify O_DIRECT for ftmp */ 1660 /* Verify O_DIRECT for ftmp */
1600 if (VFS_I(ip)->i_mapping->nrpages) 1661 if (VFS_I(ip)->i_mapping->nrpages)
1601 return -EINVAL; 1662 return -EINVAL;
1602
1603 /*
1604 * Don't try to swap extents on mmap()d files because we can't lock
1605 * out races against page faults safely.
1606 */
1607 if (mapping_mapped(VFS_I(ip)->i_mapping))
1608 return -EBUSY;
1609 return 0; 1663 return 0;
1610} 1664}
1611 1665
@@ -1633,13 +1687,14 @@ xfs_swap_extents(
1633 } 1687 }
1634 1688
1635 /* 1689 /*
1636 * Lock up the inodes against other IO and truncate to begin with. 1690 * Lock the inodes against other IO, page faults and truncate to
1637 * Then we can ensure the inodes are flushed and have no page cache 1691 * begin with. Then we can safely ensure the inodes are flushed and
1638 * safely. Once we have done this we can take the ilocks and do the rest 1692 * have no page cache. Once we have done this we can take the ilocks and
1639 * of the checks. 1693 * do the rest of the checks.
1640 */ 1694 */
1641 lock_flags = XFS_IOLOCK_EXCL; 1695 lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1642 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1696 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1697 xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
1643 1698
1644 /* Verify that both files have the same format */ 1699 /* Verify that both files have the same format */
1645 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 1700 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1666,8 +1721,16 @@ xfs_swap_extents(
1666 xfs_trans_cancel(tp, 0); 1721 xfs_trans_cancel(tp, 0);
1667 goto out_unlock; 1722 goto out_unlock;
1668 } 1723 }
1724
1725 /*
1726 * Lock and join the inodes to the transaction so that transaction commit
1727 * or cancel will unlock the inodes from this point onwards.
1728 */
1669 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1729 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1670 lock_flags |= XFS_ILOCK_EXCL; 1730 lock_flags |= XFS_ILOCK_EXCL;
1731 xfs_trans_ijoin(tp, ip, lock_flags);
1732 xfs_trans_ijoin(tp, tip, lock_flags);
1733
1671 1734
1672 /* Verify all data are being swapped */ 1735 /* Verify all data are being swapped */
1673 if (sxp->sx_offset != 0 || 1736 if (sxp->sx_offset != 0 ||
@@ -1720,9 +1783,6 @@ xfs_swap_extents(
1720 goto out_trans_cancel; 1783 goto out_trans_cancel;
1721 } 1784 }
1722 1785
1723 xfs_trans_ijoin(tp, ip, lock_flags);
1724 xfs_trans_ijoin(tp, tip, lock_flags);
1725
1726 /* 1786 /*
1727 * Before we've swapped the forks, lets set the owners of the forks 1787 * Before we've swapped the forks, lets set the owners of the forks
1728 * appropriately. We have to do this as we are demand paging the btree 1788 * appropriately. We have to do this as we are demand paging the btree
@@ -1856,5 +1916,5 @@ out_unlock:
1856 1916
1857out_trans_cancel: 1917out_trans_cancel:
1858 xfs_trans_cancel(tp, 0); 1918 xfs_trans_cancel(tp, 0);
1859 goto out_unlock; 1919 goto out;
1860} 1920}
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 736429a72a12..af97d9a1dfb4 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,6 +63,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
63 xfs_off_t len); 63 xfs_off_t len);
64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, 64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
65 xfs_off_t len); 65 xfs_off_t len);
66int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
67 xfs_off_t len);
66 68
67/* EOF block manipulation functions */ 69/* EOF block manipulation functions */
68bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 70bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 507d96a57ac7..092d652bc03d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -537,9 +537,9 @@ xfs_buf_item_push(
537 537
538 /* has a previous flush failed due to IO errors? */ 538 /* has a previous flush failed due to IO errors? */
539 if ((bp->b_flags & XBF_WRITE_FAIL) && 539 if ((bp->b_flags & XBF_WRITE_FAIL) &&
540 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { 540 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
541 xfs_warn(bp->b_target->bt_mount, 541 xfs_warn(bp->b_target->bt_mount,
542"Detected failing async write on buffer block 0x%llx. Retrying async write.", 542"Failing async write on buffer block 0x%llx. Retrying async write.",
543 (long long)bp->b_bn); 543 (long long)bp->b_bn);
544 } 544 }
545 545
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 799e5a2d334d..e85a9519a5ae 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -84,7 +84,7 @@ xfs_trim_extents(
84 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); 84 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
85 if (error) 85 if (error)
86 goto out_del_cursor; 86 goto out_del_cursor;
87 XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); 87 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor);
88 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); 88 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
89 89
90 /* 90 /*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 3ee186ac1093..338e50bbfd1e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -131,7 +131,7 @@ xfs_error_report(
131{ 131{
132 if (level <= xfs_error_level) { 132 if (level <= xfs_error_level) {
133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
134 "Internal error %s at line %d of file %s. Caller %pF", 134 "Internal error %s at line %d of file %s. Caller %pS",
135 tag, linenum, filename, ra); 135 tag, linenum, filename, ra);
136 136
137 xfs_stack_trace(); 137 xfs_stack_trace();
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 279a76e52791..c0394ed126fc 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
40/* 40/*
41 * Macros to set EFSCORRUPTED & return/branch. 41 * Macros to set EFSCORRUPTED & return/branch.
42 */ 42 */
43#define XFS_WANT_CORRUPTED_GOTO(x,l) \ 43#define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \
44 { \ 44 { \
45 int fs_is_ok = (x); \ 45 int fs_is_ok = (x); \
46 ASSERT(fs_is_ok); \ 46 ASSERT(fs_is_ok); \
47 if (unlikely(!fs_is_ok)) { \ 47 if (unlikely(!fs_is_ok)) { \
48 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ 48 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
49 XFS_ERRLEVEL_LOW, NULL); \ 49 XFS_ERRLEVEL_LOW, mp); \
50 error = -EFSCORRUPTED; \ 50 error = -EFSCORRUPTED; \
51 goto l; \ 51 goto l; \
52 } \ 52 } \
53 } 53 }
54 54
55#define XFS_WANT_CORRUPTED_RETURN(x) \ 55#define XFS_WANT_CORRUPTED_RETURN(mp, x) \
56 { \ 56 { \
57 int fs_is_ok = (x); \ 57 int fs_is_ok = (x); \
58 ASSERT(fs_is_ok); \ 58 ASSERT(fs_is_ok); \
59 if (unlikely(!fs_is_ok)) { \ 59 if (unlikely(!fs_is_ok)) { \
60 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ 60 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
61 XFS_ERRLEVEL_LOW, NULL); \ 61 XFS_ERRLEVEL_LOW, mp); \
62 return -EFSCORRUPTED; \ 62 return -EFSCORRUPTED; \
63 } \ 63 } \
64 } 64 }
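A hypothetical caller showing the extra @mp argument now threaded through, so corruption reports can name the affected mount. The function name is invented; the sketch assumes the usual XFS headers and the existing convention that a local 'error' is in scope for the GOTO form, as in the xfs_discard.c hunk above.

STATIC int
xfs_example_verify_rec(
	struct xfs_mount	*mp,
	int			found)
{
	int			error = 0;

	/* on corruption, reports against @mp and jumps with -EFSCORRUPTED */
	XFS_WANT_CORRUPTED_GOTO(mp, found == 1, out_error);
	return 0;

out_error:
	return error;
}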
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d12fb44..8121e75352ee 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
38#include "xfs_icache.h" 38#include "xfs_icache.h"
39#include "xfs_pnfs.h" 39#include "xfs_pnfs.h"
40 40
41#include <linux/aio.h>
42#include <linux/dcache.h> 41#include <linux/dcache.h>
43#include <linux/falloc.h> 42#include <linux/falloc.h>
44#include <linux/pagevec.h> 43#include <linux/pagevec.h>
@@ -280,7 +279,7 @@ xfs_file_read_iter(
280 279
281 XFS_STATS_INC(xs_read_calls); 280 XFS_STATS_INC(xs_read_calls);
282 281
283 if (unlikely(file->f_flags & O_DIRECT)) 282 if (unlikely(iocb->ki_flags & IOCB_DIRECT))
284 ioflags |= XFS_IO_ISDIRECT; 283 ioflags |= XFS_IO_ISDIRECT;
285 if (file->f_mode & FMODE_NOCMTIME) 284 if (file->f_mode & FMODE_NOCMTIME)
286 ioflags |= XFS_IO_INVIS; 285 ioflags |= XFS_IO_INVIS;
@@ -397,7 +396,8 @@ STATIC int /* error (positive) */
397xfs_zero_last_block( 396xfs_zero_last_block(
398 struct xfs_inode *ip, 397 struct xfs_inode *ip,
399 xfs_fsize_t offset, 398 xfs_fsize_t offset,
400 xfs_fsize_t isize) 399 xfs_fsize_t isize,
400 bool *did_zeroing)
401{ 401{
402 struct xfs_mount *mp = ip->i_mount; 402 struct xfs_mount *mp = ip->i_mount;
403 xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); 403 xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +425,7 @@ xfs_zero_last_block(
425 zero_len = mp->m_sb.sb_blocksize - zero_offset; 425 zero_len = mp->m_sb.sb_blocksize - zero_offset;
426 if (isize + zero_len > offset) 426 if (isize + zero_len > offset)
427 zero_len = offset - isize; 427 zero_len = offset - isize;
428 *did_zeroing = true;
428 return xfs_iozero(ip, isize, zero_len); 429 return xfs_iozero(ip, isize, zero_len);
429} 430}
430 431
@@ -443,7 +444,8 @@ int /* error (positive) */
443xfs_zero_eof( 444xfs_zero_eof(
444 struct xfs_inode *ip, 445 struct xfs_inode *ip,
445 xfs_off_t offset, /* starting I/O offset */ 446 xfs_off_t offset, /* starting I/O offset */
446 xfs_fsize_t isize) /* current inode size */ 447 xfs_fsize_t isize, /* current inode size */
448 bool *did_zeroing)
447{ 449{
448 struct xfs_mount *mp = ip->i_mount; 450 struct xfs_mount *mp = ip->i_mount;
449 xfs_fileoff_t start_zero_fsb; 451 xfs_fileoff_t start_zero_fsb;
@@ -465,7 +467,7 @@ xfs_zero_eof(
465 * We only zero a part of that block so it is handled specially. 467 * We only zero a part of that block so it is handled specially.
466 */ 468 */
467 if (XFS_B_FSB_OFFSET(mp, isize) != 0) { 469 if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
468 error = xfs_zero_last_block(ip, offset, isize); 470 error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
469 if (error) 471 if (error)
470 return error; 472 return error;
471 } 473 }
@@ -525,6 +527,7 @@ xfs_zero_eof(
525 if (error) 527 if (error)
526 return error; 528 return error;
527 529
530 *did_zeroing = true;
528 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 531 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
529 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 532 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
530 } 533 }
@@ -541,21 +544,22 @@ xfs_zero_eof(
541 */ 544 */
542STATIC ssize_t 545STATIC ssize_t
543xfs_file_aio_write_checks( 546xfs_file_aio_write_checks(
544 struct file *file, 547 struct kiocb *iocb,
545 loff_t *pos, 548 struct iov_iter *from,
546 size_t *count,
547 int *iolock) 549 int *iolock)
548{ 550{
551 struct file *file = iocb->ki_filp;
549 struct inode *inode = file->f_mapping->host; 552 struct inode *inode = file->f_mapping->host;
550 struct xfs_inode *ip = XFS_I(inode); 553 struct xfs_inode *ip = XFS_I(inode);
551 int error = 0; 554 ssize_t error = 0;
555 size_t count = iov_iter_count(from);
552 556
553restart: 557restart:
554 error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); 558 error = generic_write_checks(iocb, from);
555 if (error) 559 if (error <= 0)
556 return error; 560 return error;
557 561
558 error = xfs_break_layouts(inode, iolock); 562 error = xfs_break_layouts(inode, iolock, true);
559 if (error) 563 if (error)
560 return error; 564 return error;
561 565
@@ -565,18 +569,42 @@ restart:
565 * write. If zeroing is needed and we are currently holding the 569 * write. If zeroing is needed and we are currently holding the
566 * iolock shared, we need to update it to exclusive which implies 570 * iolock shared, we need to update it to exclusive which implies
567 * having to redo all checks before. 571 * having to redo all checks before.
572 *
573 * We need to serialise against EOF updates that occur in IO
574 * completions here. We want to make sure that nobody is changing the
575 * size while we do this check until we have placed an IO barrier (i.e.
576 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
577 * The spinlock effectively forms a memory barrier once we have the
578 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
579 * and hence be able to correctly determine if we need to run zeroing.
568 */ 580 */
569 if (*pos > i_size_read(inode)) { 581 spin_lock(&ip->i_flags_lock);
582 if (iocb->ki_pos > i_size_read(inode)) {
583 bool zero = false;
584
585 spin_unlock(&ip->i_flags_lock);
570 if (*iolock == XFS_IOLOCK_SHARED) { 586 if (*iolock == XFS_IOLOCK_SHARED) {
571 xfs_rw_iunlock(ip, *iolock); 587 xfs_rw_iunlock(ip, *iolock);
572 *iolock = XFS_IOLOCK_EXCL; 588 *iolock = XFS_IOLOCK_EXCL;
573 xfs_rw_ilock(ip, *iolock); 589 xfs_rw_ilock(ip, *iolock);
590 iov_iter_reexpand(from, count);
591
592 /*
593 * We now have an IO submission barrier in place, but
594 * AIO can do EOF updates during IO completion and hence
595 * we now need to wait for all of them to drain. Non-AIO
596 * DIO will have drained before we are given the
597 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
598 * no-op.
599 */
600 inode_dio_wait(inode);
574 goto restart; 601 goto restart;
575 } 602 }
576 error = xfs_zero_eof(ip, *pos, i_size_read(inode)); 603 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
577 if (error) 604 if (error)
578 return error; 605 return error;
579 } 606 } else
607 spin_unlock(&ip->i_flags_lock);
580 608
581 /* 609 /*
582 * Updating the timestamps will grab the ilock again from 610 * Updating the timestamps will grab the ilock again from
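The restart loop above implements a classic upgrade-and-retry: a shared lock cannot be promoted in place, so it is dropped, retaken exclusive, and every check is redone because the file may have changed in between. A userspace sketch of the shape of that loop, with pthread rwlocks standing in for the XFS iolock and stubbed predicates (all names are illustrative).

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

static bool need_zeroing(void) { return false; }	/* stub: pos > i_size */
static void zero_eof(void) { }				/* stub: xfs_zero_eof */

static void write_checks(bool *excl)
{
restart:
	if (need_zeroing()) {
		if (!*excl) {
			/* drop shared, retake exclusive, redo all checks */
			pthread_rwlock_unlock(&iolock);
			pthread_rwlock_wrlock(&iolock);
			*excl = true;
			goto restart;
		}
		zero_eof();
	}
}

int main(void)
{
	bool excl = false;

	pthread_rwlock_rdlock(&iolock);
	write_checks(&excl);
	pthread_rwlock_unlock(&iolock);
	return 0;
}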
@@ -638,6 +666,8 @@ xfs_file_dio_aio_write(
638 int iolock; 666 int iolock;
639 size_t count = iov_iter_count(from); 667 size_t count = iov_iter_count(from);
640 loff_t pos = iocb->ki_pos; 668 loff_t pos = iocb->ki_pos;
669 loff_t end;
670 struct iov_iter data;
641 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 671 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
642 mp->m_rtdev_targp : mp->m_ddev_targp; 672 mp->m_rtdev_targp : mp->m_ddev_targp;
643 673
@@ -673,14 +703,16 @@ xfs_file_dio_aio_write(
673 xfs_rw_ilock(ip, iolock); 703 xfs_rw_ilock(ip, iolock);
674 } 704 }
675 705
676 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); 706 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
677 if (ret) 707 if (ret)
678 goto out; 708 goto out;
679 iov_iter_truncate(from, count); 709 count = iov_iter_count(from);
710 pos = iocb->ki_pos;
711 end = pos + count - 1;
680 712
681 if (mapping->nrpages) { 713 if (mapping->nrpages) {
682 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 714 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
683 pos, pos + count - 1); 715 pos, end);
684 if (ret) 716 if (ret)
685 goto out; 717 goto out;
686 /* 718 /*
@@ -690,7 +722,7 @@ xfs_file_dio_aio_write(
690 */ 722 */
691 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 723 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
692 pos >> PAGE_CACHE_SHIFT, 724 pos >> PAGE_CACHE_SHIFT,
693 (pos + count - 1) >> PAGE_CACHE_SHIFT); 725 end >> PAGE_CACHE_SHIFT);
694 WARN_ON_ONCE(ret); 726 WARN_ON_ONCE(ret);
695 ret = 0; 727 ret = 0;
696 } 728 }
@@ -707,8 +739,22 @@ xfs_file_dio_aio_write(
707 } 739 }
708 740
709 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); 741 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
710 ret = generic_file_direct_write(iocb, from, pos);
711 742
743 data = *from;
744 ret = mapping->a_ops->direct_IO(iocb, &data, pos);
745
746 /* see generic_file_direct_write() for why this is necessary */
747 if (mapping->nrpages) {
748 invalidate_inode_pages2_range(mapping,
749 pos >> PAGE_CACHE_SHIFT,
750 end >> PAGE_CACHE_SHIFT);
751 }
752
753 if (ret > 0) {
754 pos += ret;
755 iov_iter_advance(from, ret);
756 iocb->ki_pos = pos;
757 }
712out: 758out:
713 xfs_rw_iunlock(ip, iolock); 759 xfs_rw_iunlock(ip, iolock);
714 760
@@ -729,24 +775,22 @@ xfs_file_buffered_aio_write(
729 ssize_t ret; 775 ssize_t ret;
730 int enospc = 0; 776 int enospc = 0;
731 int iolock = XFS_IOLOCK_EXCL; 777 int iolock = XFS_IOLOCK_EXCL;
732 loff_t pos = iocb->ki_pos;
733 size_t count = iov_iter_count(from);
734 778
735 xfs_rw_ilock(ip, iolock); 779 xfs_rw_ilock(ip, iolock);
736 780
737 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); 781 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
738 if (ret) 782 if (ret)
739 goto out; 783 goto out;
740 784
741 iov_iter_truncate(from, count);
742 /* We can write back this queue in page reclaim */ 785 /* We can write back this queue in page reclaim */
743 current->backing_dev_info = inode_to_bdi(inode); 786 current->backing_dev_info = inode_to_bdi(inode);
744 787
745write_retry: 788write_retry:
746 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); 789 trace_xfs_file_buffered_write(ip, iov_iter_count(from),
747 ret = generic_perform_write(file, from, pos); 790 iocb->ki_pos, 0);
791 ret = generic_perform_write(file, from, iocb->ki_pos);
748 if (likely(ret >= 0)) 792 if (likely(ret >= 0))
749 iocb->ki_pos = pos + ret; 793 iocb->ki_pos += ret;
750 794
751 /* 795 /*
752 * If we hit a space limit, try to free up some lingering preallocated 796 * If we hit a space limit, try to free up some lingering preallocated
@@ -798,7 +842,7 @@ xfs_file_write_iter(
798 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 842 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
799 return -EIO; 843 return -EIO;
800 844
801 if (unlikely(file->f_flags & O_DIRECT)) 845 if (unlikely(iocb->ki_flags & IOCB_DIRECT))
802 ret = xfs_file_dio_aio_write(iocb, from); 846 ret = xfs_file_dio_aio_write(iocb, from);
803 else 847 else
804 ret = xfs_file_buffered_aio_write(iocb, from); 848 ret = xfs_file_buffered_aio_write(iocb, from);
@@ -816,6 +860,11 @@ xfs_file_write_iter(
816 return ret; 860 return ret;
817} 861}
818 862
863#define XFS_FALLOC_FL_SUPPORTED \
864 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
865 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
866 FALLOC_FL_INSERT_RANGE)
867
819STATIC long 868STATIC long
820xfs_file_fallocate( 869xfs_file_fallocate(
821 struct file *file, 870 struct file *file,
@@ -829,18 +878,21 @@ xfs_file_fallocate(
829 enum xfs_prealloc_flags flags = 0; 878 enum xfs_prealloc_flags flags = 0;
830 uint iolock = XFS_IOLOCK_EXCL; 879 uint iolock = XFS_IOLOCK_EXCL;
831 loff_t new_size = 0; 880 loff_t new_size = 0;
881 bool do_file_insert = 0;
832 882
833 if (!S_ISREG(inode->i_mode)) 883 if (!S_ISREG(inode->i_mode))
834 return -EINVAL; 884 return -EINVAL;
835 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 885 if (mode & ~XFS_FALLOC_FL_SUPPORTED)
836 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
837 return -EOPNOTSUPP; 886 return -EOPNOTSUPP;
838 887
839 xfs_ilock(ip, iolock); 888 xfs_ilock(ip, iolock);
840 error = xfs_break_layouts(inode, &iolock); 889 error = xfs_break_layouts(inode, &iolock, false);
841 if (error) 890 if (error)
842 goto out_unlock; 891 goto out_unlock;
843 892
893 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
894 iolock |= XFS_MMAPLOCK_EXCL;
895
844 if (mode & FALLOC_FL_PUNCH_HOLE) { 896 if (mode & FALLOC_FL_PUNCH_HOLE) {
845 error = xfs_free_file_space(ip, offset, len); 897 error = xfs_free_file_space(ip, offset, len);
846 if (error) 898 if (error)
@@ -867,6 +919,27 @@ xfs_file_fallocate(
867 error = xfs_collapse_file_space(ip, offset, len); 919 error = xfs_collapse_file_space(ip, offset, len);
868 if (error) 920 if (error)
869 goto out_unlock; 921 goto out_unlock;
922 } else if (mode & FALLOC_FL_INSERT_RANGE) {
923 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
924
925 new_size = i_size_read(inode) + len;
926 if (offset & blksize_mask || len & blksize_mask) {
927 error = -EINVAL;
928 goto out_unlock;
929 }
930
931 /* check the new inode size does not wrap through zero */
932 if (new_size > inode->i_sb->s_maxbytes) {
933 error = -EFBIG;
934 goto out_unlock;
935 }
936
937 /* Offset should be less than i_size */
938 if (offset >= i_size_read(inode)) {
939 error = -EINVAL;
940 goto out_unlock;
941 }
942 do_file_insert = 1;
870 } else { 943 } else {
871 flags |= XFS_PREALLOC_SET; 944 flags |= XFS_PREALLOC_SET;
872 945
@@ -901,8 +974,19 @@ xfs_file_fallocate(
901 iattr.ia_valid = ATTR_SIZE; 974 iattr.ia_valid = ATTR_SIZE;
902 iattr.ia_size = new_size; 975 iattr.ia_size = new_size;
903 error = xfs_setattr_size(ip, &iattr); 976 error = xfs_setattr_size(ip, &iattr);
977 if (error)
978 goto out_unlock;
904 } 979 }
905 980
981 /*
982 * Perform hole insertion now that the file size has been
983 * updated so that if we crash during the operation we don't
984 * leave shifted extents past EOF and hence lose access to
985 * the data that is contained within them.
986 */
987 if (do_file_insert)
988 error = xfs_insert_file_space(ip, offset, len);
989
906out_unlock: 990out_unlock:
907 xfs_iunlock(ip, iolock); 991 xfs_iunlock(ip, iolock);
908 return error; 992 return error;
@@ -991,20 +1075,6 @@ xfs_file_mmap(
991} 1075}
992 1076
993/* 1077/*
994 * mmap()d file has taken write protection fault and is being made
995 * writable. We can set the page state up correctly for a writable
996 * page, which means we can do correct delalloc accounting (ENOSPC
997 * checking!) and unwritten extent mapping.
998 */
999STATIC int
1000xfs_vm_page_mkwrite(
1001 struct vm_area_struct *vma,
1002 struct vm_fault *vmf)
1003{
1004 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1005}
1006
1007/*
1008 * This type is designed to indicate the type of offset we would like 1078 * This type is designed to indicate the type of offset we would like
1009 * to search from page cache for xfs_seek_hole_data(). 1079 * to search from page cache for xfs_seek_hole_data().
1010 */ 1080 */
@@ -1379,10 +1449,57 @@ xfs_file_llseek(
1379 } 1449 }
1380} 1450}
1381 1451
1452/*
1453 * Locking for serialisation of IO during page faults. This results in a lock
1454 * ordering of:
1455 *
1456 * mmap_sem (MM)
1457 * i_mmap_lock (XFS - truncate serialisation)
1458 * page_lock (MM)
1459 * i_lock (XFS - extent map serialisation)
1460 */
1461STATIC int
1462xfs_filemap_fault(
1463 struct vm_area_struct *vma,
1464 struct vm_fault *vmf)
1465{
1466 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1467 int error;
1468
1469 trace_xfs_filemap_fault(ip);
1470
1471 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1472 error = filemap_fault(vma, vmf);
1473 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1474
1475 return error;
1476}
1477
1478/*
1479 * mmap()d file has taken write protection fault and is being made writable. We
1480 * can set the page state up correctly for a writable page, which means we can
1481 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1482 * mapping.
1483 */
1484STATIC int
1485xfs_filemap_page_mkwrite(
1486 struct vm_area_struct *vma,
1487 struct vm_fault *vmf)
1488{
1489 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1490 int error;
1491
1492 trace_xfs_filemap_page_mkwrite(ip);
1493
1494 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1495 error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
1496 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1497
1498 return error;
1499}
1500
1382const struct file_operations xfs_file_operations = { 1501const struct file_operations xfs_file_operations = {
1383 .llseek = xfs_file_llseek, 1502 .llseek = xfs_file_llseek,
1384 .read = new_sync_read,
1385 .write = new_sync_write,
1386 .read_iter = xfs_file_read_iter, 1503 .read_iter = xfs_file_read_iter,
1387 .write_iter = xfs_file_write_iter, 1504 .write_iter = xfs_file_write_iter,
1388 .splice_read = xfs_file_splice_read, 1505 .splice_read = xfs_file_splice_read,
@@ -1411,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
1411}; 1528};
1412 1529
1413static const struct vm_operations_struct xfs_file_vm_ops = { 1530static const struct vm_operations_struct xfs_file_vm_ops = {
1414 .fault = filemap_fault, 1531 .fault = xfs_filemap_fault,
1415 .map_pages = filemap_map_pages, 1532 .map_pages = filemap_map_pages,
1416 .page_mkwrite = xfs_vm_page_mkwrite, 1533 .page_mkwrite = xfs_filemap_page_mkwrite,
1417}; 1534};
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a2e86e8a0fea..8f9f854376c6 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
322 322
323 pip = xfs_filestream_get_parent(ip); 323 pip = xfs_filestream_get_parent(ip);
324 if (!pip) 324 if (!pip)
325 goto out; 325 return NULLAGNUMBER;
326 326
327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
328 if (mru) { 328 if (mru) {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 74efe5b760dc..cb7e8a29dfb6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -637,12 +637,13 @@ xfs_fs_counts(
637 xfs_mount_t *mp, 637 xfs_mount_t *mp,
638 xfs_fsop_counts_t *cnt) 638 xfs_fsop_counts_t *cnt)
639{ 639{
640 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 640 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
641 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
642 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
643 XFS_ALLOC_SET_ASIDE(mp);
644
641 spin_lock(&mp->m_sb_lock); 645 spin_lock(&mp->m_sb_lock);
642 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
643 cnt->freertx = mp->m_sb.sb_frextents; 646 cnt->freertx = mp->m_sb.sb_frextents;
644 cnt->freeino = mp->m_sb.sb_ifree;
645 cnt->allocino = mp->m_sb.sb_icount;
646 spin_unlock(&mp->m_sb_lock); 647 spin_unlock(&mp->m_sb_lock);
647 return 0; 648 return 0;
648} 649}
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
692 * what to do. This means that the amount of free space can 693 * what to do. This means that the amount of free space can
693 * change while we do this, so we need to retry if we end up 694 * change while we do this, so we need to retry if we end up
694 * trying to reserve more space than is available. 695 * trying to reserve more space than is available.
695 *
696 * We also use the xfs_mod_incore_sb() interface so that we
697 * don't have to care about whether per cpu counter are
698 * enabled, disabled or even compiled in....
699 */ 696 */
700retry: 697retry:
701 spin_lock(&mp->m_sb_lock); 698 spin_lock(&mp->m_sb_lock);
702 xfs_icsb_sync_counters_locked(mp, 0);
703 699
704 /* 700 /*
705 * If our previous reservation was larger than the current value, 701 * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
716 } else { 712 } else {
717 __int64_t free; 713 __int64_t free;
718 714
719 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 715 free = percpu_counter_sum(&mp->m_fdblocks) -
716 XFS_ALLOC_SET_ASIDE(mp);
720 if (!free) 717 if (!free)
721 goto out; /* ENOSPC and fdblks_delta = 0 */ 718 goto out; /* ENOSPC and fdblks_delta = 0 */
722 719
@@ -755,8 +752,7 @@ out:
755 * the extra reserve blocks from the reserve..... 752 * the extra reserve blocks from the reserve.....
756 */ 753 */
757 int error; 754 int error;
758 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 755 error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
759 fdblks_delta, 0);
760 if (error == -ENOSPC) 756 if (error == -ENOSPC)
761 goto retry; 757 goto retry;
762 } 758 }
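The split above between percpu_counter_read_positive() and percpu_counter_sum() is deliberate: the former is a cheap approximate read that can lag per-cpu deltas, which is fine for statfs-style reporting, while the latter takes the counter lock and folds in every CPU's delta, which is needed where the reservation retry depends on an exact value. A kernel-context sketch of the two modes; the example_ function names are invented, the two counter calls are the real generic API.

#include <linux/percpu_counter.h>

/* cheap read for reporting: may lag, clamped so it never goes negative */
static s64 example_report_free(struct percpu_counter *c)
{
	return percpu_counter_read_positive(c);
}

/* exact read for decisions: sums every CPU's delta under the lock */
static s64 example_exact_free(struct percpu_counter *c)
{
	return percpu_counter_sum(c);
}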
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 9771b7ef62ed..76a9f2783282 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -439,11 +439,11 @@ again:
439 *ipp = ip; 439 *ipp = ip;
440 440
441 /* 441 /*
442 * If we have a real type for an on-disk inode, we can set ops(&unlock) 442 * If we have a real type for an on-disk inode, we can set up the inode
443 * now. If it's a new inode being created, xfs_ialloc will handle it. 443 * now. If it's a new inode being created, xfs_ialloc will handle it.
444 */ 444 */
445 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) 445 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
446 xfs_setup_inode(ip); 446 xfs_setup_existing_inode(ip);
447 return 0; 447 return 0;
448 448
449out_error_or_again: 449out_error_or_again:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index daafa1f6d260..d6ebc85192b7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
117} 117}
118 118
119/* 119/*
120 * The xfs inode contains 2 locks: a multi-reader lock called the 120 * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and
121 * i_iolock and a multi-reader lock called the i_lock. This routine 121 * the i_lock. This routine allows various combinations of the locks to be
122 * allows either or both of the locks to be obtained. 122 * obtained.
123 * 123 *
124 * The 2 locks should always be ordered so that the IO lock is 124 * The 3 locks should always be ordered so that the IO lock is obtained first,
125 * obtained first in order to prevent deadlock. 125 * the mmap lock second and the ilock last in order to prevent deadlock.
126 * 126 *
127 * ip -- the inode being locked 127 * Basic locking order:
128 * lock_flags -- this parameter indicates the inode's locks 128 *
129 * to be locked. It can be: 129 * i_iolock -> i_mmap_lock -> page_lock -> i_lock
130 * XFS_IOLOCK_SHARED, 130 *
131 * XFS_IOLOCK_EXCL, 131 * mmap_sem locking order:
132 * XFS_ILOCK_SHARED, 132 *
133 * XFS_ILOCK_EXCL, 133 * i_iolock -> page lock -> mmap_sem
134 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 134 * mmap_sem -> i_mmap_lock -> page_lock
135 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 135 *
136 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 136 * The difference in mmap_sem locking order mean that we cannot hold the
137 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 136 * The difference in mmap_sem locking order means that we cannot hold the
138 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 137 * i_mmap_lock over syscall based read(2)/write(2) IO. These IO paths can
139 * in get_user_pages() to map the user pages into the kernel address space for
140 * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
141 * page faults already hold the mmap_sem.
142 *
143 * Hence to serialise fully against both syscall and mmap based IO, we need to
144 * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
145 * taken in places where we need to invalidate the page cache in a race
146 * free manner (e.g. truncate, hole punch and other extent manipulation
147 * functions).
138 */ 148 */
139void 149void
140xfs_ilock( 150xfs_ilock(
@@ -150,6 +160,8 @@ xfs_ilock(
150 */ 160 */
151 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 161 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
152 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 162 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
163 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
164 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
153 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 165 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
154 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 166 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
155 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 167 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
159 else if (lock_flags & XFS_IOLOCK_SHARED) 171 else if (lock_flags & XFS_IOLOCK_SHARED)
160 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 172 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
161 173
174 if (lock_flags & XFS_MMAPLOCK_EXCL)
175 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
176 else if (lock_flags & XFS_MMAPLOCK_SHARED)
177 mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
178
162 if (lock_flags & XFS_ILOCK_EXCL) 179 if (lock_flags & XFS_ILOCK_EXCL)
163 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 180 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
164 else if (lock_flags & XFS_ILOCK_SHARED) 181 else if (lock_flags & XFS_ILOCK_SHARED)
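Taken together, the ordering rules in the comment above give extent-manipulating callers a fixed shape. A hypothetical sketch follows: xfs_example_invalidate_and_shift and xfs_example_punch are placeholders for the real work, and only xfs_ilock/xfs_iunlock and the flag names come from the code.

static int
xfs_example_invalidate_and_shift(
	struct xfs_inode	*ip)
{
	return 0;		/* placeholder for the real work */
}

static int
xfs_example_punch(
	struct xfs_inode	*ip)
{
	int			error;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);		/* block new syscall IO */
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);	/* block new page faults */

	/* page cache can now be invalidated without racing faults */
	error = xfs_example_invalidate_and_shift(ip);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}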
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
191 */ 208 */
192 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 209 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
193 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 210 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
211 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
212 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
194 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 213 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
195 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 214 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
196 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 215 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
202 if (!mrtryaccess(&ip->i_iolock)) 221 if (!mrtryaccess(&ip->i_iolock))
203 goto out; 222 goto out;
204 } 223 }
224
225 if (lock_flags & XFS_MMAPLOCK_EXCL) {
226 if (!mrtryupdate(&ip->i_mmaplock))
227 goto out_undo_iolock;
228 } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
229 if (!mrtryaccess(&ip->i_mmaplock))
230 goto out_undo_iolock;
231 }
232
205 if (lock_flags & XFS_ILOCK_EXCL) { 233 if (lock_flags & XFS_ILOCK_EXCL) {
206 if (!mrtryupdate(&ip->i_lock)) 234 if (!mrtryupdate(&ip->i_lock))
207 goto out_undo_iolock; 235 goto out_undo_mmaplock;
208 } else if (lock_flags & XFS_ILOCK_SHARED) { 236 } else if (lock_flags & XFS_ILOCK_SHARED) {
209 if (!mrtryaccess(&ip->i_lock)) 237 if (!mrtryaccess(&ip->i_lock))
210 goto out_undo_iolock; 238 goto out_undo_mmaplock;
211 } 239 }
212 return 1; 240 return 1;
213 241
214 out_undo_iolock: 242out_undo_mmaplock:
243 if (lock_flags & XFS_MMAPLOCK_EXCL)
244 mrunlock_excl(&ip->i_mmaplock);
245 else if (lock_flags & XFS_MMAPLOCK_SHARED)
246 mrunlock_shared(&ip->i_mmaplock);
247out_undo_iolock:
215 if (lock_flags & XFS_IOLOCK_EXCL) 248 if (lock_flags & XFS_IOLOCK_EXCL)
216 mrunlock_excl(&ip->i_iolock); 249 mrunlock_excl(&ip->i_iolock);
217 else if (lock_flags & XFS_IOLOCK_SHARED) 250 else if (lock_flags & XFS_IOLOCK_SHARED)
218 mrunlock_shared(&ip->i_iolock); 251 mrunlock_shared(&ip->i_iolock);
219 out: 252out:
220 return 0; 253 return 0;
221} 254}
222 255
@@ -244,6 +277,8 @@ xfs_iunlock(
244 */ 277 */
245 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 278 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
246 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 279 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
280 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
281 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
247 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 282 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
248 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 283 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
249 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 284 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
254 else if (lock_flags & XFS_IOLOCK_SHARED) 289 else if (lock_flags & XFS_IOLOCK_SHARED)
255 mrunlock_shared(&ip->i_iolock); 290 mrunlock_shared(&ip->i_iolock);
256 291
292 if (lock_flags & XFS_MMAPLOCK_EXCL)
293 mrunlock_excl(&ip->i_mmaplock);
294 else if (lock_flags & XFS_MMAPLOCK_SHARED)
295 mrunlock_shared(&ip->i_mmaplock);
296
257 if (lock_flags & XFS_ILOCK_EXCL) 297 if (lock_flags & XFS_ILOCK_EXCL)
258 mrunlock_excl(&ip->i_lock); 298 mrunlock_excl(&ip->i_lock);
259 else if (lock_flags & XFS_ILOCK_SHARED) 299 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
271 xfs_inode_t *ip, 311 xfs_inode_t *ip,
272 uint lock_flags) 312 uint lock_flags)
273{ 313{
274 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 314 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
275 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 315 ASSERT((lock_flags &
316 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
276 317
277 if (lock_flags & XFS_ILOCK_EXCL) 318 if (lock_flags & XFS_ILOCK_EXCL)
278 mrdemote(&ip->i_lock); 319 mrdemote(&ip->i_lock);
320 if (lock_flags & XFS_MMAPLOCK_EXCL)
321 mrdemote(&ip->i_mmaplock);
279 if (lock_flags & XFS_IOLOCK_EXCL) 322 if (lock_flags & XFS_IOLOCK_EXCL)
280 mrdemote(&ip->i_iolock); 323 mrdemote(&ip->i_iolock);
281 324
@@ -294,6 +337,12 @@ xfs_isilocked(
294 return rwsem_is_locked(&ip->i_lock.mr_lock); 337 return rwsem_is_locked(&ip->i_lock.mr_lock);
295 } 338 }
296 339
340 if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
341 if (!(lock_flags & XFS_MMAPLOCK_SHARED))
342 return !!ip->i_mmaplock.mr_writer;
343 return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
344 }
345
297 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 346 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
298 if (!(lock_flags & XFS_IOLOCK_SHARED)) 347 if (!(lock_flags & XFS_IOLOCK_SHARED))
299 return !!ip->i_iolock.mr_writer; 348 return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
314#endif 363#endif
315 364
316/* 365/*
317 * Bump the subclass so xfs_lock_inodes() acquires each lock with
318 * a different value
366 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
367 * value. This shouldn't be called for page fault locking, but we also need to
368 * ensure we don't overrun the number of lockdep subclasses for the iolock or
369 * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
319 */ 370 */
320static inline int 371static inline int
321xfs_lock_inumorder(int lock_mode, int subclass) 372xfs_lock_inumorder(int lock_mode, int subclass)
322{ 373{
323 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 374 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
375 ASSERT(subclass + XFS_LOCK_INUMORDER <
376 (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
324 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 377 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
378 }
379
380 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
381 ASSERT(subclass + XFS_LOCK_INUMORDER <
382 (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
383 lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
384 XFS_MMAPLOCK_SHIFT;
385 }
386
325 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 387 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
326 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 388 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
327 389
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
329} 391}
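
With the shift values defined in xfs_inode.h later in this patch (16 for the iolock, 20 for the mmaplock, 24 for the ilock), each lock class gets four bits for its lockdep annotation, so subclass + XFS_LOCK_INUMORDER must stay below 16; that is exactly what the new ASSERTs bound. A standalone sketch of the packing and the matching DEP-style extraction; LOCK_INUMORDER is an assumed stand-in value here, and unlike the real function this version packs all three classes unconditionally:

#include <assert.h>
#include <stdio.h>

#define IOLOCK_SHIFT    16
#define MMAPLOCK_SHIFT  20
#define ILOCK_SHIFT     24
#define LOCK_INUMORDER  1       /* assumed stand-in for XFS_LOCK_INUMORDER */

/* Simplified: packs the subclass into all three classes at once. */
static unsigned int pack_subclass(unsigned int lock_mode, int subclass)
{
        /* 4 bits per class, so at most 16 distinct subclass values */
        assert(subclass + LOCK_INUMORDER <
               (1 << (MMAPLOCK_SHIFT - IOLOCK_SHIFT)));

        return lock_mode |
               ((unsigned int)(subclass + LOCK_INUMORDER) << IOLOCK_SHIFT) |
               ((unsigned int)(subclass + LOCK_INUMORDER) << MMAPLOCK_SHIFT) |
               ((unsigned int)(subclass + LOCK_INUMORDER) << ILOCK_SHIFT);
}

int main(void)
{
        unsigned int mode = pack_subclass(0x1 /* cf. XFS_IOLOCK_EXCL */, 2);

        /* recover the mmaplock subclass, as XFS_MMAPLOCK_DEP() does */
        printf("mmaplock subclass: %u\n",
               (mode & 0x00f00000u) >> MMAPLOCK_SHIFT);
        return 0;
}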
330 392
331/* 393/*
332 * The following routine will lock n inodes in exclusive mode.
333 * We assume the caller calls us with the inodes in i_ino order.
334 *
335 * We need to detect deadlock where an inode that we lock
336 * is in the AIL and we start waiting for another inode that is locked
337 * by a thread in a long running transaction (such as truncate). This can
338 * result in deadlock since the long running trans might need to wait
339 * for the inode we just locked in order to push the tail and free space
340 * in the log.
394 * The following routine will lock n inodes in exclusive mode. We assume the
395 * caller calls us with the inodes in i_ino order.
396 *
397 * We need to detect deadlock where an inode that we lock is in the AIL and we
398 * start waiting for another inode that is locked by a thread in a long running
399 * transaction (such as truncate). This can result in deadlock since the long
400 * running trans might need to wait for the inode we just locked in order to
401 * push the tail and free space in the log.
341 */ 402 */
342void 403void
343xfs_lock_inodes( 404xfs_lock_inodes(
@@ -348,30 +409,27 @@ xfs_lock_inodes(
348 int attempts = 0, i, j, try_lock; 409 int attempts = 0, i, j, try_lock;
349 xfs_log_item_t *lp; 410 xfs_log_item_t *lp;
350 411
351	ASSERT(ips && (inodes >= 2)); /* we need at least two */
412	/* currently supports between 2 and 5 inodes */
413 ASSERT(ips && inodes >= 2 && inodes <= 5);
352 414
353 try_lock = 0; 415 try_lock = 0;
354 i = 0; 416 i = 0;
355
356again: 417again:
357 for (; i < inodes; i++) { 418 for (; i < inodes; i++) {
358 ASSERT(ips[i]); 419 ASSERT(ips[i]);
359 420
360 if (i && (ips[i] == ips[i-1])) /* Already locked */ 421 if (i && (ips[i] == ips[i - 1])) /* Already locked */
361 continue; 422 continue;
362 423
363 /* 424 /*
364 * If try_lock is not set yet, make sure all locked inodes
365 * are not in the AIL.
366 * If any are, set try_lock to be used later.
425 * If try_lock is not set yet, make sure all locked inodes are
426 * not in the AIL. If any are, set try_lock to be used later.
367 */ 427 */
368
369 if (!try_lock) { 428 if (!try_lock) {
370 for (j = (i - 1); j >= 0 && !try_lock; j--) { 429 for (j = (i - 1); j >= 0 && !try_lock; j--) {
371 lp = (xfs_log_item_t *)ips[j]->i_itemp; 430 lp = (xfs_log_item_t *)ips[j]->i_itemp;
372 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 431 if (lp && (lp->li_flags & XFS_LI_IN_AIL))
373 try_lock++; 432 try_lock++;
374 }
375 } 433 }
376 } 434 }
377 435
@@ -381,51 +439,42 @@ again:
381 * we can't get any, we must release all we have 439 * we can't get any, we must release all we have
382 * and try again. 440 * and try again.
383 */ 441 */
442 if (!try_lock) {
443 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
444 continue;
445 }
446
447 /* try_lock means we have an inode locked that is in the AIL. */
448 ASSERT(i != 0);
449 if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
450 continue;
384 451
385	if (try_lock) {
386	/* try_lock must be 0 if i is 0. */
387	/*
388	 * try_lock means we have an inode locked
389	 * that is in the AIL.
390	 */
391	ASSERT(i != 0);
392	if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
393	attempts++;
394
395	/*
396	 * Unlock all previous guys and try again.
397	 * xfs_iunlock will try to push the tail
398	 * if the inode is in the AIL.
399	 */
400
401	for(j = i - 1; j >= 0; j--) {
402
403	/*
404	 * Check to see if we've already
405	 * unlocked this one.
406	 * Not the first one going back,
407	 * and the inode ptr is the same.
408	 */
409	if ((j != (i - 1)) && ips[j] ==
410	ips[j+1])
411	continue;
412
413	xfs_iunlock(ips[j], lock_mode);
414	}
452	/*
453	 * Unlock all previous guys and try again. xfs_iunlock will try
454	 * to push the tail if the inode is in the AIL.
455	 */
456	attempts++;
457	for (j = i - 1; j >= 0; j--) {
458	/*
459	 * Check to see if we've already unlocked this one. Not
460	 * the first one going back, and the inode ptr is the
461	 * same.
462	 */
463	if (j != (i - 1) && ips[j] == ips[j + 1])
464	continue;
465
466	xfs_iunlock(ips[j], lock_mode);
467	}
415 468
416 if ((attempts % 5) == 0) { 469 if ((attempts % 5) == 0) {
417 delay(1); /* Don't just spin the CPU */ 470 delay(1); /* Don't just spin the CPU */
418#ifdef DEBUG 471#ifdef DEBUG
419 xfs_lock_delays++; 472 xfs_lock_delays++;
420#endif 473#endif
421 }
422 i = 0;
423 try_lock = 0;
424 goto again;
425 }
426 } else {
427 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
428 } 474 }
475 i = 0;
476 try_lock = 0;
477 goto again;
429 } 478 }
430 479
431#ifdef DEBUG 480#ifdef DEBUG
@@ -440,10 +489,10 @@ again:
440} 489}
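
In miniature, the loop above does this: lock in ascending i_ino order while blocking is safe, switch to trylock once any already-locked inode might be pinned in the AIL, and on contention drop every lock and restart from the top. A userspace sketch with pthread mutexes and a plain flag standing in for XFS_LI_IN_AIL; build with cc -pthread:

#include <pthread.h>
#include <unistd.h>

struct obj {
        pthread_mutex_t lock;
        int             pinned;         /* stands in for XFS_LI_IN_AIL */
};

static void lock_all(struct obj **o, int n)
{
        int i, j, try_lock = 0, attempts = 0;

again:
        for (i = 0; i < n; i++) {
                if (i && o[i] == o[i - 1])      /* duplicate: already held */
                        continue;

                /* once any earlier object is pinned, stop blocking */
                if (!try_lock)
                        for (j = i - 1; j >= 0 && !try_lock; j--)
                                if (o[j]->pinned)
                                        try_lock = 1;

                if (!try_lock) {
                        pthread_mutex_lock(&o[i]->lock);
                        continue;
                }
                if (pthread_mutex_trylock(&o[i]->lock) == 0)
                        continue;

                /* contention: drop every lock held so far and restart */
                for (j = i - 1; j >= 0; j--) {
                        if (j != i - 1 && o[j] == o[j + 1])
                                continue;       /* already unlocked */
                        pthread_mutex_unlock(&o[j]->lock);
                }
                if (++attempts % 5 == 0)
                        usleep(1000);           /* don't just spin the CPU */
                try_lock = 0;
                goto again;
        }
}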
441 490
442/* 491/*
443 * xfs_lock_two_inodes() can only be used to lock one type of lock
444 * at a time - the iolock or the ilock, but not both at once. If
445 * we lock both at once, lockdep will report false positives saying
446 * we have violated locking orders.
492 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
493 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
494 * lock more than one at a time, lockdep will report false positives saying we
495 * have violated locking orders.
447 */ 496 */
448void 497void
449xfs_lock_two_inodes( 498xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
455 int attempts = 0; 504 int attempts = 0;
456 xfs_log_item_t *lp; 505 xfs_log_item_t *lp;
457 506
458	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
459	ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
507	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
508	ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
509 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
510 } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
511 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
512
460 ASSERT(ip0->i_ino != ip1->i_ino); 513 ASSERT(ip0->i_ino != ip1->i_ino);
461 514
462 if (ip0->i_ino > ip1->i_ino) { 515 if (ip0->i_ino > ip1->i_ino) {
@@ -818,7 +871,7 @@ xfs_ialloc(
818 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 871 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
819 xfs_trans_log_inode(tp, ip, flags); 872 xfs_trans_log_inode(tp, ip, flags);
820 873
821 /* now that we have an i_mode we can setup inode ops and unlock */ 874 /* now that we have an i_mode we can setup the inode structure */
822 xfs_setup_inode(ip); 875 xfs_setup_inode(ip);
823 876
824 *ipp = ip; 877 *ipp = ip;
@@ -1235,12 +1288,14 @@ xfs_create(
1235 xfs_trans_cancel(tp, cancel_flags); 1288 xfs_trans_cancel(tp, cancel_flags);
1236 out_release_inode: 1289 out_release_inode:
1237 /* 1290 /*
1238 * Wait until after the current transaction is aborted to
1239 * release the inode. This prevents recursive transactions
1240 * and deadlocks from xfs_inactive.
1291 * Wait until after the current transaction is aborted to finish the
1292 * setup of the inode and release the inode. This prevents recursive
1293 * transactions and deadlocks from xfs_inactive.
1241 */ 1294 */
1242 if (ip) 1295 if (ip) {
1296 xfs_finish_inode_setup(ip);
1243 IRELE(ip); 1297 IRELE(ip);
1298 }
1244 1299
1245 xfs_qm_dqrele(udqp); 1300 xfs_qm_dqrele(udqp);
1246 xfs_qm_dqrele(gdqp); 1301 xfs_qm_dqrele(gdqp);
@@ -1345,12 +1400,14 @@ xfs_create_tmpfile(
1345 xfs_trans_cancel(tp, cancel_flags); 1400 xfs_trans_cancel(tp, cancel_flags);
1346 out_release_inode: 1401 out_release_inode:
1347 /* 1402 /*
1348 * Wait until after the current transaction is aborted to
1349 * release the inode. This prevents recursive transactions
1350 * and deadlocks from xfs_inactive.
1403 * Wait until after the current transaction is aborted to finish the
1404 * setup of the inode and release the inode. This prevents recursive
1405 * transactions and deadlocks from xfs_inactive.
1351 */ 1406 */
1352 if (ip) 1407 if (ip) {
1408 xfs_finish_inode_setup(ip);
1353 IRELE(ip); 1409 IRELE(ip);
1410 }
1354 1411
1355 xfs_qm_dqrele(udqp); 1412 xfs_qm_dqrele(udqp);
1356 xfs_qm_dqrele(gdqp); 1413 xfs_qm_dqrele(gdqp);
@@ -2611,19 +2668,22 @@ xfs_remove(
2611/* 2668/*
2612 * Enter all inodes for a rename transaction into a sorted array. 2669 * Enter all inodes for a rename transaction into a sorted array.
2613 */ 2670 */
2671#define __XFS_SORT_INODES 5
2614STATIC void 2672STATIC void
2615xfs_sort_for_rename( 2673xfs_sort_for_rename(
2616	xfs_inode_t *dp1, /* in: old (source) directory inode */
2617	xfs_inode_t *dp2, /* in: new (target) directory inode */
2618	xfs_inode_t *ip1, /* in: inode of old entry */
2619	xfs_inode_t *ip2, /* in: inode of new entry, if it
2620	already exists, NULL otherwise. */
2621	xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
2622	int *num_inodes) /* out: number of inodes in array */
2674	struct xfs_inode *dp1, /* in: old (source) directory inode */
2675	struct xfs_inode *dp2, /* in: new (target) directory inode */
2676	struct xfs_inode *ip1, /* in: inode of old entry */
2677	struct xfs_inode *ip2, /* in: inode of new entry */
2678	struct xfs_inode *wip, /* in: whiteout inode */
2679	struct xfs_inode **i_tab,/* out: sorted array of inodes */
2680	int *num_inodes) /* in/out: inodes in array */
2623{ 2681{
2624 xfs_inode_t *temp;
2625 int i, j; 2682 int i, j;
2626 2683
2684 ASSERT(*num_inodes == __XFS_SORT_INODES);
2685 memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2686
2627 /* 2687 /*
2628 * i_tab contains a list of pointers to inodes. We initialize 2688 * i_tab contains a list of pointers to inodes. We initialize
2629 * the table here & we'll sort it. We will then use it to 2689 * the table here & we'll sort it. We will then use it to
@@ -2631,25 +2691,24 @@ xfs_sort_for_rename(
2631 * 2691 *
2632 * Note that the table may contain duplicates. e.g., dp1 == dp2. 2692 * Note that the table may contain duplicates. e.g., dp1 == dp2.
2633 */ 2693 */
2634	i_tab[0] = dp1;
2635	i_tab[1] = dp2;
2636	i_tab[2] = ip1;
2637	if (ip2) {
2638	*num_inodes = 4;
2639	i_tab[3] = ip2;
2640	} else {
2641	*num_inodes = 3;
2642	i_tab[3] = NULL;
2643	}
2694	i = 0;
2695	i_tab[i++] = dp1;
2696	i_tab[i++] = dp2;
2697	i_tab[i++] = ip1;
2698	if (ip2)
2699	i_tab[i++] = ip2;
2700	if (wip)
2701	i_tab[i++] = wip;
2702	*num_inodes = i;
2644 2703
2645 /* 2704 /*
2646 * Sort the elements via bubble sort. (Remember, there are at 2705 * Sort the elements via bubble sort. (Remember, there are at
2647 * most 4 elements to sort, so this is adequate.) 2706 * most 5 elements to sort, so this is adequate.)
2648 */ 2707 */
2649 for (i = 0; i < *num_inodes; i++) { 2708 for (i = 0; i < *num_inodes; i++) {
2650 for (j = 1; j < *num_inodes; j++) { 2709 for (j = 1; j < *num_inodes; j++) {
2651 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 2710 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
2652 temp = i_tab[j]; 2711 struct xfs_inode *temp = i_tab[j];
2653 i_tab[j] = i_tab[j-1]; 2712 i_tab[j] = i_tab[j-1];
2654 i_tab[j-1] = temp; 2713 i_tab[j-1] = temp;
2655 } 2714 }
@@ -2657,6 +2716,31 @@ xfs_sort_for_rename(
2657 } 2716 }
2658} 2717}
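
The helper now fills and sorts at most five entries: the two directories, the source and target inodes, and the optional whiteout. A standalone sketch with a toy inode type; the bubble sort has the same shape as above:

struct sketch_inode { unsigned long long i_ino; };

static int sort_for_rename(struct sketch_inode *dp1, struct sketch_inode *dp2,
                           struct sketch_inode *ip1, struct sketch_inode *ip2,
                           struct sketch_inode *wip,
                           struct sketch_inode **tab)
{
        int n = 0, i, j;

        tab[n++] = dp1;
        tab[n++] = dp2;
        tab[n++] = ip1;
        if (ip2)
                tab[n++] = ip2;
        if (wip)
                tab[n++] = wip;

        /* at most 5 elements, so bubble sort is adequate */
        for (i = 0; i < n; i++) {
                for (j = 1; j < n; j++) {
                        if (tab[j]->i_ino < tab[j - 1]->i_ino) {
                                struct sketch_inode *tmp = tab[j];
                                tab[j] = tab[j - 1];
                                tab[j - 1] = tmp;
                        }
                }
        }
        return n;       /* number of entries filled into tab[] */
}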
2659 2718
2719static int
2720xfs_finish_rename(
2721 struct xfs_trans *tp,
2722 struct xfs_bmap_free *free_list)
2723{
2724 int committed = 0;
2725 int error;
2726
2727 /*
2728 * If this is a synchronous mount, make sure that the rename transaction
2729 * goes to disk before returning to the user.
2730 */
2731 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2732 xfs_trans_set_sync(tp);
2733
2734 error = xfs_bmap_finish(&tp, free_list, &committed);
2735 if (error) {
2736 xfs_bmap_cancel(free_list);
2737 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2738 return error;
2739 }
2740
2741 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2742}
2743
2660/* 2744/*
2661 * xfs_cross_rename() 2745 * xfs_cross_rename()
2662 * 2746 *
@@ -2685,14 +2769,14 @@ xfs_cross_rename(
2685 ip2->i_ino, 2769 ip2->i_ino,
2686 first_block, free_list, spaceres); 2770 first_block, free_list, spaceres);
2687 if (error) 2771 if (error)
2688 goto out; 2772 goto out_trans_abort;
2689 2773
2690 /* Swap inode number for dirent in second parent */ 2774 /* Swap inode number for dirent in second parent */
2691 error = xfs_dir_replace(tp, dp2, name2, 2775 error = xfs_dir_replace(tp, dp2, name2,
2692 ip1->i_ino, 2776 ip1->i_ino,
2693 first_block, free_list, spaceres); 2777 first_block, free_list, spaceres);
2694 if (error) 2778 if (error)
2695 goto out; 2779 goto out_trans_abort;
2696 2780
2697 /* 2781 /*
2698 * If we're renaming one or more directories across different parents, 2782 * If we're renaming one or more directories across different parents,
@@ -2707,16 +2791,16 @@ xfs_cross_rename(
2707 dp1->i_ino, first_block, 2791 dp1->i_ino, first_block,
2708 free_list, spaceres); 2792 free_list, spaceres);
2709 if (error) 2793 if (error)
2710 goto out; 2794 goto out_trans_abort;
2711 2795
2712 /* transfer ip2 ".." reference to dp1 */ 2796 /* transfer ip2 ".." reference to dp1 */
2713 if (!S_ISDIR(ip1->i_d.di_mode)) { 2797 if (!S_ISDIR(ip1->i_d.di_mode)) {
2714 error = xfs_droplink(tp, dp2); 2798 error = xfs_droplink(tp, dp2);
2715 if (error) 2799 if (error)
2716 goto out; 2800 goto out_trans_abort;
2717 error = xfs_bumplink(tp, dp1); 2801 error = xfs_bumplink(tp, dp1);
2718 if (error) 2802 if (error)
2719 goto out; 2803 goto out_trans_abort;
2720 } 2804 }
2721 2805
2722 /* 2806 /*
@@ -2734,16 +2818,16 @@ xfs_cross_rename(
2734 dp2->i_ino, first_block, 2818 dp2->i_ino, first_block,
2735 free_list, spaceres); 2819 free_list, spaceres);
2736 if (error) 2820 if (error)
2737 goto out; 2821 goto out_trans_abort;
2738 2822
2739 /* transfer ip1 ".." reference to dp2 */ 2823 /* transfer ip1 ".." reference to dp2 */
2740 if (!S_ISDIR(ip2->i_d.di_mode)) { 2824 if (!S_ISDIR(ip2->i_d.di_mode)) {
2741 error = xfs_droplink(tp, dp1); 2825 error = xfs_droplink(tp, dp1);
2742 if (error) 2826 if (error)
2743 goto out; 2827 goto out_trans_abort;
2744 error = xfs_bumplink(tp, dp2); 2828 error = xfs_bumplink(tp, dp2);
2745 if (error) 2829 if (error)
2746 goto out; 2830 goto out_trans_abort;
2747 } 2831 }
2748 2832
2749 /* 2833 /*
@@ -2771,66 +2855,108 @@ xfs_cross_rename(
2771 } 2855 }
2772 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2856 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2773 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2857 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2774out: 2858 return xfs_finish_rename(tp, free_list);
2859
2860out_trans_abort:
2861 xfs_bmap_cancel(free_list);
2862 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2775 return error; 2863 return error;
2776} 2864}
2777 2865
2778/* 2866/*
2867 * xfs_rename_alloc_whiteout()
2868 *
2869 * Return a referenced, unlinked, unlocked inode that can be used as a
2870 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
2871 * crash between allocating the inode and linking it into the rename transaction
2872 * recovery will free the inode and we won't leak it.
2873 */
2874static int
2875xfs_rename_alloc_whiteout(
2876 struct xfs_inode *dp,
2877 struct xfs_inode **wip)
2878{
2879 struct xfs_inode *tmpfile;
2880 int error;
2881
2882 error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
2883 if (error)
2884 return error;
2885
2886 /* Satisfy xfs_bumplink that this is a real tmpfile */
2887 xfs_finish_inode_setup(tmpfile);
2888 VFS_I(tmpfile)->i_state |= I_LINKABLE;
2889
2890 *wip = tmpfile;
2891 return 0;
2892}
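
Read together with the xfs_rename() changes below, the whiteout path has a simple crash-safety story, summarised here as an outline (every call named appears in this patch; only the sequencing is condensed):

/*
 * RENAME_WHITEOUT lifecycle, as implemented by this patch:
 *
 *   xfs_rename_alloc_whiteout(target_dp, &wip)
 *     -> xfs_create_tmpfile()              inode created on the unlinked list
 *     -> xfs_finish_inode_setup(tmpfile)
 *     -> VFS_I(tmpfile)->i_state |= I_LINKABLE
 *
 *   xfs_rename()
 *     -> lock wip with the other four inodes, join it to the transaction
 *     -> xfs_dir_replace(src_dp, src_name, wip->i_ino, ...)
 *     -> xfs_bumplink(tp, wip)             nlink 0 -> 1
 *     -> xfs_iunlink_remove(tp, wip)       off the unlinked list
 *     -> VFS_I(wip)->i_state &= ~I_LINKABLE
 *
 * A crash at any point before the commit leaves wip on the unlinked
 * list, so log recovery frees it and the inode cannot leak.
 */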
2893
2894/*
2779 * xfs_rename 2895 * xfs_rename
2780 */ 2896 */
2781int 2897int
2782xfs_rename( 2898xfs_rename(
2783 xfs_inode_t *src_dp, 2899 struct xfs_inode *src_dp,
2784 struct xfs_name *src_name, 2900 struct xfs_name *src_name,
2785 xfs_inode_t *src_ip, 2901 struct xfs_inode *src_ip,
2786 xfs_inode_t *target_dp, 2902 struct xfs_inode *target_dp,
2787 struct xfs_name *target_name, 2903 struct xfs_name *target_name,
2788 xfs_inode_t *target_ip, 2904 struct xfs_inode *target_ip,
2789 unsigned int flags) 2905 unsigned int flags)
2790{ 2906{
2791	xfs_trans_t *tp = NULL;
2792	xfs_mount_t *mp = src_dp->i_mount;
2793	int new_parent; /* moving to a new dir */
2794	int src_is_directory; /* src_name is a directory */
2795	int error;
2796	xfs_bmap_free_t free_list;
2797	xfs_fsblock_t first_block;
2798	int cancel_flags;
2799	int committed;
2800	xfs_inode_t *inodes[4];
2801	int spaceres;
2802	int num_inodes;
2907	struct xfs_mount *mp = src_dp->i_mount;
2908	struct xfs_trans *tp;
2909	struct xfs_bmap_free free_list;
2910	xfs_fsblock_t first_block;
2911	struct xfs_inode *wip = NULL; /* whiteout inode */
2912	struct xfs_inode *inodes[__XFS_SORT_INODES];
2913	int num_inodes = __XFS_SORT_INODES;
2914	bool new_parent = (src_dp != target_dp);
2915	bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
2916	int cancel_flags = 0;
2917	int spaceres;
2918	int error;
2803 2919
2804 trace_xfs_rename(src_dp, target_dp, src_name, target_name); 2920 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
2805 2921
2806	new_parent = (src_dp != target_dp);
2807	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
2922	if ((flags & RENAME_EXCHANGE) && !target_ip)
2923	return -EINVAL;
2924
2925 /*
2926 * If we are doing a whiteout operation, allocate the whiteout inode
2927 * we will be placing at the target and ensure the type is set
2928 * appropriately.
2929 */
2930 if (flags & RENAME_WHITEOUT) {
2931 ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
2932 error = xfs_rename_alloc_whiteout(target_dp, &wip);
2933 if (error)
2934 return error;
2935
2936 /* setup target dirent info as whiteout */
2937 src_name->type = XFS_DIR3_FT_CHRDEV;
2938 }
2808 2939
2809 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 2940 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
2810 inodes, &num_inodes); 2941 inodes, &num_inodes);
2811 2942
2812 xfs_bmap_init(&free_list, &first_block);
2813 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 2943 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
2814 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2815 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 2944 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
2816 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2945 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
2817 if (error == -ENOSPC) { 2946 if (error == -ENOSPC) {
2818 spaceres = 0; 2947 spaceres = 0;
2819 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2948 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
2820 } 2949 }
2821	if (error) {
2822	xfs_trans_cancel(tp, 0);
2823	goto std_return;
2824	}
2950	if (error)
2951	goto out_trans_cancel;
2952	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2825 2953
2826 /* 2954 /*
2827 * Attach the dquots to the inodes 2955 * Attach the dquots to the inodes
2828 */ 2956 */
2829 error = xfs_qm_vop_rename_dqattach(inodes); 2957 error = xfs_qm_vop_rename_dqattach(inodes);
2830	if (error) {
2831	xfs_trans_cancel(tp, cancel_flags);
2832	goto std_return;
2833	}
2958	if (error)
2959	goto out_trans_cancel;
2834 2960
2835 /* 2961 /*
2836 * Lock all the participating inodes. Depending upon whether 2962 * Lock all the participating inodes. Depending upon whether
@@ -2851,6 +2977,8 @@ xfs_rename(
2851 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2977 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
2852 if (target_ip) 2978 if (target_ip)
2853 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2979 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
2980 if (wip)
2981 xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
2854 2982
2855 /* 2983 /*
2856 * If we are using project inheritance, we only allow renames 2984 * If we are using project inheritance, we only allow renames
@@ -2860,20 +2988,16 @@ xfs_rename(
2860 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2988 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2861 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2989 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
2862 error = -EXDEV; 2990 error = -EXDEV;
2863 goto error_return; 2991 goto out_trans_cancel;
2864 } 2992 }
2865 2993
2866	/*
2867	 * Handle RENAME_EXCHANGE flags
2868	 */
2869	if (flags & RENAME_EXCHANGE) {
2870	error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
2871	target_dp, target_name, target_ip,
2872	&free_list, &first_block, spaceres);
2873	if (error)
2874	goto abort_return;
2875	goto finish_rename;
2876	}
2994	xfs_bmap_init(&free_list, &first_block);
2995
2996	/* RENAME_EXCHANGE is unique from here on. */
2997	if (flags & RENAME_EXCHANGE)
2998	return xfs_cross_rename(tp, src_dp, src_name, src_ip,
2999	target_dp, target_name, target_ip,
3000	&free_list, &first_block, spaceres);
2877 3001
2878 /* 3002 /*
2879 * Set up the target. 3003 * Set up the target.
@@ -2886,7 +3010,7 @@ xfs_rename(
2886 if (!spaceres) { 3010 if (!spaceres) {
2887 error = xfs_dir_canenter(tp, target_dp, target_name); 3011 error = xfs_dir_canenter(tp, target_dp, target_name);
2888 if (error) 3012 if (error)
2889 goto error_return; 3013 goto out_trans_cancel;
2890 } 3014 }
2891 /* 3015 /*
2892 * If target does not exist and the rename crosses 3016 * If target does not exist and the rename crosses
@@ -2897,9 +3021,9 @@ xfs_rename(
2897 src_ip->i_ino, &first_block, 3021 src_ip->i_ino, &first_block,
2898 &free_list, spaceres); 3022 &free_list, spaceres);
2899 if (error == -ENOSPC) 3023 if (error == -ENOSPC)
2900 goto error_return; 3024 goto out_bmap_cancel;
2901 if (error) 3025 if (error)
2902 goto abort_return; 3026 goto out_trans_abort;
2903 3027
2904 xfs_trans_ichgtime(tp, target_dp, 3028 xfs_trans_ichgtime(tp, target_dp,
2905 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3029 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2907,7 +3031,7 @@ xfs_rename(
2907 if (new_parent && src_is_directory) { 3031 if (new_parent && src_is_directory) {
2908 error = xfs_bumplink(tp, target_dp); 3032 error = xfs_bumplink(tp, target_dp);
2909 if (error) 3033 if (error)
2910 goto abort_return; 3034 goto out_trans_abort;
2911 } 3035 }
2912 } else { /* target_ip != NULL */ 3036 } else { /* target_ip != NULL */
2913 /* 3037 /*
@@ -2922,7 +3046,7 @@ xfs_rename(
2922 if (!(xfs_dir_isempty(target_ip)) || 3046 if (!(xfs_dir_isempty(target_ip)) ||
2923 (target_ip->i_d.di_nlink > 2)) { 3047 (target_ip->i_d.di_nlink > 2)) {
2924 error = -EEXIST; 3048 error = -EEXIST;
2925 goto error_return; 3049 goto out_trans_cancel;
2926 } 3050 }
2927 } 3051 }
2928 3052
@@ -2939,7 +3063,7 @@ xfs_rename(
2939 src_ip->i_ino, 3063 src_ip->i_ino,
2940 &first_block, &free_list, spaceres); 3064 &first_block, &free_list, spaceres);
2941 if (error) 3065 if (error)
2942 goto abort_return; 3066 goto out_trans_abort;
2943 3067
2944 xfs_trans_ichgtime(tp, target_dp, 3068 xfs_trans_ichgtime(tp, target_dp,
2945 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3069 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2950,7 +3074,7 @@ xfs_rename(
2950 */ 3074 */
2951 error = xfs_droplink(tp, target_ip); 3075 error = xfs_droplink(tp, target_ip);
2952 if (error) 3076 if (error)
2953 goto abort_return; 3077 goto out_trans_abort;
2954 3078
2955 if (src_is_directory) { 3079 if (src_is_directory) {
2956 /* 3080 /*
@@ -2958,7 +3082,7 @@ xfs_rename(
2958 */ 3082 */
2959 error = xfs_droplink(tp, target_ip); 3083 error = xfs_droplink(tp, target_ip);
2960 if (error) 3084 if (error)
2961 goto abort_return; 3085 goto out_trans_abort;
2962 } 3086 }
2963 } /* target_ip != NULL */ 3087 } /* target_ip != NULL */
2964 3088
@@ -2975,7 +3099,7 @@ xfs_rename(
2975 &first_block, &free_list, spaceres); 3099 &first_block, &free_list, spaceres);
2976 ASSERT(error != -EEXIST); 3100 ASSERT(error != -EEXIST);
2977 if (error) 3101 if (error)
2978 goto abort_return; 3102 goto out_trans_abort;
2979 } 3103 }
2980 3104
2981 /* 3105 /*
@@ -3001,49 +3125,67 @@ xfs_rename(
3001 */ 3125 */
3002 error = xfs_droplink(tp, src_dp); 3126 error = xfs_droplink(tp, src_dp);
3003 if (error) 3127 if (error)
3004 goto abort_return; 3128 goto out_trans_abort;
3005 } 3129 }
3006 3130
3007	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3008	&first_block, &free_list, spaceres);
3131	/*
3132	 * For whiteouts, we only need to update the source dirent with the
3133	 * inode number of the whiteout inode rather than removing it
3134	 * altogether.
3135	 */
3136	if (wip) {
3137	error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
3138	&first_block, &free_list, spaceres);
3139	} else
3140	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3141	&first_block, &free_list, spaceres);
3009 if (error) 3142 if (error)
3010 goto abort_return; 3143 goto out_trans_abort;
3011
3012	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3013	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3014	if (new_parent)
3015	xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3016
3017finish_rename:
3018	/*
3019	 * If this is a synchronous mount, make sure that the
3020	 * rename transaction goes to disk before returning to
3021	 * the user.
3022	 */
3023	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
3024	xfs_trans_set_sync(tp);
3025	}
3026
3027	error = xfs_bmap_finish(&tp, &free_list, &committed);
3028	if (error) {
3029	xfs_bmap_cancel(&free_list);
3030	xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
3031	XFS_TRANS_ABORT));
3032	goto std_return;
3033	}
3034
3035	/*
3036	 * trans_commit will unlock src_ip, target_ip & decrement
3037	 * the vnode references.
3038	 */
3039	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3040
3041 abort_return:
3042	cancel_flags |= XFS_TRANS_ABORT;
3043 error_return:
3044	xfs_bmap_cancel(&free_list);
3045	xfs_trans_cancel(tp, cancel_flags);
3046 std_return:
3047	return error;
3144
3145	/*
3146	 * For whiteouts, we need to bump the link count on the whiteout inode.
3147	 * This means that failures all the way up to this point leave the inode
3148	 * on the unlinked list and so cleanup is a simple matter of dropping
3149	 * the remaining reference to it. If we fail here after bumping the link
3150	 * count, we're shutting down the filesystem so we'll never see the
3151	 * intermediate state on disk.
3152	 */
3153	if (wip) {
3154	ASSERT(wip->i_d.di_nlink == 0);
3155	error = xfs_bumplink(tp, wip);
3156	if (error)
3157	goto out_trans_abort;
3158	error = xfs_iunlink_remove(tp, wip);
3159	if (error)
3160	goto out_trans_abort;
3161	xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
3162
3163	/*
3164	 * Now we have a real link, clear the "I'm a tmpfile" state
3165	 * flag from the inode so it doesn't accidentally get misused in
3166	 * future.
3167	 */
3168	VFS_I(wip)->i_state &= ~I_LINKABLE;
3169	}
3170
3171	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3172	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3173	if (new_parent)
3174	xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3175
3176	error = xfs_finish_rename(tp, &free_list);
3177	if (wip)
3178	IRELE(wip);
3179	return error;
3180
3181out_trans_abort:
3182	cancel_flags |= XFS_TRANS_ABORT;
3183out_bmap_cancel:
3184	xfs_bmap_cancel(&free_list);
3185out_trans_cancel:
3186	xfs_trans_cancel(tp, cancel_flags);
3187	if (wip)
3188	IRELE(wip);
3189	return error;
3048} 3190}
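
The function now unwinds through a fall-through ladder: out_trans_abort adds XFS_TRANS_ABORT for a transaction that has dirtied items, out_bmap_cancel drops the queued extent-free work, and out_trans_cancel ends the transaction and releases the whiteout reference. The same shape in a compact sketch; every type and helper below is an illustrative stand-in, not the XFS API:

#include <errno.h>

struct txn;
struct worklist;

extern int  reserve_step(struct txn *tp);         /* cf. xfs_trans_reserve() */
extern int  queue_only_step(struct txn *tp);      /* cf. xfs_dir_createname() */
extern int  dirty_step(struct txn *tp);           /* cf. xfs_droplink() etc. */
extern int  commit(struct txn *tp);               /* cf. xfs_finish_rename() */
extern void cancel_work(struct worklist *wl);     /* cf. xfs_bmap_cancel() */
extern void cancel_txn(struct txn *tp, int f);    /* cf. xfs_trans_cancel() */

#define TXN_ABORT 1                               /* cf. XFS_TRANS_ABORT */

static int do_op(struct txn *tp, struct worklist *wl)
{
        int cancel_flags = 0;
        int error;

        error = reserve_step(tp);
        if (error)
                goto out_trans_cancel;    /* nothing queued, nothing dirty */

        error = queue_only_step(tp);
        if (error == -ENOSPC)
                goto out_bmap_cancel;     /* nothing dirty yet: no abort */
        if (error)
                goto out_trans_abort;

        error = dirty_step(tp);
        if (error)
                goto out_trans_abort;

        return commit(tp);

out_trans_abort:
        cancel_flags |= TXN_ABORT;        /* a dirty txn must be aborted */
out_bmap_cancel:
        cancel_work(wl);
out_trans_cancel:
        cancel_txn(tp, cancel_flags);
        return error;
}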
3049 3191
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 86cd6b39bed7..8f22d20368d8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
56 struct xfs_inode_log_item *i_itemp; /* logging information */ 56 struct xfs_inode_log_item *i_itemp; /* logging information */
57 mrlock_t i_lock; /* inode lock */ 57 mrlock_t i_lock; /* inode lock */
58 mrlock_t i_iolock; /* inode IO lock */ 58 mrlock_t i_iolock; /* inode IO lock */
59 mrlock_t i_mmaplock; /* inode mmap IO lock */
59 atomic_t i_pincount; /* inode pin count */ 60 atomic_t i_pincount; /* inode pin count */
60 spinlock_t i_flags_lock; /* inode i_flags lock */ 61 spinlock_t i_flags_lock; /* inode i_flags lock */
61 /* Miscellaneous state. */ 62 /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
263#define XFS_IOLOCK_SHARED (1<<1) 264#define XFS_IOLOCK_SHARED (1<<1)
264#define XFS_ILOCK_EXCL (1<<2) 265#define XFS_ILOCK_EXCL (1<<2)
265#define XFS_ILOCK_SHARED (1<<3) 266#define XFS_ILOCK_SHARED (1<<3)
267#define XFS_MMAPLOCK_EXCL (1<<4)
268#define XFS_MMAPLOCK_SHARED (1<<5)
266 269
267#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 270#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
268 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 271 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
272 | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
269 273
270#define XFS_LOCK_FLAGS \ 274#define XFS_LOCK_FLAGS \
271 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 275 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
272 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 276 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
273 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 277 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
274 { XFS_ILOCK_SHARED, "ILOCK_SHARED" } 278 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
279 { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
280 { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
275 281
276 282
277/* 283/*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
302#define XFS_IOLOCK_SHIFT 16 308#define XFS_IOLOCK_SHIFT 16
303#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) 309#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
304 310
311#define XFS_MMAPLOCK_SHIFT 20
312
305#define XFS_ILOCK_SHIFT 24 313#define XFS_ILOCK_SHIFT 24
306#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 314#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
307#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) 315#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
308#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) 316#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
309 317
310#define XFS_IOLOCK_DEP_MASK 0x00ff0000 318#define XFS_IOLOCK_DEP_MASK 0x000f0000
319#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
311#define XFS_ILOCK_DEP_MASK 0xff000000 320#define XFS_ILOCK_DEP_MASK 0xff000000
312#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) 321#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
322 XFS_MMAPLOCK_DEP_MASK | \
323 XFS_ILOCK_DEP_MASK)
313 324
314#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
315#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
325#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
326 >> XFS_IOLOCK_SHIFT)
327#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
328 >> XFS_MMAPLOCK_SHIFT)
329#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
330 >> XFS_ILOCK_SHIFT)
316 331
317/* 332/*
318 * For multiple groups support: if S_ISGID bit is set in the parent 333 * For multiple groups support: if S_ISGID bit is set in the parent
@@ -384,11 +399,34 @@ enum xfs_prealloc_flags {
384 XFS_PREALLOC_INVISIBLE = (1 << 4), 399 XFS_PREALLOC_INVISIBLE = (1 << 4),
385}; 400};
386 401
387int xfs_update_prealloc_flags(struct xfs_inode *, 402int xfs_update_prealloc_flags(struct xfs_inode *ip,
388 enum xfs_prealloc_flags); 403 enum xfs_prealloc_flags flags);
389int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
390int xfs_iozero(struct xfs_inode *, loff_t, size_t);
404int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
405 xfs_fsize_t isize, bool *did_zeroing);
406int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
407
391 408
409/* from xfs_iops.c */
410/*
411 * When setting up a newly allocated inode, we need to call
412 * xfs_finish_inode_setup() once the inode is fully instantiated at
413 * the VFS level to prevent the rest of the world seeing the inode
414 * before we've completed instantiation. Otherwise we can do it
415 * the moment the inode lookup is complete.
416 */
417extern void xfs_setup_inode(struct xfs_inode *ip);
418static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
419{
420 xfs_iflags_clear(ip, XFS_INEW);
421 barrier();
422 unlock_new_inode(VFS_I(ip));
423}
424
425static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
426{
427 xfs_setup_inode(ip);
428 xfs_finish_inode_setup(ip);
429}
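
xfs_finish_inode_setup() is a publish barrier: every inode field must be written before XFS_INEW is cleared, so no concurrent lookup can observe a half-built inode. A userspace sketch of the same idiom, with a C11 release/acquire pair standing in for xfs_iflags_clear()/barrier()/unlock_new_inode():

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
        int payload;            /* ...all the real fields... */
        atomic_bool is_new;     /* stands in for XFS_INEW */
};

static void create(struct obj *o)
{
        atomic_init(&o->is_new, true);  /* born "new": not yet visible */
        o->payload = 42;                /* cf. xfs_setup_inode() */
        /* release: all payload stores happen-before the flag clear */
        atomic_store_explicit(&o->is_new, false, memory_order_release);
}

static bool ready(struct obj *o)
{
        /* acquire pairs with the release above */
        return !atomic_load_explicit(&o->is_new, memory_order_acquire);
}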
392 430
393#define IHOLD(ip) \ 431#define IHOLD(ip) \
394do { \ 432do { \
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ac4feae45eb3..5f4a396f5186 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -631,7 +631,7 @@ xfs_ioc_space(
631 631
632 if (filp->f_flags & O_DSYNC) 632 if (filp->f_flags & O_DSYNC)
633 flags |= XFS_PREALLOC_SYNC; 633 flags |= XFS_PREALLOC_SYNC;
634 if (ioflags & XFS_IO_INVIS) 634 if (ioflags & XFS_IO_INVIS)
635 flags |= XFS_PREALLOC_INVISIBLE; 635 flags |= XFS_PREALLOC_INVISIBLE;
636 636
637 error = mnt_want_write_file(filp); 637 error = mnt_want_write_file(filp);
@@ -639,10 +639,13 @@ xfs_ioc_space(
639 return error; 639 return error;
640 640
641 xfs_ilock(ip, iolock); 641 xfs_ilock(ip, iolock);
642 error = xfs_break_layouts(inode, &iolock); 642 error = xfs_break_layouts(inode, &iolock, false);
643 if (error) 643 if (error)
644 goto out_unlock; 644 goto out_unlock;
645 645
646 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
647 iolock |= XFS_MMAPLOCK_EXCL;
648
646 switch (bf->l_whence) { 649 switch (bf->l_whence) {
647 case 0: /*SEEK_SET*/ 650 case 0: /*SEEK_SET*/
648 break; 651 break;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ccb1dd0d509e..38e633bad8c2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), 460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
461 alloc_blocks); 461 alloc_blocks);
462 462
463	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
463	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
464 freesp = mp->m_sb.sb_fdblocks;
465 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { 464 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
466 shift = 2; 465 shift = 2;
467 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) 466 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d919ad7b16bf..2f1839e4dd1b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -187,6 +187,8 @@ xfs_generic_create(
187 else 187 else
188 d_instantiate(dentry, inode); 188 d_instantiate(dentry, inode);
189 189
190 xfs_finish_inode_setup(ip);
191
190 out_free_acl: 192 out_free_acl:
191 if (default_acl) 193 if (default_acl)
192 posix_acl_release(default_acl); 194 posix_acl_release(default_acl);
@@ -195,6 +197,7 @@ xfs_generic_create(
195 return error; 197 return error;
196 198
197 out_cleanup_inode: 199 out_cleanup_inode:
200 xfs_finish_inode_setup(ip);
198 if (!tmpfile) 201 if (!tmpfile)
199 xfs_cleanup_inode(dir, inode, dentry); 202 xfs_cleanup_inode(dir, inode, dentry);
200 iput(inode); 203 iput(inode);
@@ -367,9 +370,11 @@ xfs_vn_symlink(
367 goto out_cleanup_inode; 370 goto out_cleanup_inode;
368 371
369 d_instantiate(dentry, inode); 372 d_instantiate(dentry, inode);
373 xfs_finish_inode_setup(cip);
370 return 0; 374 return 0;
371 375
372 out_cleanup_inode: 376 out_cleanup_inode:
377 xfs_finish_inode_setup(cip);
373 xfs_cleanup_inode(dir, inode, dentry); 378 xfs_cleanup_inode(dir, inode, dentry);
374 iput(inode); 379 iput(inode);
375 out: 380 out:
@@ -389,7 +394,7 @@ xfs_vn_rename(
389 struct xfs_name oname; 394 struct xfs_name oname;
390 struct xfs_name nname; 395 struct xfs_name nname;
391 396
392 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 397 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
393 return -EINVAL; 398 return -EINVAL;
394 399
395 /* if we are exchanging files, we need to set i_mode of both files */ 400 /* if we are exchanging files, we need to set i_mode of both files */
@@ -751,6 +756,7 @@ xfs_setattr_size(
751 int error; 756 int error;
752 uint lock_flags = 0; 757 uint lock_flags = 0;
753 uint commit_flags = 0; 758 uint commit_flags = 0;
759 bool did_zeroing = false;
754 760
755 trace_xfs_setattr(ip); 761 trace_xfs_setattr(ip);
756 762
@@ -765,6 +771,7 @@ xfs_setattr_size(
765 return error; 771 return error;
766 772
767 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 773 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
774 ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
768 ASSERT(S_ISREG(ip->i_d.di_mode)); 775 ASSERT(S_ISREG(ip->i_d.di_mode));
769 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 776 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
770 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); 777 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -794,20 +801,16 @@ xfs_setattr_size(
794 return error; 801 return error;
795 802
796 /* 803 /*
797 * Now we can make the changes. Before we join the inode to the
798 * transaction, take care of the part of the truncation that must be
799 * done without the inode lock. This needs to be done before joining
800 * the inode to the transaction, because the inode cannot be unlocked
801 * once it is a part of the transaction.
804 * File data changes must be complete before we start the transaction to
805 * modify the inode. This needs to be done before joining the inode to
806 * the transaction because the inode cannot be unlocked once it is a
807 * part of the transaction.
808 *
809 * Start with zeroing any data block beyond EOF that we may expose on
810 * file extension.
802 */ 811 */
803 if (newsize > oldsize) { 812 if (newsize > oldsize) {
804	/*
805	 * Do the first part of growing a file: zero any data in the
806	 * last block that is beyond the old EOF. We need to do this
807	 * before the inode is joined to the transaction to modify
808	 * i_size.
809	 */
810	error = xfs_zero_eof(ip, newsize, oldsize);
813	error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
811 if (error) 814 if (error)
812 return error; 815 return error;
813 } 816 }
@@ -817,75 +820,42 @@ xfs_setattr_size(
817 * any previous writes that are beyond the on disk EOF and the new 820 * any previous writes that are beyond the on disk EOF and the new
818 * EOF that have not been written out need to be written here. If we 821 * EOF that have not been written out need to be written here. If we
819 * do not write the data out, we expose ourselves to the null files 822 * do not write the data out, we expose ourselves to the null files
820 * problem.
821 *
822 * Only flush from the on disk size to the smaller of the in memory
823 * file size or the new size as that's the range we really care about
824 * here and prevents waiting for other data not within the range we
825 * care about here.
823 * problem. Note that this includes any block zeroing we did above;
824 * otherwise those blocks may not be zeroed after a crash.
826 */ 825 */
827	if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
826	if (newsize > ip->i_d.di_size &&
827 (oldsize != ip->i_d.di_size || did_zeroing)) {
828 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 828 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
829 ip->i_d.di_size, newsize); 829 ip->i_d.di_size, newsize);
830 if (error) 830 if (error)
831 return error; 831 return error;
832 } 832 }
833 833
834	/*
835	 * Wait for all direct I/O to complete.
836	 */
834	/* Now wait for all direct I/O to complete. */
837 inode_dio_wait(inode); 835 inode_dio_wait(inode);
838 836
839 /* 837 /*
840 * Do all the page cache truncate work outside the transaction context
841 * as the "lock" order is page lock->log space reservation. i.e.
842 * locking pages inside the transaction can ABBA deadlock with
843 * writeback. We have to do the VFS inode size update before we truncate
844 * the pagecache, however, to avoid racing with page faults beyond the
845 * new EOF they are not serialised against truncate operations except by
846 * page locks and size updates.
847 *
848 * Hence we are in a situation where a truncate can fail with ENOMEM
849 * from xfs_trans_reserve(), but having already truncated the in-memory
850 * version of the file (i.e. made user visible changes). There's not
851 * much we can do about this, except to hope that the caller sees ENOMEM
852 * and retries the truncate operation.
838 * We've already locked out new page faults, so now we can safely remove
839 * pages from the page cache knowing they won't get refaulted until we
840 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
841 * complete. The truncate_setsize() call also cleans partial EOF page
842 * PTEs on extending truncates and hence ensures sub-page block size
843 * filesystems are correctly handled, too.
844 *
845 * We have to do all the page cache truncate work outside the
846 * transaction context as the "lock" order is page lock->log space
847 * reservation as defined by extent allocation in the writeback path.
848 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
849 * having already truncated the in-memory version of the file (i.e. made
850 * user visible changes). There's not much we can do about this, except
851 * to hope that the caller sees ENOMEM and retries the truncate
852 * operation.
853 */ 853 */
854 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 854 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
855 if (error) 855 if (error)
856 return error; 856 return error;
857 truncate_setsize(inode, newsize); 857 truncate_setsize(inode, newsize);
858 858
859 /*
860 * The "we can't serialise against page faults" pain gets worse.
861 *
862 * If the file is mapped then we have to clean the page at the old EOF
863 * when extending the file. Extending the file can expose changes the
864 * underlying page mapping (e.g. from beyond EOF to a hole or
865 * unwritten), and so on the next attempt to write to that page we need
866 * to remap it for write. i.e. we need .page_mkwrite() to be called.
867 * Hence we need to clean the page to clean the pte and so a new write
868 * fault will be triggered appropriately.
869 *
870 * If we do it before we change the inode size, then we can race with a
871 * page fault that maps the page with exactly the same problem. If we do
872 * it after we change the file size, then a new page fault can come in
873 * and allocate space before we've run the rest of the truncate
874 * transaction. That's kinda grotesque, but it's better than have data
875 * over a hole, and so that's the lesser evil that has been chosen here.
876 *
877 * The real solution, however, is to have some mechanism for locking out
878 * page faults while a truncate is in progress.
879 */
880 if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
881 error = filemap_write_and_wait_range(
882 VFS_I(ip)->i_mapping,
883 round_down(oldsize, PAGE_CACHE_SIZE),
884 round_up(oldsize, PAGE_CACHE_SIZE) - 1);
885 if (error)
886 return error;
887 }
888
889 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 859 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
890 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 860 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
891 if (error) 861 if (error)
@@ -983,9 +953,13 @@ xfs_vn_setattr(
983 uint iolock = XFS_IOLOCK_EXCL; 953 uint iolock = XFS_IOLOCK_EXCL;
984 954
985 xfs_ilock(ip, iolock); 955 xfs_ilock(ip, iolock);
986 error = xfs_break_layouts(dentry->d_inode, &iolock); 956 error = xfs_break_layouts(dentry->d_inode, &iolock, true);
987 if (!error) 957 if (!error) {
958 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
959 iolock |= XFS_MMAPLOCK_EXCL;
960
988 error = xfs_setattr_size(ip, iattr); 961 error = xfs_setattr_size(ip, iattr);
962 }
989 xfs_iunlock(ip, iolock); 963 xfs_iunlock(ip, iolock);
990 } else { 964 } else {
991 error = xfs_setattr_nonsize(ip, iattr, 0); 965 error = xfs_setattr_nonsize(ip, iattr, 0);
@@ -1236,16 +1210,12 @@ xfs_diflags_to_iflags(
1236} 1210}
1237 1211
1238/* 1212/*
1239 * Initialize the Linux inode, set up the operation vectors and 1213 * Initialize the Linux inode and set up the operation vectors.
1240 * unlock the inode.
1241 *
1242 * When reading existing inodes from disk this is called directly
1243 * from xfs_iget, when creating a new inode it is called from
1244 * xfs_ialloc after setting up the inode.
1245 * 1214 *
1246 * We are always called with an uninitialised linux inode here. 1215 * When reading existing inodes from disk this is called directly from xfs_iget,
1247 * We need to initialise the necessary fields and take a reference 1216 * when creating a new inode it is called from xfs_ialloc after setting up the
1248 * on it. 1217 * inode. These callers have different criteria for clearing XFS_INEW, so leave
1218 * it up to the caller to deal with unlocking the inode appropriately.
1249 */ 1219 */
1250void 1220void
1251xfs_setup_inode( 1221xfs_setup_inode(
@@ -1332,9 +1302,4 @@ xfs_setup_inode(
1332 inode_has_no_xattr(inode); 1302 inode_has_no_xattr(inode);
1333 cache_no_acl(inode); 1303 cache_no_acl(inode);
1334 } 1304 }
1335
1336 xfs_iflags_clear(ip, XFS_INEW);
1337 barrier();
1338
1339 unlock_new_inode(inode);
1340} 1305}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index ea7a98e9cb70..a0f84abb0d09 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations;
25 25
26extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); 26extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
27 27
28extern void xfs_setup_inode(struct xfs_inode *);
29
30/* 28/*
31 * Internal setattr interfaces. 29 * Internal setattr interfaces.
32 */ 30 */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 82e314258f73..80429891dc9b 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk(
229 error = xfs_inobt_get_rec(cur, irec, &stat); 229 error = xfs_inobt_get_rec(cur, irec, &stat);
230 if (error) 230 if (error)
231 return error; 231 return error;
232 XFS_WANT_CORRUPTED_RETURN(stat == 1); 232 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
233 233
234 /* Check if the record contains the inode in request */ 234 /* Check if the record contains the inode in request */
235 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { 235 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c31d2c2eadc4..7c7842c85a08 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
116#undef XFS_NATIVE_HOST 116#undef XFS_NATIVE_HOST
117#endif 117#endif
118 118
119/*
120 * Feature macros (disable/enable)
121 */
122#ifdef CONFIG_SMP
123#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
124#else
125#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
126#endif
127
128#define irix_sgid_inherit xfs_params.sgid_inherit.val 119#define irix_sgid_inherit xfs_params.sgid_inherit.val
129#define irix_symlink_mode xfs_params.symlink_mode.val 120#define irix_symlink_mode xfs_params.symlink_mode.val
130#define xfs_panic_mask xfs_params.panic_mask.val 121#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a5a945fc3bdc..4f5784f85a5b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4463,10 +4463,10 @@ xlog_do_recover(
4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
4465 ASSERT(xfs_sb_good_version(sbp)); 4465 ASSERT(xfs_sb_good_version(sbp));
4466 xfs_reinit_percpu_counters(log->l_mp);
4467
4466 xfs_buf_relse(bp); 4468 xfs_buf_relse(bp);
4467 4469
4468 /* We've re-read the superblock so re-initialize per-cpu counters */
4469 xfs_icsb_reinit_counters(log->l_mp);
4470 4470
4471 xlog_recover_check_summary(log); 4471 xlog_recover_check_summary(log);
4472 4472
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e63eea2..2ce7ee3b4ec1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
43#include "xfs_sysfs.h" 43#include "xfs_sysfs.h"
44 44
45 45
46#ifdef HAVE_PERCPU_SB
47STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
48 int);
49STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
50 int);
51STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
52#else
53
54#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
55#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
56#endif
57
58static DEFINE_MUTEX(xfs_uuid_table_mutex); 46static DEFINE_MUTEX(xfs_uuid_table_mutex);
59static int xfs_uuid_table_size; 47static int xfs_uuid_table_size;
60static uuid_t *xfs_uuid_table; 48static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
347 goto reread; 335 goto reread;
348 } 336 }
349 337
350 /* Initialize per-cpu counters */ 338 xfs_reinit_percpu_counters(mp);
351 xfs_icsb_reinit_counters(mp);
352 339
353 /* no need to be quiet anymore, so reset the buf ops */ 340 /* no need to be quiet anymore, so reset the buf ops */
354 bp->b_ops = &xfs_sb_buf_ops; 341 bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
1087 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) 1074 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
1088 return 0; 1075 return 0;
1089 1076
1090 xfs_icsb_sync_counters(mp, 0);
1091
1092 /* 1077 /*
1093 * we don't need to do this if we are updating the superblock 1078 * we don't need to do this if we are updating the superblock
1094 * counters on every modification. 1079 * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
1099 return xfs_sync_sb(mp, true); 1084 return xfs_sync_sb(mp, true);
1100} 1085}
1101 1086
1102/*
1103 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
1104 * a delta to a specified field in the in-core superblock. Simply
1105 * switch on the field indicated and apply the delta to that field.
1106 * Fields are not allowed to dip below zero, so if the delta would
1107 * do this do not apply it and return EINVAL.
1108 *
1109 * The m_sb_lock must be held when this routine is called.
1110 */
1111STATIC int
1112xfs_mod_incore_sb_unlocked(
1113	xfs_mount_t *mp,
1114	xfs_sb_field_t field,
1115	int64_t delta,
1116	int rsvd)
1117{
1118	int scounter; /* short counter for 32 bit fields */
1119	long long lcounter; /* long counter for 64 bit fields */
1120	long long res_used, rem;
1087int
1088xfs_mod_icount(
1089	struct xfs_mount *mp,
1090	int64_t delta)
1091{
1092	/* deltas are +/-64, hence the large batch size of 128. */
1093	__percpu_counter_add(&mp->m_icount, delta, 128);
1094	if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
1121
1122 /*
1123 * With the in-core superblock spin lock held, switch
1124 * on the indicated field. Apply the delta to the
1125 * proper field. If the fields value would dip below
1126 * 0, then do not apply the delta and return EINVAL.
1127 */
1128 switch (field) {
1129 case XFS_SBS_ICOUNT:
1130 lcounter = (long long)mp->m_sb.sb_icount;
1131 lcounter += delta;
1132 if (lcounter < 0) {
1133 ASSERT(0);
1134 return -EINVAL;
1135 }
1136 mp->m_sb.sb_icount = lcounter;
1137 return 0;
1138 case XFS_SBS_IFREE:
1139 lcounter = (long long)mp->m_sb.sb_ifree;
1140 lcounter += delta;
1141 if (lcounter < 0) {
1142 ASSERT(0);
1143 return -EINVAL;
1144 }
1145 mp->m_sb.sb_ifree = lcounter;
1146 return 0;
1147 case XFS_SBS_FDBLOCKS:
1148 lcounter = (long long)
1149 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1150 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1151
1152 if (delta > 0) { /* Putting blocks back */
1153 if (res_used > delta) {
1154 mp->m_resblks_avail += delta;
1155 } else {
1156 rem = delta - res_used;
1157 mp->m_resblks_avail = mp->m_resblks;
1158 lcounter += rem;
1159 }
1160 } else { /* Taking blocks away */
1161 lcounter += delta;
1162 if (lcounter >= 0) {
1163 mp->m_sb.sb_fdblocks = lcounter +
1164 XFS_ALLOC_SET_ASIDE(mp);
1165 return 0;
1166 }
1167
1168 /*
1169 * We are out of blocks, use any available reserved
1170 * blocks if were allowed to.
1171 */
1172 if (!rsvd)
1173 return -ENOSPC;
1174
1175 lcounter = (long long)mp->m_resblks_avail + delta;
1176 if (lcounter >= 0) {
1177 mp->m_resblks_avail = lcounter;
1178 return 0;
1179 }
1180 printk_once(KERN_WARNING
1181 "Filesystem \"%s\": reserve blocks depleted! "
1182 "Consider increasing reserve pool size.",
1183 mp->m_fsname);
1184 return -ENOSPC;
1185 }
1186
1187 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1188 return 0;
1189 case XFS_SBS_FREXTENTS:
1190 lcounter = (long long)mp->m_sb.sb_frextents;
1191 lcounter += delta;
1192 if (lcounter < 0) {
1193 return -ENOSPC;
1194 }
1195 mp->m_sb.sb_frextents = lcounter;
1196 return 0;
1197 case XFS_SBS_DBLOCKS:
1198 lcounter = (long long)mp->m_sb.sb_dblocks;
1199 lcounter += delta;
1200 if (lcounter < 0) {
1201 ASSERT(0);
1202 return -EINVAL;
1203 }
1204 mp->m_sb.sb_dblocks = lcounter;
1205 return 0;
1206 case XFS_SBS_AGCOUNT:
1207 scounter = mp->m_sb.sb_agcount;
1208 scounter += delta;
1209 if (scounter < 0) {
1210 ASSERT(0);
1211 return -EINVAL;
1212 }
1213 mp->m_sb.sb_agcount = scounter;
1214 return 0;
1215 case XFS_SBS_IMAX_PCT:
1216 scounter = mp->m_sb.sb_imax_pct;
1217 scounter += delta;
1218 if (scounter < 0) {
1219 ASSERT(0);
1220 return -EINVAL;
1221 }
1222 mp->m_sb.sb_imax_pct = scounter;
1223 return 0;
1224 case XFS_SBS_REXTSIZE:
1225 scounter = mp->m_sb.sb_rextsize;
1226 scounter += delta;
1227 if (scounter < 0) {
1228 ASSERT(0);
1229 return -EINVAL;
1230 }
1231 mp->m_sb.sb_rextsize = scounter;
1232 return 0;
1233 case XFS_SBS_RBMBLOCKS:
1234 scounter = mp->m_sb.sb_rbmblocks;
1235 scounter += delta;
1236 if (scounter < 0) {
1237 ASSERT(0);
1238 return -EINVAL;
1239 }
1240 mp->m_sb.sb_rbmblocks = scounter;
1241 return 0;
1242 case XFS_SBS_RBLOCKS:
1243 lcounter = (long long)mp->m_sb.sb_rblocks;
1244 lcounter += delta;
1245 if (lcounter < 0) {
1246 ASSERT(0);
1247 return -EINVAL;
1248 }
1249 mp->m_sb.sb_rblocks = lcounter;
1250 return 0;
1251 case XFS_SBS_REXTENTS:
1252 lcounter = (long long)mp->m_sb.sb_rextents;
1253 lcounter += delta;
1254 if (lcounter < 0) {
1255 ASSERT(0);
1256 return -EINVAL;
1257 }
1258 mp->m_sb.sb_rextents = lcounter;
1259 return 0;
1260 case XFS_SBS_REXTSLOG:
1261 scounter = mp->m_sb.sb_rextslog;
1262 scounter += delta;
1263 if (scounter < 0) {
1264 ASSERT(0);
1265 return -EINVAL;
1266 }
1267 mp->m_sb.sb_rextslog = scounter;
1268 return 0;
1269 default:
-		ASSERT(0);
-		return -EINVAL;
-	}
-}
+		ASSERT(0);
+		percpu_counter_add(&mp->m_icount, -delta);
+		return -EINVAL;
+	}
+	return 0;
+}
 
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta.  This modification
- * is protected by the m_sb_lock.  Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
 int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
 	struct xfs_mount	*mp,
-	xfs_sb_field_t		field,
-	int64_t			delta,
-	int			rsvd)
+	int64_t			delta)
 {
-	int			status;
-
-#ifdef HAVE_PERCPU_SB
-	ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
-	spin_lock(&mp->m_sb_lock);
-	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	spin_unlock(&mp->m_sb_lock);
-
-	return status;
+	percpu_counter_add(&mp->m_ifree, delta);
+	if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+		ASSERT(0);
+		percpu_counter_add(&mp->m_ifree, -delta);
+		return -EINVAL;
+	}
+	return 0;
 }
 
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in.  Either all of the specified deltas
- * will be applied or none of them will.  If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
 int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
 	struct xfs_mount	*mp,
-	xfs_mod_sb_t		*msb,
-	uint			nmsb,
-	int			rsvd)
+	int64_t			delta,
+	bool			rsvd)
 {
-	xfs_mod_sb_t		*msbp;
-	int			error = 0;
+	int64_t			lcounter;
+	long long		res_used;
+	s32			batch;
+
+	if (delta > 0) {
+		/*
+		 * If the reserve pool is depleted, put blocks back into it
+		 * first. Most of the time the pool is full.
+		 */
+		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+			percpu_counter_add(&mp->m_fdblocks, delta);
+			return 0;
+		}
+
+		spin_lock(&mp->m_sb_lock);
+		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+		if (res_used > delta) {
+			mp->m_resblks_avail += delta;
+		} else {
+			delta -= res_used;
+			mp->m_resblks_avail = mp->m_resblks;
+			percpu_counter_add(&mp->m_fdblocks, delta);
+		}
+		spin_unlock(&mp->m_sb_lock);
+		return 0;
+	}
 
 	/*
-	 * Loop through the array of mod structures and apply each individually.
-	 * If any fail, then back out all those which have already been applied.
-	 * Do all of this within the scope of the m_sb_lock so that all of the
-	 * changes will be atomic.
+	 * Taking blocks away, need to be more accurate the closer we
+	 * are to zero.
+	 *
+	 * batch size is set to a maximum of 1024 blocks - if we are
+	 * allocating of freeing extents larger than this then we aren't
+	 * going to be hammering the counter lock so a lock per update
+	 * is not a problem.
+	 *
+	 * If the counter has a value of less than 2 * max batch size,
+	 * then make everything serialise as we are real close to
+	 * ENOSPC.
 	 */
+#define __BATCH	1024
+	if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+		batch = 1;
+	else
+		batch = __BATCH;
+#undef __BATCH
+
+	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
+	if (percpu_counter_compare(&mp->m_fdblocks,
+				   XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+		/* we had space! */
+		return 0;
+	}
+
+	/*
+	 * lock up the sb for dipping into reserves before releasing the space
+	 * that took us to ENOSPC.
+	 */
 	spin_lock(&mp->m_sb_lock);
-	for (msbp = msb; msbp < (msb + nmsb); msbp++) {
-		ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
-		       msbp->msb_field > XFS_SBS_FDBLOCKS);
-
-		error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-						   msbp->msb_delta, rsvd);
-		if (error)
-			goto unwind;
+	percpu_counter_add(&mp->m_fdblocks, -delta);
+	if (!rsvd)
+		goto fdblocks_enospc;
+
+	lcounter = (long long)mp->m_resblks_avail + delta;
+	if (lcounter >= 0) {
+		mp->m_resblks_avail = lcounter;
+		spin_unlock(&mp->m_sb_lock);
+		return 0;
 	}
+	printk_once(KERN_WARNING
+		"Filesystem \"%s\": reserve blocks depleted! "
+		"Consider increasing reserve pool size.",
+		mp->m_fsname);
+fdblocks_enospc:
 	spin_unlock(&mp->m_sb_lock);
-	return 0;
+	return -ENOSPC;
+}
 
-unwind:
-	while (--msbp >= msb) {
-		error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-						   -msbp->msb_delta, rsvd);
-		ASSERT(error == 0);
-	}
+int
+xfs_mod_frextents(
+	struct xfs_mount	*mp,
+	int64_t			delta)
+{
+	int64_t			lcounter;
+	int			ret = 0;
+
+	spin_lock(&mp->m_sb_lock);
+	lcounter = mp->m_sb.sb_frextents + delta;
+	if (lcounter < 0)
+		ret = -ENOSPC;
+	else
+		mp->m_sb.sb_frextents = lcounter;
 	spin_unlock(&mp->m_sb_lock);
-	return error;
+	return ret;
 }
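Note on the batch logic in xfs_mod_fdblocks() above: a percpu counter's globally visible value can lag the true sum by up to batch * nr_cpus, so the batch must shrink to 1 (every update folded into the global count) once the counter gets within 2 * __BATCH of the ENOSPC threshold. A minimal standalone sketch of that policy, with an illustrative helper name (choose_batch is not kernel API):

	#include <stdint.h>

	#define BATCH_MAX	1024	/* mirrors __BATCH above */

	/*
	 * Worst-case percpu drift is roughly batch * nr_cpus, so
	 * serialise (batch = 1) once the counter is within
	 * 2 * BATCH_MAX of the limit that must not be crossed.
	 */
	static int32_t choose_batch(int64_t counter_value)
	{
		if (counter_value < 2 * BATCH_MAX)
			return 1;
		return BATCH_MAX;
	}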
 
 /*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
 	}
 	return 0;
 }
1410
1411#ifdef HAVE_PERCPU_SB
1412/*
1413 * Per-cpu incore superblock counters
1414 *
1415 * Simple concept, difficult implementation
1416 *
1417 * Basically, replace the incore superblock counters with a distributed per cpu
1418 * counter for contended fields (e.g. free block count).
1419 *
1420 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
1421 * hence needs to be accurately read when we are running low on space. Hence
1422 * there is a method to enable and disable the per-cpu counters based on how
1423 * much "stuff" is available in them.
1424 *
1425 * Basically, a counter is enabled if there is enough free resource to justify
1426 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
1427 * ENOSPC), then we disable the counters to synchronise all callers and
1428 * re-distribute the available resources.
1429 *
1430 * If, once we redistributed the available resources, we still get a failure,
1431 * we disable the per-cpu counter and go through the slow path.
1432 *
1433 * The slow path is the current xfs_mod_incore_sb() function. This means that
1434 * when we disable a per-cpu counter, we need to drain its resources back to
1435 * the global superblock. We do this after disabling the counter to prevent
1436 * more threads from queueing up on the counter.
1437 *
1438 * Essentially, this means that we still need a lock in the fast path to enable
1439 * synchronisation between the global counters and the per-cpu counters. This
1440 * is not a problem because the lock will be local to a CPU almost all the time
1441 * and have little contention except when we get to ENOSPC conditions.
1442 *
1443 * Basically, this lock becomes a barrier that enables us to lock out the fast
1444 * path while we do things like enabling and disabling counters and
1445 * synchronising the counters.
1446 *
1447 * Locking rules:
1448 *
1449 * 1. m_sb_lock before picking up per-cpu locks
1450 * 2. per-cpu locks always picked up via for_each_online_cpu() order
1451 * 3. accurate counter sync requires m_sb_lock + per cpu locks
1452 * 4. modifying per-cpu counters requires holding per-cpu lock
1453 * 5. modifying global counters requires holding m_sb_lock
1454 * 6. enabling or disabling a counter requires holding the m_sb_lock
1455 * and _none_ of the per-cpu locks.
1456 *
1457 * Disabled counters are only ever re-enabled by a balance operation
1458 * that results in more free resources per CPU than a given threshold.
1459 * To ensure counters don't remain disabled, they are rebalanced when
1460 * the global resource goes above a higher threshold (i.e. some hysteresis
1461 * is present to prevent thrashing).
1462 */
1463
1464#ifdef CONFIG_HOTPLUG_CPU
1465/*
1466 * hot-plug CPU notifier support.
1467 *
1468 * We need a notifier per filesystem as we need to be able to identify
1469 * the filesystem to balance the counters out. This is achieved by
1470 * having a notifier block embedded in the xfs_mount_t and doing pointer
1471 * magic to get the mount pointer from the notifier block address.
1472 */
1473STATIC int
1474xfs_icsb_cpu_notify(
1475 struct notifier_block *nfb,
1476 unsigned long action,
1477 void *hcpu)
1478{
1479 xfs_icsb_cnts_t *cntp;
1480 xfs_mount_t *mp;
1481
1482 mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
1483 cntp = (xfs_icsb_cnts_t *)
1484 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
1485 switch (action) {
1486 case CPU_UP_PREPARE:
1487 case CPU_UP_PREPARE_FROZEN:
1488 /* Easy Case - initialize the area and locks, and
1489 * then rebalance when online does everything else for us. */
1490 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1491 break;
1492 case CPU_ONLINE:
1493 case CPU_ONLINE_FROZEN:
1494 xfs_icsb_lock(mp);
1495 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1496 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1497 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1498 xfs_icsb_unlock(mp);
1499 break;
1500 case CPU_DEAD:
1501 case CPU_DEAD_FROZEN:
1502 /* Disable all the counters, then fold the dead cpu's
1503 * count into the total on the global superblock and
1504 * re-enable the counters. */
1505 xfs_icsb_lock(mp);
1506 spin_lock(&mp->m_sb_lock);
1507 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
1508 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
1509 xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
1510
1511 mp->m_sb.sb_icount += cntp->icsb_icount;
1512 mp->m_sb.sb_ifree += cntp->icsb_ifree;
1513 mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
1514
1515 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1516
1517 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
1518 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
1519 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
1520 spin_unlock(&mp->m_sb_lock);
1521 xfs_icsb_unlock(mp);
1522 break;
1523 }
1524
1525 return NOTIFY_OK;
1526}
1527#endif /* CONFIG_HOTPLUG_CPU */
1528
1529int
1530xfs_icsb_init_counters(
1531 xfs_mount_t *mp)
1532{
1533 xfs_icsb_cnts_t *cntp;
1534 int i;
1535
1536 mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
1537 if (mp->m_sb_cnts == NULL)
1538 return -ENOMEM;
1539
1540 for_each_online_cpu(i) {
1541 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1542 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1543 }
1544
1545 mutex_init(&mp->m_icsb_mutex);
1546
1547 /*
1548 * start with all counters disabled so that the
1549 * initial balance kicks us off correctly
1550 */
1551 mp->m_icsb_counters = -1;
1552
1553#ifdef CONFIG_HOTPLUG_CPU
1554 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1555 mp->m_icsb_notifier.priority = 0;
1556 register_hotcpu_notifier(&mp->m_icsb_notifier);
1557#endif /* CONFIG_HOTPLUG_CPU */
1558
1559 return 0;
1560}
1561
1562void
1563xfs_icsb_reinit_counters(
1564 xfs_mount_t *mp)
1565{
1566 xfs_icsb_lock(mp);
1567 /*
1568 * start with all counters disabled so that the
1569 * initial balance kicks us off correctly
1570 */
1571 mp->m_icsb_counters = -1;
1572 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1573 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1574 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1575 xfs_icsb_unlock(mp);
1576}
1577
1578void
1579xfs_icsb_destroy_counters(
1580 xfs_mount_t *mp)
1581{
1582 if (mp->m_sb_cnts) {
1583 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1584 free_percpu(mp->m_sb_cnts);
1585 }
1586 mutex_destroy(&mp->m_icsb_mutex);
1587}
1588
1589STATIC void
1590xfs_icsb_lock_cntr(
1591 xfs_icsb_cnts_t *icsbp)
1592{
1593 while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
1594 ndelay(1000);
1595 }
1596}
1597
1598STATIC void
1599xfs_icsb_unlock_cntr(
1600 xfs_icsb_cnts_t *icsbp)
1601{
1602 clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
1603}
1604
1605
1606STATIC void
1607xfs_icsb_lock_all_counters(
1608 xfs_mount_t *mp)
1609{
1610 xfs_icsb_cnts_t *cntp;
1611 int i;
1612
1613 for_each_online_cpu(i) {
1614 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1615 xfs_icsb_lock_cntr(cntp);
1616 }
1617}
1618
1619STATIC void
1620xfs_icsb_unlock_all_counters(
1621 xfs_mount_t *mp)
1622{
1623 xfs_icsb_cnts_t *cntp;
1624 int i;
1625
1626 for_each_online_cpu(i) {
1627 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1628 xfs_icsb_unlock_cntr(cntp);
1629 }
1630}
1631
1632STATIC void
1633xfs_icsb_count(
1634 xfs_mount_t *mp,
1635 xfs_icsb_cnts_t *cnt,
1636 int flags)
1637{
1638 xfs_icsb_cnts_t *cntp;
1639 int i;
1640
1641 memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
1642
1643 if (!(flags & XFS_ICSB_LAZY_COUNT))
1644 xfs_icsb_lock_all_counters(mp);
1645
1646 for_each_online_cpu(i) {
1647 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1648 cnt->icsb_icount += cntp->icsb_icount;
1649 cnt->icsb_ifree += cntp->icsb_ifree;
1650 cnt->icsb_fdblocks += cntp->icsb_fdblocks;
1651 }
1652
1653 if (!(flags & XFS_ICSB_LAZY_COUNT))
1654 xfs_icsb_unlock_all_counters(mp);
1655}
1656
1657STATIC int
1658xfs_icsb_counter_disabled(
1659 xfs_mount_t *mp,
1660 xfs_sb_field_t field)
1661{
1662 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1663 return test_bit(field, &mp->m_icsb_counters);
1664}
1665
1666STATIC void
1667xfs_icsb_disable_counter(
1668 xfs_mount_t *mp,
1669 xfs_sb_field_t field)
1670{
1671 xfs_icsb_cnts_t cnt;
1672
1673 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1674
1675 /*
1676 * If we are already disabled, then there is nothing to do
1677 * here. We check before locking all the counters to avoid
1678 * the expensive lock operation when being called in the
1679 * slow path and the counter is already disabled. This is
1680 * safe because the only time we set or clear this state is under
1681 * the m_icsb_mutex.
1682 */
1683 if (xfs_icsb_counter_disabled(mp, field))
1684 return;
1685
1686 xfs_icsb_lock_all_counters(mp);
1687 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
1688 /* drain back to superblock */
1689
1690 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
1691 switch(field) {
1692 case XFS_SBS_ICOUNT:
1693 mp->m_sb.sb_icount = cnt.icsb_icount;
1694 break;
1695 case XFS_SBS_IFREE:
1696 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1697 break;
1698 case XFS_SBS_FDBLOCKS:
1699 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1700 break;
1701 default:
1702 BUG();
1703 }
1704 }
1705
1706 xfs_icsb_unlock_all_counters(mp);
1707}
1708
1709STATIC void
1710xfs_icsb_enable_counter(
1711 xfs_mount_t *mp,
1712 xfs_sb_field_t field,
1713 uint64_t count,
1714 uint64_t resid)
1715{
1716 xfs_icsb_cnts_t *cntp;
1717 int i;
1718
1719 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1720
1721 xfs_icsb_lock_all_counters(mp);
1722 for_each_online_cpu(i) {
1723 cntp = per_cpu_ptr(mp->m_sb_cnts, i);
1724 switch (field) {
1725 case XFS_SBS_ICOUNT:
1726 cntp->icsb_icount = count + resid;
1727 break;
1728 case XFS_SBS_IFREE:
1729 cntp->icsb_ifree = count + resid;
1730 break;
1731 case XFS_SBS_FDBLOCKS:
1732 cntp->icsb_fdblocks = count + resid;
1733 break;
1734 default:
1735 BUG();
1736 break;
1737 }
1738 resid = 0;
1739 }
1740 clear_bit(field, &mp->m_icsb_counters);
1741 xfs_icsb_unlock_all_counters(mp);
1742}
1743
1744void
1745xfs_icsb_sync_counters_locked(
1746 xfs_mount_t *mp,
1747 int flags)
1748{
1749 xfs_icsb_cnts_t cnt;
1750
1751 xfs_icsb_count(mp, &cnt, flags);
1752
1753 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
1754 mp->m_sb.sb_icount = cnt.icsb_icount;
1755 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
1756 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1757 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
1758 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1759}
1760
1761/*
1762 * Accurate update of per-cpu counters to incore superblock
1763 */
1764void
1765xfs_icsb_sync_counters(
1766 xfs_mount_t *mp,
1767 int flags)
1768{
1769 spin_lock(&mp->m_sb_lock);
1770 xfs_icsb_sync_counters_locked(mp, flags);
1771 spin_unlock(&mp->m_sb_lock);
1772}
1773
1774/*
1775 * Balance and enable/disable counters as necessary.
1776 *
1777 * Thresholds for re-enabling counters are somewhat magic. inode counts are
1778 * chosen to be the same number as single on disk allocation chunk per CPU, and
1779 * free blocks is something far enough zero that we aren't going thrash when we
1780 * get near ENOSPC. We also need to supply a minimum we require per cpu to
1781 * prevent looping endlessly when xfs_alloc_space asks for more than will
1782 * be distributed to a single CPU but each CPU has enough blocks to be
1783 * reenabled.
1784 *
1785 * Note that we can be called when counters are already disabled.
1786 * xfs_icsb_disable_counter() optimises the counter locking in this case to
1787 * prevent locking every per-cpu counter needlessly.
1788 */
1789
1790#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
1791#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
1792 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
1793STATIC void
1794xfs_icsb_balance_counter_locked(
1795 xfs_mount_t *mp,
1796 xfs_sb_field_t field,
1797 int min_per_cpu)
1798{
1799 uint64_t count, resid;
1800 int weight = num_online_cpus();
1801 uint64_t min = (uint64_t)min_per_cpu;
1802
1803 /* disable counter and sync counter */
1804 xfs_icsb_disable_counter(mp, field);
1805
1806 /* update counters - first CPU gets residual*/
1807 switch (field) {
1808 case XFS_SBS_ICOUNT:
1809 count = mp->m_sb.sb_icount;
1810 resid = do_div(count, weight);
1811 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1812 return;
1813 break;
1814 case XFS_SBS_IFREE:
1815 count = mp->m_sb.sb_ifree;
1816 resid = do_div(count, weight);
1817 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1818 return;
1819 break;
1820 case XFS_SBS_FDBLOCKS:
1821 count = mp->m_sb.sb_fdblocks;
1822 resid = do_div(count, weight);
1823 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
1824 return;
1825 break;
1826 default:
1827 BUG();
1828 count = resid = 0; /* quiet, gcc */
1829 break;
1830 }
1831
1832 xfs_icsb_enable_counter(mp, field, count, resid);
1833}
1834
1835STATIC void
1836xfs_icsb_balance_counter(
1837 xfs_mount_t *mp,
1838 xfs_sb_field_t fields,
1839 int min_per_cpu)
1840{
1841 spin_lock(&mp->m_sb_lock);
1842 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
1843 spin_unlock(&mp->m_sb_lock);
1844}
1845
1846int
1847xfs_icsb_modify_counters(
1848 xfs_mount_t *mp,
1849 xfs_sb_field_t field,
1850 int64_t delta,
1851 int rsvd)
1852{
1853 xfs_icsb_cnts_t *icsbp;
1854 long long lcounter; /* long counter for 64 bit fields */
1855 int ret = 0;
1856
1857 might_sleep();
1858again:
1859 preempt_disable();
1860 icsbp = this_cpu_ptr(mp->m_sb_cnts);
1861
1862 /*
1863 * if the counter is disabled, go to slow path
1864 */
1865 if (unlikely(xfs_icsb_counter_disabled(mp, field)))
1866 goto slow_path;
1867 xfs_icsb_lock_cntr(icsbp);
1868 if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
1869 xfs_icsb_unlock_cntr(icsbp);
1870 goto slow_path;
1871 }
1872
1873 switch (field) {
1874 case XFS_SBS_ICOUNT:
1875 lcounter = icsbp->icsb_icount;
1876 lcounter += delta;
1877 if (unlikely(lcounter < 0))
1878 goto balance_counter;
1879 icsbp->icsb_icount = lcounter;
1880 break;
1881
1882 case XFS_SBS_IFREE:
1883 lcounter = icsbp->icsb_ifree;
1884 lcounter += delta;
1885 if (unlikely(lcounter < 0))
1886 goto balance_counter;
1887 icsbp->icsb_ifree = lcounter;
1888 break;
1889
1890 case XFS_SBS_FDBLOCKS:
1891 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
1892
1893 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1894 lcounter += delta;
1895 if (unlikely(lcounter < 0))
1896 goto balance_counter;
1897 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1898 break;
1899 default:
1900 BUG();
1901 break;
1902 }
1903 xfs_icsb_unlock_cntr(icsbp);
1904 preempt_enable();
1905 return 0;
1906
1907slow_path:
1908 preempt_enable();
1909
1910 /*
1911 * serialise with a mutex so we don't burn lots of cpu on
1912 * the superblock lock. We still need to hold the superblock
1913 * lock, however, when we modify the global structures.
1914 */
1915 xfs_icsb_lock(mp);
1916
1917 /*
1918 * Now running atomically.
1919 *
1920 * If the counter is enabled, someone has beaten us to rebalancing.
1921 * Drop the lock and try again in the fast path....
1922 */
1923 if (!(xfs_icsb_counter_disabled(mp, field))) {
1924 xfs_icsb_unlock(mp);
1925 goto again;
1926 }
1927
1928 /*
1929 * The counter is currently disabled. Because we are
1930 * running atomically here, we know a rebalance cannot
1931 * be in progress. Hence we can go straight to operating
1932 * on the global superblock. We do not call xfs_mod_incore_sb()
1933 * here even though we need to get the m_sb_lock. Doing so
1934 * will cause us to re-enter this function and deadlock.
1935 * Hence we get the m_sb_lock ourselves and then call
1936 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
1937 * directly on the global counters.
1938 */
1939 spin_lock(&mp->m_sb_lock);
1940 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1941 spin_unlock(&mp->m_sb_lock);
1942
1943 /*
1944 * Now that we've modified the global superblock, we
1945 * may be able to re-enable the distributed counters
1946 * (e.g. lots of space just got freed). After that
1947 * we are done.
1948 */
1949 if (ret != -ENOSPC)
1950 xfs_icsb_balance_counter(mp, field, 0);
1951 xfs_icsb_unlock(mp);
1952 return ret;
1953
1954balance_counter:
1955 xfs_icsb_unlock_cntr(icsbp);
1956 preempt_enable();
1957
1958 /*
1959 * We may have multiple threads here if multiple per-cpu
1960 * counters run dry at the same time. This will mean we can
1961 * do more balances than strictly necessary but it is not
1962 * the common slowpath case.
1963 */
1964 xfs_icsb_lock(mp);
1965
1966 /*
1967 * running atomically.
1968 *
1969 * This will leave the counter in the correct state for future
1970 * accesses. After the rebalance, we simply try again and our retry
1971 * will either succeed through the fast path or slow path without
1972 * another balance operation being required.
1973 */
1974 xfs_icsb_balance_counter(mp, field, delta);
1975 xfs_icsb_unlock(mp);
1976 goto again;
1977}
1978
1979#endif
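Everything removed above (the hand-rolled per-cpu counters, the hotplug notifier, and the balance/enable/disable machinery) is subsumed by the generic percpu_counter API used by the new xfs_mod_*() helpers. The core replacement pattern, sketched using only calls that appear in this patch (error handling trimmed):

	static int mod_counter_checked(struct percpu_counter *c, int64_t delta)
	{
		percpu_counter_add(c, delta);		/* per-cpu fast path */
		if (percpu_counter_compare(c, 0) < 0) {	/* accurate global compare */
			percpu_counter_add(c, -delta);	/* back out the underflow */
			return -EINVAL;
		}
		return 0;
	}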
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0d8abd6364d9..8c995a2ccb6f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,8 +18,6 @@
 #ifndef __XFS_MOUNT_H__
 #define __XFS_MOUNT_H__
 
-#ifdef __KERNEL__
-
 struct xlog;
 struct xfs_inode;
 struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
 struct xfs_dir_ops;
 struct xfs_da_geometry;
 
32#ifdef HAVE_PERCPU_SB
33
34/*
35 * Valid per-cpu incore superblock counters. Note that if you add new counters,
36 * you may need to define new counter disabled bit field descriptors as there
37 * are more possible fields in the superblock that can fit in a bitfield on a
38 * 32 bit platform. The XFS_SBS_* values for the current current counters just
39 * fit.
40 */
41typedef struct xfs_icsb_cnts {
42 uint64_t icsb_fdblocks;
43 uint64_t icsb_ifree;
44 uint64_t icsb_icount;
45 unsigned long icsb_flags;
46} xfs_icsb_cnts_t;
47
48#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
49
50#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
51
52extern int xfs_icsb_init_counters(struct xfs_mount *);
53extern void xfs_icsb_reinit_counters(struct xfs_mount *);
54extern void xfs_icsb_destroy_counters(struct xfs_mount *);
55extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
56extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
57extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
58 int64_t, int);
59
60#else
61#define xfs_icsb_init_counters(mp) (0)
62#define xfs_icsb_destroy_counters(mp) do { } while (0)
63#define xfs_icsb_reinit_counters(mp) do { } while (0)
64#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
65#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
66#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
67 xfs_mod_incore_sb(mp, field, delta, rsvd)
68#endif
69
 /* dynamic preallocation free space thresholds, 5% down to 1% */
 enum {
 	XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
 	struct super_block	*m_super;
 	xfs_tid_t		m_tid;		/* next unused tid for fs */
 	struct xfs_ail		*m_ail;		/* fs active log item list */
-	xfs_sb_t		m_sb;		/* copy of fs superblock */
+
+	struct xfs_sb		m_sb;		/* copy of fs superblock */
 	spinlock_t		m_sb_lock;	/* sb counter lock */
+	struct percpu_counter	m_icount;	/* allocated inodes counter */
+	struct percpu_counter	m_ifree;	/* free inodes counter */
+	struct percpu_counter	m_fdblocks;	/* free block counter */
+
 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
 	char			*m_fsname;	/* filesystem name */
 	int			m_fsname_len;	/* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
 	const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
 	uint			m_chsize;	/* size of next field */
 	atomic_t		m_active_trans;	/* number trans frozen */
-#ifdef HAVE_PERCPU_SB
-	xfs_icsb_cnts_t __percpu *m_sb_cnts;	/* per-cpu superblock counters */
-	unsigned long		m_icsb_counters; /* disabled per-cpu counters */
-	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
-	struct mutex		m_icsb_mutex;	/* balancer sync lock */
-#endif
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
 	struct delayed_work	m_eofblocks_work; /* background eof blocks
@@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 }
 
-/*
304 * Per-cpu superblock locking functions
305 */
306#ifdef HAVE_PERCPU_SB
307static inline void
308xfs_icsb_lock(xfs_mount_t *mp)
309{
310 mutex_lock(&mp->m_icsb_mutex);
311}
312
313static inline void
314xfs_icsb_unlock(xfs_mount_t *mp)
315{
316 mutex_unlock(&mp->m_icsb_mutex);
317}
318#else
319#define xfs_icsb_lock(mp)
320#define xfs_icsb_unlock(mp)
321#endif
322
323/*
324 * This structure is for use by the xfs_mod_incore_sb_batch() routine.
325 * xfs_growfs can specify a few fields which are more than int limit
326 */
327typedef struct xfs_mod_sb {
328 xfs_sb_field_t msb_field; /* Field to modify, see below */
329 int64_t msb_delta; /* Change to make to specified field */
330} xfs_mod_sb_t;
331
 /*
  * Per-ag incore structure, copies of information in agf and agi, to improve the
  * performance of allocation group selection.
  */
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
 extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
 				     xfs_agnumber_t *maxagi);
-
 extern void	xfs_unmountfs(xfs_mount_t *);
-extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
-			uint, int);
+
+extern int	xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
+extern int	xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
+extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
+				 bool reserved);
+extern int	xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
+
 extern int	xfs_mount_log_sb(xfs_mount_t *);
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int	xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
 
 extern void	xfs_set_low_space_thresholds(struct xfs_mount *);
 
-#endif	/* __KERNEL__ */
-
 #endif	/* __XFS_MOUNT_H__ */
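With the header exporting one helper per field instead of the xfs_sb_field_t switch, call sites convert mechanically. A hypothetical before/after at one call site (nblks and rsvd are illustrative locals; the real conversions happen in the other files of this series):

	/* before: field selector, single spinlock-protected path */
	error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -(int64_t)nblks, rsvd);

	/* after: dedicated helper with a percpu fast path */
	error = xfs_mod_fdblocks(mp, -(int64_t)nblks, rsvd);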
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 30ecca3037e3..f8a674d7f092 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
 	if (!mru || !mru->lists)
 		return -EINVAL;
 
-	if (radix_tree_preload(GFP_KERNEL))
+	if (radix_tree_preload(GFP_NOFS))
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&elem->list_node);
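The GFP_KERNEL to GFP_NOFS switch matters because xfs_mru_cache_insert() runs in filesystem context: a GFP_KERNEL allocation may enter direct reclaim and re-enter the filesystem, which can deadlock. The same rule applies to any allocation on such a path, e.g. (generic sketch, not this commit's code):

	/* __GFP_FS cleared: reclaim will not recurse into the filesystem */
	void *p = kmalloc(size, GFP_NOFS);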
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 4b33ef112400..981a657eca39 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -31,7 +31,8 @@
 int
 xfs_break_layouts(
 	struct inode		*inode,
-	uint			*iolock)
+	uint			*iolock,
+	bool			with_imutex)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	int			error;
@@ -40,8 +41,12 @@ xfs_break_layouts(
 
 	while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
 		xfs_iunlock(ip, *iolock);
+		if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
+			mutex_unlock(&inode->i_mutex);
 		error = break_layout(inode, true);
 		*iolock = XFS_IOLOCK_EXCL;
+		if (with_imutex)
+			mutex_lock(&inode->i_mutex);
 		xfs_ilock(ip, *iolock);
 	}
 
@@ -300,8 +305,10 @@ xfs_fs_commit_blocks(
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-	if (error)
+	if (error) {
+		xfs_trans_cancel(tp, 0);
 		goto out_drop_iolock;
+	}
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
index b7fbfce660f6..8147ac108820 100644
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -8,9 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
 int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
 		struct iattr *iattr);
 
-int xfs_break_layouts(struct inode *inode, uint *iolock);
+int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
 #else
-static inline int xfs_break_layouts(struct inode *inode, uint *iolock)
+static inline int
+xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
 {
 	return 0;
 }
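The new with_imutex flag exists because some callers of xfs_break_layouts() already hold i_mutex, and the ordering is i_mutex before the XFS iolock; when break_layout() must sleep, both locks are dropped and re-taken in that order. A hedged sketch of such a caller (local names illustrative):

	uint	iolock = XFS_IOLOCK_EXCL;

	mutex_lock(&inode->i_mutex);	/* VFS lock first */
	xfs_ilock(ip, iolock);		/* then the XFS iolock */
	error = xfs_break_layouts(inode, &iolock, true);	/* may cycle both locks */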
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 53cc2aaf8d2b..5538468c7f63 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -719,6 +719,7 @@ xfs_qm_qino_alloc(
 	xfs_trans_t	*tp;
 	int		error;
 	int		committed;
+	bool		need_alloc = true;
 
 	*ip = NULL;
 	/*
@@ -747,6 +748,7 @@ xfs_qm_qino_alloc(
 				return error;
 			mp->m_sb.sb_gquotino = NULLFSINO;
 			mp->m_sb.sb_pquotino = NULLFSINO;
+			need_alloc = false;
 		}
 	}
 
@@ -758,7 +760,7 @@ xfs_qm_qino_alloc(
 		return error;
 	}
 
-	if (!*ip) {
+	if (need_alloc) {
 		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
 								&committed);
 		if (error) {
@@ -794,11 +796,14 @@ xfs_qm_qino_alloc(
 	spin_unlock(&mp->m_sb_lock);
 	xfs_log_sb(tp);
 
-	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+	if (error) {
+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
 		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-		return error;
 	}
-	return 0;
+	if (need_alloc)
+		xfs_finish_inode_setup(*ip);
+	return error;
 }
 
 
@@ -836,6 +841,11 @@ xfs_qm_reset_dqcounts(
 		 */
 		xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
 			    "xfs_quotacheck");
+		/*
+		 * Reset type in case we are reusing group quota file for
+		 * project quotas or vice versa
+		 */
+		ddq->d_flags = type;
 		ddq->d_bcount = 0;
 		ddq->d_icount = 0;
 		ddq->d_rtbcount = 0;
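The ddq->d_flags = type reset above matters when an existing on-disk quota inode is reused for a different quota type (group and project quotas share an inode on older superblock formats); without it quotacheck would leave stale type bits behind. Assuming the usual XFS_DQ_* type flags from xfs_quota_defs.h, the invariant afterwards is:

	/* every dquot in the repaired buffer now carries the active type */
	ASSERT(type == XFS_DQ_USER || type == XFS_DQ_PROJ || type == XFS_DQ_GROUP);
	ddq->d_flags = type;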
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 0d4d3590cf85..996a04064894 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -168,10 +168,6 @@ extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
 					uint, struct qc_dqblk *);
 extern int		xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
 					struct qc_dqblk *);
-extern int		xfs_qm_scall_getqstat(struct xfs_mount *,
-					struct fs_quota_stat *);
-extern int		xfs_qm_scall_getqstatv(struct xfs_mount *,
-					struct fs_quota_statv *);
 extern int		xfs_qm_scall_quotaon(struct xfs_mount *, uint);
 extern int		xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
 
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 9b965db45800..9a25c9275fb3 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -38,7 +38,6 @@
 STATIC int	xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
 STATIC int	xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
 					uint);
-STATIC uint	xfs_qm_export_flags(uint);
 
 /*
  * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -389,159 +388,6 @@ xfs_qm_scall_quotaon(
 	return 0;
 }
 
392
393/*
394 * Return quota status information, such as uquota-off, enforcements, etc.
395 * for Q_XGETQSTAT command.
396 */
397int
398xfs_qm_scall_getqstat(
399 struct xfs_mount *mp,
400 struct fs_quota_stat *out)
401{
402 struct xfs_quotainfo *q = mp->m_quotainfo;
403 struct xfs_inode *uip = NULL;
404 struct xfs_inode *gip = NULL;
405 struct xfs_inode *pip = NULL;
406 bool tempuqip = false;
407 bool tempgqip = false;
408 bool temppqip = false;
409
410 memset(out, 0, sizeof(fs_quota_stat_t));
411
412 out->qs_version = FS_QSTAT_VERSION;
413 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
414 (XFS_ALL_QUOTA_ACCT|
415 XFS_ALL_QUOTA_ENFD));
416 uip = q->qi_uquotaip;
417 gip = q->qi_gquotaip;
418 pip = q->qi_pquotaip;
419 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
420 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
421 0, 0, &uip) == 0)
422 tempuqip = true;
423 }
424 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
425 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
426 0, 0, &gip) == 0)
427 tempgqip = true;
428 }
429 /*
430 * Q_XGETQSTAT doesn't have room for both group and project quotas.
431 * So, allow the project quota values to be copied out only if
432 * there is no group quota information available.
433 */
434 if (!gip) {
435 if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
436 if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
437 0, 0, &pip) == 0)
438 temppqip = true;
439 }
440 } else
441 pip = NULL;
442 if (uip) {
443 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
444 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
445 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
446 if (tempuqip)
447 IRELE(uip);
448 }
449
450 if (gip) {
451 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
452 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
453 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
454 if (tempgqip)
455 IRELE(gip);
456 }
457 if (pip) {
458 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
459 out->qs_gquota.qfs_nblks = pip->i_d.di_nblocks;
460 out->qs_gquota.qfs_nextents = pip->i_d.di_nextents;
461 if (temppqip)
462 IRELE(pip);
463 }
464 out->qs_incoredqs = q->qi_dquots;
465 out->qs_btimelimit = q->qi_btimelimit;
466 out->qs_itimelimit = q->qi_itimelimit;
467 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
468 out->qs_bwarnlimit = q->qi_bwarnlimit;
469 out->qs_iwarnlimit = q->qi_iwarnlimit;
470
471 return 0;
472}
473
474/*
475 * Return quota status information, such as uquota-off, enforcements, etc.
476 * for Q_XGETQSTATV command, to support separate project quota field.
477 */
478int
479xfs_qm_scall_getqstatv(
480 struct xfs_mount *mp,
481 struct fs_quota_statv *out)
482{
483 struct xfs_quotainfo *q = mp->m_quotainfo;
484 struct xfs_inode *uip = NULL;
485 struct xfs_inode *gip = NULL;
486 struct xfs_inode *pip = NULL;
487 bool tempuqip = false;
488 bool tempgqip = false;
489 bool temppqip = false;
490
491 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
492 (XFS_ALL_QUOTA_ACCT|
493 XFS_ALL_QUOTA_ENFD));
494 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
495 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
496 out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino;
497
498 uip = q->qi_uquotaip;
499 gip = q->qi_gquotaip;
500 pip = q->qi_pquotaip;
501 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
502 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
503 0, 0, &uip) == 0)
504 tempuqip = true;
505 }
506 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
507 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
508 0, 0, &gip) == 0)
509 tempgqip = true;
510 }
511 if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
512 if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
513 0, 0, &pip) == 0)
514 temppqip = true;
515 }
516 if (uip) {
517 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
518 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
519 if (tempuqip)
520 IRELE(uip);
521 }
522
523 if (gip) {
524 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
525 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
526 if (tempgqip)
527 IRELE(gip);
528 }
529 if (pip) {
530 out->qs_pquota.qfs_nblks = pip->i_d.di_nblocks;
531 out->qs_pquota.qfs_nextents = pip->i_d.di_nextents;
532 if (temppqip)
533 IRELE(pip);
534 }
535 out->qs_incoredqs = q->qi_dquots;
536 out->qs_btimelimit = q->qi_btimelimit;
537 out->qs_itimelimit = q->qi_itimelimit;
538 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
539 out->qs_bwarnlimit = q->qi_bwarnlimit;
540 out->qs_iwarnlimit = q->qi_iwarnlimit;
541
542 return 0;
543}
544
 #define XFS_QC_MASK \
 	(QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK)
 
@@ -873,28 +719,6 @@ out_put:
 	return error;
 }
 
876STATIC uint
877xfs_qm_export_flags(
878 uint flags)
879{
880 uint uflags;
881
882 uflags = 0;
883 if (flags & XFS_UQUOTA_ACCT)
884 uflags |= FS_QUOTA_UDQ_ACCT;
885 if (flags & XFS_GQUOTA_ACCT)
886 uflags |= FS_QUOTA_GDQ_ACCT;
887 if (flags & XFS_PQUOTA_ACCT)
888 uflags |= FS_QUOTA_PDQ_ACCT;
889 if (flags & XFS_UQUOTA_ENFD)
890 uflags |= FS_QUOTA_UDQ_ENFD;
891 if (flags & XFS_GQUOTA_ENFD)
892 uflags |= FS_QUOTA_GDQ_ENFD;
893 if (flags & XFS_PQUOTA_ENFD)
894 uflags |= FS_QUOTA_PDQ_ENFD;
895 return uflags;
896}
897
 
 STATIC int
 xfs_dqrele_inode(
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 6923905ab33d..7795e0d01382 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -23,10 +23,81 @@
 #include "xfs_inode.h"
 #include "xfs_quota.h"
 #include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
 #include "xfs_qm.h"
 #include <linux/quota.h>
 
 
32static void
33xfs_qm_fill_state(
34 struct qc_type_state *tstate,
35 struct xfs_mount *mp,
36 struct xfs_inode *ip,
37 xfs_ino_t ino)
38{
39 struct xfs_quotainfo *q = mp->m_quotainfo;
40 bool tempqip = false;
41
42 tstate->ino = ino;
43 if (!ip && ino == NULLFSINO)
44 return;
45 if (!ip) {
46 if (xfs_iget(mp, NULL, ino, 0, 0, &ip))
47 return;
48 tempqip = true;
49 }
50 tstate->flags |= QCI_SYSFILE;
51 tstate->blocks = ip->i_d.di_nblocks;
52 tstate->nextents = ip->i_d.di_nextents;
53 tstate->spc_timelimit = q->qi_btimelimit;
54 tstate->ino_timelimit = q->qi_itimelimit;
55 tstate->rt_spc_timelimit = q->qi_rtbtimelimit;
56 tstate->spc_warnlimit = q->qi_bwarnlimit;
57 tstate->ino_warnlimit = q->qi_iwarnlimit;
58 tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
59 if (tempqip)
60 IRELE(ip);
61}
62
63/*
64 * Return quota status information, such as enforcements, quota file inode
65 * numbers etc.
66 */
67static int
68xfs_fs_get_quota_state(
69 struct super_block *sb,
70 struct qc_state *state)
71{
72 struct xfs_mount *mp = XFS_M(sb);
73 struct xfs_quotainfo *q = mp->m_quotainfo;
74
75 memset(state, 0, sizeof(*state));
76 if (!XFS_IS_QUOTA_RUNNING(mp))
77 return 0;
78 state->s_incoredqs = q->qi_dquots;
79 if (XFS_IS_UQUOTA_RUNNING(mp))
80 state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED;
81 if (XFS_IS_UQUOTA_ENFORCED(mp))
82 state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
83 if (XFS_IS_GQUOTA_RUNNING(mp))
84 state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED;
85 if (XFS_IS_GQUOTA_ENFORCED(mp))
86 state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
87 if (XFS_IS_PQUOTA_RUNNING(mp))
88 state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED;
89 if (XFS_IS_PQUOTA_ENFORCED(mp))
90 state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED;
91
92 xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip,
93 mp->m_sb.sb_uquotino);
94 xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip,
95 mp->m_sb.sb_gquotino);
96 xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip,
97 mp->m_sb.sb_pquotino);
98 return 0;
99}
100
 STATIC int
 xfs_quota_type(int type)
 {
@@ -40,28 +111,40 @@ xfs_quota_type(int type)
 	}
 }
 
-STATIC int
-xfs_fs_get_xstate(
+#define XFS_QC_SETINFO_MASK (QC_TIMER_MASK | QC_WARNS_MASK)
+
+/*
+ * Adjust quota timers & warnings
+ */
+static int
+xfs_fs_set_info(
 	struct super_block	*sb,
-	struct fs_quota_stat	*fqs)
+	int			type,
+	struct qc_info		*info)
 {
 	struct xfs_mount	*mp = XFS_M(sb);
+	struct qc_dqblk		newlim;
 
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
 	if (!XFS_IS_QUOTA_RUNNING(mp))
 		return -ENOSYS;
-	return xfs_qm_scall_getqstat(mp, fqs);
-}
+	if (!XFS_IS_QUOTA_ON(mp))
+		return -ESRCH;
+	if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK)
+		return -EINVAL;
+	if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0)
+		return 0;
 
-STATIC int
-xfs_fs_get_xstatev(
-	struct super_block	*sb,
-	struct fs_quota_statv	*fqs)
-{
-	struct xfs_mount	*mp = XFS_M(sb);
+	newlim.d_fieldmask = info->i_fieldmask;
+	newlim.d_spc_timer = info->i_spc_timelimit;
+	newlim.d_ino_timer = info->i_ino_timelimit;
+	newlim.d_rt_spc_timer = info->i_rt_spc_timelimit;
+	newlim.d_ino_warns = info->i_ino_warnlimit;
+	newlim.d_spc_warns = info->i_spc_warnlimit;
+	newlim.d_rt_spc_warns = info->i_rt_spc_warnlimit;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp))
-		return -ENOSYS;
-	return xfs_qm_scall_getqstatv(mp, fqs);
+	return xfs_qm_scall_setqlim(mp, 0, xfs_quota_type(type), &newlim);
 }
 
 static unsigned int
@@ -178,8 +261,8 @@ xfs_fs_set_dqblk(
 }
 
 const struct quotactl_ops xfs_quotactl_operations = {
-	.get_xstatev		= xfs_fs_get_xstatev,
-	.get_xstate		= xfs_fs_get_xstate,
+	.get_state		= xfs_fs_get_quota_state,
+	.set_info		= xfs_fs_set_info,
 	.quota_enable		= xfs_quota_enable,
 	.quota_disable		= xfs_quota_disable,
 	.rm_xquota		= xfs_fs_rm_xquota,
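xfs_qm_scall_getqstat()/getqstatv() could be deleted because the VFS now assembles the XGETQSTAT reply itself from the generic ->get_state method registered here. A sketch of a consumer (hypothetical caller; the real one is the quotactl(2) handling in fs/quota/quota.c):

	struct qc_state	state;
	int		error;

	error = sb->s_qcop->get_state(sb, &state);
	if (!error && (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED))
		;	/* user quota accounting is enabled on this sb */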
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8fcc4ccc5c79..5f357ca97e76 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -109,8 +109,6 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
 #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce" /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"	/* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"	/* Delayed logging disabled */
 #define MNTOPT_DISCARD	   "discard"	/* Discard unused blocks */
 #define MNTOPT_NODISCARD   "nodiscard"	/* Do not discard unused blocks */
 
@@ -361,28 +359,10 @@ xfs_parseargs(
 		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
 			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
 			mp->m_qflags &= ~XFS_GQUOTA_ENFD;
-		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-			xfs_warn(mp,
-	"delaylog is the default now, option is deprecated.");
-		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-			xfs_warn(mp,
-	"nodelaylog support has been removed, option is deprecated.");
 		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
 			mp->m_flags |= XFS_MOUNT_DISCARD;
 		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
 			mp->m_flags &= ~XFS_MOUNT_DISCARD;
-		} else if (!strcmp(this_char, "ihashsize")) {
-			xfs_warn(mp,
-	"ihashsize no longer used, option is deprecated.");
-		} else if (!strcmp(this_char, "osyncisdsync")) {
-			xfs_warn(mp,
-	"osyncisdsync has no effect, option is deprecated.");
-		} else if (!strcmp(this_char, "osyncisosync")) {
-			xfs_warn(mp,
-	"osyncisosync has no effect, option is deprecated.");
-		} else if (!strcmp(this_char, "irixsgid")) {
-			xfs_warn(mp,
-	"irixsgid is now a sysctl(2) variable, option is deprecated.");
 		} else {
 			xfs_warn(mp, "unknown mount option [%s].", this_char);
 			return -EINVAL;
@@ -986,6 +966,8 @@ xfs_fs_inode_init_once(
 	atomic_set(&ip->i_pincount, 0);
 	spin_lock_init(&ip->i_flags_lock);
 
+	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+		     "xfsino", ip->i_ino);
 	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
 		     "xfsino", ip->i_ino);
 }
@@ -1033,23 +1015,6 @@ xfs_free_fsname(
 	kfree(mp->m_logname);
 }
 
1036STATIC void
1037xfs_fs_put_super(
1038 struct super_block *sb)
1039{
1040 struct xfs_mount *mp = XFS_M(sb);
1041
1042 xfs_filestream_unmount(mp);
1043 xfs_unmountfs(mp);
1044
1045 xfs_freesb(mp);
1046 xfs_icsb_destroy_counters(mp);
1047 xfs_destroy_mount_workqueues(mp);
1048 xfs_close_devices(mp);
1049 xfs_free_fsname(mp);
1050 kfree(mp);
1051}
1052
 STATIC int
 xfs_fs_sync_fs(
 	struct super_block	*sb,
@@ -1085,6 +1050,9 @@ xfs_fs_statfs(
 	xfs_sb_t		*sbp = &mp->m_sb;
 	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
 	__uint64_t		fakeinos, id;
+	__uint64_t		icount;
+	__uint64_t		ifree;
+	__uint64_t		fdblocks;
 	xfs_extlen_t		lsize;
 	__int64_t		ffree;
 
@@ -1095,17 +1063,21 @@ xfs_fs_statfs(
 	statp->f_fsid.val[0] = (u32)id;
 	statp->f_fsid.val[1] = (u32)(id >> 32);
 
-	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+	icount = percpu_counter_sum(&mp->m_icount);
+	ifree = percpu_counter_sum(&mp->m_ifree);
+	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
 
 	spin_lock(&mp->m_sb_lock);
 	statp->f_bsize = sbp->sb_blocksize;
 	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
 	statp->f_blocks = sbp->sb_dblocks - lsize;
-	statp->f_bfree = statp->f_bavail =
-			sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+	spin_unlock(&mp->m_sb_lock);
+
+	statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+	statp->f_bavail = statp->f_bfree;
+
 	fakeinos = statp->f_bfree << sbp->sb_inopblog;
-	statp->f_files =
-	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+	statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
 	if (mp->m_maxicount)
 		statp->f_files = min_t(typeof(statp->f_files),
 					statp->f_files,
@@ -1117,10 +1089,9 @@ xfs_fs_statfs(
 					sbp->sb_icount);
 
 	/* make sure statp->f_ffree does not underflow */
-	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+	ffree = statp->f_files - (icount - ifree);
 	statp->f_ffree = max_t(__int64_t, ffree, 0);
 
-	spin_unlock(&mp->m_sb_lock);
 
 	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
 	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
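statfs pays for percpu_counter_sum() above because it wants the folded, accurate value; fast paths would use the O(1) approximate read instead. Both are standard percpu_counter calls (sketch):

	s64 fast  = percpu_counter_read_positive(&mp->m_fdblocks);	/* O(1), may lag by batch * nr_cpus */
	s64 exact = percpu_counter_sum(&mp->m_fdblocks);		/* O(nr_cpus), folds all per-cpu deltas */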
@@ -1256,6 +1227,12 @@ xfs_fs_remount(
 
 	/* ro -> rw */
 	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+			xfs_warn(mp,
+		"ro->rw transition prohibited on norecovery mount");
+			return -EINVAL;
+		}
+
 		mp->m_flags &= ~XFS_MOUNT_RDONLY;
 
 		/*
@@ -1401,6 +1378,51 @@ xfs_finish_flags(
 	return 0;
 }
 
1403 1380
1381static int
1382xfs_init_percpu_counters(
1383 struct xfs_mount *mp)
1384{
1385 int error;
1386
1387 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1388 if (error)
1389 return -ENOMEM;
1390
1391 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1392 if (error)
1393 goto free_icount;
1394
1395 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1396 if (error)
1397 goto free_ifree;
1398
1399 return 0;
1400
1401free_ifree:
1402 percpu_counter_destroy(&mp->m_ifree);
1403free_icount:
1404 percpu_counter_destroy(&mp->m_icount);
1405 return -ENOMEM;
1406}
1407
1408void
1409xfs_reinit_percpu_counters(
1410 struct xfs_mount *mp)
1411{
1412 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1413 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1414 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1415}
1416
1417static void
1418xfs_destroy_percpu_counters(
1419 struct xfs_mount *mp)
1420{
1421 percpu_counter_destroy(&mp->m_icount);
1422 percpu_counter_destroy(&mp->m_ifree);
1423 percpu_counter_destroy(&mp->m_fdblocks);
1424}
1425
 STATIC int
 xfs_fs_fill_super(
 	struct super_block	*sb,
@@ -1449,7 +1471,7 @@ xfs_fs_fill_super(
 	if (error)
 		goto out_close_devices;
 
-	error = xfs_icsb_init_counters(mp);
+	error = xfs_init_percpu_counters(mp);
 	if (error)
 		goto out_destroy_workqueues;
 
@@ -1507,7 +1529,7 @@ xfs_fs_fill_super(
  out_free_sb:
 	xfs_freesb(mp);
  out_destroy_counters:
-	xfs_icsb_destroy_counters(mp);
+	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
 	xfs_destroy_mount_workqueues(mp);
  out_close_devices:
@@ -1524,6 +1546,24 @@ out_destroy_workqueues:
 	goto out_free_sb;
 }
 
1549STATIC void
1550xfs_fs_put_super(
1551 struct super_block *sb)
1552{
1553 struct xfs_mount *mp = XFS_M(sb);
1554
1555 xfs_notice(mp, "Unmounting Filesystem");
1556 xfs_filestream_unmount(mp);
1557 xfs_unmountfs(mp);
1558
1559 xfs_freesb(mp);
1560 xfs_destroy_percpu_counters(mp);
1561 xfs_destroy_mount_workqueues(mp);
1562 xfs_close_devices(mp);
1563 xfs_free_fsname(mp);
1564 kfree(mp);
1565}
1566
 STATIC struct dentry *
 xfs_fs_mount(
 	struct file_system_type	*fs_type,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 2b830c2f322e..499058fea303 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
 extern const struct quotactl_ops xfs_quotactl_operations;
 
+extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+
 #define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))
 
 #endif	/* __XFS_SUPER_H__ */
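xfs_reinit_percpu_counters(), exported here, just percpu_counter_set()s the three counters from m_sb (see its body in xfs_super.c above), so any path that re-reads the superblock can resync them. A hypothetical call site (the real callers are elsewhere in this series):

	error = xfs_readsb(mp, flags);		/* m_sb reloaded from disk */
	if (!error)
		xfs_reinit_percpu_counters(mp);	/* reseed m_icount/m_ifree/m_fdblocks */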
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 25791df6f638..3df411eadb86 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -177,7 +177,7 @@ xfs_symlink(
177 int pathlen; 177 int pathlen;
178 struct xfs_bmap_free free_list; 178 struct xfs_bmap_free free_list;
179 xfs_fsblock_t first_block; 179 xfs_fsblock_t first_block;
180 bool unlock_dp_on_error = false; 180 bool unlock_dp_on_error = false;
181 uint cancel_flags; 181 uint cancel_flags;
182 int committed; 182 int committed;
183 xfs_fileoff_t first_fsb; 183 xfs_fileoff_t first_fsb;
@@ -221,7 +221,7 @@ xfs_symlink(
221 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 221 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
222 &udqp, &gdqp, &pdqp); 222 &udqp, &gdqp, &pdqp);
223 if (error) 223 if (error)
224 goto std_return; 224 return error;
225 225
226 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 226 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
227 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 227 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -241,7 +241,7 @@ xfs_symlink(
241 } 241 }
242 if (error) { 242 if (error) {
243 cancel_flags = 0; 243 cancel_flags = 0;
244 goto error_return; 244 goto out_trans_cancel;
245 } 245 }
246 246
247 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 247 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -252,7 +252,7 @@ xfs_symlink(
252 */ 252 */
253 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 253 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
254 error = -EPERM; 254 error = -EPERM;
255 goto error_return; 255 goto out_trans_cancel;
256 } 256 }
257 257
258 /* 258 /*
@@ -261,7 +261,7 @@ xfs_symlink(
261 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, 261 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
262 pdqp, resblks, 1, 0); 262 pdqp, resblks, 1, 0);
263 if (error) 263 if (error)
264 goto error_return; 264 goto out_trans_cancel;
265 265
266 /* 266 /*
267 * Check for ability to enter directory entry, if no space reserved. 267 * Check for ability to enter directory entry, if no space reserved.
@@ -269,7 +269,7 @@ xfs_symlink(
269 if (!resblks) { 269 if (!resblks) {
270 error = xfs_dir_canenter(tp, dp, link_name); 270 error = xfs_dir_canenter(tp, dp, link_name);
271 if (error) 271 if (error)
272 goto error_return; 272 goto out_trans_cancel;
273 } 273 }
274 /* 274 /*
275 * Initialize the bmap freelist prior to calling either 275 * Initialize the bmap freelist prior to calling either
@@ -282,15 +282,14 @@ xfs_symlink(
282 */ 282 */
283 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 283 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
284 prid, resblks > 0, &ip, NULL); 284 prid, resblks > 0, &ip, NULL);
285 if (error) { 285 if (error)
286 if (error == -ENOSPC) 286 goto out_trans_cancel;
287 goto error_return;
288 goto error1;
289 }
290 287
291 /* 288 /*
292 * An error after we've joined dp to the transaction will result in the 289 * Now we join the directory inode to the transaction. We do not do it
293 * transaction cancel unlocking dp so don't do it explicitly in the 290 * earlier because xfs_dir_ialloc might commit the previous transaction
291 * (and release all the locks). An error from here on will result in
292 * the transaction cancel unlocking dp so don't do it explicitly in the
294 * error path. 293 * error path.
295 */ 294 */
296 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 295 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
@@ -330,7 +329,7 @@ xfs_symlink(
330 XFS_BMAPI_METADATA, &first_block, resblks, 329 XFS_BMAPI_METADATA, &first_block, resblks,
331 mval, &nmaps, &free_list); 330 mval, &nmaps, &free_list);
332 if (error) 331 if (error)
333 goto error2; 332 goto out_bmap_cancel;
334 333
335 if (resblks) 334 if (resblks)
336 resblks -= fs_blocks; 335 resblks -= fs_blocks;
@@ -348,7 +347,7 @@ xfs_symlink(
348 BTOBB(byte_cnt), 0); 347 BTOBB(byte_cnt), 0);
349 if (!bp) { 348 if (!bp) {
350 error = -ENOMEM; 349 error = -ENOMEM;
351 goto error2; 350 goto out_bmap_cancel;
352 } 351 }
353 bp->b_ops = &xfs_symlink_buf_ops; 352 bp->b_ops = &xfs_symlink_buf_ops;
354 353
@@ -378,7 +377,7 @@ xfs_symlink(
378 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 377 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
379 &first_block, &free_list, resblks); 378 &first_block, &free_list, resblks);
380 if (error) 379 if (error)
381 goto error2; 380 goto out_bmap_cancel;
382 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 381 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
383 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 382 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
384 383
@@ -392,10 +391,13 @@ xfs_symlink(
392 } 391 }
393 392
394 error = xfs_bmap_finish(&tp, &free_list, &committed); 393 error = xfs_bmap_finish(&tp, &free_list, &committed);
395 if (error) { 394 if (error)
396 goto error2; 395 goto out_bmap_cancel;
397 } 396
398 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 397 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
398 if (error)
399 goto out_release_inode;
400
399 xfs_qm_dqrele(udqp); 401 xfs_qm_dqrele(udqp);
400 xfs_qm_dqrele(gdqp); 402 xfs_qm_dqrele(gdqp);
401 xfs_qm_dqrele(pdqp); 403 xfs_qm_dqrele(pdqp);
@@ -403,20 +405,28 @@ xfs_symlink(
403 *ipp = ip; 405 *ipp = ip;
404 return 0; 406 return 0;
405 407
406 error2: 408out_bmap_cancel:
407 IRELE(ip);
408 error1:
409 xfs_bmap_cancel(&free_list); 409 xfs_bmap_cancel(&free_list);
410 cancel_flags |= XFS_TRANS_ABORT; 410 cancel_flags |= XFS_TRANS_ABORT;
411 error_return: 411out_trans_cancel:
412 xfs_trans_cancel(tp, cancel_flags); 412 xfs_trans_cancel(tp, cancel_flags);
413out_release_inode:
414 /*
415 * Wait until after the current transaction is aborted to finish the
416 * setup of the inode and release the inode. This prevents recursive
417 * transactions and deadlocks from xfs_inactive.
418 */
419 if (ip) {
420 xfs_finish_inode_setup(ip);
421 IRELE(ip);
422 }
423
413 xfs_qm_dqrele(udqp); 424 xfs_qm_dqrele(udqp);
414 xfs_qm_dqrele(gdqp); 425 xfs_qm_dqrele(gdqp);
415 xfs_qm_dqrele(pdqp); 426 xfs_qm_dqrele(pdqp);
416 427
417 if (unlock_dp_on_error) 428 if (unlock_dp_on_error)
418 xfs_iunlock(dp, XFS_ILOCK_EXCL); 429 xfs_iunlock(dp, XFS_ILOCK_EXCL);
419 std_return:
420 return error; 430 return error;
421} 431}
422 432
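[editor's note] The xfs_symlink() rework above is mostly a relabelling: error1/error2/error_return become the kernel-conventional out_bmap_cancel/out_trans_cancel/out_release_inode, and releasing the new inode moves after the transaction cancel so that xfs_inactive cannot start a nested transaction while the failed one is still live. The underlying idiom is the standard goto unwind: acquire resources in order, and on failure jump to the label that releases everything acquired so far, falling through the labels in reverse order. A self-contained userspace sketch of the idiom; the three mallocs are stand-ins for the transaction, the block mapping and the inode:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx { void *trans, *map, *inode; };

/*
 * Acquire three resources in order; on failure, unwind whatever was
 * already acquired, in reverse order, through fall-through labels.
 */
static int do_create(struct ctx *c)
{
	int error;

	c->trans = malloc(32);		/* stand-in for starting a transaction */
	if (!c->trans)
		return -ENOMEM;

	c->map = malloc(32);		/* stand-in for a block mapping */
	if (!c->map) {
		error = -ENOMEM;
		goto out_trans_cancel;
	}

	c->inode = malloc(32);		/* stand-in for inode allocation */
	if (!c->inode) {
		error = -ENOMEM;
		goto out_bmap_cancel;
	}
	return 0;

out_bmap_cancel:
	free(c->map);			/* undo the second step ... */
out_trans_cancel:
	free(c->trans);			/* ... then fall through to the first */
	return error;
}

int main(void)
{
	struct ctx c = { 0 };
	int error = do_create(&c);

	printf("do_create: %d\n", error);
	if (!error) {
		free(c->inode);
		free(c->map);
		free(c->trans);
	}
	return 0;
}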
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 51372e34d988..615781bf4ee5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
115 __entry->refcount = refcount; 115 __entry->refcount = refcount;
116 __entry->caller_ip = caller_ip; 116 __entry->caller_ip = caller_ip;
117 ), 117 ),
118 TP_printk("dev %d:%d agno %u refcount %d caller %pf", 118 TP_printk("dev %d:%d agno %u refcount %d caller %ps",
119 MAJOR(__entry->dev), MINOR(__entry->dev), 119 MAJOR(__entry->dev), MINOR(__entry->dev),
120 __entry->agno, 120 __entry->agno,
121 __entry->refcount, 121 __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
239 __entry->caller_ip = caller_ip; 239 __entry->caller_ip = caller_ip;
240 ), 240 ),
241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
242 "offset %lld block %lld count %lld flag %d caller %pf", 242 "offset %lld block %lld count %lld flag %d caller %ps",
243 MAJOR(__entry->dev), MINOR(__entry->dev), 243 MAJOR(__entry->dev), MINOR(__entry->dev),
244 __entry->ino, 244 __entry->ino,
245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
283 __entry->caller_ip = caller_ip; 283 __entry->caller_ip = caller_ip;
284 ), 284 ),
285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
286 "offset %lld block %lld count %lld flag %d caller %pf", 286 "offset %lld block %lld count %lld flag %d caller %ps",
287 MAJOR(__entry->dev), MINOR(__entry->dev), 287 MAJOR(__entry->dev), MINOR(__entry->dev),
288 __entry->ino, 288 __entry->ino,
289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
329 __entry->caller_ip = caller_ip; 329 __entry->caller_ip = caller_ip;
330 ), 330 ),
331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
332 "lock %d flags %s caller %pf", 332 "lock %d flags %s caller %ps",
333 MAJOR(__entry->dev), MINOR(__entry->dev), 333 MAJOR(__entry->dev), MINOR(__entry->dev),
334 (unsigned long long)__entry->bno, 334 (unsigned long long)__entry->bno,
335 __entry->nblks, 335 __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
402 __entry->caller_ip = caller_ip; 402 __entry->caller_ip = caller_ip;
403 ), 403 ),
404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
405 "lock %d flags %s caller %pf", 405 "lock %d flags %s caller %ps",
406 MAJOR(__entry->dev), MINOR(__entry->dev), 406 MAJOR(__entry->dev), MINOR(__entry->dev),
407 (unsigned long long)__entry->bno, 407 (unsigned long long)__entry->bno,
408 __entry->buffer_length, 408 __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
447 __entry->caller_ip = caller_ip; 447 __entry->caller_ip = caller_ip;
448 ), 448 ),
449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
450 "lock %d error %d flags %s caller %pf", 450 "lock %d error %d flags %s caller %ps",
451 MAJOR(__entry->dev), MINOR(__entry->dev), 451 MAJOR(__entry->dev), MINOR(__entry->dev),
452 (unsigned long long)__entry->bno, 452 (unsigned long long)__entry->bno,
453 __entry->buffer_length, 453 __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
613 __entry->lock_flags = lock_flags; 613 __entry->lock_flags = lock_flags;
614 __entry->caller_ip = caller_ip; 614 __entry->caller_ip = caller_ip;
615 ), 615 ),
616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", 616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
617 MAJOR(__entry->dev), MINOR(__entry->dev), 617 MAJOR(__entry->dev), MINOR(__entry->dev),
618 __entry->ino, 618 __entry->ino,
619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), 619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
664DEFINE_INODE_EVENT(xfs_free_file_space); 664DEFINE_INODE_EVENT(xfs_free_file_space);
665DEFINE_INODE_EVENT(xfs_zero_file_space); 665DEFINE_INODE_EVENT(xfs_zero_file_space);
666DEFINE_INODE_EVENT(xfs_collapse_file_space); 666DEFINE_INODE_EVENT(xfs_collapse_file_space);
667DEFINE_INODE_EVENT(xfs_insert_file_space);
667DEFINE_INODE_EVENT(xfs_readdir); 668DEFINE_INODE_EVENT(xfs_readdir);
668#ifdef CONFIG_XFS_POSIX_ACL 669#ifdef CONFIG_XFS_POSIX_ACL
669DEFINE_INODE_EVENT(xfs_get_acl); 670DEFINE_INODE_EVENT(xfs_get_acl);
@@ -685,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
685DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); 686DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
686DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); 687DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
687 688
689DEFINE_INODE_EVENT(xfs_filemap_fault);
690DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
691
688DECLARE_EVENT_CLASS(xfs_iref_class, 692DECLARE_EVENT_CLASS(xfs_iref_class,
689 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 693 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
690 TP_ARGS(ip, caller_ip), 694 TP_ARGS(ip, caller_ip),
@@ -702,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
702 __entry->pincount = atomic_read(&ip->i_pincount); 706 __entry->pincount = atomic_read(&ip->i_pincount);
703 __entry->caller_ip = caller_ip; 707 __entry->caller_ip = caller_ip;
704 ), 708 ),
705 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", 709 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
706 MAJOR(__entry->dev), MINOR(__entry->dev), 710 MAJOR(__entry->dev), MINOR(__entry->dev),
707 __entry->ino, 711 __entry->ino,
708 __entry->count, 712 __entry->count,
@@ -1217,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
1217DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); 1221DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
1218DEFINE_IOMAP_EVENT(xfs_get_blocks_found); 1222DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
1219DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); 1223DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1224DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
1225DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
1226DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
1227DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
1228DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
1220 1229
1221DECLARE_EVENT_CLASS(xfs_simple_io_class, 1230DECLARE_EVENT_CLASS(xfs_simple_io_class,
1222 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 1231 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1333,7 +1342,7 @@ TRACE_EVENT(xfs_bunmap,
1333 __entry->flags = flags; 1342 __entry->flags = flags;
1334 ), 1343 ),
1335 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1344 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
1336 "flags %s caller %pf", 1345 "flags %s caller %ps",
1337 MAJOR(__entry->dev), MINOR(__entry->dev), 1346 MAJOR(__entry->dev), MINOR(__entry->dev),
1338 __entry->ino, 1347 __entry->ino,
1339 __entry->size, 1348 __entry->size,
@@ -1466,7 +1475,7 @@ TRACE_EVENT(xfs_agf,
1466 ), 1475 ),
1467 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1476 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1468 "levels b %u c %u flfirst %u fllast %u flcount %u " 1477 "levels b %u c %u flfirst %u fllast %u flcount %u "
1469 "freeblks %u longest %u caller %pf", 1478 "freeblks %u longest %u caller %ps",
1470 MAJOR(__entry->dev), MINOR(__entry->dev), 1479 MAJOR(__entry->dev), MINOR(__entry->dev),
1471 __entry->agno, 1480 __entry->agno,
1472 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), 1481 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
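[editor's note] Every TP_printk change in xfs_trace.h is the same one-character fix: caller %pf becomes caller %ps. The caller_ip fields hold raw return addresses (XFS tracepoints capture them with _RET_IP_), and %ps resolves a plain text address to a symbol name on every architecture. %pf and %pF instead expect a function pointer and look through a function descriptor on ABIs that use them (ia64, parisc, ppc64 ABIv1), so handing them a raw address prints garbage there. A minimal kernel-module sketch of the correct form; the module itself is illustrative:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

static noinline void show_caller(unsigned long caller_ip)
{
	/* %ps resolves a raw text address to a symbol name on all arches */
	pr_info("called from %ps\n", (void *)caller_ip);
}

static int __init psdemo_init(void)
{
	/* _RET_IP_ is the address this function will return to */
	show_caller(_RET_IP_);
	return 0;
}

static void __exit psdemo_exit(void)
{
}

module_init(psdemo_init);
module_exit(psdemo_exit);
MODULE_LICENSE("GPL");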
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index eb90cd59a0ec..220ef2c906b2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -173,7 +173,7 @@ xfs_trans_reserve(
173 uint rtextents) 173 uint rtextents)
174{ 174{
175 int error = 0; 175 int error = 0;
176 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 176 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
177 177
178 /* Mark this thread as being in a transaction */ 178 /* Mark this thread as being in a transaction */
179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
184 * fail if the count would go below zero. 184 * fail if the count would go below zero.
185 */ 185 */
186 if (blocks > 0) { 186 if (blocks > 0) {
187 error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 187 error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
188 -((int64_t)blocks), rsvd);
189 if (error != 0) { 188 if (error != 0) {
190 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 189 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
191 return -ENOSPC; 190 return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
236 * fail if the count would go below zero. 235 * fail if the count would go below zero.
237 */ 236 */
238 if (rtextents > 0) { 237 if (rtextents > 0) {
239 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 238 error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
240 -((int64_t)rtextents), rsvd);
241 if (error) { 239 if (error) {
242 error = -ENOSPC; 240 error = -ENOSPC;
243 goto undo_log; 241 goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
268 266
269undo_blocks: 267undo_blocks:
270 if (blocks > 0) { 268 if (blocks > 0) {
271 xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 269 xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
272 (int64_t)blocks, rsvd);
273 tp->t_blk_res = 0; 270 tp->t_blk_res = 0;
274 } 271 }
275 272
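[editor's note] The xfs_trans_reserve() hunks above swap the generic xfs_icsb_modify_counters()/xfs_mod_incore_sb() calls for dedicated helpers, xfs_mod_fdblocks() and xfs_mod_frextents(). The contract stays the same: apply a signed delta, but return failure rather than let the counter drop below its floor, where the rsvd flag lets privileged transactions dip into the reserved block pool. A userspace sketch of that fail-instead-of-underflow contract as a compare-and-swap loop; this is a simplification, since the real helper batches updates through a percpu counter first:

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_llong fdblocks = 1000;	/* free blocks */
#define RESERVED_POOL 100		/* blocks held back for critical callers */

/* apply delta; fail with -ENOSPC instead of dropping below the floor */
static int mod_fdblocks(int64_t delta, int rsvd)
{
	long long old, new;
	int64_t floor = rsvd ? 0 : RESERVED_POOL;

	old = atomic_load(&fdblocks);
	do {
		new = old + delta;
		if (new < floor)
			return -ENOSPC;
	} while (!atomic_compare_exchange_weak(&fdblocks, &old, new));
	return 0;
}

int main(void)
{
	printf("%d\n", mod_fdblocks(-950, 0));	/* -ENOSPC: would invade the pool */
	printf("%d\n", mod_fdblocks(-950, 1));	/* 0: reserved callers may use it */
	return 0;
}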
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
488 sizeof(sbp->sb_frextents) - 1); 485 sizeof(sbp->sb_frextents) - 1);
489} 486}
490 487
488STATIC int
489xfs_sb_mod8(
490 uint8_t *field,
491 int8_t delta)
492{
493 int8_t counter = *field;
494
495 counter += delta;
496 if (counter < 0) {
497 ASSERT(0);
498 return -EINVAL;
499 }
500 *field = counter;
501 return 0;
502}
503
504STATIC int
505xfs_sb_mod32(
506 uint32_t *field,
507 int32_t delta)
508{
509 int32_t counter = *field;
510
511 counter += delta;
512 if (counter < 0) {
513 ASSERT(0);
514 return -EINVAL;
515 }
516 *field = counter;
517 return 0;
518}
519
520STATIC int
521xfs_sb_mod64(
522 uint64_t *field,
523 int64_t delta)
524{
525 int64_t counter = *field;
526
527 counter += delta;
528 if (counter < 0) {
529 ASSERT(0);
530 return -EINVAL;
531 }
532 *field = counter;
533 return 0;
534}
535
491/* 536/*
492 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations 537 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
493 * and apply superblock counter changes to the in-core superblock. The 538 * and apply superblock counter changes to the in-core superblock. The
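[editor's note] The new xfs_sb_mod8/32/64 helpers exist because the superblock fields adjusted below come in three widths (sb_imax_pct and sb_rextslog are 8 bits, sb_agcount, sb_rextsize and sb_rbmblocks are 32, sb_dblocks, sb_rblocks, sb_rextents and sb_frextents are 64), and each signed delta must be refused rather than allowed to underflow the unsigned field. Note that the helpers read the unsigned field into a same-width signed local before adding, which assumes the stored value never exceeds the signed maximum. A small userspace harness restating the 64-bit variant above:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* userspace restatement of the xfs_sb_mod64() pattern above */
static int sb_mod64(uint64_t *field, int64_t delta)
{
	int64_t counter = *field;	/* assumes the value fits the signed range */

	counter += delta;
	if (counter < 0)
		return -EINVAL;		/* refuse to let the field underflow */
	*field = counter;
	return 0;
}

int main(void)
{
	uint64_t dblocks = 10;

	printf("%d (dblocks=%llu)\n", sb_mod64(&dblocks, -4),
	       (unsigned long long)dblocks);	/* 0, dblocks = 6 */
	printf("%d (dblocks=%llu)\n", sb_mod64(&dblocks, -7),
	       (unsigned long long)dblocks);	/* -EINVAL, field unchanged */
	return 0;
}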
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
495 * applied to the in-core superblock. The idea is that that has already been 540 * applied to the in-core superblock. The idea is that that has already been
496 * done. 541 * done.
497 * 542 *
498 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
499 * However, we have to ensure that we only modify each superblock field only
500 * once because the application of the delta values may not be atomic. That can
501 * lead to ENOSPC races occurring if we have two separate modifications of the
502 * free space counter to put back the entire reservation and then take away
503 * what we used.
504 *
505 * If we are not logging superblock counters, then the inode allocated/free and 543 * If we are not logging superblock counters, then the inode allocated/free and
506 * used block counts are not updated in the on disk superblock. In this case, 544 * used block counts are not updated in the on disk superblock. In this case,
507 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 545 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
509 */ 547 */
510void 548void
511xfs_trans_unreserve_and_mod_sb( 549xfs_trans_unreserve_and_mod_sb(
512 xfs_trans_t *tp) 550 struct xfs_trans *tp)
513{ 551{
514 xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ 552 struct xfs_mount *mp = tp->t_mountp;
515 xfs_mod_sb_t *msbp; 553 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
516 xfs_mount_t *mp = tp->t_mountp; 554 int64_t blkdelta = 0;
517 /* REFERENCED */ 555 int64_t rtxdelta = 0;
518 int error; 556 int64_t idelta = 0;
519 int rsvd; 557 int64_t ifreedelta = 0;
520 int64_t blkdelta = 0; 558 int error;
521 int64_t rtxdelta = 0;
522 int64_t idelta = 0;
523 int64_t ifreedelta = 0;
524
525 msbp = msb;
526 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
527 559
528 /* calculate deltas */ 560 /* calculate deltas */
529 if (tp->t_blk_res > 0) 561 if (tp->t_blk_res > 0)
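[editor's note] The comment deleted two hunks above described the race that shaped the old batch API, and it is worth spelling out: if a transaction returns its entire block reservation and then subtracts what it actually used as two separate counter operations, another thread can consume the returned space in between, and the second operation fails with ENOSPC even though the caller's net effect was to free space. Applying one net delta per field, as the rewritten code below does, closes that window. A compact single-threaded illustration, with the racy interleaving narrated in the comments:

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_llong free_blocks;	/* 0: every block is currently in use */

/* apply a signed delta, failing instead of going negative */
static int mod(int64_t delta)
{
	long long old = atomic_load(&free_blocks), new;

	do {
		new = old + delta;
		if (new < 0)
			return -ENOSPC;
	} while (!atomic_compare_exchange_weak(&free_blocks, &old, new));
	return 0;
}

int main(void)
{
	/* a transaction reserved 100 blocks and actually used 40 */

	/* racy shape: put back all 100, then take away the 40 used */
	(void)mod(+100);
	/*
	 * window: another thread could allocate those 100 blocks right
	 * here, and the take-away below would then fail with -ENOSPC
	 * even though this caller never held more than it reserved
	 */
	printf("take-away: %d\n", mod(-40));

	/* race-free shape: fold both steps into one net delta, which
	 * only returns space and therefore can never fail */
	printf("net delta: %d\n", mod(+60));
	return 0;
}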
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
547 579
548 /* apply the per-cpu counters */ 580 /* apply the per-cpu counters */
549 if (blkdelta) { 581 if (blkdelta) {
550 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 582 error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
551 blkdelta, rsvd);
552 if (error) 583 if (error)
553 goto out; 584 goto out;
554 } 585 }
555 586
556 if (idelta) { 587 if (idelta) {
557 error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, 588 error = xfs_mod_icount(mp, idelta);
558 idelta, rsvd);
559 if (error) 589 if (error)
560 goto out_undo_fdblocks; 590 goto out_undo_fdblocks;
561 } 591 }
562 592
563 if (ifreedelta) { 593 if (ifreedelta) {
564 error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, 594 error = xfs_mod_ifree(mp, ifreedelta);
565 ifreedelta, rsvd);
566 if (error) 595 if (error)
567 goto out_undo_icount; 596 goto out_undo_icount;
568 } 597 }
569 598
599 if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
600 return;
601
570 /* apply remaining deltas */ 602 /* apply remaining deltas */
571 if (rtxdelta != 0) { 603 spin_lock(&mp->m_sb_lock);
572 msbp->msb_field = XFS_SBS_FREXTENTS; 604 if (rtxdelta) {
573 msbp->msb_delta = rtxdelta; 605 error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
574 msbp++; 606 if (error)
607 goto out_undo_ifree;
575 } 608 }
576 609
577 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 610 if (tp->t_dblocks_delta != 0) {
578 if (tp->t_dblocks_delta != 0) { 611 error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
579 msbp->msb_field = XFS_SBS_DBLOCKS; 612 if (error)
580 msbp->msb_delta = tp->t_dblocks_delta; 613 goto out_undo_frextents;
581 msbp++;
582 }
583 if (tp->t_agcount_delta != 0) {
584 msbp->msb_field = XFS_SBS_AGCOUNT;
585 msbp->msb_delta = tp->t_agcount_delta;
586 msbp++;
587 }
588 if (tp->t_imaxpct_delta != 0) {
589 msbp->msb_field = XFS_SBS_IMAX_PCT;
590 msbp->msb_delta = tp->t_imaxpct_delta;
591 msbp++;
592 }
593 if (tp->t_rextsize_delta != 0) {
594 msbp->msb_field = XFS_SBS_REXTSIZE;
595 msbp->msb_delta = tp->t_rextsize_delta;
596 msbp++;
597 }
598 if (tp->t_rbmblocks_delta != 0) {
599 msbp->msb_field = XFS_SBS_RBMBLOCKS;
600 msbp->msb_delta = tp->t_rbmblocks_delta;
601 msbp++;
602 }
603 if (tp->t_rblocks_delta != 0) {
604 msbp->msb_field = XFS_SBS_RBLOCKS;
605 msbp->msb_delta = tp->t_rblocks_delta;
606 msbp++;
607 }
608 if (tp->t_rextents_delta != 0) {
609 msbp->msb_field = XFS_SBS_REXTENTS;
610 msbp->msb_delta = tp->t_rextents_delta;
611 msbp++;
612 }
613 if (tp->t_rextslog_delta != 0) {
614 msbp->msb_field = XFS_SBS_REXTSLOG;
615 msbp->msb_delta = tp->t_rextslog_delta;
616 msbp++;
617 }
618 } 614 }
619 615 if (tp->t_agcount_delta != 0) {
620 /* 616 error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
621 * If we need to change anything, do it.
622 */
623 if (msbp > msb) {
624 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
625 (uint)(msbp - msb), rsvd);
626 if (error) 617 if (error)
627 goto out_undo_ifreecount; 618 goto out_undo_dblocks;
628 } 619 }
629 620 if (tp->t_imaxpct_delta != 0) {
621 error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
622 if (error)
623 goto out_undo_agcount;
624 }
625 if (tp->t_rextsize_delta != 0) {
626 error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
627 tp->t_rextsize_delta);
628 if (error)
629 goto out_undo_imaxpct;
630 }
631 if (tp->t_rbmblocks_delta != 0) {
632 error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
633 tp->t_rbmblocks_delta);
634 if (error)
635 goto out_undo_rextsize;
636 }
637 if (tp->t_rblocks_delta != 0) {
638 error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
639 if (error)
640 goto out_undo_rbmblocks;
641 }
642 if (tp->t_rextents_delta != 0) {
643 error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
644 tp->t_rextents_delta);
645 if (error)
646 goto out_undo_rblocks;
647 }
648 if (tp->t_rextslog_delta != 0) {
649 error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
650 tp->t_rextslog_delta);
651 if (error)
652 goto out_undo_rextents;
653 }
654 spin_unlock(&mp->m_sb_lock);
630 return; 655 return;
631 656
632out_undo_ifreecount: 657out_undo_rextents:
658 if (tp->t_rextents_delta)
659 xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
660out_undo_rblocks:
661 if (tp->t_rblocks_delta)
662 xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
663out_undo_rbmblocks:
664 if (tp->t_rbmblocks_delta)
665 xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
666out_undo_rextsize:
667 if (tp->t_rextsize_delta)
668 xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
669out_undo_imaxpct:
670	if (tp->t_imaxpct_delta)
671 xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
672out_undo_agcount:
673 if (tp->t_agcount_delta)
674 xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
675out_undo_dblocks:
676 if (tp->t_dblocks_delta)
677 xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
678out_undo_frextents:
679 if (rtxdelta)
680 xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
681out_undo_ifree:
682 spin_unlock(&mp->m_sb_lock);
633 if (ifreedelta) 683 if (ifreedelta)
634 xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); 684 xfs_mod_ifree(mp, -ifreedelta);
635out_undo_icount: 685out_undo_icount:
636 if (idelta) 686 if (idelta)
637 xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); 687 xfs_mod_icount(mp, -idelta);
638out_undo_fdblocks: 688out_undo_fdblocks:
639 if (blkdelta) 689 if (blkdelta)
640 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); 690 xfs_mod_fdblocks(mp, -blkdelta, rsvd);
641out: 691out:
642 ASSERT(error == 0); 692 ASSERT(error == 0);
643 return; 693 return;