aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2014-08-22 04:04:15 -0400
committerIngo Molnar <mingo@kernel.org>2014-08-22 04:04:15 -0400
commit80b304fd00e8b667775ff791121b61ecd7cd0c03 (patch)
treeb4f2ec59fe062c43343ee4c2f10a6bcd0e4dcd1b /fs
parentfb21b84e7f809ef04b1e5aed5d463cf0d4866638 (diff)
parent6a7519e81321343165f89abb8b616df186d3e57a (diff)
Merge tag 'efi-urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi into x86/urgent
Pull EFI fixes from Matt Fleming: * WARN_ON(!spin_is_locked()) always triggers on non-SMP machines. Swap it for the more canonical lockdep_assert_held() which always does the right thing - Guenter Roeck * Assign the correct value to efi.runtime_version on arm64 so that all the runtime services can be invoked - Semen Protsenko Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/adfs/adfs.h1
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/dir_fplus.c9
-rw-r--r--fs/aio.c86
-rw-r--r--fs/autofs4/autofs_i.h63
-rw-r--r--fs/autofs4/expire.c1
-rw-r--r--fs/autofs4/root.c10
-rw-r--r--fs/bad_inode.c7
-rw-r--r--fs/befs/linuxvfs.c8
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/inode.c8
-rw-r--r--fs/btrfs/backref.c14
-rw-r--r--fs/btrfs/btrfs_inode.h6
-rw-r--r--fs/btrfs/ctree.c20
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/disk-io.c32
-rw-r--r--fs/btrfs/extent-tree.c285
-rw-r--r--fs/btrfs/file-item.c2
-rw-r--r--fs/btrfs/file.c26
-rw-r--r--fs/btrfs/inode.c71
-rw-r--r--fs/btrfs/ordered-data.c123
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/qgroup.c169
-rw-r--r--fs/btrfs/qgroup.h1
-rw-r--r--fs/btrfs/super.c60
-rw-r--r--fs/btrfs/transaction.c33
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/ulist.h15
-rw-r--r--fs/ceph/acl.c14
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/file.c24
-rw-r--r--fs/ceph/mds_client.c16
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c4
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsfs.h6
-rw-r--r--fs/cifs/cifsglob.h19
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c119
-rw-r--r--fs/cifs/connect.c8
-rw-r--r--fs/cifs/file.c872
-rw-r--r--fs/cifs/inode.c14
-rw-r--r--fs/cifs/misc.c13
-rw-r--r--fs/cifs/sess.c1192
-rw-r--r--fs/cifs/smb1ops.c8
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c2
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c73
-rw-r--r--fs/cifs/smb2pdu.c94
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/cifs/smb2transport.c5
-rw-r--r--fs/cifs/transport.c25
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/psdev.c2
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/cramfs/inode.c45
-rw-r--r--fs/cramfs/uncompress.c10
-rw-r--r--fs/dcache.c196
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/debugfs/inode.c39
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dlm/debug_fs.c15
-rw-r--r--fs/efs/namei.c11
-rw-r--r--fs/exec.c10
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext4/balloc.c1
-rw-r--r--fs/ext4/dir.c25
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/indirect.c281
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c130
-rw-r--r--fs/ext4/mballoc.c41
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/namei.c1
-rw-r--r--fs/ext4/super.c88
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c178
-rw-r--r--fs/f2fs/data.c59
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/dir.c87
-rw-r--r--fs/f2fs/f2fs.h50
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c7
-rw-r--r--fs/f2fs/hash.c4
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c12
-rw-r--r--fs/f2fs/namei.c246
-rw-r--r--fs/f2fs/node.c273
-rw-r--r--fs/f2fs/node.h7
-rw-r--r--fs/f2fs/recovery.c22
-rw-r--r--fs/f2fs/segment.c38
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c21
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/fs_pin.c78
-rw-r--r--fs/fscache/main.c4
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/hostfs/hostfs_kern.c30
-rw-r--r--fs/hostfs/hostfs_user.c28
-rw-r--r--fs/hpfs/dnode.c17
-rw-r--r--fs/inode.c1
-rw-r--r--fs/internal.h7
-rw-r--r--fs/isofs/compress.c4
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/jffs2/xattr.c3
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/lockd/mon.c4
-rw-r--r--fs/locks.c86
-rw-r--r--fs/logfs/readwrite.c15
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namei.c34
-rw-r--r--fs/namespace.c132
-rw-r--r--fs/nfs/blocklayout/blocklayout.c101
-rw-r--r--fs/nfs/callback.c12
-rw-r--r--fs/nfs/client.c113
-rw-r--r--fs/nfs/delegation.c34
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c208
-rw-r--r--fs/nfs/direct.c33
-rw-r--r--fs/nfs/filelayout/filelayout.c298
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c2
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/idmap.c10
-rw-r--r--fs/nfs/inode.c12
-rw-r--r--fs/nfs/internal.h20
-rw-r--r--fs/nfs/netns.h3
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3proc.c21
-rw-r--r--fs/nfs/nfs4_fs.h32
-rw-r--r--fs/nfs/nfs4client.c5
-rw-r--r--fs/nfs/nfs4proc.c248
-rw-r--r--fs/nfs/nfs4state.c69
-rw-r--r--fs/nfs/nfs4trace.h28
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c81
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c276
-rw-r--r--fs/nfs/pnfs.c178
-rw-r--r--fs/nfs/pnfs.h45
-rw-r--r--fs/nfs/proc.c27
-rw-r--r--fs/nfs/read.c54
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c150
-rw-r--r--fs/nfs_common/nfsacl.c5
-rw-r--r--fs/nfsd/acl.h2
-rw-r--r--fs/nfsd/auth.c2
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/export.h3
-rw-r--r--fs/nfsd/fault_inject.c138
-rw-r--r--fs/nfsd/netns.h23
-rw-r--r--fs/nfsd/nfs2acl.c8
-rw-r--r--fs/nfsd/nfs3acl.c8
-rw-r--r--fs/nfsd/nfs3proc.c9
-rw-r--r--fs/nfsd/nfs3xdr.c30
-rw-r--r--fs/nfsd/nfs4acl.c39
-rw-r--r--fs/nfsd/nfs4callback.c32
-rw-r--r--fs/nfsd/nfs4proc.c53
-rw-r--r--fs/nfsd/nfs4state.c3096
-rw-r--r--fs/nfsd/nfs4xdr.c128
-rw-r--r--fs/nfsd/nfscache.c13
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsfh.c12
-rw-r--r--fs/nfsd/nfsfh.h15
-rw-r--r--fs/nfsd/nfsproc.c13
-rw-r--r--fs/nfsd/nfssvc.c21
-rw-r--r--fs/nfsd/nfsxdr.c14
-rw-r--r--fs/nfsd/state.h220
-rw-r--r--fs/nfsd/vfs.c48
-rw-r--r--fs/nfsd/vfs.h8
-rw-r--r--fs/nfsd/xdr4.h30
-rw-r--r--fs/nilfs2/Makefile2
-rw-r--r--fs/nilfs2/nilfs.h8
-rw-r--r--fs/nilfs2/super.c11
-rw-r--r--fs/nilfs2/sysfs.c1137
-rw-r--r--fs/nilfs2/sysfs.h176
-rw-r--r--fs/nilfs2/the_nilfs.c17
-rw-r--r--fs/nilfs2/the_nilfs.h20
-rw-r--r--fs/notify/fanotify/fanotify.c11
-rw-r--r--fs/notify/fanotify/fanotify_user.c14
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/notify/notification.c37
-rw-r--r--fs/notify/vfsmount_mark.c2
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/alloc.c15
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c4
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/omfs/inode.c2
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/proc/base.c199
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c32
-rw-r--r--fs/proc/inode.c7
-rw-r--r--fs/proc/internal.h15
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/proc_net.c4
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/proc/proc_tty.c4
-rw-r--r--fs/proc/root.c7
-rw-r--r--fs/proc/task_mmu.c27
-rw-r--r--fs/proc/thread_self.c85
-rw-r--r--fs/proc/vmcore.c82
-rw-r--r--fs/proc_namespace.c8
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/qnx6/Makefile1
-rw-r--r--fs/qnx6/dir.c26
-rw-r--r--fs/qnx6/inode.c99
-rw-r--r--fs/qnx6/namei.c6
-rw-r--r--fs/qnx6/qnx6.h12
-rw-r--r--fs/qnx6/super_mmi.c22
-rw-r--r--fs/quota/dquot.c180
-rw-r--r--fs/quota/kqid.c2
-rw-r--r--fs/quota/netlink.c3
-rw-r--r--fs/quota/quota.c6
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/do_balan.c113
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/ibalance.c2
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/item_ops.c4
-rw-r--r--fs/reiserfs/journal.c22
-rw-r--r--fs/reiserfs/lbalance.c7
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/reiserfs.h9
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c15
-rw-r--r--fs/reiserfs/xattr.c22
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/squashfs/file_direct.c2
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/super.c20
-rw-r--r--fs/timerfd.c77
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/log.c12
-rw-r--r--fs/ubifs/lpt.c5
-rw-r--r--fs/ubifs/lpt_commit.c7
-rw-r--r--fs/ubifs/master.c7
-rw-r--r--fs/ubifs/orphan.c1
-rw-r--r--fs/ubifs/recovery.c5
-rw-r--r--fs/ubifs/sb.c4
-rw-r--r--fs/ubifs/scan.c14
-rw-r--r--fs/ubifs/super.c19
-rw-r--r--fs/ubifs/tnc.c1
-rw-r--r--fs/ubifs/tnc_commit.c1
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/file.c22
-rw-r--r--fs/udf/lowlevel.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/udf/unicode.c9
-rw-r--r--fs/ufs/Makefile1
-rw-r--r--fs/ufs/inode.c32
-rw-r--r--fs/ufs/super.c304
-rw-r--r--fs/ufs/ufs.h10
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile71
-rw-r--r--fs/xfs/libxfs/xfs_ag.h (renamed from fs/xfs/xfs_ag.h)0
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c (renamed from fs/xfs/xfs_alloc.c)20
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h (renamed from fs/xfs/xfs_alloc.h)0
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c (renamed from fs/xfs/xfs_alloc_btree.c)6
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.h (renamed from fs/xfs/xfs_alloc_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr.c (renamed from fs/xfs/xfs_attr.c)92
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c (renamed from fs/xfs/xfs_attr_leaf.c)78
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h (renamed from fs/xfs/xfs_attr_leaf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c (renamed from fs/xfs/xfs_attr_remote.c)22
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h (renamed from fs/xfs/xfs_attr_remote.h)0
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h (renamed from fs/xfs/xfs_attr_sf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bit.h (renamed from fs/xfs/xfs_bit.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c (renamed from fs/xfs/xfs_bmap.c)60
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h (renamed from fs/xfs/xfs_bmap.h)0
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c (renamed from fs/xfs/xfs_bmap_btree.c)99
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h (renamed from fs/xfs/xfs_bmap_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_btree.c (renamed from fs/xfs/xfs_btree.c)46
-rw-r--r--fs/xfs/libxfs/xfs_btree.h (renamed from fs/xfs/xfs_btree.h)2
-rw-r--r--fs/xfs/libxfs/xfs_cksum.h (renamed from fs/xfs/xfs_cksum.h)0
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c (renamed from fs/xfs/xfs_da_btree.c)112
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h (renamed from fs/xfs/xfs_da_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c (renamed from fs/xfs/xfs_da_format.c)0
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h (renamed from fs/xfs/xfs_da_format.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dinode.h (renamed from fs/xfs/xfs_dinode.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c (renamed from fs/xfs/xfs_dir2.c)24
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h (renamed from fs/xfs/xfs_dir2.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c (renamed from fs/xfs/xfs_dir2_block.c)18
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c (renamed from fs/xfs/xfs_dir2_data.c)10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c (renamed from fs/xfs/xfs_dir2_leaf.c)24
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c (renamed from fs/xfs/xfs_dir2_node.c)40
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h (renamed from fs/xfs/xfs_dir2_priv.h)0
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c (renamed from fs/xfs/xfs_dir2_sf.c)75
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c (renamed from fs/xfs/xfs_dquot_buf.c)6
-rw-r--r--fs/xfs/libxfs/xfs_format.h (renamed from fs/xfs/xfs_format.h)14
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c (renamed from fs/xfs/xfs_ialloc.c)34
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h (renamed from fs/xfs/xfs_ialloc.h)0
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c (renamed from fs/xfs/xfs_ialloc_btree.c)6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h (renamed from fs/xfs/xfs_ialloc_btree.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c (renamed from fs/xfs/xfs_inode_buf.c)10
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h (renamed from fs/xfs/xfs_inode_buf.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c (renamed from fs/xfs/xfs_inode_fork.c)36
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h (renamed from fs/xfs/xfs_inode_fork.h)0
-rw-r--r--fs/xfs/libxfs/xfs_inum.h (renamed from fs/xfs/xfs_inum.h)4
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h (renamed from fs/xfs/xfs_log_format.h)4
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h (renamed from fs/xfs/xfs_log_recover.h)0
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c (renamed from fs/xfs/xfs_log_rlimit.c)0
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h (renamed from fs/xfs/xfs_quota_defs.h)2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c (renamed from fs/xfs/xfs_rtbitmap.c)0
-rw-r--r--fs/xfs/libxfs/xfs_sb.c (renamed from fs/xfs/xfs_sb.c)56
-rw-r--r--fs/xfs/libxfs/xfs_sb.h (renamed from fs/xfs/xfs_sb.h)8
-rw-r--r--fs/xfs/libxfs/xfs_shared.h (renamed from fs/xfs/xfs_shared.h)0
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c (renamed from fs/xfs/xfs_symlink_remote.c)6
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c (renamed from fs/xfs/xfs_trans_resv.c)0
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.h (renamed from fs/xfs/xfs_trans_resv.h)0
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h (renamed from fs/xfs/xfs_trans_space.h)0
-rw-r--r--fs/xfs/xfs_acl.c8
-rw-r--r--fs/xfs/xfs_aops.c18
-rw-r--r--fs/xfs/xfs_attr_inactive.c22
-rw-r--r--fs/xfs/xfs_attr_list.c38
-rw-r--r--fs/xfs/xfs_bmap_util.c174
-rw-r--r--fs/xfs/xfs_buf.c40
-rw-r--r--fs/xfs/xfs_buf.h2
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_dir2_readdir.c4
-rw-r--r--fs/xfs/xfs_discard.c18
-rw-r--r--fs/xfs/xfs_dquot.c41
-rw-r--r--fs/xfs/xfs_dquot.h15
-rw-r--r--fs/xfs/xfs_error.c25
-rw-r--r--fs/xfs/xfs_error.h13
-rw-r--r--fs/xfs/xfs_export.c10
-rw-r--r--fs/xfs/xfs_extfree_item.c2
-rw-r--r--fs/xfs/xfs_file.c75
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fs.h7
-rw-r--r--fs/xfs/xfs_fsops.c42
-rw-r--r--fs/xfs/xfs_icache.c148
-rw-r--r--fs/xfs/xfs_icache.h13
-rw-r--r--fs/xfs/xfs_inode.c68
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_ioctl.c266
-rw-r--r--fs/xfs/xfs_ioctl32.c111
-rw-r--r--fs/xfs/xfs_iomap.c54
-rw-r--r--fs/xfs/xfs_iops.c72
-rw-r--r--fs/xfs/xfs_itable.c579
-rw-r--r--fs/xfs/xfs_itable.h23
-rw-r--r--fs/xfs/xfs_linux.h27
-rw-r--r--fs/xfs/xfs_log.c69
-rw-r--r--fs/xfs/xfs_log_cil.c8
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c284
-rw-r--r--fs/xfs/xfs_mount.c97
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_mru_cache.c14
-rw-r--r--fs/xfs/xfs_qm.c229
-rw-r--r--fs/xfs/xfs_qm.h1
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c46
-rw-r--r--fs/xfs/xfs_quotaops.c20
-rw-r--r--fs/xfs/xfs_rtalloc.c24
-rw-r--r--fs/xfs/xfs_rtalloc.h2
-rw-r--r--fs/xfs/xfs_super.c132
-rw-r--r--fs/xfs/xfs_super.h15
-rw-r--r--fs/xfs/xfs_symlink.c30
-rw-r--r--fs/xfs/xfs_sysfs.c165
-rw-r--r--fs/xfs/xfs_sysfs.h59
-rw-r--r--fs/xfs/xfs_trans.c10
-rw-r--r--fs/xfs/xfs_trans_ail.c4
-rw-r--r--fs/xfs/xfs_trans_buf.c37
-rw-r--r--fs/xfs/xfs_trans_dquot.c4
-rw-r--r--fs/xfs/xfs_types.h29
-rw-r--r--fs/xfs/xfs_vnode.h46
-rw-r--r--fs/xfs/xfs_xattr.c6
403 files changed, 12433 insertions, 7255 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 4030cbfbc9af..90c88529892b 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o splice.o sync.o utimes.o \ 13 pnode.o splice.o sync.o utimes.o \
14 stack.o fs_struct.o statfs.o 14 stack.o fs_struct.o statfs.o fs_pin.o
15 15
16ifeq ($(CONFIG_BLOCK),y) 16ifeq ($(CONFIG_BLOCK),y)
17obj-y += buffer.o block_dev.o direct-io.o mpage.o 17obj-y += buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index c770337c4b45..24575d9d882d 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -153,6 +153,7 @@ extern int adfs_map_lookup(struct super_block *sb, unsigned int frag_id, unsigne
153extern unsigned int adfs_map_free(struct super_block *sb); 153extern unsigned int adfs_map_free(struct super_block *sb);
154 154
155/* Misc */ 155/* Misc */
156__printf(3, 4)
156void __adfs_error(struct super_block *sb, const char *function, 157void __adfs_error(struct super_block *sb, const char *function,
157 const char *fmt, ...); 158 const char *fmt, ...);
158#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt) 159#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 0d138c0de293..51c279a29845 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -138,7 +138,7 @@ adfs_dir_lookup_byname(struct inode *inode, struct qstr *name, struct object_inf
138 goto out; 138 goto out;
139 139
140 if (ADFS_I(inode)->parent_id != dir.parent_id) { 140 if (ADFS_I(inode)->parent_id != dir.parent_id) {
141 adfs_error(sb, "parent directory changed under me! (%lx but got %lx)\n", 141 adfs_error(sb, "parent directory changed under me! (%lx but got %x)\n",
142 ADFS_I(inode)->parent_id, dir.parent_id); 142 ADFS_I(inode)->parent_id, dir.parent_id);
143 ret = -EIO; 143 ret = -EIO;
144 goto free_out; 144 goto free_out;
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index d9e3bee4e653..f2ba88ab4aed 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -55,10 +55,10 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
55 } 55 }
56 56
57 size >>= sb->s_blocksize_bits; 57 size >>= sb->s_blocksize_bits;
58 if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) { 58 if (size > ARRAY_SIZE(dir->bh)) {
59 /* this directory is too big for fixed bh set, must allocate */ 59 /* this directory is too big for fixed bh set, must allocate */
60 struct buffer_head **bh_fplus = 60 struct buffer_head **bh_fplus =
61 kzalloc(size * sizeof(struct buffer_head *), 61 kcalloc(size, sizeof(struct buffer_head *),
62 GFP_KERNEL); 62 GFP_KERNEL);
63 if (!bh_fplus) { 63 if (!bh_fplus) {
64 adfs_error(sb, "not enough memory for" 64 adfs_error(sb, "not enough memory for"
@@ -79,9 +79,8 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
79 79
80 dir->bh_fplus[blk] = sb_bread(sb, block); 80 dir->bh_fplus[blk] = sb_bread(sb, block);
81 if (!dir->bh_fplus[blk]) { 81 if (!dir->bh_fplus[blk]) {
82 adfs_error(sb, "dir object %X failed read for" 82 adfs_error(sb, "dir object %x failed read for offset %d, mapped block %lX",
83 " offset %d, mapped block %X", 83 id, blk, block);
84 id, blk, block);
85 goto out; 84 goto out;
86 } 85 }
87 86
diff --git a/fs/aio.c b/fs/aio.c
index bd7ec2cc2674..ae635872affb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -192,7 +192,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
192 } 192 }
193 193
194 file->f_flags = O_RDWR; 194 file->f_flags = O_RDWR;
195 file->private_data = ctx;
196 return file; 195 return file;
197} 196}
198 197
@@ -202,7 +201,7 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
202 static const struct dentry_operations ops = { 201 static const struct dentry_operations ops = {
203 .d_dname = simple_dname, 202 .d_dname = simple_dname,
204 }; 203 };
205 return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); 204 return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
206} 205}
207 206
208/* aio_setup 207/* aio_setup
@@ -556,8 +555,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
556 struct aio_ring *ring; 555 struct aio_ring *ring;
557 556
558 spin_lock(&mm->ioctx_lock); 557 spin_lock(&mm->ioctx_lock);
559 rcu_read_lock(); 558 table = rcu_dereference_raw(mm->ioctx_table);
560 table = rcu_dereference(mm->ioctx_table);
561 559
562 while (1) { 560 while (1) {
563 if (table) 561 if (table)
@@ -565,7 +563,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
565 if (!table->table[i]) { 563 if (!table->table[i]) {
566 ctx->id = i; 564 ctx->id = i;
567 table->table[i] = ctx; 565 table->table[i] = ctx;
568 rcu_read_unlock();
569 spin_unlock(&mm->ioctx_lock); 566 spin_unlock(&mm->ioctx_lock);
570 567
571 /* While kioctx setup is in progress, 568 /* While kioctx setup is in progress,
@@ -579,8 +576,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
579 } 576 }
580 577
581 new_nr = (table ? table->nr : 1) * 4; 578 new_nr = (table ? table->nr : 1) * 4;
582
583 rcu_read_unlock();
584 spin_unlock(&mm->ioctx_lock); 579 spin_unlock(&mm->ioctx_lock);
585 580
586 table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * 581 table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
@@ -591,8 +586,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
591 table->nr = new_nr; 586 table->nr = new_nr;
592 587
593 spin_lock(&mm->ioctx_lock); 588 spin_lock(&mm->ioctx_lock);
594 rcu_read_lock(); 589 old = rcu_dereference_raw(mm->ioctx_table);
595 old = rcu_dereference(mm->ioctx_table);
596 590
597 if (!old) { 591 if (!old) {
598 rcu_assign_pointer(mm->ioctx_table, table); 592 rcu_assign_pointer(mm->ioctx_table, table);
@@ -739,12 +733,9 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
739 733
740 734
741 spin_lock(&mm->ioctx_lock); 735 spin_lock(&mm->ioctx_lock);
742 rcu_read_lock(); 736 table = rcu_dereference_raw(mm->ioctx_table);
743 table = rcu_dereference(mm->ioctx_table);
744
745 WARN_ON(ctx != table->table[ctx->id]); 737 WARN_ON(ctx != table->table[ctx->id]);
746 table->table[ctx->id] = NULL; 738 table->table[ctx->id] = NULL;
747 rcu_read_unlock();
748 spin_unlock(&mm->ioctx_lock); 739 spin_unlock(&mm->ioctx_lock);
749 740
750 /* percpu_ref_kill() will do the necessary call_rcu() */ 741 /* percpu_ref_kill() will do the necessary call_rcu() */
@@ -793,40 +784,30 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
793 */ 784 */
794void exit_aio(struct mm_struct *mm) 785void exit_aio(struct mm_struct *mm)
795{ 786{
796 struct kioctx_table *table; 787 struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
797 struct kioctx *ctx; 788 int i;
798 unsigned i = 0;
799
800 while (1) {
801 rcu_read_lock();
802 table = rcu_dereference(mm->ioctx_table);
803
804 do {
805 if (!table || i >= table->nr) {
806 rcu_read_unlock();
807 rcu_assign_pointer(mm->ioctx_table, NULL);
808 if (table)
809 kfree(table);
810 return;
811 }
812 789
813 ctx = table->table[i++]; 790 if (!table)
814 } while (!ctx); 791 return;
815 792
816 rcu_read_unlock(); 793 for (i = 0; i < table->nr; ++i) {
794 struct kioctx *ctx = table->table[i];
817 795
796 if (!ctx)
797 continue;
818 /* 798 /*
819 * We don't need to bother with munmap() here - 799 * We don't need to bother with munmap() here - exit_mmap(mm)
820 * exit_mmap(mm) is coming and it'll unmap everything. 800 * is coming and it'll unmap everything. And we simply can't,
821 * Since aio_free_ring() uses non-zero ->mmap_size 801 * this is not necessarily our ->mm.
822 * as indicator that it needs to unmap the area, 802 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
823 * just set it to 0; aio_free_ring() is the only 803 * that it needs to unmap the area, just set it to 0.
824 * place that uses ->mmap_size, so it's safe.
825 */ 804 */
826 ctx->mmap_size = 0; 805 ctx->mmap_size = 0;
827
828 kill_ioctx(mm, ctx, NULL); 806 kill_ioctx(mm, ctx, NULL);
829 } 807 }
808
809 RCU_INIT_POINTER(mm->ioctx_table, NULL);
810 kfree(table);
830} 811}
831 812
832static void put_reqs_available(struct kioctx *ctx, unsigned nr) 813static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -834,10 +815,8 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
834 struct kioctx_cpu *kcpu; 815 struct kioctx_cpu *kcpu;
835 unsigned long flags; 816 unsigned long flags;
836 817
837 preempt_disable();
838 kcpu = this_cpu_ptr(ctx->cpu);
839
840 local_irq_save(flags); 818 local_irq_save(flags);
819 kcpu = this_cpu_ptr(ctx->cpu);
841 kcpu->reqs_available += nr; 820 kcpu->reqs_available += nr;
842 821
843 while (kcpu->reqs_available >= ctx->req_batch * 2) { 822 while (kcpu->reqs_available >= ctx->req_batch * 2) {
@@ -846,7 +825,6 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
846 } 825 }
847 826
848 local_irq_restore(flags); 827 local_irq_restore(flags);
849 preempt_enable();
850} 828}
851 829
852static bool get_reqs_available(struct kioctx *ctx) 830static bool get_reqs_available(struct kioctx *ctx)
@@ -855,10 +833,8 @@ static bool get_reqs_available(struct kioctx *ctx)
855 bool ret = false; 833 bool ret = false;
856 unsigned long flags; 834 unsigned long flags;
857 835
858 preempt_disable();
859 kcpu = this_cpu_ptr(ctx->cpu);
860
861 local_irq_save(flags); 836 local_irq_save(flags);
837 kcpu = this_cpu_ptr(ctx->cpu);
862 if (!kcpu->reqs_available) { 838 if (!kcpu->reqs_available) {
863 int old, avail = atomic_read(&ctx->reqs_available); 839 int old, avail = atomic_read(&ctx->reqs_available);
864 840
@@ -878,7 +854,6 @@ static bool get_reqs_available(struct kioctx *ctx)
878 kcpu->reqs_available--; 854 kcpu->reqs_available--;
879out: 855out:
880 local_irq_restore(flags); 856 local_irq_restore(flags);
881 preempt_enable();
882 return ret; 857 return ret;
883} 858}
884 859
@@ -1047,7 +1022,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1047} 1022}
1048EXPORT_SYMBOL(aio_complete); 1023EXPORT_SYMBOL(aio_complete);
1049 1024
1050/* aio_read_events 1025/* aio_read_events_ring
1051 * Pull an event off of the ioctx's event ring. Returns the number of 1026 * Pull an event off of the ioctx's event ring. Returns the number of
1052 * events fetched 1027 * events fetched
1053 */ 1028 */
@@ -1270,12 +1245,12 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
1270 if (compat) 1245 if (compat)
1271 ret = compat_rw_copy_check_uvector(rw, 1246 ret = compat_rw_copy_check_uvector(rw,
1272 (struct compat_iovec __user *)buf, 1247 (struct compat_iovec __user *)buf,
1273 *nr_segs, 1, *iovec, iovec); 1248 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1274 else 1249 else
1275#endif 1250#endif
1276 ret = rw_copy_check_uvector(rw, 1251 ret = rw_copy_check_uvector(rw,
1277 (struct iovec __user *)buf, 1252 (struct iovec __user *)buf,
1278 *nr_segs, 1, *iovec, iovec); 1253 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1279 if (ret < 0) 1254 if (ret < 0)
1280 return ret; 1255 return ret;
1281 1256
@@ -1299,9 +1274,8 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1299} 1274}
1300 1275
1301/* 1276/*
1302 * aio_setup_iocb: 1277 * aio_run_iocb:
1303 * Performs the initial checks and aio retry method 1278 * Performs the initial checks and io submission.
1304 * setup for the kiocb at the time of io submission.
1305 */ 1279 */
1306static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, 1280static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1307 char __user *buf, bool compat) 1281 char __user *buf, bool compat)
@@ -1313,7 +1287,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1313 fmode_t mode; 1287 fmode_t mode;
1314 aio_rw_op *rw_op; 1288 aio_rw_op *rw_op;
1315 rw_iter_op *iter_op; 1289 rw_iter_op *iter_op;
1316 struct iovec inline_vec, *iovec = &inline_vec; 1290 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
1317 struct iov_iter iter; 1291 struct iov_iter iter;
1318 1292
1319 switch (opcode) { 1293 switch (opcode) {
@@ -1348,7 +1322,7 @@ rw_common:
1348 if (!ret) 1322 if (!ret)
1349 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1323 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
1350 if (ret < 0) { 1324 if (ret < 0) {
1351 if (iovec != &inline_vec) 1325 if (iovec != inline_vecs)
1352 kfree(iovec); 1326 kfree(iovec);
1353 return ret; 1327 return ret;
1354 } 1328 }
@@ -1395,7 +1369,7 @@ rw_common:
1395 return -EINVAL; 1369 return -EINVAL;
1396 } 1370 }
1397 1371
1398 if (iovec != &inline_vec) 1372 if (iovec != inline_vecs)
1399 kfree(iovec); 1373 kfree(iovec);
1400 1374
1401 if (ret != -EIOCBQUEUED) { 1375 if (ret != -EIOCBQUEUED) {
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index acf32054edd8..9e359fb20c0a 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -143,20 +143,6 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
143 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; 143 return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
144} 144}
145 145
146/* Does a dentry have some pending activity? */
147static inline int autofs4_ispending(struct dentry *dentry)
148{
149 struct autofs_info *inf = autofs4_dentry_ino(dentry);
150
151 if (inf->flags & AUTOFS_INF_PENDING)
152 return 1;
153
154 if (inf->flags & AUTOFS_INF_EXPIRING)
155 return 1;
156
157 return 0;
158}
159
160struct inode *autofs4_get_inode(struct super_block *, umode_t); 146struct inode *autofs4_get_inode(struct super_block *, umode_t);
161void autofs4_free_ino(struct autofs_info *); 147void autofs4_free_ino(struct autofs_info *);
162 148
@@ -191,55 +177,6 @@ extern const struct file_operations autofs4_root_operations;
191extern const struct dentry_operations autofs4_dentry_operations; 177extern const struct dentry_operations autofs4_dentry_operations;
192 178
193/* VFS automount flags management functions */ 179/* VFS automount flags management functions */
194
195static inline void __managed_dentry_set_automount(struct dentry *dentry)
196{
197 dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
198}
199
200static inline void managed_dentry_set_automount(struct dentry *dentry)
201{
202 spin_lock(&dentry->d_lock);
203 __managed_dentry_set_automount(dentry);
204 spin_unlock(&dentry->d_lock);
205}
206
207static inline void __managed_dentry_clear_automount(struct dentry *dentry)
208{
209 dentry->d_flags &= ~DCACHE_NEED_AUTOMOUNT;
210}
211
212static inline void managed_dentry_clear_automount(struct dentry *dentry)
213{
214 spin_lock(&dentry->d_lock);
215 __managed_dentry_clear_automount(dentry);
216 spin_unlock(&dentry->d_lock);
217}
218
219static inline void __managed_dentry_set_transit(struct dentry *dentry)
220{
221 dentry->d_flags |= DCACHE_MANAGE_TRANSIT;
222}
223
224static inline void managed_dentry_set_transit(struct dentry *dentry)
225{
226 spin_lock(&dentry->d_lock);
227 __managed_dentry_set_transit(dentry);
228 spin_unlock(&dentry->d_lock);
229}
230
231static inline void __managed_dentry_clear_transit(struct dentry *dentry)
232{
233 dentry->d_flags &= ~DCACHE_MANAGE_TRANSIT;
234}
235
236static inline void managed_dentry_clear_transit(struct dentry *dentry)
237{
238 spin_lock(&dentry->d_lock);
239 __managed_dentry_clear_transit(dentry);
240 spin_unlock(&dentry->d_lock);
241}
242
243static inline void __managed_dentry_set_managed(struct dentry *dentry) 180static inline void __managed_dentry_set_managed(struct dentry *dentry)
244{ 181{
245 dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT); 182 dentry->d_flags |= (DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 394e90b02c5e..a7be57e39be7 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -333,7 +333,6 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
333 if (ino->flags & AUTOFS_INF_PENDING) 333 if (ino->flags & AUTOFS_INF_PENDING)
334 goto out; 334 goto out;
335 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 335 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
336 struct autofs_info *ino = autofs4_dentry_ino(root);
337 ino->flags |= AUTOFS_INF_EXPIRING; 336 ino->flags |= AUTOFS_INF_EXPIRING;
338 init_completion(&ino->expire_complete); 337 init_completion(&ino->expire_complete);
339 spin_unlock(&sbi->fs_lock); 338 spin_unlock(&sbi->fs_lock);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index cc87c1abac97..cdb25ebccc4c 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -166,8 +166,10 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
166 const unsigned char *str = name->name; 166 const unsigned char *str = name->name;
167 struct list_head *p, *head; 167 struct list_head *p, *head;
168 168
169 spin_lock(&sbi->lookup_lock);
170 head = &sbi->active_list; 169 head = &sbi->active_list;
170 if (list_empty(head))
171 return NULL;
172 spin_lock(&sbi->lookup_lock);
171 list_for_each(p, head) { 173 list_for_each(p, head) {
172 struct autofs_info *ino; 174 struct autofs_info *ino;
173 struct dentry *active; 175 struct dentry *active;
@@ -218,8 +220,10 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
218 const unsigned char *str = name->name; 220 const unsigned char *str = name->name;
219 struct list_head *p, *head; 221 struct list_head *p, *head;
220 222
221 spin_lock(&sbi->lookup_lock);
222 head = &sbi->expiring_list; 223 head = &sbi->expiring_list;
224 if (list_empty(head))
225 return NULL;
226 spin_lock(&sbi->lookup_lock);
223 list_for_each(p, head) { 227 list_for_each(p, head) {
224 struct autofs_info *ino; 228 struct autofs_info *ino;
225 struct dentry *expiring; 229 struct dentry *expiring;
@@ -373,7 +377,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
373 * this because the leaves of the directory tree under the 377 * this because the leaves of the directory tree under the
374 * mount never trigger mounts themselves (they have an autofs 378 * mount never trigger mounts themselves (they have an autofs
375 * trigger mount mounted on them). But v4 pseudo direct mounts 379 * trigger mount mounted on them). But v4 pseudo direct mounts
376 * do need the leaves to to trigger mounts. In this case we 380 * do need the leaves to trigger mounts. In this case we
377 * have no choice but to use the list_empty() check and 381 * have no choice but to use the list_empty() check and
378 * require user space behave. 382 * require user space behave.
379 */ 383 */
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 7c93953030fb..afd2b4408adf 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -218,8 +218,9 @@ static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
218 return -EIO; 218 return -EIO;
219} 219}
220 220
221static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry, 221static int bad_inode_rename2(struct inode *old_dir, struct dentry *old_dentry,
222 struct inode *new_dir, struct dentry *new_dentry) 222 struct inode *new_dir, struct dentry *new_dentry,
223 unsigned int flags)
223{ 224{
224 return -EIO; 225 return -EIO;
225} 226}
@@ -279,7 +280,7 @@ static const struct inode_operations bad_inode_ops =
279 .mkdir = bad_inode_mkdir, 280 .mkdir = bad_inode_mkdir,
280 .rmdir = bad_inode_rmdir, 281 .rmdir = bad_inode_rmdir,
281 .mknod = bad_inode_mknod, 282 .mknod = bad_inode_mknod,
282 .rename = bad_inode_rename, 283 .rename2 = bad_inode_rename2,
283 .readlink = bad_inode_readlink, 284 .readlink = bad_inode_readlink,
284 /* follow_link must be no-op, otherwise unmounting this inode 285 /* follow_link must be no-op, otherwise unmounting this inode
285 won't work */ 286 won't work */
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index a16fbd4e8241..4cf61ec6b7a8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -799,13 +799,11 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
799 799
800 befs_debug(sb, "---> %s", __func__); 800 befs_debug(sb, "---> %s", __func__);
801 801
802#ifndef CONFIG_BEFS_RW
803 if (!(sb->s_flags & MS_RDONLY)) { 802 if (!(sb->s_flags & MS_RDONLY)) {
804 befs_warning(sb, 803 befs_warning(sb,
805 "No write support. Marking filesystem read-only"); 804 "No write support. Marking filesystem read-only");
806 sb->s_flags |= MS_RDONLY; 805 sb->s_flags |= MS_RDONLY;
807 } 806 }
808#endif /* CONFIG_BEFS_RW */
809 807
810 /* 808 /*
811 * Set dummy blocksize to read super block. 809 * Set dummy blocksize to read super block.
@@ -834,16 +832,14 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
834 (befs_super_block *) ((void *) bh->b_data + x86_sb_off); 832 (befs_super_block *) ((void *) bh->b_data + x86_sb_off);
835 } 833 }
836 834
837 if (befs_load_sb(sb, disk_sb) != BEFS_OK) 835 if ((befs_load_sb(sb, disk_sb) != BEFS_OK) ||
836 (befs_check_sb(sb) != BEFS_OK))
838 goto unacquire_bh; 837 goto unacquire_bh;
839 838
840 befs_dump_super_block(sb, disk_sb); 839 befs_dump_super_block(sb, disk_sb);
841 840
842 brelse(bh); 841 brelse(bh);
843 842
844 if (befs_check_sb(sb) != BEFS_OK)
845 goto unacquire_priv_sbp;
846
847 if( befs_sb->num_blocks > ~((sector_t)0) ) { 843 if( befs_sb->num_blocks > ~((sector_t)0) ) {
848 befs_error(sb, "blocks count: %llu " 844 befs_error(sb, "blocks count: %llu "
849 "is larger than the host can use", 845 "is larger than the host can use",
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index f7f87e233dd9..f40006db36df 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -46,6 +46,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
49extern void bfs_dump_imap(const char *, struct super_block *);
49 50
50/* file.c */ 51/* file.c */
51extern const struct inode_operations bfs_file_inops; 52extern const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index a399e6d9dc74..08063ae0a17c 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -75,8 +75,6 @@ const struct file_operations bfs_dir_operations = {
75 .llseek = generic_file_llseek, 75 .llseek = generic_file_llseek,
76}; 76};
77 77
78extern void dump_imap(const char *, struct super_block *);
79
80static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 78static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
81 bool excl) 79 bool excl)
82{ 80{
@@ -110,7 +108,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
110 BFS_I(inode)->i_eblock = 0; 108 BFS_I(inode)->i_eblock = 0;
111 insert_inode_hash(inode); 109 insert_inode_hash(inode);
112 mark_inode_dirty(inode); 110 mark_inode_dirty(inode);
113 dump_imap("create", s); 111 bfs_dump_imap("create", s);
114 112
115 err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len, 113 err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
116 inode->i_ino); 114 inode->i_ino);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 7041ac35ace8..90bc079d9982 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,8 +30,6 @@ MODULE_LICENSE("GPL");
30#define dprintf(x...) 30#define dprintf(x...)
31#endif 31#endif
32 32
33void dump_imap(const char *prefix, struct super_block *s);
34
35struct inode *bfs_iget(struct super_block *sb, unsigned long ino) 33struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
36{ 34{
37 struct bfs_inode *di; 35 struct bfs_inode *di;
@@ -194,7 +192,7 @@ static void bfs_evict_inode(struct inode *inode)
194 info->si_freeb += bi->i_eblock + 1 - bi->i_sblock; 192 info->si_freeb += bi->i_eblock + 1 - bi->i_sblock;
195 info->si_freei++; 193 info->si_freei++;
196 clear_bit(ino, info->si_imap); 194 clear_bit(ino, info->si_imap);
197 dump_imap("delete_inode", s); 195 bfs_dump_imap("delete_inode", s);
198 } 196 }
199 197
200 /* 198 /*
@@ -297,7 +295,7 @@ static const struct super_operations bfs_sops = {
297 .statfs = bfs_statfs, 295 .statfs = bfs_statfs,
298}; 296};
299 297
300void dump_imap(const char *prefix, struct super_block *s) 298void bfs_dump_imap(const char *prefix, struct super_block *s)
301{ 299{
302#ifdef DEBUG 300#ifdef DEBUG
303 int i; 301 int i;
@@ -443,7 +441,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
443 } 441 }
444 brelse(bh); 442 brelse(bh);
445 brelse(sbh); 443 brelse(sbh);
446 dump_imap("read_super", s); 444 bfs_dump_imap("read_super", s);
447 return 0; 445 return 0;
448 446
449out3: 447out3:
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e25564bfcb46..54a201dac7f9 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
276 } 276 }
277 if (ret > 0) 277 if (ret > 0)
278 goto next; 278 goto next;
279 ret = ulist_add_merge(parents, eb->start, 279 ret = ulist_add_merge_ptr(parents, eb->start,
280 (uintptr_t)eie, 280 eie, (void **)&old, GFP_NOFS);
281 (u64 *)&old, GFP_NOFS);
282 if (ret < 0) 281 if (ret < 0)
283 break; 282 break;
284 if (!ret && extent_item_pos) { 283 if (!ret && extent_item_pos) {
@@ -1001,16 +1000,19 @@ again:
1001 ret = -EIO; 1000 ret = -EIO;
1002 goto out; 1001 goto out;
1003 } 1002 }
1003 btrfs_tree_read_lock(eb);
1004 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1004 ret = find_extent_in_eb(eb, bytenr, 1005 ret = find_extent_in_eb(eb, bytenr,
1005 *extent_item_pos, &eie); 1006 *extent_item_pos, &eie);
1007 btrfs_tree_read_unlock_blocking(eb);
1006 free_extent_buffer(eb); 1008 free_extent_buffer(eb);
1007 if (ret < 0) 1009 if (ret < 0)
1008 goto out; 1010 goto out;
1009 ref->inode_list = eie; 1011 ref->inode_list = eie;
1010 } 1012 }
1011 ret = ulist_add_merge(refs, ref->parent, 1013 ret = ulist_add_merge_ptr(refs, ref->parent,
1012 (uintptr_t)ref->inode_list, 1014 ref->inode_list,
1013 (u64 *)&eie, GFP_NOFS); 1015 (void **)&eie, GFP_NOFS);
1014 if (ret < 0) 1016 if (ret < 0)
1015 goto out; 1017 goto out;
1016 if (!ret && extent_item_pos) { 1018 if (!ret && extent_item_pos) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923c410c..43527fd78825 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -84,12 +84,6 @@ struct btrfs_inode {
84 */ 84 */
85 struct list_head delalloc_inodes; 85 struct list_head delalloc_inodes;
86 86
87 /*
88 * list for tracking inodes that must be sent to disk before a
89 * rename or truncate commit
90 */
91 struct list_head ordered_operations;
92
93 /* node for the red-black tree that links inodes in subvolume root */ 87 /* node for the red-black tree that links inodes in subvolume root */
94 struct rb_node rb_node; 88 struct rb_node rb_node;
95 89
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453b8e24..44ee5d2e52a4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,9 +280,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
280 280
281 WARN_ON(btrfs_header_generation(buf) > trans->transid); 281 WARN_ON(btrfs_header_generation(buf) > trans->transid);
282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
283 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 283 ret = btrfs_inc_ref(trans, root, cow, 1);
284 else 284 else
285 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 285 ret = btrfs_inc_ref(trans, root, cow, 0);
286 286
287 if (ret) 287 if (ret)
288 return ret; 288 return ret;
@@ -1035,14 +1035,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1035 if ((owner == root->root_key.objectid || 1035 if ((owner == root->root_key.objectid ||
1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1038 ret = btrfs_inc_ref(trans, root, buf, 1, 1); 1038 ret = btrfs_inc_ref(trans, root, buf, 1);
1039 BUG_ON(ret); /* -ENOMEM */ 1039 BUG_ON(ret); /* -ENOMEM */
1040 1040
1041 if (root->root_key.objectid == 1041 if (root->root_key.objectid ==
1042 BTRFS_TREE_RELOC_OBJECTID) { 1042 BTRFS_TREE_RELOC_OBJECTID) {
1043 ret = btrfs_dec_ref(trans, root, buf, 0, 1); 1043 ret = btrfs_dec_ref(trans, root, buf, 0);
1044 BUG_ON(ret); /* -ENOMEM */ 1044 BUG_ON(ret); /* -ENOMEM */
1045 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1045 ret = btrfs_inc_ref(trans, root, cow, 1);
1046 BUG_ON(ret); /* -ENOMEM */ 1046 BUG_ON(ret); /* -ENOMEM */
1047 } 1047 }
1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -1050,9 +1050,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1050 1050
1051 if (root->root_key.objectid == 1051 if (root->root_key.objectid ==
1052 BTRFS_TREE_RELOC_OBJECTID) 1052 BTRFS_TREE_RELOC_OBJECTID)
1053 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1053 ret = btrfs_inc_ref(trans, root, cow, 1);
1054 else 1054 else
1055 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1055 ret = btrfs_inc_ref(trans, root, cow, 0);
1056 BUG_ON(ret); /* -ENOMEM */ 1056 BUG_ON(ret); /* -ENOMEM */
1057 } 1057 }
1058 if (new_flags != 0) { 1058 if (new_flags != 0) {
@@ -1069,11 +1069,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
1070 if (root->root_key.objectid == 1070 if (root->root_key.objectid ==
1071 BTRFS_TREE_RELOC_OBJECTID) 1071 BTRFS_TREE_RELOC_OBJECTID)
1072 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1072 ret = btrfs_inc_ref(trans, root, cow, 1);
1073 else 1073 else
1074 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1074 ret = btrfs_inc_ref(trans, root, cow, 0);
1075 BUG_ON(ret); /* -ENOMEM */ 1075 BUG_ON(ret); /* -ENOMEM */
1076 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 1076 ret = btrfs_dec_ref(trans, root, buf, 1);
1077 BUG_ON(ret); /* -ENOMEM */ 1077 BUG_ON(ret); /* -ENOMEM */
1078 } 1078 }
1079 clean_tree_block(trans, root, buf); 1079 clean_tree_block(trans, root, buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397f4e92..8e29b614fe93 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3326,9 +3326,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3327 struct btrfs_key *ins, int is_data, int delalloc); 3327 struct btrfs_key *ins, int is_data, int delalloc);
3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3329 struct extent_buffer *buf, int full_backref, int no_quota); 3329 struct extent_buffer *buf, int full_backref);
3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3331 struct extent_buffer *buf, int full_backref, int no_quota); 3331 struct extent_buffer *buf, int full_backref);
3332int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3332int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root, 3333 struct btrfs_root *root,
3334 u64 bytenr, u64 num_bytes, u64 flags, 3334 u64 bytenr, u64 num_bytes, u64 flags,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9cf2aa..d0ed9e664f7d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrfs_work *work);
60static void free_fs_root(struct btrfs_root *root); 60static void free_fs_root(struct btrfs_root *root);
61static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 61static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
62 int read_only); 62 int read_only);
63static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
64 struct btrfs_root *root);
65static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 63static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
66static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 64static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
67 struct btrfs_root *root); 65 struct btrfs_root *root);
@@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root)
3829 btrfs_cleanup_transaction(root); 3827 btrfs_cleanup_transaction(root);
3830} 3828}
3831 3829
3832static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3833 struct btrfs_root *root)
3834{
3835 struct btrfs_inode *btrfs_inode;
3836 struct list_head splice;
3837
3838 INIT_LIST_HEAD(&splice);
3839
3840 mutex_lock(&root->fs_info->ordered_operations_mutex);
3841 spin_lock(&root->fs_info->ordered_root_lock);
3842
3843 list_splice_init(&t->ordered_operations, &splice);
3844 while (!list_empty(&splice)) {
3845 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3846 ordered_operations);
3847
3848 list_del_init(&btrfs_inode->ordered_operations);
3849 spin_unlock(&root->fs_info->ordered_root_lock);
3850
3851 btrfs_invalidate_inodes(btrfs_inode->root);
3852
3853 spin_lock(&root->fs_info->ordered_root_lock);
3854 }
3855
3856 spin_unlock(&root->fs_info->ordered_root_lock);
3857 mutex_unlock(&root->fs_info->ordered_operations_mutex);
3858}
3859
3860static void btrfs_destroy_ordered_extents(struct btrfs_root *root) 3830static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3861{ 3831{
3862 struct btrfs_ordered_extent *ordered; 3832 struct btrfs_ordered_extent *ordered;
@@ -4093,8 +4063,6 @@ again:
4093void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4063void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4094 struct btrfs_root *root) 4064 struct btrfs_root *root)
4095{ 4065{
4096 btrfs_destroy_ordered_operations(cur_trans, root);
4097
4098 btrfs_destroy_delayed_refs(cur_trans, root); 4066 btrfs_destroy_delayed_refs(cur_trans, root);
4099 4067
4100 cur_trans->state = TRANS_STATE_COMMIT_START; 4068 cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f362f9..102ed3143976 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3057,7 +3057,7 @@ out:
3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3058 struct btrfs_root *root, 3058 struct btrfs_root *root,
3059 struct extent_buffer *buf, 3059 struct extent_buffer *buf,
3060 int full_backref, int inc, int no_quota) 3060 int full_backref, int inc)
3061{ 3061{
3062 u64 bytenr; 3062 u64 bytenr;
3063 u64 num_bytes; 3063 u64 num_bytes;
@@ -3111,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3111 key.offset -= btrfs_file_extent_offset(buf, fi); 3111 key.offset -= btrfs_file_extent_offset(buf, fi);
3112 ret = process_func(trans, root, bytenr, num_bytes, 3112 ret = process_func(trans, root, bytenr, num_bytes,
3113 parent, ref_root, key.objectid, 3113 parent, ref_root, key.objectid,
3114 key.offset, no_quota); 3114 key.offset, 1);
3115 if (ret) 3115 if (ret)
3116 goto fail; 3116 goto fail;
3117 } else { 3117 } else {
@@ -3119,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3119 num_bytes = btrfs_level_size(root, level - 1); 3119 num_bytes = btrfs_level_size(root, level - 1);
3120 ret = process_func(trans, root, bytenr, num_bytes, 3120 ret = process_func(trans, root, bytenr, num_bytes,
3121 parent, ref_root, level - 1, 0, 3121 parent, ref_root, level - 1, 0,
3122 no_quota); 3122 1);
3123 if (ret) 3123 if (ret)
3124 goto fail; 3124 goto fail;
3125 } 3125 }
@@ -3130,15 +3130,15 @@ fail:
3130} 3130}
3131 3131
3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3133 struct extent_buffer *buf, int full_backref, int no_quota) 3133 struct extent_buffer *buf, int full_backref)
3134{ 3134{
3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); 3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3136} 3136}
3137 3137
3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3139 struct extent_buffer *buf, int full_backref, int no_quota) 3139 struct extent_buffer *buf, int full_backref)
3140{ 3140{
3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); 3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3142} 3142}
3143 3143
3144static int write_one_cache_group(struct btrfs_trans_handle *trans, 3144static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -7478,6 +7478,220 @@ reada:
7478 wc->reada_slot = slot; 7478 wc->reada_slot = slot;
7479} 7479}
7480 7480
7481static int account_leaf_items(struct btrfs_trans_handle *trans,
7482 struct btrfs_root *root,
7483 struct extent_buffer *eb)
7484{
7485 int nr = btrfs_header_nritems(eb);
7486 int i, extent_type, ret;
7487 struct btrfs_key key;
7488 struct btrfs_file_extent_item *fi;
7489 u64 bytenr, num_bytes;
7490
7491 for (i = 0; i < nr; i++) {
7492 btrfs_item_key_to_cpu(eb, &key, i);
7493
7494 if (key.type != BTRFS_EXTENT_DATA_KEY)
7495 continue;
7496
7497 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
7498 /* filter out non qgroup-accountable extents */
7499 extent_type = btrfs_file_extent_type(eb, fi);
7500
7501 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
7502 continue;
7503
7504 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
7505 if (!bytenr)
7506 continue;
7507
7508 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
7509
7510 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7511 root->objectid,
7512 bytenr, num_bytes,
7513 BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
7514 if (ret)
7515 return ret;
7516 }
7517 return 0;
7518}
7519
7520/*
7521 * Walk up the tree from the bottom, freeing leaves and any interior
7522 * nodes which have had all slots visited. If a node (leaf or
7523 * interior) is freed, the node above it will have it's slot
7524 * incremented. The root node will never be freed.
7525 *
7526 * At the end of this function, we should have a path which has all
7527 * slots incremented to the next position for a search. If we need to
7528 * read a new node it will be NULL and the node above it will have the
7529 * correct slot selected for a later read.
7530 *
7531 * If we increment the root nodes slot counter past the number of
7532 * elements, 1 is returned to signal completion of the search.
7533 */
7534static int adjust_slots_upwards(struct btrfs_root *root,
7535 struct btrfs_path *path, int root_level)
7536{
7537 int level = 0;
7538 int nr, slot;
7539 struct extent_buffer *eb;
7540
7541 if (root_level == 0)
7542 return 1;
7543
7544 while (level <= root_level) {
7545 eb = path->nodes[level];
7546 nr = btrfs_header_nritems(eb);
7547 path->slots[level]++;
7548 slot = path->slots[level];
7549 if (slot >= nr || level == 0) {
7550 /*
7551 * Don't free the root - we will detect this
7552 * condition after our loop and return a
7553 * positive value for caller to stop walking the tree.
7554 */
7555 if (level != root_level) {
7556 btrfs_tree_unlock_rw(eb, path->locks[level]);
7557 path->locks[level] = 0;
7558
7559 free_extent_buffer(eb);
7560 path->nodes[level] = NULL;
7561 path->slots[level] = 0;
7562 }
7563 } else {
7564 /*
7565 * We have a valid slot to walk back down
7566 * from. Stop here so caller can process these
7567 * new nodes.
7568 */
7569 break;
7570 }
7571
7572 level++;
7573 }
7574
7575 eb = path->nodes[root_level];
7576 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7577 return 1;
7578
7579 return 0;
7580}
7581
7582/*
7583 * root_eb is the subtree root and is locked before this function is called.
7584 */
7585static int account_shared_subtree(struct btrfs_trans_handle *trans,
7586 struct btrfs_root *root,
7587 struct extent_buffer *root_eb,
7588 u64 root_gen,
7589 int root_level)
7590{
7591 int ret = 0;
7592 int level;
7593 struct extent_buffer *eb = root_eb;
7594 struct btrfs_path *path = NULL;
7595
7596 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
7597 BUG_ON(root_eb == NULL);
7598
7599 if (!root->fs_info->quota_enabled)
7600 return 0;
7601
7602 if (!extent_buffer_uptodate(root_eb)) {
7603 ret = btrfs_read_buffer(root_eb, root_gen);
7604 if (ret)
7605 goto out;
7606 }
7607
7608 if (root_level == 0) {
7609 ret = account_leaf_items(trans, root, root_eb);
7610 goto out;
7611 }
7612
7613 path = btrfs_alloc_path();
7614 if (!path)
7615 return -ENOMEM;
7616
7617 /*
7618 * Walk down the tree. Missing extent blocks are filled in as
7619 * we go. Metadata is accounted every time we read a new
7620 * extent block.
7621 *
7622 * When we reach a leaf, we account for file extent items in it,
7623 * walk back up the tree (adjusting slot pointers as we go)
7624 * and restart the search process.
7625 */
7626 extent_buffer_get(root_eb); /* For path */
7627 path->nodes[root_level] = root_eb;
7628 path->slots[root_level] = 0;
7629 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
7630walk_down:
7631 level = root_level;
7632 while (level >= 0) {
7633 if (path->nodes[level] == NULL) {
7634 int child_bsize = root->nodesize;
7635 int parent_slot;
7636 u64 child_gen;
7637 u64 child_bytenr;
7638
7639 /* We need to get child blockptr/gen from
7640 * parent before we can read it. */
7641 eb = path->nodes[level + 1];
7642 parent_slot = path->slots[level + 1];
7643 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
7644 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
7645
7646 eb = read_tree_block(root, child_bytenr, child_bsize,
7647 child_gen);
7648 if (!eb || !extent_buffer_uptodate(eb)) {
7649 ret = -EIO;
7650 goto out;
7651 }
7652
7653 path->nodes[level] = eb;
7654 path->slots[level] = 0;
7655
7656 btrfs_tree_read_lock(eb);
7657 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
7658 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
7659
7660 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7661 root->objectid,
7662 child_bytenr,
7663 child_bsize,
7664 BTRFS_QGROUP_OPER_SUB_SUBTREE,
7665 0);
7666 if (ret)
7667 goto out;
7668
7669 }
7670
7671 if (level == 0) {
7672 ret = account_leaf_items(trans, root, path->nodes[level]);
7673 if (ret)
7674 goto out;
7675
7676 /* Nonzero return here means we completed our search */
7677 ret = adjust_slots_upwards(root, path, root_level);
7678 if (ret)
7679 break;
7680
7681 /* Restart search with new slots */
7682 goto walk_down;
7683 }
7684
7685 level--;
7686 }
7687
7688 ret = 0;
7689out:
7690 btrfs_free_path(path);
7691
7692 return ret;
7693}
7694
7481/* 7695/*
7482 * helper to process tree block while walking down the tree. 7696 * helper to process tree block while walking down the tree.
7483 * 7697 *
@@ -7532,9 +7746,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
7532 /* wc->stage == UPDATE_BACKREF */ 7746 /* wc->stage == UPDATE_BACKREF */
7533 if (!(wc->flags[level] & flag)) { 7747 if (!(wc->flags[level] & flag)) {
7534 BUG_ON(!path->locks[level]); 7748 BUG_ON(!path->locks[level]);
7535 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 7749 ret = btrfs_inc_ref(trans, root, eb, 1);
7536 BUG_ON(ret); /* -ENOMEM */ 7750 BUG_ON(ret); /* -ENOMEM */
7537 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 7751 ret = btrfs_dec_ref(trans, root, eb, 0);
7538 BUG_ON(ret); /* -ENOMEM */ 7752 BUG_ON(ret); /* -ENOMEM */
7539 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 7753 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
7540 eb->len, flag, 7754 eb->len, flag,
@@ -7581,6 +7795,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7581 int level = wc->level; 7795 int level = wc->level;
7582 int reada = 0; 7796 int reada = 0;
7583 int ret = 0; 7797 int ret = 0;
7798 bool need_account = false;
7584 7799
7585 generation = btrfs_node_ptr_generation(path->nodes[level], 7800 generation = btrfs_node_ptr_generation(path->nodes[level],
7586 path->slots[level]); 7801 path->slots[level]);
@@ -7626,6 +7841,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7626 7841
7627 if (wc->stage == DROP_REFERENCE) { 7842 if (wc->stage == DROP_REFERENCE) {
7628 if (wc->refs[level - 1] > 1) { 7843 if (wc->refs[level - 1] > 1) {
7844 need_account = true;
7629 if (level == 1 && 7845 if (level == 1 &&
7630 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 7846 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7631 goto skip; 7847 goto skip;
@@ -7689,6 +7905,16 @@ skip:
7689 parent = 0; 7905 parent = 0;
7690 } 7906 }
7691 7907
7908 if (need_account) {
7909 ret = account_shared_subtree(trans, root, next,
7910 generation, level - 1);
7911 if (ret) {
7912 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
7913 "%d accounting shared subtree. Quota "
7914 "is out of sync, rescan required.\n",
7915 root->fs_info->sb->s_id, ret);
7916 }
7917 }
7692 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 7918 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
7693 root->root_key.objectid, level - 1, 0, 0); 7919 root->root_key.objectid, level - 1, 0, 0);
7694 BUG_ON(ret); /* -ENOMEM */ 7920 BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7995,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7769 if (wc->refs[level] == 1) { 7995 if (wc->refs[level] == 1) {
7770 if (level == 0) { 7996 if (level == 0) {
7771 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 7997 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7772 ret = btrfs_dec_ref(trans, root, eb, 1, 7998 ret = btrfs_dec_ref(trans, root, eb, 1);
7773 wc->for_reloc);
7774 else 7999 else
7775 ret = btrfs_dec_ref(trans, root, eb, 0, 8000 ret = btrfs_dec_ref(trans, root, eb, 0);
7776 wc->for_reloc);
7777 BUG_ON(ret); /* -ENOMEM */ 8001 BUG_ON(ret); /* -ENOMEM */
8002 ret = account_leaf_items(trans, root, eb);
8003 if (ret) {
8004 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
8005 "%d accounting leaf items. Quota "
8006 "is out of sync, rescan required.\n",
8007 root->fs_info->sb->s_id, ret);
8008 }
7778 } 8009 }
7779 /* make block locked assertion in clean_tree_block happy */ 8010 /* make block locked assertion in clean_tree_block happy */
7780 if (!path->locks[level] && 8011 if (!path->locks[level] &&
@@ -7900,6 +8131,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7900 int level; 8131 int level;
7901 bool root_dropped = false; 8132 bool root_dropped = false;
7902 8133
8134 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8135
7903 path = btrfs_alloc_path(); 8136 path = btrfs_alloc_path();
7904 if (!path) { 8137 if (!path) {
7905 err = -ENOMEM; 8138 err = -ENOMEM;
@@ -8025,6 +8258,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8025 goto out_end_trans; 8258 goto out_end_trans;
8026 } 8259 }
8027 8260
8261 /*
8262 * Qgroup update accounting is run from
8263 * delayed ref handling. This usually works
8264 * out because delayed refs are normally the
8265 * only way qgroup updates are added. However,
8266 * we may have added updates during our tree
8267 * walk so run qgroups here to make sure we
8268 * don't lose any updates.
8269 */
8270 ret = btrfs_delayed_qgroup_accounting(trans,
8271 root->fs_info);
8272 if (ret)
8273 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8274 "running qgroup updates "
8275 "during snapshot delete. "
8276 "Quota is out of sync, "
8277 "rescan required.\n", ret);
8278
8028 btrfs_end_transaction_throttle(trans, tree_root); 8279 btrfs_end_transaction_throttle(trans, tree_root);
8029 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 8280 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8030 pr_debug("BTRFS: drop snapshot early exit\n"); 8281 pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8329,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8078 } 8329 }
8079 root_dropped = true; 8330 root_dropped = true;
8080out_end_trans: 8331out_end_trans:
8332 ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
8333 if (ret)
8334 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8335 "running qgroup updates "
8336 "during snapshot delete. "
8337 "Quota is out of sync, "
8338 "rescan required.\n", ret);
8339
8081 btrfs_end_transaction_throttle(trans, tree_root); 8340 btrfs_end_transaction_throttle(trans, tree_root);
8082out_free: 8341out_free:
8083 kfree(wc); 8342 kfree(wc);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe45d686..54c84daec9b5 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -756,7 +756,7 @@ again:
756 found_next = 1; 756 found_next = 1;
757 if (ret != 0) 757 if (ret != 0)
758 goto insert; 758 goto insert;
759 slot = 0; 759 slot = path->slots[0];
760 } 760 }
761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1f2b99cb55ea..d3afac292d67 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1838,33 +1838,9 @@ out:
1838 1838
1839int btrfs_release_file(struct inode *inode, struct file *filp) 1839int btrfs_release_file(struct inode *inode, struct file *filp)
1840{ 1840{
1841 /*
1842 * ordered_data_close is set by settattr when we are about to truncate
1843 * a file from a non-zero size to a zero size. This tries to
1844 * flush down new bytes that may have been written if the
1845 * application were using truncate to replace a file in place.
1846 */
1847 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1848 &BTRFS_I(inode)->runtime_flags)) {
1849 struct btrfs_trans_handle *trans;
1850 struct btrfs_root *root = BTRFS_I(inode)->root;
1851
1852 /*
1853 * We need to block on a committing transaction to keep us from
1854 * throwing a ordered operation on to the list and causing
1855 * something like sync to deadlock trying to flush out this
1856 * inode.
1857 */
1858 trans = btrfs_start_transaction(root, 0);
1859 if (IS_ERR(trans))
1860 return PTR_ERR(trans);
1861 btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
1862 btrfs_end_transaction(trans, root);
1863 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1864 filemap_flush(inode->i_mapping);
1865 }
1866 if (filp->private_data) 1841 if (filp->private_data)
1867 btrfs_ioctl_trans_end(filp); 1842 btrfs_ioctl_trans_end(filp);
1843 filemap_flush(inode->i_mapping);
1868 return 0; 1844 return 0;
1869} 1845}
1870 1846
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3668048e16f8..03708ef3deef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -709,6 +709,18 @@ retry:
709 unlock_extent(io_tree, async_extent->start, 709 unlock_extent(io_tree, async_extent->start,
710 async_extent->start + 710 async_extent->start +
711 async_extent->ram_size - 1); 711 async_extent->ram_size - 1);
712
713 /*
714 * we need to redirty the pages if we decide to
715 * fallback to uncompressed IO, otherwise we
716 * will not submit these pages down to lower
717 * layers.
718 */
719 extent_range_redirty_for_io(inode,
720 async_extent->start,
721 async_extent->start +
722 async_extent->ram_size - 1);
723
712 goto retry; 724 goto retry;
713 } 725 }
714 goto out_free; 726 goto out_free;
@@ -7939,27 +7951,6 @@ static int btrfs_truncate(struct inode *inode)
7939 BUG_ON(ret); 7951 BUG_ON(ret);
7940 7952
7941 /* 7953 /*
7942 * setattr is responsible for setting the ordered_data_close flag,
7943 * but that is only tested during the last file release. That
7944 * could happen well after the next commit, leaving a great big
7945 * window where new writes may get lost if someone chooses to write
7946 * to this file after truncating to zero
7947 *
7948 * The inode doesn't have any dirty data here, and so if we commit
7949 * this is a noop. If someone immediately starts writing to the inode
7950 * it is very likely we'll catch some of their writes in this
7951 * transaction, and the commit will find this file on the ordered
7952 * data list with good things to send down.
7953 *
7954 * This is a best effort solution, there is still a window where
7955 * using truncate to replace the contents of the file will
7956 * end up with a zero length file after a crash.
7957 */
7958 if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
7959 &BTRFS_I(inode)->runtime_flags))
7960 btrfs_add_ordered_operation(trans, root, inode);
7961
7962 /*
7963 * So if we truncate and then write and fsync we normally would just 7954 * So if we truncate and then write and fsync we normally would just
7964 * write the extents that changed, which is a problem if we need to 7955 * write the extents that changed, which is a problem if we need to
7965 * first truncate that entire inode. So set this flag so we write out 7956 * first truncate that entire inode. So set this flag so we write out
@@ -8106,7 +8097,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
8106 mutex_init(&ei->delalloc_mutex); 8097 mutex_init(&ei->delalloc_mutex);
8107 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 8098 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8108 INIT_LIST_HEAD(&ei->delalloc_inodes); 8099 INIT_LIST_HEAD(&ei->delalloc_inodes);
8109 INIT_LIST_HEAD(&ei->ordered_operations);
8110 RB_CLEAR_NODE(&ei->rb_node); 8100 RB_CLEAR_NODE(&ei->rb_node);
8111 8101
8112 return inode; 8102 return inode;
@@ -8146,17 +8136,6 @@ void btrfs_destroy_inode(struct inode *inode)
8146 if (!root) 8136 if (!root)
8147 goto free; 8137 goto free;
8148 8138
8149 /*
8150 * Make sure we're properly removed from the ordered operation
8151 * lists.
8152 */
8153 smp_mb();
8154 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
8155 spin_lock(&root->fs_info->ordered_root_lock);
8156 list_del_init(&BTRFS_I(inode)->ordered_operations);
8157 spin_unlock(&root->fs_info->ordered_root_lock);
8158 }
8159
8160 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 8139 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
8161 &BTRFS_I(inode)->runtime_flags)) { 8140 &BTRFS_I(inode)->runtime_flags)) {
8162 btrfs_info(root->fs_info, "inode %llu still on the orphan list", 8141 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8338,12 +8317,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8338 ret = 0; 8317 ret = 0;
8339 8318
8340 /* 8319 /*
8341 * we're using rename to replace one file with another. 8320 * we're using rename to replace one file with another. Start IO on it
8342 * and the replacement file is large. Start IO on it now so 8321 * now so we don't add too much work to the end of the transaction
8343 * we don't add too much work to the end of the transaction
8344 */ 8322 */
8345 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && 8323 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
8346 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
8347 filemap_flush(old_inode->i_mapping); 8324 filemap_flush(old_inode->i_mapping);
8348 8325
8349 /* close the racy window with snapshot create/destroy ioctl */ 8326 /* close the racy window with snapshot create/destroy ioctl */
@@ -8391,12 +8368,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8391 */ 8368 */
8392 btrfs_pin_log_trans(root); 8369 btrfs_pin_log_trans(root);
8393 } 8370 }
8394 /*
8395 * make sure the inode gets flushed if it is replacing
8396 * something.
8397 */
8398 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
8399 btrfs_add_ordered_operation(trans, root, old_inode);
8400 8371
8401 inode_inc_iversion(old_dir); 8372 inode_inc_iversion(old_dir);
8402 inode_inc_iversion(new_dir); 8373 inode_inc_iversion(new_dir);
@@ -8476,6 +8447,16 @@ out_notrans:
8476 return ret; 8447 return ret;
8477} 8448}
8478 8449
8450static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
8451 struct inode *new_dir, struct dentry *new_dentry,
8452 unsigned int flags)
8453{
8454 if (flags & ~RENAME_NOREPLACE)
8455 return -EINVAL;
8456
8457 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8458}
8459
8479static void btrfs_run_delalloc_work(struct btrfs_work *work) 8460static void btrfs_run_delalloc_work(struct btrfs_work *work)
8480{ 8461{
8481 struct btrfs_delalloc_work *delalloc_work; 8462 struct btrfs_delalloc_work *delalloc_work;
@@ -9019,7 +9000,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
9019 .link = btrfs_link, 9000 .link = btrfs_link,
9020 .mkdir = btrfs_mkdir, 9001 .mkdir = btrfs_mkdir,
9021 .rmdir = btrfs_rmdir, 9002 .rmdir = btrfs_rmdir,
9022 .rename = btrfs_rename, 9003 .rename2 = btrfs_rename2,
9023 .symlink = btrfs_symlink, 9004 .symlink = btrfs_symlink,
9024 .setattr = btrfs_setattr, 9005 .setattr = btrfs_setattr,
9025 .mknod = btrfs_mknod, 9006 .mknod = btrfs_mknod,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7187b14faa6c..963895c1f801 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct inode *inode,
571 571
572 trace_btrfs_ordered_extent_remove(inode, entry); 572 trace_btrfs_ordered_extent_remove(inode, entry);
573 573
574 /*
575 * we have no more ordered extents for this inode and
576 * no dirty pages. We can safely remove it from the
577 * list of ordered extents
578 */
579 if (RB_EMPTY_ROOT(&tree->tree) &&
580 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
581 spin_lock(&root->fs_info->ordered_root_lock);
582 list_del_init(&BTRFS_I(inode)->ordered_operations);
583 spin_unlock(&root->fs_info->ordered_root_lock);
584 }
585
586 if (!root->nr_ordered_extents) { 574 if (!root->nr_ordered_extents) {
587 spin_lock(&root->fs_info->ordered_root_lock); 575 spin_lock(&root->fs_info->ordered_root_lock);
588 BUG_ON(list_empty(&root->ordered_root)); 576 BUG_ON(list_empty(&root->ordered_root));
@@ -687,81 +675,6 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
687} 675}
688 676
689/* 677/*
690 * this is used during transaction commit to write all the inodes
691 * added to the ordered operation list. These files must be fully on
692 * disk before the transaction commits.
693 *
694 * we have two modes here, one is to just start the IO via filemap_flush
695 * and the other is to wait for all the io. When we wait, we have an
696 * extra check to make sure the ordered operation list really is empty
697 * before we return
698 */
699int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
700 struct btrfs_root *root, int wait)
701{
702 struct btrfs_inode *btrfs_inode;
703 struct inode *inode;
704 struct btrfs_transaction *cur_trans = trans->transaction;
705 struct list_head splice;
706 struct list_head works;
707 struct btrfs_delalloc_work *work, *next;
708 int ret = 0;
709
710 INIT_LIST_HEAD(&splice);
711 INIT_LIST_HEAD(&works);
712
713 mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
714 spin_lock(&root->fs_info->ordered_root_lock);
715 list_splice_init(&cur_trans->ordered_operations, &splice);
716 while (!list_empty(&splice)) {
717 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
718 ordered_operations);
719 inode = &btrfs_inode->vfs_inode;
720
721 list_del_init(&btrfs_inode->ordered_operations);
722
723 /*
724 * the inode may be getting freed (in sys_unlink path).
725 */
726 inode = igrab(inode);
727 if (!inode)
728 continue;
729
730 if (!wait)
731 list_add_tail(&BTRFS_I(inode)->ordered_operations,
732 &cur_trans->ordered_operations);
733 spin_unlock(&root->fs_info->ordered_root_lock);
734
735 work = btrfs_alloc_delalloc_work(inode, wait, 1);
736 if (!work) {
737 spin_lock(&root->fs_info->ordered_root_lock);
738 if (list_empty(&BTRFS_I(inode)->ordered_operations))
739 list_add_tail(&btrfs_inode->ordered_operations,
740 &splice);
741 list_splice_tail(&splice,
742 &cur_trans->ordered_operations);
743 spin_unlock(&root->fs_info->ordered_root_lock);
744 ret = -ENOMEM;
745 goto out;
746 }
747 list_add_tail(&work->list, &works);
748 btrfs_queue_work(root->fs_info->flush_workers,
749 &work->work);
750
751 cond_resched();
752 spin_lock(&root->fs_info->ordered_root_lock);
753 }
754 spin_unlock(&root->fs_info->ordered_root_lock);
755out:
756 list_for_each_entry_safe(work, next, &works, list) {
757 list_del_init(&work->list);
758 btrfs_wait_and_free_delalloc_work(work);
759 }
760 mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
761 return ret;
762}
763
764/*
765 * Used to start IO or wait for a given ordered extent to finish. 678 * Used to start IO or wait for a given ordered extent to finish.
766 * 679 *
767 * If wait is one, this effectively waits on page writeback for all the pages 680 * If wait is one, this effectively waits on page writeback for all the pages
@@ -1120,42 +1033,6 @@ out:
1120 return index; 1033 return index;
1121} 1034}
1122 1035
1123
1124/*
1125 * add a given inode to the list of inodes that must be fully on
1126 * disk before a transaction commit finishes.
1127 *
1128 * This basically gives us the ext3 style data=ordered mode, and it is mostly
1129 * used to make sure renamed files are fully on disk.
1130 *
1131 * It is a noop if the inode is already fully on disk.
1132 *
1133 * If trans is not null, we'll do a friendly check for a transaction that
1134 * is already flushing things and force the IO down ourselves.
1135 */
1136void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1137 struct btrfs_root *root, struct inode *inode)
1138{
1139 struct btrfs_transaction *cur_trans = trans->transaction;
1140 u64 last_mod;
1141
1142 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
1143
1144 /*
1145 * if this file hasn't been changed since the last transaction
1146 * commit, we can safely return without doing anything
1147 */
1148 if (last_mod <= root->fs_info->last_trans_committed)
1149 return;
1150
1151 spin_lock(&root->fs_info->ordered_root_lock);
1152 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
1153 list_add_tail(&BTRFS_I(inode)->ordered_operations,
1154 &cur_trans->ordered_operations);
1155 }
1156 spin_unlock(&root->fs_info->ordered_root_lock);
1157}
1158
1159int __init ordered_data_init(void) 1036int __init ordered_data_init(void)
1160{ 1037{
1161 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", 1038 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 246897058efb..d81a274d621e 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
190 struct btrfs_ordered_extent *ordered); 190 struct btrfs_ordered_extent *ordered);
191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
192 u32 *sum, int len); 192 u32 *sum, int len);
193int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
194 struct btrfs_root *root, int wait);
195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root,
197 struct inode *inode);
198int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); 193int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
199void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); 194void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
200void btrfs_get_logged_extents(struct inode *inode, 195void btrfs_get_logged_extents(struct inode *inode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f9..b497498484be 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@ out:
1201 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1201 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1202 return ret; 1202 return ret;
1203} 1203}
1204
1205static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
1206 struct btrfs_qgroup_operation *oper2)
1207{
1208 /*
1209 * Ignore seq and type here, we're looking for any operation
1210 * at all related to this extent on that root.
1211 */
1212 if (oper1->bytenr < oper2->bytenr)
1213 return -1;
1214 if (oper1->bytenr > oper2->bytenr)
1215 return 1;
1216 if (oper1->ref_root < oper2->ref_root)
1217 return -1;
1218 if (oper1->ref_root > oper2->ref_root)
1219 return 1;
1220 return 0;
1221}
1222
1223static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
1224 struct btrfs_qgroup_operation *oper)
1225{
1226 struct rb_node *n;
1227 struct btrfs_qgroup_operation *cur;
1228 int cmp;
1229
1230 spin_lock(&fs_info->qgroup_op_lock);
1231 n = fs_info->qgroup_op_tree.rb_node;
1232 while (n) {
1233 cur = rb_entry(n, struct btrfs_qgroup_operation, n);
1234 cmp = comp_oper_exist(cur, oper);
1235 if (cmp < 0) {
1236 n = n->rb_right;
1237 } else if (cmp) {
1238 n = n->rb_left;
1239 } else {
1240 spin_unlock(&fs_info->qgroup_op_lock);
1241 return -EEXIST;
1242 }
1243 }
1244 spin_unlock(&fs_info->qgroup_op_lock);
1245 return 0;
1246}
1247
1204static int comp_oper(struct btrfs_qgroup_operation *oper1, 1248static int comp_oper(struct btrfs_qgroup_operation *oper1,
1205 struct btrfs_qgroup_operation *oper2) 1249 struct btrfs_qgroup_operation *oper2)
1206{ 1250{
@@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1290 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1334 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1291 INIT_LIST_HEAD(&oper->elem.list); 1335 INIT_LIST_HEAD(&oper->elem.list);
1292 oper->elem.seq = 0; 1336 oper->elem.seq = 0;
1337
1338 if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
1339 /*
1340 * If any operation for this bytenr/ref_root combo
1341 * exists, then we know it's not exclusively owned and
1342 * shouldn't be queued up.
1343 *
1344 * This also catches the case where we have a cloned
1345 * extent that gets queued up multiple times during
1346 * drop snapshot.
1347 */
1348 if (qgroup_oper_exists(fs_info, oper)) {
1349 kfree(oper);
1350 return 0;
1351 }
1352 }
1353
1293 ret = insert_qgroup_oper(fs_info, oper); 1354 ret = insert_qgroup_oper(fs_info, oper);
1294 if (ret) { 1355 if (ret) {
1295 /* Shouldn't happen so have an assert for developers */ 1356 /* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,111 @@ out:
1884} 1945}
1885 1946
1886/* 1947/*
1948 * Process a reference to a shared subtree. This type of operation is
1949 * queued during snapshot removal when we encounter extents which are
1950 * shared between more than one root.
1951 */
1952static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1953 struct btrfs_fs_info *fs_info,
1954 struct btrfs_qgroup_operation *oper)
1955{
1956 struct ulist *roots = NULL;
1957 struct ulist_node *unode;
1958 struct ulist_iterator uiter;
1959 struct btrfs_qgroup_list *glist;
1960 struct ulist *parents;
1961 int ret = 0;
1962 int err;
1963 struct btrfs_qgroup *qg;
1964 u64 root_obj = 0;
1965 struct seq_list elem = {};
1966
1967 parents = ulist_alloc(GFP_NOFS);
1968 if (!parents)
1969 return -ENOMEM;
1970
1971 btrfs_get_tree_mod_seq(fs_info, &elem);
1972 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1973 elem.seq, &roots);
1974 btrfs_put_tree_mod_seq(fs_info, &elem);
1975 if (ret < 0)
1976 return ret;
1977
1978 if (roots->nnodes != 1)
1979 goto out;
1980
1981 ULIST_ITER_INIT(&uiter);
1982 unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
1983 /*
1984 * If we find our ref root then that means all refs
1985 * this extent has to the root have not yet been
1986 * deleted. In that case, we do nothing and let the
1987 * last ref for this bytenr drive our update.
1988 *
1989 * This can happen for example if an extent is
1990 * referenced multiple times in a snapshot (clone,
1991 * etc). If we are in the middle of snapshot removal,
1992 * queued updates for such an extent will find the
1993 * root if we have not yet finished removing the
1994 * snapshot.
1995 */
1996 if (unode->val == oper->ref_root)
1997 goto out;
1998
1999 root_obj = unode->val;
2000 BUG_ON(!root_obj);
2001
2002 spin_lock(&fs_info->qgroup_lock);
2003 qg = find_qgroup_rb(fs_info, root_obj);
2004 if (!qg)
2005 goto out_unlock;
2006
2007 qg->excl += oper->num_bytes;
2008 qg->excl_cmpr += oper->num_bytes;
2009 qgroup_dirty(fs_info, qg);
2010
2011 /*
2012 * Adjust counts for parent groups. First we find all
2013 * parents, then in the 2nd loop we do the adjustment
2014 * while adding parents of the parents to our ulist.
2015 */
2016 list_for_each_entry(glist, &qg->groups, next_group) {
2017 err = ulist_add(parents, glist->group->qgroupid,
2018 ptr_to_u64(glist->group), GFP_ATOMIC);
2019 if (err < 0) {
2020 ret = err;
2021 goto out_unlock;
2022 }
2023 }
2024
2025 ULIST_ITER_INIT(&uiter);
2026 while ((unode = ulist_next(parents, &uiter))) {
2027 qg = u64_to_ptr(unode->aux);
2028 qg->excl += oper->num_bytes;
2029 qg->excl_cmpr += oper->num_bytes;
2030 qgroup_dirty(fs_info, qg);
2031
2032 /* Add any parents of the parents */
2033 list_for_each_entry(glist, &qg->groups, next_group) {
2034 err = ulist_add(parents, glist->group->qgroupid,
2035 ptr_to_u64(glist->group), GFP_ATOMIC);
2036 if (err < 0) {
2037 ret = err;
2038 goto out_unlock;
2039 }
2040 }
2041 }
2042
2043out_unlock:
2044 spin_unlock(&fs_info->qgroup_lock);
2045
2046out:
2047 ulist_free(roots);
2048 ulist_free(parents);
2049 return ret;
2050}
2051
2052/*
1887 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 2053 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1888 * from the fs. First, all roots referencing the extent are searched, and 2054 * from the fs. First, all roots referencing the extent are searched, and
1889 * then the space is accounted accordingly to the different roots. The 2055 * then the space is accounted accordingly to the different roots. The
@@ -1920,6 +2086,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1920 case BTRFS_QGROUP_OPER_SUB_SHARED: 2086 case BTRFS_QGROUP_OPER_SUB_SHARED:
1921 ret = qgroup_shared_accounting(trans, fs_info, oper); 2087 ret = qgroup_shared_accounting(trans, fs_info, oper);
1922 break; 2088 break;
2089 case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2090 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2091 break;
1923 default: 2092 default:
1924 ASSERT(0); 2093 ASSERT(0);
1925 } 2094 }
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1fbd7a..18cc68ca3090 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type {
44 BTRFS_QGROUP_OPER_ADD_SHARED, 44 BTRFS_QGROUP_OPER_ADD_SHARED,
45 BTRFS_QGROUP_OPER_SUB_EXCL, 45 BTRFS_QGROUP_OPER_SUB_EXCL,
46 BTRFS_QGROUP_OPER_SUB_SHARED, 46 BTRFS_QGROUP_OPER_SUB_SHARED,
47 BTRFS_QGROUP_OPER_SUB_SUBTREE,
47}; 48};
48 49
49struct btrfs_qgroup_operation { 50struct btrfs_qgroup_operation {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8e16bca69c56..c4124de4435b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -851,7 +851,6 @@ static struct dentry *get_default_root(struct super_block *sb,
851 struct btrfs_path *path; 851 struct btrfs_path *path;
852 struct btrfs_key location; 852 struct btrfs_key location;
853 struct inode *inode; 853 struct inode *inode;
854 struct dentry *dentry;
855 u64 dir_id; 854 u64 dir_id;
856 int new = 0; 855 int new = 0;
857 856
@@ -922,13 +921,7 @@ setup_root:
922 return dget(sb->s_root); 921 return dget(sb->s_root);
923 } 922 }
924 923
925 dentry = d_obtain_alias(inode); 924 return d_obtain_root(inode);
926 if (!IS_ERR(dentry)) {
927 spin_lock(&dentry->d_lock);
928 dentry->d_flags &= ~DCACHE_DISCONNECTED;
929 spin_unlock(&dentry->d_lock);
930 }
931 return dentry;
932} 925}
933 926
934static int btrfs_fill_super(struct super_block *sb, 927static int btrfs_fill_super(struct super_block *sb,
@@ -1672,6 +1665,21 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1672 return 0; 1665 return 0;
1673} 1666}
1674 1667
1668/*
1669 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
1670 *
1671 * If there's a redundant raid level at DATA block groups, use the respective
1672 * multiplier to scale the sizes.
1673 *
1674 * Unused device space usage is based on simulating the chunk allocator
1675 * algorithm that respects the device sizes, order of allocations and the
1676 * 'alloc_start' value, this is a close approximation of the actual use but
1677 * there are other factors that may change the result (like a new metadata
1678 * chunk).
1679 *
1680 * FIXME: not accurate for mixed block groups, total and free/used are ok,
1681 * available appears slightly larger.
1682 */
1675static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 1683static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1676{ 1684{
1677 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); 1685 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
@@ -1682,6 +1690,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1682 u64 total_free_data = 0; 1690 u64 total_free_data = 0;
1683 int bits = dentry->d_sb->s_blocksize_bits; 1691 int bits = dentry->d_sb->s_blocksize_bits;
1684 __be32 *fsid = (__be32 *)fs_info->fsid; 1692 __be32 *fsid = (__be32 *)fs_info->fsid;
1693 unsigned factor = 1;
1694 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
1685 int ret; 1695 int ret;
1686 1696
1687 /* holding chunk_muext to avoid allocating new chunks */ 1697 /* holding chunk_muext to avoid allocating new chunks */
@@ -1689,30 +1699,52 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1689 rcu_read_lock(); 1699 rcu_read_lock();
1690 list_for_each_entry_rcu(found, head, list) { 1700 list_for_each_entry_rcu(found, head, list) {
1691 if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 1701 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1702 int i;
1703
1692 total_free_data += found->disk_total - found->disk_used; 1704 total_free_data += found->disk_total - found->disk_used;
1693 total_free_data -= 1705 total_free_data -=
1694 btrfs_account_ro_block_groups_free_space(found); 1706 btrfs_account_ro_block_groups_free_space(found);
1707
1708 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1709 if (!list_empty(&found->block_groups[i])) {
1710 switch (i) {
1711 case BTRFS_RAID_DUP:
1712 case BTRFS_RAID_RAID1:
1713 case BTRFS_RAID_RAID10:
1714 factor = 2;
1715 }
1716 }
1717 }
1695 } 1718 }
1696 1719
1697 total_used += found->disk_used; 1720 total_used += found->disk_used;
1698 } 1721 }
1722
1699 rcu_read_unlock(); 1723 rcu_read_unlock();
1700 1724
1701 buf->f_namelen = BTRFS_NAME_LEN; 1725 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
1702 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1726 buf->f_blocks >>= bits;
1703 buf->f_bfree = buf->f_blocks - (total_used >> bits); 1727 buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
1704 buf->f_bsize = dentry->d_sb->s_blocksize; 1728
1705 buf->f_type = BTRFS_SUPER_MAGIC; 1729 /* Account global block reserve as used, it's in logical size already */
1730 spin_lock(&block_rsv->lock);
1731 buf->f_bfree -= block_rsv->size >> bits;
1732 spin_unlock(&block_rsv->lock);
1733
1706 buf->f_bavail = total_free_data; 1734 buf->f_bavail = total_free_data;
1707 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); 1735 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
1708 if (ret) { 1736 if (ret) {
1709 mutex_unlock(&fs_info->chunk_mutex); 1737 mutex_unlock(&fs_info->chunk_mutex);
1710 return ret; 1738 return ret;
1711 } 1739 }
1712 buf->f_bavail += total_free_data; 1740 buf->f_bavail += div_u64(total_free_data, factor);
1713 buf->f_bavail = buf->f_bavail >> bits; 1741 buf->f_bavail = buf->f_bavail >> bits;
1714 mutex_unlock(&fs_info->chunk_mutex); 1742 mutex_unlock(&fs_info->chunk_mutex);
1715 1743
1744 buf->f_type = BTRFS_SUPER_MAGIC;
1745 buf->f_bsize = dentry->d_sb->s_blocksize;
1746 buf->f_namelen = BTRFS_NAME_LEN;
1747
1716 /* We treat it as constant endianness (it doesn't matter _which_) 1748 /* We treat it as constant endianness (it doesn't matter _which_)
1717 because we want the fsid to come out the same whether mounted 1749 because we want the fsid to come out the same whether mounted
1718 on a big-endian or little-endian host */ 1750 on a big-endian or little-endian host */
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5f379affdf23..d89c6d3542ca 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -218,7 +218,6 @@ loop:
218 spin_lock_init(&cur_trans->delayed_refs.lock); 218 spin_lock_init(&cur_trans->delayed_refs.lock);
219 219
220 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 220 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
221 INIT_LIST_HEAD(&cur_trans->ordered_operations);
222 INIT_LIST_HEAD(&cur_trans->pending_chunks); 221 INIT_LIST_HEAD(&cur_trans->pending_chunks);
223 INIT_LIST_HEAD(&cur_trans->switch_commits); 222 INIT_LIST_HEAD(&cur_trans->switch_commits);
224 list_add_tail(&cur_trans->list, &fs_info->trans_list); 223 list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1612 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1611 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1613} 1612}
1614 1613
1615static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1616 struct btrfs_root *root)
1617{
1618 int ret;
1619
1620 ret = btrfs_run_delayed_items(trans, root);
1621 if (ret)
1622 return ret;
1623
1624 /*
1625 * rename don't use btrfs_join_transaction, so, once we
1626 * set the transaction to blocked above, we aren't going
1627 * to get any new ordered operations. We can safely run
1628 * it here and no for sure that nothing new will be added
1629 * to the list
1630 */
1631 ret = btrfs_run_ordered_operations(trans, root, 1);
1632
1633 return ret;
1634}
1635
1636static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1614static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1637{ 1615{
1638 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1616 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1653 struct btrfs_transaction *prev_trans = NULL; 1631 struct btrfs_transaction *prev_trans = NULL;
1654 int ret; 1632 int ret;
1655 1633
1656 ret = btrfs_run_ordered_operations(trans, root, 0);
1657 if (ret) {
1658 btrfs_abort_transaction(trans, root, ret);
1659 btrfs_end_transaction(trans, root);
1660 return ret;
1661 }
1662
1663 /* Stop the commit early if ->aborted is set */ 1634 /* Stop the commit early if ->aborted is set */
1664 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { 1635 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1665 ret = cur_trans->aborted; 1636 ret = cur_trans->aborted;
@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1740 if (ret) 1711 if (ret)
1741 goto cleanup_transaction; 1712 goto cleanup_transaction;
1742 1713
1743 ret = btrfs_flush_all_pending_stuffs(trans, root); 1714 ret = btrfs_run_delayed_items(trans, root);
1744 if (ret) 1715 if (ret)
1745 goto cleanup_transaction; 1716 goto cleanup_transaction;
1746 1717
@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1748 extwriter_counter_read(cur_trans) == 0); 1719 extwriter_counter_read(cur_trans) == 0);
1749 1720
1750 /* some pending stuffs might be added after the previous flush. */ 1721 /* some pending stuffs might be added after the previous flush. */
1751 ret = btrfs_flush_all_pending_stuffs(trans, root); 1722 ret = btrfs_run_delayed_items(trans, root);
1752 if (ret) 1723 if (ret)
1753 goto cleanup_transaction; 1724 goto cleanup_transaction;
1754 1725
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7dd558ed0716..579be51b27e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -55,7 +55,6 @@ struct btrfs_transaction {
55 wait_queue_head_t writer_wait; 55 wait_queue_head_t writer_wait;
56 wait_queue_head_t commit_wait; 56 wait_queue_head_t commit_wait;
57 struct list_head pending_snapshots; 57 struct list_head pending_snapshots;
58 struct list_head ordered_operations;
59 struct list_head pending_chunks; 58 struct list_head pending_chunks;
60 struct list_head switch_commits; 59 struct list_head switch_commits;
61 struct btrfs_delayed_ref_root delayed_refs; 60 struct btrfs_delayed_ref_root delayed_refs;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 7f78cbf5cf41..4c29db604bbe 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
57int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); 57int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
58int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 58int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
59 u64 *old_aux, gfp_t gfp_mask); 59 u64 *old_aux, gfp_t gfp_mask);
60
61/* just like ulist_add_merge() but take a pointer for the aux data */
62static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
63 void **old_aux, gfp_t gfp_mask)
64{
65#if BITS_PER_LONG == 32
66 u64 old64 = (uintptr_t)*old_aux;
67 int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
68 *old_aux = (void *)((uintptr_t)old64);
69 return ret;
70#else
71 return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
72#endif
73}
74
60struct ulist_node *ulist_next(struct ulist *ulist, 75struct ulist_node *ulist_next(struct ulist *ulist,
61 struct ulist_iterator *uiter); 76 struct ulist_iterator *uiter);
62 77
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 469f2e8657e8..cebf2ebefb55 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -172,14 +172,24 @@ out:
172int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir) 172int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir)
173{ 173{
174 struct posix_acl *default_acl, *acl; 174 struct posix_acl *default_acl, *acl;
175 umode_t new_mode = inode->i_mode;
175 int error; 176 int error;
176 177
177 error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); 178 error = posix_acl_create(dir, &new_mode, &default_acl, &acl);
178 if (error) 179 if (error)
179 return error; 180 return error;
180 181
181 if (!default_acl && !acl) 182 if (!default_acl && !acl) {
182 cache_no_acl(inode); 183 cache_no_acl(inode);
184 if (new_mode != inode->i_mode) {
185 struct iattr newattrs = {
186 .ia_mode = new_mode,
187 .ia_valid = ATTR_MODE,
188 };
189 error = ceph_setattr(dentry, &newattrs);
190 }
191 return error;
192 }
183 193
184 if (default_acl) { 194 if (default_acl) {
185 error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 195 error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1fde164b74b5..6d1cd45dca89 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3277,7 +3277,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
3277 rel->ino = cpu_to_le64(ceph_ino(inode)); 3277 rel->ino = cpu_to_le64(ceph_ino(inode));
3278 rel->cap_id = cpu_to_le64(cap->cap_id); 3278 rel->cap_id = cpu_to_le64(cap->cap_id);
3279 rel->seq = cpu_to_le32(cap->seq); 3279 rel->seq = cpu_to_le32(cap->seq);
3280 rel->issue_seq = cpu_to_le32(cap->issue_seq), 3280 rel->issue_seq = cpu_to_le32(cap->issue_seq);
3281 rel->mseq = cpu_to_le32(cap->mseq); 3281 rel->mseq = cpu_to_le32(cap->mseq);
3282 rel->caps = cpu_to_le32(cap->implemented); 3282 rel->caps = cpu_to_le32(cap->implemented);
3283 rel->wanted = cpu_to_le32(cap->mds_wanted); 3283 rel->wanted = cpu_to_le32(cap->mds_wanted);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 302085100c28..2eb02f80a0ab 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -423,6 +423,9 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
423 dout("sync_read on file %p %llu~%u %s\n", file, off, 423 dout("sync_read on file %p %llu~%u %s\n", file, off,
424 (unsigned)len, 424 (unsigned)len,
425 (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); 425 (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
426
427 if (!len)
428 return 0;
426 /* 429 /*
427 * flush any page cache pages in this range. this 430 * flush any page cache pages in this range. this
428 * will make concurrent normal and sync io slow, 431 * will make concurrent normal and sync io slow,
@@ -470,8 +473,11 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
470 size_t left = ret; 473 size_t left = ret;
471 474
472 while (left) { 475 while (left) {
473 int copy = min_t(size_t, PAGE_SIZE, left); 476 size_t page_off = off & ~PAGE_MASK;
474 l = copy_page_to_iter(pages[k++], 0, copy, i); 477 size_t copy = min_t(size_t,
478 PAGE_SIZE - page_off, left);
479 l = copy_page_to_iter(pages[k++], page_off,
480 copy, i);
475 off += l; 481 off += l;
476 left -= l; 482 left -= l;
477 if (l < copy) 483 if (l < copy)
@@ -531,7 +537,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
531 * objects, rollback on failure, etc.) 537 * objects, rollback on failure, etc.)
532 */ 538 */
533static ssize_t 539static ssize_t
534ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) 540ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
535{ 541{
536 struct file *file = iocb->ki_filp; 542 struct file *file = iocb->ki_filp;
537 struct inode *inode = file_inode(file); 543 struct inode *inode = file_inode(file);
@@ -547,7 +553,6 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
547 int check_caps = 0; 553 int check_caps = 0;
548 int ret; 554 int ret;
549 struct timespec mtime = CURRENT_TIME; 555 struct timespec mtime = CURRENT_TIME;
550 loff_t pos = iocb->ki_pos;
551 size_t count = iov_iter_count(from); 556 size_t count = iov_iter_count(from);
552 557
553 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) 558 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -646,7 +651,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
646 * correct atomic write, we should e.g. take write locks on all 651 * correct atomic write, we should e.g. take write locks on all
647 * objects, rollback on failure, etc.) 652 * objects, rollback on failure, etc.)
648 */ 653 */
649static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) 654static ssize_t
655ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
650{ 656{
651 struct file *file = iocb->ki_filp; 657 struct file *file = iocb->ki_filp;
652 struct inode *inode = file_inode(file); 658 struct inode *inode = file_inode(file);
@@ -663,7 +669,6 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
663 int check_caps = 0; 669 int check_caps = 0;
664 int ret; 670 int ret;
665 struct timespec mtime = CURRENT_TIME; 671 struct timespec mtime = CURRENT_TIME;
666 loff_t pos = iocb->ki_pos;
667 size_t count = iov_iter_count(from); 672 size_t count = iov_iter_count(from);
668 673
669 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) 674 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -918,9 +923,9 @@ retry_snap:
918 /* we might need to revert back to that point */ 923 /* we might need to revert back to that point */
919 data = *from; 924 data = *from;
920 if (file->f_flags & O_DIRECT) 925 if (file->f_flags & O_DIRECT)
921 written = ceph_sync_direct_write(iocb, &data); 926 written = ceph_sync_direct_write(iocb, &data, pos);
922 else 927 else
923 written = ceph_sync_write(iocb, &data); 928 written = ceph_sync_write(iocb, &data, pos);
924 if (written == -EOLDSNAPC) { 929 if (written == -EOLDSNAPC) {
925 dout("aio_write %p %llx.%llx %llu~%u" 930 dout("aio_write %p %llx.%llx %llu~%u"
926 "got EOLDSNAPC, retrying\n", 931 "got EOLDSNAPC, retrying\n",
@@ -1177,6 +1182,9 @@ static long ceph_fallocate(struct file *file, int mode,
1177 loff_t endoff = 0; 1182 loff_t endoff = 0;
1178 loff_t size; 1183 loff_t size;
1179 1184
1185 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
1186 return -EOPNOTSUPP;
1187
1180 if (!S_ISREG(inode->i_mode)) 1188 if (!S_ISREG(inode->i_mode))
1181 return -EOPNOTSUPP; 1189 return -EOPNOTSUPP;
1182 1190
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 92a2548278fc..bad07c09f91e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1904,6 +1904,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
1904 req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); 1904 req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
1905 1905
1906 if (req->r_got_unsafe) { 1906 if (req->r_got_unsafe) {
1907 void *p;
1907 /* 1908 /*
1908 * Replay. Do not regenerate message (and rebuild 1909 * Replay. Do not regenerate message (and rebuild
1909 * paths, etc.); just use the original message. 1910 * paths, etc.); just use the original message.
@@ -1924,8 +1925,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
1924 1925
1925 /* remove cap/dentry releases from message */ 1926 /* remove cap/dentry releases from message */
1926 rhead->num_releases = 0; 1927 rhead->num_releases = 0;
1927 msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset); 1928
1928 msg->front.iov_len = req->r_request_release_offset; 1929 /* time stamp */
1930 p = msg->front.iov_base + req->r_request_release_offset;
1931 ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp));
1932
1933 msg->front.iov_len = p - msg->front.iov_base;
1934 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
1929 return 0; 1935 return 0;
1930 } 1936 }
1931 1937
@@ -2061,11 +2067,12 @@ static void __wake_requests(struct ceph_mds_client *mdsc,
2061static void kick_requests(struct ceph_mds_client *mdsc, int mds) 2067static void kick_requests(struct ceph_mds_client *mdsc, int mds)
2062{ 2068{
2063 struct ceph_mds_request *req; 2069 struct ceph_mds_request *req;
2064 struct rb_node *p; 2070 struct rb_node *p = rb_first(&mdsc->request_tree);
2065 2071
2066 dout("kick_requests mds%d\n", mds); 2072 dout("kick_requests mds%d\n", mds);
2067 for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) { 2073 while (p) {
2068 req = rb_entry(p, struct ceph_mds_request, r_node); 2074 req = rb_entry(p, struct ceph_mds_request, r_node);
2075 p = rb_next(p);
2069 if (req->r_got_unsafe) 2076 if (req->r_got_unsafe)
2070 continue; 2077 continue;
2071 if (req->r_session && 2078 if (req->r_session &&
@@ -2248,6 +2255,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2248 */ 2255 */
2249 if (result == -ESTALE) { 2256 if (result == -ESTALE) {
2250 dout("got ESTALE on request %llu", req->r_tid); 2257 dout("got ESTALE on request %llu", req->r_tid);
2258 req->r_resend_mds = -1;
2251 if (req->r_direct_mode != USE_AUTH_MDS) { 2259 if (req->r_direct_mode != USE_AUTH_MDS) {
2252 dout("not using auth, setting for that now"); 2260 dout("not using auth, setting for that now");
2253 req->r_direct_mode = USE_AUTH_MDS; 2261 req->r_direct_mode = USE_AUTH_MDS;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 06150fd745ac..f6e12377335c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -755,7 +755,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
755 goto out; 755 goto out;
756 } 756 }
757 } else { 757 } else {
758 root = d_obtain_alias(inode); 758 root = d_obtain_root(inode);
759 } 759 }
760 ceph_init_dentry(root); 760 ceph_init_dentry(root);
761 dout("open_root_inode success, root dentry is %p\n", root); 761 dout("open_root_inode success, root dentry is %p\n", root);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index c9c2b887381e..12f58d22e017 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -592,12 +592,12 @@ start:
592 xattr_version = ci->i_xattrs.version; 592 xattr_version = ci->i_xattrs.version;
593 spin_unlock(&ci->i_ceph_lock); 593 spin_unlock(&ci->i_ceph_lock);
594 594
595 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), 595 xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
596 GFP_NOFS); 596 GFP_NOFS);
597 err = -ENOMEM; 597 err = -ENOMEM;
598 if (!xattrs) 598 if (!xattrs)
599 goto bad_lock; 599 goto bad_lock;
600 memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *)); 600
601 for (i = 0; i < numattr; i++) { 601 for (i = 0; i < numattr; i++) {
602 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), 602 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
603 GFP_NOFS); 603 GFP_NOFS);
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index f3ac4154cbb6..44ec72684df5 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -213,7 +213,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
213 tcon->nativeFileSystem); 213 tcon->nativeFileSystem);
214 } 214 }
215 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" 215 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
216 "\n\tPathComponentMax: %d Status: 0x%d", 216 "\n\tPathComponentMax: %d Status: %d",
217 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), 217 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
218 le32_to_cpu(tcon->fsAttrInfo.Attributes), 218 le32_to_cpu(tcon->fsAttrInfo.Attributes),
219 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), 219 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 888398067420..ac4f260155c8 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -848,7 +848,7 @@ const struct inode_operations cifs_dir_inode_ops = {
848 .link = cifs_hardlink, 848 .link = cifs_hardlink,
849 .mkdir = cifs_mkdir, 849 .mkdir = cifs_mkdir,
850 .rmdir = cifs_rmdir, 850 .rmdir = cifs_rmdir,
851 .rename = cifs_rename, 851 .rename2 = cifs_rename2,
852 .permission = cifs_permission, 852 .permission = cifs_permission,
853/* revalidate:cifs_revalidate, */ 853/* revalidate:cifs_revalidate, */
854 .setattr = cifs_setattr, 854 .setattr = cifs_setattr,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 70f178a7c759..b0fafa499505 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -68,8 +68,8 @@ extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
68extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); 68extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
69extern int cifs_mkdir(struct inode *, struct dentry *, umode_t); 69extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
70extern int cifs_rmdir(struct inode *, struct dentry *); 70extern int cifs_rmdir(struct inode *, struct dentry *);
71extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 71extern int cifs_rename2(struct inode *, struct dentry *, struct inode *,
72 struct dentry *); 72 struct dentry *, unsigned int);
73extern int cifs_revalidate_file_attr(struct file *filp); 73extern int cifs_revalidate_file_attr(struct file *filp);
74extern int cifs_revalidate_dentry_attr(struct dentry *); 74extern int cifs_revalidate_dentry_attr(struct dentry *);
75extern int cifs_revalidate_file(struct file *filp); 75extern int cifs_revalidate_file(struct file *filp);
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
136extern const struct export_operations cifs_export_ops; 136extern const struct export_operations cifs_export_ops;
137#endif /* CONFIG_CIFS_NFSD_EXPORT */ 137#endif /* CONFIG_CIFS_NFSD_EXPORT */
138 138
139#define CIFS_VERSION "2.03" 139#define CIFS_VERSION "2.04"
140#endif /* _CIFSFS_H */ 140#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index de6aed8c78e5..0012e1e291d4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -404,6 +404,11 @@ struct smb_version_operations {
404 const struct cifs_fid *, u32 *); 404 const struct cifs_fid *, u32 *);
405 int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, 405 int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
406 int); 406 int);
407 /* writepages retry size */
408 unsigned int (*wp_retry_size)(struct inode *);
409 /* get mtu credits */
410 int (*wait_mtu_credits)(struct TCP_Server_Info *, unsigned int,
411 unsigned int *, unsigned int *);
407}; 412};
408 413
409struct smb_version_values { 414struct smb_version_values {
@@ -640,6 +645,16 @@ add_credits(struct TCP_Server_Info *server, const unsigned int add,
640} 645}
641 646
642static inline void 647static inline void
648add_credits_and_wake_if(struct TCP_Server_Info *server, const unsigned int add,
649 const int optype)
650{
651 if (add) {
652 server->ops->add_credits(server, add, optype);
653 wake_up(&server->request_q);
654 }
655}
656
657static inline void
643set_credits(struct TCP_Server_Info *server, const int val) 658set_credits(struct TCP_Server_Info *server, const int val)
644{ 659{
645 server->ops->set_credits(server, val); 660 server->ops->set_credits(server, val);
@@ -1044,6 +1059,7 @@ struct cifs_readdata {
1044 struct address_space *mapping; 1059 struct address_space *mapping;
1045 __u64 offset; 1060 __u64 offset;
1046 unsigned int bytes; 1061 unsigned int bytes;
1062 unsigned int got_bytes;
1047 pid_t pid; 1063 pid_t pid;
1048 int result; 1064 int result;
1049 struct work_struct work; 1065 struct work_struct work;
@@ -1053,6 +1069,7 @@ struct cifs_readdata {
1053 struct kvec iov; 1069 struct kvec iov;
1054 unsigned int pagesz; 1070 unsigned int pagesz;
1055 unsigned int tailsz; 1071 unsigned int tailsz;
1072 unsigned int credits;
1056 unsigned int nr_pages; 1073 unsigned int nr_pages;
1057 struct page *pages[]; 1074 struct page *pages[];
1058}; 1075};
@@ -1073,6 +1090,7 @@ struct cifs_writedata {
1073 int result; 1090 int result;
1074 unsigned int pagesz; 1091 unsigned int pagesz;
1075 unsigned int tailsz; 1092 unsigned int tailsz;
1093 unsigned int credits;
1076 unsigned int nr_pages; 1094 unsigned int nr_pages;
1077 struct page *pages[]; 1095 struct page *pages[];
1078}; 1096};
@@ -1398,6 +1416,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
1398#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ 1416#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
1399#define CIFS_NEG_OP 0x0200 /* negotiate request */ 1417#define CIFS_NEG_OP 0x0200 /* negotiate request */
1400#define CIFS_OP_MASK 0x0380 /* mask request type */ 1418#define CIFS_OP_MASK 0x0380 /* mask request type */
1419#define CIFS_HAS_CREDITS 0x0400 /* already has credits */
1401 1420
1402/* Security Flags: indicate type of session setup needed */ 1421/* Security Flags: indicate type of session setup needed */
1403#define CIFSSEC_MAY_SIGN 0x00001 1422#define CIFSSEC_MAY_SIGN 0x00001
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ca7980a1e303..c31ce98c1704 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,6 +36,7 @@ extern struct smb_hdr *cifs_buf_get(void);
36extern void cifs_buf_release(void *); 36extern void cifs_buf_release(void *);
37extern struct smb_hdr *cifs_small_buf_get(void); 37extern struct smb_hdr *cifs_small_buf_get(void);
38extern void cifs_small_buf_release(void *); 38extern void cifs_small_buf_release(void *);
39extern void free_rsp_buf(int, void *);
39extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, 40extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
40 struct kvec *iov); 41 struct kvec *iov);
41extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, 42extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
@@ -89,6 +90,9 @@ extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
89 struct smb_rqst *); 90 struct smb_rqst *);
90extern int cifs_check_receive(struct mid_q_entry *mid, 91extern int cifs_check_receive(struct mid_q_entry *mid,
91 struct TCP_Server_Info *server, bool log_error); 92 struct TCP_Server_Info *server, bool log_error);
93extern int cifs_wait_mtu_credits(struct TCP_Server_Info *server,
94 unsigned int size, unsigned int *num,
95 unsigned int *credits);
92extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 96extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
93 struct kvec *, int /* nvec to send */, 97 struct kvec *, int /* nvec to send */,
94 int * /* type of buf returned */ , const int flags); 98 int * /* type of buf returned */ , const int flags);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6ce4e0954b98..66f65001a6d8 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -196,10 +196,6 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
196 if (rc) 196 if (rc)
197 goto out; 197 goto out;
198 198
199 /*
200 * FIXME: check if wsize needs updated due to negotiated smb buffer
201 * size shrinking
202 */
203 atomic_inc(&tconInfoReconnectCount); 199 atomic_inc(&tconInfoReconnectCount);
204 200
205 /* tell server Unix caps we support */ 201 /* tell server Unix caps we support */
@@ -1517,7 +1513,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1517 return length; 1513 return length;
1518 1514
1519 server->total_read += length; 1515 server->total_read += length;
1520 rdata->bytes = length;
1521 1516
1522 cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n", 1517 cifs_dbg(FYI, "total_read=%u buflen=%u remaining=%u\n",
1523 server->total_read, buflen, data_len); 1518 server->total_read, buflen, data_len);
@@ -1560,12 +1555,18 @@ cifs_readv_callback(struct mid_q_entry *mid)
1560 rc); 1555 rc);
1561 } 1556 }
1562 /* FIXME: should this be counted toward the initiating task? */ 1557 /* FIXME: should this be counted toward the initiating task? */
1563 task_io_account_read(rdata->bytes); 1558 task_io_account_read(rdata->got_bytes);
1564 cifs_stats_bytes_read(tcon, rdata->bytes); 1559 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1565 break; 1560 break;
1566 case MID_REQUEST_SUBMITTED: 1561 case MID_REQUEST_SUBMITTED:
1567 case MID_RETRY_NEEDED: 1562 case MID_RETRY_NEEDED:
1568 rdata->result = -EAGAIN; 1563 rdata->result = -EAGAIN;
1564 if (server->sign && rdata->got_bytes)
1565 /* reset bytes number since we can not check a sign */
1566 rdata->got_bytes = 0;
1567 /* FIXME: should this be counted toward the initiating task? */
1568 task_io_account_read(rdata->got_bytes);
1569 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1569 break; 1570 break;
1570 default: 1571 default:
1571 rdata->result = -EIO; 1572 rdata->result = -EIO;
@@ -1734,10 +1735,7 @@ CIFSSMBRead(const unsigned int xid, struct cifs_io_parms *io_parms,
1734 1735
1735/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 1736/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
1736 if (*buf) { 1737 if (*buf) {
1737 if (resp_buf_type == CIFS_SMALL_BUFFER) 1738 free_rsp_buf(resp_buf_type, iov[0].iov_base);
1738 cifs_small_buf_release(iov[0].iov_base);
1739 else if (resp_buf_type == CIFS_LARGE_BUFFER)
1740 cifs_buf_release(iov[0].iov_base);
1741 } else if (resp_buf_type != CIFS_NO_BUFFER) { 1739 } else if (resp_buf_type != CIFS_NO_BUFFER) {
1742 /* return buffer to caller to free */ 1740 /* return buffer to caller to free */
1743 *buf = iov[0].iov_base; 1741 *buf = iov[0].iov_base;
@@ -1899,28 +1897,80 @@ cifs_writedata_release(struct kref *refcount)
1899static void 1897static void
1900cifs_writev_requeue(struct cifs_writedata *wdata) 1898cifs_writev_requeue(struct cifs_writedata *wdata)
1901{ 1899{
1902 int i, rc; 1900 int i, rc = 0;
1903 struct inode *inode = wdata->cfile->dentry->d_inode; 1901 struct inode *inode = wdata->cfile->dentry->d_inode;
1904 struct TCP_Server_Info *server; 1902 struct TCP_Server_Info *server;
1903 unsigned int rest_len;
1905 1904
1906 for (i = 0; i < wdata->nr_pages; i++) { 1905 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1907 lock_page(wdata->pages[i]); 1906 i = 0;
1908 clear_page_dirty_for_io(wdata->pages[i]); 1907 rest_len = wdata->bytes;
1909 }
1910
1911 do { 1908 do {
1912 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 1909 struct cifs_writedata *wdata2;
1913 rc = server->ops->async_writev(wdata, cifs_writedata_release); 1910 unsigned int j, nr_pages, wsize, tailsz, cur_len;
1914 } while (rc == -EAGAIN); 1911
1912 wsize = server->ops->wp_retry_size(inode);
1913 if (wsize < rest_len) {
1914 nr_pages = wsize / PAGE_CACHE_SIZE;
1915 if (!nr_pages) {
1916 rc = -ENOTSUPP;
1917 break;
1918 }
1919 cur_len = nr_pages * PAGE_CACHE_SIZE;
1920 tailsz = PAGE_CACHE_SIZE;
1921 } else {
1922 nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE);
1923 cur_len = rest_len;
1924 tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE;
1925 }
1915 1926
1916 for (i = 0; i < wdata->nr_pages; i++) { 1927 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
1917 unlock_page(wdata->pages[i]); 1928 if (!wdata2) {
1918 if (rc != 0) { 1929 rc = -ENOMEM;
1919 SetPageError(wdata->pages[i]); 1930 break;
1920 end_page_writeback(wdata->pages[i]);
1921 page_cache_release(wdata->pages[i]);
1922 } 1931 }
1923 } 1932
1933 for (j = 0; j < nr_pages; j++) {
1934 wdata2->pages[j] = wdata->pages[i + j];
1935 lock_page(wdata2->pages[j]);
1936 clear_page_dirty_for_io(wdata2->pages[j]);
1937 }
1938
1939 wdata2->sync_mode = wdata->sync_mode;
1940 wdata2->nr_pages = nr_pages;
1941 wdata2->offset = page_offset(wdata2->pages[0]);
1942 wdata2->pagesz = PAGE_CACHE_SIZE;
1943 wdata2->tailsz = tailsz;
1944 wdata2->bytes = cur_len;
1945
1946 wdata2->cfile = find_writable_file(CIFS_I(inode), false);
1947 if (!wdata2->cfile) {
1948 cifs_dbg(VFS, "No writable handles for inode\n");
1949 rc = -EBADF;
1950 break;
1951 }
1952 wdata2->pid = wdata2->cfile->pid;
1953 rc = server->ops->async_writev(wdata2, cifs_writedata_release);
1954
1955 for (j = 0; j < nr_pages; j++) {
1956 unlock_page(wdata2->pages[j]);
1957 if (rc != 0 && rc != -EAGAIN) {
1958 SetPageError(wdata2->pages[j]);
1959 end_page_writeback(wdata2->pages[j]);
1960 page_cache_release(wdata2->pages[j]);
1961 }
1962 }
1963
1964 if (rc) {
1965 kref_put(&wdata2->refcount, cifs_writedata_release);
1966 if (rc == -EAGAIN)
1967 continue;
1968 break;
1969 }
1970
1971 rest_len -= cur_len;
1972 i += nr_pages;
1973 } while (i < wdata->nr_pages);
1924 1974
1925 mapping_set_error(inode->i_mapping, rc); 1975 mapping_set_error(inode->i_mapping, rc);
1926 kref_put(&wdata->refcount, cifs_writedata_release); 1976 kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2203,10 +2253,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
2203 } 2253 }
2204 2254
2205/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 2255/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
2206 if (resp_buf_type == CIFS_SMALL_BUFFER) 2256 free_rsp_buf(resp_buf_type, iov[0].iov_base);
2207 cifs_small_buf_release(iov[0].iov_base);
2208 else if (resp_buf_type == CIFS_LARGE_BUFFER)
2209 cifs_buf_release(iov[0].iov_base);
2210 2257
2211 /* Note: On -EAGAIN error only caller can retry on handle based calls 2258 /* Note: On -EAGAIN error only caller can retry on handle based calls
2212 since file handle passed in no longer valid */ 2259 since file handle passed in no longer valid */
@@ -2451,10 +2498,7 @@ plk_err_exit:
2451 if (pSMB) 2498 if (pSMB)
2452 cifs_small_buf_release(pSMB); 2499 cifs_small_buf_release(pSMB);
2453 2500
2454 if (resp_buf_type == CIFS_SMALL_BUFFER) 2501 free_rsp_buf(resp_buf_type, iov[0].iov_base);
2455 cifs_small_buf_release(iov[0].iov_base);
2456 else if (resp_buf_type == CIFS_LARGE_BUFFER)
2457 cifs_buf_release(iov[0].iov_base);
2458 2502
2459 /* Note: On -EAGAIN error only caller can retry on handle based calls 2503 /* Note: On -EAGAIN error only caller can retry on handle based calls
2460 since file handle passed in no longer valid */ 2504 since file handle passed in no longer valid */
@@ -3838,10 +3882,7 @@ CIFSSMBGetCIFSACL(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid,
3838 } 3882 }
3839 } 3883 }
3840qsec_out: 3884qsec_out:
3841 if (buf_type == CIFS_SMALL_BUFFER) 3885 free_rsp_buf(buf_type, iov[0].iov_base);
3842 cifs_small_buf_release(iov[0].iov_base);
3843 else if (buf_type == CIFS_LARGE_BUFFER)
3844 cifs_buf_release(iov[0].iov_base);
3845/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */ 3886/* cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
3846 return rc; 3887 return rc;
3847} 3888}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b98366f21f9e..03ed8a09581c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -557,7 +557,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
557 try_to_freeze(); 557 try_to_freeze();
558 558
559 if (server_unresponsive(server)) { 559 if (server_unresponsive(server)) {
560 total_read = -EAGAIN; 560 total_read = -ECONNABORTED;
561 break; 561 break;
562 } 562 }
563 563
@@ -571,7 +571,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
571 break; 571 break;
572 } else if (server->tcpStatus == CifsNeedReconnect) { 572 } else if (server->tcpStatus == CifsNeedReconnect) {
573 cifs_reconnect(server); 573 cifs_reconnect(server);
574 total_read = -EAGAIN; 574 total_read = -ECONNABORTED;
575 break; 575 break;
576 } else if (length == -ERESTARTSYS || 576 } else if (length == -ERESTARTSYS ||
577 length == -EAGAIN || 577 length == -EAGAIN ||
@@ -588,7 +588,7 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
588 cifs_dbg(FYI, "Received no data or error: expecting %d\n" 588 cifs_dbg(FYI, "Received no data or error: expecting %d\n"
589 "got %d", to_read, length); 589 "got %d", to_read, length);
590 cifs_reconnect(server); 590 cifs_reconnect(server);
591 total_read = -EAGAIN; 591 total_read = -ECONNABORTED;
592 break; 592 break;
593 } 593 }
594 } 594 }
@@ -786,7 +786,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
786 cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length); 786 cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
787 cifs_reconnect(server); 787 cifs_reconnect(server);
788 wake_up(&server->response_q); 788 wake_up(&server->response_q);
789 return -EAGAIN; 789 return -ECONNABORTED;
790 } 790 }
791 791
792 /* switch to large buffer if too big for a small one */ 792 /* switch to large buffer if too big for a small one */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b88b1ade4d3d..4ab2f79ffa7a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1670,8 +1670,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1670 break; 1670 break;
1671 } 1671 }
1672 1672
1673 len = min((size_t)cifs_sb->wsize, 1673 len = min(server->ops->wp_retry_size(dentry->d_inode),
1674 write_size - total_written); 1674 (unsigned int)write_size - total_written);
1675 /* iov[0] is reserved for smb header */ 1675 /* iov[0] is reserved for smb header */
1676 iov[1].iov_base = (char *)write_data + total_written; 1676 iov[1].iov_base = (char *)write_data + total_written;
1677 iov[1].iov_len = len; 1677 iov[1].iov_len = len;
@@ -1878,15 +1878,163 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1878 return rc; 1878 return rc;
1879} 1879}
1880 1880
1881static struct cifs_writedata *
1882wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1883 pgoff_t end, pgoff_t *index,
1884 unsigned int *found_pages)
1885{
1886 unsigned int nr_pages;
1887 struct page **pages;
1888 struct cifs_writedata *wdata;
1889
1890 wdata = cifs_writedata_alloc((unsigned int)tofind,
1891 cifs_writev_complete);
1892 if (!wdata)
1893 return NULL;
1894
1895 /*
1896 * find_get_pages_tag seems to return a max of 256 on each
1897 * iteration, so we must call it several times in order to
1898 * fill the array or the wsize is effectively limited to
1899 * 256 * PAGE_CACHE_SIZE.
1900 */
1901 *found_pages = 0;
1902 pages = wdata->pages;
1903 do {
1904 nr_pages = find_get_pages_tag(mapping, index,
1905 PAGECACHE_TAG_DIRTY, tofind,
1906 pages);
1907 *found_pages += nr_pages;
1908 tofind -= nr_pages;
1909 pages += nr_pages;
1910 } while (nr_pages && tofind && *index <= end);
1911
1912 return wdata;
1913}
1914
1915static unsigned int
1916wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1917 struct address_space *mapping,
1918 struct writeback_control *wbc,
1919 pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1920{
1921 unsigned int nr_pages = 0, i;
1922 struct page *page;
1923
1924 for (i = 0; i < found_pages; i++) {
1925 page = wdata->pages[i];
1926 /*
1927 * At this point we hold neither mapping->tree_lock nor
1928 * lock on the page itself: the page may be truncated or
1929 * invalidated (changing page->mapping to NULL), or even
1930 * swizzled back from swapper_space to tmpfs file
1931 * mapping
1932 */
1933
1934 if (nr_pages == 0)
1935 lock_page(page);
1936 else if (!trylock_page(page))
1937 break;
1938
1939 if (unlikely(page->mapping != mapping)) {
1940 unlock_page(page);
1941 break;
1942 }
1943
1944 if (!wbc->range_cyclic && page->index > end) {
1945 *done = true;
1946 unlock_page(page);
1947 break;
1948 }
1949
1950 if (*next && (page->index != *next)) {
1951 /* Not next consecutive page */
1952 unlock_page(page);
1953 break;
1954 }
1955
1956 if (wbc->sync_mode != WB_SYNC_NONE)
1957 wait_on_page_writeback(page);
1958
1959 if (PageWriteback(page) ||
1960 !clear_page_dirty_for_io(page)) {
1961 unlock_page(page);
1962 break;
1963 }
1964
1965 /*
1966 * This actually clears the dirty bit in the radix tree.
1967 * See cifs_writepage() for more commentary.
1968 */
1969 set_page_writeback(page);
1970 if (page_offset(page) >= i_size_read(mapping->host)) {
1971 *done = true;
1972 unlock_page(page);
1973 end_page_writeback(page);
1974 break;
1975 }
1976
1977 wdata->pages[i] = page;
1978 *next = page->index + 1;
1979 ++nr_pages;
1980 }
1981
1982 /* reset index to refind any pages skipped */
1983 if (nr_pages == 0)
1984 *index = wdata->pages[0]->index + 1;
1985
1986 /* put any pages we aren't going to use */
1987 for (i = nr_pages; i < found_pages; i++) {
1988 page_cache_release(wdata->pages[i]);
1989 wdata->pages[i] = NULL;
1990 }
1991
1992 return nr_pages;
1993}
1994
1995static int
1996wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
1997 struct address_space *mapping, struct writeback_control *wbc)
1998{
1999 int rc = 0;
2000 struct TCP_Server_Info *server;
2001 unsigned int i;
2002
2003 wdata->sync_mode = wbc->sync_mode;
2004 wdata->nr_pages = nr_pages;
2005 wdata->offset = page_offset(wdata->pages[0]);
2006 wdata->pagesz = PAGE_CACHE_SIZE;
2007 wdata->tailsz = min(i_size_read(mapping->host) -
2008 page_offset(wdata->pages[nr_pages - 1]),
2009 (loff_t)PAGE_CACHE_SIZE);
2010 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2011
2012 if (wdata->cfile != NULL)
2013 cifsFileInfo_put(wdata->cfile);
2014 wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2015 if (!wdata->cfile) {
2016 cifs_dbg(VFS, "No writable handles for inode\n");
2017 rc = -EBADF;
2018 } else {
2019 wdata->pid = wdata->cfile->pid;
2020 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2021 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2022 }
2023
2024 for (i = 0; i < nr_pages; ++i)
2025 unlock_page(wdata->pages[i]);
2026
2027 return rc;
2028}
2029
1881static int cifs_writepages(struct address_space *mapping, 2030static int cifs_writepages(struct address_space *mapping,
1882 struct writeback_control *wbc) 2031 struct writeback_control *wbc)
1883{ 2032{
1884 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); 2033 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2034 struct TCP_Server_Info *server;
1885 bool done = false, scanned = false, range_whole = false; 2035 bool done = false, scanned = false, range_whole = false;
1886 pgoff_t end, index; 2036 pgoff_t end, index;
1887 struct cifs_writedata *wdata; 2037 struct cifs_writedata *wdata;
1888 struct TCP_Server_Info *server;
1889 struct page *page;
1890 int rc = 0; 2038 int rc = 0;
1891 2039
1892 /* 2040 /*
@@ -1906,152 +2054,50 @@ static int cifs_writepages(struct address_space *mapping,
1906 range_whole = true; 2054 range_whole = true;
1907 scanned = true; 2055 scanned = true;
1908 } 2056 }
2057 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
1909retry: 2058retry:
1910 while (!done && index <= end) { 2059 while (!done && index <= end) {
1911 unsigned int i, nr_pages, found_pages; 2060 unsigned int i, nr_pages, found_pages, wsize, credits;
1912 pgoff_t next = 0, tofind; 2061 pgoff_t next = 0, tofind, saved_index = index;
1913 struct page **pages; 2062
2063 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2064 &wsize, &credits);
2065 if (rc)
2066 break;
1914 2067
1915 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, 2068 tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
1916 end - index) + 1;
1917 2069
1918 wdata = cifs_writedata_alloc((unsigned int)tofind, 2070 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
1919 cifs_writev_complete); 2071 &found_pages);
1920 if (!wdata) { 2072 if (!wdata) {
1921 rc = -ENOMEM; 2073 rc = -ENOMEM;
2074 add_credits_and_wake_if(server, credits, 0);
1922 break; 2075 break;
1923 } 2076 }
1924 2077
1925 /*
1926 * find_get_pages_tag seems to return a max of 256 on each
1927 * iteration, so we must call it several times in order to
1928 * fill the array or the wsize is effectively limited to
1929 * 256 * PAGE_CACHE_SIZE.
1930 */
1931 found_pages = 0;
1932 pages = wdata->pages;
1933 do {
1934 nr_pages = find_get_pages_tag(mapping, &index,
1935 PAGECACHE_TAG_DIRTY,
1936 tofind, pages);
1937 found_pages += nr_pages;
1938 tofind -= nr_pages;
1939 pages += nr_pages;
1940 } while (nr_pages && tofind && index <= end);
1941
1942 if (found_pages == 0) { 2078 if (found_pages == 0) {
1943 kref_put(&wdata->refcount, cifs_writedata_release); 2079 kref_put(&wdata->refcount, cifs_writedata_release);
2080 add_credits_and_wake_if(server, credits, 0);
1944 break; 2081 break;
1945 } 2082 }
1946 2083
1947 nr_pages = 0; 2084 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
1948 for (i = 0; i < found_pages; i++) { 2085 end, &index, &next, &done);
1949 page = wdata->pages[i];
1950 /*
1951 * At this point we hold neither mapping->tree_lock nor
1952 * lock on the page itself: the page may be truncated or
1953 * invalidated (changing page->mapping to NULL), or even
1954 * swizzled back from swapper_space to tmpfs file
1955 * mapping
1956 */
1957
1958 if (nr_pages == 0)
1959 lock_page(page);
1960 else if (!trylock_page(page))
1961 break;
1962
1963 if (unlikely(page->mapping != mapping)) {
1964 unlock_page(page);
1965 break;
1966 }
1967
1968 if (!wbc->range_cyclic && page->index > end) {
1969 done = true;
1970 unlock_page(page);
1971 break;
1972 }
1973
1974 if (next && (page->index != next)) {
1975 /* Not next consecutive page */
1976 unlock_page(page);
1977 break;
1978 }
1979
1980 if (wbc->sync_mode != WB_SYNC_NONE)
1981 wait_on_page_writeback(page);
1982
1983 if (PageWriteback(page) ||
1984 !clear_page_dirty_for_io(page)) {
1985 unlock_page(page);
1986 break;
1987 }
1988
1989 /*
1990 * This actually clears the dirty bit in the radix tree.
1991 * See cifs_writepage() for more commentary.
1992 */
1993 set_page_writeback(page);
1994
1995 if (page_offset(page) >= i_size_read(mapping->host)) {
1996 done = true;
1997 unlock_page(page);
1998 end_page_writeback(page);
1999 break;
2000 }
2001
2002 wdata->pages[i] = page;
2003 next = page->index + 1;
2004 ++nr_pages;
2005 }
2006
2007 /* reset index to refind any pages skipped */
2008 if (nr_pages == 0)
2009 index = wdata->pages[0]->index + 1;
2010
2011 /* put any pages we aren't going to use */
2012 for (i = nr_pages; i < found_pages; i++) {
2013 page_cache_release(wdata->pages[i]);
2014 wdata->pages[i] = NULL;
2015 }
2016 2086
2017 /* nothing to write? */ 2087 /* nothing to write? */
2018 if (nr_pages == 0) { 2088 if (nr_pages == 0) {
2019 kref_put(&wdata->refcount, cifs_writedata_release); 2089 kref_put(&wdata->refcount, cifs_writedata_release);
2090 add_credits_and_wake_if(server, credits, 0);
2020 continue; 2091 continue;
2021 } 2092 }
2022 2093
2023 wdata->sync_mode = wbc->sync_mode; 2094 wdata->credits = credits;
2024 wdata->nr_pages = nr_pages;
2025 wdata->offset = page_offset(wdata->pages[0]);
2026 wdata->pagesz = PAGE_CACHE_SIZE;
2027 wdata->tailsz =
2028 min(i_size_read(mapping->host) -
2029 page_offset(wdata->pages[nr_pages - 1]),
2030 (loff_t)PAGE_CACHE_SIZE);
2031 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2032 wdata->tailsz;
2033
2034 do {
2035 if (wdata->cfile != NULL)
2036 cifsFileInfo_put(wdata->cfile);
2037 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2038 false);
2039 if (!wdata->cfile) {
2040 cifs_dbg(VFS, "No writable handles for inode\n");
2041 rc = -EBADF;
2042 break;
2043 }
2044 wdata->pid = wdata->cfile->pid;
2045 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2046 rc = server->ops->async_writev(wdata,
2047 cifs_writedata_release);
2048 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2049 2095
2050 for (i = 0; i < nr_pages; ++i) 2096 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2051 unlock_page(wdata->pages[i]);
2052 2097
2053 /* send failure -- clean up the mess */ 2098 /* send failure -- clean up the mess */
2054 if (rc != 0) { 2099 if (rc != 0) {
2100 add_credits_and_wake_if(server, wdata->credits, 0);
2055 for (i = 0; i < nr_pages; ++i) { 2101 for (i = 0; i < nr_pages; ++i) {
2056 if (rc == -EAGAIN) 2102 if (rc == -EAGAIN)
2057 redirty_page_for_writepage(wbc, 2103 redirty_page_for_writepage(wbc,
@@ -2066,6 +2112,11 @@ retry:
2066 } 2112 }
2067 kref_put(&wdata->refcount, cifs_writedata_release); 2113 kref_put(&wdata->refcount, cifs_writedata_release);
2068 2114
2115 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2116 index = saved_index;
2117 continue;
2118 }
2119
2069 wbc->nr_to_write -= nr_pages; 2120 wbc->nr_to_write -= nr_pages;
2070 if (wbc->nr_to_write <= 0) 2121 if (wbc->nr_to_write <= 0)
2071 done = true; 2122 done = true;
@@ -2362,123 +2413,109 @@ cifs_uncached_writev_complete(struct work_struct *work)
2362 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2413 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2363} 2414}
2364 2415
2365/* attempt to send write to server, retry on any -EAGAIN errors */
2366static int 2416static int
2367cifs_uncached_retry_writev(struct cifs_writedata *wdata) 2417wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2418 size_t *len, unsigned long *num_pages)
2368{ 2419{
2369 int rc; 2420 size_t save_len, copied, bytes, cur_len = *len;
2370 struct TCP_Server_Info *server; 2421 unsigned long i, nr_pages = *num_pages;
2371 2422
2372 server = tlink_tcon(wdata->cfile->tlink)->ses->server; 2423 save_len = cur_len;
2424 for (i = 0; i < nr_pages; i++) {
2425 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2426 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2427 cur_len -= copied;
2428 /*
2429 * If we didn't copy as much as we expected, then that
2430 * may mean we trod into an unmapped area. Stop copying
2431 * at that point. On the next pass through the big
2432 * loop, we'll likely end up getting a zero-length
2433 * write and bailing out of it.
2434 */
2435 if (copied < bytes)
2436 break;
2437 }
2438 cur_len = save_len - cur_len;
2439 *len = cur_len;
2373 2440
2374 do { 2441 /*
2375 if (wdata->cfile->invalidHandle) { 2442 * If we have no data to send, then that probably means that
2376 rc = cifs_reopen_file(wdata->cfile, false); 2443 * the copy above failed altogether. That's most likely because
2377 if (rc != 0) 2444 * the address in the iovec was bogus. Return -EFAULT and let
2378 continue; 2445 * the caller free anything we allocated and bail out.
2379 } 2446 */
2380 rc = server->ops->async_writev(wdata, 2447 if (!cur_len)
2381 cifs_uncached_writedata_release); 2448 return -EFAULT;
2382 } while (rc == -EAGAIN);
2383 2449
2384 return rc; 2450 /*
2451 * i + 1 now represents the number of pages we actually used in
2452 * the copy phase above.
2453 */
2454 *num_pages = i + 1;
2455 return 0;
2385} 2456}
2386 2457
2387static ssize_t 2458static int
2388cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) 2459cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2460 struct cifsFileInfo *open_file,
2461 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2389{ 2462{
2390 unsigned long nr_pages, i; 2463 int rc = 0;
2391 size_t bytes, copied, len, cur_len; 2464 size_t cur_len;
2392 ssize_t total_written = 0; 2465 unsigned long nr_pages, num_pages, i;
2393 loff_t offset; 2466 struct cifs_writedata *wdata;
2394 struct cifsFileInfo *open_file; 2467 struct iov_iter saved_from;
2395 struct cifs_tcon *tcon; 2468 loff_t saved_offset = offset;
2396 struct cifs_sb_info *cifs_sb;
2397 struct cifs_writedata *wdata, *tmp;
2398 struct list_head wdata_list;
2399 int rc;
2400 pid_t pid; 2469 pid_t pid;
2401 2470 struct TCP_Server_Info *server;
2402 len = iov_iter_count(from);
2403 rc = generic_write_checks(file, poffset, &len, 0);
2404 if (rc)
2405 return rc;
2406
2407 if (!len)
2408 return 0;
2409
2410 iov_iter_truncate(from, len);
2411
2412 INIT_LIST_HEAD(&wdata_list);
2413 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2414 open_file = file->private_data;
2415 tcon = tlink_tcon(open_file->tlink);
2416
2417 if (!tcon->ses->server->ops->async_writev)
2418 return -ENOSYS;
2419
2420 offset = *poffset;
2421 2471
2422 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2472 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2423 pid = open_file->pid; 2473 pid = open_file->pid;
2424 else 2474 else
2425 pid = current->tgid; 2475 pid = current->tgid;
2426 2476
2477 server = tlink_tcon(open_file->tlink)->ses->server;
2478 memcpy(&saved_from, from, sizeof(struct iov_iter));
2479
2427 do { 2480 do {
2428 size_t save_len; 2481 unsigned int wsize, credits;
2482
2483 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2484 &wsize, &credits);
2485 if (rc)
2486 break;
2429 2487
2430 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); 2488 nr_pages = get_numpages(wsize, len, &cur_len);
2431 wdata = cifs_writedata_alloc(nr_pages, 2489 wdata = cifs_writedata_alloc(nr_pages,
2432 cifs_uncached_writev_complete); 2490 cifs_uncached_writev_complete);
2433 if (!wdata) { 2491 if (!wdata) {
2434 rc = -ENOMEM; 2492 rc = -ENOMEM;
2493 add_credits_and_wake_if(server, credits, 0);
2435 break; 2494 break;
2436 } 2495 }
2437 2496
2438 rc = cifs_write_allocate_pages(wdata->pages, nr_pages); 2497 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2439 if (rc) { 2498 if (rc) {
2440 kfree(wdata); 2499 kfree(wdata);
2500 add_credits_and_wake_if(server, credits, 0);
2441 break; 2501 break;
2442 } 2502 }
2443 2503
2444 save_len = cur_len; 2504 num_pages = nr_pages;
2445 for (i = 0; i < nr_pages; i++) { 2505 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2446 bytes = min_t(size_t, cur_len, PAGE_SIZE); 2506 if (rc) {
2447 copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
2448 from);
2449 cur_len -= copied;
2450 /*
2451 * If we didn't copy as much as we expected, then that
2452 * may mean we trod into an unmapped area. Stop copying
2453 * at that point. On the next pass through the big
2454 * loop, we'll likely end up getting a zero-length
2455 * write and bailing out of it.
2456 */
2457 if (copied < bytes)
2458 break;
2459 }
2460 cur_len = save_len - cur_len;
2461
2462 /*
2463 * If we have no data to send, then that probably means that
2464 * the copy above failed altogether. That's most likely because
2465 * the address in the iovec was bogus. Set the rc to -EFAULT,
2466 * free anything we allocated and bail out.
2467 */
2468 if (!cur_len) {
2469 for (i = 0; i < nr_pages; i++) 2507 for (i = 0; i < nr_pages; i++)
2470 put_page(wdata->pages[i]); 2508 put_page(wdata->pages[i]);
2471 kfree(wdata); 2509 kfree(wdata);
2472 rc = -EFAULT; 2510 add_credits_and_wake_if(server, credits, 0);
2473 break; 2511 break;
2474 } 2512 }
2475 2513
2476 /* 2514 /*
2477 * i + 1 now represents the number of pages we actually used in 2515 * Bring nr_pages down to the number of pages we actually used,
2478 * the copy phase above. Bring nr_pages down to that, and free 2516 * and free any pages that we didn't use.
2479 * any pages that we didn't use.
2480 */ 2517 */
2481 for ( ; nr_pages > i + 1; nr_pages--) 2518 for ( ; nr_pages > num_pages; nr_pages--)
2482 put_page(wdata->pages[nr_pages - 1]); 2519 put_page(wdata->pages[nr_pages - 1]);
2483 2520
2484 wdata->sync_mode = WB_SYNC_ALL; 2521 wdata->sync_mode = WB_SYNC_ALL;
@@ -2489,18 +2526,69 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2489 wdata->bytes = cur_len; 2526 wdata->bytes = cur_len;
2490 wdata->pagesz = PAGE_SIZE; 2527 wdata->pagesz = PAGE_SIZE;
2491 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); 2528 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2492 rc = cifs_uncached_retry_writev(wdata); 2529 wdata->credits = credits;
2530
2531 if (!wdata->cfile->invalidHandle ||
2532 !cifs_reopen_file(wdata->cfile, false))
2533 rc = server->ops->async_writev(wdata,
2534 cifs_uncached_writedata_release);
2493 if (rc) { 2535 if (rc) {
2536 add_credits_and_wake_if(server, wdata->credits, 0);
2494 kref_put(&wdata->refcount, 2537 kref_put(&wdata->refcount,
2495 cifs_uncached_writedata_release); 2538 cifs_uncached_writedata_release);
2539 if (rc == -EAGAIN) {
2540 memcpy(from, &saved_from,
2541 sizeof(struct iov_iter));
2542 iov_iter_advance(from, offset - saved_offset);
2543 continue;
2544 }
2496 break; 2545 break;
2497 } 2546 }
2498 2547
2499 list_add_tail(&wdata->list, &wdata_list); 2548 list_add_tail(&wdata->list, wdata_list);
2500 offset += cur_len; 2549 offset += cur_len;
2501 len -= cur_len; 2550 len -= cur_len;
2502 } while (len > 0); 2551 } while (len > 0);
2503 2552
2553 return rc;
2554}
2555
2556static ssize_t
2557cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2558{
2559 size_t len;
2560 ssize_t total_written = 0;
2561 struct cifsFileInfo *open_file;
2562 struct cifs_tcon *tcon;
2563 struct cifs_sb_info *cifs_sb;
2564 struct cifs_writedata *wdata, *tmp;
2565 struct list_head wdata_list;
2566 struct iov_iter saved_from;
2567 int rc;
2568
2569 len = iov_iter_count(from);
2570 rc = generic_write_checks(file, poffset, &len, 0);
2571 if (rc)
2572 return rc;
2573
2574 if (!len)
2575 return 0;
2576
2577 iov_iter_truncate(from, len);
2578
2579 INIT_LIST_HEAD(&wdata_list);
2580 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2581 open_file = file->private_data;
2582 tcon = tlink_tcon(open_file->tlink);
2583
2584 if (!tcon->ses->server->ops->async_writev)
2585 return -ENOSYS;
2586
2587 memcpy(&saved_from, from, sizeof(struct iov_iter));
2588
2589 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2590 &wdata_list);
2591
2504 /* 2592 /*
2505 * If at least one write was successfully sent, then discard any rc 2593 * If at least one write was successfully sent, then discard any rc
2506 * value from the later writes. If the other write succeeds, then 2594 * value from the later writes. If the other write succeeds, then
@@ -2529,7 +2617,25 @@ restart_loop:
2529 2617
2530 /* resend call if it's a retryable error */ 2618 /* resend call if it's a retryable error */
2531 if (rc == -EAGAIN) { 2619 if (rc == -EAGAIN) {
2532 rc = cifs_uncached_retry_writev(wdata); 2620 struct list_head tmp_list;
2621 struct iov_iter tmp_from;
2622
2623 INIT_LIST_HEAD(&tmp_list);
2624 list_del_init(&wdata->list);
2625
2626 memcpy(&tmp_from, &saved_from,
2627 sizeof(struct iov_iter));
2628 iov_iter_advance(&tmp_from,
2629 wdata->offset - *poffset);
2630
2631 rc = cifs_write_from_iter(wdata->offset,
2632 wdata->bytes, &tmp_from,
2633 open_file, cifs_sb, &tmp_list);
2634
2635 list_splice(&tmp_list, &wdata_list);
2636
2637 kref_put(&wdata->refcount,
2638 cifs_uncached_writedata_release);
2533 goto restart_loop; 2639 goto restart_loop;
2534 } 2640 }
2535 } 2641 }
@@ -2722,26 +2828,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
2722 cifs_readdata_release(refcount); 2828 cifs_readdata_release(refcount);
2723} 2829}
2724 2830
2725static int
2726cifs_retry_async_readv(struct cifs_readdata *rdata)
2727{
2728 int rc;
2729 struct TCP_Server_Info *server;
2730
2731 server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2732
2733 do {
2734 if (rdata->cfile->invalidHandle) {
2735 rc = cifs_reopen_file(rdata->cfile, true);
2736 if (rc != 0)
2737 continue;
2738 }
2739 rc = server->ops->async_readv(rdata);
2740 } while (rc == -EAGAIN);
2741
2742 return rc;
2743}
2744
2745/** 2831/**
2746 * cifs_readdata_to_iov - copy data from pages in response to an iovec 2832 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2747 * @rdata: the readdata response with list of pages holding data 2833 * @rdata: the readdata response with list of pages holding data
@@ -2754,7 +2840,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
2754static int 2840static int
2755cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) 2841cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2756{ 2842{
2757 size_t remaining = rdata->bytes; 2843 size_t remaining = rdata->got_bytes;
2758 unsigned int i; 2844 unsigned int i;
2759 2845
2760 for (i = 0; i < rdata->nr_pages; i++) { 2846 for (i = 0; i < rdata->nr_pages; i++) {
@@ -2782,11 +2868,12 @@ static int
2782cifs_uncached_read_into_pages(struct TCP_Server_Info *server, 2868cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2783 struct cifs_readdata *rdata, unsigned int len) 2869 struct cifs_readdata *rdata, unsigned int len)
2784{ 2870{
2785 int total_read = 0, result = 0; 2871 int result = 0;
2786 unsigned int i; 2872 unsigned int i;
2787 unsigned int nr_pages = rdata->nr_pages; 2873 unsigned int nr_pages = rdata->nr_pages;
2788 struct kvec iov; 2874 struct kvec iov;
2789 2875
2876 rdata->got_bytes = 0;
2790 rdata->tailsz = PAGE_SIZE; 2877 rdata->tailsz = PAGE_SIZE;
2791 for (i = 0; i < nr_pages; i++) { 2878 for (i = 0; i < nr_pages; i++) {
2792 struct page *page = rdata->pages[i]; 2879 struct page *page = rdata->pages[i];
@@ -2820,55 +2907,45 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2820 if (result < 0) 2907 if (result < 0)
2821 break; 2908 break;
2822 2909
2823 total_read += result; 2910 rdata->got_bytes += result;
2824 } 2911 }
2825 2912
2826 return total_read > 0 ? total_read : result; 2913 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2914 rdata->got_bytes : result;
2827} 2915}
2828 2916
2829ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) 2917static int
2918cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2919 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2830{ 2920{
2831 struct file *file = iocb->ki_filp; 2921 struct cifs_readdata *rdata;
2832 ssize_t rc; 2922 unsigned int npages, rsize, credits;
2833 size_t len, cur_len; 2923 size_t cur_len;
2834 ssize_t total_read = 0; 2924 int rc;
2835 loff_t offset = iocb->ki_pos;
2836 unsigned int npages;
2837 struct cifs_sb_info *cifs_sb;
2838 struct cifs_tcon *tcon;
2839 struct cifsFileInfo *open_file;
2840 struct cifs_readdata *rdata, *tmp;
2841 struct list_head rdata_list;
2842 pid_t pid; 2925 pid_t pid;
2926 struct TCP_Server_Info *server;
2843 2927
2844 len = iov_iter_count(to); 2928 server = tlink_tcon(open_file->tlink)->ses->server;
2845 if (!len)
2846 return 0;
2847
2848 INIT_LIST_HEAD(&rdata_list);
2849 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2850 open_file = file->private_data;
2851 tcon = tlink_tcon(open_file->tlink);
2852
2853 if (!tcon->ses->server->ops->async_readv)
2854 return -ENOSYS;
2855 2929
2856 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2930 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2857 pid = open_file->pid; 2931 pid = open_file->pid;
2858 else 2932 else
2859 pid = current->tgid; 2933 pid = current->tgid;
2860 2934
2861 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2862 cifs_dbg(FYI, "attempting read on write only file instance\n");
2863
2864 do { 2935 do {
2865 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); 2936 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2937 &rsize, &credits);
2938 if (rc)
2939 break;
2940
2941 cur_len = min_t(const size_t, len, rsize);
2866 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); 2942 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2867 2943
2868 /* allocate a readdata struct */ 2944 /* allocate a readdata struct */
2869 rdata = cifs_readdata_alloc(npages, 2945 rdata = cifs_readdata_alloc(npages,
2870 cifs_uncached_readv_complete); 2946 cifs_uncached_readv_complete);
2871 if (!rdata) { 2947 if (!rdata) {
2948 add_credits_and_wake_if(server, credits, 0);
2872 rc = -ENOMEM; 2949 rc = -ENOMEM;
2873 break; 2950 break;
2874 } 2951 }
@@ -2884,44 +2961,113 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2884 rdata->pid = pid; 2961 rdata->pid = pid;
2885 rdata->pagesz = PAGE_SIZE; 2962 rdata->pagesz = PAGE_SIZE;
2886 rdata->read_into_pages = cifs_uncached_read_into_pages; 2963 rdata->read_into_pages = cifs_uncached_read_into_pages;
2964 rdata->credits = credits;
2887 2965
2888 rc = cifs_retry_async_readv(rdata); 2966 if (!rdata->cfile->invalidHandle ||
2967 !cifs_reopen_file(rdata->cfile, true))
2968 rc = server->ops->async_readv(rdata);
2889error: 2969error:
2890 if (rc) { 2970 if (rc) {
2971 add_credits_and_wake_if(server, rdata->credits, 0);
2891 kref_put(&rdata->refcount, 2972 kref_put(&rdata->refcount,
2892 cifs_uncached_readdata_release); 2973 cifs_uncached_readdata_release);
2974 if (rc == -EAGAIN)
2975 continue;
2893 break; 2976 break;
2894 } 2977 }
2895 2978
2896 list_add_tail(&rdata->list, &rdata_list); 2979 list_add_tail(&rdata->list, rdata_list);
2897 offset += cur_len; 2980 offset += cur_len;
2898 len -= cur_len; 2981 len -= cur_len;
2899 } while (len > 0); 2982 } while (len > 0);
2900 2983
2984 return rc;
2985}
2986
2987ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2988{
2989 struct file *file = iocb->ki_filp;
2990 ssize_t rc;
2991 size_t len;
2992 ssize_t total_read = 0;
2993 loff_t offset = iocb->ki_pos;
2994 struct cifs_sb_info *cifs_sb;
2995 struct cifs_tcon *tcon;
2996 struct cifsFileInfo *open_file;
2997 struct cifs_readdata *rdata, *tmp;
2998 struct list_head rdata_list;
2999
3000 len = iov_iter_count(to);
3001 if (!len)
3002 return 0;
3003
3004 INIT_LIST_HEAD(&rdata_list);
3005 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3006 open_file = file->private_data;
3007 tcon = tlink_tcon(open_file->tlink);
3008
3009 if (!tcon->ses->server->ops->async_readv)
3010 return -ENOSYS;
3011
3012 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3013 cifs_dbg(FYI, "attempting read on write only file instance\n");
3014
3015 rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3016
2901 /* if at least one read request send succeeded, then reset rc */ 3017 /* if at least one read request send succeeded, then reset rc */
2902 if (!list_empty(&rdata_list)) 3018 if (!list_empty(&rdata_list))
2903 rc = 0; 3019 rc = 0;
2904 3020
2905 len = iov_iter_count(to); 3021 len = iov_iter_count(to);
2906 /* the loop below should proceed in the order of increasing offsets */ 3022 /* the loop below should proceed in the order of increasing offsets */
3023again:
2907 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { 3024 list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2908 again:
2909 if (!rc) { 3025 if (!rc) {
2910 /* FIXME: freezable sleep too? */ 3026 /* FIXME: freezable sleep too? */
2911 rc = wait_for_completion_killable(&rdata->done); 3027 rc = wait_for_completion_killable(&rdata->done);
2912 if (rc) 3028 if (rc)
2913 rc = -EINTR; 3029 rc = -EINTR;
2914 else if (rdata->result) { 3030 else if (rdata->result == -EAGAIN) {
2915 rc = rdata->result;
2916 /* resend call if it's a retryable error */ 3031 /* resend call if it's a retryable error */
2917 if (rc == -EAGAIN) { 3032 struct list_head tmp_list;
2918 rc = cifs_retry_async_readv(rdata); 3033 unsigned int got_bytes = rdata->got_bytes;
2919 goto again; 3034
3035 list_del_init(&rdata->list);
3036 INIT_LIST_HEAD(&tmp_list);
3037
3038 /*
3039 * Got a part of data and then reconnect has
3040 * happened -- fill the buffer and continue
3041 * reading.
3042 */
3043 if (got_bytes && got_bytes < rdata->bytes) {
3044 rc = cifs_readdata_to_iov(rdata, to);
3045 if (rc) {
3046 kref_put(&rdata->refcount,
3047 cifs_uncached_readdata_release);
3048 continue;
3049 }
2920 } 3050 }
2921 } else { 3051
3052 rc = cifs_send_async_read(
3053 rdata->offset + got_bytes,
3054 rdata->bytes - got_bytes,
3055 rdata->cfile, cifs_sb,
3056 &tmp_list);
3057
3058 list_splice(&tmp_list, &rdata_list);
3059
3060 kref_put(&rdata->refcount,
3061 cifs_uncached_readdata_release);
3062 goto again;
3063 } else if (rdata->result)
3064 rc = rdata->result;
3065 else
2922 rc = cifs_readdata_to_iov(rdata, to); 3066 rc = cifs_readdata_to_iov(rdata, to);
2923 }
2924 3067
3068 /* if there was a short read -- discard anything left */
3069 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3070 rc = -ENODATA;
2925 } 3071 }
2926 list_del_init(&rdata->list); 3072 list_del_init(&rdata->list);
2927 kref_put(&rdata->refcount, cifs_uncached_readdata_release); 3073 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
@@ -3030,18 +3176,19 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3030 3176
3031 for (total_read = 0, cur_offset = read_data; read_size > total_read; 3177 for (total_read = 0, cur_offset = read_data; read_size > total_read;
3032 total_read += bytes_read, cur_offset += bytes_read) { 3178 total_read += bytes_read, cur_offset += bytes_read) {
3033 current_read_size = min_t(uint, read_size - total_read, rsize); 3179 do {
3034 /* 3180 current_read_size = min_t(uint, read_size - total_read,
3035 * For windows me and 9x we do not want to request more than it 3181 rsize);
3036 * negotiated since it will refuse the read then. 3182 /*
3037 */ 3183 * For windows me and 9x we do not want to request more
3038 if ((tcon->ses) && !(tcon->ses->capabilities & 3184 * than it negotiated since it will refuse the read
3185 * then.
3186 */
3187 if ((tcon->ses) && !(tcon->ses->capabilities &
3039 tcon->ses->server->vals->cap_large_files)) { 3188 tcon->ses->server->vals->cap_large_files)) {
3040 current_read_size = min_t(uint, current_read_size, 3189 current_read_size = min_t(uint,
3041 CIFSMaxBufSize); 3190 current_read_size, CIFSMaxBufSize);
3042 } 3191 }
3043 rc = -EAGAIN;
3044 while (rc == -EAGAIN) {
3045 if (open_file->invalidHandle) { 3192 if (open_file->invalidHandle) {
3046 rc = cifs_reopen_file(open_file, true); 3193 rc = cifs_reopen_file(open_file, true);
3047 if (rc != 0) 3194 if (rc != 0)
@@ -3054,7 +3201,8 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3054 rc = server->ops->sync_read(xid, open_file, &io_parms, 3201 rc = server->ops->sync_read(xid, open_file, &io_parms,
3055 &bytes_read, &cur_offset, 3202 &bytes_read, &cur_offset,
3056 &buf_type); 3203 &buf_type);
3057 } 3204 } while (rc == -EAGAIN);
3205
3058 if (rc || (bytes_read == 0)) { 3206 if (rc || (bytes_read == 0)) {
3059 if (total_read) { 3207 if (total_read) {
3060 break; 3208 break;
@@ -3133,25 +3281,30 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3133static void 3281static void
3134cifs_readv_complete(struct work_struct *work) 3282cifs_readv_complete(struct work_struct *work)
3135{ 3283{
3136 unsigned int i; 3284 unsigned int i, got_bytes;
3137 struct cifs_readdata *rdata = container_of(work, 3285 struct cifs_readdata *rdata = container_of(work,
3138 struct cifs_readdata, work); 3286 struct cifs_readdata, work);
3139 3287
3288 got_bytes = rdata->got_bytes;
3140 for (i = 0; i < rdata->nr_pages; i++) { 3289 for (i = 0; i < rdata->nr_pages; i++) {
3141 struct page *page = rdata->pages[i]; 3290 struct page *page = rdata->pages[i];
3142 3291
3143 lru_cache_add_file(page); 3292 lru_cache_add_file(page);
3144 3293
3145 if (rdata->result == 0) { 3294 if (rdata->result == 0 ||
3295 (rdata->result == -EAGAIN && got_bytes)) {
3146 flush_dcache_page(page); 3296 flush_dcache_page(page);
3147 SetPageUptodate(page); 3297 SetPageUptodate(page);
3148 } 3298 }
3149 3299
3150 unlock_page(page); 3300 unlock_page(page);
3151 3301
3152 if (rdata->result == 0) 3302 if (rdata->result == 0 ||
3303 (rdata->result == -EAGAIN && got_bytes))
3153 cifs_readpage_to_fscache(rdata->mapping->host, page); 3304 cifs_readpage_to_fscache(rdata->mapping->host, page);
3154 3305
3306 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3307
3155 page_cache_release(page); 3308 page_cache_release(page);
3156 rdata->pages[i] = NULL; 3309 rdata->pages[i] = NULL;
3157 } 3310 }
@@ -3162,7 +3315,7 @@ static int
3162cifs_readpages_read_into_pages(struct TCP_Server_Info *server, 3315cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3163 struct cifs_readdata *rdata, unsigned int len) 3316 struct cifs_readdata *rdata, unsigned int len)
3164{ 3317{
3165 int total_read = 0, result = 0; 3318 int result = 0;
3166 unsigned int i; 3319 unsigned int i;
3167 u64 eof; 3320 u64 eof;
3168 pgoff_t eof_index; 3321 pgoff_t eof_index;
@@ -3174,6 +3327,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3174 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; 3327 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3175 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); 3328 cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3176 3329
3330 rdata->got_bytes = 0;
3177 rdata->tailsz = PAGE_CACHE_SIZE; 3331 rdata->tailsz = PAGE_CACHE_SIZE;
3178 for (i = 0; i < nr_pages; i++) { 3332 for (i = 0; i < nr_pages; i++) {
3179 struct page *page = rdata->pages[i]; 3333 struct page *page = rdata->pages[i];
@@ -3228,10 +3382,70 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3228 if (result < 0) 3382 if (result < 0)
3229 break; 3383 break;
3230 3384
3231 total_read += result; 3385 rdata->got_bytes += result;
3232 } 3386 }
3233 3387
3234 return total_read > 0 ? total_read : result; 3388 return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3389 rdata->got_bytes : result;
3390}
3391
3392static int
3393readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3394 unsigned int rsize, struct list_head *tmplist,
3395 unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3396{
3397 struct page *page, *tpage;
3398 unsigned int expected_index;
3399 int rc;
3400
3401 INIT_LIST_HEAD(tmplist);
3402
3403 page = list_entry(page_list->prev, struct page, lru);
3404
3405 /*
3406 * Lock the page and put it in the cache. Since no one else
3407 * should have access to this page, we're safe to simply set
3408 * PG_locked without checking it first.
3409 */
3410 __set_page_locked(page);
3411 rc = add_to_page_cache_locked(page, mapping,
3412 page->index, GFP_KERNEL);
3413
3414 /* give up if we can't stick it in the cache */
3415 if (rc) {
3416 __clear_page_locked(page);
3417 return rc;
3418 }
3419
3420 /* move first page to the tmplist */
3421 *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3422 *bytes = PAGE_CACHE_SIZE;
3423 *nr_pages = 1;
3424 list_move_tail(&page->lru, tmplist);
3425
3426 /* now try and add more pages onto the request */
3427 expected_index = page->index + 1;
3428 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3429 /* discontinuity ? */
3430 if (page->index != expected_index)
3431 break;
3432
3433 /* would this page push the read over the rsize? */
3434 if (*bytes + PAGE_CACHE_SIZE > rsize)
3435 break;
3436
3437 __set_page_locked(page);
3438 if (add_to_page_cache_locked(page, mapping, page->index,
3439 GFP_KERNEL)) {
3440 __clear_page_locked(page);
3441 break;
3442 }
3443 list_move_tail(&page->lru, tmplist);
3444 (*bytes) += PAGE_CACHE_SIZE;
3445 expected_index++;
3446 (*nr_pages)++;
3447 }
3448 return rc;
3235} 3449}
3236 3450
3237static int cifs_readpages(struct file *file, struct address_space *mapping, 3451static int cifs_readpages(struct file *file, struct address_space *mapping,
@@ -3241,19 +3455,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3241 struct list_head tmplist; 3455 struct list_head tmplist;
3242 struct cifsFileInfo *open_file = file->private_data; 3456 struct cifsFileInfo *open_file = file->private_data;
3243 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 3457 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3244 unsigned int rsize = cifs_sb->rsize; 3458 struct TCP_Server_Info *server;
3245 pid_t pid; 3459 pid_t pid;
3246 3460
3247 /* 3461 /*
3248 * Give up immediately if rsize is too small to read an entire page.
3249 * The VFS will fall back to readpage. We should never reach this
3250 * point however since we set ra_pages to 0 when the rsize is smaller
3251 * than a cache page.
3252 */
3253 if (unlikely(rsize < PAGE_CACHE_SIZE))
3254 return 0;
3255
3256 /*
3257 * Reads as many pages as possible from fscache. Returns -ENOBUFS 3462 * Reads as many pages as possible from fscache. Returns -ENOBUFS
3258 * immediately if the cookie is negative 3463 * immediately if the cookie is negative
3259 * 3464 *
@@ -3271,7 +3476,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3271 pid = current->tgid; 3476 pid = current->tgid;
3272 3477
3273 rc = 0; 3478 rc = 0;
3274 INIT_LIST_HEAD(&tmplist); 3479 server = tlink_tcon(open_file->tlink)->ses->server;
3275 3480
3276 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", 3481 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3277 __func__, file, mapping, num_pages); 3482 __func__, file, mapping, num_pages);
@@ -3288,58 +3493,35 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3288 * the rdata->pages, then we want them in increasing order. 3493 * the rdata->pages, then we want them in increasing order.
3289 */ 3494 */
3290 while (!list_empty(page_list)) { 3495 while (!list_empty(page_list)) {
3291 unsigned int i; 3496 unsigned int i, nr_pages, bytes, rsize;
3292 unsigned int bytes = PAGE_CACHE_SIZE;
3293 unsigned int expected_index;
3294 unsigned int nr_pages = 1;
3295 loff_t offset; 3497 loff_t offset;
3296 struct page *page, *tpage; 3498 struct page *page, *tpage;
3297 struct cifs_readdata *rdata; 3499 struct cifs_readdata *rdata;
3500 unsigned credits;
3298 3501
3299 page = list_entry(page_list->prev, struct page, lru); 3502 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3503 &rsize, &credits);
3504 if (rc)
3505 break;
3300 3506
3301 /* 3507 /*
3302 * Lock the page and put it in the cache. Since no one else 3508 * Give up immediately if rsize is too small to read an entire
3303 * should have access to this page, we're safe to simply set 3509 * page. The VFS will fall back to readpage. We should never
3304 * PG_locked without checking it first. 3510 * reach this point however since we set ra_pages to 0 when the
3511 * rsize is smaller than a cache page.
3305 */ 3512 */
3306 __set_page_locked(page); 3513 if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3307 rc = add_to_page_cache_locked(page, mapping, 3514 add_credits_and_wake_if(server, credits, 0);
3308 page->index, GFP_KERNEL); 3515 return 0;
3516 }
3309 3517
3310 /* give up if we can't stick it in the cache */ 3518 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3519 &nr_pages, &offset, &bytes);
3311 if (rc) { 3520 if (rc) {
3312 __clear_page_locked(page); 3521 add_credits_and_wake_if(server, credits, 0);
3313 break; 3522 break;
3314 } 3523 }
3315 3524
3316 /* move first page to the tmplist */
3317 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3318 list_move_tail(&page->lru, &tmplist);
3319
3320 /* now try and add more pages onto the request */
3321 expected_index = page->index + 1;
3322 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3323 /* discontinuity ? */
3324 if (page->index != expected_index)
3325 break;
3326
3327 /* would this page push the read over the rsize? */
3328 if (bytes + PAGE_CACHE_SIZE > rsize)
3329 break;
3330
3331 __set_page_locked(page);
3332 if (add_to_page_cache_locked(page, mapping,
3333 page->index, GFP_KERNEL)) {
3334 __clear_page_locked(page);
3335 break;
3336 }
3337 list_move_tail(&page->lru, &tmplist);
3338 bytes += PAGE_CACHE_SIZE;
3339 expected_index++;
3340 nr_pages++;
3341 }
3342
3343 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); 3525 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3344 if (!rdata) { 3526 if (!rdata) {
3345 /* best to give up if we're out of mem */ 3527 /* best to give up if we're out of mem */
@@ -3350,6 +3532,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3350 page_cache_release(page); 3532 page_cache_release(page);
3351 } 3533 }
3352 rc = -ENOMEM; 3534 rc = -ENOMEM;
3535 add_credits_and_wake_if(server, credits, 0);
3353 break; 3536 break;
3354 } 3537 }
3355 3538
@@ -3360,21 +3543,32 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3360 rdata->pid = pid; 3543 rdata->pid = pid;
3361 rdata->pagesz = PAGE_CACHE_SIZE; 3544 rdata->pagesz = PAGE_CACHE_SIZE;
3362 rdata->read_into_pages = cifs_readpages_read_into_pages; 3545 rdata->read_into_pages = cifs_readpages_read_into_pages;
3546 rdata->credits = credits;
3363 3547
3364 list_for_each_entry_safe(page, tpage, &tmplist, lru) { 3548 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3365 list_del(&page->lru); 3549 list_del(&page->lru);
3366 rdata->pages[rdata->nr_pages++] = page; 3550 rdata->pages[rdata->nr_pages++] = page;
3367 } 3551 }
3368 3552
3369 rc = cifs_retry_async_readv(rdata); 3553 if (!rdata->cfile->invalidHandle ||
3370 if (rc != 0) { 3554 !cifs_reopen_file(rdata->cfile, true))
3555 rc = server->ops->async_readv(rdata);
3556 if (rc) {
3557 add_credits_and_wake_if(server, rdata->credits, 0);
3371 for (i = 0; i < rdata->nr_pages; i++) { 3558 for (i = 0; i < rdata->nr_pages; i++) {
3372 page = rdata->pages[i]; 3559 page = rdata->pages[i];
3373 lru_cache_add_file(page); 3560 lru_cache_add_file(page);
3374 unlock_page(page); 3561 unlock_page(page);
3375 page_cache_release(page); 3562 page_cache_release(page);
3563 if (rc == -EAGAIN)
3564 list_add_tail(&page->lru, &tmplist);
3376 } 3565 }
3377 kref_put(&rdata->refcount, cifs_readdata_release); 3566 kref_put(&rdata->refcount, cifs_readdata_release);
3567 if (rc == -EAGAIN) {
3568 /* Re-add pages to the page_list and retry */
3569 list_splice(&tmplist, page_list);
3570 continue;
3571 }
3378 break; 3572 break;
3379 } 3573 }
3380 3574
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 41de3935caa0..426d6c6ad8bf 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1627,8 +1627,9 @@ do_rename_exit:
1627} 1627}
1628 1628
1629int 1629int
1630cifs_rename(struct inode *source_dir, struct dentry *source_dentry, 1630cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
1631 struct inode *target_dir, struct dentry *target_dentry) 1631 struct inode *target_dir, struct dentry *target_dentry,
1632 unsigned int flags)
1632{ 1633{
1633 char *from_name = NULL; 1634 char *from_name = NULL;
1634 char *to_name = NULL; 1635 char *to_name = NULL;
@@ -1640,6 +1641,9 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1640 unsigned int xid; 1641 unsigned int xid;
1641 int rc, tmprc; 1642 int rc, tmprc;
1642 1643
1644 if (flags & ~RENAME_NOREPLACE)
1645 return -EINVAL;
1646
1643 cifs_sb = CIFS_SB(source_dir->i_sb); 1647 cifs_sb = CIFS_SB(source_dir->i_sb);
1644 tlink = cifs_sb_tlink(cifs_sb); 1648 tlink = cifs_sb_tlink(cifs_sb);
1645 if (IS_ERR(tlink)) 1649 if (IS_ERR(tlink))
@@ -1667,6 +1671,12 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1667 rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, 1671 rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
1668 to_name); 1672 to_name);
1669 1673
1674 /*
1675 * No-replace is the natural behavior for CIFS, so skip unlink hacks.
1676 */
1677 if (flags & RENAME_NOREPLACE)
1678 goto cifs_rename_exit;
1679
1670 if (rc == -EEXIST && tcon->unix_ext) { 1680 if (rc == -EEXIST && tcon->unix_ext) {
1671 /* 1681 /*
1672 * Are src and dst hardlinks of same inode? We can only tell 1682 * Are src and dst hardlinks of same inode? We can only tell
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 6bf55d0ed494..81340c6253eb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -226,6 +226,15 @@ cifs_small_buf_release(void *buf_to_free)
226 return; 226 return;
227} 227}
228 228
229void
230free_rsp_buf(int resp_buftype, void *rsp)
231{
232 if (resp_buftype == CIFS_SMALL_BUFFER)
233 cifs_small_buf_release(rsp);
234 else if (resp_buftype == CIFS_LARGE_BUFFER)
235 cifs_buf_release(rsp);
236}
237
229/* NB: MID can not be set if treeCon not passed in, in that 238/* NB: MID can not be set if treeCon not passed in, in that
230 case it is responsbility of caller to set the mid */ 239 case it is responsbility of caller to set the mid */
231void 240void
@@ -414,7 +423,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
414 return true; 423 return true;
415 } 424 }
416 if (pSMBr->hdr.Status.CifsError) { 425 if (pSMBr->hdr.Status.CifsError) {
417 cifs_dbg(FYI, "notify err 0x%d\n", 426 cifs_dbg(FYI, "notify err 0x%x\n",
418 pSMBr->hdr.Status.CifsError); 427 pSMBr->hdr.Status.CifsError);
419 return true; 428 return true;
420 } 429 }
@@ -441,7 +450,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
441 if (pSMB->hdr.WordCount != 8) 450 if (pSMB->hdr.WordCount != 8)
442 return false; 451 return false;
443 452
444 cifs_dbg(FYI, "oplock type 0x%d level 0x%d\n", 453 cifs_dbg(FYI, "oplock type 0x%x level 0x%x\n",
445 pSMB->LockType, pSMB->OplockLevel); 454 pSMB->LockType, pSMB->OplockLevel);
446 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) 455 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
447 return false; 456 return false;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index e87387dbf39f..39ee32688eac 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -520,382 +520,559 @@ select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
520 } 520 }
521} 521}
522 522
523int 523struct sess_data {
524CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses, 524 unsigned int xid;
525 const struct nls_table *nls_cp) 525 struct cifs_ses *ses;
526 struct nls_table *nls_cp;
527 void (*func)(struct sess_data *);
528 int result;
529
530 /* we will send the SMB in three pieces:
531 * a fixed length beginning part, an optional
532 * SPNEGO blob (which can be zero length), and a
533 * last part which will include the strings
534 * and rest of bcc area. This allows us to avoid
535 * a large buffer 17K allocation
536 */
537 int buf0_type;
538 struct kvec iov[3];
539};
540
541static int
542sess_alloc_buffer(struct sess_data *sess_data, int wct)
526{ 543{
527 int rc = 0; 544 int rc;
528 int wct; 545 struct cifs_ses *ses = sess_data->ses;
529 struct smb_hdr *smb_buf; 546 struct smb_hdr *smb_buf;
530 char *bcc_ptr;
531 char *str_area;
532 SESSION_SETUP_ANDX *pSMB;
533 __u32 capabilities;
534 __u16 count;
535 int resp_buf_type;
536 struct kvec iov[3];
537 enum securityEnum type;
538 __u16 action, bytes_remaining;
539 struct key *spnego_key = NULL;
540 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
541 u16 blob_len;
542 char *ntlmsspblob = NULL;
543 547
544 if (ses == NULL) { 548 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses,
545 WARN(1, "%s: ses == NULL!", __func__); 549 (void **)&smb_buf);
546 return -EINVAL;
547 }
548 550
549 type = select_sectype(ses->server, ses->sectype); 551 if (rc)
550 cifs_dbg(FYI, "sess setup type %d\n", type); 552 return rc;
551 if (type == Unspecified) { 553
552 cifs_dbg(VFS, 554 sess_data->iov[0].iov_base = (char *)smb_buf;
553 "Unable to select appropriate authentication method!"); 555 sess_data->iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
554 return -EINVAL; 556 /*
557 * This variable will be used to clear the buffer
558 * allocated above in case of any error in the calling function.
559 */
560 sess_data->buf0_type = CIFS_SMALL_BUFFER;
561
562 /* 2000 big enough to fit max user, domain, NOS name etc. */
563 sess_data->iov[2].iov_base = kmalloc(2000, GFP_KERNEL);
564 if (!sess_data->iov[2].iov_base) {
565 rc = -ENOMEM;
566 goto out_free_smb_buf;
555 } 567 }
556 568
557 if (type == RawNTLMSSP) { 569 return 0;
558 /* if memory allocation is successful, caller of this function 570
559 * frees it. 571out_free_smb_buf:
560 */ 572 kfree(smb_buf);
561 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL); 573 sess_data->iov[0].iov_base = NULL;
562 if (!ses->ntlmssp) 574 sess_data->iov[0].iov_len = 0;
563 return -ENOMEM; 575 sess_data->buf0_type = CIFS_NO_BUFFER;
564 ses->ntlmssp->sesskey_per_smbsess = false; 576 return rc;
577}
578
579static void
580sess_free_buffer(struct sess_data *sess_data)
581{
565 582
583 free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
584 sess_data->buf0_type = CIFS_NO_BUFFER;
585 kfree(sess_data->iov[2].iov_base);
586}
587
588static int
589sess_establish_session(struct sess_data *sess_data)
590{
591 struct cifs_ses *ses = sess_data->ses;
592
593 mutex_lock(&ses->server->srv_mutex);
594 if (!ses->server->session_estab) {
595 if (ses->server->sign) {
596 ses->server->session_key.response =
597 kmemdup(ses->auth_key.response,
598 ses->auth_key.len, GFP_KERNEL);
599 if (!ses->server->session_key.response) {
600 mutex_unlock(&ses->server->srv_mutex);
601 return -ENOMEM;
602 }
603 ses->server->session_key.len =
604 ses->auth_key.len;
605 }
606 ses->server->sequence_number = 0x2;
607 ses->server->session_estab = true;
566 } 608 }
609 mutex_unlock(&ses->server->srv_mutex);
567 610
568ssetup_ntlmssp_authenticate: 611 cifs_dbg(FYI, "CIFS session established successfully\n");
569 if (phase == NtLmChallenge) 612 spin_lock(&GlobalMid_Lock);
570 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 613 ses->status = CifsGood;
614 ses->need_reconnect = false;
615 spin_unlock(&GlobalMid_Lock);
571 616
572 if (type == LANMAN) { 617 return 0;
573#ifndef CONFIG_CIFS_WEAK_PW_HASH 618}
574 /* LANMAN and plaintext are less secure and off by default.
575 So we make this explicitly be turned on in kconfig (in the
576 build) and turned on at runtime (changed from the default)
577 in proc/fs/cifs or via mount parm. Unfortunately this is
578 needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
579 return -EOPNOTSUPP;
580#endif
581 wct = 10; /* lanman 2 style sessionsetup */
582 } else if ((type == NTLM) || (type == NTLMv2)) {
583 /* For NTLMv2 failures eventually may need to retry NTLM */
584 wct = 13; /* old style NTLM sessionsetup */
585 } else /* same size: negotiate or auth, NTLMSSP or extended security */
586 wct = 12;
587 619
588 rc = small_smb_init_no_tc(SMB_COM_SESSION_SETUP_ANDX, wct, ses, 620static int
589 (void **)&smb_buf); 621sess_sendreceive(struct sess_data *sess_data)
590 if (rc) 622{
591 return rc; 623 int rc;
624 struct smb_hdr *smb_buf = (struct smb_hdr *) sess_data->iov[0].iov_base;
625 __u16 count;
592 626
593 pSMB = (SESSION_SETUP_ANDX *)smb_buf; 627 count = sess_data->iov[1].iov_len + sess_data->iov[2].iov_len;
628 smb_buf->smb_buf_length =
629 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
630 put_bcc(count, smb_buf);
631
632 rc = SendReceive2(sess_data->xid, sess_data->ses,
633 sess_data->iov, 3 /* num_iovecs */,
634 &sess_data->buf0_type,
635 CIFS_LOG_ERROR);
636
637 return rc;
638}
594 639
640/*
641 * LANMAN and plaintext are less secure and off by default.
642 * So we make this explicitly be turned on in kconfig (in the
643 * build) and turned on at runtime (changed from the default)
644 * in proc/fs/cifs or via mount parm. Unfortunately this is
645 * needed for old Win (e.g. Win95), some obscure NAS and OS/2
646 */
647#ifdef CONFIG_CIFS_WEAK_PW_HASH
648static void
649sess_auth_lanman(struct sess_data *sess_data)
650{
651 int rc = 0;
652 struct smb_hdr *smb_buf;
653 SESSION_SETUP_ANDX *pSMB;
654 char *bcc_ptr;
655 struct cifs_ses *ses = sess_data->ses;
656 char lnm_session_key[CIFS_AUTH_RESP_SIZE];
657 __u32 capabilities;
658 __u16 bytes_remaining;
659
660 /* lanman 2 style sessionsetup */
661 /* wct = 10 */
662 rc = sess_alloc_buffer(sess_data, 10);
663 if (rc)
664 goto out;
665
666 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
667 bcc_ptr = sess_data->iov[2].iov_base;
595 capabilities = cifs_ssetup_hdr(ses, pSMB); 668 capabilities = cifs_ssetup_hdr(ses, pSMB);
596 669
597 /* we will send the SMB in three pieces: 670 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
598 a fixed length beginning part, an optional
599 SPNEGO blob (which can be zero length), and a
600 last part which will include the strings
601 and rest of bcc area. This allows us to avoid
602 a large buffer 17K allocation */
603 iov[0].iov_base = (char *)pSMB;
604 iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
605
606 /* setting this here allows the code at the end of the function
607 to free the request buffer if there's an error */
608 resp_buf_type = CIFS_SMALL_BUFFER;
609 671
610 /* 2000 big enough to fit max user, domain, NOS name etc. */ 672 /* no capabilities flags in old lanman negotiation */
611 str_area = kmalloc(2000, GFP_KERNEL); 673 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
612 if (str_area == NULL) {
613 rc = -ENOMEM;
614 goto ssetup_exit;
615 }
616 bcc_ptr = str_area;
617 674
618 iov[1].iov_base = NULL; 675 /* Calculate hash with password and copy into bcc_ptr.
619 iov[1].iov_len = 0; 676 * Encryption Key (stored as in cryptkey) gets used if the
677 * security mode bit in Negottiate Protocol response states
678 * to use challenge/response method (i.e. Password bit is 1).
679 */
680 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
681 ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
682 true : false, lnm_session_key);
620 683
621 if (type == LANMAN) { 684 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
622#ifdef CONFIG_CIFS_WEAK_PW_HASH 685 bcc_ptr += CIFS_AUTH_RESP_SIZE;
623 char lnm_session_key[CIFS_AUTH_RESP_SIZE]; 686
687 /*
688 * can not sign if LANMAN negotiated so no need
689 * to calculate signing key? but what if server
690 * changed to do higher than lanman dialect and
691 * we reconnected would we ever calc signing_key?
692 */
624 693
625 pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; 694 cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n");
695 /* Unicode not allowed for LANMAN dialects */
696 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
626 697
627 /* no capabilities flags in old lanman negotiation */ 698 sess_data->iov[2].iov_len = (long) bcc_ptr -
699 (long) sess_data->iov[2].iov_base;
628 700
629 pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE); 701 rc = sess_sendreceive(sess_data);
702 if (rc)
703 goto out;
630 704
631 /* Calculate hash with password and copy into bcc_ptr. 705 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
632 * Encryption Key (stored as in cryptkey) gets used if the 706 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
633 * security mode bit in Negottiate Protocol response states
634 * to use challenge/response method (i.e. Password bit is 1).
635 */
636 707
637 rc = calc_lanman_hash(ses->password, ses->server->cryptkey, 708 /* lanman response has a word count of 3 */
638 ses->server->sec_mode & SECMODE_PW_ENCRYPT ? 709 if (smb_buf->WordCount != 3) {
639 true : false, lnm_session_key); 710 rc = -EIO;
711 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
712 goto out;
713 }
640 714
641 memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE); 715 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
642 bcc_ptr += CIFS_AUTH_RESP_SIZE; 716 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
717
718 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
719 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
643 720
644 /* can not sign if LANMAN negotiated so no need 721 bytes_remaining = get_bcc(smb_buf);
645 to calculate signing key? but what if server 722 bcc_ptr = pByteArea(smb_buf);
646 changed to do higher than lanman dialect and
647 we reconnected would we ever calc signing_key? */
648 723
649 cifs_dbg(FYI, "Negotiating LANMAN setting up strings\n"); 724 /* BB check if Unicode and decode strings */
650 /* Unicode not allowed for LANMAN dialects */ 725 if (bytes_remaining == 0) {
651 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 726 /* no string area to decode, do nothing */
727 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
728 /* unicode string area must be word-aligned */
729 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
730 ++bcc_ptr;
731 --bytes_remaining;
732 }
733 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
734 sess_data->nls_cp);
735 } else {
736 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
737 sess_data->nls_cp);
738 }
739
740 rc = sess_establish_session(sess_data);
741out:
742 sess_data->result = rc;
743 sess_data->func = NULL;
744 sess_free_buffer(sess_data);
745}
746
747#else
748
749static void
750sess_auth_lanman(struct sess_data *sess_data)
751{
752 sess_data->result = -EOPNOTSUPP;
753 sess_data->func = NULL;
754}
652#endif 755#endif
653 } else if (type == NTLM) { 756
654 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities); 757static void
655 pSMB->req_no_secext.CaseInsensitivePasswordLength = 758sess_auth_ntlm(struct sess_data *sess_data)
759{
760 int rc = 0;
761 struct smb_hdr *smb_buf;
762 SESSION_SETUP_ANDX *pSMB;
763 char *bcc_ptr;
764 struct cifs_ses *ses = sess_data->ses;
765 __u32 capabilities;
766 __u16 bytes_remaining;
767
768 /* old style NTLM sessionsetup */
769 /* wct = 13 */
770 rc = sess_alloc_buffer(sess_data, 13);
771 if (rc)
772 goto out;
773
774 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
775 bcc_ptr = sess_data->iov[2].iov_base;
776 capabilities = cifs_ssetup_hdr(ses, pSMB);
777
778 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
779 pSMB->req_no_secext.CaseInsensitivePasswordLength =
656 cpu_to_le16(CIFS_AUTH_RESP_SIZE); 780 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
657 pSMB->req_no_secext.CaseSensitivePasswordLength = 781 pSMB->req_no_secext.CaseSensitivePasswordLength =
658 cpu_to_le16(CIFS_AUTH_RESP_SIZE); 782 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
659 783
660 /* calculate ntlm response and session key */ 784 /* calculate ntlm response and session key */
661 rc = setup_ntlm_response(ses, nls_cp); 785 rc = setup_ntlm_response(ses, sess_data->nls_cp);
662 if (rc) { 786 if (rc) {
663 cifs_dbg(VFS, "Error %d during NTLM authentication\n", 787 cifs_dbg(VFS, "Error %d during NTLM authentication\n",
664 rc); 788 rc);
665 goto ssetup_exit; 789 goto out;
666 } 790 }
667 791
668 /* copy ntlm response */ 792 /* copy ntlm response */
669 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, 793 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
670 CIFS_AUTH_RESP_SIZE); 794 CIFS_AUTH_RESP_SIZE);
671 bcc_ptr += CIFS_AUTH_RESP_SIZE; 795 bcc_ptr += CIFS_AUTH_RESP_SIZE;
672 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, 796 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
673 CIFS_AUTH_RESP_SIZE); 797 CIFS_AUTH_RESP_SIZE);
674 bcc_ptr += CIFS_AUTH_RESP_SIZE; 798 bcc_ptr += CIFS_AUTH_RESP_SIZE;
675 799
676 if (ses->capabilities & CAP_UNICODE) { 800 if (ses->capabilities & CAP_UNICODE) {
677 /* unicode strings must be word aligned */ 801 /* unicode strings must be word aligned */
678 if (iov[0].iov_len % 2) { 802 if (sess_data->iov[0].iov_len % 2) {
679 *bcc_ptr = 0; 803 *bcc_ptr = 0;
680 bcc_ptr++; 804 bcc_ptr++;
681 }
682 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
683 } else
684 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
685 } else if (type == NTLMv2) {
686 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
687
688 /* LM2 password would be here if we supported it */
689 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
690
691 /* calculate nlmv2 response and session key */
692 rc = setup_ntlmv2_rsp(ses, nls_cp);
693 if (rc) {
694 cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n",
695 rc);
696 goto ssetup_exit;
697 } 805 }
698 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE, 806 unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
699 ses->auth_key.len - CIFS_SESS_KEY_SIZE); 807 } else {
700 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE; 808 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
701 809 }
702 /* set case sensitive password length after tilen may get
703 * assigned, tilen is 0 otherwise.
704 */
705 pSMB->req_no_secext.CaseSensitivePasswordLength =
706 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
707 810
708 if (ses->capabilities & CAP_UNICODE) {
709 if (iov[0].iov_len % 2) {
710 *bcc_ptr = 0;
711 bcc_ptr++;
712 }
713 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
714 } else
715 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
716 } else if (type == Kerberos) {
717#ifdef CONFIG_CIFS_UPCALL
718 struct cifs_spnego_msg *msg;
719 811
720 spnego_key = cifs_get_spnego_key(ses); 812 sess_data->iov[2].iov_len = (long) bcc_ptr -
721 if (IS_ERR(spnego_key)) { 813 (long) sess_data->iov[2].iov_base;
722 rc = PTR_ERR(spnego_key);
723 spnego_key = NULL;
724 goto ssetup_exit;
725 }
726 814
727 msg = spnego_key->payload.data; 815 rc = sess_sendreceive(sess_data);
728 /* check version field to make sure that cifs.upcall is 816 if (rc)
729 sending us a response in an expected form */ 817 goto out;
730 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
731 cifs_dbg(VFS, "incorrect version of cifs.upcall "
732 "expected %d but got %d)",
733 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
734 rc = -EKEYREJECTED;
735 goto ssetup_exit;
736 }
737 818
738 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len, 819 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
739 GFP_KERNEL); 820 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
740 if (!ses->auth_key.response) {
741 cifs_dbg(VFS,
742 "Kerberos can't allocate (%u bytes) memory",
743 msg->sesskey_len);
744 rc = -ENOMEM;
745 goto ssetup_exit;
746 }
747 ses->auth_key.len = msg->sesskey_len;
748
749 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
750 capabilities |= CAP_EXTENDED_SECURITY;
751 pSMB->req.Capabilities = cpu_to_le32(capabilities);
752 iov[1].iov_base = msg->data + msg->sesskey_len;
753 iov[1].iov_len = msg->secblob_len;
754 pSMB->req.SecurityBlobLength = cpu_to_le16(iov[1].iov_len);
755
756 if (ses->capabilities & CAP_UNICODE) {
757 /* unicode strings must be word aligned */
758 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
759 *bcc_ptr = 0;
760 bcc_ptr++;
761 }
762 unicode_oslm_strings(&bcc_ptr, nls_cp);
763 unicode_domain_string(&bcc_ptr, ses, nls_cp);
764 } else
765 /* BB: is this right? */
766 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
767#else /* ! CONFIG_CIFS_UPCALL */
768 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
769 rc = -ENOSYS;
770 goto ssetup_exit;
771#endif /* CONFIG_CIFS_UPCALL */
772 } else if (type == RawNTLMSSP) {
773 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
774 cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
775 rc = -ENOSYS;
776 goto ssetup_exit;
777 }
778 821
779 cifs_dbg(FYI, "ntlmssp session setup phase %d\n", phase); 822 if (smb_buf->WordCount != 3) {
780 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 823 rc = -EIO;
781 capabilities |= CAP_EXTENDED_SECURITY; 824 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
782 pSMB->req.Capabilities |= cpu_to_le32(capabilities); 825 goto out;
783 switch(phase) { 826 }
784 case NtLmNegotiate:
785 build_ntlmssp_negotiate_blob(
786 pSMB->req.SecurityBlob, ses);
787 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
788 iov[1].iov_base = pSMB->req.SecurityBlob;
789 pSMB->req.SecurityBlobLength =
790 cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
791 break;
792 case NtLmAuthenticate:
793 /*
794 * 5 is an empirical value, large enough to hold
795 * authenticate message plus max 10 of av paris,
796 * domain, user, workstation names, flags, etc.
797 */
798 ntlmsspblob = kzalloc(
799 5*sizeof(struct _AUTHENTICATE_MESSAGE),
800 GFP_KERNEL);
801 if (!ntlmsspblob) {
802 rc = -ENOMEM;
803 goto ssetup_exit;
804 }
805 827
806 rc = build_ntlmssp_auth_blob(ntlmsspblob, 828 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
807 &blob_len, ses, nls_cp); 829 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
808 if (rc) 830
809 goto ssetup_exit; 831 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
810 iov[1].iov_len = blob_len; 832 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
811 iov[1].iov_base = ntlmsspblob; 833
812 pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len); 834 bytes_remaining = get_bcc(smb_buf);
813 /* 835 bcc_ptr = pByteArea(smb_buf);
814 * Make sure that we tell the server that we are using 836
815 * the uid that it just gave us back on the response 837 /* BB check if Unicode and decode strings */
816 * (challenge) 838 if (bytes_remaining == 0) {
817 */ 839 /* no string area to decode, do nothing */
818 smb_buf->Uid = ses->Suid; 840 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
819 break; 841 /* unicode string area must be word-aligned */
820 default: 842 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
821 cifs_dbg(VFS, "invalid phase %d\n", phase); 843 ++bcc_ptr;
822 rc = -ENOSYS; 844 --bytes_remaining;
823 goto ssetup_exit;
824 } 845 }
825 /* unicode strings must be word aligned */ 846 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
826 if ((iov[0].iov_len + iov[1].iov_len) % 2) { 847 sess_data->nls_cp);
848 } else {
849 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
850 sess_data->nls_cp);
851 }
852
853 rc = sess_establish_session(sess_data);
854out:
855 sess_data->result = rc;
856 sess_data->func = NULL;
857 sess_free_buffer(sess_data);
858 kfree(ses->auth_key.response);
859 ses->auth_key.response = NULL;
860}
861
862static void
863sess_auth_ntlmv2(struct sess_data *sess_data)
864{
865 int rc = 0;
866 struct smb_hdr *smb_buf;
867 SESSION_SETUP_ANDX *pSMB;
868 char *bcc_ptr;
869 struct cifs_ses *ses = sess_data->ses;
870 __u32 capabilities;
871 __u16 bytes_remaining;
872
873 /* old style NTLM sessionsetup */
874 /* wct = 13 */
875 rc = sess_alloc_buffer(sess_data, 13);
876 if (rc)
877 goto out;
878
879 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
880 bcc_ptr = sess_data->iov[2].iov_base;
881 capabilities = cifs_ssetup_hdr(ses, pSMB);
882
883 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
884
885 /* LM2 password would be here if we supported it */
886 pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
887
888 /* calculate nlmv2 response and session key */
889 rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
890 if (rc) {
891 cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
892 goto out;
893 }
894
895 memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
896 ses->auth_key.len - CIFS_SESS_KEY_SIZE);
897 bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
898
899 /* set case sensitive password length after tilen may get
900 * assigned, tilen is 0 otherwise.
901 */
902 pSMB->req_no_secext.CaseSensitivePasswordLength =
903 cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
904
905 if (ses->capabilities & CAP_UNICODE) {
906 if (sess_data->iov[0].iov_len % 2) {
827 *bcc_ptr = 0; 907 *bcc_ptr = 0;
828 bcc_ptr++; 908 bcc_ptr++;
829 } 909 }
830 unicode_oslm_strings(&bcc_ptr, nls_cp); 910 unicode_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
831 } else { 911 } else {
832 cifs_dbg(VFS, "secType %d not supported!\n", type); 912 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
833 rc = -ENOSYS;
834 goto ssetup_exit;
835 } 913 }
836 914
837 iov[2].iov_base = str_area;
838 iov[2].iov_len = (long) bcc_ptr - (long) str_area;
839 915
840 count = iov[1].iov_len + iov[2].iov_len; 916 sess_data->iov[2].iov_len = (long) bcc_ptr -
841 smb_buf->smb_buf_length = 917 (long) sess_data->iov[2].iov_base;
842 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
843 918
844 put_bcc(count, smb_buf); 919 rc = sess_sendreceive(sess_data);
920 if (rc)
921 goto out;
845 922
846 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, 923 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
847 CIFS_LOG_ERROR); 924 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
848 /* SMB request buf freed in SendReceive2 */ 925
926 if (smb_buf->WordCount != 3) {
927 rc = -EIO;
928 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
929 goto out;
930 }
931
932 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
933 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
934
935 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
936 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
849 937
850 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 938 bytes_remaining = get_bcc(smb_buf);
851 smb_buf = (struct smb_hdr *)iov[0].iov_base; 939 bcc_ptr = pByteArea(smb_buf);
852 940
853 if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) && 941 /* BB check if Unicode and decode strings */
854 (smb_buf->Status.CifsError == 942 if (bytes_remaining == 0) {
855 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { 943 /* no string area to decode, do nothing */
856 if (phase != NtLmNegotiate) { 944 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
857 cifs_dbg(VFS, "Unexpected more processing error\n"); 945 /* unicode string area must be word-aligned */
858 goto ssetup_exit; 946 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
947 ++bcc_ptr;
948 --bytes_remaining;
859 } 949 }
860 /* NTLMSSP Negotiate sent now processing challenge (response) */ 950 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
861 phase = NtLmChallenge; /* process ntlmssp challenge */ 951 sess_data->nls_cp);
862 rc = 0; /* MORE_PROC rc is not an error here, but expected */ 952 } else {
953 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
954 sess_data->nls_cp);
863 } 955 }
956
957 rc = sess_establish_session(sess_data);
958out:
959 sess_data->result = rc;
960 sess_data->func = NULL;
961 sess_free_buffer(sess_data);
962 kfree(ses->auth_key.response);
963 ses->auth_key.response = NULL;
964}
965
966#ifdef CONFIG_CIFS_UPCALL
967static void
968sess_auth_kerberos(struct sess_data *sess_data)
969{
970 int rc = 0;
971 struct smb_hdr *smb_buf;
972 SESSION_SETUP_ANDX *pSMB;
973 char *bcc_ptr;
974 struct cifs_ses *ses = sess_data->ses;
975 __u32 capabilities;
976 __u16 bytes_remaining;
977 struct key *spnego_key = NULL;
978 struct cifs_spnego_msg *msg;
979 u16 blob_len;
980
981 /* extended security */
982 /* wct = 12 */
983 rc = sess_alloc_buffer(sess_data, 12);
864 if (rc) 984 if (rc)
865 goto ssetup_exit; 985 goto out;
866 986
867 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { 987 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
988 bcc_ptr = sess_data->iov[2].iov_base;
989 capabilities = cifs_ssetup_hdr(ses, pSMB);
990
991 spnego_key = cifs_get_spnego_key(ses);
992 if (IS_ERR(spnego_key)) {
993 rc = PTR_ERR(spnego_key);
994 spnego_key = NULL;
995 goto out;
996 }
997
998 msg = spnego_key->payload.data;
999 /*
1000 * check version field to make sure that cifs.upcall is
1001 * sending us a response in an expected form
1002 */
1003 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
1004 cifs_dbg(VFS,
1005 "incorrect version of cifs.upcall (expected %d but got %d)",
1006 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
1007 rc = -EKEYREJECTED;
1008 goto out_put_spnego_key;
1009 }
1010
1011 ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
1012 GFP_KERNEL);
1013 if (!ses->auth_key.response) {
1014 cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory",
1015 msg->sesskey_len);
1016 rc = -ENOMEM;
1017 goto out_put_spnego_key;
1018 }
1019 ses->auth_key.len = msg->sesskey_len;
1020
1021 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
1022 capabilities |= CAP_EXTENDED_SECURITY;
1023 pSMB->req.Capabilities = cpu_to_le32(capabilities);
1024 sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
1025 sess_data->iov[1].iov_len = msg->secblob_len;
1026 pSMB->req.SecurityBlobLength = cpu_to_le16(sess_data->iov[1].iov_len);
1027
1028 if (ses->capabilities & CAP_UNICODE) {
1029 /* unicode strings must be word aligned */
1030 if ((sess_data->iov[0].iov_len
1031 + sess_data->iov[1].iov_len) % 2) {
1032 *bcc_ptr = 0;
1033 bcc_ptr++;
1034 }
1035 unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
1036 unicode_domain_string(&bcc_ptr, ses, sess_data->nls_cp);
1037 } else {
1038 /* BB: is this right? */
1039 ascii_ssetup_strings(&bcc_ptr, ses, sess_data->nls_cp);
1040 }
1041
1042 sess_data->iov[2].iov_len = (long) bcc_ptr -
1043 (long) sess_data->iov[2].iov_base;
1044
1045 rc = sess_sendreceive(sess_data);
1046 if (rc)
1047 goto out_put_spnego_key;
1048
1049 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1050 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
1051
1052 if (smb_buf->WordCount != 4) {
868 rc = -EIO; 1053 rc = -EIO;
869 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); 1054 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
870 goto ssetup_exit; 1055 goto out_put_spnego_key;
871 } 1056 }
872 action = le16_to_cpu(pSMB->resp.Action); 1057
873 if (action & GUEST_LOGIN) 1058 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
874 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ 1059 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
1060
875 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ 1061 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
876 cifs_dbg(FYI, "UID = %llu\n", ses->Suid); 1062 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
877 /* response can have either 3 or 4 word count - Samba sends 3 */ 1063
878 /* and lanman response is 3 */
879 bytes_remaining = get_bcc(smb_buf); 1064 bytes_remaining = get_bcc(smb_buf);
880 bcc_ptr = pByteArea(smb_buf); 1065 bcc_ptr = pByteArea(smb_buf);
881 1066
882 if (smb_buf->WordCount == 4) { 1067 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
883 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); 1068 if (blob_len > bytes_remaining) {
884 if (blob_len > bytes_remaining) { 1069 cifs_dbg(VFS, "bad security blob length %d\n",
885 cifs_dbg(VFS, "bad security blob length %d\n", 1070 blob_len);
886 blob_len); 1071 rc = -EINVAL;
887 rc = -EINVAL; 1072 goto out_put_spnego_key;
888 goto ssetup_exit;
889 }
890 if (phase == NtLmChallenge) {
891 rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
892 /* now goto beginning for ntlmssp authenticate phase */
893 if (rc)
894 goto ssetup_exit;
895 }
896 bcc_ptr += blob_len;
897 bytes_remaining -= blob_len;
898 } 1073 }
1074 bcc_ptr += blob_len;
1075 bytes_remaining -= blob_len;
899 1076
900 /* BB check if Unicode and decode strings */ 1077 /* BB check if Unicode and decode strings */
901 if (bytes_remaining == 0) { 1078 if (bytes_remaining == 0) {
@@ -906,60 +1083,371 @@ ssetup_ntlmssp_authenticate:
906 ++bcc_ptr; 1083 ++bcc_ptr;
907 --bytes_remaining; 1084 --bytes_remaining;
908 } 1085 }
909 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); 1086 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
1087 sess_data->nls_cp);
910 } else { 1088 } else {
911 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp); 1089 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
1090 sess_data->nls_cp);
912 } 1091 }
913 1092
914ssetup_exit: 1093 rc = sess_establish_session(sess_data);
915 if (spnego_key) { 1094out_put_spnego_key:
916 key_invalidate(spnego_key); 1095 key_invalidate(spnego_key);
917 key_put(spnego_key); 1096 key_put(spnego_key);
1097out:
1098 sess_data->result = rc;
1099 sess_data->func = NULL;
1100 sess_free_buffer(sess_data);
1101 kfree(ses->auth_key.response);
1102 ses->auth_key.response = NULL;
1103}
1104
1105#else
1106
1107static void
1108sess_auth_kerberos(struct sess_data *sess_data)
1109{
1110 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
1111 sess_data->result = -ENOSYS;
1112 sess_data->func = NULL;
1113}
1114#endif /* ! CONFIG_CIFS_UPCALL */
1115
1116/*
1117 * The required kvec buffers have to be allocated before calling this
1118 * function.
1119 */
1120static int
1121_sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
1122{
1123 struct smb_hdr *smb_buf;
1124 SESSION_SETUP_ANDX *pSMB;
1125 struct cifs_ses *ses = sess_data->ses;
1126 __u32 capabilities;
1127 char *bcc_ptr;
1128
1129 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1130 smb_buf = (struct smb_hdr *)pSMB;
1131
1132 capabilities = cifs_ssetup_hdr(ses, pSMB);
1133 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
1134 cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
1135 return -ENOSYS;
918 } 1136 }
919 kfree(str_area);
920 kfree(ntlmsspblob);
921 ntlmsspblob = NULL;
922 if (resp_buf_type == CIFS_SMALL_BUFFER) {
923 cifs_dbg(FYI, "ssetup freeing small buf %p\n", iov[0].iov_base);
924 cifs_small_buf_release(iov[0].iov_base);
925 } else if (resp_buf_type == CIFS_LARGE_BUFFER)
926 cifs_buf_release(iov[0].iov_base);
927 1137
928 /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */ 1138 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
929 if ((phase == NtLmChallenge) && (rc == 0)) 1139 capabilities |= CAP_EXTENDED_SECURITY;
930 goto ssetup_ntlmssp_authenticate; 1140 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
1141
1142 bcc_ptr = sess_data->iov[2].iov_base;
1143 /* unicode strings must be word aligned */
1144 if ((sess_data->iov[0].iov_len + sess_data->iov[1].iov_len) % 2) {
1145 *bcc_ptr = 0;
1146 bcc_ptr++;
1147 }
1148 unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
1149
1150 sess_data->iov[2].iov_len = (long) bcc_ptr -
1151 (long) sess_data->iov[2].iov_base;
1152
1153 return 0;
1154}
1155
1156static void
1157sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data);
1158
1159static void
1160sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
1161{
1162 int rc;
1163 struct smb_hdr *smb_buf;
1164 SESSION_SETUP_ANDX *pSMB;
1165 struct cifs_ses *ses = sess_data->ses;
1166 __u16 bytes_remaining;
1167 char *bcc_ptr;
1168 u16 blob_len;
1169
1170 cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n");
1171
1172 /*
1173 * if memory allocation is successful, caller of this function
1174 * frees it.
1175 */
1176 ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
1177 if (!ses->ntlmssp) {
1178 rc = -ENOMEM;
1179 goto out;
1180 }
1181 ses->ntlmssp->sesskey_per_smbsess = false;
1182
1183 /* wct = 12 */
1184 rc = sess_alloc_buffer(sess_data, 12);
1185 if (rc)
1186 goto out;
1187
1188 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1189
1190 /* Build security blob before we assemble the request */
1191 build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses);
1192 sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
1193 sess_data->iov[1].iov_base = pSMB->req.SecurityBlob;
1194 pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
1195
1196 rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
1197 if (rc)
1198 goto out;
1199
1200 rc = sess_sendreceive(sess_data);
1201
1202 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1203 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
1204
1205 /* If true, rc here is expected and not an error */
1206 if (sess_data->buf0_type != CIFS_NO_BUFFER &&
1207 smb_buf->Status.CifsError ==
1208 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))
1209 rc = 0;
1210
1211 if (rc)
1212 goto out;
1213
1214 cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
1215
1216 if (smb_buf->WordCount != 4) {
1217 rc = -EIO;
1218 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
1219 goto out;
1220 }
1221
1222 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
1223 cifs_dbg(FYI, "UID = %llu\n", ses->Suid);
1224
1225 bytes_remaining = get_bcc(smb_buf);
1226 bcc_ptr = pByteArea(smb_buf);
1227
1228 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
1229 if (blob_len > bytes_remaining) {
1230 cifs_dbg(VFS, "bad security blob length %d\n",
1231 blob_len);
1232 rc = -EINVAL;
1233 goto out;
1234 }
1235
1236 rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
1237out:
1238 sess_free_buffer(sess_data);
931 1239
932 if (!rc) { 1240 if (!rc) {
933 mutex_lock(&ses->server->srv_mutex); 1241 sess_data->func = sess_auth_rawntlmssp_authenticate;
934 if (!ses->server->session_estab) { 1242 return;
935 if (ses->server->sign) { 1243 }
936 ses->server->session_key.response = 1244
937 kmemdup(ses->auth_key.response, 1245 /* Else error. Cleanup */
938 ses->auth_key.len, GFP_KERNEL); 1246 kfree(ses->auth_key.response);
939 if (!ses->server->session_key.response) { 1247 ses->auth_key.response = NULL;
940 rc = -ENOMEM; 1248 kfree(ses->ntlmssp);
941 mutex_unlock(&ses->server->srv_mutex); 1249 ses->ntlmssp = NULL;
942 goto keycp_exit; 1250
943 } 1251 sess_data->func = NULL;
944 ses->server->session_key.len = 1252 sess_data->result = rc;
945 ses->auth_key.len; 1253}
946 }
947 ses->server->sequence_number = 0x2;
948 ses->server->session_estab = true;
949 }
950 mutex_unlock(&ses->server->srv_mutex);
951 1254
952 cifs_dbg(FYI, "CIFS session established successfully\n"); 1255static void
953 spin_lock(&GlobalMid_Lock); 1256sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
954 ses->status = CifsGood; 1257{
955 ses->need_reconnect = false; 1258 int rc;
956 spin_unlock(&GlobalMid_Lock); 1259 struct smb_hdr *smb_buf;
1260 SESSION_SETUP_ANDX *pSMB;
1261 struct cifs_ses *ses = sess_data->ses;
1262 __u16 bytes_remaining;
1263 char *bcc_ptr;
1264 char *ntlmsspblob = NULL;
1265 u16 blob_len;
1266
1267 cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
1268
1269 /* wct = 12 */
1270 rc = sess_alloc_buffer(sess_data, 12);
1271 if (rc)
1272 goto out;
1273
1274 /* Build security blob before we assemble the request */
1275 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1276 smb_buf = (struct smb_hdr *)pSMB;
1277 /*
1278 * 5 is an empirical value, large enough to hold
1279 * authenticate message plus max 10 of av paris,
1280 * domain, user, workstation names, flags, etc.
1281 */
1282 ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
1283 GFP_KERNEL);
1284 if (!ntlmsspblob) {
1285 rc = -ENOMEM;
1286 goto out;
957 } 1287 }
958 1288
959keycp_exit: 1289 rc = build_ntlmssp_auth_blob(ntlmsspblob,
1290 &blob_len, ses, sess_data->nls_cp);
1291 if (rc)
1292 goto out_free_ntlmsspblob;
1293 sess_data->iov[1].iov_len = blob_len;
1294 sess_data->iov[1].iov_base = ntlmsspblob;
1295 pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
1296 /*
1297 * Make sure that we tell the server that we are using
1298 * the uid that it just gave us back on the response
1299 * (challenge)
1300 */
1301 smb_buf->Uid = ses->Suid;
1302
1303 rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
1304 if (rc)
1305 goto out_free_ntlmsspblob;
1306
1307 rc = sess_sendreceive(sess_data);
1308 if (rc)
1309 goto out_free_ntlmsspblob;
1310
1311 pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
1312 smb_buf = (struct smb_hdr *)sess_data->iov[0].iov_base;
1313 if (smb_buf->WordCount != 4) {
1314 rc = -EIO;
1315 cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount);
1316 goto out_free_ntlmsspblob;
1317 }
1318
1319 if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
1320 cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
1321
1322 bytes_remaining = get_bcc(smb_buf);
1323 bcc_ptr = pByteArea(smb_buf);
1324 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
1325 if (blob_len > bytes_remaining) {
1326 cifs_dbg(VFS, "bad security blob length %d\n",
1327 blob_len);
1328 rc = -EINVAL;
1329 goto out_free_ntlmsspblob;
1330 }
1331 bcc_ptr += blob_len;
1332 bytes_remaining -= blob_len;
1333
1334
1335 /* BB check if Unicode and decode strings */
1336 if (bytes_remaining == 0) {
1337 /* no string area to decode, do nothing */
1338 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
1339 /* unicode string area must be word-aligned */
1340 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
1341 ++bcc_ptr;
1342 --bytes_remaining;
1343 }
1344 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses,
1345 sess_data->nls_cp);
1346 } else {
1347 decode_ascii_ssetup(&bcc_ptr, bytes_remaining, ses,
1348 sess_data->nls_cp);
1349 }
1350
1351out_free_ntlmsspblob:
1352 kfree(ntlmsspblob);
1353out:
1354 sess_free_buffer(sess_data);
1355
1356 if (!rc)
1357 rc = sess_establish_session(sess_data);
1358
1359 /* Cleanup */
960 kfree(ses->auth_key.response); 1360 kfree(ses->auth_key.response);
961 ses->auth_key.response = NULL; 1361 ses->auth_key.response = NULL;
962 kfree(ses->ntlmssp); 1362 kfree(ses->ntlmssp);
1363 ses->ntlmssp = NULL;
1364
1365 sess_data->func = NULL;
1366 sess_data->result = rc;
1367}
1368
1369static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data)
1370{
1371 int type;
1372
1373 type = select_sectype(ses->server, ses->sectype);
1374 cifs_dbg(FYI, "sess setup type %d\n", type);
1375 if (type == Unspecified) {
1376 cifs_dbg(VFS,
1377 "Unable to select appropriate authentication method!");
1378 return -EINVAL;
1379 }
1380
1381 switch (type) {
1382 case LANMAN:
1383 /* LANMAN and plaintext are less secure and off by default.
1384 * So we make this explicitly be turned on in kconfig (in the
1385 * build) and turned on at runtime (changed from the default)
1386 * in proc/fs/cifs or via mount parm. Unfortunately this is
1387 * needed for old Win (e.g. Win95), some obscure NAS and OS/2 */
1388#ifdef CONFIG_CIFS_WEAK_PW_HASH
1389 sess_data->func = sess_auth_lanman;
1390 break;
1391#else
1392 return -EOPNOTSUPP;
1393#endif
1394 case NTLM:
1395 sess_data->func = sess_auth_ntlm;
1396 break;
1397 case NTLMv2:
1398 sess_data->func = sess_auth_ntlmv2;
1399 break;
1400 case Kerberos:
1401#ifdef CONFIG_CIFS_UPCALL
1402 sess_data->func = sess_auth_kerberos;
1403 break;
1404#else
1405 cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
1406 return -ENOSYS;
1407 break;
1408#endif /* CONFIG_CIFS_UPCALL */
1409 case RawNTLMSSP:
1410 sess_data->func = sess_auth_rawntlmssp_negotiate;
1411 break;
1412 default:
1413 cifs_dbg(VFS, "secType %d not supported!\n", type);
1414 return -ENOSYS;
1415 }
1416
1417 return 0;
1418}
1419
1420int CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
1421 const struct nls_table *nls_cp)
1422{
1423 int rc = 0;
1424 struct sess_data *sess_data;
1425
1426 if (ses == NULL) {
1427 WARN(1, "%s: ses == NULL!", __func__);
1428 return -EINVAL;
1429 }
1430
1431 sess_data = kzalloc(sizeof(struct sess_data), GFP_KERNEL);
1432 if (!sess_data)
1433 return -ENOMEM;
1434
1435 rc = select_sec(ses, sess_data);
1436 if (rc)
1437 goto out;
1438
1439 sess_data->xid = xid;
1440 sess_data->ses = ses;
1441 sess_data->buf0_type = CIFS_NO_BUFFER;
1442 sess_data->nls_cp = (struct nls_table *) nls_cp;
1443
1444 while (sess_data->func)
1445 sess_data->func(sess_data);
1446
1447 /* Store result before we free sess_data */
1448 rc = sess_data->result;
963 1449
1450out:
1451 kfree(sess_data);
964 return rc; 1452 return rc;
965} 1453}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d1fdfa848703..5e8c22d6c7b9 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock)
1009 return oplock == OPLOCK_READ; 1009 return oplock == OPLOCK_READ;
1010} 1010}
1011 1011
1012static unsigned int
1013cifs_wp_retry_size(struct inode *inode)
1014{
1015 return CIFS_SB(inode->i_sb)->wsize;
1016}
1017
1012struct smb_version_operations smb1_operations = { 1018struct smb_version_operations smb1_operations = {
1013 .send_cancel = send_nt_cancel, 1019 .send_cancel = send_nt_cancel,
1014 .compare_fids = cifs_compare_fids, 1020 .compare_fids = cifs_compare_fids,
@@ -1019,6 +1025,7 @@ struct smb_version_operations smb1_operations = {
1019 .set_credits = cifs_set_credits, 1025 .set_credits = cifs_set_credits,
1020 .get_credits_field = cifs_get_credits_field, 1026 .get_credits_field = cifs_get_credits_field,
1021 .get_credits = cifs_get_credits, 1027 .get_credits = cifs_get_credits,
1028 .wait_mtu_credits = cifs_wait_mtu_credits,
1022 .get_next_mid = cifs_get_next_mid, 1029 .get_next_mid = cifs_get_next_mid,
1023 .read_data_offset = cifs_read_data_offset, 1030 .read_data_offset = cifs_read_data_offset,
1024 .read_data_length = cifs_read_data_length, 1031 .read_data_length = cifs_read_data_length,
@@ -1078,6 +1085,7 @@ struct smb_version_operations smb1_operations = {
1078 .query_mf_symlink = cifs_query_mf_symlink, 1085 .query_mf_symlink = cifs_query_mf_symlink,
1079 .create_mf_symlink = cifs_create_mf_symlink, 1086 .create_mf_symlink = cifs_create_mf_symlink,
1080 .is_read_op = cifs_is_read_op, 1087 .is_read_op = cifs_is_read_op,
1088 .wp_retry_size = cifs_wp_retry_size,
1081#ifdef CONFIG_CIFS_XATTR 1089#ifdef CONFIG_CIFS_XATTR
1082 .query_all_EAs = CIFSSMBQAllEAs, 1090 .query_all_EAs = CIFSSMBQAllEAs,
1083 .set_EA = CIFSSMBSetEA, 1091 .set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 84c012a6aba0..0150182a4494 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -91,7 +91,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
91 case SMB2_OP_SET_EOF: 91 case SMB2_OP_SET_EOF:
92 tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid, 92 tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid,
93 fid.volatile_fid, current->tgid, 93 fid.volatile_fid, current->tgid,
94 (__le64 *)data); 94 (__le64 *)data, false);
95 break; 95 break;
96 case SMB2_OP_SET_INFO: 96 case SMB2_OP_SET_INFO:
97 tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid, 97 tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid,
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 94bd4fbb13d3..e31a9dfdcd39 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
605 {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, 605 {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"},
606 {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, 606 {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"},
607 {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, 607 {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"},
608 {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, 608 {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"},
609 {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"}, 609 {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"},
610 {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, 610 {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"},
611 {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, 611 {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b8021fde987d..f2e6ac29a8d6 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -437,7 +437,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
437 continue; 437 continue;
438 438
439 cifs_dbg(FYI, "found in the open list\n"); 439 cifs_dbg(FYI, "found in the open list\n");
440 cifs_dbg(FYI, "lease key match, lease break 0x%d\n", 440 cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
441 le32_to_cpu(rsp->NewLeaseState)); 441 le32_to_cpu(rsp->NewLeaseState));
442 442
443 server->ops->set_oplock_level(cinode, lease_state, 0, NULL); 443 server->ops->set_oplock_level(cinode, lease_state, 0, NULL);
@@ -467,7 +467,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
467 } 467 }
468 468
469 cifs_dbg(FYI, "found in the pending open list\n"); 469 cifs_dbg(FYI, "found in the pending open list\n");
470 cifs_dbg(FYI, "lease key match, lease break 0x%d\n", 470 cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
471 le32_to_cpu(rsp->NewLeaseState)); 471 le32_to_cpu(rsp->NewLeaseState));
472 472
473 open->oplock = lease_state; 473 open->oplock = lease_state;
@@ -546,7 +546,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
546 return false; 546 return false;
547 } 547 }
548 548
549 cifs_dbg(FYI, "oplock level 0x%d\n", rsp->OplockLevel); 549 cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel);
550 550
551 /* look up tcon based on tid & uid */ 551 /* look up tcon based on tid & uid */
552 spin_lock(&cifs_tcp_ses_lock); 552 spin_lock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 787844bde384..77f8aeb9c2fc 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -19,6 +19,7 @@
19 19
20#include <linux/pagemap.h> 20#include <linux/pagemap.h>
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/falloc.h>
22#include "cifsglob.h" 23#include "cifsglob.h"
23#include "smb2pdu.h" 24#include "smb2pdu.h"
24#include "smb2proto.h" 25#include "smb2proto.h"
@@ -112,6 +113,53 @@ smb2_get_credits(struct mid_q_entry *mid)
112 return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest); 113 return le16_to_cpu(((struct smb2_hdr *)mid->resp_buf)->CreditRequest);
113} 114}
114 115
116static int
117smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
118 unsigned int *num, unsigned int *credits)
119{
120 int rc = 0;
121 unsigned int scredits;
122
123 spin_lock(&server->req_lock);
124 while (1) {
125 if (server->credits <= 0) {
126 spin_unlock(&server->req_lock);
127 cifs_num_waiters_inc(server);
128 rc = wait_event_killable(server->request_q,
129 has_credits(server, &server->credits));
130 cifs_num_waiters_dec(server);
131 if (rc)
132 return rc;
133 spin_lock(&server->req_lock);
134 } else {
135 if (server->tcpStatus == CifsExiting) {
136 spin_unlock(&server->req_lock);
137 return -ENOENT;
138 }
139
140 scredits = server->credits;
141 /* can deadlock with reopen */
142 if (scredits == 1) {
143 *num = SMB2_MAX_BUFFER_SIZE;
144 *credits = 0;
145 break;
146 }
147
148 /* leave one credit for a possible reopen */
149 scredits--;
150 *num = min_t(unsigned int, size,
151 scredits * SMB2_MAX_BUFFER_SIZE);
152
153 *credits = DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
154 server->credits -= *credits;
155 server->in_flight++;
156 break;
157 }
158 }
159 spin_unlock(&server->req_lock);
160 return rc;
161}
162
115static __u64 163static __u64
116smb2_get_next_mid(struct TCP_Server_Info *server) 164smb2_get_next_mid(struct TCP_Server_Info *server)
117{ 165{
@@ -182,8 +230,9 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
182 /* start with specified wsize, or default */ 230 /* start with specified wsize, or default */
183 wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE; 231 wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
184 wsize = min_t(unsigned int, wsize, server->max_write); 232 wsize = min_t(unsigned int, wsize, server->max_write);
185 /* set it to the maximum buffer size value we can send with 1 credit */ 233
186 wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE); 234 if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
235 wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
187 236
188 return wsize; 237 return wsize;
189} 238}
@@ -197,8 +246,9 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
197 /* start with specified rsize, or default */ 246 /* start with specified rsize, or default */
198 rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE; 247 rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
199 rsize = min_t(unsigned int, rsize, server->max_read); 248 rsize = min_t(unsigned int, rsize, server->max_read);
200 /* set it to the maximum buffer size value we can send with 1 credit */ 249
201 rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE); 250 if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
251 rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
202 252
203 return rsize; 253 return rsize;
204} 254}
@@ -687,7 +737,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
687{ 737{
688 __le64 eof = cpu_to_le64(size); 738 __le64 eof = cpu_to_le64(size);
689 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, 739 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
690 cfile->fid.volatile_fid, cfile->pid, &eof); 740 cfile->fid.volatile_fid, cfile->pid, &eof, false);
691} 741}
692 742
693static int 743static int
@@ -1104,6 +1154,13 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch)
1104 return le32_to_cpu(lc->lcontext.LeaseState); 1154 return le32_to_cpu(lc->lcontext.LeaseState);
1105} 1155}
1106 1156
1157static unsigned int
1158smb2_wp_retry_size(struct inode *inode)
1159{
1160 return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize,
1161 SMB2_MAX_BUFFER_SIZE);
1162}
1163
1107struct smb_version_operations smb20_operations = { 1164struct smb_version_operations smb20_operations = {
1108 .compare_fids = smb2_compare_fids, 1165 .compare_fids = smb2_compare_fids,
1109 .setup_request = smb2_setup_request, 1166 .setup_request = smb2_setup_request,
@@ -1113,6 +1170,7 @@ struct smb_version_operations smb20_operations = {
1113 .set_credits = smb2_set_credits, 1170 .set_credits = smb2_set_credits,
1114 .get_credits_field = smb2_get_credits_field, 1171 .get_credits_field = smb2_get_credits_field,
1115 .get_credits = smb2_get_credits, 1172 .get_credits = smb2_get_credits,
1173 .wait_mtu_credits = cifs_wait_mtu_credits,
1116 .get_next_mid = smb2_get_next_mid, 1174 .get_next_mid = smb2_get_next_mid,
1117 .read_data_offset = smb2_read_data_offset, 1175 .read_data_offset = smb2_read_data_offset,
1118 .read_data_length = smb2_read_data_length, 1176 .read_data_length = smb2_read_data_length,
@@ -1177,6 +1235,7 @@ struct smb_version_operations smb20_operations = {
1177 .create_lease_buf = smb2_create_lease_buf, 1235 .create_lease_buf = smb2_create_lease_buf,
1178 .parse_lease_buf = smb2_parse_lease_buf, 1236 .parse_lease_buf = smb2_parse_lease_buf,
1179 .clone_range = smb2_clone_range, 1237 .clone_range = smb2_clone_range,
1238 .wp_retry_size = smb2_wp_retry_size,
1180}; 1239};
1181 1240
1182struct smb_version_operations smb21_operations = { 1241struct smb_version_operations smb21_operations = {
@@ -1188,6 +1247,7 @@ struct smb_version_operations smb21_operations = {
1188 .set_credits = smb2_set_credits, 1247 .set_credits = smb2_set_credits,
1189 .get_credits_field = smb2_get_credits_field, 1248 .get_credits_field = smb2_get_credits_field,
1190 .get_credits = smb2_get_credits, 1249 .get_credits = smb2_get_credits,
1250 .wait_mtu_credits = smb2_wait_mtu_credits,
1191 .get_next_mid = smb2_get_next_mid, 1251 .get_next_mid = smb2_get_next_mid,
1192 .read_data_offset = smb2_read_data_offset, 1252 .read_data_offset = smb2_read_data_offset,
1193 .read_data_length = smb2_read_data_length, 1253 .read_data_length = smb2_read_data_length,
@@ -1252,6 +1312,7 @@ struct smb_version_operations smb21_operations = {
1252 .create_lease_buf = smb2_create_lease_buf, 1312 .create_lease_buf = smb2_create_lease_buf,
1253 .parse_lease_buf = smb2_parse_lease_buf, 1313 .parse_lease_buf = smb2_parse_lease_buf,
1254 .clone_range = smb2_clone_range, 1314 .clone_range = smb2_clone_range,
1315 .wp_retry_size = smb2_wp_retry_size,
1255}; 1316};
1256 1317
1257struct smb_version_operations smb30_operations = { 1318struct smb_version_operations smb30_operations = {
@@ -1263,6 +1324,7 @@ struct smb_version_operations smb30_operations = {
1263 .set_credits = smb2_set_credits, 1324 .set_credits = smb2_set_credits,
1264 .get_credits_field = smb2_get_credits_field, 1325 .get_credits_field = smb2_get_credits_field,
1265 .get_credits = smb2_get_credits, 1326 .get_credits = smb2_get_credits,
1327 .wait_mtu_credits = smb2_wait_mtu_credits,
1266 .get_next_mid = smb2_get_next_mid, 1328 .get_next_mid = smb2_get_next_mid,
1267 .read_data_offset = smb2_read_data_offset, 1329 .read_data_offset = smb2_read_data_offset,
1268 .read_data_length = smb2_read_data_length, 1330 .read_data_length = smb2_read_data_length,
@@ -1330,6 +1392,7 @@ struct smb_version_operations smb30_operations = {
1330 .parse_lease_buf = smb3_parse_lease_buf, 1392 .parse_lease_buf = smb3_parse_lease_buf,
1331 .clone_range = smb2_clone_range, 1393 .clone_range = smb2_clone_range,
1332 .validate_negotiate = smb3_validate_negotiate, 1394 .validate_negotiate = smb3_validate_negotiate,
1395 .wp_retry_size = smb2_wp_retry_size,
1333}; 1396};
1334 1397
1335struct smb_version_values smb20_values = { 1398struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index b0b260dbb19d..42ebc1a8be6c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -108,7 +108,6 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
108 if (!tcon) 108 if (!tcon)
109 goto out; 109 goto out;
110 110
111 /* BB FIXME when we do write > 64K add +1 for every 64K in req or rsp */
112 /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ 111 /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
113 /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ 112 /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
114 if ((tcon->ses) && 113 if ((tcon->ses) &&
@@ -245,10 +244,6 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
245 if (rc) 244 if (rc)
246 goto out; 245 goto out;
247 atomic_inc(&tconInfoReconnectCount); 246 atomic_inc(&tconInfoReconnectCount);
248 /*
249 * BB FIXME add code to check if wsize needs update due to negotiated
250 * smb buffer size shrinking.
251 */
252out: 247out:
253 /* 248 /*
254 * Check if handle based operation so we know whether we can continue 249 * Check if handle based operation so we know whether we can continue
@@ -309,16 +304,6 @@ small_smb2_init(__le16 smb2_command, struct cifs_tcon *tcon,
309 return rc; 304 return rc;
310} 305}
311 306
312static void
313free_rsp_buf(int resp_buftype, void *rsp)
314{
315 if (resp_buftype == CIFS_SMALL_BUFFER)
316 cifs_small_buf_release(rsp);
317 else if (resp_buftype == CIFS_LARGE_BUFFER)
318 cifs_buf_release(rsp);
319}
320
321
322/* 307/*
323 * 308 *
324 * SMB2 Worker functions follow: 309 * SMB2 Worker functions follow:
@@ -1738,12 +1723,18 @@ smb2_readv_callback(struct mid_q_entry *mid)
1738 rc); 1723 rc);
1739 } 1724 }
1740 /* FIXME: should this be counted toward the initiating task? */ 1725 /* FIXME: should this be counted toward the initiating task? */
1741 task_io_account_read(rdata->bytes); 1726 task_io_account_read(rdata->got_bytes);
1742 cifs_stats_bytes_read(tcon, rdata->bytes); 1727 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1743 break; 1728 break;
1744 case MID_REQUEST_SUBMITTED: 1729 case MID_REQUEST_SUBMITTED:
1745 case MID_RETRY_NEEDED: 1730 case MID_RETRY_NEEDED:
1746 rdata->result = -EAGAIN; 1731 rdata->result = -EAGAIN;
1732 if (server->sign && rdata->got_bytes)
1733 /* reset bytes number since we can not check a sign */
1734 rdata->got_bytes = 0;
1735 /* FIXME: should this be counted toward the initiating task? */
1736 task_io_account_read(rdata->got_bytes);
1737 cifs_stats_bytes_read(tcon, rdata->got_bytes);
1747 break; 1738 break;
1748 default: 1739 default:
1749 if (rdata->result != -ENODATA) 1740 if (rdata->result != -ENODATA)
@@ -1762,11 +1753,12 @@ smb2_readv_callback(struct mid_q_entry *mid)
1762int 1753int
1763smb2_async_readv(struct cifs_readdata *rdata) 1754smb2_async_readv(struct cifs_readdata *rdata)
1764{ 1755{
1765 int rc; 1756 int rc, flags = 0;
1766 struct smb2_hdr *buf; 1757 struct smb2_hdr *buf;
1767 struct cifs_io_parms io_parms; 1758 struct cifs_io_parms io_parms;
1768 struct smb_rqst rqst = { .rq_iov = &rdata->iov, 1759 struct smb_rqst rqst = { .rq_iov = &rdata->iov,
1769 .rq_nvec = 1 }; 1760 .rq_nvec = 1 };
1761 struct TCP_Server_Info *server;
1770 1762
1771 cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", 1763 cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n",
1772 __func__, rdata->offset, rdata->bytes); 1764 __func__, rdata->offset, rdata->bytes);
@@ -1777,18 +1769,41 @@ smb2_async_readv(struct cifs_readdata *rdata)
1777 io_parms.persistent_fid = rdata->cfile->fid.persistent_fid; 1769 io_parms.persistent_fid = rdata->cfile->fid.persistent_fid;
1778 io_parms.volatile_fid = rdata->cfile->fid.volatile_fid; 1770 io_parms.volatile_fid = rdata->cfile->fid.volatile_fid;
1779 io_parms.pid = rdata->pid; 1771 io_parms.pid = rdata->pid;
1772
1773 server = io_parms.tcon->ses->server;
1774
1780 rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0); 1775 rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0);
1781 if (rc) 1776 if (rc) {
1777 if (rc == -EAGAIN && rdata->credits) {
1778 /* credits was reset by reconnect */
1779 rdata->credits = 0;
1780 /* reduce in_flight value since we won't send the req */
1781 spin_lock(&server->req_lock);
1782 server->in_flight--;
1783 spin_unlock(&server->req_lock);
1784 }
1782 return rc; 1785 return rc;
1786 }
1783 1787
1784 buf = (struct smb2_hdr *)rdata->iov.iov_base; 1788 buf = (struct smb2_hdr *)rdata->iov.iov_base;
1785 /* 4 for rfc1002 length field */ 1789 /* 4 for rfc1002 length field */
1786 rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4; 1790 rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4;
1787 1791
1792 if (rdata->credits) {
1793 buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
1794 SMB2_MAX_BUFFER_SIZE));
1795 spin_lock(&server->req_lock);
1796 server->credits += rdata->credits -
1797 le16_to_cpu(buf->CreditCharge);
1798 spin_unlock(&server->req_lock);
1799 wake_up(&server->request_q);
1800 flags = CIFS_HAS_CREDITS;
1801 }
1802
1788 kref_get(&rdata->refcount); 1803 kref_get(&rdata->refcount);
1789 rc = cifs_call_async(io_parms.tcon->ses->server, &rqst, 1804 rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
1790 cifs_readv_receive, smb2_readv_callback, 1805 cifs_readv_receive, smb2_readv_callback,
1791 rdata, 0); 1806 rdata, flags);
1792 if (rc) { 1807 if (rc) {
1793 kref_put(&rdata->refcount, cifs_readdata_release); 1808 kref_put(&rdata->refcount, cifs_readdata_release);
1794 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); 1809 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
@@ -1906,15 +1921,25 @@ int
1906smb2_async_writev(struct cifs_writedata *wdata, 1921smb2_async_writev(struct cifs_writedata *wdata,
1907 void (*release)(struct kref *kref)) 1922 void (*release)(struct kref *kref))
1908{ 1923{
1909 int rc = -EACCES; 1924 int rc = -EACCES, flags = 0;
1910 struct smb2_write_req *req = NULL; 1925 struct smb2_write_req *req = NULL;
1911 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 1926 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
1927 struct TCP_Server_Info *server = tcon->ses->server;
1912 struct kvec iov; 1928 struct kvec iov;
1913 struct smb_rqst rqst; 1929 struct smb_rqst rqst;
1914 1930
1915 rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req); 1931 rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
1916 if (rc) 1932 if (rc) {
1933 if (rc == -EAGAIN && wdata->credits) {
1934 /* credits was reset by reconnect */
1935 wdata->credits = 0;
1936 /* reduce in_flight value since we won't send the req */
1937 spin_lock(&server->req_lock);
1938 server->in_flight--;
1939 spin_unlock(&server->req_lock);
1940 }
1917 goto async_writev_out; 1941 goto async_writev_out;
1942 }
1918 1943
1919 req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid); 1944 req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
1920 1945
@@ -1947,9 +1972,20 @@ smb2_async_writev(struct cifs_writedata *wdata,
1947 1972
1948 inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */); 1973 inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
1949 1974
1975 if (wdata->credits) {
1976 req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
1977 SMB2_MAX_BUFFER_SIZE));
1978 spin_lock(&server->req_lock);
1979 server->credits += wdata->credits -
1980 le16_to_cpu(req->hdr.CreditCharge);
1981 spin_unlock(&server->req_lock);
1982 wake_up(&server->request_q);
1983 flags = CIFS_HAS_CREDITS;
1984 }
1985
1950 kref_get(&wdata->refcount); 1986 kref_get(&wdata->refcount);
1951 rc = cifs_call_async(tcon->ses->server, &rqst, NULL, 1987 rc = cifs_call_async(server, &rqst, NULL, smb2_writev_callback, wdata,
1952 smb2_writev_callback, wdata, 0); 1988 flags);
1953 1989
1954 if (rc) { 1990 if (rc) {
1955 kref_put(&wdata->refcount, release); 1991 kref_put(&wdata->refcount, release);
@@ -2325,7 +2361,7 @@ SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
2325 2361
2326int 2362int
2327SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, 2363SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
2328 u64 volatile_fid, u32 pid, __le64 *eof) 2364 u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc)
2329{ 2365{
2330 struct smb2_file_eof_info info; 2366 struct smb2_file_eof_info info;
2331 void *data; 2367 void *data;
@@ -2336,8 +2372,12 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
2336 data = &info; 2372 data = &info;
2337 size = sizeof(struct smb2_file_eof_info); 2373 size = sizeof(struct smb2_file_eof_info);
2338 2374
2339 return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid, 2375 if (is_falloc)
2340 FILE_END_OF_FILE_INFORMATION, 1, &data, &size); 2376 return send_set_info(xid, tcon, persistent_fid, volatile_fid,
2377 pid, FILE_ALLOCATION_INFORMATION, 1, &data, &size);
2378 else
2379 return send_set_info(xid, tcon, persistent_fid, volatile_fid,
2380 pid, FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
2341} 2381}
2342 2382
2343int 2383int
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 0ce48db20a65..67e8ce8055de 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -139,7 +139,7 @@ extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
139 __le16 *target_file); 139 __le16 *target_file);
140extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, 140extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
141 u64 persistent_fid, u64 volatile_fid, u32 pid, 141 u64 persistent_fid, u64 volatile_fid, u32 pid,
142 __le64 *eof); 142 __le64 *eof, bool is_fallocate);
143extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, 143extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
144 u64 persistent_fid, u64 volatile_fid, 144 u64 persistent_fid, u64 volatile_fid,
145 FILE_BASIC_INFO *buf); 145 FILE_BASIC_INFO *buf);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 59c748ce872f..5111e7272db6 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -466,7 +466,12 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
466static inline void 466static inline void
467smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr) 467smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr)
468{ 468{
469 unsigned int i, num = le16_to_cpu(hdr->CreditCharge);
470
469 hdr->MessageId = get_next_mid64(server); 471 hdr->MessageId = get_next_mid64(server);
472 /* skip message numbers according to CreditCharge field */
473 for (i = 1; i < num; i++)
474 get_next_mid(server);
470} 475}
471 476
472static struct mid_q_entry * 477static struct mid_q_entry *
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 18cd5650a5fc..9d087f4e7d4e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -448,6 +448,15 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout,
448 return wait_for_free_credits(server, timeout, val); 448 return wait_for_free_credits(server, timeout, val);
449} 449}
450 450
451int
452cifs_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
453 unsigned int *num, unsigned int *credits)
454{
455 *num = size;
456 *credits = 0;
457 return 0;
458}
459
451static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, 460static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
452 struct mid_q_entry **ppmidQ) 461 struct mid_q_entry **ppmidQ)
453{ 462{
@@ -531,20 +540,23 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
531{ 540{
532 int rc, timeout, optype; 541 int rc, timeout, optype;
533 struct mid_q_entry *mid; 542 struct mid_q_entry *mid;
543 unsigned int credits = 0;
534 544
535 timeout = flags & CIFS_TIMEOUT_MASK; 545 timeout = flags & CIFS_TIMEOUT_MASK;
536 optype = flags & CIFS_OP_MASK; 546 optype = flags & CIFS_OP_MASK;
537 547
538 rc = wait_for_free_request(server, timeout, optype); 548 if ((flags & CIFS_HAS_CREDITS) == 0) {
539 if (rc) 549 rc = wait_for_free_request(server, timeout, optype);
540 return rc; 550 if (rc)
551 return rc;
552 credits = 1;
553 }
541 554
542 mutex_lock(&server->srv_mutex); 555 mutex_lock(&server->srv_mutex);
543 mid = server->ops->setup_async_request(server, rqst); 556 mid = server->ops->setup_async_request(server, rqst);
544 if (IS_ERR(mid)) { 557 if (IS_ERR(mid)) {
545 mutex_unlock(&server->srv_mutex); 558 mutex_unlock(&server->srv_mutex);
546 add_credits(server, 1, optype); 559 add_credits_and_wake_if(server, credits, optype);
547 wake_up(&server->request_q);
548 return PTR_ERR(mid); 560 return PTR_ERR(mid);
549 } 561 }
550 562
@@ -572,8 +584,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
572 return 0; 584 return 0;
573 585
574 cifs_delete_mid(mid); 586 cifs_delete_mid(mid);
575 add_credits(server, 1, optype); 587 add_credits_and_wake_if(server, credits, optype);
576 wake_up(&server->request_q);
577 return rc; 588 return rc;
578} 589}
579 590
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 1da168c61d35..278f8fdeb9ef 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -13,7 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <asm/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2849f41e72a2..1326d38960db 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -13,7 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <asm/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/string.h> 17#include <linux/string.h>
18 18
19#include <linux/coda.h> 19#include <linux/coda.h>
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index cd8a63238b11..9c3dedc000d1 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -19,8 +19,7 @@
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22 22#include <linux/uaccess.h>
23#include <asm/uaccess.h>
24 23
25#include <linux/coda.h> 24#include <linux/coda.h>
26#include <linux/coda_psdev.h> 25#include <linux/coda_psdev.h>
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 9e83b7790212..d244d743a232 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -18,7 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <asm/uaccess.h> 21#include <linux/uaccess.h>
22 22
23#include <linux/coda.h> 23#include <linux/coda.h>
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index fe3afb2de880..b945410bfcd5 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -21,9 +21,7 @@
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/pid_namespace.h> 23#include <linux/pid_namespace.h>
24 24#include <linux/uaccess.h>
25#include <asm/uaccess.h>
26
27#include <linux/fs.h> 25#include <linux/fs.h>
28#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
29 27
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 3f5de96bbb58..4326d172fc27 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -16,7 +16,7 @@
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <asm/uaccess.h> 19#include <linux/uaccess.h>
20 20
21#include <linux/coda.h> 21#include <linux/coda.h>
22#include <linux/coda_psdev.h> 22#include <linux/coda_psdev.h>
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 5c1e4242368b..822629126e89 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -40,7 +40,7 @@
40#include <linux/pid_namespace.h> 40#include <linux/pid_namespace.h>
41#include <asm/io.h> 41#include <asm/io.h>
42#include <asm/poll.h> 42#include <asm/poll.h>
43#include <asm/uaccess.h> 43#include <linux/uaccess.h>
44 44
45#include <linux/coda.h> 45#include <linux/coda.h>
46#include <linux/coda_psdev.h> 46#include <linux/coda_psdev.h>
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 21fcf8dcb9cd..5bb6e27298a4 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -27,7 +27,7 @@
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/mutex.h> 29#include <linux/mutex.h>
30#include <asm/uaccess.h> 30#include <linux/uaccess.h>
31#include <linux/vmalloc.h> 31#include <linux/vmalloc.h>
32#include <linux/vfs.h> 32#include <linux/vfs.h>
33 33
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e82289047272..afec6450450f 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -59,7 +59,7 @@
59#include <linux/gfp.h> 59#include <linux/gfp.h>
60 60
61#include <net/bluetooth/bluetooth.h> 61#include <net/bluetooth/bluetooth.h>
62#include <net/bluetooth/hci.h> 62#include <net/bluetooth/hci_sock.h>
63#include <net/bluetooth/rfcomm.h> 63#include <net/bluetooth/rfcomm.h>
64 64
65#include <linux/capi.h> 65#include <linux/capi.h>
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index ddcfe590b8a8..355c522f3585 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -11,6 +11,8 @@
11 * The actual compression is based on zlib, see the other files. 11 * The actual compression is based on zlib, see the other files.
12 */ 12 */
13 13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
14#include <linux/module.h> 16#include <linux/module.h>
15#include <linux/fs.h> 17#include <linux/fs.h>
16#include <linux/pagemap.h> 18#include <linux/pagemap.h>
@@ -21,7 +23,7 @@
21#include <linux/vfs.h> 23#include <linux/vfs.h>
22#include <linux/mutex.h> 24#include <linux/mutex.h>
23#include <uapi/linux/cramfs_fs.h> 25#include <uapi/linux/cramfs_fs.h>
24#include <asm/uaccess.h> 26#include <linux/uaccess.h>
25 27
26#include "internal.h" 28#include "internal.h"
27 29
@@ -153,7 +155,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
153 155
154static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE]; 156static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
155static unsigned buffer_blocknr[READ_BUFFERS]; 157static unsigned buffer_blocknr[READ_BUFFERS];
156static struct super_block * buffer_dev[READ_BUFFERS]; 158static struct super_block *buffer_dev[READ_BUFFERS];
157static int next_buffer; 159static int next_buffer;
158 160
159/* 161/*
@@ -205,6 +207,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
205 207
206 for (i = 0; i < BLKS_PER_BUF; i++) { 208 for (i = 0; i < BLKS_PER_BUF; i++) {
207 struct page *page = pages[i]; 209 struct page *page = pages[i];
210
208 if (page) { 211 if (page) {
209 wait_on_page_locked(page); 212 wait_on_page_locked(page);
210 if (!PageUptodate(page)) { 213 if (!PageUptodate(page)) {
@@ -223,6 +226,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
223 data = read_buffers[buffer]; 226 data = read_buffers[buffer];
224 for (i = 0; i < BLKS_PER_BUF; i++) { 227 for (i = 0; i < BLKS_PER_BUF; i++) {
225 struct page *page = pages[i]; 228 struct page *page = pages[i];
229
226 if (page) { 230 if (page) {
227 memcpy(data, kmap(page), PAGE_CACHE_SIZE); 231 memcpy(data, kmap(page), PAGE_CACHE_SIZE);
228 kunmap(page); 232 kunmap(page);
@@ -237,6 +241,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
237static void cramfs_kill_sb(struct super_block *sb) 241static void cramfs_kill_sb(struct super_block *sb)
238{ 242{
239 struct cramfs_sb_info *sbi = CRAMFS_SB(sb); 243 struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
244
240 kill_block_super(sb); 245 kill_block_super(sb);
241 kfree(sbi); 246 kfree(sbi);
242} 247}
@@ -277,7 +282,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
277 /* check for wrong endianness */ 282 /* check for wrong endianness */
278 if (super.magic == CRAMFS_MAGIC_WEND) { 283 if (super.magic == CRAMFS_MAGIC_WEND) {
279 if (!silent) 284 if (!silent)
280 printk(KERN_ERR "cramfs: wrong endianness\n"); 285 pr_err("wrong endianness\n");
281 return -EINVAL; 286 return -EINVAL;
282 } 287 }
283 288
@@ -287,22 +292,22 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
287 mutex_unlock(&read_mutex); 292 mutex_unlock(&read_mutex);
288 if (super.magic != CRAMFS_MAGIC) { 293 if (super.magic != CRAMFS_MAGIC) {
289 if (super.magic == CRAMFS_MAGIC_WEND && !silent) 294 if (super.magic == CRAMFS_MAGIC_WEND && !silent)
290 printk(KERN_ERR "cramfs: wrong endianness\n"); 295 pr_err("wrong endianness\n");
291 else if (!silent) 296 else if (!silent)
292 printk(KERN_ERR "cramfs: wrong magic\n"); 297 pr_err("wrong magic\n");
293 return -EINVAL; 298 return -EINVAL;
294 } 299 }
295 } 300 }
296 301
297 /* get feature flags first */ 302 /* get feature flags first */
298 if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) { 303 if (super.flags & ~CRAMFS_SUPPORTED_FLAGS) {
299 printk(KERN_ERR "cramfs: unsupported filesystem features\n"); 304 pr_err("unsupported filesystem features\n");
300 return -EINVAL; 305 return -EINVAL;
301 } 306 }
302 307
303 /* Check that the root inode is in a sane state */ 308 /* Check that the root inode is in a sane state */
304 if (!S_ISDIR(super.root.mode)) { 309 if (!S_ISDIR(super.root.mode)) {
305 printk(KERN_ERR "cramfs: root is not a directory\n"); 310 pr_err("root is not a directory\n");
306 return -EINVAL; 311 return -EINVAL;
307 } 312 }
308 /* correct strange, hard-coded permissions of mkcramfs */ 313 /* correct strange, hard-coded permissions of mkcramfs */
@@ -310,23 +315,23 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
310 315
311 root_offset = super.root.offset << 2; 316 root_offset = super.root.offset << 2;
312 if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) { 317 if (super.flags & CRAMFS_FLAG_FSID_VERSION_2) {
313 sbi->size=super.size; 318 sbi->size = super.size;
314 sbi->blocks=super.fsid.blocks; 319 sbi->blocks = super.fsid.blocks;
315 sbi->files=super.fsid.files; 320 sbi->files = super.fsid.files;
316 } else { 321 } else {
317 sbi->size=1<<28; 322 sbi->size = 1<<28;
318 sbi->blocks=0; 323 sbi->blocks = 0;
319 sbi->files=0; 324 sbi->files = 0;
320 } 325 }
321 sbi->magic=super.magic; 326 sbi->magic = super.magic;
322 sbi->flags=super.flags; 327 sbi->flags = super.flags;
323 if (root_offset == 0) 328 if (root_offset == 0)
324 printk(KERN_INFO "cramfs: empty filesystem"); 329 pr_info("empty filesystem");
325 else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && 330 else if (!(super.flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
326 ((root_offset != sizeof(struct cramfs_super)) && 331 ((root_offset != sizeof(struct cramfs_super)) &&
327 (root_offset != 512 + sizeof(struct cramfs_super)))) 332 (root_offset != 512 + sizeof(struct cramfs_super))))
328 { 333 {
329 printk(KERN_ERR "cramfs: bad root offset %lu\n", root_offset); 334 pr_err("bad root offset %lu\n", root_offset);
330 return -EINVAL; 335 return -EINVAL;
331 } 336 }
332 337
@@ -425,7 +430,7 @@ static int cramfs_readdir(struct file *file, struct dir_context *ctx)
425/* 430/*
426 * Lookup and fill in the inode data.. 431 * Lookup and fill in the inode data..
427 */ 432 */
428static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 433static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
429{ 434{
430 unsigned int offset = 0; 435 unsigned int offset = 0;
431 struct inode *inode = NULL; 436 struct inode *inode = NULL;
@@ -483,7 +488,7 @@ out:
483 return NULL; 488 return NULL;
484} 489}
485 490
486static int cramfs_readpage(struct file *file, struct page * page) 491static int cramfs_readpage(struct file *file, struct page *page)
487{ 492{
488 struct inode *inode = page->mapping->host; 493 struct inode *inode = page->mapping->host;
489 u32 maxblock; 494 u32 maxblock;
@@ -511,7 +516,7 @@ static int cramfs_readpage(struct file *file, struct page * page)
511 if (compr_len == 0) 516 if (compr_len == 0)
512 ; /* hole */ 517 ; /* hole */
513 else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) { 518 else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
514 pr_err("cramfs: bad compressed blocksize %u\n", 519 pr_err("bad compressed blocksize %u\n",
515 compr_len); 520 compr_len);
516 goto err; 521 goto err;
517 } else { 522 } else {
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 1760c1b84d97..ec4f1d4fdad0 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -15,6 +15,8 @@
15 * then is used by multiple filesystems. 15 * then is used by multiple filesystems.
16 */ 16 */
17 17
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
18#include <linux/kernel.h> 20#include <linux/kernel.h>
19#include <linux/errno.h> 21#include <linux/errno.h>
20#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
@@ -37,7 +39,7 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
37 39
38 err = zlib_inflateReset(&stream); 40 err = zlib_inflateReset(&stream);
39 if (err != Z_OK) { 41 if (err != Z_OK) {
40 printk("zlib_inflateReset error %d\n", err); 42 pr_err("zlib_inflateReset error %d\n", err);
41 zlib_inflateEnd(&stream); 43 zlib_inflateEnd(&stream);
42 zlib_inflateInit(&stream); 44 zlib_inflateInit(&stream);
43 } 45 }
@@ -48,8 +50,8 @@ int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen)
48 return stream.total_out; 50 return stream.total_out;
49 51
50err: 52err:
51 printk("Error %d while decompressing!\n", err); 53 pr_err("Error %d while decompressing!\n", err);
52 printk("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen); 54 pr_err("%p(%d)->%p(%d)\n", src, srclen, dst, dstlen);
53 return -EIO; 55 return -EIO;
54} 56}
55 57
@@ -57,7 +59,7 @@ int cramfs_uncompress_init(void)
57{ 59{
58 if (!initialized++) { 60 if (!initialized++) {
59 stream.workspace = vmalloc(zlib_inflate_workspacesize()); 61 stream.workspace = vmalloc(zlib_inflate_workspacesize());
60 if ( !stream.workspace ) { 62 if (!stream.workspace) {
61 initialized = 0; 63 initialized = 0;
62 return -ENOMEM; 64 return -ENOMEM;
63 } 65 }
diff --git a/fs/dcache.c b/fs/dcache.c
index 06f65857a855..d30ce699ae4b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -731,8 +731,6 @@ EXPORT_SYMBOL(dget_parent);
731/** 731/**
732 * d_find_alias - grab a hashed alias of inode 732 * d_find_alias - grab a hashed alias of inode
733 * @inode: inode in question 733 * @inode: inode in question
734 * @want_discon: flag, used by d_splice_alias, to request
735 * that only a DISCONNECTED alias be returned.
736 * 734 *
737 * If inode has a hashed alias, or is a directory and has any alias, 735 * If inode has a hashed alias, or is a directory and has any alias,
738 * acquire the reference to alias and return it. Otherwise return NULL. 736 * acquire the reference to alias and return it. Otherwise return NULL.
@@ -741,10 +739,9 @@ EXPORT_SYMBOL(dget_parent);
741 * of a filesystem. 739 * of a filesystem.
742 * 740 *
743 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer 741 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
744 * any other hashed alias over that one unless @want_discon is set, 742 * any other hashed alias over that one.
745 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
746 */ 743 */
747static struct dentry *__d_find_alias(struct inode *inode, int want_discon) 744static struct dentry *__d_find_alias(struct inode *inode)
748{ 745{
749 struct dentry *alias, *discon_alias; 746 struct dentry *alias, *discon_alias;
750 747
@@ -756,7 +753,7 @@ again:
756 if (IS_ROOT(alias) && 753 if (IS_ROOT(alias) &&
757 (alias->d_flags & DCACHE_DISCONNECTED)) { 754 (alias->d_flags & DCACHE_DISCONNECTED)) {
758 discon_alias = alias; 755 discon_alias = alias;
759 } else if (!want_discon) { 756 } else {
760 __dget_dlock(alias); 757 __dget_dlock(alias);
761 spin_unlock(&alias->d_lock); 758 spin_unlock(&alias->d_lock);
762 return alias; 759 return alias;
@@ -768,12 +765,9 @@ again:
768 alias = discon_alias; 765 alias = discon_alias;
769 spin_lock(&alias->d_lock); 766 spin_lock(&alias->d_lock);
770 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 767 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
771 if (IS_ROOT(alias) && 768 __dget_dlock(alias);
772 (alias->d_flags & DCACHE_DISCONNECTED)) { 769 spin_unlock(&alias->d_lock);
773 __dget_dlock(alias); 770 return alias;
774 spin_unlock(&alias->d_lock);
775 return alias;
776 }
777 } 771 }
778 spin_unlock(&alias->d_lock); 772 spin_unlock(&alias->d_lock);
779 goto again; 773 goto again;
@@ -787,7 +781,7 @@ struct dentry *d_find_alias(struct inode *inode)
787 781
788 if (!hlist_empty(&inode->i_dentry)) { 782 if (!hlist_empty(&inode->i_dentry)) {
789 spin_lock(&inode->i_lock); 783 spin_lock(&inode->i_lock);
790 de = __d_find_alias(inode, 0); 784 de = __d_find_alias(inode);
791 spin_unlock(&inode->i_lock); 785 spin_unlock(&inode->i_lock);
792 } 786 }
793 return de; 787 return de;
@@ -1781,25 +1775,7 @@ struct dentry *d_find_any_alias(struct inode *inode)
1781} 1775}
1782EXPORT_SYMBOL(d_find_any_alias); 1776EXPORT_SYMBOL(d_find_any_alias);
1783 1777
1784/** 1778static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1785 * d_obtain_alias - find or allocate a dentry for a given inode
1786 * @inode: inode to allocate the dentry for
1787 *
1788 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1789 * similar open by handle operations. The returned dentry may be anonymous,
1790 * or may have a full name (if the inode was already in the cache).
1791 *
1792 * When called on a directory inode, we must ensure that the inode only ever
1793 * has one dentry. If a dentry is found, that is returned instead of
1794 * allocating a new one.
1795 *
1796 * On successful return, the reference to the inode has been transferred
1797 * to the dentry. In case of an error the reference on the inode is released.
1798 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1799 * be passed in and will be the error will be propagate to the return value,
1800 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1801 */
1802struct dentry *d_obtain_alias(struct inode *inode)
1803{ 1779{
1804 static const struct qstr anonstring = QSTR_INIT("/", 1); 1780 static const struct qstr anonstring = QSTR_INIT("/", 1);
1805 struct dentry *tmp; 1781 struct dentry *tmp;
@@ -1830,7 +1806,10 @@ struct dentry *d_obtain_alias(struct inode *inode)
1830 } 1806 }
1831 1807
1832 /* attach a disconnected dentry */ 1808 /* attach a disconnected dentry */
1833 add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; 1809 add_flags = d_flags_for_inode(inode);
1810
1811 if (disconnected)
1812 add_flags |= DCACHE_DISCONNECTED;
1834 1813
1835 spin_lock(&tmp->d_lock); 1814 spin_lock(&tmp->d_lock);
1836 tmp->d_inode = inode; 1815 tmp->d_inode = inode;
@@ -1851,59 +1830,51 @@ struct dentry *d_obtain_alias(struct inode *inode)
1851 iput(inode); 1830 iput(inode);
1852 return res; 1831 return res;
1853} 1832}
1854EXPORT_SYMBOL(d_obtain_alias);
1855 1833
1856/** 1834/**
1857 * d_splice_alias - splice a disconnected dentry into the tree if one exists 1835 * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
1858 * @inode: the inode which may have a disconnected dentry 1836 * @inode: inode to allocate the dentry for
1859 * @dentry: a negative dentry which we want to point to the inode.
1860 *
1861 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1862 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1863 * and return it, else simply d_add the inode to the dentry and return NULL.
1864 * 1837 *
1865 * This is needed in the lookup routine of any filesystem that is exportable 1838 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1866 * (via knfsd) so that we can build dcache paths to directories effectively. 1839 * similar open by handle operations. The returned dentry may be anonymous,
1840 * or may have a full name (if the inode was already in the cache).
1867 * 1841 *
1868 * If a dentry was found and moved, then it is returned. Otherwise NULL 1842 * When called on a directory inode, we must ensure that the inode only ever
1869 * is returned. This matches the expected return value of ->lookup. 1843 * has one dentry. If a dentry is found, that is returned instead of
1844 * allocating a new one.
1870 * 1845 *
1871 * Cluster filesystems may call this function with a negative, hashed dentry. 1846 * On successful return, the reference to the inode has been transferred
1872 * In that case, we know that the inode will be a regular file, and also this 1847 * to the dentry. In case of an error the reference on the inode is released.
1873 * will only occur during atomic_open. So we need to check for the dentry 1848 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1874 * being already hashed only in the final case. 1849 * be passed in and the error will be propagated to the return value,
1850 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1875 */ 1851 */
1876struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) 1852struct dentry *d_obtain_alias(struct inode *inode)
1877{ 1853{
1878 struct dentry *new = NULL; 1854 return __d_obtain_alias(inode, 1);
1879 1855}
1880 if (IS_ERR(inode)) 1856EXPORT_SYMBOL(d_obtain_alias);
1881 return ERR_CAST(inode);
1882 1857
1883 if (inode && S_ISDIR(inode->i_mode)) { 1858/**
1884 spin_lock(&inode->i_lock); 1859 * d_obtain_root - find or allocate a dentry for a given inode
1885 new = __d_find_alias(inode, 1); 1860 * @inode: inode to allocate the dentry for
1886 if (new) { 1861 *
1887 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1862 * Obtain an IS_ROOT dentry for the root of a filesystem.
1888 spin_unlock(&inode->i_lock); 1863 *
1889 security_d_instantiate(new, inode); 1864 * We must ensure that directory inodes only ever have one dentry. If a
1890 d_move(new, dentry); 1865 * dentry is found, that is returned instead of allocating a new one.
1891 iput(inode); 1866 *
1892 } else { 1867 * On successful return, the reference to the inode has been transferred
1893 /* already taking inode->i_lock, so d_add() by hand */ 1868 * to the dentry. In case of an error the reference on the inode is
1894 __d_instantiate(dentry, inode); 1869 * released. A %NULL or IS_ERR inode may be passed in and will be the
1895 spin_unlock(&inode->i_lock); 1870 * error will be propagate to the return value, with a %NULL @inode
1896 security_d_instantiate(dentry, inode); 1871 * replaced by ERR_PTR(-ESTALE).
1897 d_rehash(dentry); 1872 */
1898 } 1873struct dentry *d_obtain_root(struct inode *inode)
1899 } else { 1874{
1900 d_instantiate(dentry, inode); 1875 return __d_obtain_alias(inode, 0);
1901 if (d_unhashed(dentry))
1902 d_rehash(dentry);
1903 }
1904 return new;
1905} 1876}
1906EXPORT_SYMBOL(d_splice_alias); 1877EXPORT_SYMBOL(d_obtain_root);
1907 1878
1908/** 1879/**
1909 * d_add_ci - lookup or allocate new dentry with case-exact name 1880 * d_add_ci - lookup or allocate new dentry with case-exact name
@@ -2697,6 +2668,75 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2697} 2668}
2698 2669
2699/** 2670/**
2671 * d_splice_alias - splice a disconnected dentry into the tree if one exists
2672 * @inode: the inode which may have a disconnected dentry
2673 * @dentry: a negative dentry which we want to point to the inode.
2674 *
2675 * If inode is a directory and has an IS_ROOT alias, then d_move that in
2676 * place of the given dentry and return it, else simply d_add the inode
2677 * to the dentry and return NULL.
2678 *
2679 * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
2680 * we should error out: directories can't have multiple aliases.
2681 *
2682 * This is needed in the lookup routine of any filesystem that is exportable
2683 * (via knfsd) so that we can build dcache paths to directories effectively.
2684 *
2685 * If a dentry was found and moved, then it is returned. Otherwise NULL
2686 * is returned. This matches the expected return value of ->lookup.
2687 *
2688 * Cluster filesystems may call this function with a negative, hashed dentry.
2689 * In that case, we know that the inode will be a regular file, and also this
2690 * will only occur during atomic_open. So we need to check for the dentry
2691 * being already hashed only in the final case.
2692 */
2693struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2694{
2695 struct dentry *new = NULL;
2696
2697 if (IS_ERR(inode))
2698 return ERR_CAST(inode);
2699
2700 if (inode && S_ISDIR(inode->i_mode)) {
2701 spin_lock(&inode->i_lock);
2702 new = __d_find_any_alias(inode);
2703 if (new) {
2704 if (!IS_ROOT(new)) {
2705 spin_unlock(&inode->i_lock);
2706 dput(new);
2707 return ERR_PTR(-EIO);
2708 }
2709 if (d_ancestor(new, dentry)) {
2710 spin_unlock(&inode->i_lock);
2711 dput(new);
2712 return ERR_PTR(-EIO);
2713 }
2714 write_seqlock(&rename_lock);
2715 __d_materialise_dentry(dentry, new);
2716 write_sequnlock(&rename_lock);
2717 __d_drop(new);
2718 _d_rehash(new);
2719 spin_unlock(&new->d_lock);
2720 spin_unlock(&inode->i_lock);
2721 security_d_instantiate(new, inode);
2722 iput(inode);
2723 } else {
2724 /* already taking inode->i_lock, so d_add() by hand */
2725 __d_instantiate(dentry, inode);
2726 spin_unlock(&inode->i_lock);
2727 security_d_instantiate(dentry, inode);
2728 d_rehash(dentry);
2729 }
2730 } else {
2731 d_instantiate(dentry, inode);
2732 if (d_unhashed(dentry))
2733 d_rehash(dentry);
2734 }
2735 return new;
2736}
2737EXPORT_SYMBOL(d_splice_alias);
2738
2739/**
2700 * d_materialise_unique - introduce an inode into the tree 2740 * d_materialise_unique - introduce an inode into the tree
2701 * @dentry: candidate dentry 2741 * @dentry: candidate dentry
2702 * @inode: inode to bind to the dentry, to which aliases may be attached 2742 * @inode: inode to bind to the dentry, to which aliases may be attached
@@ -2724,7 +2764,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2724 struct dentry *alias; 2764 struct dentry *alias;
2725 2765
2726 /* Does an aliased dentry already exist? */ 2766 /* Does an aliased dentry already exist? */
2727 alias = __d_find_alias(inode, 0); 2767 alias = __d_find_alias(inode);
2728 if (alias) { 2768 if (alias) {
2729 actual = alias; 2769 actual = alias;
2730 write_seqlock(&rename_lock); 2770 write_seqlock(&rename_lock);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 63146295153b..76c08c2beb2f 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -451,7 +451,7 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf,
451{ 451{
452 char buf[3]; 452 char buf[3];
453 u32 *val = file->private_data; 453 u32 *val = file->private_data;
454 454
455 if (*val) 455 if (*val)
456 buf[0] = 'Y'; 456 buf[0] = 'Y';
457 else 457 else
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 8c41b52da358..1e3b99d3db0d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -66,7 +66,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
66 break; 66 break;
67 } 67 }
68 } 68 }
69 return inode; 69 return inode;
70} 70}
71 71
72/* SMP-safe */ 72/* SMP-safe */
@@ -317,7 +317,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
317 goto exit; 317 goto exit;
318 318
319 /* If the parent is not specified, we create it in the root. 319 /* If the parent is not specified, we create it in the root.
320 * We need the root dentry to do this, which is in the super 320 * We need the root dentry to do this, which is in the super
321 * block. A pointer to that is in the struct vfsmount that we 321 * block. A pointer to that is in the struct vfsmount that we
322 * have around. 322 * have around.
323 */ 323 */
@@ -330,7 +330,7 @@ static struct dentry *__create_file(const char *name, umode_t mode,
330 switch (mode & S_IFMT) { 330 switch (mode & S_IFMT) {
331 case S_IFDIR: 331 case S_IFDIR:
332 error = debugfs_mkdir(parent->d_inode, dentry, mode); 332 error = debugfs_mkdir(parent->d_inode, dentry, mode);
333 333
334 break; 334 break;
335 case S_IFLNK: 335 case S_IFLNK:
336 error = debugfs_link(parent->d_inode, dentry, mode, 336 error = debugfs_link(parent->d_inode, dentry, mode,
@@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
534 */ 534 */
535void debugfs_remove_recursive(struct dentry *dentry) 535void debugfs_remove_recursive(struct dentry *dentry)
536{ 536{
537 struct dentry *child, *next, *parent; 537 struct dentry *child, *parent;
538 538
539 if (IS_ERR_OR_NULL(dentry)) 539 if (IS_ERR_OR_NULL(dentry))
540 return; 540 return;
@@ -546,30 +546,49 @@ void debugfs_remove_recursive(struct dentry *dentry)
546 parent = dentry; 546 parent = dentry;
547 down: 547 down:
548 mutex_lock(&parent->d_inode->i_mutex); 548 mutex_lock(&parent->d_inode->i_mutex);
549 list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { 549 loop:
550 /*
551 * The parent->d_subdirs is protected by the d_lock. Outside that
552 * lock, the child can be unlinked and set to be freed which can
553 * use the d_u.d_child as the rcu head and corrupt this list.
554 */
555 spin_lock(&parent->d_lock);
556 list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
550 if (!debugfs_positive(child)) 557 if (!debugfs_positive(child))
551 continue; 558 continue;
552 559
553 /* perhaps simple_empty(child) makes more sense */ 560 /* perhaps simple_empty(child) makes more sense */
554 if (!list_empty(&child->d_subdirs)) { 561 if (!list_empty(&child->d_subdirs)) {
562 spin_unlock(&parent->d_lock);
555 mutex_unlock(&parent->d_inode->i_mutex); 563 mutex_unlock(&parent->d_inode->i_mutex);
556 parent = child; 564 parent = child;
557 goto down; 565 goto down;
558 } 566 }
559 up: 567
568 spin_unlock(&parent->d_lock);
569
560 if (!__debugfs_remove(child, parent)) 570 if (!__debugfs_remove(child, parent))
561 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 571 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
572
573 /*
574 * The parent->d_lock protects agaist child from unlinking
575 * from d_subdirs. When releasing the parent->d_lock we can
576 * no longer trust that the next pointer is valid.
577 * Restart the loop. We'll skip this one with the
578 * debugfs_positive() check.
579 */
580 goto loop;
562 } 581 }
582 spin_unlock(&parent->d_lock);
563 583
564 mutex_unlock(&parent->d_inode->i_mutex); 584 mutex_unlock(&parent->d_inode->i_mutex);
565 child = parent; 585 child = parent;
566 parent = parent->d_parent; 586 parent = parent->d_parent;
567 mutex_lock(&parent->d_inode->i_mutex); 587 mutex_lock(&parent->d_inode->i_mutex);
568 588
569 if (child != dentry) { 589 if (child != dentry)
570 next = list_next_entry(child, d_u.d_child); 590 /* go up */
571 goto up; 591 goto loop;
572 }
573 592
574 if (!__debugfs_remove(child, parent)) 593 if (!__debugfs_remove(child, parent))
575 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 594 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 17e39b047de5..c3116404ab49 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -158,7 +158,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
158{ 158{
159 ssize_t ret; 159 ssize_t ret;
160 160
161 ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, 161 ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES,
162 &sdio->from); 162 &sdio->from);
163 163
164 if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { 164 if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 8d77ba7b1756..1323c568e362 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -718,16 +718,11 @@ static const struct file_operations waiters_fops = {
718 718
719void dlm_delete_debug_file(struct dlm_ls *ls) 719void dlm_delete_debug_file(struct dlm_ls *ls)
720{ 720{
721 if (ls->ls_debug_rsb_dentry) 721 debugfs_remove(ls->ls_debug_rsb_dentry);
722 debugfs_remove(ls->ls_debug_rsb_dentry); 722 debugfs_remove(ls->ls_debug_waiters_dentry);
723 if (ls->ls_debug_waiters_dentry) 723 debugfs_remove(ls->ls_debug_locks_dentry);
724 debugfs_remove(ls->ls_debug_waiters_dentry); 724 debugfs_remove(ls->ls_debug_all_dentry);
725 if (ls->ls_debug_locks_dentry) 725 debugfs_remove(ls->ls_debug_toss_dentry);
726 debugfs_remove(ls->ls_debug_locks_dentry);
727 if (ls->ls_debug_all_dentry)
728 debugfs_remove(ls->ls_debug_all_dentry);
729 if (ls->ls_debug_toss_dentry)
730 debugfs_remove(ls->ls_debug_toss_dentry);
731} 726}
732 727
733int dlm_create_debug_file(struct dlm_ls *ls) 728int dlm_create_debug_file(struct dlm_ls *ls)
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 356c044e2cd3..bbee8f063dfa 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -12,7 +12,8 @@
12#include "efs.h" 12#include "efs.h"
13 13
14 14
15static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) { 15static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
16{
16 struct buffer_head *bh; 17 struct buffer_head *bh;
17 18
18 int slot, namelen; 19 int slot, namelen;
@@ -40,10 +41,10 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
40 if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) { 41 if (be16_to_cpu(dirblock->magic) != EFS_DIRBLK_MAGIC) {
41 pr_err("%s(): invalid directory block\n", __func__); 42 pr_err("%s(): invalid directory block\n", __func__);
42 brelse(bh); 43 brelse(bh);
43 return(0); 44 return 0;
44 } 45 }
45 46
46 for(slot = 0; slot < dirblock->slots; slot++) { 47 for (slot = 0; slot < dirblock->slots; slot++) {
47 dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot)); 48 dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot));
48 49
49 namelen = dirslot->namelen; 50 namelen = dirslot->namelen;
@@ -52,12 +53,12 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
52 if ((namelen == len) && (!memcmp(name, nameptr, len))) { 53 if ((namelen == len) && (!memcmp(name, nameptr, len))) {
53 inodenum = be32_to_cpu(dirslot->inode); 54 inodenum = be32_to_cpu(dirslot->inode);
54 brelse(bh); 55 brelse(bh);
55 return(inodenum); 56 return inodenum;
56 } 57 }
57 } 58 }
58 brelse(bh); 59 brelse(bh);
59 } 60 }
60 return(0); 61 return 0;
61} 62}
62 63
63struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 64struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..a2b42a98c743 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -368,10 +368,6 @@ static int bprm_mm_init(struct linux_binprm *bprm)
368 if (!mm) 368 if (!mm)
369 goto err; 369 goto err;
370 370
371 err = init_new_context(current, mm);
372 if (err)
373 goto err;
374
375 err = __bprm_mm_init(bprm); 371 err = __bprm_mm_init(bprm);
376 if (err) 372 if (err)
377 goto err; 373 goto err;
@@ -1216,7 +1212,7 @@ EXPORT_SYMBOL(install_exec_creds);
1216/* 1212/*
1217 * determine how safe it is to execute the proposed program 1213 * determine how safe it is to execute the proposed program
1218 * - the caller must hold ->cred_guard_mutex to protect against 1214 * - the caller must hold ->cred_guard_mutex to protect against
1219 * PTRACE_ATTACH 1215 * PTRACE_ATTACH or seccomp thread-sync
1220 */ 1216 */
1221static void check_unsafe_exec(struct linux_binprm *bprm) 1217static void check_unsafe_exec(struct linux_binprm *bprm)
1222{ 1218{
@@ -1234,7 +1230,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
1234 * This isn't strictly necessary, but it makes it harder for LSMs to 1230 * This isn't strictly necessary, but it makes it harder for LSMs to
1235 * mess up. 1231 * mess up.
1236 */ 1232 */
1237 if (current->no_new_privs) 1233 if (task_no_new_privs(current))
1238 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; 1234 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1239 1235
1240 t = p; 1236 t = p;
@@ -1272,7 +1268,7 @@ int prepare_binprm(struct linux_binprm *bprm)
1272 bprm->cred->egid = current_egid(); 1268 bprm->cred->egid = current_egid();
1273 1269
1274 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && 1270 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1275 !current->no_new_privs && 1271 !task_no_new_privs(current) &&
1276 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && 1272 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1277 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { 1273 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1278 /* Set-uid? */ 1274 /* Set-uid? */
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 7f20f25c232c..84529b8a331b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -116,7 +116,7 @@ static int _sp2d_alloc(unsigned pages_in_unit, unsigned group_width,
116 num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa, 116 num_a1pa = min_t(unsigned, PAGE_SIZE / sizeof__a1pa,
117 pages_in_unit - i); 117 pages_in_unit - i);
118 118
119 __a1pa = kzalloc(num_a1pa * sizeof__a1pa, GFP_KERNEL); 119 __a1pa = kcalloc(num_a1pa, sizeof__a1pa, GFP_KERNEL);
120 if (unlikely(!__a1pa)) { 120 if (unlikely(!__a1pa)) {
121 ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n", 121 ORE_DBGMSG("!! Failed to _alloc_1p_arrays=%d\n",
122 num_a1pa); 122 num_a1pa);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3750031cfa2f..b88edc05c230 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,7 +161,7 @@ static struct kmem_cache * ext2_inode_cachep;
161static struct inode *ext2_alloc_inode(struct super_block *sb) 161static struct inode *ext2_alloc_inode(struct super_block *sb)
162{ 162{
163 struct ext2_inode_info *ei; 163 struct ext2_inode_info *ei;
164 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); 164 ei = kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
165 if (!ei) 165 if (!ei)
166 return NULL; 166 return NULL;
167 ei->i_block_alloc_info = NULL; 167 ei->i_block_alloc_info = NULL;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fca382037ddd..581ef40fbe90 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -639,7 +639,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
639 if (!(*errp) && 639 if (!(*errp) &&
640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { 640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
642 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
643 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 642 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
644 dquot_alloc_block_nofail(inode, 643 dquot_alloc_block_nofail(inode,
645 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); 644 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ef1bed66c14f..0bb3f9ea0832 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -571,6 +571,31 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
571 return 0; 571 return 0;
572} 572}
573 573
574int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
575 int buf_size)
576{
577 struct ext4_dir_entry_2 *de;
578 int nlen, rlen;
579 unsigned int offset = 0;
580 char *top;
581
582 de = (struct ext4_dir_entry_2 *)buf;
583 top = buf + buf_size;
584 while ((char *) de < top) {
585 if (ext4_check_dir_entry(dir, NULL, de, bh,
586 buf, buf_size, offset))
587 return -EIO;
588 nlen = EXT4_DIR_REC_LEN(de->name_len);
589 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
590 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
591 offset += rlen;
592 }
593 if ((char *) de > top)
594 return -EIO;
595
596 return 0;
597}
598
574const struct file_operations ext4_dir_operations = { 599const struct file_operations ext4_dir_operations = {
575 .llseek = ext4_dir_llseek, 600 .llseek = ext4_dir_llseek,
576 .read = generic_read_dir, 601 .read = generic_read_dir,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 7cc5a0e23688..5b19760b1de5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -591,7 +591,6 @@ enum {
591#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 591#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
592#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 592#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
593#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 593#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
594#define EXT4_FREE_BLOCKS_RESERVE 0x0040
595 594
596/* 595/*
597 * ioctl commands 596 * ioctl commands
@@ -2029,6 +2028,8 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype)
2029 2028
2030 return ext4_filetype_table[filetype]; 2029 return ext4_filetype_table[filetype];
2031} 2030}
2031extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
2032 void *buf, int buf_size);
2032 2033
2033/* fsync.c */ 2034/* fsync.c */
2034extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 2035extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
@@ -2144,8 +2145,8 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
2144extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2145extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2145extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2146extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2146extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2147extern void ext4_ind_truncate(handle_t *, struct inode *inode);
2147extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 2148extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
2148 ext4_lblk_t first, ext4_lblk_t stop); 2149 ext4_lblk_t start, ext4_lblk_t end);
2149 2150
2150/* ioctl.c */ 2151/* ioctl.c */
2151extern long ext4_ioctl(struct file *, unsigned int, unsigned long); 2152extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2560,7 +2561,6 @@ extern const struct file_operations ext4_file_operations;
2560extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2561extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2561 2562
2562/* inline.c */ 2563/* inline.c */
2563extern int ext4_has_inline_data(struct inode *inode);
2564extern int ext4_get_max_inline_size(struct inode *inode); 2564extern int ext4_get_max_inline_size(struct inode *inode);
2565extern int ext4_find_inline_data_nolock(struct inode *inode); 2565extern int ext4_find_inline_data_nolock(struct inode *inode);
2566extern int ext4_init_inline_data(handle_t *handle, struct inode *inode, 2566extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
@@ -2626,6 +2626,12 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
2626 2626
2627extern int ext4_convert_inline_data(struct inode *inode); 2627extern int ext4_convert_inline_data(struct inode *inode);
2628 2628
2629static inline int ext4_has_inline_data(struct inode *inode)
2630{
2631 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
2632 EXT4_I(inode)->i_inline_off;
2633}
2634
2629/* namei.c */ 2635/* namei.c */
2630extern const struct inode_operations ext4_dir_inode_operations; 2636extern const struct inode_operations ext4_dir_inode_operations;
2631extern const struct inode_operations ext4_special_inode_operations; 2637extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a0e6d0..76c2df382b7d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -161,6 +161,8 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
161 struct inode *inode, struct ext4_ext_path *path) 161 struct inode *inode, struct ext4_ext_path *path)
162{ 162{
163 int err; 163 int err;
164
165 WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
164 if (path->p_bh) { 166 if (path->p_bh) {
165 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); 167 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
166 /* path points to block */ 168 /* path points to block */
@@ -1808,8 +1810,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1808 1810
1809 brelse(path[1].p_bh); 1811 brelse(path[1].p_bh);
1810 ext4_free_blocks(handle, inode, NULL, blk, 1, 1812 ext4_free_blocks(handle, inode, NULL, blk, 1,
1811 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET | 1813 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1812 EXT4_FREE_BLOCKS_RESERVE);
1813} 1814}
1814 1815
1815/* 1816/*
@@ -3253,7 +3254,7 @@ out:
3253 3254
3254fix_extent_len: 3255fix_extent_len:
3255 ex->ee_len = orig_ex.ee_len; 3256 ex->ee_len = orig_ex.ee_len;
3256 ext4_ext_dirty(handle, inode, path + depth); 3257 ext4_ext_dirty(handle, inode, path + path->p_depth);
3257 return err; 3258 return err;
3258} 3259}
3259 3260
@@ -5403,16 +5404,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5403 int ret; 5404 int ret;
5404 5405
5405 /* Collapse range works only on fs block size aligned offsets. */ 5406 /* Collapse range works only on fs block size aligned offsets. */
5406 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5407 if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
5407 len & (EXT4_BLOCK_SIZE(sb) - 1)) 5408 len & (EXT4_CLUSTER_SIZE(sb) - 1))
5408 return -EINVAL; 5409 return -EINVAL;
5409 5410
5410 if (!S_ISREG(inode->i_mode)) 5411 if (!S_ISREG(inode->i_mode))
5411 return -EINVAL; 5412 return -EINVAL;
5412 5413
5413 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
5414 return -EOPNOTSUPP;
5415
5416 trace_ext4_collapse_range(inode, offset, len); 5414 trace_ext4_collapse_range(inode, offset, len);
5417 5415
5418 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5416 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8695f70af1ef..aca7b24a4432 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,10 +200,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
200 200
201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
202{ 202{
203 struct address_space *mapping = file->f_mapping;
204
205 if (!mapping->a_ops->readpage)
206 return -ENOEXEC;
207 file_accessed(file); 203 file_accessed(file);
208 vma->vm_ops = &ext4_file_vm_ops; 204 vma->vm_ops = &ext4_file_vm_ops;
209 return 0; 205 return 0;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index fd69da194826..e75f840000a0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1295,97 +1295,220 @@ do_indirects:
1295 } 1295 }
1296} 1296}
1297 1297
1298static int free_hole_blocks(handle_t *handle, struct inode *inode, 1298/**
1299 struct buffer_head *parent_bh, __le32 *i_data, 1299 * ext4_ind_remove_space - remove space from the range
1300 int level, ext4_lblk_t first, 1300 * @handle: JBD handle for this transaction
1301 ext4_lblk_t count, int max) 1301 * @inode: inode we are dealing with
1302 * @start: First block to remove
1303 * @end: One block after the last block to remove (exclusive)
1304 *
1305 * Free the blocks in the defined range (end is exclusive endpoint of
1306 * range). This is used by ext4_punch_hole().
1307 */
1308int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
1309 ext4_lblk_t start, ext4_lblk_t end)
1302{ 1310{
1303 struct buffer_head *bh = NULL; 1311 struct ext4_inode_info *ei = EXT4_I(inode);
1312 __le32 *i_data = ei->i_data;
1304 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1313 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1305 int ret = 0; 1314 ext4_lblk_t offsets[4], offsets2[4];
1306 int i, inc; 1315 Indirect chain[4], chain2[4];
1307 ext4_lblk_t offset; 1316 Indirect *partial, *partial2;
1308 __le32 blk; 1317 ext4_lblk_t max_block;
1309 1318 __le32 nr = 0, nr2 = 0;
1310 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); 1319 int n = 0, n2 = 0;
1311 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { 1320 unsigned blocksize = inode->i_sb->s_blocksize;
1312 if (offset >= count + first)
1313 break;
1314 if (*i_data == 0 || (offset + inc) <= first)
1315 continue;
1316 blk = *i_data;
1317 if (level > 0) {
1318 ext4_lblk_t first2;
1319 ext4_lblk_t count2;
1320 1321
1321 bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); 1322 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1322 if (!bh) { 1323 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1323 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), 1324 if (end >= max_block)
1324 "Read failure"); 1325 end = max_block;
1325 return -EIO; 1326 if ((start >= end) || (start > max_block))
1326 } 1327 return 0;
1327 if (first > offset) { 1328
1328 first2 = first - offset; 1329 n = ext4_block_to_path(inode, start, offsets, NULL);
1329 count2 = count; 1330 n2 = ext4_block_to_path(inode, end, offsets2, NULL);
1331
1332 BUG_ON(n > n2);
1333
1334 if ((n == 1) && (n == n2)) {
1335 /* We're punching only within direct block range */
1336 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1337 i_data + offsets2[0]);
1338 return 0;
1339 } else if (n2 > n) {
1340 /*
1341 * Start and end are on a different levels so we're going to
1342 * free partial block at start, and partial block at end of
1343 * the range. If there are some levels in between then
1344 * do_indirects label will take care of that.
1345 */
1346
1347 if (n == 1) {
1348 /*
1349 * Start is at the direct block level, free
1350 * everything to the end of the level.
1351 */
1352 ext4_free_data(handle, inode, NULL, i_data + offsets[0],
1353 i_data + EXT4_NDIR_BLOCKS);
1354 goto end_range;
1355 }
1356
1357
1358 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1359 if (nr) {
1360 if (partial == chain) {
1361 /* Shared branch grows from the inode */
1362 ext4_free_branches(handle, inode, NULL,
1363 &nr, &nr+1, (chain+n-1) - partial);
1364 *partial->p = 0;
1330 } else { 1365 } else {
1331 first2 = 0; 1366 /* Shared branch grows from an indirect block */
1332 count2 = count - (offset - first); 1367 BUFFER_TRACE(partial->bh, "get_write_access");
1368 ext4_free_branches(handle, inode, partial->bh,
1369 partial->p,
1370 partial->p+1, (chain+n-1) - partial);
1333 } 1371 }
1334 ret = free_hole_blocks(handle, inode, bh, 1372 }
1335 (__le32 *)bh->b_data, level - 1, 1373
1336 first2, count2, 1374 /*
1337 inode->i_sb->s_blocksize >> 2); 1375 * Clear the ends of indirect blocks on the shared branch
1338 if (ret) { 1376 * at the start of the range
1339 brelse(bh); 1377 */
1340 goto err; 1378 while (partial > chain) {
1379 ext4_free_branches(handle, inode, partial->bh,
1380 partial->p + 1,
1381 (__le32 *)partial->bh->b_data+addr_per_block,
1382 (chain+n-1) - partial);
1383 BUFFER_TRACE(partial->bh, "call brelse");
1384 brelse(partial->bh);
1385 partial--;
1386 }
1387
1388end_range:
1389 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1390 if (nr2) {
1391 if (partial2 == chain2) {
1392 /*
1393 * Remember, end is exclusive so here we're at
1394 * the start of the next level we're not going
1395 * to free. Everything was covered by the start
1396 * of the range.
1397 */
1398 return 0;
1399 } else {
1400 /* Shared branch grows from an indirect block */
1401 partial2--;
1341 } 1402 }
1403 } else {
1404 /*
1405 * ext4_find_shared returns Indirect structure which
1406 * points to the last element which should not be
1407 * removed by truncate. But this is end of the range
1408 * in punch_hole so we need to point to the next element
1409 */
1410 partial2->p++;
1342 } 1411 }
1343 if (level == 0 || 1412
1344 (bh && all_zeroes((__le32 *)bh->b_data, 1413 /*
1345 (__le32 *)bh->b_data + addr_per_block))) { 1414 * Clear the ends of indirect blocks on the shared branch
1346 ext4_free_data(handle, inode, parent_bh, 1415 * at the end of the range
1347 i_data, i_data + 1); 1416 */
1417 while (partial2 > chain2) {
1418 ext4_free_branches(handle, inode, partial2->bh,
1419 (__le32 *)partial2->bh->b_data,
1420 partial2->p,
1421 (chain2+n2-1) - partial2);
1422 BUFFER_TRACE(partial2->bh, "call brelse");
1423 brelse(partial2->bh);
1424 partial2--;
1348 } 1425 }
1349 brelse(bh); 1426 goto do_indirects;
1350 bh = NULL;
1351 } 1427 }
1352 1428
1353err: 1429 /* Punch happened within the same level (n == n2) */
1354 return ret; 1430 partial = ext4_find_shared(inode, n, offsets, chain, &nr);
1355} 1431 partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
1356 1432 /*
1357int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, 1433 * ext4_find_shared returns Indirect structure which
1358 ext4_lblk_t first, ext4_lblk_t stop) 1434 * points to the last element which should not be
1359{ 1435 * removed by truncate. But this is end of the range
1360 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1436 * in punch_hole so we need to point to the next element
1361 int level, ret = 0; 1437 */
1362 int num = EXT4_NDIR_BLOCKS; 1438 partial2->p++;
1363 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; 1439 while ((partial > chain) || (partial2 > chain2)) {
1364 __le32 *i_data = EXT4_I(inode)->i_data; 1440 /* We're at the same block, so we're almost finished */
1365 1441 if ((partial->bh && partial2->bh) &&
1366 count = stop - first; 1442 (partial->bh->b_blocknr == partial2->bh->b_blocknr)) {
1367 for (level = 0; level < 4; level++, max *= addr_per_block) { 1443 if ((partial > chain) && (partial2 > chain2)) {
1368 if (first < max) { 1444 ext4_free_branches(handle, inode, partial->bh,
1369 ret = free_hole_blocks(handle, inode, NULL, i_data, 1445 partial->p + 1,
1370 level, first, count, num); 1446 partial2->p,
1371 if (ret) 1447 (chain+n-1) - partial);
1372 goto err; 1448 BUFFER_TRACE(partial->bh, "call brelse");
1373 if (count > max - first) 1449 brelse(partial->bh);
1374 count -= max - first; 1450 BUFFER_TRACE(partial2->bh, "call brelse");
1375 else 1451 brelse(partial2->bh);
1376 break; 1452 }
1377 first = 0; 1453 return 0;
1378 } else {
1379 first -= max;
1380 } 1454 }
1381 i_data += num; 1455 /*
1382 if (level == 0) { 1456 * Clear the ends of indirect blocks on the shared branch
1383 num = 1; 1457 * at the start of the range
1384 max = 1; 1458 */
1459 if (partial > chain) {
1460 ext4_free_branches(handle, inode, partial->bh,
1461 partial->p + 1,
1462 (__le32 *)partial->bh->b_data+addr_per_block,
1463 (chain+n-1) - partial);
1464 BUFFER_TRACE(partial->bh, "call brelse");
1465 brelse(partial->bh);
1466 partial--;
1467 }
1468 /*
1469 * Clear the ends of indirect blocks on the shared branch
1470 * at the end of the range
1471 */
1472 if (partial2 > chain2) {
1473 ext4_free_branches(handle, inode, partial2->bh,
1474 (__le32 *)partial2->bh->b_data,
1475 partial2->p,
1476 (chain2+n-1) - partial2);
1477 BUFFER_TRACE(partial2->bh, "call brelse");
1478 brelse(partial2->bh);
1479 partial2--;
1385 } 1480 }
1386 } 1481 }
1387 1482
1388err: 1483do_indirects:
1389 return ret; 1484 /* Kill the remaining (whole) subtrees */
1485 switch (offsets[0]) {
1486 default:
1487 if (++n >= n2)
1488 return 0;
1489 nr = i_data[EXT4_IND_BLOCK];
1490 if (nr) {
1491 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1492 i_data[EXT4_IND_BLOCK] = 0;
1493 }
1494 case EXT4_IND_BLOCK:
1495 if (++n >= n2)
1496 return 0;
1497 nr = i_data[EXT4_DIND_BLOCK];
1498 if (nr) {
1499 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1500 i_data[EXT4_DIND_BLOCK] = 0;
1501 }
1502 case EXT4_DIND_BLOCK:
1503 if (++n >= n2)
1504 return 0;
1505 nr = i_data[EXT4_TIND_BLOCK];
1506 if (nr) {
1507 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1508 i_data[EXT4_TIND_BLOCK] = 0;
1509 }
1510 case EXT4_TIND_BLOCK:
1511 ;
1512 }
1513 return 0;
1390} 1514}
1391
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 645205d8ada6..bea662bd0ca6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -120,12 +120,6 @@ int ext4_get_max_inline_size(struct inode *inode)
120 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; 120 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
121} 121}
122 122
123int ext4_has_inline_data(struct inode *inode)
124{
125 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
126 EXT4_I(inode)->i_inline_off;
127}
128
129/* 123/*
130 * this function does not take xattr_sem, which is OK because it is 124 * this function does not take xattr_sem, which is OK because it is
131 * currently only used in a code path coming form ext4_iget, before 125 * currently only used in a code path coming form ext4_iget, before
@@ -1178,6 +1172,18 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
1178 if (error < 0) 1172 if (error < 0)
1179 goto out; 1173 goto out;
1180 1174
1175 /*
1176 * Make sure the inline directory entries pass checks before we try to
1177 * convert them, so that we avoid touching stuff that needs fsck.
1178 */
1179 if (S_ISDIR(inode->i_mode)) {
1180 error = ext4_check_all_de(inode, iloc->bh,
1181 buf + EXT4_INLINE_DOTDOT_SIZE,
1182 inline_size - EXT4_INLINE_DOTDOT_SIZE);
1183 if (error)
1184 goto out;
1185 }
1186
1181 error = ext4_destroy_inline_data_nolock(handle, inode); 1187 error = ext4_destroy_inline_data_nolock(handle, inode);
1182 if (error) 1188 if (error)
1183 goto out; 1189 goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8a064734e6eb..367a60c07cf0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -325,18 +325,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
325#endif 325#endif
326 326
327/* 327/*
328 * Calculate the number of metadata blocks need to reserve
329 * to allocate a block located at @lblock
330 */
331static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
332{
333 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
334 return ext4_ext_calc_metadata_amount(inode, lblock);
335
336 return ext4_ind_calc_metadata_amount(inode, lblock);
337}
338
339/*
340 * Called with i_data_sem down, which is important since we can call 328 * Called with i_data_sem down, which is important since we can call
341 * ext4_discard_preallocations() from here. 329 * ext4_discard_preallocations() from here.
342 */ 330 */
@@ -357,35 +345,10 @@ void ext4_da_update_reserve_space(struct inode *inode,
357 used = ei->i_reserved_data_blocks; 345 used = ei->i_reserved_data_blocks;
358 } 346 }
359 347
360 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
361 ext4_warning(inode->i_sb, "ino %lu, allocated %d "
362 "with only %d reserved metadata blocks "
363 "(releasing %d blocks with reserved %d data blocks)",
364 inode->i_ino, ei->i_allocated_meta_blocks,
365 ei->i_reserved_meta_blocks, used,
366 ei->i_reserved_data_blocks);
367 WARN_ON(1);
368 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
369 }
370
371 /* Update per-inode reservations */ 348 /* Update per-inode reservations */
372 ei->i_reserved_data_blocks -= used; 349 ei->i_reserved_data_blocks -= used;
373 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 350 percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
374 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
375 used + ei->i_allocated_meta_blocks);
376 ei->i_allocated_meta_blocks = 0;
377 351
378 if (ei->i_reserved_data_blocks == 0) {
379 /*
380 * We can release all of the reserved metadata blocks
381 * only when we have written all of the delayed
382 * allocation blocks.
383 */
384 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
385 ei->i_reserved_meta_blocks);
386 ei->i_reserved_meta_blocks = 0;
387 ei->i_da_metadata_calc_len = 0;
388 }
389 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 352 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
390 353
391 /* Update quota subsystem for data blocks */ 354 /* Update quota subsystem for data blocks */
@@ -1222,49 +1185,6 @@ static int ext4_journalled_write_end(struct file *file,
1222} 1185}
1223 1186
1224/* 1187/*
1225 * Reserve a metadata for a single block located at lblock
1226 */
1227static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1228{
1229 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1230 struct ext4_inode_info *ei = EXT4_I(inode);
1231 unsigned int md_needed;
1232 ext4_lblk_t save_last_lblock;
1233 int save_len;
1234
1235 /*
1236 * recalculate the amount of metadata blocks to reserve
1237 * in order to allocate nrblocks
1238 * worse case is one extent per block
1239 */
1240 spin_lock(&ei->i_block_reservation_lock);
1241 /*
1242 * ext4_calc_metadata_amount() has side effects, which we have
1243 * to be prepared undo if we fail to claim space.
1244 */
1245 save_len = ei->i_da_metadata_calc_len;
1246 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1247 md_needed = EXT4_NUM_B2C(sbi,
1248 ext4_calc_metadata_amount(inode, lblock));
1249 trace_ext4_da_reserve_space(inode, md_needed);
1250
1251 /*
1252 * We do still charge estimated metadata to the sb though;
1253 * we cannot afford to run out of free blocks.
1254 */
1255 if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1256 ei->i_da_metadata_calc_len = save_len;
1257 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1258 spin_unlock(&ei->i_block_reservation_lock);
1259 return -ENOSPC;
1260 }
1261 ei->i_reserved_meta_blocks += md_needed;
1262 spin_unlock(&ei->i_block_reservation_lock);
1263
1264 return 0; /* success */
1265}
1266
1267/*
1268 * Reserve a single cluster located at lblock 1188 * Reserve a single cluster located at lblock
1269 */ 1189 */
1270static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1190static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1273,8 +1193,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1273 struct ext4_inode_info *ei = EXT4_I(inode); 1193 struct ext4_inode_info *ei = EXT4_I(inode);
1274 unsigned int md_needed; 1194 unsigned int md_needed;
1275 int ret; 1195 int ret;
1276 ext4_lblk_t save_last_lblock;
1277 int save_len;
1278 1196
1279 /* 1197 /*
1280 * We will charge metadata quota at writeout time; this saves 1198 * We will charge metadata quota at writeout time; this saves
@@ -1295,25 +1213,15 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1295 * ext4_calc_metadata_amount() has side effects, which we have 1213 * ext4_calc_metadata_amount() has side effects, which we have
1296 * to be prepared undo if we fail to claim space. 1214 * to be prepared undo if we fail to claim space.
1297 */ 1215 */
1298 save_len = ei->i_da_metadata_calc_len; 1216 md_needed = 0;
1299 save_last_lblock = ei->i_da_metadata_calc_last_lblock; 1217 trace_ext4_da_reserve_space(inode, 0);
1300 md_needed = EXT4_NUM_B2C(sbi,
1301 ext4_calc_metadata_amount(inode, lblock));
1302 trace_ext4_da_reserve_space(inode, md_needed);
1303 1218
1304 /* 1219 if (ext4_claim_free_clusters(sbi, 1, 0)) {
1305 * We do still charge estimated metadata to the sb though;
1306 * we cannot afford to run out of free blocks.
1307 */
1308 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1309 ei->i_da_metadata_calc_len = save_len;
1310 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1311 spin_unlock(&ei->i_block_reservation_lock); 1220 spin_unlock(&ei->i_block_reservation_lock);
1312 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1221 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1313 return -ENOSPC; 1222 return -ENOSPC;
1314 } 1223 }
1315 ei->i_reserved_data_blocks++; 1224 ei->i_reserved_data_blocks++;
1316 ei->i_reserved_meta_blocks += md_needed;
1317 spin_unlock(&ei->i_block_reservation_lock); 1225 spin_unlock(&ei->i_block_reservation_lock);
1318 1226
1319 return 0; /* success */ 1227 return 0; /* success */
@@ -1346,20 +1254,6 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1346 } 1254 }
1347 ei->i_reserved_data_blocks -= to_free; 1255 ei->i_reserved_data_blocks -= to_free;
1348 1256
1349 if (ei->i_reserved_data_blocks == 0) {
1350 /*
1351 * We can release all of the reserved metadata blocks
1352 * only when we have written all of the delayed
1353 * allocation blocks.
1354 * Note that in case of bigalloc, i_reserved_meta_blocks,
1355 * i_reserved_data_blocks, etc. refer to number of clusters.
1356 */
1357 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1358 ei->i_reserved_meta_blocks);
1359 ei->i_reserved_meta_blocks = 0;
1360 ei->i_da_metadata_calc_len = 0;
1361 }
1362
1363 /* update fs dirty data blocks counter */ 1257 /* update fs dirty data blocks counter */
1364 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); 1258 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1365 1259
@@ -1500,10 +1394,6 @@ static void ext4_print_free_blocks(struct inode *inode)
1500 ext4_msg(sb, KERN_CRIT, "Block reservation details"); 1394 ext4_msg(sb, KERN_CRIT, "Block reservation details");
1501 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", 1395 ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
1502 ei->i_reserved_data_blocks); 1396 ei->i_reserved_data_blocks);
1503 ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
1504 ei->i_reserved_meta_blocks);
1505 ext4_msg(sb, KERN_CRIT, "i_allocated_meta_blocks=%u",
1506 ei->i_allocated_meta_blocks);
1507 return; 1397 return;
1508} 1398}
1509 1399
@@ -1620,13 +1510,6 @@ add_delayed:
1620 retval = ret; 1510 retval = ret;
1621 goto out_unlock; 1511 goto out_unlock;
1622 } 1512 }
1623 } else {
1624 ret = ext4_da_reserve_metadata(inode, iblock);
1625 if (ret) {
1626 /* not enough space to reserve */
1627 retval = ret;
1628 goto out_unlock;
1629 }
1630 } 1513 }
1631 1514
1632 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 1515 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2843,8 +2726,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
2843{ 2726{
2844 trace_ext4_alloc_da_blocks(inode); 2727 trace_ext4_alloc_da_blocks(inode);
2845 2728
2846 if (!EXT4_I(inode)->i_reserved_data_blocks && 2729 if (!EXT4_I(inode)->i_reserved_data_blocks)
2847 !EXT4_I(inode)->i_reserved_meta_blocks)
2848 return 0; 2730 return 0;
2849 2731
2850 /* 2732 /*
@@ -3624,7 +3506,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3624 ret = ext4_ext_remove_space(inode, first_block, 3506 ret = ext4_ext_remove_space(inode, first_block,
3625 stop_block - 1); 3507 stop_block - 1);
3626 else 3508 else
3627 ret = ext4_free_hole_blocks(handle, inode, first_block, 3509 ret = ext4_ind_remove_space(handle, inode, first_block,
3628 stop_block); 3510 stop_block);
3629 3511
3630 up_write(&EXT4_I(inode)->i_data_sem); 3512 up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 2dcb936be90e..956027711faf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3075,8 +3075,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3075 (23 - bsbits)) << 23; 3075 (23 - bsbits)) << 23;
3076 size = 8 * 1024 * 1024; 3076 size = 8 * 1024 * 1024;
3077 } else { 3077 } else {
3078 start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; 3078 start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
3079 size = ac->ac_o_ex.fe_len << bsbits; 3079 size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
3080 ac->ac_o_ex.fe_len) << bsbits;
3080 } 3081 }
3081 size = size >> bsbits; 3082 size = size >> bsbits;
3082 start = start_off >> bsbits; 3083 start = start_off >> bsbits;
@@ -3216,8 +3217,27 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3216static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) 3217static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3217{ 3218{
3218 struct ext4_prealloc_space *pa = ac->ac_pa; 3219 struct ext4_prealloc_space *pa = ac->ac_pa;
3220 struct ext4_buddy e4b;
3221 int err;
3219 3222
3220 if (pa && pa->pa_type == MB_INODE_PA) 3223 if (pa == NULL) {
3224 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3225 if (err) {
3226 /*
3227 * This should never happen since we pin the
3228 * pages in the ext4_allocation_context so
3229 * ext4_mb_load_buddy() should never fail.
3230 */
3231 WARN(1, "mb_load_buddy failed (%d)", err);
3232 return;
3233 }
3234 ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3235 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3236 ac->ac_f_ex.fe_len);
3237 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3238 return;
3239 }
3240 if (pa->pa_type == MB_INODE_PA)
3221 pa->pa_free += ac->ac_b_ex.fe_len; 3241 pa->pa_free += ac->ac_b_ex.fe_len;
3222} 3242}
3223 3243
@@ -4627,7 +4647,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4627 struct buffer_head *gd_bh; 4647 struct buffer_head *gd_bh;
4628 ext4_group_t block_group; 4648 ext4_group_t block_group;
4629 struct ext4_sb_info *sbi; 4649 struct ext4_sb_info *sbi;
4630 struct ext4_inode_info *ei = EXT4_I(inode);
4631 struct ext4_buddy e4b; 4650 struct ext4_buddy e4b;
4632 unsigned int count_clusters; 4651 unsigned int count_clusters;
4633 int err = 0; 4652 int err = 0;
@@ -4838,19 +4857,7 @@ do_more:
4838 &sbi->s_flex_groups[flex_group].free_clusters); 4857 &sbi->s_flex_groups[flex_group].free_clusters);
4839 } 4858 }
4840 4859
4841 if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) { 4860 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4842 percpu_counter_add(&sbi->s_dirtyclusters_counter,
4843 count_clusters);
4844 spin_lock(&ei->i_block_reservation_lock);
4845 if (flags & EXT4_FREE_BLOCKS_METADATA)
4846 ei->i_reserved_meta_blocks += count_clusters;
4847 else
4848 ei->i_reserved_data_blocks += count_clusters;
4849 spin_unlock(&ei->i_block_reservation_lock);
4850 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4851 dquot_reclaim_block(inode,
4852 EXT4_C2B(sbi, count_clusters));
4853 } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4854 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4861 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4855 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters); 4862 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4856 4863
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index ec092437d3e0..d3567f27bae7 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -39,6 +39,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
39 newext.ee_block = cpu_to_le32(lb->first_block); 39 newext.ee_block = cpu_to_le32(lb->first_block);
40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); 40 newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
41 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 ext4_ext_store_pblock(&newext, lb->first_pblock);
42 /* Locking only for convinience since we are operating on temp inode */
43 down_write(&EXT4_I(inode)->i_data_sem);
42 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); 44 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
43 45
44 if (IS_ERR(path)) { 46 if (IS_ERR(path)) {
@@ -61,7 +63,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
61 */ 63 */
62 if (needed && ext4_handle_has_enough_credits(handle, 64 if (needed && ext4_handle_has_enough_credits(handle,
63 EXT4_RESERVE_TRANS_BLOCKS)) { 65 EXT4_RESERVE_TRANS_BLOCKS)) {
66 up_write((&EXT4_I(inode)->i_data_sem));
64 retval = ext4_journal_restart(handle, needed); 67 retval = ext4_journal_restart(handle, needed);
68 down_write((&EXT4_I(inode)->i_data_sem));
65 if (retval) 69 if (retval)
66 goto err_out; 70 goto err_out;
67 } else if (needed) { 71 } else if (needed) {
@@ -70,13 +74,16 @@ static int finish_range(handle_t *handle, struct inode *inode,
70 /* 74 /*
71 * IF not able to extend the journal restart the journal 75 * IF not able to extend the journal restart the journal
72 */ 76 */
77 up_write((&EXT4_I(inode)->i_data_sem));
73 retval = ext4_journal_restart(handle, needed); 78 retval = ext4_journal_restart(handle, needed);
79 down_write((&EXT4_I(inode)->i_data_sem));
74 if (retval) 80 if (retval)
75 goto err_out; 81 goto err_out;
76 } 82 }
77 } 83 }
78 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); 84 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
79err_out: 85err_out:
86 up_write((&EXT4_I(inode)->i_data_sem));
80 if (path) { 87 if (path) {
81 ext4_ext_drop_refs(path); 88 ext4_ext_drop_refs(path);
82 kfree(path); 89 kfree(path);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2484c7ec6a72..671a74b14fd7 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1013,10 +1013,11 @@ data_copy:
1013 *err = -EBUSY; 1013 *err = -EBUSY;
1014 goto unlock_pages; 1014 goto unlock_pages;
1015 } 1015 }
1016 1016 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
1018 orig_blk_offset, 1018 orig_blk_offset,
1019 block_len_in_page, err); 1019 block_len_in_page, err);
1020 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1020 if (*err) { 1021 if (*err) {
1021 if (replaced_count) { 1022 if (replaced_count) {
1022 block_len_in_page = replaced_count; 1023 block_len_in_page = replaced_count;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3520ab8a6639..b147a67baa0d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3455,7 +3455,6 @@ const struct inode_operations ext4_dir_inode_operations = {
3455 .rmdir = ext4_rmdir, 3455 .rmdir = ext4_rmdir,
3456 .mknod = ext4_mknod, 3456 .mknod = ext4_mknod,
3457 .tmpfile = ext4_tmpfile, 3457 .tmpfile = ext4_tmpfile,
3458 .rename = ext4_rename,
3459 .rename2 = ext4_rename2, 3458 .rename2 = ext4_rename2,
3460 .setattr = ext4_setattr, 3459 .setattr = ext4_setattr,
3461 .setxattr = generic_setxattr, 3460 .setxattr = generic_setxattr,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6df7bc611dbd..32b43ad154b9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2142,10 +2142,6 @@ static int ext4_check_descriptors(struct super_block *sb,
2142 } 2142 }
2143 if (NULL != first_not_zeroed) 2143 if (NULL != first_not_zeroed)
2144 *first_not_zeroed = grp; 2144 *first_not_zeroed = grp;
2145
2146 ext4_free_blocks_count_set(sbi->s_es,
2147 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2148 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2149 return 1; 2145 return 1;
2150} 2146}
2151 2147
@@ -3883,13 +3879,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3883 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3879 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3884 goto failed_mount2; 3880 goto failed_mount2;
3885 } 3881 }
3886 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3887 if (!ext4_fill_flex_info(sb)) {
3888 ext4_msg(sb, KERN_ERR,
3889 "unable to initialize "
3890 "flex_bg meta info!");
3891 goto failed_mount2;
3892 }
3893 3882
3894 sbi->s_gdb_count = db_count; 3883 sbi->s_gdb_count = db_count;
3895 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3884 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
@@ -3902,23 +3891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3902 /* Register extent status tree shrinker */ 3891 /* Register extent status tree shrinker */
3903 ext4_es_register_shrinker(sbi); 3892 ext4_es_register_shrinker(sbi);
3904 3893
3905 err = percpu_counter_init(&sbi->s_freeclusters_counter, 3894 if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) {
3906 ext4_count_free_clusters(sb));
3907 if (!err) {
3908 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3909 ext4_count_free_inodes(sb));
3910 }
3911 if (!err) {
3912 err = percpu_counter_init(&sbi->s_dirs_counter,
3913 ext4_count_dirs(sb));
3914 }
3915 if (!err) {
3916 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3917 }
3918 if (!err) {
3919 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
3920 }
3921 if (err) {
3922 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3895 ext4_msg(sb, KERN_ERR, "insufficient memory");
3923 goto failed_mount3; 3896 goto failed_mount3;
3924 } 3897 }
@@ -4022,18 +3995,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
4022 3995
4023 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 3996 sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
4024 3997
4025 /*
4026 * The journal may have updated the bg summary counts, so we
4027 * need to update the global counters.
4028 */
4029 percpu_counter_set(&sbi->s_freeclusters_counter,
4030 ext4_count_free_clusters(sb));
4031 percpu_counter_set(&sbi->s_freeinodes_counter,
4032 ext4_count_free_inodes(sb));
4033 percpu_counter_set(&sbi->s_dirs_counter,
4034 ext4_count_dirs(sb));
4035 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
4036
4037no_journal: 3998no_journal:
4038 if (ext4_mballoc_ready) { 3999 if (ext4_mballoc_ready) {
4039 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); 4000 sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
@@ -4141,6 +4102,33 @@ no_journal:
4141 goto failed_mount5; 4102 goto failed_mount5;
4142 } 4103 }
4143 4104
4105 block = ext4_count_free_clusters(sb);
4106 ext4_free_blocks_count_set(sbi->s_es,
4107 EXT4_C2B(sbi, block));
4108 err = percpu_counter_init(&sbi->s_freeclusters_counter, block);
4109 if (!err) {
4110 unsigned long freei = ext4_count_free_inodes(sb);
4111 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
4112 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei);
4113 }
4114 if (!err)
4115 err = percpu_counter_init(&sbi->s_dirs_counter,
4116 ext4_count_dirs(sb));
4117 if (!err)
4118 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
4119 if (err) {
4120 ext4_msg(sb, KERN_ERR, "insufficient memory");
4121 goto failed_mount6;
4122 }
4123
4124 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
4125 if (!ext4_fill_flex_info(sb)) {
4126 ext4_msg(sb, KERN_ERR,
4127 "unable to initialize "
4128 "flex_bg meta info!");
4129 goto failed_mount6;
4130 }
4131
4144 err = ext4_register_li_request(sb, first_not_zeroed); 4132 err = ext4_register_li_request(sb, first_not_zeroed);
4145 if (err) 4133 if (err)
4146 goto failed_mount6; 4134 goto failed_mount6;
@@ -4215,6 +4203,12 @@ failed_mount7:
4215 ext4_unregister_li_request(sb); 4203 ext4_unregister_li_request(sb);
4216failed_mount6: 4204failed_mount6:
4217 ext4_mb_release(sb); 4205 ext4_mb_release(sb);
4206 if (sbi->s_flex_groups)
4207 ext4_kvfree(sbi->s_flex_groups);
4208 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4209 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4210 percpu_counter_destroy(&sbi->s_dirs_counter);
4211 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4218failed_mount5: 4212failed_mount5:
4219 ext4_ext_release(sb); 4213 ext4_ext_release(sb);
4220 ext4_release_system_zone(sb); 4214 ext4_release_system_zone(sb);
@@ -4233,12 +4227,6 @@ failed_mount_wq:
4233failed_mount3: 4227failed_mount3:
4234 ext4_es_unregister_shrinker(sbi); 4228 ext4_es_unregister_shrinker(sbi);
4235 del_timer_sync(&sbi->s_err_report); 4229 del_timer_sync(&sbi->s_err_report);
4236 if (sbi->s_flex_groups)
4237 ext4_kvfree(sbi->s_flex_groups);
4238 percpu_counter_destroy(&sbi->s_freeclusters_counter);
4239 percpu_counter_destroy(&sbi->s_freeinodes_counter);
4240 percpu_counter_destroy(&sbi->s_dirs_counter);
4241 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
4242 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4230 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4243 if (sbi->s_mmp_tsk) 4231 if (sbi->s_mmp_tsk)
4244 kthread_stop(sbi->s_mmp_tsk); 4232 kthread_stop(sbi->s_mmp_tsk);
@@ -4556,11 +4544,13 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4556 else 4544 else
4557 es->s_kbytes_written = 4545 es->s_kbytes_written =
4558 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4546 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4559 ext4_free_blocks_count_set(es, 4547 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
4548 ext4_free_blocks_count_set(es,
4560 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( 4549 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4561 &EXT4_SB(sb)->s_freeclusters_counter))); 4550 &EXT4_SB(sb)->s_freeclusters_counter)));
4562 es->s_free_inodes_count = 4551 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
4563 cpu_to_le32(percpu_counter_sum_positive( 4552 es->s_free_inodes_count =
4553 cpu_to_le32(percpu_counter_sum_positive(
4564 &EXT4_SB(sb)->s_freeinodes_counter)); 4554 &EXT4_SB(sb)->s_freeinodes_counter));
4565 BUFFER_TRACE(sbh, "marking dirty"); 4555 BUFFER_TRACE(sbh, "marking dirty");
4566 ext4_superblock_csum_set(sb); 4556 ext4_superblock_csum_set(sb);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
212 switch (type) { 206 switch (type) {
213 case ACL_TYPE_ACCESS: 207 case ACL_TYPE_ACCESS:
214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d370..6aeed5bada52 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
22#include "segment.h" 22#include "segment.h"
23#include <trace/events/f2fs.h> 23#include <trace/events/f2fs.h>
24 24
25static struct kmem_cache *orphan_entry_slab; 25static struct kmem_cache *ino_entry_slab;
26static struct kmem_cache *inode_entry_slab; 26static struct kmem_cache *inode_entry_slab;
27 27
28/* 28/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
282 .set_page_dirty = f2fs_set_meta_page_dirty, 282 .set_page_dirty = f2fs_set_meta_page_dirty,
283}; 283};
284 284
285static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
286{
287 struct ino_entry *e;
288retry:
289 spin_lock(&sbi->ino_lock[type]);
290
291 e = radix_tree_lookup(&sbi->ino_root[type], ino);
292 if (!e) {
293 e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
294 if (!e) {
295 spin_unlock(&sbi->ino_lock[type]);
296 goto retry;
297 }
298 if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
299 spin_unlock(&sbi->ino_lock[type]);
300 kmem_cache_free(ino_entry_slab, e);
301 goto retry;
302 }
303 memset(e, 0, sizeof(struct ino_entry));
304 e->ino = ino;
305
306 list_add_tail(&e->list, &sbi->ino_list[type]);
307 }
308 spin_unlock(&sbi->ino_lock[type]);
309}
310
311static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
312{
313 struct ino_entry *e;
314
315 spin_lock(&sbi->ino_lock[type]);
316 e = radix_tree_lookup(&sbi->ino_root[type], ino);
317 if (e) {
318 list_del(&e->list);
319 radix_tree_delete(&sbi->ino_root[type], ino);
320 if (type == ORPHAN_INO)
321 sbi->n_orphans--;
322 spin_unlock(&sbi->ino_lock[type]);
323 kmem_cache_free(ino_entry_slab, e);
324 return;
325 }
326 spin_unlock(&sbi->ino_lock[type]);
327}
328
329void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
330{
331 /* add new dirty ino entry into list */
332 __add_ino_entry(sbi, ino, type);
333}
334
335void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
336{
337 /* remove dirty ino entry from list */
338 __remove_ino_entry(sbi, ino, type);
339}
340
341/* mode should be APPEND_INO or UPDATE_INO */
342bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
343{
344 struct ino_entry *e;
345 spin_lock(&sbi->ino_lock[mode]);
346 e = radix_tree_lookup(&sbi->ino_root[mode], ino);
347 spin_unlock(&sbi->ino_lock[mode]);
348 return e ? true : false;
349}
350
351static void release_dirty_inode(struct f2fs_sb_info *sbi)
352{
353 struct ino_entry *e, *tmp;
354 int i;
355
356 for (i = APPEND_INO; i <= UPDATE_INO; i++) {
357 spin_lock(&sbi->ino_lock[i]);
358 list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
359 list_del(&e->list);
360 radix_tree_delete(&sbi->ino_root[i], e->ino);
361 kmem_cache_free(ino_entry_slab, e);
362 }
363 spin_unlock(&sbi->ino_lock[i]);
364 }
365}
366
285int acquire_orphan_inode(struct f2fs_sb_info *sbi) 367int acquire_orphan_inode(struct f2fs_sb_info *sbi)
286{ 368{
287 int err = 0; 369 int err = 0;
288 370
289 spin_lock(&sbi->orphan_inode_lock); 371 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
290 if (unlikely(sbi->n_orphans >= sbi->max_orphans)) 372 if (unlikely(sbi->n_orphans >= sbi->max_orphans))
291 err = -ENOSPC; 373 err = -ENOSPC;
292 else 374 else
293 sbi->n_orphans++; 375 sbi->n_orphans++;
294 spin_unlock(&sbi->orphan_inode_lock); 376 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
295 377
296 return err; 378 return err;
297} 379}
298 380
299void release_orphan_inode(struct f2fs_sb_info *sbi) 381void release_orphan_inode(struct f2fs_sb_info *sbi)
300{ 382{
301 spin_lock(&sbi->orphan_inode_lock); 383 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
302 f2fs_bug_on(sbi->n_orphans == 0); 384 f2fs_bug_on(sbi->n_orphans == 0);
303 sbi->n_orphans--; 385 sbi->n_orphans--;
304 spin_unlock(&sbi->orphan_inode_lock); 386 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
305} 387}
306 388
307void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 389void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
308{ 390{
309 struct list_head *head; 391 /* add new orphan ino entry into list */
310 struct orphan_inode_entry *new, *orphan; 392 __add_ino_entry(sbi, ino, ORPHAN_INO);
311
312 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
313 new->ino = ino;
314
315 spin_lock(&sbi->orphan_inode_lock);
316 head = &sbi->orphan_inode_list;
317 list_for_each_entry(orphan, head, list) {
318 if (orphan->ino == ino) {
319 spin_unlock(&sbi->orphan_inode_lock);
320 kmem_cache_free(orphan_entry_slab, new);
321 return;
322 }
323
324 if (orphan->ino > ino)
325 break;
326 }
327
328 /* add new orphan entry into list which is sorted by inode number */
329 list_add_tail(&new->list, &orphan->list);
330 spin_unlock(&sbi->orphan_inode_lock);
331} 393}
332 394
333void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 395void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
334{ 396{
335 struct list_head *head; 397 /* remove orphan entry from orphan list */
336 struct orphan_inode_entry *orphan; 398 __remove_ino_entry(sbi, ino, ORPHAN_INO);
337
338 spin_lock(&sbi->orphan_inode_lock);
339 head = &sbi->orphan_inode_list;
340 list_for_each_entry(orphan, head, list) {
341 if (orphan->ino == ino) {
342 list_del(&orphan->list);
343 f2fs_bug_on(sbi->n_orphans == 0);
344 sbi->n_orphans--;
345 spin_unlock(&sbi->orphan_inode_lock);
346 kmem_cache_free(orphan_entry_slab, orphan);
347 return;
348 }
349 }
350 spin_unlock(&sbi->orphan_inode_lock);
351} 399}
352 400
353static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 401static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
401 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 449 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
402 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 450 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
403 struct page *page = NULL; 451 struct page *page = NULL;
404 struct orphan_inode_entry *orphan = NULL; 452 struct ino_entry *orphan = NULL;
405 453
406 for (index = 0; index < orphan_blocks; index++) 454 for (index = 0; index < orphan_blocks; index++)
407 grab_meta_page(sbi, start_blk + index); 455 grab_meta_page(sbi, start_blk + index);
408 456
409 index = 1; 457 index = 1;
410 spin_lock(&sbi->orphan_inode_lock); 458 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
411 head = &sbi->orphan_inode_list; 459 head = &sbi->ino_list[ORPHAN_INO];
412 460
413 /* loop for each orphan inode entry and write them in Jornal block */ 461 /* loop for each orphan inode entry and write them in Jornal block */
414 list_for_each_entry(orphan, head, list) { 462 list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
448 f2fs_put_page(page, 1); 496 f2fs_put_page(page, 1);
449 } 497 }
450 498
451 spin_unlock(&sbi->orphan_inode_lock); 499 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
452} 500}
453 501
454static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, 502static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ retry_flush_dents:
714 * until finishing nat/sit flush. 762 * until finishing nat/sit flush.
715 */ 763 */
716retry_flush_nodes: 764retry_flush_nodes:
717 mutex_lock(&sbi->node_write); 765 down_write(&sbi->node_write);
718 766
719 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 767 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
720 mutex_unlock(&sbi->node_write); 768 up_write(&sbi->node_write);
721 sync_node_pages(sbi, 0, &wbc); 769 sync_node_pages(sbi, 0, &wbc);
722 goto retry_flush_nodes; 770 goto retry_flush_nodes;
723 } 771 }
@@ -726,7 +774,7 @@ retry_flush_nodes:
726 774
727static void unblock_operations(struct f2fs_sb_info *sbi) 775static void unblock_operations(struct f2fs_sb_info *sbi)
728{ 776{
729 mutex_unlock(&sbi->node_write); 777 up_write(&sbi->node_write);
730 f2fs_unlock_all(sbi); 778 f2fs_unlock_all(sbi);
731} 779}
732 780
@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
748static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 796static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
749{ 797{
750 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 798 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
799 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
751 nid_t last_nid = 0; 800 nid_t last_nid = 0;
752 block_t start_blk; 801 block_t start_blk;
753 struct page *cp_page; 802 struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
761 * This avoids to conduct wrong roll-forward operations and uses 810 * This avoids to conduct wrong roll-forward operations and uses
762 * metapages, so should be called prior to sync_meta_pages below. 811 * metapages, so should be called prior to sync_meta_pages below.
763 */ 812 */
764 discard_next_dnode(sbi); 813 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
765 814
766 /* Flush all the NAT/SIT pages */ 815 /* Flush all the NAT/SIT pages */
767 while (get_pages(sbi, F2FS_DIRTY_META)) 816 while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
885 /* Here, we only have one bio having CP pack */ 934 /* Here, we only have one bio having CP pack */
886 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 935 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
887 936
888 if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 937 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
889 clear_prefree_segments(sbi); 938 clear_prefree_segments(sbi);
939 release_dirty_inode(sbi);
890 F2FS_RESET_SB_DIRT(sbi); 940 F2FS_RESET_SB_DIRT(sbi);
891 } 941 }
892} 942}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
932 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 982 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
933} 983}
934 984
935void init_orphan_info(struct f2fs_sb_info *sbi) 985void init_ino_entry_info(struct f2fs_sb_info *sbi)
936{ 986{
937 spin_lock_init(&sbi->orphan_inode_lock); 987 int i;
938 INIT_LIST_HEAD(&sbi->orphan_inode_list); 988
939 sbi->n_orphans = 0; 989 for (i = 0; i < MAX_INO_ENTRY; i++) {
990 INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
991 spin_lock_init(&sbi->ino_lock[i]);
992 INIT_LIST_HEAD(&sbi->ino_list[i]);
993 }
994
940 /* 995 /*
941 * considering 512 blocks in a segment 8 blocks are needed for cp 996 * considering 512 blocks in a segment 8 blocks are needed for cp
942 * and log segment summaries. Remaining blocks are used to keep 997 * and log segment summaries. Remaining blocks are used to keep
943 * orphan entries with the limitation one reserved segment 998 * orphan entries with the limitation one reserved segment
944 * for cp pack we can have max 1020*504 orphan entries 999 * for cp pack we can have max 1020*504 orphan entries
945 */ 1000 */
1001 sbi->n_orphans = 0;
946 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 1002 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
947 * F2FS_ORPHANS_PER_BLOCK; 1003 * F2FS_ORPHANS_PER_BLOCK;
948} 1004}
949 1005
950int __init create_checkpoint_caches(void) 1006int __init create_checkpoint_caches(void)
951{ 1007{
952 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 1008 ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
953 sizeof(struct orphan_inode_entry)); 1009 sizeof(struct ino_entry));
954 if (!orphan_entry_slab) 1010 if (!ino_entry_slab)
955 return -ENOMEM; 1011 return -ENOMEM;
956 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 1012 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
957 sizeof(struct dir_inode_entry)); 1013 sizeof(struct dir_inode_entry));
958 if (!inode_entry_slab) { 1014 if (!inode_entry_slab) {
959 kmem_cache_destroy(orphan_entry_slab); 1015 kmem_cache_destroy(ino_entry_slab);
960 return -ENOMEM; 1016 return -ENOMEM;
961 } 1017 }
962 return 0; 1018 return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)
964 1020
965void destroy_checkpoint_caches(void) 1021void destroy_checkpoint_caches(void)
966{ 1022{
967 kmem_cache_destroy(orphan_entry_slab); 1023 kmem_cache_destroy(ino_entry_slab);
968 kmem_cache_destroy(inode_entry_slab); 1024 kmem_cache_destroy(inode_entry_slab);
969} 1025}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619edb5f..03313099c51c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
139 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
140 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
141 io->fio.type = META_FLUSH; 141 io->fio.type = META_FLUSH;
142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 if (test_opt(sbi, NOBARRIER))
143 io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
144 else
145 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
143 } 146 }
144 __submit_merged_bio(io); 147 __submit_merged_bio(io);
145 up_write(&io->io_rwsem); 148 up_write(&io->io_rwsem);
@@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
626 if (check_extent_cache(inode, pgofs, bh_result)) 629 if (check_extent_cache(inode, pgofs, bh_result))
627 goto out; 630 goto out;
628 631
629 if (create) 632 if (create) {
633 f2fs_balance_fs(sbi);
630 f2fs_lock_op(sbi); 634 f2fs_lock_op(sbi);
635 }
631 636
632 /* When reading holes, we need its node page */ 637 /* When reading holes, we need its node page */
633 set_new_dnode(&dn, inode, NULL, NULL, 0); 638 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
784 !is_cold_data(page) && 789 !is_cold_data(page) &&
785 need_inplace_update(inode))) { 790 need_inplace_update(inode))) {
786 rewrite_data_page(page, old_blkaddr, fio); 791 rewrite_data_page(page, old_blkaddr, fio);
792 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
787 } else { 793 } else {
788 write_data_page(page, &dn, &new_blkaddr, fio); 794 write_data_page(page, &dn, &new_blkaddr, fio);
789 update_extent_cache(new_blkaddr, &dn); 795 update_extent_cache(new_blkaddr, &dn);
796 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
790 } 797 }
791out_writepage: 798out_writepage:
792 f2fs_put_dnode(&dn); 799 f2fs_put_dnode(&dn);
@@ -914,6 +921,16 @@ skip_write:
914 return 0; 921 return 0;
915} 922}
916 923
924static void f2fs_write_failed(struct address_space *mapping, loff_t to)
925{
926 struct inode *inode = mapping->host;
927
928 if (to > inode->i_size) {
929 truncate_pagecache(inode, inode->i_size);
930 truncate_blocks(inode, inode->i_size);
931 }
932}
933
917static int f2fs_write_begin(struct file *file, struct address_space *mapping, 934static int f2fs_write_begin(struct file *file, struct address_space *mapping,
918 loff_t pos, unsigned len, unsigned flags, 935 loff_t pos, unsigned len, unsigned flags,
919 struct page **pagep, void **fsdata) 936 struct page **pagep, void **fsdata)
@@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
931repeat: 948repeat:
932 err = f2fs_convert_inline_data(inode, pos + len); 949 err = f2fs_convert_inline_data(inode, pos + len);
933 if (err) 950 if (err)
934 return err; 951 goto fail;
935 952
936 page = grab_cache_page_write_begin(mapping, index, flags); 953 page = grab_cache_page_write_begin(mapping, index, flags);
937 if (!page) 954 if (!page) {
938 return -ENOMEM; 955 err = -ENOMEM;
956 goto fail;
957 }
939 958
940 /* to avoid latency during memory pressure */ 959 /* to avoid latency during memory pressure */
941 unlock_page(page); 960 unlock_page(page);
@@ -949,10 +968,9 @@ repeat:
949 set_new_dnode(&dn, inode, NULL, NULL, 0); 968 set_new_dnode(&dn, inode, NULL, NULL, 0);
950 err = f2fs_reserve_block(&dn, index); 969 err = f2fs_reserve_block(&dn, index);
951 f2fs_unlock_op(sbi); 970 f2fs_unlock_op(sbi);
952
953 if (err) { 971 if (err) {
954 f2fs_put_page(page, 0); 972 f2fs_put_page(page, 0);
955 return err; 973 goto fail;
956 } 974 }
957inline_data: 975inline_data:
958 lock_page(page); 976 lock_page(page);
@@ -982,19 +1000,20 @@ inline_data:
982 err = f2fs_read_inline_data(inode, page); 1000 err = f2fs_read_inline_data(inode, page);
983 if (err) { 1001 if (err) {
984 page_cache_release(page); 1002 page_cache_release(page);
985 return err; 1003 goto fail;
986 } 1004 }
987 } else { 1005 } else {
988 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 1006 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
989 READ_SYNC); 1007 READ_SYNC);
990 if (err) 1008 if (err)
991 return err; 1009 goto fail;
992 } 1010 }
993 1011
994 lock_page(page); 1012 lock_page(page);
995 if (unlikely(!PageUptodate(page))) { 1013 if (unlikely(!PageUptodate(page))) {
996 f2fs_put_page(page, 1); 1014 f2fs_put_page(page, 1);
997 return -EIO; 1015 err = -EIO;
1016 goto fail;
998 } 1017 }
999 if (unlikely(page->mapping != mapping)) { 1018 if (unlikely(page->mapping != mapping)) {
1000 f2fs_put_page(page, 1); 1019 f2fs_put_page(page, 1);
@@ -1005,6 +1024,9 @@ out:
1005 SetPageUptodate(page); 1024 SetPageUptodate(page);
1006 clear_cold_data(page); 1025 clear_cold_data(page);
1007 return 0; 1026 return 0;
1027fail:
1028 f2fs_write_failed(mapping, pos + len);
1029 return err;
1008} 1030}
1009 1031
1010static int f2fs_write_end(struct file *file, 1032static int f2fs_write_end(struct file *file,
@@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,
1016 1038
1017 trace_f2fs_write_end(inode, pos, len, copied); 1039 trace_f2fs_write_end(inode, pos, len, copied);
1018 1040
1019 SetPageUptodate(page);
1020 set_page_dirty(page); 1041 set_page_dirty(page);
1021 1042
1022 if (pos + copied > i_size_read(inode)) { 1043 if (pos + copied > i_size_read(inode)) {
@@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1050 struct iov_iter *iter, loff_t offset) 1071 struct iov_iter *iter, loff_t offset)
1051{ 1072{
1052 struct file *file = iocb->ki_filp; 1073 struct file *file = iocb->ki_filp;
1053 struct inode *inode = file->f_mapping->host; 1074 struct address_space *mapping = file->f_mapping;
1075 struct inode *inode = mapping->host;
1076 size_t count = iov_iter_count(iter);
1077 int err;
1054 1078
1055 /* Let buffer I/O handle the inline data case. */ 1079 /* Let buffer I/O handle the inline data case. */
1056 if (f2fs_has_inline_data(inode)) 1080 if (f2fs_has_inline_data(inode))
@@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1062 /* clear fsync mark to recover these blocks */ 1086 /* clear fsync mark to recover these blocks */
1063 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1087 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1064 1088
1065 return blockdev_direct_IO(rw, iocb, inode, iter, offset, 1089 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1066 get_data_block); 1090
1091 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1092 if (err < 0 && (rw & WRITE))
1093 f2fs_write_failed(mapping, offset + count);
1094
1095 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
1096
1097 return err;
1067} 1098}
1068 1099
1069static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1100static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..a441ba33be11 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
167 si->cache_mem += npages << PAGE_CACHE_SHIFT; 167 si->cache_mem += npages << PAGE_CACHE_SHIFT;
168 npages = META_MAPPING(sbi)->nrpages; 168 npages = META_MAPPING(sbi)->nrpages;
169 si->cache_mem += npages << PAGE_CACHE_SHIFT; 169 si->cache_mem += npages << PAGE_CACHE_SHIFT;
170 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); 170 si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); 171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
172} 172}
173 173
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
345 345
346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); 346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
347 if (!f2fs_debugfs_root) 347 if (!f2fs_debugfs_root)
348 goto bail; 348 return;
349 349
350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, 350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
351 NULL, &stat_fops); 351 NULL, &stat_fops);
352 if (!file) 352 if (!file) {
353 goto free_debugfs_dir; 353 debugfs_remove(f2fs_debugfs_root);
354 354 f2fs_debugfs_root = NULL;
355 return; 355 }
356
357free_debugfs_dir:
358 debugfs_remove(f2fs_debugfs_root);
359
360bail:
361 f2fs_debugfs_root = NULL;
362 return;
363} 356}
364 357
365void f2fs_destroy_root_stats(void) 358void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a4addd72ebbd..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
77 return bidx; 77 return bidx;
78} 78}
79 79
80static bool early_match_name(const char *name, size_t namelen, 80static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
81 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 81 struct f2fs_dir_entry *de)
82{ 82{
83 if (le16_to_cpu(de->name_len) != namelen) 83 if (le16_to_cpu(de->name_len) != namelen)
84 return false; 84 return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
90} 90}
91 91
92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 const char *name, size_t namelen, int *max_slots, 93 struct qstr *name, int *max_slots,
94 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
95{ 95{
96 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
109 continue; 109 continue;
110 } 110 }
111 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
112 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name->len, namehash, de)) {
113 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
114 name, namelen)) { 114 name->name,
115 name->len)) {
115 *res_page = dentry_page; 116 *res_page = dentry_page;
116 goto found; 117 goto found;
117 } 118 }
@@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
120 *max_slots = max_len; 121 *max_slots = max_len;
121 max_len = 0; 122 max_len = 0;
122 } 123 }
124
125 /*
126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here for figuring out where the bugs are occurred.
128 */
129 f2fs_bug_on(!de->name_len);
130
123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
124 } 132 }
125 133
@@ -132,10 +140,10 @@ found:
132} 140}
133 141
134static struct f2fs_dir_entry *find_in_level(struct inode *dir, 142static struct f2fs_dir_entry *find_in_level(struct inode *dir,
135 unsigned int level, const char *name, size_t namelen, 143 unsigned int level, struct qstr *name,
136 f2fs_hash_t namehash, struct page **res_page) 144 f2fs_hash_t namehash, struct page **res_page)
137{ 145{
138 int s = GET_DENTRY_SLOTS(namelen); 146 int s = GET_DENTRY_SLOTS(name->len);
139 unsigned int nbucket, nblock; 147 unsigned int nbucket, nblock;
140 unsigned int bidx, end_block; 148 unsigned int bidx, end_block;
141 struct page *dentry_page; 149 struct page *dentry_page;
@@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
160 continue; 168 continue;
161 } 169 }
162 170
163 de = find_in_block(dentry_page, name, namelen, 171 de = find_in_block(dentry_page, name, &max_slots,
164 &max_slots, namehash, res_page); 172 namehash, res_page);
165 if (de) 173 if (de)
166 break; 174 break;
167 175
@@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
187struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, 195struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
188 struct qstr *child, struct page **res_page) 196 struct qstr *child, struct page **res_page)
189{ 197{
190 const char *name = child->name;
191 size_t namelen = child->len;
192 unsigned long npages = dir_blocks(dir); 198 unsigned long npages = dir_blocks(dir);
193 struct f2fs_dir_entry *de = NULL; 199 struct f2fs_dir_entry *de = NULL;
194 f2fs_hash_t name_hash; 200 f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
200 206
201 *res_page = NULL; 207 *res_page = NULL;
202 208
203 name_hash = f2fs_dentry_hash(name, namelen); 209 name_hash = f2fs_dentry_hash(child);
204 max_depth = F2FS_I(dir)->i_current_depth; 210 max_depth = F2FS_I(dir)->i_current_depth;
205 211
206 for (level = 0; level < max_depth; level++) { 212 for (level = 0; level < max_depth; level++) {
207 de = find_in_level(dir, level, name, 213 de = find_in_level(dir, level, child, name_hash, res_page);
208 namelen, name_hash, res_page);
209 if (de) 214 if (de)
210 break; 215 break;
211 } 216 }
@@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,
298 struct page *dentry_page; 303 struct page *dentry_page;
299 struct f2fs_dentry_block *dentry_blk; 304 struct f2fs_dentry_block *dentry_blk;
300 struct f2fs_dir_entry *de; 305 struct f2fs_dir_entry *de;
301 void *kaddr;
302 306
303 dentry_page = get_new_data_page(inode, page, 0, true); 307 dentry_page = get_new_data_page(inode, page, 0, true);
304 if (IS_ERR(dentry_page)) 308 if (IS_ERR(dentry_page))
305 return PTR_ERR(dentry_page); 309 return PTR_ERR(dentry_page);
306 310
307 kaddr = kmap_atomic(dentry_page); 311
308 dentry_blk = (struct f2fs_dentry_block *)kaddr; 312 dentry_blk = kmap_atomic(dentry_page);
309 313
310 de = &dentry_blk->dentry[0]; 314 de = &dentry_blk->dentry[0];
311 de->name_len = cpu_to_le16(1); 315 de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,
323 327
324 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); 328 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
325 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); 329 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
326 kunmap_atomic(kaddr); 330 kunmap_atomic(dentry_blk);
327 331
328 set_page_dirty(dentry_page); 332 set_page_dirty(dentry_page);
329 f2fs_put_page(dentry_page, 1); 333 f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,
333static struct page *init_inode_metadata(struct inode *inode, 337static struct page *init_inode_metadata(struct inode *inode,
334 struct inode *dir, const struct qstr *name) 338 struct inode *dir, const struct qstr *name)
335{ 339{
340 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
336 struct page *page; 341 struct page *page;
337 int err; 342 int err;
338 343
339 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 344 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
340 page = new_inode_page(inode, name); 345 page = new_inode_page(inode);
341 if (IS_ERR(page)) 346 if (IS_ERR(page))
342 return page; 347 return page;
343 348
@@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,
362 set_cold_node(inode, page); 367 set_cold_node(inode, page);
363 } 368 }
364 369
365 init_dent_inode(name, page); 370 if (name)
371 init_dent_inode(name, page);
366 372
367 /* 373 /*
368 * This file should be checkpointed during fsync. 374 * This file should be checkpointed during fsync.
@@ -370,6 +376,12 @@ static struct page *init_inode_metadata(struct inode *inode,
370 */ 376 */
371 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 377 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
372 file_lost_pino(inode); 378 file_lost_pino(inode);
379 /*
380 * If link the tmpfile to alias through linkat path,
381 * we should remove this inode from orphan list.
382 */
383 if (inode->i_nlink == 0)
384 remove_orphan_inode(sbi, inode->i_ino);
373 inc_nlink(inode); 385 inc_nlink(inode);
374 } 386 }
375 return page; 387 return page;
@@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
453 int err = 0; 465 int err = 0;
454 int i; 466 int i;
455 467
456 dentry_hash = f2fs_dentry_hash(name->name, name->len); 468 dentry_hash = f2fs_dentry_hash(name);
457 level = 0; 469 level = 0;
458 current_depth = F2FS_I(dir)->i_current_depth; 470 current_depth = F2FS_I(dir)->i_current_depth;
459 if (F2FS_I(dir)->chash == dentry_hash) { 471 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@ fail:
529 return err; 541 return err;
530} 542}
531 543
544int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
545{
546 struct page *page;
547 int err = 0;
548
549 down_write(&F2FS_I(inode)->i_sem);
550 page = init_inode_metadata(inode, dir, NULL);
551 if (IS_ERR(page)) {
552 err = PTR_ERR(page);
553 goto fail;
554 }
555 /* we don't need to mark_inode_dirty now */
556 update_inode(inode, page);
557 f2fs_put_page(page, 1);
558
559 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
560fail:
561 up_write(&F2FS_I(inode)->i_sem);
562 return err;
563}
564
532/* 565/*
533 * It only removes the dentry from the dentry page,corresponding name 566 * It only removes the dentry from the dentry page,corresponding name
534 * entry in name page does not need to be touched during deletion. 567 * entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
541 struct address_space *mapping = page->mapping; 574 struct address_space *mapping = page->mapping;
542 struct inode *dir = mapping->host; 575 struct inode *dir = mapping->host;
543 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 576 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
544 void *kaddr = page_address(page);
545 int i; 577 int i;
546 578
547 lock_page(page); 579 lock_page(page);
548 f2fs_wait_on_page_writeback(page, DATA); 580 f2fs_wait_on_page_writeback(page, DATA);
549 581
550 dentry_blk = (struct f2fs_dentry_block *)kaddr; 582 dentry_blk = page_address(page);
551 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 583 bit_pos = dentry - dentry_blk->dentry;
552 for (i = 0; i < slots; i++) 584 for (i = 0; i < slots; i++)
553 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); 585 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
554 586
@@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)
603 unsigned long nblock = dir_blocks(dir); 635 unsigned long nblock = dir_blocks(dir);
604 636
605 for (bidx = 0; bidx < nblock; bidx++) { 637 for (bidx = 0; bidx < nblock; bidx++) {
606 void *kaddr;
607 dentry_page = get_lock_data_page(dir, bidx); 638 dentry_page = get_lock_data_page(dir, bidx);
608 if (IS_ERR(dentry_page)) { 639 if (IS_ERR(dentry_page)) {
609 if (PTR_ERR(dentry_page) == -ENOENT) 640 if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)
612 return false; 643 return false;
613 } 644 }
614 645
615 kaddr = kmap_atomic(dentry_page); 646
616 dentry_blk = (struct f2fs_dentry_block *)kaddr; 647 dentry_blk = kmap_atomic(dentry_page);
617 if (bidx == 0) 648 if (bidx == 0)
618 bit_pos = 2; 649 bit_pos = 2;
619 else 650 else
@@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)
621 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 652 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
622 NR_DENTRY_IN_BLOCK, 653 NR_DENTRY_IN_BLOCK,
623 bit_pos); 654 bit_pos);
624 kunmap_atomic(kaddr); 655 kunmap_atomic(dentry_blk);
625 656
626 f2fs_put_page(dentry_page, 1); 657 f2fs_put_page(dentry_page, 1);
627 658
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58df97e174d0..4dab5338a97a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
44#define F2FS_MOUNT_NOBARRIER 0x00000400
44 45
45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 46#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 47#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -99,8 +100,15 @@ enum {
99 META_SSA 100 META_SSA
100}; 101};
101 102
102/* for the list of orphan inodes */ 103/* for the list of ino */
103struct orphan_inode_entry { 104enum {
105 ORPHAN_INO, /* for orphan ino list */
106 APPEND_INO, /* for append ino list */
107 UPDATE_INO, /* for update ino list */
108 MAX_INO_ENTRY, /* max. list */
109};
110
111struct ino_entry {
104 struct list_head list; /* list head */ 112 struct list_head list; /* list head */
105 nid_t ino; /* inode number */ 113 nid_t ino; /* inode number */
106}; 114};
@@ -256,6 +264,8 @@ struct f2fs_nm_info {
256 unsigned int nat_cnt; /* the # of cached nat entries */ 264 unsigned int nat_cnt; /* the # of cached nat entries */
257 struct list_head nat_entries; /* cached nat entry list (clean) */ 265 struct list_head nat_entries; /* cached nat entry list (clean) */
258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 266 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
267 struct list_head nat_entry_set; /* nat entry set list */
268 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
259 269
260 /* free node ids management */ 270 /* free node ids management */
261 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 271 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -442,14 +452,17 @@ struct f2fs_sb_info {
442 struct inode *meta_inode; /* cache meta blocks */ 452 struct inode *meta_inode; /* cache meta blocks */
443 struct mutex cp_mutex; /* checkpoint procedure lock */ 453 struct mutex cp_mutex; /* checkpoint procedure lock */
444 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 454 struct rw_semaphore cp_rwsem; /* blocking FS operations */
445 struct mutex node_write; /* locking node writes */ 455 struct rw_semaphore node_write; /* locking node writes */
446 struct mutex writepages; /* mutex for writepages() */ 456 struct mutex writepages; /* mutex for writepages() */
447 bool por_doing; /* recovery is doing or not */ 457 bool por_doing; /* recovery is doing or not */
448 wait_queue_head_t cp_wait; 458 wait_queue_head_t cp_wait;
449 459
450 /* for orphan inode management */ 460 /* for inode management */
451 struct list_head orphan_inode_list; /* orphan inode list */ 461 struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
452 spinlock_t orphan_inode_lock; /* for orphan inode list */ 462 spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
463 struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
464
465 /* for orphan inode, use 0'th array */
453 unsigned int n_orphans; /* # of orphan inodes */ 466 unsigned int n_orphans; /* # of orphan inodes */
454 unsigned int max_orphans; /* max orphan inodes */ 467 unsigned int max_orphans; /* max orphan inodes */
455 468
@@ -768,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
768 if (flag == NAT_BITMAP) 781 if (flag == NAT_BITMAP)
769 return &ckpt->sit_nat_version_bitmap; 782 return &ckpt->sit_nat_version_bitmap;
770 else 783 else
771 return ((unsigned char *)ckpt + F2FS_BLKSIZE); 784 return (unsigned char *)ckpt + F2FS_BLKSIZE;
772 } else { 785 } else {
773 offset = (flag == NAT_BITMAP) ? 786 offset = (flag == NAT_BITMAP) ?
774 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; 787 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -983,11 +996,15 @@ enum {
983 FI_NO_EXTENT, /* not to use the extent cache */ 996 FI_NO_EXTENT, /* not to use the extent cache */
984 FI_INLINE_XATTR, /* used for inline xattr */ 997 FI_INLINE_XATTR, /* used for inline xattr */
985 FI_INLINE_DATA, /* used for inline data*/ 998 FI_INLINE_DATA, /* used for inline data*/
999 FI_APPEND_WRITE, /* inode has appended data */
1000 FI_UPDATE_WRITE, /* inode has in-place-update data */
1001 FI_NEED_IPU, /* used fo ipu for fdatasync */
986}; 1002};
987 1003
988static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1004static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
989{ 1005{
990 set_bit(flag, &fi->flags); 1006 if (!test_bit(flag, &fi->flags))
1007 set_bit(flag, &fi->flags);
991} 1008}
992 1009
993static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) 1010static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -997,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
997 1014
998static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) 1015static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
999{ 1016{
1000 clear_bit(flag, &fi->flags); 1017 if (test_bit(flag, &fi->flags))
1018 clear_bit(flag, &fi->flags);
1001} 1019}
1002 1020
1003static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) 1021static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1136,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1136int update_dent_inode(struct inode *, const struct qstr *); 1154int update_dent_inode(struct inode *, const struct qstr *);
1137int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1155int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
1138void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 1156void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
1157int f2fs_do_tmpfile(struct inode *, struct inode *);
1139int f2fs_make_empty(struct inode *, struct inode *); 1158int f2fs_make_empty(struct inode *, struct inode *);
1140bool f2fs_empty_dir(struct inode *); 1159bool f2fs_empty_dir(struct inode *);
1141 1160
@@ -1155,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
1155/* 1174/*
1156 * hash.c 1175 * hash.c
1157 */ 1176 */
1158f2fs_hash_t f2fs_dentry_hash(const char *, size_t); 1177f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
1159 1178
1160/* 1179/*
1161 * node.c 1180 * node.c
@@ -1173,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
1173int truncate_xattr_node(struct inode *, struct page *); 1192int truncate_xattr_node(struct inode *, struct page *);
1174int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1193int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1175void remove_inode_page(struct inode *); 1194void remove_inode_page(struct inode *);
1176struct page *new_inode_page(struct inode *, const struct qstr *); 1195struct page *new_inode_page(struct inode *);
1177struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1196struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
1178void ra_node_page(struct f2fs_sb_info *, nid_t); 1197void ra_node_page(struct f2fs_sb_info *, nid_t);
1179struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1198struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1185,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1185void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1204void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1186void recover_node_page(struct f2fs_sb_info *, struct page *, 1205void recover_node_page(struct f2fs_sb_info *, struct page *,
1187 struct f2fs_summary *, struct node_info *, block_t); 1206 struct f2fs_summary *, struct node_info *, block_t);
1207void recover_inline_xattr(struct inode *, struct page *);
1188bool recover_xattr_data(struct inode *, struct page *, block_t); 1208bool recover_xattr_data(struct inode *, struct page *, block_t);
1189int recover_inode_page(struct f2fs_sb_info *, struct page *); 1209int recover_inode_page(struct f2fs_sb_info *, struct page *);
1190int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1210int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1206,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
1206void invalidate_blocks(struct f2fs_sb_info *, block_t); 1226void invalidate_blocks(struct f2fs_sb_info *, block_t);
1207void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1227void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1208void clear_prefree_segments(struct f2fs_sb_info *); 1228void clear_prefree_segments(struct f2fs_sb_info *);
1209void discard_next_dnode(struct f2fs_sb_info *); 1229void discard_next_dnode(struct f2fs_sb_info *, block_t);
1210int npages_for_summary_flush(struct f2fs_sb_info *); 1230int npages_for_summary_flush(struct f2fs_sb_info *);
1211void allocate_new_segments(struct f2fs_sb_info *); 1231void allocate_new_segments(struct f2fs_sb_info *);
1212struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1232struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1240,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1240struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1260struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1241int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1261int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1242long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1262long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1265bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1243int acquire_orphan_inode(struct f2fs_sb_info *); 1266int acquire_orphan_inode(struct f2fs_sb_info *);
1244void release_orphan_inode(struct f2fs_sb_info *); 1267void release_orphan_inode(struct f2fs_sb_info *);
1245void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1268void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1251,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);
1251void remove_dirty_dir_inode(struct inode *); 1274void remove_dirty_dir_inode(struct inode *);
1252void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1275void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1253void write_checkpoint(struct f2fs_sb_info *, bool); 1276void write_checkpoint(struct f2fs_sb_info *, bool);
1254void init_orphan_info(struct f2fs_sb_info *); 1277void init_ino_entry_info(struct f2fs_sb_info *);
1255int __init create_checkpoint_caches(void); 1278int __init create_checkpoint_caches(void);
1256void destroy_checkpoint_caches(void); 1279void destroy_checkpoint_caches(void);
1257 1280
@@ -1295,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
1295struct f2fs_stat_info { 1318struct f2fs_stat_info {
1296 struct list_head stat_list; 1319 struct list_head stat_list;
1297 struct f2fs_sb_info *sbi; 1320 struct f2fs_sb_info *sbi;
1298 struct mutex stat_lock;
1299 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1321 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1300 int main_area_segs, main_area_sections, main_area_zones; 1322 int main_area_segs, main_area_sections, main_area_zones;
1301 int hit_ext, total_ext; 1323 int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d8b96275092..208f1a9bd569 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
127 return 0; 127 return 0;
128 128
129 trace_f2fs_sync_file_enter(inode); 129 trace_f2fs_sync_file_enter(inode);
130
131 /* if fdatasync is triggered, let's do in-place-update */
132 if (datasync)
133 set_inode_flag(fi, FI_NEED_IPU);
134
130 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 135 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
136 if (datasync)
137 clear_inode_flag(fi, FI_NEED_IPU);
131 if (ret) { 138 if (ret) {
132 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 139 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
133 return ret; 140 return ret;
134 } 141 }
135 142
143 /*
144 * if there is no written data, don't waste time to write recovery info.
145 */
146 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
147 !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
148 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
149 exist_written_data(sbi, inode->i_ino, UPDATE_INO))
150 goto flush_out;
151 goto out;
152 }
153
136 /* guarantee free sections for fsync */ 154 /* guarantee free sections for fsync */
137 f2fs_balance_fs(sbi); 155 f2fs_balance_fs(sbi);
138 156
@@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
188 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 206 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
189 if (ret) 207 if (ret)
190 goto out; 208 goto out;
209
210 /* once recovery info is written, don't need to tack this */
211 remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
212 clear_inode_flag(fi, FI_APPEND_WRITE);
213flush_out:
214 remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
215 clear_inode_flag(fi, FI_UPDATE_WRITE);
191 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 216 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
192 } 217 }
193out: 218out:
@@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
206 231
207 /* find first dirty page index */ 232 /* find first dirty page index */
208 pagevec_init(&pvec, 0); 233 pagevec_init(&pvec, 0);
209 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); 234 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
210 pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; 235 PAGECACHE_TAG_DIRTY, 1);
236 pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
211 pagevec_release(&pvec); 237 pagevec_release(&pvec);
212 return pgofs; 238 return pgofs;
213} 239}
@@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
272 } 298 }
273 } 299 }
274 300
275 end_offset = IS_INODE(dn.node_page) ? 301 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
276 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
277 302
278 /* find data/hole in dnode block */ 303 /* find data/hole in dnode block */
279 for (; dn.ofs_in_node < end_offset; 304 for (; dn.ofs_in_node < end_offset;
@@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
380 return; 405 return;
381 406
382 lock_page(page); 407 lock_page(page);
383 if (unlikely(page->mapping != inode->i_mapping)) { 408 if (unlikely(!PageUptodate(page) ||
384 f2fs_put_page(page, 1); 409 page->mapping != inode->i_mapping))
385 return; 410 goto out;
386 } 411
387 f2fs_wait_on_page_writeback(page, DATA); 412 f2fs_wait_on_page_writeback(page, DATA);
388 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 413 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
389 set_page_dirty(page); 414 set_page_dirty(page);
415
416out:
390 f2fs_put_page(page, 1); 417 f2fs_put_page(page, 1);
391} 418}
392 419
@@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
645 loff_t off_start, off_end; 672 loff_t off_start, off_end;
646 int ret = 0; 673 int ret = 0;
647 674
675 f2fs_balance_fs(sbi);
676
648 ret = inode_newsize_ok(inode, (len + offset)); 677 ret = inode_newsize_ok(inode, (len + offset));
649 if (ret) 678 if (ret)
650 return ret; 679 return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b90dbe55403a..d7947d90ccc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) 186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
187{ 187{
188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
189 unsigned int hint = 0;
190 unsigned int secno; 189 unsigned int secno;
191 190
192 /* 191 /*
@@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
194 * selected by background GC before. 193 * selected by background GC before.
195 * Those segments guarantee they have small valid blocks. 194 * Those segments guarantee they have small valid blocks.
196 */ 195 */
197next: 196 for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
198 secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
199 if (secno < TOTAL_SECS(sbi)) {
200 if (sec_usage_check(sbi, secno)) 197 if (sec_usage_check(sbi, secno))
201 goto next; 198 continue;
202 clear_bit(secno, dirty_i->victim_secmap); 199 clear_bit(secno, dirty_i->victim_secmap);
203 return secno * sbi->segs_per_sec; 200 return secno * sbi->segs_per_sec;
204 } 201 }
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) 72f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
73{ 73{
74 __u32 hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 const char *name = name_info->name;
79 size_t len = name_info->len;
78 80
79 if ((len <= 2) && (name[0] == '.') && 81 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0')) 82 (name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba5228c197..5beeccef9ae1 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,
172 stat_inc_inline_inode(inode); 172 stat_inc_inline_inode(inode);
173 } 173 }
174 174
175 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
175 sync_inode_page(&dn); 176 sync_inode_page(&dn);
176 f2fs_put_dnode(&dn); 177 f2fs_put_dnode(&dn);
177 178
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2cf6962f6cc8..2c39999f3868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -267,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
267void f2fs_evict_inode(struct inode *inode) 267void f2fs_evict_inode(struct inode *inode)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
270 271
271 trace_f2fs_evict_inode(inode); 272 trace_f2fs_evict_inode(inode);
272 truncate_inode_pages_final(&inode->i_data); 273 truncate_inode_pages_final(&inode->i_data);
273 274
274 if (inode->i_ino == F2FS_NODE_INO(sbi) || 275 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
275 inode->i_ino == F2FS_META_INO(sbi)) 276 inode->i_ino == F2FS_META_INO(sbi))
276 goto no_delete; 277 goto out_clear;
277 278
278 f2fs_bug_on(get_dirty_dents(inode)); 279 f2fs_bug_on(get_dirty_dents(inode));
279 remove_dirty_dir_inode(inode); 280 remove_dirty_dir_inode(inode);
@@ -295,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)
295 296
296 sb_end_intwrite(inode->i_sb); 297 sb_end_intwrite(inode->i_sb);
297no_delete: 298no_delete:
298 clear_inode(inode);
299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
300 if (xnid)
301 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
302 if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
303 add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
304 if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
305 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
306out_clear:
307 clear_inode(inode);
300} 308}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a6bdddc33ce2..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "node.h" 19#include "node.h"
@@ -22,14 +23,13 @@
22 23
23static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
24{ 25{
25 struct super_block *sb = dir->i_sb; 26 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
26 struct f2fs_sb_info *sbi = F2FS_SB(sb);
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
30 int err; 30 int err;
31 31
32 inode = new_inode(sb); 32 inode = new_inode(dir->i_sb);
33 if (!inode) 33 if (!inode)
34 return ERR_PTR(-ENOMEM); 34 return ERR_PTR(-ENOMEM);
35 35
@@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
103 bool excl) 103 bool excl)
104{ 104{
105 struct super_block *sb = dir->i_sb; 105 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
106 struct f2fs_sb_info *sbi = F2FS_SB(sb);
107 struct inode *inode; 106 struct inode *inode;
108 nid_t ino = 0; 107 nid_t ino = 0;
109 int err; 108 int err;
@@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
146 struct dentry *dentry) 145 struct dentry *dentry)
147{ 146{
148 struct inode *inode = old_dentry->d_inode; 147 struct inode *inode = old_dentry->d_inode;
149 struct super_block *sb = dir->i_sb; 148 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
150 struct f2fs_sb_info *sbi = F2FS_SB(sb);
151 int err; 149 int err;
152 150
153 f2fs_balance_fs(sbi); 151 f2fs_balance_fs(sbi);
@@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 205
208static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 206static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
209{ 207{
210 struct super_block *sb = dir->i_sb; 208 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
211 struct f2fs_sb_info *sbi = F2FS_SB(sb);
212 struct inode *inode = dentry->d_inode; 209 struct inode *inode = dentry->d_inode;
213 struct f2fs_dir_entry *de; 210 struct f2fs_dir_entry *de;
214 struct page *page; 211 struct page *page;
@@ -242,8 +239,7 @@ fail:
242static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 239static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
243 const char *symname) 240 const char *symname)
244{ 241{
245 struct super_block *sb = dir->i_sb; 242 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
246 struct f2fs_sb_info *sbi = F2FS_SB(sb);
247 struct inode *inode; 243 struct inode *inode;
248 size_t symlen = strlen(symname) + 1; 244 size_t symlen = strlen(symname) + 1;
249 int err; 245 int err;
@@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
330static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 326static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
331 umode_t mode, dev_t rdev) 327 umode_t mode, dev_t rdev)
332{ 328{
333 struct super_block *sb = dir->i_sb; 329 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
334 struct f2fs_sb_info *sbi = F2FS_SB(sb);
335 struct inode *inode; 330 struct inode *inode;
336 int err = 0; 331 int err = 0;
337 332
@@ -369,8 +364,7 @@ out:
369static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 364static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
370 struct inode *new_dir, struct dentry *new_dentry) 365 struct inode *new_dir, struct dentry *new_dentry)
371{ 366{
372 struct super_block *sb = old_dir->i_sb; 367 struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
373 struct f2fs_sb_info *sbi = F2FS_SB(sb);
374 struct inode *old_inode = old_dentry->d_inode; 368 struct inode *old_inode = old_dentry->d_inode;
375 struct inode *new_inode = new_dentry->d_inode; 369 struct inode *new_inode = new_dentry->d_inode;
376 struct page *old_dir_page; 370 struct page *old_dir_page;
@@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
393 goto out_old; 387 goto out_old;
394 } 388 }
395 389
396 f2fs_lock_op(sbi);
397
398 if (new_inode) { 390 if (new_inode) {
399 391
400 err = -ENOTEMPTY; 392 err = -ENOTEMPTY;
@@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
407 if (!new_entry) 399 if (!new_entry)
408 goto out_dir; 400 goto out_dir;
409 401
402 f2fs_lock_op(sbi);
403
410 err = acquire_orphan_inode(sbi); 404 err = acquire_orphan_inode(sbi);
411 if (err) 405 if (err)
412 goto put_out_dir; 406 goto put_out_dir;
@@ -435,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
435 update_inode_page(old_inode); 429 update_inode_page(old_inode);
436 update_inode_page(new_inode); 430 update_inode_page(new_inode);
437 } else { 431 } else {
432 f2fs_lock_op(sbi);
433
438 err = f2fs_add_link(new_dentry, old_inode); 434 err = f2fs_add_link(new_dentry, old_inode);
439 if (err) 435 if (err) {
436 f2fs_unlock_op(sbi);
440 goto out_dir; 437 goto out_dir;
438 }
441 439
442 if (old_dir_entry) { 440 if (old_dir_entry) {
443 inc_nlink(new_dir); 441 inc_nlink(new_dir);
@@ -472,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
472 return 0; 470 return 0;
473 471
474put_out_dir: 472put_out_dir:
473 f2fs_unlock_op(sbi);
475 kunmap(new_page); 474 kunmap(new_page);
476 f2fs_put_page(new_page, 0); 475 f2fs_put_page(new_page, 0);
477out_dir: 476out_dir:
@@ -479,7 +478,151 @@ out_dir:
479 kunmap(old_dir_page); 478 kunmap(old_dir_page);
480 f2fs_put_page(old_dir_page, 0); 479 f2fs_put_page(old_dir_page, 0);
481 } 480 }
481out_old:
482 kunmap(old_page);
483 f2fs_put_page(old_page, 0);
484out:
485 return err;
486}
487
488static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
489 struct inode *new_dir, struct dentry *new_dentry)
490{
491 struct super_block *sb = old_dir->i_sb;
492 struct f2fs_sb_info *sbi = F2FS_SB(sb);
493 struct inode *old_inode = old_dentry->d_inode;
494 struct inode *new_inode = new_dentry->d_inode;
495 struct page *old_dir_page, *new_dir_page;
496 struct page *old_page, *new_page;
497 struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
498 struct f2fs_dir_entry *old_entry, *new_entry;
499 int old_nlink = 0, new_nlink = 0;
500 int err = -ENOENT;
501
502 f2fs_balance_fs(sbi);
503
504 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
505 if (!old_entry)
506 goto out;
507
508 new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
509 if (!new_entry)
510 goto out_old;
511
512 /* prepare for updating ".." directory entry info later */
513 if (old_dir != new_dir) {
514 if (S_ISDIR(old_inode->i_mode)) {
515 err = -EIO;
516 old_dir_entry = f2fs_parent_dir(old_inode,
517 &old_dir_page);
518 if (!old_dir_entry)
519 goto out_new;
520 }
521
522 if (S_ISDIR(new_inode->i_mode)) {
523 err = -EIO;
524 new_dir_entry = f2fs_parent_dir(new_inode,
525 &new_dir_page);
526 if (!new_dir_entry)
527 goto out_old_dir;
528 }
529 }
530
531 /*
532 * If cross rename between file and directory those are not
533 * in the same directory, we will inc nlink of file's parent
534 * later, so we should check upper boundary of its nlink.
535 */
536 if ((!old_dir_entry || !new_dir_entry) &&
537 old_dir_entry != new_dir_entry) {
538 old_nlink = old_dir_entry ? -1 : 1;
539 new_nlink = -old_nlink;
540 err = -EMLINK;
541 if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
542 (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
543 goto out_new_dir;
544 }
545
546 f2fs_lock_op(sbi);
547
548 err = update_dent_inode(old_inode, &new_dentry->d_name);
549 if (err)
550 goto out_unlock;
551
552 err = update_dent_inode(new_inode, &old_dentry->d_name);
553 if (err)
554 goto out_undo;
555
556 /* update ".." directory entry info of old dentry */
557 if (old_dir_entry)
558 f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
559
560 /* update ".." directory entry info of new dentry */
561 if (new_dir_entry)
562 f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
563
564 /* update directory entry info of old dir inode */
565 f2fs_set_link(old_dir, old_entry, old_page, new_inode);
566
567 down_write(&F2FS_I(old_inode)->i_sem);
568 file_lost_pino(old_inode);
569 up_write(&F2FS_I(old_inode)->i_sem);
570
571 update_inode_page(old_inode);
572
573 old_dir->i_ctime = CURRENT_TIME;
574 if (old_nlink) {
575 down_write(&F2FS_I(old_dir)->i_sem);
576 if (old_nlink < 0)
577 drop_nlink(old_dir);
578 else
579 inc_nlink(old_dir);
580 up_write(&F2FS_I(old_dir)->i_sem);
581 }
582 mark_inode_dirty(old_dir);
583 update_inode_page(old_dir);
584
585 /* update directory entry info of new dir inode */
586 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
587
588 down_write(&F2FS_I(new_inode)->i_sem);
589 file_lost_pino(new_inode);
590 up_write(&F2FS_I(new_inode)->i_sem);
591
592 update_inode_page(new_inode);
593
594 new_dir->i_ctime = CURRENT_TIME;
595 if (new_nlink) {
596 down_write(&F2FS_I(new_dir)->i_sem);
597 if (new_nlink < 0)
598 drop_nlink(new_dir);
599 else
600 inc_nlink(new_dir);
601 up_write(&F2FS_I(new_dir)->i_sem);
602 }
603 mark_inode_dirty(new_dir);
604 update_inode_page(new_dir);
605
606 f2fs_unlock_op(sbi);
607 return 0;
608out_undo:
609 /* Still we may fail to recover name info of f2fs_inode here */
610 update_dent_inode(old_inode, &old_dentry->d_name);
611out_unlock:
482 f2fs_unlock_op(sbi); 612 f2fs_unlock_op(sbi);
613out_new_dir:
614 if (new_dir_entry) {
615 kunmap(new_dir_page);
616 f2fs_put_page(new_dir_page, 0);
617 }
618out_old_dir:
619 if (old_dir_entry) {
620 kunmap(old_dir_page);
621 f2fs_put_page(old_dir_page, 0);
622 }
623out_new:
624 kunmap(new_page);
625 f2fs_put_page(new_page, 0);
483out_old: 626out_old:
484 kunmap(old_page); 627 kunmap(old_page);
485 f2fs_put_page(old_page, 0); 628 f2fs_put_page(old_page, 0);
@@ -487,6 +630,71 @@ out:
487 return err; 630 return err;
488} 631}
489 632
633static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
634 struct inode *new_dir, struct dentry *new_dentry,
635 unsigned int flags)
636{
637 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
638 return -EINVAL;
639
640 if (flags & RENAME_EXCHANGE) {
641 return f2fs_cross_rename(old_dir, old_dentry,
642 new_dir, new_dentry);
643 }
644 /*
645 * VFS has already handled the new dentry existence case,
646 * here, we just deal with "RENAME_NOREPLACE" as regular rename.
647 */
648 return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
649}
650
651static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
652{
653 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
654 struct inode *inode;
655 int err;
656
657 inode = f2fs_new_inode(dir, mode);
658 if (IS_ERR(inode))
659 return PTR_ERR(inode);
660
661 inode->i_op = &f2fs_file_inode_operations;
662 inode->i_fop = &f2fs_file_operations;
663 inode->i_mapping->a_ops = &f2fs_dblock_aops;
664
665 f2fs_lock_op(sbi);
666 err = acquire_orphan_inode(sbi);
667 if (err)
668 goto out;
669
670 err = f2fs_do_tmpfile(inode, dir);
671 if (err)
672 goto release_out;
673
674 /*
675 * add this non-linked tmpfile to orphan list, in this way we could
676 * remove all unused data of tmpfile after abnormal power-off.
677 */
678 add_orphan_inode(sbi, inode->i_ino);
679 f2fs_unlock_op(sbi);
680
681 alloc_nid_done(sbi, inode->i_ino);
682 d_tmpfile(dentry, inode);
683 unlock_new_inode(inode);
684 return 0;
685
686release_out:
687 release_orphan_inode(sbi);
688out:
689 f2fs_unlock_op(sbi);
690 clear_nlink(inode);
691 unlock_new_inode(inode);
692 make_bad_inode(inode);
693 iput(inode);
694 alloc_nid_failed(sbi, inode->i_ino);
695 return err;
696}
697
490const struct inode_operations f2fs_dir_inode_operations = { 698const struct inode_operations f2fs_dir_inode_operations = {
491 .create = f2fs_create, 699 .create = f2fs_create,
492 .lookup = f2fs_lookup, 700 .lookup = f2fs_lookup,
@@ -497,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {
497 .rmdir = f2fs_rmdir, 705 .rmdir = f2fs_rmdir,
498 .mknod = f2fs_mknod, 706 .mknod = f2fs_mknod,
499 .rename = f2fs_rename, 707 .rename = f2fs_rename,
708 .rename2 = f2fs_rename2,
709 .tmpfile = f2fs_tmpfile,
500 .getattr = f2fs_getattr, 710 .getattr = f2fs_getattr,
501 .setattr = f2fs_setattr, 711 .setattr = f2fs_setattr,
502 .get_acl = f2fs_get_acl, 712 .get_acl = f2fs_get_acl,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4b697ccc9b0c..d3d90d284631 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
25 25
26static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
27static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
28static struct kmem_cache *nat_entry_set_slab;
28 29
29bool available_free_memory(struct f2fs_sb_info *sbi, int type) 30bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30{ 31{
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
90 91
91 /* get current nat block page with lock */ 92 /* get current nat block page with lock */
92 src_page = get_meta_page(sbi, src_off); 93 src_page = get_meta_page(sbi, src_off);
93
94 /* Dirty src_page means that it is already the new target NAT page. */
95 if (PageDirty(src_page))
96 return src_page;
97
98 dst_page = grab_meta_page(sbi, dst_off); 94 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page));
99 96
100 src_addr = page_address(src_page); 97 src_addr = page_address(src_page);
101 dst_addr = page_address(dst_page); 98 dst_addr = page_address(dst_page);
@@ -845,7 +842,7 @@ void remove_inode_page(struct inode *inode)
845 truncate_node(&dn); 842 truncate_node(&dn);
846} 843}
847 844
848struct page *new_inode_page(struct inode *inode, const struct qstr *name) 845struct page *new_inode_page(struct inode *inode)
849{ 846{
850 struct dnode_of_data dn; 847 struct dnode_of_data dn;
851 848
@@ -1234,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
1234 if (wbc->for_reclaim) 1231 if (wbc->for_reclaim)
1235 goto redirty_out; 1232 goto redirty_out;
1236 1233
1237 mutex_lock(&sbi->node_write); 1234 down_read(&sbi->node_write);
1238 set_page_writeback(page); 1235 set_page_writeback(page);
1239 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1236 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1240 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); 1237 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1241 dec_page_count(sbi, F2FS_DIRTY_NODES); 1238 dec_page_count(sbi, F2FS_DIRTY_NODES);
1242 mutex_unlock(&sbi->node_write); 1239 up_read(&sbi->node_write);
1243 unlock_page(page); 1240 unlock_page(page);
1244 return 0; 1241 return 0;
1245 1242
@@ -1552,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1552 clear_node_page_dirty(page); 1549 clear_node_page_dirty(page);
1553} 1550}
1554 1551
1555static void recover_inline_xattr(struct inode *inode, struct page *page) 1552void recover_inline_xattr(struct inode *inode, struct page *page)
1556{ 1553{
1557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1558 void *src_addr, *dst_addr; 1555 void *src_addr, *dst_addr;
@@ -1591,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1591 nid_t new_xnid = nid_of_node(page); 1588 nid_t new_xnid = nid_of_node(page);
1592 struct node_info ni; 1589 struct node_info ni;
1593 1590
1594 recover_inline_xattr(inode, page);
1595
1596 if (!f2fs_has_xattr_block(ofs_of_node(page))) 1591 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1597 return false; 1592 return false;
1598 1593
@@ -1744,7 +1739,90 @@ skip:
1744 return err; 1739 return err;
1745} 1740}
1746 1741
1747static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1742static struct nat_entry_set *grab_nat_entry_set(void)
1743{
1744 struct nat_entry_set *nes =
1745 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1746
1747 nes->entry_cnt = 0;
1748 INIT_LIST_HEAD(&nes->set_list);
1749 INIT_LIST_HEAD(&nes->entry_list);
1750 return nes;
1751}
1752
1753static void release_nat_entry_set(struct nat_entry_set *nes,
1754 struct f2fs_nm_info *nm_i)
1755{
1756 f2fs_bug_on(!list_empty(&nes->entry_list));
1757
1758 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1759 list_del(&nes->set_list);
1760 kmem_cache_free(nat_entry_set_slab, nes);
1761}
1762
1763static void adjust_nat_entry_set(struct nat_entry_set *nes,
1764 struct list_head *head)
1765{
1766 struct nat_entry_set *next = nes;
1767
1768 if (list_is_last(&nes->set_list, head))
1769 return;
1770
1771 list_for_each_entry_continue(next, head, set_list)
1772 if (nes->entry_cnt <= next->entry_cnt)
1773 break;
1774
1775 list_move_tail(&nes->set_list, &next->set_list);
1776}
1777
1778static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1779{
1780 struct nat_entry_set *nes;
1781 nid_t start_nid = START_NID(ne->ni.nid);
1782
1783 list_for_each_entry(nes, head, set_list) {
1784 if (nes->start_nid == start_nid) {
1785 list_move_tail(&ne->list, &nes->entry_list);
1786 nes->entry_cnt++;
1787 adjust_nat_entry_set(nes, head);
1788 return;
1789 }
1790 }
1791
1792 nes = grab_nat_entry_set();
1793
1794 nes->start_nid = start_nid;
1795 list_move_tail(&ne->list, &nes->entry_list);
1796 nes->entry_cnt++;
1797 list_add(&nes->set_list, head);
1798}
1799
1800static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1801{
1802 struct f2fs_nm_info *nm_i = NM_I(sbi);
1803 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1804 struct list_head *set_list = &nm_i->nat_entry_set;
1805 struct nat_entry *ne, *tmp;
1806
1807 write_lock(&nm_i->nat_tree_lock);
1808 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1809 if (nat_get_blkaddr(ne) == NEW_ADDR)
1810 continue;
1811 add_nat_entry(ne, set_list);
1812 nm_i->dirty_nat_cnt++;
1813 }
1814 write_unlock(&nm_i->nat_tree_lock);
1815}
1816
1817static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1818{
1819 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1820 return true;
1821 else
1822 return false;
1823}
1824
1825static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1748{ 1826{
1749 struct f2fs_nm_info *nm_i = NM_I(sbi); 1827 struct f2fs_nm_info *nm_i = NM_I(sbi);
1750 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1828 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1752 int i; 1830 int i;
1753 1831
1754 mutex_lock(&curseg->curseg_mutex); 1832 mutex_lock(&curseg->curseg_mutex);
1755
1756 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1757 mutex_unlock(&curseg->curseg_mutex);
1758 return false;
1759 }
1760
1761 for (i = 0; i < nats_in_cursum(sum); i++) { 1833 for (i = 0; i < nats_in_cursum(sum); i++) {
1762 struct nat_entry *ne; 1834 struct nat_entry *ne;
1763 struct f2fs_nat_entry raw_ne; 1835 struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1767retry: 1839retry:
1768 write_lock(&nm_i->nat_tree_lock); 1840 write_lock(&nm_i->nat_tree_lock);
1769 ne = __lookup_nat_cache(nm_i, nid); 1841 ne = __lookup_nat_cache(nm_i, nid);
1770 if (ne) { 1842 if (ne)
1771 __set_nat_cache_dirty(nm_i, ne); 1843 goto found;
1772 write_unlock(&nm_i->nat_tree_lock); 1844
1773 continue;
1774 }
1775 ne = grab_nat_entry(nm_i, nid); 1845 ne = grab_nat_entry(nm_i, nid);
1776 if (!ne) { 1846 if (!ne) {
1777 write_unlock(&nm_i->nat_tree_lock); 1847 write_unlock(&nm_i->nat_tree_lock);
1778 goto retry; 1848 goto retry;
1779 } 1849 }
1780 node_info_from_raw_nat(&ne->ni, &raw_ne); 1850 node_info_from_raw_nat(&ne->ni, &raw_ne);
1851found:
1781 __set_nat_cache_dirty(nm_i, ne); 1852 __set_nat_cache_dirty(nm_i, ne);
1782 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1783 } 1854 }
1784 update_nats_in_cursum(sum, -i); 1855 update_nats_in_cursum(sum, -i);
1785 mutex_unlock(&curseg->curseg_mutex); 1856 mutex_unlock(&curseg->curseg_mutex);
1786 return true;
1787} 1857}
1788 1858
1789/* 1859/*
@@ -1794,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1794 struct f2fs_nm_info *nm_i = NM_I(sbi); 1864 struct f2fs_nm_info *nm_i = NM_I(sbi);
1795 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1865 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1796 struct f2fs_summary_block *sum = curseg->sum_blk; 1866 struct f2fs_summary_block *sum = curseg->sum_blk;
1797 struct nat_entry *ne, *cur; 1867 struct nat_entry_set *nes, *tmp;
1798 struct page *page = NULL; 1868 struct list_head *head = &nm_i->nat_entry_set;
1799 struct f2fs_nat_block *nat_blk = NULL; 1869 bool to_journal = true;
1800 nid_t start_nid = 0, end_nid = 0;
1801 bool flushed;
1802 1870
1803 flushed = flush_nats_in_journal(sbi); 1871 /* merge nat entries of dirty list to nat entry set temporarily */
1804 1872 merge_nats_in_set(sbi);
1805 if (!flushed)
1806 mutex_lock(&curseg->curseg_mutex);
1807
1808 /* 1) flush dirty nat caches */
1809 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1810 nid_t nid;
1811 struct f2fs_nat_entry raw_ne;
1812 int offset = -1;
1813
1814 if (nat_get_blkaddr(ne) == NEW_ADDR)
1815 continue;
1816 1873
1817 nid = nat_get_nid(ne); 1874 /*
1875 * if there are no enough space in journal to store dirty nat
1876 * entries, remove all entries from journal and merge them
1877 * into nat entry set.
1878 */
1879 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1880 remove_nats_in_journal(sbi);
1818 1881
1819 if (flushed) 1882 /*
1820 goto to_nat_page; 1883 * merge nat entries of dirty list to nat entry set temporarily
1884 */
1885 merge_nats_in_set(sbi);
1886 }
1821 1887
1822 /* if there is room for nat enries in curseg->sumpage */ 1888 if (!nm_i->dirty_nat_cnt)
1823 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1889 return;
1824 if (offset >= 0) {
1825 raw_ne = nat_in_journal(sum, offset);
1826 goto flush_now;
1827 }
1828to_nat_page:
1829 if (!page || (start_nid > nid || nid > end_nid)) {
1830 if (page) {
1831 f2fs_put_page(page, 1);
1832 page = NULL;
1833 }
1834 start_nid = START_NID(nid);
1835 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1836 1890
1837 /* 1891 /*
1838 * get nat block with dirty flag, increased reference 1892 * there are two steps to flush nat entries:
1839 * count, mapped and lock 1893 * #1, flush nat entries to journal in current hot data summary block.
1840 */ 1894 * #2, flush nat entries to nat page.
1895 */
1896 list_for_each_entry_safe(nes, tmp, head, set_list) {
1897 struct f2fs_nat_block *nat_blk;
1898 struct nat_entry *ne, *cur;
1899 struct page *page;
1900 nid_t start_nid = nes->start_nid;
1901
1902 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
1903 to_journal = false;
1904
1905 if (to_journal) {
1906 mutex_lock(&curseg->curseg_mutex);
1907 } else {
1841 page = get_next_nat_page(sbi, start_nid); 1908 page = get_next_nat_page(sbi, start_nid);
1842 nat_blk = page_address(page); 1909 nat_blk = page_address(page);
1910 f2fs_bug_on(!nat_blk);
1843 } 1911 }
1844 1912
1845 f2fs_bug_on(!nat_blk); 1913 /* flush dirty nats in nat entry set */
1846 raw_ne = nat_blk->entries[nid - start_nid]; 1914 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
1847flush_now: 1915 struct f2fs_nat_entry *raw_ne;
1848 raw_nat_from_node_info(&raw_ne, &ne->ni); 1916 nid_t nid = nat_get_nid(ne);
1849 1917 int offset;
1850 if (offset < 0) { 1918
1851 nat_blk->entries[nid - start_nid] = raw_ne; 1919 if (to_journal) {
1852 } else { 1920 offset = lookup_journal_in_cursum(sum,
1853 nat_in_journal(sum, offset) = raw_ne; 1921 NAT_JOURNAL, nid, 1);
1854 nid_in_journal(sum, offset) = cpu_to_le32(nid); 1922 f2fs_bug_on(offset < 0);
1855 } 1923 raw_ne = &nat_in_journal(sum, offset);
1924 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1925 } else {
1926 raw_ne = &nat_blk->entries[nid - start_nid];
1927 }
1928 raw_nat_from_node_info(raw_ne, &ne->ni);
1856 1929
1857 if (nat_get_blkaddr(ne) == NULL_ADDR && 1930 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1858 add_free_nid(sbi, nid, false) <= 0) { 1931 add_free_nid(sbi, nid, false) <= 0) {
1859 write_lock(&nm_i->nat_tree_lock); 1932 write_lock(&nm_i->nat_tree_lock);
1860 __del_from_nat_cache(nm_i, ne); 1933 __del_from_nat_cache(nm_i, ne);
1861 write_unlock(&nm_i->nat_tree_lock); 1934 write_unlock(&nm_i->nat_tree_lock);
1862 } else { 1935 } else {
1863 write_lock(&nm_i->nat_tree_lock); 1936 write_lock(&nm_i->nat_tree_lock);
1864 __clear_nat_cache_dirty(nm_i, ne); 1937 __clear_nat_cache_dirty(nm_i, ne);
1865 write_unlock(&nm_i->nat_tree_lock); 1938 write_unlock(&nm_i->nat_tree_lock);
1939 }
1866 } 1940 }
1941
1942 if (to_journal)
1943 mutex_unlock(&curseg->curseg_mutex);
1944 else
1945 f2fs_put_page(page, 1);
1946
1947 release_nat_entry_set(nes, nm_i);
1867 } 1948 }
1868 if (!flushed) 1949
1869 mutex_unlock(&curseg->curseg_mutex); 1950 f2fs_bug_on(!list_empty(head));
1870 f2fs_put_page(page, 1); 1951 f2fs_bug_on(nm_i->dirty_nat_cnt);
1871} 1952}
1872 1953
1873static int init_node_manager(struct f2fs_sb_info *sbi) 1954static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1896 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1977 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1897 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1898 INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1979 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1980 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1899 1981
1900 mutex_init(&nm_i->build_lock); 1982 mutex_init(&nm_i->build_lock);
1901 spin_lock_init(&nm_i->free_nid_list_lock); 1983 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2058,30 @@ int __init create_node_manager_caches(void)
1976 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2058 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1977 sizeof(struct nat_entry)); 2059 sizeof(struct nat_entry));
1978 if (!nat_entry_slab) 2060 if (!nat_entry_slab)
1979 return -ENOMEM; 2061 goto fail;
1980 2062
1981 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2063 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1982 sizeof(struct free_nid)); 2064 sizeof(struct free_nid));
1983 if (!free_nid_slab) { 2065 if (!free_nid_slab)
1984 kmem_cache_destroy(nat_entry_slab); 2066 goto destory_nat_entry;
1985 return -ENOMEM; 2067
1986 } 2068 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2069 sizeof(struct nat_entry_set));
2070 if (!nat_entry_set_slab)
2071 goto destory_free_nid;
1987 return 0; 2072 return 0;
2073
2074destory_free_nid:
2075 kmem_cache_destroy(free_nid_slab);
2076destory_nat_entry:
2077 kmem_cache_destroy(nat_entry_slab);
2078fail:
2079 return -ENOMEM;
1988} 2080}
1989 2081
1990void destroy_node_manager_caches(void) 2082void destroy_node_manager_caches(void)
1991{ 2083{
2084 kmem_cache_destroy(nat_entry_set_slab);
1992 kmem_cache_destroy(free_nid_slab); 2085 kmem_cache_destroy(free_nid_slab);
1993 kmem_cache_destroy(nat_entry_slab); 2086 kmem_cache_destroy(nat_entry_slab);
1994} 2087}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
89 DIRTY_DENTS /* indicates dirty dentry pages */ 89 DIRTY_DENTS /* indicates dirty dentry pages */
90}; 90};
91 91
92struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */
96 unsigned int entry_cnt; /* the # of nat entries in set */
97};
98
92/* 99/*
93 * For free nid mangement 100 * For free nid mangement
94 */ 101 */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368a4a86..fe1c6d921ba2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 struct node_info ni; 300 struct node_info ni;
301 int err = 0, recovered = 0; 301 int err = 0, recovered = 0;
302 302
303 recover_inline_xattr(inode, page);
304
303 if (recover_inline_data(inode, page)) 305 if (recover_inline_data(inode, page))
304 goto out; 306 goto out;
305 307
@@ -434,7 +436,9 @@ next:
434 436
435int recover_fsync_data(struct f2fs_sb_info *sbi) 437int recover_fsync_data(struct f2fs_sb_info *sbi)
436{ 438{
439 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
437 struct list_head inode_list; 440 struct list_head inode_list;
441 block_t blkaddr;
438 int err; 442 int err;
439 bool need_writecp = false; 443 bool need_writecp = false;
440 444
@@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
447 451
448 /* step #1: find fsynced inode numbers */ 452 /* step #1: find fsynced inode numbers */
449 sbi->por_doing = true; 453 sbi->por_doing = true;
454
455 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456
450 err = find_fsync_dnodes(sbi, &inode_list); 457 err = find_fsync_dnodes(sbi, &inode_list);
451 if (err) 458 if (err)
452 goto out; 459 goto out;
@@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
462out: 469out:
463 destroy_fsync_dnodes(&inode_list); 470 destroy_fsync_dnodes(&inode_list);
464 kmem_cache_destroy(fsync_entry_slab); 471 kmem_cache_destroy(fsync_entry_slab);
472
473 if (err) {
474 truncate_inode_pages_final(NODE_MAPPING(sbi));
475 truncate_inode_pages_final(META_MAPPING(sbi));
476 }
477
465 sbi->por_doing = false; 478 sbi->por_doing = false;
466 if (!err && need_writecp) 479 if (err) {
480 discard_next_dnode(sbi, blkaddr);
481
482 /* Flush all the NAT/SIT pages */
483 while (get_pages(sbi, F2FS_DIRTY_META))
484 sync_meta_pages(sbi, META, LONG_MAX);
485 } else if (need_writecp) {
467 write_checkpoint(sbi, false); 486 write_checkpoint(sbi, false);
487 }
468 return err; 488 return err;
469} 489}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04613df710a..0dfeebae2a50 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
240 struct flush_cmd cmd; 240 struct flush_cmd cmd;
241 241
242 trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
243 test_opt(sbi, FLUSH_MERGE));
244
245 if (test_opt(sbi, NOBARRIER))
246 return 0;
247
242 if (!test_opt(sbi, FLUSH_MERGE)) 248 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244 250
@@ -272,13 +278,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
272 return -ENOMEM; 278 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock); 279 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue); 280 init_waitqueue_head(&fcc->flush_wait_queue);
275 sbi->sm_info->cmd_control_info = fcc; 281 SM_I(sbi)->cmd_control_info = fcc;
276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
278 if (IS_ERR(fcc->f2fs_issue_flush)) { 284 if (IS_ERR(fcc->f2fs_issue_flush)) {
279 err = PTR_ERR(fcc->f2fs_issue_flush); 285 err = PTR_ERR(fcc->f2fs_issue_flush);
280 kfree(fcc); 286 kfree(fcc);
281 sbi->sm_info->cmd_control_info = NULL; 287 SM_I(sbi)->cmd_control_info = NULL;
282 return err; 288 return err;
283 } 289 }
284 290
@@ -287,13 +293,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
287 293
288void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) 294void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
289{ 295{
290 struct flush_cmd_control *fcc = 296 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
291 sbi->sm_info->cmd_control_info;
292 297
293 if (fcc && fcc->f2fs_issue_flush) 298 if (fcc && fcc->f2fs_issue_flush)
294 kthread_stop(fcc->f2fs_issue_flush); 299 kthread_stop(fcc->f2fs_issue_flush);
295 kfree(fcc); 300 kfree(fcc);
296 sbi->sm_info->cmd_control_info = NULL; 301 SM_I(sbi)->cmd_control_info = NULL;
297} 302}
298 303
299static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 304static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -377,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
377 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
378} 383}
379 384
380void discard_next_dnode(struct f2fs_sb_info *sbi) 385void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
381{ 386{
382 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
383 block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
384
385 if (f2fs_issue_discard(sbi, blkaddr, 1)) { 387 if (f2fs_issue_discard(sbi, blkaddr, 1)) {
386 struct page *page = grab_meta_page(sbi, blkaddr); 388 struct page *page = grab_meta_page(sbi, blkaddr);
387 /* zero-filled page */ 389 /* zero-filled page */
@@ -437,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
437static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 439static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
438{ 440{
439 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
440 unsigned int segno = -1; 442 unsigned int segno;
441 unsigned int total_segs = TOTAL_SEGS(sbi); 443 unsigned int total_segs = TOTAL_SEGS(sbi);
442 444
443 mutex_lock(&dirty_i->seglist_lock); 445 mutex_lock(&dirty_i->seglist_lock);
444 while (1) { 446 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
445 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
446 segno + 1);
447 if (segno >= total_segs)
448 break;
449 __set_test_and_free(sbi, segno); 447 __set_test_and_free(sbi, segno);
450 }
451 mutex_unlock(&dirty_i->seglist_lock); 448 mutex_unlock(&dirty_i->seglist_lock);
452} 449}
453 450
@@ -974,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
974{ 971{
975 struct sit_info *sit_i = SIT_I(sbi); 972 struct sit_info *sit_i = SIT_I(sbi);
976 struct curseg_info *curseg; 973 struct curseg_info *curseg;
977 unsigned int old_cursegno;
978 974
979 curseg = CURSEG_I(sbi, type); 975 curseg = CURSEG_I(sbi, type);
980 976
981 mutex_lock(&curseg->curseg_mutex); 977 mutex_lock(&curseg->curseg_mutex);
982 978
983 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 979 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
984 old_cursegno = curseg->segno;
985 980
986 /* 981 /*
987 * __add_sum_entry should be resided under the curseg_mutex 982 * __add_sum_entry should be resided under the curseg_mutex
@@ -1002,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1002 * since SSR needs latest valid block information. 997 * since SSR needs latest valid block information.
1003 */ 998 */
1004 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 999 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1005 locate_dirty_segment(sbi, old_cursegno);
1006 1000
1007 mutex_unlock(&sit_i->sentry_lock); 1001 mutex_unlock(&sit_i->sentry_lock);
1008 1002
@@ -1532,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1532 struct page *page = NULL; 1526 struct page *page = NULL;
1533 struct f2fs_sit_block *raw_sit = NULL; 1527 struct f2fs_sit_block *raw_sit = NULL;
1534 unsigned int start = 0, end = 0; 1528 unsigned int start = 0, end = 0;
1535 unsigned int segno = -1; 1529 unsigned int segno;
1536 bool flushed; 1530 bool flushed;
1537 1531
1538 mutex_lock(&curseg->curseg_mutex); 1532 mutex_lock(&curseg->curseg_mutex);
@@ -1544,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1544 */ 1538 */
1545 flushed = flush_sits_in_journal(sbi); 1539 flushed = flush_sits_in_journal(sbi);
1546 1540
1547 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { 1541 for_each_set_bit(segno, bitmap, nsegs) {
1548 struct seg_entry *se = get_seg_entry(sbi, segno); 1542 struct seg_entry *se = get_seg_entry(sbi, segno);
1549 int sit_offset, offset; 1543 int sit_offset, offset;
1550 1544
@@ -1703,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1703 struct curseg_info *array; 1697 struct curseg_info *array;
1704 int i; 1698 int i;
1705 1699
1706 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); 1700 array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1707 if (!array) 1701 if (!array)
1708 return -ENOMEM; 1702 return -ENOMEM;
1709 1703
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..55973f7b0330 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
347 if (test_and_clear_bit(segno, free_i->free_segmap)) { 347 if (test_and_clear_bit(segno, free_i->free_segmap)) {
348 free_i->free_segments++; 348 free_i->free_segments++;
349 349
350 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), 350 next = find_next_bit(free_i->free_segmap,
351 start_segno); 351 start_segno + sbi->segs_per_sec, start_segno);
352 if (next >= start_segno + sbi->segs_per_sec) { 352 if (next >= start_segno + sbi->segs_per_sec) {
353 if (test_and_clear_bit(secno, free_i->free_secmap)) 353 if (test_and_clear_bit(secno, free_i->free_secmap))
354 free_i->free_sections++; 354 free_i->free_sections++;
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
486 if (S_ISDIR(inode->i_mode)) 486 if (S_ISDIR(inode->i_mode))
487 return false; 487 return false;
488 488
489 /* this is only set during fdatasync */
490 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
491 return true;
492
489 switch (SM_I(sbi)->ipu_policy) { 493 switch (SM_I(sbi)->ipu_policy) {
490 case F2FS_IPU_FORCE: 494 case F2FS_IPU_FORCE:
491 return true; 495 return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f96d9372ade..657582fc7601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge, 54 Opt_flush_merge,
55 Opt_nobarrier,
55 Opt_err, 56 Opt_err,
56}; 57};
57 58
@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
69 {Opt_inline_xattr, "inline_xattr"}, 70 {Opt_inline_xattr, "inline_xattr"},
70 {Opt_inline_data, "inline_data"}, 71 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"}, 72 {Opt_flush_merge, "flush_merge"},
73 {Opt_nobarrier, "nobarrier"},
72 {Opt_err, NULL}, 74 {Opt_err, NULL},
73}; 75};
74 76
@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
339 case Opt_flush_merge: 341 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE); 342 set_opt(sbi, FLUSH_MERGE);
341 break; 343 break;
344 case Opt_nobarrier:
345 set_opt(sbi, NOBARRIER);
346 break;
342 default: 347 default:
343 f2fs_msg(sb, KERN_ERR, 348 f2fs_msg(sb, KERN_ERR,
344 "Unrecognized mount option \"%s\" or missing value", 349 "Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
544 seq_puts(seq, ",inline_data"); 549 seq_puts(seq, ",inline_data");
545 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 550 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge"); 551 seq_puts(seq, ",flush_merge");
552 if (test_opt(sbi, NOBARRIER))
553 seq_puts(seq, ",nobarrier");
547 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 554 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
548 555
549 return 0; 556 return 0;
@@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
615 * Previous and new state of filesystem is RO, 622 * Previous and new state of filesystem is RO,
616 * so skip checking GC and FLUSH_MERGE conditions. 623 * so skip checking GC and FLUSH_MERGE conditions.
617 */ 624 */
618 if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 625 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
619 goto skip; 626 goto skip;
620 627
621 /* 628 /*
@@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
642 */ 649 */
643 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { 650 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
644 destroy_flush_cmd_control(sbi); 651 destroy_flush_cmd_control(sbi);
645 } else if (test_opt(sbi, FLUSH_MERGE) && 652 } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
646 !sbi->sm_info->cmd_control_info) {
647 err = create_flush_cmd_control(sbi); 653 err = create_flush_cmd_control(sbi);
648 if (err) 654 if (err)
649 goto restore_gc; 655 goto restore_gc;
@@ -947,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
947 mutex_init(&sbi->gc_mutex); 953 mutex_init(&sbi->gc_mutex);
948 mutex_init(&sbi->writepages); 954 mutex_init(&sbi->writepages);
949 mutex_init(&sbi->cp_mutex); 955 mutex_init(&sbi->cp_mutex);
950 mutex_init(&sbi->node_write); 956 init_rwsem(&sbi->node_write);
951 sbi->por_doing = false; 957 sbi->por_doing = false;
952 spin_lock_init(&sbi->stat_lock); 958 spin_lock_init(&sbi->stat_lock);
953 959
@@ -997,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
997 INIT_LIST_HEAD(&sbi->dir_inode_list); 1003 INIT_LIST_HEAD(&sbi->dir_inode_list);
998 spin_lock_init(&sbi->dir_inode_lock); 1004 spin_lock_init(&sbi->dir_inode_lock);
999 1005
1000 init_orphan_info(sbi); 1006 init_ino_entry_info(sbi);
1001 1007
1002 /* setup f2fs internal modules */ 1008 /* setup f2fs internal modules */
1003 err = build_segment_manager(sbi); 1009 err = build_segment_manager(sbi);
@@ -1034,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 goto free_node_inode; 1040 goto free_node_inode;
1035 } 1041 }
1036 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1042 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1043 iput(root);
1037 err = -EINVAL; 1044 err = -EINVAL;
1038 goto free_root_inode; 1045 goto free_node_inode;
1039 } 1046 }
1040 1047
1041 sb->s_root = d_make_root(root); /* allocate root dentry */ 1048 sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -1082,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1082 * If filesystem is not mounted as read-only then 1089 * If filesystem is not mounted as read-only then
1083 * do start the gc_thread. 1090 * do start the gc_thread.
1084 */ 1091 */
1085 if (!(sb->s_flags & MS_RDONLY)) { 1092 if (!f2fs_readonly(sb)) {
1086 /* After POR, we can run background GC thread.*/ 1093 /* After POR, we can run background GC thread.*/
1087 err = start_gc_thread(sbi); 1094 err = start_gc_thread(sbi);
1088 if (err) 1095 if (err)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
21#include <linux/rcupdate.h> 21#include <linux/rcupdate.h>
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23#include <linux/user_namespace.h> 23#include <linux/user_namespace.h>
24#include <linux/shmem_fs.h>
24 25
25#include <asm/poll.h> 26#include <asm/poll.h>
26#include <asm/siginfo.h> 27#include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
336 case F_GETPIPE_SZ: 337 case F_GETPIPE_SZ:
337 err = pipe_fcntl(filp, cmd, arg); 338 err = pipe_fcntl(filp, cmd, arg);
338 break; 339 break;
340 case F_ADD_SEALS:
341 case F_GET_SEALS:
342 err = shmem_fcntl(filp, cmd, arg);
343 break;
339 default: 344 default:
340 break; 345 break;
341 } 346 }
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
new file mode 100644
index 000000000000..9368236ca100
--- /dev/null
+++ b/fs/fs_pin.c
@@ -0,0 +1,78 @@
1#include <linux/fs.h>
2#include <linux/slab.h>
3#include <linux/fs_pin.h>
4#include "internal.h"
5#include "mount.h"
6
7static void pin_free_rcu(struct rcu_head *head)
8{
9 kfree(container_of(head, struct fs_pin, rcu));
10}
11
12static DEFINE_SPINLOCK(pin_lock);
13
14void pin_put(struct fs_pin *p)
15{
16 if (atomic_long_dec_and_test(&p->count))
17 call_rcu(&p->rcu, pin_free_rcu);
18}
19
20void pin_remove(struct fs_pin *pin)
21{
22 spin_lock(&pin_lock);
23 hlist_del(&pin->m_list);
24 hlist_del(&pin->s_list);
25 spin_unlock(&pin_lock);
26}
27
28void pin_insert(struct fs_pin *pin, struct vfsmount *m)
29{
30 spin_lock(&pin_lock);
31 hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
32 hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
33 spin_unlock(&pin_lock);
34}
35
36void mnt_pin_kill(struct mount *m)
37{
38 while (1) {
39 struct hlist_node *p;
40 struct fs_pin *pin;
41 rcu_read_lock();
42 p = ACCESS_ONCE(m->mnt_pins.first);
43 if (!p) {
44 rcu_read_unlock();
45 break;
46 }
47 pin = hlist_entry(p, struct fs_pin, m_list);
48 if (!atomic_long_inc_not_zero(&pin->count)) {
49 rcu_read_unlock();
50 cpu_relax();
51 continue;
52 }
53 rcu_read_unlock();
54 pin->kill(pin);
55 }
56}
57
58void sb_pin_kill(struct super_block *sb)
59{
60 while (1) {
61 struct hlist_node *p;
62 struct fs_pin *pin;
63 rcu_read_lock();
64 p = ACCESS_ONCE(sb->s_pins.first);
65 if (!p) {
66 rcu_read_unlock();
67 break;
68 }
69 pin = hlist_entry(p, struct fs_pin, s_list);
70 if (!atomic_long_inc_not_zero(&pin->count)) {
71 rcu_read_unlock();
72 cpu_relax();
73 continue;
74 }
75 rcu_read_unlock();
76 pin->kill(pin);
77 }
78}
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index a31b83c5cbd9..b39d487ccfb0 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -67,7 +67,7 @@ static int fscache_max_active_sysctl(struct ctl_table *table, int write,
67 return ret; 67 return ret;
68} 68}
69 69
70struct ctl_table fscache_sysctls[] = { 70static struct ctl_table fscache_sysctls[] = {
71 { 71 {
72 .procname = "object_max_active", 72 .procname = "object_max_active",
73 .data = &fscache_object_max_active, 73 .data = &fscache_object_max_active,
@@ -87,7 +87,7 @@ struct ctl_table fscache_sysctls[] = {
87 {} 87 {}
88}; 88};
89 89
90struct ctl_table fscache_sysctls_root[] = { 90static struct ctl_table fscache_sysctls_root[] = {
91 { 91 {
92 .procname = "fscache", 92 .procname = "fscache",
93 .mode = 0555, 93 .mode = 0555,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0c6048247a34..de1d84af9f7c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -845,12 +845,6 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
845 return err; 845 return err;
846} 846}
847 847
848static int fuse_rename(struct inode *olddir, struct dentry *oldent,
849 struct inode *newdir, struct dentry *newent)
850{
851 return fuse_rename2(olddir, oldent, newdir, newent, 0);
852}
853
854static int fuse_link(struct dentry *entry, struct inode *newdir, 848static int fuse_link(struct dentry *entry, struct inode *newdir,
855 struct dentry *newent) 849 struct dentry *newent)
856{ 850{
@@ -2024,7 +2018,6 @@ static const struct inode_operations fuse_dir_inode_operations = {
2024 .symlink = fuse_symlink, 2018 .symlink = fuse_symlink,
2025 .unlink = fuse_unlink, 2019 .unlink = fuse_unlink,
2026 .rmdir = fuse_rmdir, 2020 .rmdir = fuse_rmdir,
2027 .rename = fuse_rename,
2028 .rename2 = fuse_rename2, 2021 .rename2 = fuse_rename2,
2029 .link = fuse_link, 2022 .link = fuse_link,
2030 .setattr = fuse_setattr, 2023 .setattr = fuse_setattr,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 40ac2628ddcf..912061ac4baf 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1303,10 +1303,10 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1303 while (nbytes < *nbytesp && req->num_pages < req->max_pages) { 1303 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1304 unsigned npages; 1304 unsigned npages;
1305 size_t start; 1305 size_t start;
1306 unsigned n = req->max_pages - req->num_pages;
1307 ssize_t ret = iov_iter_get_pages(ii, 1306 ssize_t ret = iov_iter_get_pages(ii,
1308 &req->pages[req->num_pages], 1307 &req->pages[req->num_pages],
1309 n * PAGE_SIZE, &start); 1308 req->max_pages - req->num_pages,
1309 &start);
1310 if (ret < 0) 1310 if (ret < 0)
1311 return ret; 1311 return ret;
1312 1312
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 9c88da0e855a..4fcd40d6f308 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -89,6 +89,7 @@ extern int do_mknod(const char *file, int mode, unsigned int major,
89extern int link_file(const char *from, const char *to); 89extern int link_file(const char *from, const char *to);
90extern int hostfs_do_readlink(char *file, char *buf, int size); 90extern int hostfs_do_readlink(char *file, char *buf, int size);
91extern int rename_file(char *from, char *to); 91extern int rename_file(char *from, char *to);
92extern int rename2_file(char *from, char *to, unsigned int flags);
92extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, 93extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
93 long long *bfree_out, long long *bavail_out, 94 long long *bfree_out, long long *bavail_out,
94 long long *files_out, long long *ffree_out, 95 long long *files_out, long long *ffree_out,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index bb529f3b7f2b..fd62cae0fdcb 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -741,21 +741,31 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
741 return err; 741 return err;
742} 742}
743 743
744static int hostfs_rename(struct inode *from_ino, struct dentry *from, 744static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
745 struct inode *to_ino, struct dentry *to) 745 struct inode *new_dir, struct dentry *new_dentry,
746 unsigned int flags)
746{ 747{
747 char *from_name, *to_name; 748 char *old_name, *new_name;
748 int err; 749 int err;
749 750
750 if ((from_name = dentry_name(from)) == NULL) 751 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
752 return -EINVAL;
753
754 old_name = dentry_name(old_dentry);
755 if (old_name == NULL)
751 return -ENOMEM; 756 return -ENOMEM;
752 if ((to_name = dentry_name(to)) == NULL) { 757 new_name = dentry_name(new_dentry);
753 __putname(from_name); 758 if (new_name == NULL) {
759 __putname(old_name);
754 return -ENOMEM; 760 return -ENOMEM;
755 } 761 }
756 err = rename_file(from_name, to_name); 762 if (!flags)
757 __putname(from_name); 763 err = rename_file(old_name, new_name);
758 __putname(to_name); 764 else
765 err = rename2_file(old_name, new_name, flags);
766
767 __putname(old_name);
768 __putname(new_name);
759 return err; 769 return err;
760} 770}
761 771
@@ -867,7 +877,7 @@ static const struct inode_operations hostfs_dir_iops = {
867 .mkdir = hostfs_mkdir, 877 .mkdir = hostfs_mkdir,
868 .rmdir = hostfs_rmdir, 878 .rmdir = hostfs_rmdir,
869 .mknod = hostfs_mknod, 879 .mknod = hostfs_mknod,
870 .rename = hostfs_rename, 880 .rename2 = hostfs_rename2,
871 .permission = hostfs_permission, 881 .permission = hostfs_permission,
872 .setattr = hostfs_setattr, 882 .setattr = hostfs_setattr,
873}; 883};
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 67838f3aa20a..9765dab95cbd 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -14,6 +14,7 @@
14#include <sys/time.h> 14#include <sys/time.h>
15#include <sys/types.h> 15#include <sys/types.h>
16#include <sys/vfs.h> 16#include <sys/vfs.h>
17#include <sys/syscall.h>
17#include "hostfs.h" 18#include "hostfs.h"
18#include <utime.h> 19#include <utime.h>
19 20
@@ -360,6 +361,33 @@ int rename_file(char *from, char *to)
360 return 0; 361 return 0;
361} 362}
362 363
364int rename2_file(char *from, char *to, unsigned int flags)
365{
366 int err;
367
368#ifndef SYS_renameat2
369# ifdef __x86_64__
370# define SYS_renameat2 316
371# endif
372# ifdef __i386__
373# define SYS_renameat2 353
374# endif
375#endif
376
377#ifdef SYS_renameat2
378 err = syscall(SYS_renameat2, AT_FDCWD, from, AT_FDCWD, to, flags);
379 if (err < 0) {
380 if (errno != ENOSYS)
381 return -errno;
382 else
383 return -EINVAL;
384 }
385 return 0;
386#else
387 return -EINVAL;
388#endif
389}
390
363int do_statfs(char *root, long *bsize_out, long long *blocks_out, 391int do_statfs(char *root, long *bsize_out, long long *blocks_out,
364 long long *bfree_out, long long *bavail_out, 392 long long *bfree_out, long long *bavail_out,
365 long long *files_out, long long *ffree_out, 393 long long *files_out, long long *ffree_out,
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index f36fc010fccb..2923a7bd82ac 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -545,12 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
545 struct dnode *d1; 545 struct dnode *d1;
546 struct quad_buffer_head qbh1; 546 struct quad_buffer_head qbh1;
547 if (hpfs_sb(i->i_sb)->sb_chk) 547 if (hpfs_sb(i->i_sb)->sb_chk)
548 if (up != i->i_ino) { 548 if (up != i->i_ino) {
549 hpfs_error(i->i_sb, 549 hpfs_error(i->i_sb,
550 "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx", 550 "bad pointer to fnode, dnode %08x, pointing to %08x, should be %08lx",
551 dno, up, (unsigned long)i->i_ino); 551 dno, up,
552 return; 552 (unsigned long)i->i_ino);
553 } 553 return;
554 }
554 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { 555 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
555 d1->up = cpu_to_le32(up); 556 d1->up = cpu_to_le32(up);
556 d1->root_dnode = 1; 557 d1->root_dnode = 1;
@@ -1061,8 +1062,8 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1061 hpfs_brelse4(qbh); 1062 hpfs_brelse4(qbh);
1062 if (hpfs_sb(s)->sb_chk) 1063 if (hpfs_sb(s)->sb_chk)
1063 if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) { 1064 if (hpfs_stop_cycles(s, dno, &c1, &c2, "map_fnode_dirent #1")) {
1064 kfree(name2); 1065 kfree(name2);
1065 return NULL; 1066 return NULL;
1066 } 1067 }
1067 goto go_down; 1068 goto go_down;
1068 } 1069 }
diff --git a/fs/inode.c b/fs/inode.c
index 5938f3928944..26753ba7b6d6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
165 mapping->a_ops = &empty_aops; 165 mapping->a_ops = &empty_aops;
166 mapping->host = inode; 166 mapping->host = inode;
167 mapping->flags = 0; 167 mapping->flags = 0;
168 atomic_set(&mapping->i_mmap_writable, 0);
168 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE); 169 mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
169 mapping->private_data = NULL; 170 mapping->private_data = NULL;
170 mapping->backing_dev_info = &default_backing_dev_info; 171 mapping->backing_dev_info = &default_backing_dev_info;
diff --git a/fs/internal.h b/fs/internal.h
index 465742407466..e325b4f9c799 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -131,7 +131,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
131/* 131/*
132 * read_write.c 132 * read_write.c
133 */ 133 */
134extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
135extern int rw_verify_area(int, struct file *, const loff_t *, size_t); 134extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
136 135
137/* 136/*
@@ -144,3 +143,9 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
144 * pipe.c 143 * pipe.c
145 */ 144 */
146extern const struct file_operations pipefifo_fops; 145extern const struct file_operations pipefifo_fops;
146
147/*
148 * fs_pin.c
149 */
150extern void sb_pin_kill(struct super_block *sb);
151extern void mnt_pin_kill(struct mount *m);
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 592e5115a561..f311bf084015 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -158,8 +158,8 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
158 "zisofs: zisofs_inflate returned" 158 "zisofs: zisofs_inflate returned"
159 " %d, inode = %lu," 159 " %d, inode = %lu,"
160 " page idx = %d, bh idx = %d," 160 " page idx = %d, bh idx = %d,"
161 " avail_in = %d," 161 " avail_in = %ld,"
162 " avail_out = %d\n", 162 " avail_out = %ld\n",
163 zerr, inode->i_ino, curpage, 163 zerr, inode->i_ino, curpage,
164 curbh, stream.avail_in, 164 curbh, stream.avail_in,
165 stream.avail_out); 165 stream.avail_out);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 009ec0b5993d..2f7a3c090489 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -202,8 +202,7 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
202 } else { 202 } else {
203 acl = ERR_PTR(rc); 203 acl = ERR_PTR(rc);
204 } 204 }
205 if (value) 205 kfree(value);
206 kfree(value);
207 if (!IS_ERR(acl)) 206 if (!IS_ERR(acl))
208 set_cached_acl(inode, type, acl); 207 set_cached_acl(inode, type, acl);
209 return acl; 208 return acl;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 0b9a1e44e833..5698dae5d92d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -94,11 +94,12 @@ static int jffs2_zlib_compress(unsigned char *data_in,
94 94
95 while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) { 95 while (def_strm.total_out < *dstlen - STREAM_END_SPACE && def_strm.total_in < *sourcelen) {
96 def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE); 96 def_strm.avail_out = *dstlen - (def_strm.total_out + STREAM_END_SPACE);
97 def_strm.avail_in = min((unsigned)(*sourcelen-def_strm.total_in), def_strm.avail_out); 97 def_strm.avail_in = min_t(unsigned long,
98 jffs2_dbg(1, "calling deflate with avail_in %d, avail_out %d\n", 98 (*sourcelen-def_strm.total_in), def_strm.avail_out);
99 jffs2_dbg(1, "calling deflate with avail_in %ld, avail_out %ld\n",
99 def_strm.avail_in, def_strm.avail_out); 100 def_strm.avail_in, def_strm.avail_out);
100 ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH); 101 ret = zlib_deflate(&def_strm, Z_PARTIAL_FLUSH);
101 jffs2_dbg(1, "deflate returned with avail_in %d, avail_out %d, total_in %ld, total_out %ld\n", 102 jffs2_dbg(1, "deflate returned with avail_in %ld, avail_out %ld, total_in %ld, total_out %ld\n",
102 def_strm.avail_in, def_strm.avail_out, 103 def_strm.avail_in, def_strm.avail_out,
103 def_strm.total_in, def_strm.total_out); 104 def_strm.total_in, def_strm.total_out);
104 if (ret != Z_OK) { 105 if (ret != Z_OK) {
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index ad0f2e2a1700..d72817ac51f6 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -756,8 +756,7 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
756 for (i=0; i < XATTRINDEX_HASHSIZE; i++) { 756 for (i=0; i < XATTRINDEX_HASHSIZE; i++) {
757 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) { 757 list_for_each_entry_safe(xd, _xd, &c->xattrindex[i], xindex) {
758 list_del(&xd->xindex); 758 list_del(&xd->xindex);
759 if (xd->xname) 759 kfree(xd->xname);
760 kfree(xd->xname);
761 jffs2_free_xattr_datum(xd); 760 jffs2_free_xattr_datum(xd);
762 } 761 }
763 } 762 }
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index d895b4b7b661..4429d6d9217f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -896,7 +896,7 @@ const struct file_operations kernfs_file_fops = {
896 * @ops: kernfs operations for the file 896 * @ops: kernfs operations for the file
897 * @priv: private data for the file 897 * @priv: private data for the file
898 * @ns: optional namespace tag of the file 898 * @ns: optional namespace tag of the file
899 * @static_name: don't copy file name 899 * @name_is_static: don't copy file name
900 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 900 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
901 * 901 *
902 * Returns the created node on success, ERR_PTR() value on error. 902 * Returns the created node on success, ERR_PTR() value on error.
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..daa8e7514eae 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -306,11 +306,9 @@ static struct nsm_handle *nsm_lookup_priv(const struct nsm_private *priv)
306static void nsm_init_private(struct nsm_handle *nsm) 306static void nsm_init_private(struct nsm_handle *nsm)
307{ 307{
308 u64 *p = (u64 *)&nsm->sm_priv.data; 308 u64 *p = (u64 *)&nsm->sm_priv.data;
309 struct timespec ts;
310 s64 ns; 309 s64 ns;
311 310
312 ktime_get_ts(&ts); 311 ns = ktime_get_ns();
313 ns = timespec_to_ns(&ts);
314 put_unaligned(ns, p); 312 put_unaligned(ns, p);
315 put_unaligned((unsigned long)nsm, p + 1); 313 put_unaligned((unsigned long)nsm, p + 1);
316} 314}
diff --git a/fs/locks.c b/fs/locks.c
index a6f54802d277..cb66fb05ad4a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -247,6 +247,18 @@ void locks_free_lock(struct file_lock *fl)
247} 247}
248EXPORT_SYMBOL(locks_free_lock); 248EXPORT_SYMBOL(locks_free_lock);
249 249
250static void
251locks_dispose_list(struct list_head *dispose)
252{
253 struct file_lock *fl;
254
255 while (!list_empty(dispose)) {
256 fl = list_first_entry(dispose, struct file_lock, fl_block);
257 list_del_init(&fl->fl_block);
258 locks_free_lock(fl);
259 }
260}
261
250void locks_init_lock(struct file_lock *fl) 262void locks_init_lock(struct file_lock *fl)
251{ 263{
252 memset(fl, 0, sizeof(struct file_lock)); 264 memset(fl, 0, sizeof(struct file_lock));
@@ -285,7 +297,8 @@ EXPORT_SYMBOL(__locks_copy_lock);
285 297
286void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 298void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
287{ 299{
288 locks_release_private(new); 300 /* "new" must be a freshly-initialized lock */
301 WARN_ON_ONCE(new->fl_ops);
289 302
290 __locks_copy_lock(new, fl); 303 __locks_copy_lock(new, fl);
291 new->fl_file = fl->fl_file; 304 new->fl_file = fl->fl_file;
@@ -650,12 +663,16 @@ static void locks_unlink_lock(struct file_lock **thisfl_p)
650 * 663 *
651 * Must be called with i_lock held! 664 * Must be called with i_lock held!
652 */ 665 */
653static void locks_delete_lock(struct file_lock **thisfl_p) 666static void locks_delete_lock(struct file_lock **thisfl_p,
667 struct list_head *dispose)
654{ 668{
655 struct file_lock *fl = *thisfl_p; 669 struct file_lock *fl = *thisfl_p;
656 670
657 locks_unlink_lock(thisfl_p); 671 locks_unlink_lock(thisfl_p);
658 locks_free_lock(fl); 672 if (dispose)
673 list_add(&fl->fl_block, dispose);
674 else
675 locks_free_lock(fl);
659} 676}
660 677
661/* Determine if lock sys_fl blocks lock caller_fl. Common functionality 678/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
@@ -811,6 +828,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
811 struct inode * inode = file_inode(filp); 828 struct inode * inode = file_inode(filp);
812 int error = 0; 829 int error = 0;
813 int found = 0; 830 int found = 0;
831 LIST_HEAD(dispose);
814 832
815 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 833 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
816 new_fl = locks_alloc_lock(); 834 new_fl = locks_alloc_lock();
@@ -833,7 +851,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
833 if (request->fl_type == fl->fl_type) 851 if (request->fl_type == fl->fl_type)
834 goto out; 852 goto out;
835 found = 1; 853 found = 1;
836 locks_delete_lock(before); 854 locks_delete_lock(before, &dispose);
837 break; 855 break;
838 } 856 }
839 857
@@ -880,6 +898,7 @@ out:
880 spin_unlock(&inode->i_lock); 898 spin_unlock(&inode->i_lock);
881 if (new_fl) 899 if (new_fl)
882 locks_free_lock(new_fl); 900 locks_free_lock(new_fl);
901 locks_dispose_list(&dispose);
883 return error; 902 return error;
884} 903}
885 904
@@ -893,6 +912,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
893 struct file_lock **before; 912 struct file_lock **before;
894 int error; 913 int error;
895 bool added = false; 914 bool added = false;
915 LIST_HEAD(dispose);
896 916
897 /* 917 /*
898 * We may need two file_lock structures for this operation, 918 * We may need two file_lock structures for this operation,
@@ -988,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
988 else 1008 else
989 request->fl_end = fl->fl_end; 1009 request->fl_end = fl->fl_end;
990 if (added) { 1010 if (added) {
991 locks_delete_lock(before); 1011 locks_delete_lock(before, &dispose);
992 continue; 1012 continue;
993 } 1013 }
994 request = fl; 1014 request = fl;
@@ -1018,21 +1038,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1018 * one (This may happen several times). 1038 * one (This may happen several times).
1019 */ 1039 */
1020 if (added) { 1040 if (added) {
1021 locks_delete_lock(before); 1041 locks_delete_lock(before, &dispose);
1022 continue; 1042 continue;
1023 } 1043 }
1024 /* Replace the old lock with the new one. 1044 /*
1025 * Wake up anybody waiting for the old one, 1045 * Replace the old lock with new_fl, and
1026 * as the change in lock type might satisfy 1046 * remove the old one. It's safe to do the
1027 * their needs. 1047 * insert here since we know that we won't be
1048 * using new_fl later, and that the lock is
1049 * just replacing an existing lock.
1028 */ 1050 */
1029 locks_wake_up_blocks(fl); 1051 error = -ENOLCK;
1030 fl->fl_start = request->fl_start; 1052 if (!new_fl)
1031 fl->fl_end = request->fl_end; 1053 goto out;
1032 fl->fl_type = request->fl_type; 1054 locks_copy_lock(new_fl, request);
1033 locks_release_private(fl); 1055 request = new_fl;
1034 locks_copy_private(fl, request); 1056 new_fl = NULL;
1035 request = fl; 1057 locks_delete_lock(before, &dispose);
1058 locks_insert_lock(before, request);
1036 added = true; 1059 added = true;
1037 } 1060 }
1038 } 1061 }
@@ -1093,6 +1116,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1093 locks_free_lock(new_fl); 1116 locks_free_lock(new_fl);
1094 if (new_fl2) 1117 if (new_fl2)
1095 locks_free_lock(new_fl2); 1118 locks_free_lock(new_fl2);
1119 locks_dispose_list(&dispose);
1096 return error; 1120 return error;
1097} 1121}
1098 1122
@@ -1268,7 +1292,7 @@ int lease_modify(struct file_lock **before, int arg)
1268 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); 1292 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1269 fl->fl_fasync = NULL; 1293 fl->fl_fasync = NULL;
1270 } 1294 }
1271 locks_delete_lock(before); 1295 locks_delete_lock(before, NULL);
1272 } 1296 }
1273 return 0; 1297 return 0;
1274} 1298}
@@ -1737,13 +1761,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1737 ret = fl; 1761 ret = fl;
1738 spin_lock(&inode->i_lock); 1762 spin_lock(&inode->i_lock);
1739 error = __vfs_setlease(filp, arg, &ret); 1763 error = __vfs_setlease(filp, arg, &ret);
1740 if (error) { 1764 if (error)
1741 spin_unlock(&inode->i_lock); 1765 goto out_unlock;
1742 locks_free_lock(fl); 1766 if (ret == fl)
1743 goto out_free_fasync; 1767 fl = NULL;
1744 }
1745 if (ret != fl)
1746 locks_free_lock(fl);
1747 1768
1748 /* 1769 /*
1749 * fasync_insert_entry() returns the old entry if any. 1770 * fasync_insert_entry() returns the old entry if any.
@@ -1755,9 +1776,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1755 new = NULL; 1776 new = NULL;
1756 1777
1757 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 1778 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1779out_unlock:
1758 spin_unlock(&inode->i_lock); 1780 spin_unlock(&inode->i_lock);
1759 1781 if (fl)
1760out_free_fasync: 1782 locks_free_lock(fl);
1761 if (new) 1783 if (new)
1762 fasync_free(new); 1784 fasync_free(new);
1763 return error; 1785 return error;
@@ -2320,6 +2342,7 @@ void locks_remove_file(struct file *filp)
2320 struct inode * inode = file_inode(filp); 2342 struct inode * inode = file_inode(filp);
2321 struct file_lock *fl; 2343 struct file_lock *fl;
2322 struct file_lock **before; 2344 struct file_lock **before;
2345 LIST_HEAD(dispose);
2323 2346
2324 if (!inode->i_flock) 2347 if (!inode->i_flock)
2325 return; 2348 return;
@@ -2365,12 +2388,13 @@ void locks_remove_file(struct file *filp)
2365 fl->fl_type, fl->fl_flags, 2388 fl->fl_type, fl->fl_flags,
2366 fl->fl_start, fl->fl_end); 2389 fl->fl_start, fl->fl_end);
2367 2390
2368 locks_delete_lock(before); 2391 locks_delete_lock(before, &dispose);
2369 continue; 2392 continue;
2370 } 2393 }
2371 before = &fl->fl_next; 2394 before = &fl->fl_next;
2372 } 2395 }
2373 spin_unlock(&inode->i_lock); 2396 spin_unlock(&inode->i_lock);
2397 locks_dispose_list(&dispose);
2374} 2398}
2375 2399
2376/** 2400/**
@@ -2452,7 +2476,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2452 seq_puts(f, "FLOCK ADVISORY "); 2476 seq_puts(f, "FLOCK ADVISORY ");
2453 } 2477 }
2454 } else if (IS_LEASE(fl)) { 2478 } else if (IS_LEASE(fl)) {
2455 seq_puts(f, "LEASE "); 2479 if (fl->fl_flags & FL_DELEG)
2480 seq_puts(f, "DELEG ");
2481 else
2482 seq_puts(f, "LEASE ");
2483
2456 if (lease_breaking(fl)) 2484 if (lease_breaking(fl))
2457 seq_puts(f, "BREAKING "); 2485 seq_puts(f, "BREAKING ");
2458 else if (fl->fl_file) 2486 else if (fl->fl_file)
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 48140315f627..380d86e1ab45 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1019,11 +1019,11 @@ static int __logfs_is_valid_block(struct inode *inode, u64 bix, u64 ofs)
1019/** 1019/**
1020 * logfs_is_valid_block - check whether this block is still valid 1020 * logfs_is_valid_block - check whether this block is still valid
1021 * 1021 *
1022 * @sb - superblock 1022 * @sb: superblock
1023 * @ofs - block physical offset 1023 * @ofs: block physical offset
1024 * @ino - block inode number 1024 * @ino: block inode number
1025 * @bix - block index 1025 * @bix: block index
1026 * @level - block level 1026 * @gc_level: block level
1027 * 1027 *
1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will 1028 * Returns 0 if the block is invalid, 1 if it is valid and 2 if it will
1029 * become invalid once the journal is written. 1029 * become invalid once the journal is written.
@@ -2226,10 +2226,9 @@ void btree_write_block(struct logfs_block *block)
2226 * 2226 *
2227 * @inode: parent inode (ifile or directory) 2227 * @inode: parent inode (ifile or directory)
2228 * @buf: object to write (inode or dentry) 2228 * @buf: object to write (inode or dentry)
2229 * @n: object size 2229 * @count: object size
2230 * @_pos: object number (file position in blocks/objects) 2230 * @bix: block index
2231 * @flags: write flags 2231 * @flags: write flags
2232 * @lock: 0 if write lock is already taken, 1 otherwise
2233 * @shadow_tree: shadow below this inode 2232 * @shadow_tree: shadow below this inode
2234 * 2233 *
2235 * FIXME: All caller of this put a 200-300 byte variable on the stack, 2234 * FIXME: All caller of this put a 200-300 byte variable on the stack,
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 4bc50dac8e97..742942a983be 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -96,7 +96,7 @@ int minix_new_block(struct inode * inode)
96unsigned long minix_count_free_blocks(struct super_block *sb) 96unsigned long minix_count_free_blocks(struct super_block *sb)
97{ 97{
98 struct minix_sb_info *sbi = minix_sb(sb); 98 struct minix_sb_info *sbi = minix_sb(sb);
99 u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1); 99 u32 bits = sbi->s_nzones - sbi->s_firstdatazone + 1;
100 100
101 return (count_free(sbi->s_zmap, sb->s_blocksize, bits) 101 return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
102 << sbi->s_log_zone_size); 102 << sbi->s_log_zone_size);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f007a3355570..3f57af196a7d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -267,12 +267,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
267 block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize); 267 block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
268 if (sbi->s_imap_blocks < block) { 268 if (sbi->s_imap_blocks < block) {
269 printk("MINIX-fs: file system does not have enough " 269 printk("MINIX-fs: file system does not have enough "
270 "imap blocks allocated. Refusing to mount\n"); 270 "imap blocks allocated. Refusing to mount.\n");
271 goto out_no_bitmap; 271 goto out_no_bitmap;
272 } 272 }
273 273
274 block = minix_blocks_needed( 274 block = minix_blocks_needed(
275 (sbi->s_nzones - (sbi->s_firstdatazone + 1)), 275 (sbi->s_nzones - sbi->s_firstdatazone + 1),
276 s->s_blocksize); 276 s->s_blocksize);
277 if (sbi->s_zmap_blocks < block) { 277 if (sbi->s_zmap_blocks < block) {
278 printk("MINIX-fs: file system does not have enough " 278 printk("MINIX-fs: file system does not have enough "
diff --git a/fs/mount.h b/fs/mount.h
index d55297f2fa05..6740a6215529 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -55,7 +55,7 @@ struct mount {
55 int mnt_id; /* mount identifier */ 55 int mnt_id; /* mount identifier */
56 int mnt_group_id; /* peer group identifier */ 56 int mnt_group_id; /* peer group identifier */
57 int mnt_expiry_mark; /* true if marked for expiry */ 57 int mnt_expiry_mark; /* true if marked for expiry */
58 int mnt_pinned; 58 struct hlist_head mnt_pins;
59 struct path mnt_ex_mountpoint; 59 struct path mnt_ex_mountpoint;
60}; 60};
61 61
diff --git a/fs/namei.c b/fs/namei.c
index 9eb787e5c167..a996bb48dfab 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1091,10 +1091,10 @@ int follow_down_one(struct path *path)
1091} 1091}
1092EXPORT_SYMBOL(follow_down_one); 1092EXPORT_SYMBOL(follow_down_one);
1093 1093
1094static inline bool managed_dentry_might_block(struct dentry *dentry) 1094static inline int managed_dentry_rcu(struct dentry *dentry)
1095{ 1095{
1096 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && 1096 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
1097 dentry->d_op->d_manage(dentry, true) < 0); 1097 dentry->d_op->d_manage(dentry, true) : 0;
1098} 1098}
1099 1099
1100/* 1100/*
@@ -1110,11 +1110,18 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1110 * Don't forget we might have a non-mountpoint managed dentry 1110 * Don't forget we might have a non-mountpoint managed dentry
1111 * that wants to block transit. 1111 * that wants to block transit.
1112 */ 1112 */
1113 if (unlikely(managed_dentry_might_block(path->dentry))) 1113 switch (managed_dentry_rcu(path->dentry)) {
1114 case -ECHILD:
1115 default:
1114 return false; 1116 return false;
1117 case -EISDIR:
1118 return true;
1119 case 0:
1120 break;
1121 }
1115 1122
1116 if (!d_mountpoint(path->dentry)) 1123 if (!d_mountpoint(path->dentry))
1117 return true; 1124 return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
1118 1125
1119 mounted = __lookup_mnt(path->mnt, path->dentry); 1126 mounted = __lookup_mnt(path->mnt, path->dentry);
1120 if (!mounted) 1127 if (!mounted)
@@ -1130,7 +1137,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1130 */ 1137 */
1131 *inode = path->dentry->d_inode; 1138 *inode = path->dentry->d_inode;
1132 } 1139 }
1133 return read_seqretry(&mount_lock, nd->m_seq); 1140 return read_seqretry(&mount_lock, nd->m_seq) &&
1141 !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
1134} 1142}
1135 1143
1136static int follow_dotdot_rcu(struct nameidata *nd) 1144static int follow_dotdot_rcu(struct nameidata *nd)
@@ -1402,11 +1410,8 @@ static int lookup_fast(struct nameidata *nd,
1402 } 1410 }
1403 path->mnt = mnt; 1411 path->mnt = mnt;
1404 path->dentry = dentry; 1412 path->dentry = dentry;
1405 if (unlikely(!__follow_mount_rcu(nd, path, inode))) 1413 if (likely(__follow_mount_rcu(nd, path, inode)))
1406 goto unlazy; 1414 return 0;
1407 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
1408 goto unlazy;
1409 return 0;
1410unlazy: 1415unlazy:
1411 if (unlazy_walk(nd, dentry)) 1416 if (unlazy_walk(nd, dentry))
1412 return -ECHILD; 1417 return -ECHILD;
@@ -4019,7 +4024,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
4019 * The worst of all namespace operations - renaming directory. "Perverted" 4024 * The worst of all namespace operations - renaming directory. "Perverted"
4020 * doesn't even start to describe it. Somebody in UCB had a heck of a trip... 4025 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
4021 * Problems: 4026 * Problems:
4022 * a) we can get into loop creation. Check is done in is_subdir(). 4027 * a) we can get into loop creation.
4023 * b) race potential - two innocent renames can create a loop together. 4028 * b) race potential - two innocent renames can create a loop together.
4024 * That's where 4.4 screws up. Current fix: serialization on 4029 * That's where 4.4 screws up. Current fix: serialization on
4025 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another 4030 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
@@ -4075,7 +4080,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4075 if (error) 4080 if (error)
4076 return error; 4081 return error;
4077 4082
4078 if (!old_dir->i_op->rename) 4083 if (!old_dir->i_op->rename && !old_dir->i_op->rename2)
4079 return -EPERM; 4084 return -EPERM;
4080 4085
4081 if (flags && !old_dir->i_op->rename2) 4086 if (flags && !old_dir->i_op->rename2)
@@ -4134,10 +4139,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4134 if (error) 4139 if (error)
4135 goto out; 4140 goto out;
4136 } 4141 }
4137 if (!flags) { 4142 if (!old_dir->i_op->rename2) {
4138 error = old_dir->i_op->rename(old_dir, old_dentry, 4143 error = old_dir->i_op->rename(old_dir, old_dentry,
4139 new_dir, new_dentry); 4144 new_dir, new_dentry);
4140 } else { 4145 } else {
4146 WARN_ON(old_dir->i_op->rename != NULL);
4141 error = old_dir->i_op->rename2(old_dir, old_dentry, 4147 error = old_dir->i_op->rename2(old_dir, old_dentry,
4142 new_dir, new_dentry, flags); 4148 new_dir, new_dentry, flags);
4143 } 4149 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41cd887..a01c7730e9af 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -16,7 +16,6 @@
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/security.h> 17#include <linux/security.h>
18#include <linux/idr.h> 18#include <linux/idr.h>
19#include <linux/acct.h> /* acct_auto_close_mnt */
20#include <linux/init.h> /* init_rootfs */ 19#include <linux/init.h> /* init_rootfs */
21#include <linux/fs_struct.h> /* get_fs_root et.al. */ 20#include <linux/fs_struct.h> /* get_fs_root et.al. */
22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 21#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
@@ -779,6 +778,20 @@ static void attach_mnt(struct mount *mnt,
779 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); 778 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
780} 779}
781 780
781static void attach_shadowed(struct mount *mnt,
782 struct mount *parent,
783 struct mount *shadows)
784{
785 if (shadows) {
786 hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
787 list_add(&mnt->mnt_child, &shadows->mnt_child);
788 } else {
789 hlist_add_head_rcu(&mnt->mnt_hash,
790 m_hash(&parent->mnt, mnt->mnt_mountpoint));
791 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
792 }
793}
794
782/* 795/*
783 * vfsmount lock must be held for write 796 * vfsmount lock must be held for write
784 */ 797 */
@@ -797,12 +810,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
797 810
798 list_splice(&head, n->list.prev); 811 list_splice(&head, n->list.prev);
799 812
800 if (shadows) 813 attach_shadowed(mnt, parent, shadows);
801 hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
802 else
803 hlist_add_head_rcu(&mnt->mnt_hash,
804 m_hash(&parent->mnt, mnt->mnt_mountpoint));
805 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
806 touch_mnt_namespace(n); 814 touch_mnt_namespace(n);
807} 815}
808 816
@@ -890,8 +898,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
890 898
891 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); 899 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
892 /* Don't allow unprivileged users to change mount flags */ 900 /* Don't allow unprivileged users to change mount flags */
893 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 901 if (flag & CL_UNPRIVILEGED) {
894 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 902 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
903
904 if (mnt->mnt.mnt_flags & MNT_READONLY)
905 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
906
907 if (mnt->mnt.mnt_flags & MNT_NODEV)
908 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
909
910 if (mnt->mnt.mnt_flags & MNT_NOSUID)
911 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
912
913 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
914 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
915 }
895 916
896 /* Don't allow unprivileged users to reveal what is under a mount */ 917 /* Don't allow unprivileged users to reveal what is under a mount */
897 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) 918 if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
@@ -938,7 +959,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
938 959
939static void mntput_no_expire(struct mount *mnt) 960static void mntput_no_expire(struct mount *mnt)
940{ 961{
941put_again:
942 rcu_read_lock(); 962 rcu_read_lock();
943 mnt_add_count(mnt, -1); 963 mnt_add_count(mnt, -1);
944 if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ 964 if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
@@ -951,14 +971,6 @@ put_again:
951 unlock_mount_hash(); 971 unlock_mount_hash();
952 return; 972 return;
953 } 973 }
954 if (unlikely(mnt->mnt_pinned)) {
955 mnt_add_count(mnt, mnt->mnt_pinned + 1);
956 mnt->mnt_pinned = 0;
957 rcu_read_unlock();
958 unlock_mount_hash();
959 acct_auto_close_mnt(&mnt->mnt);
960 goto put_again;
961 }
962 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { 974 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
963 rcu_read_unlock(); 975 rcu_read_unlock();
964 unlock_mount_hash(); 976 unlock_mount_hash();
@@ -981,6 +993,8 @@ put_again:
981 * so mnt_get_writers() below is safe. 993 * so mnt_get_writers() below is safe.
982 */ 994 */
983 WARN_ON(mnt_get_writers(mnt)); 995 WARN_ON(mnt_get_writers(mnt));
996 if (unlikely(mnt->mnt_pins.first))
997 mnt_pin_kill(mnt);
984 fsnotify_vfsmount_delete(&mnt->mnt); 998 fsnotify_vfsmount_delete(&mnt->mnt);
985 dput(mnt->mnt.mnt_root); 999 dput(mnt->mnt.mnt_root);
986 deactivate_super(mnt->mnt.mnt_sb); 1000 deactivate_super(mnt->mnt.mnt_sb);
@@ -1008,25 +1022,15 @@ struct vfsmount *mntget(struct vfsmount *mnt)
1008} 1022}
1009EXPORT_SYMBOL(mntget); 1023EXPORT_SYMBOL(mntget);
1010 1024
1011void mnt_pin(struct vfsmount *mnt) 1025struct vfsmount *mnt_clone_internal(struct path *path)
1012{
1013 lock_mount_hash();
1014 real_mount(mnt)->mnt_pinned++;
1015 unlock_mount_hash();
1016}
1017EXPORT_SYMBOL(mnt_pin);
1018
1019void mnt_unpin(struct vfsmount *m)
1020{ 1026{
1021 struct mount *mnt = real_mount(m); 1027 struct mount *p;
1022 lock_mount_hash(); 1028 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1023 if (mnt->mnt_pinned) { 1029 if (IS_ERR(p))
1024 mnt_add_count(mnt, 1); 1030 return ERR_CAST(p);
1025 mnt->mnt_pinned--; 1031 p->mnt.mnt_flags |= MNT_INTERNAL;
1026 } 1032 return &p->mnt;
1027 unlock_mount_hash();
1028} 1033}
1029EXPORT_SYMBOL(mnt_unpin);
1030 1034
1031static inline void mangle(struct seq_file *m, const char *s) 1035static inline void mangle(struct seq_file *m, const char *s)
1032{ 1036{
@@ -1492,6 +1496,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1492 continue; 1496 continue;
1493 1497
1494 for (s = r; s; s = next_mnt(s, r)) { 1498 for (s = r; s; s = next_mnt(s, r)) {
1499 struct mount *t = NULL;
1495 if (!(flag & CL_COPY_UNBINDABLE) && 1500 if (!(flag & CL_COPY_UNBINDABLE) &&
1496 IS_MNT_UNBINDABLE(s)) { 1501 IS_MNT_UNBINDABLE(s)) {
1497 s = skip_mnt_tree(s); 1502 s = skip_mnt_tree(s);
@@ -1513,7 +1518,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1513 goto out; 1518 goto out;
1514 lock_mount_hash(); 1519 lock_mount_hash();
1515 list_add_tail(&q->mnt_list, &res->mnt_list); 1520 list_add_tail(&q->mnt_list, &res->mnt_list);
1516 attach_mnt(q, parent, p->mnt_mp); 1521 mnt_set_mountpoint(parent, p->mnt_mp, q);
1522 if (!list_empty(&parent->mnt_mounts)) {
1523 t = list_last_entry(&parent->mnt_mounts,
1524 struct mount, mnt_child);
1525 if (t->mnt_mp != p->mnt_mp)
1526 t = NULL;
1527 }
1528 attach_shadowed(q, parent, t);
1517 unlock_mount_hash(); 1529 unlock_mount_hash();
1518 } 1530 }
1519 } 1531 }
@@ -1896,9 +1908,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1896 if (readonly_request == __mnt_is_readonly(mnt)) 1908 if (readonly_request == __mnt_is_readonly(mnt))
1897 return 0; 1909 return 0;
1898 1910
1899 if (mnt->mnt_flags & MNT_LOCK_READONLY)
1900 return -EPERM;
1901
1902 if (readonly_request) 1911 if (readonly_request)
1903 error = mnt_make_readonly(real_mount(mnt)); 1912 error = mnt_make_readonly(real_mount(mnt));
1904 else 1913 else
@@ -1924,6 +1933,33 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1924 if (path->dentry != path->mnt->mnt_root) 1933 if (path->dentry != path->mnt->mnt_root)
1925 return -EINVAL; 1934 return -EINVAL;
1926 1935
1936 /* Don't allow changing of locked mnt flags.
1937 *
1938 * No locks need to be held here while testing the various
1939 * MNT_LOCK flags because those flags can never be cleared
1940 * once they are set.
1941 */
1942 if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
1943 !(mnt_flags & MNT_READONLY)) {
1944 return -EPERM;
1945 }
1946 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
1947 !(mnt_flags & MNT_NODEV)) {
1948 return -EPERM;
1949 }
1950 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
1951 !(mnt_flags & MNT_NOSUID)) {
1952 return -EPERM;
1953 }
1954 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
1955 !(mnt_flags & MNT_NOEXEC)) {
1956 return -EPERM;
1957 }
1958 if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
1959 ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
1960 return -EPERM;
1961 }
1962
1927 err = security_sb_remount(sb, data); 1963 err = security_sb_remount(sb, data);
1928 if (err) 1964 if (err)
1929 return err; 1965 return err;
@@ -1937,7 +1973,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1937 err = do_remount_sb(sb, flags, data, 0); 1973 err = do_remount_sb(sb, flags, data, 0);
1938 if (!err) { 1974 if (!err) {
1939 lock_mount_hash(); 1975 lock_mount_hash();
1940 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; 1976 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
1941 mnt->mnt.mnt_flags = mnt_flags; 1977 mnt->mnt.mnt_flags = mnt_flags;
1942 touch_mnt_namespace(mnt->mnt_ns); 1978 touch_mnt_namespace(mnt->mnt_ns);
1943 unlock_mount_hash(); 1979 unlock_mount_hash();
@@ -2122,7 +2158,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
2122 */ 2158 */
2123 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { 2159 if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
2124 flags |= MS_NODEV; 2160 flags |= MS_NODEV;
2125 mnt_flags |= MNT_NODEV; 2161 mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
2126 } 2162 }
2127 } 2163 }
2128 2164
@@ -2436,6 +2472,14 @@ long do_mount(const char *dev_name, const char *dir_name,
2436 if (flags & MS_RDONLY) 2472 if (flags & MS_RDONLY)
2437 mnt_flags |= MNT_READONLY; 2473 mnt_flags |= MNT_READONLY;
2438 2474
2475 /* The default atime for remount is preservation */
2476 if ((flags & MS_REMOUNT) &&
2477 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2478 MS_STRICTATIME)) == 0)) {
2479 mnt_flags &= ~MNT_ATIME_MASK;
2480 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
2481 }
2482
2439 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | 2483 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
2440 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 2484 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
2441 MS_STRICTATIME); 2485 MS_STRICTATIME);
@@ -2972,13 +3016,13 @@ static void *mntns_get(struct task_struct *task)
2972 struct mnt_namespace *ns = NULL; 3016 struct mnt_namespace *ns = NULL;
2973 struct nsproxy *nsproxy; 3017 struct nsproxy *nsproxy;
2974 3018
2975 rcu_read_lock(); 3019 task_lock(task);
2976 nsproxy = task_nsproxy(task); 3020 nsproxy = task->nsproxy;
2977 if (nsproxy) { 3021 if (nsproxy) {
2978 ns = nsproxy->mnt_ns; 3022 ns = nsproxy->mnt_ns;
2979 get_mnt_ns(ns); 3023 get_mnt_ns(ns);
2980 } 3024 }
2981 rcu_read_unlock(); 3025 task_unlock(task);
2982 3026
2983 return ns; 3027 return ns;
2984} 3028}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad9..cbb1797149d5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
210 SetPageUptodate(bvec->bv_page); 210 SetPageUptodate(bvec->bv_page);
211 211
212 if (err) { 212 if (err) {
213 struct nfs_pgio_data *rdata = par->data; 213 struct nfs_pgio_header *header = par->data;
214 struct nfs_pgio_header *header = rdata->header;
215 214
216 if (!header->pnfs_error) 215 if (!header->pnfs_error)
217 header->pnfs_error = -EIO; 216 header->pnfs_error = -EIO;
@@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
224static void bl_read_cleanup(struct work_struct *work) 223static void bl_read_cleanup(struct work_struct *work)
225{ 224{
226 struct rpc_task *task; 225 struct rpc_task *task;
227 struct nfs_pgio_data *rdata; 226 struct nfs_pgio_header *hdr;
228 dprintk("%s enter\n", __func__); 227 dprintk("%s enter\n", __func__);
229 task = container_of(work, struct rpc_task, u.tk_work); 228 task = container_of(work, struct rpc_task, u.tk_work);
230 rdata = container_of(task, struct nfs_pgio_data, task); 229 hdr = container_of(task, struct nfs_pgio_header, task);
231 pnfs_ld_read_done(rdata); 230 pnfs_ld_read_done(hdr);
232} 231}
233 232
234static void 233static void
235bl_end_par_io_read(void *data, int unused) 234bl_end_par_io_read(void *data, int unused)
236{ 235{
237 struct nfs_pgio_data *rdata = data; 236 struct nfs_pgio_header *hdr = data;
238 237
239 rdata->task.tk_status = rdata->header->pnfs_error; 238 hdr->task.tk_status = hdr->pnfs_error;
240 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 239 INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
241 schedule_work(&rdata->task.u.tk_work); 240 schedule_work(&hdr->task.u.tk_work);
242} 241}
243 242
244static enum pnfs_try_status 243static enum pnfs_try_status
245bl_read_pagelist(struct nfs_pgio_data *rdata) 244bl_read_pagelist(struct nfs_pgio_header *hdr)
246{ 245{
247 struct nfs_pgio_header *header = rdata->header; 246 struct nfs_pgio_header *header = hdr;
248 int i, hole; 247 int i, hole;
249 struct bio *bio = NULL; 248 struct bio *bio = NULL;
250 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 249 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
251 sector_t isect, extent_length = 0; 250 sector_t isect, extent_length = 0;
252 struct parallel_io *par; 251 struct parallel_io *par;
253 loff_t f_offset = rdata->args.offset; 252 loff_t f_offset = hdr->args.offset;
254 size_t bytes_left = rdata->args.count; 253 size_t bytes_left = hdr->args.count;
255 unsigned int pg_offset, pg_len; 254 unsigned int pg_offset, pg_len;
256 struct page **pages = rdata->args.pages; 255 struct page **pages = hdr->args.pages;
257 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 256 int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
258 const bool is_dio = (header->dreq != NULL); 257 const bool is_dio = (header->dreq != NULL);
259 258
260 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 259 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
261 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); 260 hdr->page_array.npages, f_offset,
261 (unsigned int)hdr->args.count);
262 262
263 par = alloc_parallel(rdata); 263 par = alloc_parallel(hdr);
264 if (!par) 264 if (!par)
265 goto use_mds; 265 goto use_mds;
266 par->pnfs_callback = bl_end_par_io_read; 266 par->pnfs_callback = bl_end_par_io_read;
@@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
268 268
269 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 269 isect = (sector_t) (f_offset >> SECTOR_SHIFT);
270 /* Code assumes extents are page-aligned */ 270 /* Code assumes extents are page-aligned */
271 for (i = pg_index; i < rdata->pages.npages; i++) { 271 for (i = pg_index; i < hdr->page_array.npages; i++) {
272 if (!extent_length) { 272 if (!extent_length) {
273 /* We've used up the previous extent */ 273 /* We've used up the previous extent */
274 bl_put_extent(be); 274 bl_put_extent(be);
@@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
317 struct pnfs_block_extent *be_read; 317 struct pnfs_block_extent *be_read;
318 318
319 be_read = (hole && cow_read) ? cow_read : be; 319 be_read = (hole && cow_read) ? cow_read : be;
320 bio = do_add_page_to_bio(bio, rdata->pages.npages - i, 320 bio = do_add_page_to_bio(bio,
321 hdr->page_array.npages - i,
321 READ, 322 READ,
322 isect, pages[i], be_read, 323 isect, pages[i], be_read,
323 bl_end_io_read, par, 324 bl_end_io_read, par,
@@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
332 extent_length -= PAGE_CACHE_SECTORS; 333 extent_length -= PAGE_CACHE_SECTORS;
333 } 334 }
334 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 335 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
335 rdata->res.eof = 1; 336 hdr->res.eof = 1;
336 rdata->res.count = header->inode->i_size - rdata->args.offset; 337 hdr->res.count = header->inode->i_size - hdr->args.offset;
337 } else { 338 } else {
338 rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; 339 hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
339 } 340 }
340out: 341out:
341 bl_put_extent(be); 342 bl_put_extent(be);
@@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
390 } 391 }
391 392
392 if (unlikely(err)) { 393 if (unlikely(err)) {
393 struct nfs_pgio_data *data = par->data; 394 struct nfs_pgio_header *header = par->data;
394 struct nfs_pgio_header *header = data->header;
395 395
396 if (!header->pnfs_error) 396 if (!header->pnfs_error)
397 header->pnfs_error = -EIO; 397 header->pnfs_error = -EIO;
@@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
405{ 405{
406 struct parallel_io *par = bio->bi_private; 406 struct parallel_io *par = bio->bi_private;
407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 407 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
408 struct nfs_pgio_data *data = par->data; 408 struct nfs_pgio_header *header = par->data;
409 struct nfs_pgio_header *header = data->header;
410 409
411 if (!uptodate) { 410 if (!uptodate) {
412 if (!header->pnfs_error) 411 if (!header->pnfs_error)
@@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
423static void bl_write_cleanup(struct work_struct *work) 422static void bl_write_cleanup(struct work_struct *work)
424{ 423{
425 struct rpc_task *task; 424 struct rpc_task *task;
426 struct nfs_pgio_data *wdata; 425 struct nfs_pgio_header *hdr;
427 dprintk("%s enter\n", __func__); 426 dprintk("%s enter\n", __func__);
428 task = container_of(work, struct rpc_task, u.tk_work); 427 task = container_of(work, struct rpc_task, u.tk_work);
429 wdata = container_of(task, struct nfs_pgio_data, task); 428 hdr = container_of(task, struct nfs_pgio_header, task);
430 if (likely(!wdata->header->pnfs_error)) { 429 if (likely(!hdr->pnfs_error)) {
431 /* Marks for LAYOUTCOMMIT */ 430 /* Marks for LAYOUTCOMMIT */
432 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), 431 mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
433 wdata->args.offset, wdata->args.count); 432 hdr->args.offset, hdr->args.count);
434 } 433 }
435 pnfs_ld_write_done(wdata); 434 pnfs_ld_write_done(hdr);
436} 435}
437 436
438/* Called when last of bios associated with a bl_write_pagelist call finishes */ 437/* Called when last of bios associated with a bl_write_pagelist call finishes */
439static void bl_end_par_io_write(void *data, int num_se) 438static void bl_end_par_io_write(void *data, int num_se)
440{ 439{
441 struct nfs_pgio_data *wdata = data; 440 struct nfs_pgio_header *hdr = data;
442 441
443 if (unlikely(wdata->header->pnfs_error)) { 442 if (unlikely(hdr->pnfs_error)) {
444 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, 443 bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
445 num_se); 444 num_se);
446 } 445 }
447 446
448 wdata->task.tk_status = wdata->header->pnfs_error; 447 hdr->task.tk_status = hdr->pnfs_error;
449 wdata->verf.committed = NFS_FILE_SYNC; 448 hdr->verf.committed = NFS_FILE_SYNC;
450 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 449 INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
451 schedule_work(&wdata->task.u.tk_work); 450 schedule_work(&hdr->task.u.tk_work);
452} 451}
453 452
454/* FIXME STUB - mark intersection of layout and page as bad, so is not 453/* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -673,18 +672,17 @@ check_page:
673} 672}
674 673
675static enum pnfs_try_status 674static enum pnfs_try_status
676bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) 675bl_write_pagelist(struct nfs_pgio_header *header, int sync)
677{ 676{
678 struct nfs_pgio_header *header = wdata->header;
679 int i, ret, npg_zero, pg_index, last = 0; 677 int i, ret, npg_zero, pg_index, last = 0;
680 struct bio *bio = NULL; 678 struct bio *bio = NULL;
681 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 679 struct pnfs_block_extent *be = NULL, *cow_read = NULL;
682 sector_t isect, last_isect = 0, extent_length = 0; 680 sector_t isect, last_isect = 0, extent_length = 0;
683 struct parallel_io *par = NULL; 681 struct parallel_io *par = NULL;
684 loff_t offset = wdata->args.offset; 682 loff_t offset = header->args.offset;
685 size_t count = wdata->args.count; 683 size_t count = header->args.count;
686 unsigned int pg_offset, pg_len, saved_len; 684 unsigned int pg_offset, pg_len, saved_len;
687 struct page **pages = wdata->args.pages; 685 struct page **pages = header->args.pages;
688 struct page *page; 686 struct page *page;
689 pgoff_t index; 687 pgoff_t index;
690 u64 temp; 688 u64 temp;
@@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
699 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); 697 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
700 goto out_mds; 698 goto out_mds;
701 } 699 }
702 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 700 /* At this point, header->page_aray is a (sequential) list of nfs_pages.
703 * We want to write each, and if there is an error set pnfs_error 701 * We want to write each, and if there is an error set pnfs_error
704 * to have it redone using nfs. 702 * to have it redone using nfs.
705 */ 703 */
706 par = alloc_parallel(wdata); 704 par = alloc_parallel(header);
707 if (!par) 705 if (!par)
708 goto out_mds; 706 goto out_mds;
709 par->pnfs_callback = bl_end_par_io_write; 707 par->pnfs_callback = bl_end_par_io_write;
@@ -790,8 +788,8 @@ next_page:
790 bio = bl_submit_bio(WRITE, bio); 788 bio = bl_submit_bio(WRITE, bio);
791 789
792 /* Middle pages */ 790 /* Middle pages */
793 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 791 pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
794 for (i = pg_index; i < wdata->pages.npages; i++) { 792 for (i = pg_index; i < header->page_array.npages; i++) {
795 if (!extent_length) { 793 if (!extent_length) {
796 /* We've used up the previous extent */ 794 /* We've used up the previous extent */
797 bl_put_extent(be); 795 bl_put_extent(be);
@@ -862,7 +860,8 @@ next_page:
862 } 860 }
863 861
864 862
865 bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, 863 bio = do_add_page_to_bio(bio, header->page_array.npages - i,
864 WRITE,
866 isect, pages[i], be, 865 isect, pages[i], be,
867 bl_end_io_write, par, 866 bl_end_io_write, par,
868 pg_offset, pg_len); 867 pg_offset, pg_len);
@@ -890,7 +889,7 @@ next_page:
890 } 889 }
891 890
892write_done: 891write_done:
893 wdata->res.count = wdata->args.count; 892 header->res.count = header->args.count;
894out: 893out:
895 bl_put_extent(be); 894 bl_put_extent(be);
896 bl_put_extent(cow_read); 895 bl_put_extent(cow_read);
@@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
1063 return ERR_PTR(-ENOMEM); 1062 return ERR_PTR(-ENOMEM);
1064 } 1063 }
1065 1064
1066 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); 1065 pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
1067 if (pages == NULL) { 1066 if (pages == NULL) {
1068 kfree(dev); 1067 kfree(dev);
1069 return ERR_PTR(-ENOMEM); 1068 return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed9..54de482143cc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
428 if (p == NULL) 428 if (p == NULL)
429 return 0; 429 return 0;
430 430
431 /*
432 * Did we get the acceptor from userland during the SETCLIENID
433 * negotiation?
434 */
435 if (clp->cl_acceptor)
436 return !strcmp(p, clp->cl_acceptor);
437
438 /*
439 * Otherwise try to verify it using the cl_hostname. Note that this
440 * doesn't work if a non-canonical hostname was used in the devname.
441 */
442
431 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ 443 /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
432 444
433 if (memcmp(p, "nfs@", 4) != 0) 445 if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1d09289c8f0e..1c5ff6d58385 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
110 mutex_unlock(&nfs_version_mutex); 110 mutex_unlock(&nfs_version_mutex);
111 } 111 }
112 112
113 if (!IS_ERR(nfs)) 113 if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
114 try_module_get(nfs->owner); 114 return ERR_PTR(-EAGAIN);
115 return nfs; 115 return nfs;
116} 116}
117 117
@@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
158 goto error_0; 158 goto error_0;
159 159
160 clp->cl_nfs_mod = cl_init->nfs_mod; 160 clp->cl_nfs_mod = cl_init->nfs_mod;
161 try_module_get(clp->cl_nfs_mod->owner); 161 if (!try_module_get(clp->cl_nfs_mod->owner))
162 goto error_dealloc;
162 163
163 clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; 164 clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
164 165
@@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
190 191
191error_cleanup: 192error_cleanup:
192 put_nfs_version(clp->cl_nfs_mod); 193 put_nfs_version(clp->cl_nfs_mod);
194error_dealloc:
193 kfree(clp); 195 kfree(clp);
194error_0: 196error_0:
195 return ERR_PTR(err); 197 return ERR_PTR(err);
@@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
252 put_net(clp->cl_net); 254 put_net(clp->cl_net);
253 put_nfs_version(clp->cl_nfs_mod); 255 put_nfs_version(clp->cl_nfs_mod);
254 kfree(clp->cl_hostname); 256 kfree(clp->cl_hostname);
257 kfree(clp->cl_acceptor);
255 kfree(clp); 258 kfree(clp);
256 259
257 dprintk("<-- nfs_free_client()\n"); 260 dprintk("<-- nfs_free_client()\n");
@@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
482 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); 485 struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
483 const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; 486 const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
484 487
488 if (cl_init->hostname == NULL) {
489 WARN_ON(1);
490 return NULL;
491 }
492
485 dprintk("--> nfs_get_client(%s,v%u)\n", 493 dprintk("--> nfs_get_client(%s,v%u)\n",
486 cl_init->hostname ?: "", rpc_ops->version); 494 cl_init->hostname, rpc_ops->version);
487 495
488 /* see if the client already exists */ 496 /* see if the client already exists */
489 do { 497 do {
@@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
510 } while (!IS_ERR(new)); 518 } while (!IS_ERR(new));
511 519
512 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", 520 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
513 cl_init->hostname ?: "", PTR_ERR(new)); 521 cl_init->hostname, PTR_ERR(new));
514 return new; 522 return new;
515} 523}
516EXPORT_SYMBOL_GPL(nfs_get_client); 524EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -1205,7 +1213,7 @@ static const struct file_operations nfs_server_list_fops = {
1205 .open = nfs_server_list_open, 1213 .open = nfs_server_list_open,
1206 .read = seq_read, 1214 .read = seq_read,
1207 .llseek = seq_lseek, 1215 .llseek = seq_lseek,
1208 .release = seq_release, 1216 .release = seq_release_net,
1209 .owner = THIS_MODULE, 1217 .owner = THIS_MODULE,
1210}; 1218};
1211 1219
@@ -1226,7 +1234,7 @@ static const struct file_operations nfs_volume_list_fops = {
1226 .open = nfs_volume_list_open, 1234 .open = nfs_volume_list_open,
1227 .read = seq_read, 1235 .read = seq_read,
1228 .llseek = seq_lseek, 1236 .llseek = seq_lseek,
1229 .release = seq_release, 1237 .release = seq_release_net,
1230 .owner = THIS_MODULE, 1238 .owner = THIS_MODULE,
1231}; 1239};
1232 1240
@@ -1236,19 +1244,8 @@ static const struct file_operations nfs_volume_list_fops = {
1236 */ 1244 */
1237static int nfs_server_list_open(struct inode *inode, struct file *file) 1245static int nfs_server_list_open(struct inode *inode, struct file *file)
1238{ 1246{
1239 struct seq_file *m; 1247 return seq_open_net(inode, file, &nfs_server_list_ops,
1240 int ret; 1248 sizeof(struct seq_net_private));
1241 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1242 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1243
1244 ret = seq_open(file, &nfs_server_list_ops);
1245 if (ret < 0)
1246 return ret;
1247
1248 m = file->private_data;
1249 m->private = net;
1250
1251 return 0;
1252} 1249}
1253 1250
1254/* 1251/*
@@ -1256,7 +1253,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
1256 */ 1253 */
1257static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) 1254static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1258{ 1255{
1259 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1256 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1260 1257
1261 /* lock the list against modification */ 1258 /* lock the list against modification */
1262 spin_lock(&nn->nfs_client_lock); 1259 spin_lock(&nn->nfs_client_lock);
@@ -1268,7 +1265,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
1268 */ 1265 */
1269static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) 1266static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1270{ 1267{
1271 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1268 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1272 1269
1273 return seq_list_next(v, &nn->nfs_client_list, pos); 1270 return seq_list_next(v, &nn->nfs_client_list, pos);
1274} 1271}
@@ -1278,7 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
1278 */ 1275 */
1279static void nfs_server_list_stop(struct seq_file *p, void *v) 1276static void nfs_server_list_stop(struct seq_file *p, void *v)
1280{ 1277{
1281 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1278 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1282 1279
1283 spin_unlock(&nn->nfs_client_lock); 1280 spin_unlock(&nn->nfs_client_lock);
1284} 1281}
@@ -1289,7 +1286,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
1289static int nfs_server_list_show(struct seq_file *m, void *v) 1286static int nfs_server_list_show(struct seq_file *m, void *v)
1290{ 1287{
1291 struct nfs_client *clp; 1288 struct nfs_client *clp;
1292 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1289 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1293 1290
1294 /* display header on line 1 */ 1291 /* display header on line 1 */
1295 if (v == &nn->nfs_client_list) { 1292 if (v == &nn->nfs_client_list) {
@@ -1321,19 +1318,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
1321 */ 1318 */
1322static int nfs_volume_list_open(struct inode *inode, struct file *file) 1319static int nfs_volume_list_open(struct inode *inode, struct file *file)
1323{ 1320{
1324 struct seq_file *m; 1321 return seq_open_net(inode, file, &nfs_server_list_ops,
1325 int ret; 1322 sizeof(struct seq_net_private));
1326 struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
1327 struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
1328
1329 ret = seq_open(file, &nfs_volume_list_ops);
1330 if (ret < 0)
1331 return ret;
1332
1333 m = file->private_data;
1334 m->private = net;
1335
1336 return 0;
1337} 1323}
1338 1324
1339/* 1325/*
@@ -1341,7 +1327,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
1341 */ 1327 */
1342static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) 1328static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1343{ 1329{
1344 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1330 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1345 1331
1346 /* lock the list against modification */ 1332 /* lock the list against modification */
1347 spin_lock(&nn->nfs_client_lock); 1333 spin_lock(&nn->nfs_client_lock);
@@ -1353,7 +1339,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
1353 */ 1339 */
1354static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) 1340static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1355{ 1341{
1356 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1342 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1357 1343
1358 return seq_list_next(v, &nn->nfs_volume_list, pos); 1344 return seq_list_next(v, &nn->nfs_volume_list, pos);
1359} 1345}
@@ -1363,7 +1349,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
1363 */ 1349 */
1364static void nfs_volume_list_stop(struct seq_file *p, void *v) 1350static void nfs_volume_list_stop(struct seq_file *p, void *v)
1365{ 1351{
1366 struct nfs_net *nn = net_generic(p->private, nfs_net_id); 1352 struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
1367 1353
1368 spin_unlock(&nn->nfs_client_lock); 1354 spin_unlock(&nn->nfs_client_lock);
1369} 1355}
@@ -1376,7 +1362,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1376 struct nfs_server *server; 1362 struct nfs_server *server;
1377 struct nfs_client *clp; 1363 struct nfs_client *clp;
1378 char dev[8], fsid[17]; 1364 char dev[8], fsid[17];
1379 struct nfs_net *nn = net_generic(m->private, nfs_net_id); 1365 struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
1380 1366
1381 /* display header on line 1 */ 1367 /* display header on line 1 */
1382 if (v == &nn->nfs_volume_list) { 1368 if (v == &nn->nfs_volume_list) {
@@ -1407,6 +1393,45 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1407 return 0; 1393 return 0;
1408} 1394}
1409 1395
1396int nfs_fs_proc_net_init(struct net *net)
1397{
1398 struct nfs_net *nn = net_generic(net, nfs_net_id);
1399 struct proc_dir_entry *p;
1400
1401 nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net);
1402 if (!nn->proc_nfsfs)
1403 goto error_0;
1404
1405 /* a file of servers with which we're dealing */
1406 p = proc_create("servers", S_IFREG|S_IRUGO,
1407 nn->proc_nfsfs, &nfs_server_list_fops);
1408 if (!p)
1409 goto error_1;
1410
1411 /* a file of volumes that we have mounted */
1412 p = proc_create("volumes", S_IFREG|S_IRUGO,
1413 nn->proc_nfsfs, &nfs_volume_list_fops);
1414 if (!p)
1415 goto error_2;
1416 return 0;
1417
1418error_2:
1419 remove_proc_entry("servers", nn->proc_nfsfs);
1420error_1:
1421 remove_proc_entry("fs/nfsfs", NULL);
1422error_0:
1423 return -ENOMEM;
1424}
1425
1426void nfs_fs_proc_net_exit(struct net *net)
1427{
1428 struct nfs_net *nn = net_generic(net, nfs_net_id);
1429
1430 remove_proc_entry("volumes", nn->proc_nfsfs);
1431 remove_proc_entry("servers", nn->proc_nfsfs);
1432 remove_proc_entry("fs/nfsfs", NULL);
1433}
1434
1410/* 1435/*
1411 * initialise the /proc/fs/nfsfs/ directory 1436 * initialise the /proc/fs/nfsfs/ directory
1412 */ 1437 */
@@ -1419,14 +1444,12 @@ int __init nfs_fs_proc_init(void)
1419 goto error_0; 1444 goto error_0;
1420 1445
1421 /* a file of servers with which we're dealing */ 1446 /* a file of servers with which we're dealing */
1422 p = proc_create("servers", S_IFREG|S_IRUGO, 1447 p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
1423 proc_fs_nfs, &nfs_server_list_fops);
1424 if (!p) 1448 if (!p)
1425 goto error_1; 1449 goto error_1;
1426 1450
1427 /* a file of volumes that we have mounted */ 1451 /* a file of volumes that we have mounted */
1428 p = proc_create("volumes", S_IFREG|S_IRUGO, 1452 p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
1429 proc_fs_nfs, &nfs_volume_list_fops);
1430 if (!p) 1453 if (!p)
1431 goto error_2; 1454 goto error_2;
1432 return 0; 1455 return 0;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5d8ccecf5f5c..5853f53db732 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
41 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); 41 set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
42} 42}
43 43
44/** 44static int
45 * nfs_have_delegation - check if inode has a delegation 45nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
46 * @inode: inode to check
47 * @flags: delegation types to check for
48 *
49 * Returns one if inode has the indicated delegation, otherwise zero.
50 */
51int nfs4_have_delegation(struct inode *inode, fmode_t flags)
52{ 46{
53 struct nfs_delegation *delegation; 47 struct nfs_delegation *delegation;
54 int ret = 0; 48 int ret = 0;
@@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
58 delegation = rcu_dereference(NFS_I(inode)->delegation); 52 delegation = rcu_dereference(NFS_I(inode)->delegation);
59 if (delegation != NULL && (delegation->type & flags) == flags && 53 if (delegation != NULL && (delegation->type & flags) == flags &&
60 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { 54 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
61 nfs_mark_delegation_referenced(delegation); 55 if (mark)
56 nfs_mark_delegation_referenced(delegation);
62 ret = 1; 57 ret = 1;
63 } 58 }
64 rcu_read_unlock(); 59 rcu_read_unlock();
65 return ret; 60 return ret;
66} 61}
62/**
63 * nfs_have_delegation - check if inode has a delegation, mark it
64 * NFS_DELEGATION_REFERENCED if there is one.
65 * @inode: inode to check
66 * @flags: delegation types to check for
67 *
68 * Returns one if inode has the indicated delegation, otherwise zero.
69 */
70int nfs4_have_delegation(struct inode *inode, fmode_t flags)
71{
72 return nfs4_do_check_delegation(inode, flags, true);
73}
74
75/*
76 * nfs4_check_delegation - check if inode has a delegation, do not mark
77 * NFS_DELEGATION_REFERENCED if it has one.
78 */
79int nfs4_check_delegation(struct inode *inode, fmode_t flags)
80{
81 return nfs4_do_check_delegation(inode, flags, false);
82}
67 83
68static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) 84static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
69{ 85{
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..5c1cce39297f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
59 59
60void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); 60void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
61int nfs4_have_delegation(struct inode *inode, fmode_t flags); 61int nfs4_have_delegation(struct inode *inode, fmode_t flags);
62int nfs4_check_delegation(struct inode *inode, fmode_t flags);
62 63
63#endif 64#endif
64 65
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a3d4ef76127..36d921f0c602 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
988 * A check for whether or not the parent directory has changed. 988 * A check for whether or not the parent directory has changed.
989 * In the case it has, we assume that the dentries are untrustworthy 989 * In the case it has, we assume that the dentries are untrustworthy
990 * and may need to be looked up again. 990 * and may need to be looked up again.
991 * If rcu_walk prevents us from performing a full check, return 0.
991 */ 992 */
992static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) 993static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
994 int rcu_walk)
993{ 995{
996 int ret;
997
994 if (IS_ROOT(dentry)) 998 if (IS_ROOT(dentry))
995 return 1; 999 return 1;
996 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) 1000 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
998 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 1002 if (!nfs_verify_change_attribute(dir, dentry->d_time))
999 return 0; 1003 return 0;
1000 /* Revalidate nfsi->cache_change_attribute before we declare a match */ 1004 /* Revalidate nfsi->cache_change_attribute before we declare a match */
1001 if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) 1005 if (rcu_walk)
1006 ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
1007 else
1008 ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
1009 if (ret < 0)
1002 return 0; 1010 return 0;
1003 if (!nfs_verify_change_attribute(dir, dentry->d_time)) 1011 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1004 return 0; 1012 return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1042out: 1050out:
1043 return (inode->i_nlink == 0) ? -ENOENT : 0; 1051 return (inode->i_nlink == 0) ? -ENOENT : 0;
1044out_force: 1052out_force:
1053 if (flags & LOOKUP_RCU)
1054 return -ECHILD;
1045 ret = __nfs_revalidate_inode(server, inode); 1055 ret = __nfs_revalidate_inode(server, inode);
1046 if (ret != 0) 1056 if (ret != 0)
1047 return ret; 1057 return ret;
@@ -1054,6 +1064,9 @@ out_force:
1054 * 1064 *
1055 * If parent mtime has changed, we revalidate, else we wait for a 1065 * If parent mtime has changed, we revalidate, else we wait for a
1056 * period corresponding to the parent's attribute cache timeout value. 1066 * period corresponding to the parent's attribute cache timeout value.
1067 *
1068 * If LOOKUP_RCU prevents us from performing a full check, return 1
1069 * suggesting a reval is needed.
1057 */ 1070 */
1058static inline 1071static inline
1059int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, 1072int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1064 return 0; 1077 return 0;
1065 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) 1078 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1066 return 1; 1079 return 1;
1067 return !nfs_check_verifier(dir, dentry); 1080 return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1068} 1081}
1069 1082
1070/* 1083/*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1088 struct nfs4_label *label = NULL; 1101 struct nfs4_label *label = NULL;
1089 int error; 1102 int error;
1090 1103
1091 if (flags & LOOKUP_RCU) 1104 if (flags & LOOKUP_RCU) {
1092 return -ECHILD; 1105 parent = ACCESS_ONCE(dentry->d_parent);
1093 1106 dir = ACCESS_ONCE(parent->d_inode);
1094 parent = dget_parent(dentry); 1107 if (!dir)
1095 dir = parent->d_inode; 1108 return -ECHILD;
1109 } else {
1110 parent = dget_parent(dentry);
1111 dir = parent->d_inode;
1112 }
1096 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 1113 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1097 inode = dentry->d_inode; 1114 inode = dentry->d_inode;
1098 1115
1099 if (!inode) { 1116 if (!inode) {
1100 if (nfs_neg_need_reval(dir, dentry, flags)) 1117 if (nfs_neg_need_reval(dir, dentry, flags)) {
1118 if (flags & LOOKUP_RCU)
1119 return -ECHILD;
1101 goto out_bad; 1120 goto out_bad;
1121 }
1102 goto out_valid_noent; 1122 goto out_valid_noent;
1103 } 1123 }
1104 1124
1105 if (is_bad_inode(inode)) { 1125 if (is_bad_inode(inode)) {
1126 if (flags & LOOKUP_RCU)
1127 return -ECHILD;
1106 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", 1128 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1107 __func__, dentry); 1129 __func__, dentry);
1108 goto out_bad; 1130 goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1112 goto out_set_verifier; 1134 goto out_set_verifier;
1113 1135
1114 /* Force a full look up iff the parent directory has changed */ 1136 /* Force a full look up iff the parent directory has changed */
1115 if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { 1137 if (!nfs_is_exclusive_create(dir, flags) &&
1116 if (nfs_lookup_verify_inode(inode, flags)) 1138 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1139
1140 if (nfs_lookup_verify_inode(inode, flags)) {
1141 if (flags & LOOKUP_RCU)
1142 return -ECHILD;
1117 goto out_zap_parent; 1143 goto out_zap_parent;
1144 }
1118 goto out_valid; 1145 goto out_valid;
1119 } 1146 }
1120 1147
1148 if (flags & LOOKUP_RCU)
1149 return -ECHILD;
1150
1121 if (NFS_STALE(inode)) 1151 if (NFS_STALE(inode))
1122 goto out_bad; 1152 goto out_bad;
1123 1153
@@ -1153,13 +1183,18 @@ out_set_verifier:
1153 /* Success: notify readdir to use READDIRPLUS */ 1183 /* Success: notify readdir to use READDIRPLUS */
1154 nfs_advise_use_readdirplus(dir); 1184 nfs_advise_use_readdirplus(dir);
1155 out_valid_noent: 1185 out_valid_noent:
1156 dput(parent); 1186 if (flags & LOOKUP_RCU) {
1187 if (parent != ACCESS_ONCE(dentry->d_parent))
1188 return -ECHILD;
1189 } else
1190 dput(parent);
1157 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", 1191 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1158 __func__, dentry); 1192 __func__, dentry);
1159 return 1; 1193 return 1;
1160out_zap_parent: 1194out_zap_parent:
1161 nfs_zap_caches(dir); 1195 nfs_zap_caches(dir);
1162 out_bad: 1196 out_bad:
1197 WARN_ON(flags & LOOKUP_RCU);
1163 nfs_free_fattr(fattr); 1198 nfs_free_fattr(fattr);
1164 nfs_free_fhandle(fhandle); 1199 nfs_free_fhandle(fhandle);
1165 nfs4_label_free(label); 1200 nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
1185 __func__, dentry); 1220 __func__, dentry);
1186 return 0; 1221 return 0;
1187out_error: 1222out_error:
1223 WARN_ON(flags & LOOKUP_RCU);
1188 nfs_free_fattr(fattr); 1224 nfs_free_fattr(fattr);
1189 nfs_free_fhandle(fhandle); 1225 nfs_free_fhandle(fhandle);
1190 nfs4_label_free(label); 1226 nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
1529 1565
1530static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) 1566static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1531{ 1567{
1532 struct dentry *parent = NULL;
1533 struct inode *inode; 1568 struct inode *inode;
1534 struct inode *dir;
1535 int ret = 0; 1569 int ret = 0;
1536 1570
1537 if (flags & LOOKUP_RCU)
1538 return -ECHILD;
1539
1540 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) 1571 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1541 goto no_open; 1572 goto no_open;
1542 if (d_mountpoint(dentry)) 1573 if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1545 goto no_open; 1576 goto no_open;
1546 1577
1547 inode = dentry->d_inode; 1578 inode = dentry->d_inode;
1548 parent = dget_parent(dentry);
1549 dir = parent->d_inode;
1550 1579
1551 /* We can't create new files in nfs_open_revalidate(), so we 1580 /* We can't create new files in nfs_open_revalidate(), so we
1552 * optimize away revalidation of negative dentries. 1581 * optimize away revalidation of negative dentries.
1553 */ 1582 */
1554 if (inode == NULL) { 1583 if (inode == NULL) {
1584 struct dentry *parent;
1585 struct inode *dir;
1586
1587 if (flags & LOOKUP_RCU) {
1588 parent = ACCESS_ONCE(dentry->d_parent);
1589 dir = ACCESS_ONCE(parent->d_inode);
1590 if (!dir)
1591 return -ECHILD;
1592 } else {
1593 parent = dget_parent(dentry);
1594 dir = parent->d_inode;
1595 }
1555 if (!nfs_neg_need_reval(dir, dentry, flags)) 1596 if (!nfs_neg_need_reval(dir, dentry, flags))
1556 ret = 1; 1597 ret = 1;
1598 else if (flags & LOOKUP_RCU)
1599 ret = -ECHILD;
1600 if (!(flags & LOOKUP_RCU))
1601 dput(parent);
1602 else if (parent != ACCESS_ONCE(dentry->d_parent))
1603 return -ECHILD;
1557 goto out; 1604 goto out;
1558 } 1605 }
1559 1606
1560 /* NFS only supports OPEN on regular files */ 1607 /* NFS only supports OPEN on regular files */
1561 if (!S_ISREG(inode->i_mode)) 1608 if (!S_ISREG(inode->i_mode))
1562 goto no_open_dput; 1609 goto no_open;
1563 /* We cannot do exclusive creation on a positive dentry */ 1610 /* We cannot do exclusive creation on a positive dentry */
1564 if (flags & LOOKUP_EXCL) 1611 if (flags & LOOKUP_EXCL)
1565 goto no_open_dput; 1612 goto no_open;
1566 1613
1567 /* Let f_op->open() actually open (and revalidate) the file */ 1614 /* Let f_op->open() actually open (and revalidate) the file */
1568 ret = 1; 1615 ret = 1;
1569 1616
1570out: 1617out:
1571 dput(parent);
1572 return ret; 1618 return ret;
1573 1619
1574no_open_dput:
1575 dput(parent);
1576no_open: 1620no_open:
1577 return nfs_lookup_revalidate(dentry, flags); 1621 return nfs_lookup_revalidate(dentry, flags);
1578} 1622}
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
2028static LIST_HEAD(nfs_access_lru_list); 2072static LIST_HEAD(nfs_access_lru_list);
2029static atomic_long_t nfs_access_nr_entries; 2073static atomic_long_t nfs_access_nr_entries;
2030 2074
2075static unsigned long nfs_access_max_cachesize = ULONG_MAX;
2076module_param(nfs_access_max_cachesize, ulong, 0644);
2077MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2078
2031static void nfs_access_free_entry(struct nfs_access_entry *entry) 2079static void nfs_access_free_entry(struct nfs_access_entry *entry)
2032{ 2080{
2033 put_rpccred(entry->cred); 2081 put_rpccred(entry->cred);
2034 kfree(entry); 2082 kfree_rcu(entry, rcu_head);
2035 smp_mb__before_atomic(); 2083 smp_mb__before_atomic();
2036 atomic_long_dec(&nfs_access_nr_entries); 2084 atomic_long_dec(&nfs_access_nr_entries);
2037 smp_mb__after_atomic(); 2085 smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
2048 } 2096 }
2049} 2097}
2050 2098
2051unsigned long 2099static unsigned long
2052nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) 2100nfs_do_access_cache_scan(unsigned int nr_to_scan)
2053{ 2101{
2054 LIST_HEAD(head); 2102 LIST_HEAD(head);
2055 struct nfs_inode *nfsi, *next; 2103 struct nfs_inode *nfsi, *next;
2056 struct nfs_access_entry *cache; 2104 struct nfs_access_entry *cache;
2057 int nr_to_scan = sc->nr_to_scan;
2058 gfp_t gfp_mask = sc->gfp_mask;
2059 long freed = 0; 2105 long freed = 0;
2060 2106
2061 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2062 return SHRINK_STOP;
2063
2064 spin_lock(&nfs_access_lru_lock); 2107 spin_lock(&nfs_access_lru_lock);
2065 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { 2108 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2066 struct inode *inode; 2109 struct inode *inode;
@@ -2094,11 +2137,39 @@ remove_lru_entry:
2094} 2137}
2095 2138
2096unsigned long 2139unsigned long
2140nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2141{
2142 int nr_to_scan = sc->nr_to_scan;
2143 gfp_t gfp_mask = sc->gfp_mask;
2144
2145 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2146 return SHRINK_STOP;
2147 return nfs_do_access_cache_scan(nr_to_scan);
2148}
2149
2150
2151unsigned long
2097nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) 2152nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2098{ 2153{
2099 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); 2154 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2100} 2155}
2101 2156
2157static void
2158nfs_access_cache_enforce_limit(void)
2159{
2160 long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2161 unsigned long diff;
2162 unsigned int nr_to_scan;
2163
2164 if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2165 return;
2166 nr_to_scan = 100;
2167 diff = nr_entries - nfs_access_max_cachesize;
2168 if (diff < nr_to_scan)
2169 nr_to_scan = diff;
2170 nfs_do_access_cache_scan(nr_to_scan);
2171}
2172
2102static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) 2173static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2103{ 2174{
2104 struct rb_root *root_node = &nfsi->access_cache; 2175 struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
2186 return -ENOENT; 2257 return -ENOENT;
2187} 2258}
2188 2259
2260static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
2261{
2262 /* Only check the most recently returned cache entry,
2263 * but do it without locking.
2264 */
2265 struct nfs_inode *nfsi = NFS_I(inode);
2266 struct nfs_access_entry *cache;
2267 int err = -ECHILD;
2268 struct list_head *lh;
2269
2270 rcu_read_lock();
2271 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2272 goto out;
2273 lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
2274 cache = list_entry(lh, struct nfs_access_entry, lru);
2275 if (lh == &nfsi->access_cache_entry_lru ||
2276 cred != cache->cred)
2277 cache = NULL;
2278 if (cache == NULL)
2279 goto out;
2280 if (!nfs_have_delegated_attributes(inode) &&
2281 !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
2282 goto out;
2283 res->jiffies = cache->jiffies;
2284 res->cred = cache->cred;
2285 res->mask = cache->mask;
2286 err = 0;
2287out:
2288 rcu_read_unlock();
2289 return err;
2290}
2291
2189static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) 2292static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
2190{ 2293{
2191 struct nfs_inode *nfsi = NFS_I(inode); 2294 struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2229 cache->cred = get_rpccred(set->cred); 2332 cache->cred = get_rpccred(set->cred);
2230 cache->mask = set->mask; 2333 cache->mask = set->mask;
2231 2334
2335 /* The above field assignments must be visible
2336 * before this item appears on the lru. We cannot easily
2337 * use rcu_assign_pointer, so just force the memory barrier.
2338 */
2339 smp_wmb();
2232 nfs_access_add_rbtree(inode, cache); 2340 nfs_access_add_rbtree(inode, cache);
2233 2341
2234 /* Update accounting */ 2342 /* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2244 &nfs_access_lru_list); 2352 &nfs_access_lru_list);
2245 spin_unlock(&nfs_access_lru_lock); 2353 spin_unlock(&nfs_access_lru_lock);
2246 } 2354 }
2355 nfs_access_cache_enforce_limit();
2247} 2356}
2248EXPORT_SYMBOL_GPL(nfs_access_add_cache); 2357EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2249 2358
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
2267 2376
2268 trace_nfs_access_enter(inode); 2377 trace_nfs_access_enter(inode);
2269 2378
2270 status = nfs_access_get_cached(inode, cred, &cache); 2379 status = nfs_access_get_cached_rcu(inode, cred, &cache);
2380 if (status != 0)
2381 status = nfs_access_get_cached(inode, cred, &cache);
2271 if (status == 0) 2382 if (status == 0)
2272 goto out_cached; 2383 goto out_cached;
2273 2384
2385 status = -ECHILD;
2386 if (mask & MAY_NOT_BLOCK)
2387 goto out;
2388
2274 /* Be clever: ask server to check for all possible rights */ 2389 /* Be clever: ask server to check for all possible rights */
2275 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; 2390 cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
2276 cache.cred = cred; 2391 cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
2321 struct rpc_cred *cred; 2436 struct rpc_cred *cred;
2322 int res = 0; 2437 int res = 0;
2323 2438
2324 if (mask & MAY_NOT_BLOCK)
2325 return -ECHILD;
2326
2327 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2439 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2328 2440
2329 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2441 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
2350 if (!NFS_PROTO(inode)->access) 2462 if (!NFS_PROTO(inode)->access)
2351 goto out_notsup; 2463 goto out_notsup;
2352 2464
2353 cred = rpc_lookup_cred(); 2465 /* Always try fast lookups first */
2354 if (!IS_ERR(cred)) { 2466 rcu_read_lock();
2355 res = nfs_do_access(inode, cred, mask); 2467 cred = rpc_lookup_cred_nonblock();
2356 put_rpccred(cred); 2468 if (!IS_ERR(cred))
2357 } else 2469 res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
2470 else
2358 res = PTR_ERR(cred); 2471 res = PTR_ERR(cred);
2472 rcu_read_unlock();
2473 if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
2474 /* Fast lookup failed, try the slow way */
2475 cred = rpc_lookup_cred();
2476 if (!IS_ERR(cred)) {
2477 res = nfs_do_access(inode, cred, mask);
2478 put_rpccred(cred);
2479 } else
2480 res = PTR_ERR(cred);
2481 }
2359out: 2482out:
2360 if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) 2483 if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
2361 res = -EACCES; 2484 res = -EACCES;
@@ -2364,6 +2487,9 @@ out:
2364 inode->i_sb->s_id, inode->i_ino, mask, res); 2487 inode->i_sb->s_id, inode->i_ino, mask, res);
2365 return res; 2488 return res;
2366out_notsup: 2489out_notsup:
2490 if (mask & MAY_NOT_BLOCK)
2491 return -ECHILD;
2492
2367 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2493 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2368 if (res == 0) 2494 if (res == 0)
2369 res = generic_permission(inode, mask); 2495 res = generic_permission(inode, mask);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f11b9eed0de1..65ef6e00deee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
148{ 148{
149 struct nfs_writeverf *verfp; 149 struct nfs_writeverf *verfp;
150 150
151 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, 151 verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
152 hdr->data->ds_idx); 152 hdr->ds_idx);
153 WARN_ON_ONCE(verfp->committed >= 0); 153 WARN_ON_ONCE(verfp->committed >= 0);
154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); 154 memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
155 WARN_ON_ONCE(verfp->committed < 0); 155 WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
169{ 169{
170 struct nfs_writeverf *verfp; 170 struct nfs_writeverf *verfp;
171 171
172 verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, 172 verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
173 hdr->data->ds_idx); 173 hdr->ds_idx);
174 if (verfp->committed < 0) { 174 if (verfp->committed < 0) {
175 nfs_direct_set_hdr_verf(dreq, hdr); 175 nfs_direct_set_hdr_verf(dreq, hdr);
176 return 0; 176 return 0;
@@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
715{ 715{
716 struct nfs_direct_req *dreq = hdr->dreq; 716 struct nfs_direct_req *dreq = hdr->dreq;
717 struct nfs_commit_info cinfo; 717 struct nfs_commit_info cinfo;
718 int bit = -1; 718 bool request_commit = false;
719 struct nfs_page *req = nfs_list_entry(hdr->pages.next); 719 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
720 720
721 if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) 721 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
729 dreq->flags = 0; 729 dreq->flags = 0;
730 dreq->error = hdr->error; 730 dreq->error = hdr->error;
731 } 731 }
732 if (dreq->error != 0) 732 if (dreq->error == 0) {
733 bit = NFS_IOHDR_ERROR;
734 else {
735 dreq->count += hdr->good_bytes; 733 dreq->count += hdr->good_bytes;
736 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { 734 if (nfs_write_need_commit(hdr)) {
737 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
738 bit = NFS_IOHDR_NEED_RESCHED;
739 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
740 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) 735 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
741 bit = NFS_IOHDR_NEED_RESCHED; 736 request_commit = true;
742 else if (dreq->flags == 0) { 737 else if (dreq->flags == 0) {
743 nfs_direct_set_hdr_verf(dreq, hdr); 738 nfs_direct_set_hdr_verf(dreq, hdr);
744 bit = NFS_IOHDR_NEED_COMMIT; 739 request_commit = true;
745 dreq->flags = NFS_ODIRECT_DO_COMMIT; 740 dreq->flags = NFS_ODIRECT_DO_COMMIT;
746 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { 741 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
747 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { 742 request_commit = true;
743 if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
748 dreq->flags = 744 dreq->flags =
749 NFS_ODIRECT_RESCHED_WRITES; 745 NFS_ODIRECT_RESCHED_WRITES;
750 bit = NFS_IOHDR_NEED_RESCHED;
751 } else
752 bit = NFS_IOHDR_NEED_COMMIT;
753 } 746 }
754 } 747 }
755 } 748 }
@@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
759 752
760 req = nfs_list_entry(hdr->pages.next); 753 req = nfs_list_entry(hdr->pages.next);
761 nfs_list_remove_request(req); 754 nfs_list_remove_request(req);
762 switch (bit) { 755 if (request_commit) {
763 case NFS_IOHDR_NEED_RESCHED:
764 case NFS_IOHDR_NEED_COMMIT:
765 kref_get(&req->wb_kref); 756 kref_get(&req->wb_kref);
766 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 757 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
767 } 758 }
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d2eba1c13b7e..1359c4a27393 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
84 BUG(); 84 BUG();
85} 85}
86 86
87static void filelayout_reset_write(struct nfs_pgio_data *data) 87static void filelayout_reset_write(struct nfs_pgio_header *hdr)
88{ 88{
89 struct nfs_pgio_header *hdr = data->header; 89 struct rpc_task *task = &hdr->task;
90 struct rpc_task *task = &data->task;
91 90
92 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 91 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
93 dprintk("%s Reset task %5u for i/o through MDS " 92 dprintk("%s Reset task %5u for i/o through MDS "
94 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 93 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
95 data->task.tk_pid, 94 hdr->task.tk_pid,
96 hdr->inode->i_sb->s_id, 95 hdr->inode->i_sb->s_id,
97 (unsigned long long)NFS_FILEID(hdr->inode), 96 (unsigned long long)NFS_FILEID(hdr->inode),
98 data->args.count, 97 hdr->args.count,
99 (unsigned long long)data->args.offset); 98 (unsigned long long)hdr->args.offset);
100 99
101 task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, 100 task->tk_status = pnfs_write_done_resend_to_mds(hdr);
102 &hdr->pages,
103 hdr->completion_ops,
104 hdr->dreq);
105 } 101 }
106} 102}
107 103
108static void filelayout_reset_read(struct nfs_pgio_data *data) 104static void filelayout_reset_read(struct nfs_pgio_header *hdr)
109{ 105{
110 struct nfs_pgio_header *hdr = data->header; 106 struct rpc_task *task = &hdr->task;
111 struct rpc_task *task = &data->task;
112 107
113 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 108 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
114 dprintk("%s Reset task %5u for i/o through MDS " 109 dprintk("%s Reset task %5u for i/o through MDS "
115 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, 110 "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
116 data->task.tk_pid, 111 hdr->task.tk_pid,
117 hdr->inode->i_sb->s_id, 112 hdr->inode->i_sb->s_id,
118 (unsigned long long)NFS_FILEID(hdr->inode), 113 (unsigned long long)NFS_FILEID(hdr->inode),
119 data->args.count, 114 hdr->args.count,
120 (unsigned long long)data->args.offset); 115 (unsigned long long)hdr->args.offset);
121 116
122 task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, 117 task->tk_status = pnfs_read_done_resend_to_mds(hdr);
123 &hdr->pages,
124 hdr->completion_ops,
125 hdr->dreq);
126 } 118 }
127} 119}
128 120
@@ -243,18 +235,17 @@ wait_on_recovery:
243/* NFS_PROTO call done callback routines */ 235/* NFS_PROTO call done callback routines */
244 236
245static int filelayout_read_done_cb(struct rpc_task *task, 237static int filelayout_read_done_cb(struct rpc_task *task,
246 struct nfs_pgio_data *data) 238 struct nfs_pgio_header *hdr)
247{ 239{
248 struct nfs_pgio_header *hdr = data->header;
249 int err; 240 int err;
250 241
251 trace_nfs4_pnfs_read(data, task->tk_status); 242 trace_nfs4_pnfs_read(hdr, task->tk_status);
252 err = filelayout_async_handle_error(task, data->args.context->state, 243 err = filelayout_async_handle_error(task, hdr->args.context->state,
253 data->ds_clp, hdr->lseg); 244 hdr->ds_clp, hdr->lseg);
254 245
255 switch (err) { 246 switch (err) {
256 case -NFS4ERR_RESET_TO_MDS: 247 case -NFS4ERR_RESET_TO_MDS:
257 filelayout_reset_read(data); 248 filelayout_reset_read(hdr);
258 return task->tk_status; 249 return task->tk_status;
259 case -EAGAIN: 250 case -EAGAIN:
260 rpc_restart_call_prepare(task); 251 rpc_restart_call_prepare(task);
@@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
270 * rfc5661 is not clear about which credential should be used. 261 * rfc5661 is not clear about which credential should be used.
271 */ 262 */
272static void 263static void
273filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) 264filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
274{ 265{
275 struct nfs_pgio_header *hdr = wdata->header;
276 266
277 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || 267 if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
278 wdata->res.verf->committed == NFS_FILE_SYNC) 268 hdr->res.verf->committed == NFS_FILE_SYNC)
279 return; 269 return;
280 270
281 pnfs_set_layoutcommit(wdata); 271 pnfs_set_layoutcommit(hdr);
282 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, 272 dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
283 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 273 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
284} 274}
@@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
305 */ 295 */
306static void filelayout_read_prepare(struct rpc_task *task, void *data) 296static void filelayout_read_prepare(struct rpc_task *task, void *data)
307{ 297{
308 struct nfs_pgio_data *rdata = data; 298 struct nfs_pgio_header *hdr = data;
309 299
310 if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { 300 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
311 rpc_exit(task, -EIO); 301 rpc_exit(task, -EIO);
312 return; 302 return;
313 } 303 }
314 if (filelayout_reset_to_mds(rdata->header->lseg)) { 304 if (filelayout_reset_to_mds(hdr->lseg)) {
315 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 305 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
316 filelayout_reset_read(rdata); 306 filelayout_reset_read(hdr);
317 rpc_exit(task, 0); 307 rpc_exit(task, 0);
318 return; 308 return;
319 } 309 }
320 rdata->pgio_done_cb = filelayout_read_done_cb; 310 hdr->pgio_done_cb = filelayout_read_done_cb;
321 311
322 if (nfs41_setup_sequence(rdata->ds_clp->cl_session, 312 if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
323 &rdata->args.seq_args, 313 &hdr->args.seq_args,
324 &rdata->res.seq_res, 314 &hdr->res.seq_res,
325 task)) 315 task))
326 return; 316 return;
327 if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, 317 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
328 rdata->args.lock_context, FMODE_READ) == -EIO) 318 hdr->args.lock_context, FMODE_READ) == -EIO)
329 rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 319 rpc_exit(task, -EIO); /* lost lock, terminate I/O */
330} 320}
331 321
332static void filelayout_read_call_done(struct rpc_task *task, void *data) 322static void filelayout_read_call_done(struct rpc_task *task, void *data)
333{ 323{
334 struct nfs_pgio_data *rdata = data; 324 struct nfs_pgio_header *hdr = data;
335 325
336 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 326 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
337 327
338 if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && 328 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
339 task->tk_status == 0) { 329 task->tk_status == 0) {
340 nfs41_sequence_done(task, &rdata->res.seq_res); 330 nfs41_sequence_done(task, &hdr->res.seq_res);
341 return; 331 return;
342 } 332 }
343 333
344 /* Note this may cause RPC to be resent */ 334 /* Note this may cause RPC to be resent */
345 rdata->header->mds_ops->rpc_call_done(task, data); 335 hdr->mds_ops->rpc_call_done(task, data);
346} 336}
347 337
348static void filelayout_read_count_stats(struct rpc_task *task, void *data) 338static void filelayout_read_count_stats(struct rpc_task *task, void *data)
349{ 339{
350 struct nfs_pgio_data *rdata = data; 340 struct nfs_pgio_header *hdr = data;
351 341
352 rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); 342 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
353} 343}
354 344
355static void filelayout_read_release(void *data) 345static void filelayout_read_release(void *data)
356{ 346{
357 struct nfs_pgio_data *rdata = data; 347 struct nfs_pgio_header *hdr = data;
358 struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; 348 struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
359 349
360 filelayout_fenceme(lo->plh_inode, lo); 350 filelayout_fenceme(lo->plh_inode, lo);
361 nfs_put_client(rdata->ds_clp); 351 nfs_put_client(hdr->ds_clp);
362 rdata->header->mds_ops->rpc_release(data); 352 hdr->mds_ops->rpc_release(data);
363} 353}
364 354
365static int filelayout_write_done_cb(struct rpc_task *task, 355static int filelayout_write_done_cb(struct rpc_task *task,
366 struct nfs_pgio_data *data) 356 struct nfs_pgio_header *hdr)
367{ 357{
368 struct nfs_pgio_header *hdr = data->header;
369 int err; 358 int err;
370 359
371 trace_nfs4_pnfs_write(data, task->tk_status); 360 trace_nfs4_pnfs_write(hdr, task->tk_status);
372 err = filelayout_async_handle_error(task, data->args.context->state, 361 err = filelayout_async_handle_error(task, hdr->args.context->state,
373 data->ds_clp, hdr->lseg); 362 hdr->ds_clp, hdr->lseg);
374 363
375 switch (err) { 364 switch (err) {
376 case -NFS4ERR_RESET_TO_MDS: 365 case -NFS4ERR_RESET_TO_MDS:
377 filelayout_reset_write(data); 366 filelayout_reset_write(hdr);
378 return task->tk_status; 367 return task->tk_status;
379 case -EAGAIN: 368 case -EAGAIN:
380 rpc_restart_call_prepare(task); 369 rpc_restart_call_prepare(task);
381 return -EAGAIN; 370 return -EAGAIN;
382 } 371 }
383 372
384 filelayout_set_layoutcommit(data); 373 filelayout_set_layoutcommit(hdr);
385 return 0; 374 return 0;
386} 375}
387 376
@@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
419 408
420static void filelayout_write_prepare(struct rpc_task *task, void *data) 409static void filelayout_write_prepare(struct rpc_task *task, void *data)
421{ 410{
422 struct nfs_pgio_data *wdata = data; 411 struct nfs_pgio_header *hdr = data;
423 412
424 if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { 413 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
425 rpc_exit(task, -EIO); 414 rpc_exit(task, -EIO);
426 return; 415 return;
427 } 416 }
428 if (filelayout_reset_to_mds(wdata->header->lseg)) { 417 if (filelayout_reset_to_mds(hdr->lseg)) {
429 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); 418 dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
430 filelayout_reset_write(wdata); 419 filelayout_reset_write(hdr);
431 rpc_exit(task, 0); 420 rpc_exit(task, 0);
432 return; 421 return;
433 } 422 }
434 if (nfs41_setup_sequence(wdata->ds_clp->cl_session, 423 if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
435 &wdata->args.seq_args, 424 &hdr->args.seq_args,
436 &wdata->res.seq_res, 425 &hdr->res.seq_res,
437 task)) 426 task))
438 return; 427 return;
439 if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, 428 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
440 wdata->args.lock_context, FMODE_WRITE) == -EIO) 429 hdr->args.lock_context, FMODE_WRITE) == -EIO)
441 rpc_exit(task, -EIO); /* lost lock, terminate I/O */ 430 rpc_exit(task, -EIO); /* lost lock, terminate I/O */
442} 431}
443 432
444static void filelayout_write_call_done(struct rpc_task *task, void *data) 433static void filelayout_write_call_done(struct rpc_task *task, void *data)
445{ 434{
446 struct nfs_pgio_data *wdata = data; 435 struct nfs_pgio_header *hdr = data;
447 436
448 if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && 437 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
449 task->tk_status == 0) { 438 task->tk_status == 0) {
450 nfs41_sequence_done(task, &wdata->res.seq_res); 439 nfs41_sequence_done(task, &hdr->res.seq_res);
451 return; 440 return;
452 } 441 }
453 442
454 /* Note this may cause RPC to be resent */ 443 /* Note this may cause RPC to be resent */
455 wdata->header->mds_ops->rpc_call_done(task, data); 444 hdr->mds_ops->rpc_call_done(task, data);
456} 445}
457 446
458static void filelayout_write_count_stats(struct rpc_task *task, void *data) 447static void filelayout_write_count_stats(struct rpc_task *task, void *data)
459{ 448{
460 struct nfs_pgio_data *wdata = data; 449 struct nfs_pgio_header *hdr = data;
461 450
462 rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); 451 rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
463} 452}
464 453
465static void filelayout_write_release(void *data) 454static void filelayout_write_release(void *data)
466{ 455{
467 struct nfs_pgio_data *wdata = data; 456 struct nfs_pgio_header *hdr = data;
468 struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; 457 struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
469 458
470 filelayout_fenceme(lo->plh_inode, lo); 459 filelayout_fenceme(lo->plh_inode, lo);
471 nfs_put_client(wdata->ds_clp); 460 nfs_put_client(hdr->ds_clp);
472 wdata->header->mds_ops->rpc_release(data); 461 hdr->mds_ops->rpc_release(data);
473} 462}
474 463
475static void filelayout_commit_prepare(struct rpc_task *task, void *data) 464static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
529}; 518};
530 519
531static enum pnfs_try_status 520static enum pnfs_try_status
532filelayout_read_pagelist(struct nfs_pgio_data *data) 521filelayout_read_pagelist(struct nfs_pgio_header *hdr)
533{ 522{
534 struct nfs_pgio_header *hdr = data->header;
535 struct pnfs_layout_segment *lseg = hdr->lseg; 523 struct pnfs_layout_segment *lseg = hdr->lseg;
536 struct nfs4_pnfs_ds *ds; 524 struct nfs4_pnfs_ds *ds;
537 struct rpc_clnt *ds_clnt; 525 struct rpc_clnt *ds_clnt;
538 loff_t offset = data->args.offset; 526 loff_t offset = hdr->args.offset;
539 u32 j, idx; 527 u32 j, idx;
540 struct nfs_fh *fh; 528 struct nfs_fh *fh;
541 529
542 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", 530 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
543 __func__, hdr->inode->i_ino, 531 __func__, hdr->inode->i_ino,
544 data->args.pgbase, (size_t)data->args.count, offset); 532 hdr->args.pgbase, (size_t)hdr->args.count, offset);
545 533
546 /* Retrieve the correct rpc_client for the byte range */ 534 /* Retrieve the correct rpc_client for the byte range */
547 j = nfs4_fl_calc_j_index(lseg, offset); 535 j = nfs4_fl_calc_j_index(lseg, offset);
@@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
559 547
560 /* No multipath support. Use first DS */ 548 /* No multipath support. Use first DS */
561 atomic_inc(&ds->ds_clp->cl_count); 549 atomic_inc(&ds->ds_clp->cl_count);
562 data->ds_clp = ds->ds_clp; 550 hdr->ds_clp = ds->ds_clp;
563 data->ds_idx = idx; 551 hdr->ds_idx = idx;
564 fh = nfs4_fl_select_ds_fh(lseg, j); 552 fh = nfs4_fl_select_ds_fh(lseg, j);
565 if (fh) 553 if (fh)
566 data->args.fh = fh; 554 hdr->args.fh = fh;
567 555
568 data->args.offset = filelayout_get_dserver_offset(lseg, offset); 556 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
569 data->mds_offset = offset; 557 hdr->mds_offset = offset;
570 558
571 /* Perform an asynchronous read to ds */ 559 /* Perform an asynchronous read to ds */
572 nfs_initiate_pgio(ds_clnt, data, 560 nfs_initiate_pgio(ds_clnt, hdr,
573 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); 561 &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
574 return PNFS_ATTEMPTED; 562 return PNFS_ATTEMPTED;
575} 563}
576 564
577/* Perform async writes. */ 565/* Perform async writes. */
578static enum pnfs_try_status 566static enum pnfs_try_status
579filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) 567filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
580{ 568{
581 struct nfs_pgio_header *hdr = data->header;
582 struct pnfs_layout_segment *lseg = hdr->lseg; 569 struct pnfs_layout_segment *lseg = hdr->lseg;
583 struct nfs4_pnfs_ds *ds; 570 struct nfs4_pnfs_ds *ds;
584 struct rpc_clnt *ds_clnt; 571 struct rpc_clnt *ds_clnt;
585 loff_t offset = data->args.offset; 572 loff_t offset = hdr->args.offset;
586 u32 j, idx; 573 u32 j, idx;
587 struct nfs_fh *fh; 574 struct nfs_fh *fh;
588 575
@@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
598 return PNFS_NOT_ATTEMPTED; 585 return PNFS_NOT_ATTEMPTED;
599 586
600 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", 587 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
601 __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, 588 __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
602 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); 589 offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
603 590
604 data->pgio_done_cb = filelayout_write_done_cb; 591 hdr->pgio_done_cb = filelayout_write_done_cb;
605 atomic_inc(&ds->ds_clp->cl_count); 592 atomic_inc(&ds->ds_clp->cl_count);
606 data->ds_clp = ds->ds_clp; 593 hdr->ds_clp = ds->ds_clp;
607 data->ds_idx = idx; 594 hdr->ds_idx = idx;
608 fh = nfs4_fl_select_ds_fh(lseg, j); 595 fh = nfs4_fl_select_ds_fh(lseg, j);
609 if (fh) 596 if (fh)
610 data->args.fh = fh; 597 hdr->args.fh = fh;
611 598 hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
612 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
613 599
614 /* Perform an asynchronous write */ 600 /* Perform an asynchronous write */
615 nfs_initiate_pgio(ds_clnt, data, 601 nfs_initiate_pgio(ds_clnt, hdr,
616 &filelayout_write_call_ops, sync, 602 &filelayout_write_call_ops, sync,
617 RPC_TASK_SOFTCONN); 603 RPC_TASK_SOFTCONN);
618 return PNFS_ATTEMPTED; 604 return PNFS_ATTEMPTED;
@@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
1023 1009
1024/* The generic layer is about to remove the req from the commit list. 1010/* The generic layer is about to remove the req from the commit list.
1025 * If this will make the bucket empty, it will need to put the lseg reference. 1011 * If this will make the bucket empty, it will need to put the lseg reference.
1012 * Note this is must be called holding the inode (/cinfo) lock
1026 */ 1013 */
1027static void 1014static void
1028filelayout_clear_request_commit(struct nfs_page *req, 1015filelayout_clear_request_commit(struct nfs_page *req,
@@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req,
1030{ 1017{
1031 struct pnfs_layout_segment *freeme = NULL; 1018 struct pnfs_layout_segment *freeme = NULL;
1032 1019
1033 spin_lock(cinfo->lock);
1034 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) 1020 if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
1035 goto out; 1021 goto out;
1036 cinfo->ds->nwritten--; 1022 cinfo->ds->nwritten--;
@@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req,
1045 } 1031 }
1046out: 1032out:
1047 nfs_request_remove_commit_list(req, cinfo); 1033 nfs_request_remove_commit_list(req, cinfo);
1048 spin_unlock(cinfo->lock); 1034 pnfs_put_lseg_async(freeme);
1049 pnfs_put_lseg(freeme);
1050} 1035}
1051 1036
1052static struct list_head * 1037static void
1053filelayout_choose_commit_list(struct nfs_page *req, 1038filelayout_mark_request_commit(struct nfs_page *req,
1054 struct pnfs_layout_segment *lseg, 1039 struct pnfs_layout_segment *lseg,
1055 struct nfs_commit_info *cinfo) 1040 struct nfs_commit_info *cinfo)
1041
1056{ 1042{
1057 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 1043 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
1058 u32 i, j; 1044 u32 i, j;
1059 struct list_head *list; 1045 struct list_head *list;
1060 struct pnfs_commit_bucket *buckets; 1046 struct pnfs_commit_bucket *buckets;
1061 1047
1062 if (fl->commit_through_mds) 1048 if (fl->commit_through_mds) {
1063 return &cinfo->mds->list; 1049 list = &cinfo->mds->list;
1050 spin_lock(cinfo->lock);
1051 goto mds_commit;
1052 }
1064 1053
1065 /* Note that we are calling nfs4_fl_calc_j_index on each page 1054 /* Note that we are calling nfs4_fl_calc_j_index on each page
1066 * that ends up being committed to a data server. An attractive 1055 * that ends up being committed to a data server. An attractive
@@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req,
1084 } 1073 }
1085 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1074 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1086 cinfo->ds->nwritten++; 1075 cinfo->ds->nwritten++;
1087 spin_unlock(cinfo->lock);
1088 return list;
1089}
1090 1076
1091static void 1077mds_commit:
1092filelayout_mark_request_commit(struct nfs_page *req, 1078 /* nfs_request_add_commit_list(). We need to add req to list without
1093 struct pnfs_layout_segment *lseg, 1079 * dropping cinfo lock.
1094 struct nfs_commit_info *cinfo) 1080 */
1095{ 1081 set_bit(PG_CLEAN, &(req)->wb_flags);
1096 struct list_head *list; 1082 nfs_list_add_request(req, list);
1097 1083 cinfo->mds->ncommit++;
1098 list = filelayout_choose_commit_list(req, lseg, cinfo); 1084 spin_unlock(cinfo->lock);
1099 nfs_request_add_commit_list(req, list, cinfo); 1085 if (!cinfo->dreq) {
1086 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1087 inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1088 BDI_RECLAIMABLE);
1089 __mark_inode_dirty(req->wb_context->dentry->d_inode,
1090 I_DIRTY_DATASYNC);
1091 }
1100} 1092}
1101 1093
1102static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1094static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -1244,15 +1236,63 @@ restart:
1244 spin_unlock(cinfo->lock); 1236 spin_unlock(cinfo->lock);
1245} 1237}
1246 1238
1239/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
1240 * for @page
1241 * @cinfo - commit info for current inode
1242 * @page - page to search for matching head request
1243 *
1244 * Returns a the head request if one is found, otherwise returns NULL.
1245 */
1246static struct nfs_page *
1247filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
1248{
1249 struct nfs_page *freq, *t;
1250 struct pnfs_commit_bucket *b;
1251 int i;
1252
1253 /* Linearly search the commit lists for each bucket until a matching
1254 * request is found */
1255 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1256 list_for_each_entry_safe(freq, t, &b->written, wb_list) {
1257 if (freq->wb_page == page)
1258 return freq->wb_head;
1259 }
1260 list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
1261 if (freq->wb_page == page)
1262 return freq->wb_head;
1263 }
1264 }
1265
1266 return NULL;
1267}
1268
1269static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
1270{
1271 struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
1272 struct pnfs_commit_bucket *bucket = fl_cinfo->buckets;
1273 struct pnfs_layout_segment *freeme;
1274 int i;
1275
1276 for (i = idx; i < fl_cinfo->nbuckets; i++, bucket++) {
1277 if (list_empty(&bucket->committing))
1278 continue;
1279 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1280 spin_lock(cinfo->lock);
1281 freeme = bucket->clseg;
1282 bucket->clseg = NULL;
1283 spin_unlock(cinfo->lock);
1284 pnfs_put_lseg(freeme);
1285 }
1286}
1287
1247static unsigned int 1288static unsigned int
1248alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) 1289alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1249{ 1290{
1250 struct pnfs_ds_commit_info *fl_cinfo; 1291 struct pnfs_ds_commit_info *fl_cinfo;
1251 struct pnfs_commit_bucket *bucket; 1292 struct pnfs_commit_bucket *bucket;
1252 struct nfs_commit_data *data; 1293 struct nfs_commit_data *data;
1253 int i, j; 1294 int i;
1254 unsigned int nreq = 0; 1295 unsigned int nreq = 0;
1255 struct pnfs_layout_segment *freeme;
1256 1296
1257 fl_cinfo = cinfo->ds; 1297 fl_cinfo = cinfo->ds;
1258 bucket = fl_cinfo->buckets; 1298 bucket = fl_cinfo->buckets;
@@ -1272,16 +1312,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1272 } 1312 }
1273 1313
1274 /* Clean up on error */ 1314 /* Clean up on error */
1275 for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { 1315 filelayout_retry_commit(cinfo, i);
1276 if (list_empty(&bucket->committing))
1277 continue;
1278 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1279 spin_lock(cinfo->lock);
1280 freeme = bucket->clseg;
1281 bucket->clseg = NULL;
1282 spin_unlock(cinfo->lock);
1283 pnfs_put_lseg(freeme);
1284 }
1285 /* Caller will clean up entries put on list */ 1316 /* Caller will clean up entries put on list */
1286 return nreq; 1317 return nreq;
1287} 1318}
@@ -1301,8 +1332,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
1301 data->lseg = NULL; 1332 data->lseg = NULL;
1302 list_add(&data->pages, &list); 1333 list_add(&data->pages, &list);
1303 nreq++; 1334 nreq++;
1304 } else 1335 } else {
1305 nfs_retry_commit(mds_pages, NULL, cinfo); 1336 nfs_retry_commit(mds_pages, NULL, cinfo);
1337 filelayout_retry_commit(cinfo, 0);
1338 cinfo->completion_ops->error_cleanup(NFS_I(inode));
1339 return -ENOMEM;
1340 }
1306 } 1341 }
1307 1342
1308 nreq += alloc_ds_commits(cinfo, &list); 1343 nreq += alloc_ds_commits(cinfo, &list);
@@ -1380,6 +1415,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
1380 .clear_request_commit = filelayout_clear_request_commit, 1415 .clear_request_commit = filelayout_clear_request_commit,
1381 .scan_commit_lists = filelayout_scan_commit_lists, 1416 .scan_commit_lists = filelayout_scan_commit_lists,
1382 .recover_commit_reqs = filelayout_recover_commit_reqs, 1417 .recover_commit_reqs = filelayout_recover_commit_reqs,
1418 .search_commit_reqs = filelayout_search_commit_reqs,
1383 .commit_pagelist = filelayout_commit_pagelist, 1419 .commit_pagelist = filelayout_commit_pagelist,
1384 .read_pagelist = filelayout_read_pagelist, 1420 .read_pagelist = filelayout_read_pagelist,
1385 .write_pagelist = filelayout_write_pagelist, 1421 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index e2a0361e24c6..8540516f4d71 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode,
695 if (pdev == NULL) 695 if (pdev == NULL)
696 return NULL; 696 return NULL;
697 697
698 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); 698 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
699 if (pages == NULL) { 699 if (pages == NULL) {
700 kfree(pdev); 700 kfree(pdev);
701 return NULL; 701 return NULL;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b94f80420a58..880618a8b048 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
112 * if the dentry tree reaches them; however if the dentry already 112 * if the dentry tree reaches them; however if the dentry already
113 * exists, we'll pick it up at this point and use it as the root 113 * exists, we'll pick it up at this point and use it as the root
114 */ 114 */
115 ret = d_obtain_alias(inode); 115 ret = d_obtain_root(inode);
116 if (IS_ERR(ret)) { 116 if (IS_ERR(ret)) {
117 dprintk("nfs_get_root: get root dentry failed\n"); 117 dprintk("nfs_get_root: get root dentry failed\n");
118 goto out; 118 goto out;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 567983d2c0eb..7dd55b745c4d 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -174,7 +174,9 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
174 174
175static struct key_type key_type_id_resolver = { 175static struct key_type key_type_id_resolver = {
176 .name = "id_resolver", 176 .name = "id_resolver",
177 .instantiate = user_instantiate, 177 .preparse = user_preparse,
178 .free_preparse = user_free_preparse,
179 .instantiate = generic_key_instantiate,
178 .match = user_match, 180 .match = user_match,
179 .revoke = user_revoke, 181 .revoke = user_revoke,
180 .destroy = user_destroy, 182 .destroy = user_destroy,
@@ -282,6 +284,8 @@ static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
282 desc, "", 0, idmap); 284 desc, "", 0, idmap);
283 mutex_unlock(&idmap->idmap_mutex); 285 mutex_unlock(&idmap->idmap_mutex);
284 } 286 }
287 if (!IS_ERR(rkey))
288 set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
285 289
286 kfree(desc); 290 kfree(desc);
287 return rkey; 291 return rkey;
@@ -394,7 +398,9 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
394 398
395static struct key_type key_type_id_resolver_legacy = { 399static struct key_type key_type_id_resolver_legacy = {
396 .name = "id_legacy", 400 .name = "id_legacy",
397 .instantiate = user_instantiate, 401 .preparse = user_preparse,
402 .free_preparse = user_free_preparse,
403 .instantiate = generic_key_instantiate,
398 .match = user_match, 404 .match = user_match,
399 .revoke = user_revoke, 405 .revoke = user_revoke,
400 .destroy = user_destroy, 406 .destroy = user_destroy,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index abd37a380535..577a36f0a510 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
1002} 1002}
1003EXPORT_SYMBOL_GPL(nfs_revalidate_inode); 1003EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
1004 1004
1005int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
1006{
1007 if (!(NFS_I(inode)->cache_validity &
1008 (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
1009 && !nfs_attribute_cache_expired(inode))
1010 return NFS_STALE(inode) ? -ESTALE : 0;
1011 return -ECHILD;
1012}
1013
1005static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) 1014static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
1006{ 1015{
1007 struct nfs_inode *nfsi = NFS_I(inode); 1016 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1840,11 +1849,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
1840static int nfs_net_init(struct net *net) 1849static int nfs_net_init(struct net *net)
1841{ 1850{
1842 nfs_clients_init(net); 1851 nfs_clients_init(net);
1843 return 0; 1852 return nfs_fs_proc_net_init(net);
1844} 1853}
1845 1854
1846static void nfs_net_exit(struct net *net) 1855static void nfs_net_exit(struct net *net)
1847{ 1856{
1857 nfs_fs_proc_net_exit(net);
1848 nfs_cleanup_cb_ident_idr(net); 1858 nfs_cleanup_cb_ident_idr(net);
1849} 1859}
1850 1860
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 617f36611d4a..9056622d2230 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
195#ifdef CONFIG_PROC_FS 195#ifdef CONFIG_PROC_FS
196extern int __init nfs_fs_proc_init(void); 196extern int __init nfs_fs_proc_init(void);
197extern void nfs_fs_proc_exit(void); 197extern void nfs_fs_proc_exit(void);
198extern int nfs_fs_proc_net_init(struct net *net);
199extern void nfs_fs_proc_net_exit(struct net *net);
198#else 200#else
201static inline int nfs_fs_proc_net_init(struct net *net)
202{
203 return 0;
204}
205static inline void nfs_fs_proc_net_exit(struct net *net)
206{
207}
199static inline int nfs_fs_proc_init(void) 208static inline int nfs_fs_proc_init(void)
200{ 209{
201 return 0; 210 return 0;
@@ -238,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
238int nfs_iocounter_wait(struct nfs_io_counter *c); 247int nfs_iocounter_wait(struct nfs_io_counter *c);
239 248
240extern const struct nfs_pageio_ops nfs_pgio_rw_ops; 249extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
241struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); 250struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
242void nfs_rw_header_free(struct nfs_pgio_header *); 251void nfs_pgio_header_free(struct nfs_pgio_header *);
243void nfs_pgio_data_release(struct nfs_pgio_data *); 252void nfs_pgio_data_destroy(struct nfs_pgio_header *);
244int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); 253int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
245int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, 254int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
246 const struct rpc_call_ops *, int, int); 255 const struct rpc_call_ops *, int, int);
247void nfs_free_request(struct nfs_page *req); 256void nfs_free_request(struct nfs_page *req);
248 257
@@ -442,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
442void nfs_mark_request_commit(struct nfs_page *req, 451void nfs_mark_request_commit(struct nfs_page *req,
443 struct pnfs_layout_segment *lseg, 452 struct pnfs_layout_segment *lseg,
444 struct nfs_commit_info *cinfo); 453 struct nfs_commit_info *cinfo);
454int nfs_write_need_commit(struct nfs_pgio_header *);
445int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 455int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
446 int how, struct nfs_commit_info *cinfo); 456 int how, struct nfs_commit_info *cinfo);
447void nfs_retry_commit(struct list_head *page_list, 457void nfs_retry_commit(struct list_head *page_list,
@@ -482,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
482extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); 492extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
483 493
484/* nfs4proc.c */ 494/* nfs4proc.c */
485extern void __nfs4_read_done_cb(struct nfs_pgio_data *); 495extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
486extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, 496extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
487 const struct rpc_timeout *timeparms, 497 const struct rpc_timeout *timeparms,
488 const char *ip_addr); 498 const char *ip_addr);
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 8ee1fab83268..ef221fb8a183 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -29,6 +29,9 @@ struct nfs_net {
29#endif 29#endif
30 spinlock_t nfs_client_lock; 30 spinlock_t nfs_client_lock;
31 struct timespec boot_time; 31 struct timespec boot_time;
32#ifdef CONFIG_PROC_FS
33 struct proc_dir_entry *proc_nfsfs;
34#endif
32}; 35};
33 36
34extern int nfs_net_id; 37extern int nfs_net_id;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 8f854dde4150..d0fec260132a 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -256,7 +256,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
256 char *p = data + *result; 256 char *p = data + *result;
257 257
258 acl = get_acl(inode, type); 258 acl = get_acl(inode, type);
259 if (!acl) 259 if (IS_ERR_OR_NULL(acl))
260 return 0; 260 return 0;
261 261
262 posix_acl_release(acl); 262 posix_acl_release(acl);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f0afa291fd58..809670eba52a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
795 return status; 795 return status;
796} 796}
797 797
798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 798static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
799{ 799{
800 struct inode *inode = data->header->inode; 800 struct inode *inode = hdr->inode;
801 801
802 if (nfs3_async_handle_jukebox(task, inode)) 802 if (nfs3_async_handle_jukebox(task, inode))
803 return -EAGAIN; 803 return -EAGAIN;
804 804
805 nfs_invalidate_atime(inode); 805 nfs_invalidate_atime(inode);
806 nfs_refresh_inode(inode, &data->fattr); 806 nfs_refresh_inode(inode, &hdr->fattr);
807 return 0; 807 return 0;
808} 808}
809 809
810static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 810static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
811 struct rpc_message *msg)
811{ 812{
812 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; 813 msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
813} 814}
814 815
815static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 816static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
817 struct nfs_pgio_header *hdr)
816{ 818{
817 rpc_call_start(task); 819 rpc_call_start(task);
818 return 0; 820 return 0;
819} 821}
820 822
821static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 823static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
822{ 824{
823 struct inode *inode = data->header->inode; 825 struct inode *inode = hdr->inode;
824 826
825 if (nfs3_async_handle_jukebox(task, inode)) 827 if (nfs3_async_handle_jukebox(task, inode))
826 return -EAGAIN; 828 return -EAGAIN;
827 if (task->tk_status >= 0) 829 if (task->tk_status >= 0)
828 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 830 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
829 return 0; 831 return 0;
830} 832}
831 833
832static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 834static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
835 struct rpc_message *msg)
833{ 836{
834 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; 837 msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
835} 838}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa51941..92193eddb41d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -54,7 +54,7 @@ struct nfs4_minor_version_ops {
54 const nfs4_stateid *); 54 const nfs4_stateid *);
55 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, 55 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
56 struct nfs_fsinfo *); 56 struct nfs_fsinfo *);
57 int (*free_lock_state)(struct nfs_server *, 57 void (*free_lock_state)(struct nfs_server *,
58 struct nfs4_lock_state *); 58 struct nfs4_lock_state *);
59 const struct rpc_call_ops *call_sync_ops; 59 const struct rpc_call_ops *call_sync_ops;
60 const struct nfs4_state_recovery_ops *reboot_recovery_ops; 60 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
@@ -129,27 +129,17 @@ enum {
129 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) 129 * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
130 */ 130 */
131 131
132struct nfs4_lock_owner {
133 unsigned int lo_type;
134#define NFS4_ANY_LOCK_TYPE (0U)
135#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
136#define NFS4_POSIX_LOCK_TYPE (1U << 1)
137 union {
138 fl_owner_t posix_owner;
139 pid_t flock_owner;
140 } lo_u;
141};
142
143struct nfs4_lock_state { 132struct nfs4_lock_state {
144 struct list_head ls_locks; /* Other lock stateids */ 133 struct list_head ls_locks; /* Other lock stateids */
145 struct nfs4_state * ls_state; /* Pointer to open state */ 134 struct nfs4_state * ls_state; /* Pointer to open state */
146#define NFS_LOCK_INITIALIZED 0 135#define NFS_LOCK_INITIALIZED 0
147#define NFS_LOCK_LOST 1 136#define NFS_LOCK_LOST 1
148 unsigned long ls_flags; 137 unsigned long ls_flags;
149 struct nfs_seqid_counter ls_seqid; 138 struct nfs_seqid_counter ls_seqid;
150 nfs4_stateid ls_stateid; 139 nfs4_stateid ls_stateid;
151 atomic_t ls_count; 140 atomic_t ls_count;
152 struct nfs4_lock_owner ls_owner; 141 fl_owner_t ls_owner;
142 struct work_struct ls_release;
153}; 143};
154 144
155/* bits for nfs4_state->flags */ 145/* bits for nfs4_state->flags */
@@ -337,11 +327,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
337 */ 327 */
338static inline void 328static inline void
339nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 329nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
340 struct rpc_message *msg, struct nfs_pgio_data *wdata) 330 struct rpc_message *msg, struct nfs_pgio_header *hdr)
341{ 331{
342 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && 332 if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
343 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) 333 !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
344 wdata->args.stable = NFS_FILE_SYNC; 334 hdr->args.stable = NFS_FILE_SYNC;
345} 335}
346#else /* CONFIG_NFS_v4_1 */ 336#else /* CONFIG_NFS_v4_1 */
347static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 337static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -369,7 +359,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
369 359
370static inline void 360static inline void
371nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, 361nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
372 struct rpc_message *msg, struct nfs_pgio_data *wdata) 362 struct rpc_message *msg, struct nfs_pgio_header *hdr)
373{ 363{
374} 364}
375#endif /* CONFIG_NFS_V4_1 */ 365#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index aa9ef4876046..53e435a95260 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -855,6 +855,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
855 }; 855 };
856 struct rpc_timeout ds_timeout; 856 struct rpc_timeout ds_timeout;
857 struct nfs_client *clp; 857 struct nfs_client *clp;
858 char buf[INET6_ADDRSTRLEN + 1];
859
860 if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
861 return ERR_PTR(-EINVAL);
862 cl_init.hostname = buf;
858 863
859 /* 864 /*
860 * Set an authflavor equual to the MDS value. Use the MDS nfs_client 865 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bf3d97cc5a0..75ae8d22f067 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1952 return status; 1952 return status;
1953} 1953}
1954 1954
1955/*
1956 * Additional permission checks in order to distinguish between an
1957 * open for read, and an open for execute. This works around the
1958 * fact that NFSv4 OPEN treats read and execute permissions as being
1959 * the same.
1960 * Note that in the non-execute case, we want to turn off permission
1961 * checking if we just created a new file (POSIX open() semantics).
1962 */
1955static int nfs4_opendata_access(struct rpc_cred *cred, 1963static int nfs4_opendata_access(struct rpc_cred *cred,
1956 struct nfs4_opendata *opendata, 1964 struct nfs4_opendata *opendata,
1957 struct nfs4_state *state, fmode_t fmode, 1965 struct nfs4_state *state, fmode_t fmode,
@@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1966 return 0; 1974 return 0;
1967 1975
1968 mask = 0; 1976 mask = 0;
1969 /* don't check MAY_WRITE - a newly created file may not have 1977 /*
1970 * write mode bits, but POSIX allows the creating process to write. 1978 * Use openflags to check for exec, because fmode won't
1971 * use openflags to check for exec, because fmode won't 1979 * always have FMODE_EXEC set when file open for exec.
1972 * always have FMODE_EXEC set when file open for exec. */ 1980 */
1973 if (openflags & __FMODE_EXEC) { 1981 if (openflags & __FMODE_EXEC) {
1974 /* ONLY check for exec rights */ 1982 /* ONLY check for exec rights */
1975 mask = MAY_EXEC; 1983 mask = MAY_EXEC;
1976 } else if (fmode & FMODE_READ) 1984 } else if ((fmode & FMODE_READ) && !opendata->file_created)
1977 mask = MAY_READ; 1985 mask = MAY_READ;
1978 1986
1979 cache.cred = cred; 1987 cache.cred = cred;
@@ -2216,8 +2224,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2216 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 2224 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
2217 2225
2218 ret = _nfs4_proc_open(opendata); 2226 ret = _nfs4_proc_open(opendata);
2219 if (ret != 0) 2227 if (ret != 0) {
2228 if (ret == -ENOENT) {
2229 d_drop(opendata->dentry);
2230 d_add(opendata->dentry, NULL);
2231 nfs_set_verifier(opendata->dentry,
2232 nfs_save_change_attribute(opendata->dir->d_inode));
2233 }
2220 goto out; 2234 goto out;
2235 }
2221 2236
2222 state = nfs4_opendata_to_nfs4_state(opendata); 2237 state = nfs4_opendata_to_nfs4_state(opendata);
2223 ret = PTR_ERR(state); 2238 ret = PTR_ERR(state);
@@ -2647,6 +2662,48 @@ static const struct rpc_call_ops nfs4_close_ops = {
2647 .rpc_release = nfs4_free_closedata, 2662 .rpc_release = nfs4_free_closedata,
2648}; 2663};
2649 2664
2665static bool nfs4_state_has_opener(struct nfs4_state *state)
2666{
2667 /* first check existing openers */
2668 if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 &&
2669 state->n_rdonly != 0)
2670 return true;
2671
2672 if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 &&
2673 state->n_wronly != 0)
2674 return true;
2675
2676 if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 &&
2677 state->n_rdwr != 0)
2678 return true;
2679
2680 return false;
2681}
2682
2683static bool nfs4_roc(struct inode *inode)
2684{
2685 struct nfs_inode *nfsi = NFS_I(inode);
2686 struct nfs_open_context *ctx;
2687 struct nfs4_state *state;
2688
2689 spin_lock(&inode->i_lock);
2690 list_for_each_entry(ctx, &nfsi->open_files, list) {
2691 state = ctx->state;
2692 if (state == NULL)
2693 continue;
2694 if (nfs4_state_has_opener(state)) {
2695 spin_unlock(&inode->i_lock);
2696 return false;
2697 }
2698 }
2699 spin_unlock(&inode->i_lock);
2700
2701 if (nfs4_check_delegation(inode, FMODE_READ))
2702 return false;
2703
2704 return pnfs_roc(inode);
2705}
2706
2650/* 2707/*
2651 * It is possible for data to be read/written from a mem-mapped file 2708 * It is possible for data to be read/written from a mem-mapped file
2652 * after the sys_close call (which hits the vfs layer as a flush). 2709 * after the sys_close call (which hits the vfs layer as a flush).
@@ -2697,7 +2754,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
2697 calldata->res.fattr = &calldata->fattr; 2754 calldata->res.fattr = &calldata->fattr;
2698 calldata->res.seqid = calldata->arg.seqid; 2755 calldata->res.seqid = calldata->arg.seqid;
2699 calldata->res.server = server; 2756 calldata->res.server = server;
2700 calldata->roc = pnfs_roc(state->inode); 2757 calldata->roc = nfs4_roc(state->inode);
2701 nfs_sb_active(calldata->inode->i_sb); 2758 nfs_sb_active(calldata->inode->i_sb);
2702 2759
2703 msg.rpc_argp = &calldata->arg; 2760 msg.rpc_argp = &calldata->arg;
@@ -4033,24 +4090,25 @@ static bool nfs4_error_stateid_expired(int err)
4033 return false; 4090 return false;
4034} 4091}
4035 4092
4036void __nfs4_read_done_cb(struct nfs_pgio_data *data) 4093void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
4037{ 4094{
4038 nfs_invalidate_atime(data->header->inode); 4095 nfs_invalidate_atime(hdr->inode);
4039} 4096}
4040 4097
4041static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) 4098static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
4042{ 4099{
4043 struct nfs_server *server = NFS_SERVER(data->header->inode); 4100 struct nfs_server *server = NFS_SERVER(hdr->inode);
4044 4101
4045 trace_nfs4_read(data, task->tk_status); 4102 trace_nfs4_read(hdr, task->tk_status);
4046 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 4103 if (nfs4_async_handle_error(task, server,
4104 hdr->args.context->state) == -EAGAIN) {
4047 rpc_restart_call_prepare(task); 4105 rpc_restart_call_prepare(task);
4048 return -EAGAIN; 4106 return -EAGAIN;
4049 } 4107 }
4050 4108
4051 __nfs4_read_done_cb(data); 4109 __nfs4_read_done_cb(hdr);
4052 if (task->tk_status > 0) 4110 if (task->tk_status > 0)
4053 renew_lease(server, data->timestamp); 4111 renew_lease(server, hdr->timestamp);
4054 return 0; 4112 return 0;
4055} 4113}
4056 4114
@@ -4068,54 +4126,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
4068 return true; 4126 return true;
4069} 4127}
4070 4128
4071static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 4129static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
4072{ 4130{
4073 4131
4074 dprintk("--> %s\n", __func__); 4132 dprintk("--> %s\n", __func__);
4075 4133
4076 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4134 if (!nfs4_sequence_done(task, &hdr->res.seq_res))
4077 return -EAGAIN; 4135 return -EAGAIN;
4078 if (nfs4_read_stateid_changed(task, &data->args)) 4136 if (nfs4_read_stateid_changed(task, &hdr->args))
4079 return -EAGAIN; 4137 return -EAGAIN;
4080 return data->pgio_done_cb ? data->pgio_done_cb(task, data) : 4138 return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
4081 nfs4_read_done_cb(task, data); 4139 nfs4_read_done_cb(task, hdr);
4082} 4140}
4083 4141
4084static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 4142static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
4143 struct rpc_message *msg)
4085{ 4144{
4086 data->timestamp = jiffies; 4145 hdr->timestamp = jiffies;
4087 data->pgio_done_cb = nfs4_read_done_cb; 4146 hdr->pgio_done_cb = nfs4_read_done_cb;
4088 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 4147 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
4089 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 4148 nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
4090} 4149}
4091 4150
4092static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 4151static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
4152 struct nfs_pgio_header *hdr)
4093{ 4153{
4094 if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), 4154 if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
4095 &data->args.seq_args, 4155 &hdr->args.seq_args,
4096 &data->res.seq_res, 4156 &hdr->res.seq_res,
4097 task)) 4157 task))
4098 return 0; 4158 return 0;
4099 if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, 4159 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
4100 data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) 4160 hdr->args.lock_context,
4161 hdr->rw_ops->rw_mode) == -EIO)
4101 return -EIO; 4162 return -EIO;
4102 if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) 4163 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
4103 return -EIO; 4164 return -EIO;
4104 return 0; 4165 return 0;
4105} 4166}
4106 4167
4107static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) 4168static int nfs4_write_done_cb(struct rpc_task *task,
4169 struct nfs_pgio_header *hdr)
4108{ 4170{
4109 struct inode *inode = data->header->inode; 4171 struct inode *inode = hdr->inode;
4110 4172
4111 trace_nfs4_write(data, task->tk_status); 4173 trace_nfs4_write(hdr, task->tk_status);
4112 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 4174 if (nfs4_async_handle_error(task, NFS_SERVER(inode),
4175 hdr->args.context->state) == -EAGAIN) {
4113 rpc_restart_call_prepare(task); 4176 rpc_restart_call_prepare(task);
4114 return -EAGAIN; 4177 return -EAGAIN;
4115 } 4178 }
4116 if (task->tk_status >= 0) { 4179 if (task->tk_status >= 0) {
4117 renew_lease(NFS_SERVER(inode), data->timestamp); 4180 renew_lease(NFS_SERVER(inode), hdr->timestamp);
4118 nfs_post_op_update_inode_force_wcc(inode, &data->fattr); 4181 nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
4119 } 4182 }
4120 return 0; 4183 return 0;
4121} 4184}
@@ -4134,23 +4197,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
4134 return true; 4197 return true;
4135} 4198}
4136 4199
4137static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 4200static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
4138{ 4201{
4139 if (!nfs4_sequence_done(task, &data->res.seq_res)) 4202 if (!nfs4_sequence_done(task, &hdr->res.seq_res))
4140 return -EAGAIN; 4203 return -EAGAIN;
4141 if (nfs4_write_stateid_changed(task, &data->args)) 4204 if (nfs4_write_stateid_changed(task, &hdr->args))
4142 return -EAGAIN; 4205 return -EAGAIN;
4143 return data->pgio_done_cb ? data->pgio_done_cb(task, data) : 4206 return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
4144 nfs4_write_done_cb(task, data); 4207 nfs4_write_done_cb(task, hdr);
4145} 4208}
4146 4209
4147static 4210static
4148bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) 4211bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
4149{ 4212{
4150 const struct nfs_pgio_header *hdr = data->header;
4151
4152 /* Don't request attributes for pNFS or O_DIRECT writes */ 4213 /* Don't request attributes for pNFS or O_DIRECT writes */
4153 if (data->ds_clp != NULL || hdr->dreq != NULL) 4214 if (hdr->ds_clp != NULL || hdr->dreq != NULL)
4154 return false; 4215 return false;
4155 /* Otherwise, request attributes if and only if we don't hold 4216 /* Otherwise, request attributes if and only if we don't hold
4156 * a delegation 4217 * a delegation
@@ -4158,23 +4219,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
4158 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; 4219 return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
4159} 4220}
4160 4221
4161static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 4222static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
4223 struct rpc_message *msg)
4162{ 4224{
4163 struct nfs_server *server = NFS_SERVER(data->header->inode); 4225 struct nfs_server *server = NFS_SERVER(hdr->inode);
4164 4226
4165 if (!nfs4_write_need_cache_consistency_data(data)) { 4227 if (!nfs4_write_need_cache_consistency_data(hdr)) {
4166 data->args.bitmask = NULL; 4228 hdr->args.bitmask = NULL;
4167 data->res.fattr = NULL; 4229 hdr->res.fattr = NULL;
4168 } else 4230 } else
4169 data->args.bitmask = server->cache_consistency_bitmask; 4231 hdr->args.bitmask = server->cache_consistency_bitmask;
4170 4232
4171 if (!data->pgio_done_cb) 4233 if (!hdr->pgio_done_cb)
4172 data->pgio_done_cb = nfs4_write_done_cb; 4234 hdr->pgio_done_cb = nfs4_write_done_cb;
4173 data->res.server = server; 4235 hdr->res.server = server;
4174 data->timestamp = jiffies; 4236 hdr->timestamp = jiffies;
4175 4237
4176 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 4238 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
4177 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 4239 nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
4178} 4240}
4179 4241
4180static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) 4242static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -4881,6 +4943,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
4881 return scnprintf(buf, len, "tcp"); 4943 return scnprintf(buf, len, "tcp");
4882} 4944}
4883 4945
4946static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
4947{
4948 struct nfs4_setclientid *sc = calldata;
4949
4950 if (task->tk_status == 0)
4951 sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
4952}
4953
4954static const struct rpc_call_ops nfs4_setclientid_ops = {
4955 .rpc_call_done = nfs4_setclientid_done,
4956};
4957
4884/** 4958/**
4885 * nfs4_proc_setclientid - Negotiate client ID 4959 * nfs4_proc_setclientid - Negotiate client ID
4886 * @clp: state data structure 4960 * @clp: state data structure
@@ -4907,6 +4981,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4907 .rpc_resp = res, 4981 .rpc_resp = res,
4908 .rpc_cred = cred, 4982 .rpc_cred = cred,
4909 }; 4983 };
4984 struct rpc_task *task;
4985 struct rpc_task_setup task_setup_data = {
4986 .rpc_client = clp->cl_rpcclient,
4987 .rpc_message = &msg,
4988 .callback_ops = &nfs4_setclientid_ops,
4989 .callback_data = &setclientid,
4990 .flags = RPC_TASK_TIMEOUT,
4991 };
4910 int status; 4992 int status;
4911 4993
4912 /* nfs_client_id4 */ 4994 /* nfs_client_id4 */
@@ -4933,7 +5015,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4933 dprintk("NFS call setclientid auth=%s, '%.*s'\n", 5015 dprintk("NFS call setclientid auth=%s, '%.*s'\n",
4934 clp->cl_rpcclient->cl_auth->au_ops->au_name, 5016 clp->cl_rpcclient->cl_auth->au_ops->au_name,
4935 setclientid.sc_name_len, setclientid.sc_name); 5017 setclientid.sc_name_len, setclientid.sc_name);
4936 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 5018 task = rpc_run_task(&task_setup_data);
5019 if (IS_ERR(task)) {
5020 status = PTR_ERR(task);
5021 goto out;
5022 }
5023 status = task->tk_status;
5024 if (setclientid.sc_cred) {
5025 clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
5026 put_rpccred(setclientid.sc_cred);
5027 }
5028 rpc_put_task(task);
5029out:
4937 trace_nfs4_setclientid(clp, status); 5030 trace_nfs4_setclientid(clp, status);
4938 dprintk("NFS reply setclientid: %d\n", status); 5031 dprintk("NFS reply setclientid: %d\n", status);
4939 return status; 5032 return status;
@@ -4975,6 +5068,9 @@ struct nfs4_delegreturndata {
4975 unsigned long timestamp; 5068 unsigned long timestamp;
4976 struct nfs_fattr fattr; 5069 struct nfs_fattr fattr;
4977 int rpc_status; 5070 int rpc_status;
5071 struct inode *inode;
5072 bool roc;
5073 u32 roc_barrier;
4978}; 5074};
4979 5075
4980static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) 5076static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -4988,7 +5084,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4988 switch (task->tk_status) { 5084 switch (task->tk_status) {
4989 case 0: 5085 case 0:
4990 renew_lease(data->res.server, data->timestamp); 5086 renew_lease(data->res.server, data->timestamp);
4991 break;
4992 case -NFS4ERR_ADMIN_REVOKED: 5087 case -NFS4ERR_ADMIN_REVOKED:
4993 case -NFS4ERR_DELEG_REVOKED: 5088 case -NFS4ERR_DELEG_REVOKED:
4994 case -NFS4ERR_BAD_STATEID: 5089 case -NFS4ERR_BAD_STATEID:
@@ -4996,6 +5091,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
4996 case -NFS4ERR_STALE_STATEID: 5091 case -NFS4ERR_STALE_STATEID:
4997 case -NFS4ERR_EXPIRED: 5092 case -NFS4ERR_EXPIRED:
4998 task->tk_status = 0; 5093 task->tk_status = 0;
5094 if (data->roc)
5095 pnfs_roc_set_barrier(data->inode, data->roc_barrier);
4999 break; 5096 break;
5000 default: 5097 default:
5001 if (nfs4_async_handle_error(task, data->res.server, NULL) == 5098 if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@@ -5009,6 +5106,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
5009 5106
5010static void nfs4_delegreturn_release(void *calldata) 5107static void nfs4_delegreturn_release(void *calldata)
5011{ 5108{
5109 struct nfs4_delegreturndata *data = calldata;
5110
5111 if (data->roc)
5112 pnfs_roc_release(data->inode);
5012 kfree(calldata); 5113 kfree(calldata);
5013} 5114}
5014 5115
@@ -5018,6 +5119,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
5018 5119
5019 d_data = (struct nfs4_delegreturndata *)data; 5120 d_data = (struct nfs4_delegreturndata *)data;
5020 5121
5122 if (d_data->roc &&
5123 pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task))
5124 return;
5125
5021 nfs4_setup_sequence(d_data->res.server, 5126 nfs4_setup_sequence(d_data->res.server,
5022 &d_data->args.seq_args, 5127 &d_data->args.seq_args,
5023 &d_data->res.seq_res, 5128 &d_data->res.seq_res,
@@ -5061,6 +5166,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
5061 nfs_fattr_init(data->res.fattr); 5166 nfs_fattr_init(data->res.fattr);
5062 data->timestamp = jiffies; 5167 data->timestamp = jiffies;
5063 data->rpc_status = 0; 5168 data->rpc_status = 0;
5169 data->inode = inode;
5170 data->roc = list_empty(&NFS_I(inode)->open_files) ?
5171 pnfs_roc(inode) : false;
5064 5172
5065 task_setup_data.callback_data = data; 5173 task_setup_data.callback_data = data;
5066 msg.rpc_argp = &data->args; 5174 msg.rpc_argp = &data->args;
@@ -5834,8 +5942,10 @@ struct nfs_release_lockowner_data {
5834static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) 5942static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
5835{ 5943{
5836 struct nfs_release_lockowner_data *data = calldata; 5944 struct nfs_release_lockowner_data *data = calldata;
5837 nfs40_setup_sequence(data->server, 5945 struct nfs_server *server = data->server;
5838 &data->args.seq_args, &data->res.seq_res, task); 5946 nfs40_setup_sequence(server, &data->args.seq_args,
5947 &data->res.seq_res, task);
5948 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
5839 data->timestamp = jiffies; 5949 data->timestamp = jiffies;
5840} 5950}
5841 5951
@@ -5852,6 +5962,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
5852 break; 5962 break;
5853 case -NFS4ERR_STALE_CLIENTID: 5963 case -NFS4ERR_STALE_CLIENTID:
5854 case -NFS4ERR_EXPIRED: 5964 case -NFS4ERR_EXPIRED:
5965 nfs4_schedule_lease_recovery(server->nfs_client);
5966 break;
5855 case -NFS4ERR_LEASE_MOVED: 5967 case -NFS4ERR_LEASE_MOVED:
5856 case -NFS4ERR_DELAY: 5968 case -NFS4ERR_DELAY:
5857 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) 5969 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
@@ -5872,7 +5984,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = {
5872 .rpc_release = nfs4_release_lockowner_release, 5984 .rpc_release = nfs4_release_lockowner_release,
5873}; 5985};
5874 5986
5875static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) 5987static void
5988nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
5876{ 5989{
5877 struct nfs_release_lockowner_data *data; 5990 struct nfs_release_lockowner_data *data;
5878 struct rpc_message msg = { 5991 struct rpc_message msg = {
@@ -5880,11 +5993,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
5880 }; 5993 };
5881 5994
5882 if (server->nfs_client->cl_mvops->minor_version != 0) 5995 if (server->nfs_client->cl_mvops->minor_version != 0)
5883 return -EINVAL; 5996 return;
5884 5997
5885 data = kmalloc(sizeof(*data), GFP_NOFS); 5998 data = kmalloc(sizeof(*data), GFP_NOFS);
5886 if (!data) 5999 if (!data)
5887 return -ENOMEM; 6000 return;
5888 data->lsp = lsp; 6001 data->lsp = lsp;
5889 data->server = server; 6002 data->server = server;
5890 data->args.lock_owner.clientid = server->nfs_client->cl_clientid; 6003 data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5895,7 +6008,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
5895 msg.rpc_resp = &data->res; 6008 msg.rpc_resp = &data->res;
5896 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); 6009 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
5897 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); 6010 rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
5898 return 0;
5899} 6011}
5900 6012
5901#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" 6013#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -8182,7 +8294,8 @@ static int nfs41_free_stateid(struct nfs_server *server,
8182 return ret; 8294 return ret;
8183} 8295}
8184 8296
8185static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) 8297static void
8298nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
8186{ 8299{
8187 struct rpc_task *task; 8300 struct rpc_task *task;
8188 struct rpc_cred *cred = lsp->ls_state->owner->so_cred; 8301 struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
@@ -8190,9 +8303,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
8190 task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); 8303 task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
8191 nfs4_free_lock_state(server, lsp); 8304 nfs4_free_lock_state(server, lsp);
8192 if (IS_ERR(task)) 8305 if (IS_ERR(task))
8193 return PTR_ERR(task); 8306 return;
8194 rpc_put_task(task); 8307 rpc_put_task(task);
8195 return 0;
8196} 8308}
8197 8309
8198static bool nfs41_match_stateid(const nfs4_stateid *s1, 8310static bool nfs41_match_stateid(const nfs4_stateid *s1,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 42f121182167..a043f618cd5a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -787,33 +787,36 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
787 * that is compatible with current->files 787 * that is compatible with current->files
788 */ 788 */
789static struct nfs4_lock_state * 789static struct nfs4_lock_state *
790__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 790__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
791{ 791{
792 struct nfs4_lock_state *pos; 792 struct nfs4_lock_state *pos;
793 list_for_each_entry(pos, &state->lock_states, ls_locks) { 793 list_for_each_entry(pos, &state->lock_states, ls_locks) {
794 if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) 794 if (pos->ls_owner != fl_owner)
795 continue; 795 continue;
796 switch (pos->ls_owner.lo_type) {
797 case NFS4_POSIX_LOCK_TYPE:
798 if (pos->ls_owner.lo_u.posix_owner != fl_owner)
799 continue;
800 break;
801 case NFS4_FLOCK_LOCK_TYPE:
802 if (pos->ls_owner.lo_u.flock_owner != fl_pid)
803 continue;
804 }
805 atomic_inc(&pos->ls_count); 796 atomic_inc(&pos->ls_count);
806 return pos; 797 return pos;
807 } 798 }
808 return NULL; 799 return NULL;
809} 800}
810 801
802static void
803free_lock_state_work(struct work_struct *work)
804{
805 struct nfs4_lock_state *lsp = container_of(work,
806 struct nfs4_lock_state, ls_release);
807 struct nfs4_state *state = lsp->ls_state;
808 struct nfs_server *server = state->owner->so_server;
809 struct nfs_client *clp = server->nfs_client;
810
811 clp->cl_mvops->free_lock_state(server, lsp);
812}
813
811/* 814/*
812 * Return a compatible lock_state. If no initialized lock_state structure 815 * Return a compatible lock_state. If no initialized lock_state structure
813 * exists, return an uninitialized one. 816 * exists, return an uninitialized one.
814 * 817 *
815 */ 818 */
816static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) 819static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
817{ 820{
818 struct nfs4_lock_state *lsp; 821 struct nfs4_lock_state *lsp;
819 struct nfs_server *server = state->owner->so_server; 822 struct nfs_server *server = state->owner->so_server;
@@ -824,21 +827,12 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
824 nfs4_init_seqid_counter(&lsp->ls_seqid); 827 nfs4_init_seqid_counter(&lsp->ls_seqid);
825 atomic_set(&lsp->ls_count, 1); 828 atomic_set(&lsp->ls_count, 1);
826 lsp->ls_state = state; 829 lsp->ls_state = state;
827 lsp->ls_owner.lo_type = type; 830 lsp->ls_owner = fl_owner;
828 switch (lsp->ls_owner.lo_type) {
829 case NFS4_FLOCK_LOCK_TYPE:
830 lsp->ls_owner.lo_u.flock_owner = fl_pid;
831 break;
832 case NFS4_POSIX_LOCK_TYPE:
833 lsp->ls_owner.lo_u.posix_owner = fl_owner;
834 break;
835 default:
836 goto out_free;
837 }
838 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); 831 lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
839 if (lsp->ls_seqid.owner_id < 0) 832 if (lsp->ls_seqid.owner_id < 0)
840 goto out_free; 833 goto out_free;
841 INIT_LIST_HEAD(&lsp->ls_locks); 834 INIT_LIST_HEAD(&lsp->ls_locks);
835 INIT_WORK(&lsp->ls_release, free_lock_state_work);
842 return lsp; 836 return lsp;
843out_free: 837out_free:
844 kfree(lsp); 838 kfree(lsp);
@@ -857,13 +851,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp
857 * exists, return an uninitialized one. 851 * exists, return an uninitialized one.
858 * 852 *
859 */ 853 */
860static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) 854static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
861{ 855{
862 struct nfs4_lock_state *lsp, *new = NULL; 856 struct nfs4_lock_state *lsp, *new = NULL;
863 857
864 for(;;) { 858 for(;;) {
865 spin_lock(&state->state_lock); 859 spin_lock(&state->state_lock);
866 lsp = __nfs4_find_lock_state(state, owner, pid, type); 860 lsp = __nfs4_find_lock_state(state, owner);
867 if (lsp != NULL) 861 if (lsp != NULL)
868 break; 862 break;
869 if (new != NULL) { 863 if (new != NULL) {
@@ -874,7 +868,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
874 break; 868 break;
875 } 869 }
876 spin_unlock(&state->state_lock); 870 spin_unlock(&state->state_lock);
877 new = nfs4_alloc_lock_state(state, owner, pid, type); 871 new = nfs4_alloc_lock_state(state, owner);
878 if (new == NULL) 872 if (new == NULL)
879 return NULL; 873 return NULL;
880 } 874 }
@@ -902,13 +896,12 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
902 if (list_empty(&state->lock_states)) 896 if (list_empty(&state->lock_states))
903 clear_bit(LK_STATE_IN_USE, &state->flags); 897 clear_bit(LK_STATE_IN_USE, &state->flags);
904 spin_unlock(&state->state_lock); 898 spin_unlock(&state->state_lock);
905 server = state->owner->so_server; 899 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags))
906 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { 900 queue_work(nfsiod_workqueue, &lsp->ls_release);
907 struct nfs_client *clp = server->nfs_client; 901 else {
908 902 server = state->owner->so_server;
909 clp->cl_mvops->free_lock_state(server, lsp);
910 } else
911 nfs4_free_lock_state(server, lsp); 903 nfs4_free_lock_state(server, lsp);
904 }
912} 905}
913 906
914static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) 907static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -935,13 +928,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
935 928
936 if (fl->fl_ops != NULL) 929 if (fl->fl_ops != NULL)
937 return 0; 930 return 0;
938 if (fl->fl_flags & FL_POSIX) 931 lsp = nfs4_get_lock_state(state, fl->fl_owner);
939 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
940 else if (fl->fl_flags & FL_FLOCK)
941 lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
942 NFS4_FLOCK_LOCK_TYPE);
943 else
944 return -EINVAL;
945 if (lsp == NULL) 932 if (lsp == NULL)
946 return -ENOMEM; 933 return -ENOMEM;
947 fl->fl_u.nfs4_fl.owner = lsp; 934 fl->fl_u.nfs4_fl.owner = lsp;
@@ -955,7 +942,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
955{ 942{
956 struct nfs4_lock_state *lsp; 943 struct nfs4_lock_state *lsp;
957 fl_owner_t fl_owner; 944 fl_owner_t fl_owner;
958 pid_t fl_pid;
959 int ret = -ENOENT; 945 int ret = -ENOENT;
960 946
961 947
@@ -966,9 +952,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
966 goto out; 952 goto out;
967 953
968 fl_owner = lockowner->l_owner; 954 fl_owner = lockowner->l_owner;
969 fl_pid = lockowner->l_pid;
970 spin_lock(&state->state_lock); 955 spin_lock(&state->state_lock);
971 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 956 lsp = __nfs4_find_lock_state(state, fl_owner);
972 if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) 957 if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
973 ret = -EIO; 958 ret = -EIO;
974 else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { 959 else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 0a744f3a86f6..1c32adbe728d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
932 932
933DECLARE_EVENT_CLASS(nfs4_read_event, 933DECLARE_EVENT_CLASS(nfs4_read_event,
934 TP_PROTO( 934 TP_PROTO(
935 const struct nfs_pgio_data *data, 935 const struct nfs_pgio_header *hdr,
936 int error 936 int error
937 ), 937 ),
938 938
939 TP_ARGS(data, error), 939 TP_ARGS(hdr, error),
940 940
941 TP_STRUCT__entry( 941 TP_STRUCT__entry(
942 __field(dev_t, dev) 942 __field(dev_t, dev)
@@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
948 ), 948 ),
949 949
950 TP_fast_assign( 950 TP_fast_assign(
951 const struct inode *inode = data->header->inode; 951 const struct inode *inode = hdr->inode;
952 __entry->dev = inode->i_sb->s_dev; 952 __entry->dev = inode->i_sb->s_dev;
953 __entry->fileid = NFS_FILEID(inode); 953 __entry->fileid = NFS_FILEID(inode);
954 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 954 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
955 __entry->offset = data->args.offset; 955 __entry->offset = hdr->args.offset;
956 __entry->count = data->args.count; 956 __entry->count = hdr->args.count;
957 __entry->error = error; 957 __entry->error = error;
958 ), 958 ),
959 959
@@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
972#define DEFINE_NFS4_READ_EVENT(name) \ 972#define DEFINE_NFS4_READ_EVENT(name) \
973 DEFINE_EVENT(nfs4_read_event, name, \ 973 DEFINE_EVENT(nfs4_read_event, name, \
974 TP_PROTO( \ 974 TP_PROTO( \
975 const struct nfs_pgio_data *data, \ 975 const struct nfs_pgio_header *hdr, \
976 int error \ 976 int error \
977 ), \ 977 ), \
978 TP_ARGS(data, error)) 978 TP_ARGS(hdr, error))
979DEFINE_NFS4_READ_EVENT(nfs4_read); 979DEFINE_NFS4_READ_EVENT(nfs4_read);
980#ifdef CONFIG_NFS_V4_1 980#ifdef CONFIG_NFS_V4_1
981DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); 981DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
983 983
984DECLARE_EVENT_CLASS(nfs4_write_event, 984DECLARE_EVENT_CLASS(nfs4_write_event,
985 TP_PROTO( 985 TP_PROTO(
986 const struct nfs_pgio_data *data, 986 const struct nfs_pgio_header *hdr,
987 int error 987 int error
988 ), 988 ),
989 989
990 TP_ARGS(data, error), 990 TP_ARGS(hdr, error),
991 991
992 TP_STRUCT__entry( 992 TP_STRUCT__entry(
993 __field(dev_t, dev) 993 __field(dev_t, dev)
@@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
999 ), 999 ),
1000 1000
1001 TP_fast_assign( 1001 TP_fast_assign(
1002 const struct inode *inode = data->header->inode; 1002 const struct inode *inode = hdr->inode;
1003 __entry->dev = inode->i_sb->s_dev; 1003 __entry->dev = inode->i_sb->s_dev;
1004 __entry->fileid = NFS_FILEID(inode); 1004 __entry->fileid = NFS_FILEID(inode);
1005 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); 1005 __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
1006 __entry->offset = data->args.offset; 1006 __entry->offset = hdr->args.offset;
1007 __entry->count = data->args.count; 1007 __entry->count = hdr->args.count;
1008 __entry->error = error; 1008 __entry->error = error;
1009 ), 1009 ),
1010 1010
@@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
1024#define DEFINE_NFS4_WRITE_EVENT(name) \ 1024#define DEFINE_NFS4_WRITE_EVENT(name) \
1025 DEFINE_EVENT(nfs4_write_event, name, \ 1025 DEFINE_EVENT(nfs4_write_event, name, \
1026 TP_PROTO( \ 1026 TP_PROTO( \
1027 const struct nfs_pgio_data *data, \ 1027 const struct nfs_pgio_header *hdr, \
1028 int error \ 1028 int error \
1029 ), \ 1029 ), \
1030 TP_ARGS(data, error)) 1030 TP_ARGS(hdr, error))
1031DEFINE_NFS4_WRITE_EVENT(nfs4_write); 1031DEFINE_NFS4_WRITE_EVENT(nfs4_write);
1032#ifdef CONFIG_NFS_V4_1 1032#ifdef CONFIG_NFS_V4_1
1033DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); 1033DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 939ae606cfa4..e13b59d8d9aa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
7092 if (!status) 7092 if (!status)
7093 status = decode_sequence(xdr, &res->seq_res, rqstp); 7093 status = decode_sequence(xdr, &res->seq_res, rqstp);
7094 if (!status) 7094 if (!status)
7095 status = decode_reclaim_complete(xdr, (void *)NULL); 7095 status = decode_reclaim_complete(xdr, NULL);
7096 return status; 7096 return status;
7097} 7097}
7098 7098
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 611320753db2..ae05278b3761 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
439 objlayout_read_done(&objios->oir, status, objios->sync); 439 objlayout_read_done(&objios->oir, status, objios->sync);
440} 440}
441 441
442int objio_read_pagelist(struct nfs_pgio_data *rdata) 442int objio_read_pagelist(struct nfs_pgio_header *hdr)
443{ 443{
444 struct nfs_pgio_header *hdr = rdata->header;
445 struct objio_state *objios; 444 struct objio_state *objios;
446 int ret; 445 int ret;
447 446
448 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, 447 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
449 hdr->lseg, rdata->args.pages, rdata->args.pgbase, 448 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
450 rdata->args.offset, rdata->args.count, rdata, 449 hdr->args.offset, hdr->args.count, hdr,
451 GFP_KERNEL, &objios); 450 GFP_KERNEL, &objios);
452 if (unlikely(ret)) 451 if (unlikely(ret))
453 return ret; 452 return ret;
454 453
455 objios->ios->done = _read_done; 454 objios->ios->done = _read_done;
456 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
457 rdata->args.offset, rdata->args.count); 456 hdr->args.offset, hdr->args.count);
458 ret = ore_read(objios->ios); 457 ret = ore_read(objios->ios);
459 if (unlikely(ret)) 458 if (unlikely(ret))
460 objio_free_result(&objios->oir); 459 objio_free_result(&objios->oir);
@@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
487static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) 486static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
488{ 487{
489 struct objio_state *objios = priv; 488 struct objio_state *objios = priv;
490 struct nfs_pgio_data *wdata = objios->oir.rpcdata; 489 struct nfs_pgio_header *hdr = objios->oir.rpcdata;
491 struct address_space *mapping = wdata->header->inode->i_mapping; 490 struct address_space *mapping = hdr->inode->i_mapping;
492 pgoff_t index = offset / PAGE_SIZE; 491 pgoff_t index = offset / PAGE_SIZE;
493 struct page *page; 492 struct page *page;
494 loff_t i_size = i_size_read(wdata->header->inode); 493 loff_t i_size = i_size_read(hdr->inode);
495 494
496 if (offset >= i_size) { 495 if (offset >= i_size) {
497 *uptodate = true; 496 *uptodate = true;
@@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
531 .put_page = &__r4w_put_page, 530 .put_page = &__r4w_put_page,
532}; 531};
533 532
534int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) 533int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
535{ 534{
536 struct nfs_pgio_header *hdr = wdata->header;
537 struct objio_state *objios; 535 struct objio_state *objios;
538 int ret; 536 int ret;
539 537
540 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, 538 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
541 hdr->lseg, wdata->args.pages, wdata->args.pgbase, 539 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
542 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, 540 hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
543 &objios); 541 &objios);
544 if (unlikely(ret)) 542 if (unlikely(ret))
545 return ret; 543 return ret;
@@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
551 objios->ios->done = _write_done; 549 objios->ios->done = _write_done;
552 550
553 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 551 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
554 wdata->args.offset, wdata->args.count); 552 hdr->args.offset, hdr->args.count);
555 ret = ore_write(objios->ios); 553 ret = ore_write(objios->ios);
556 if (unlikely(ret)) { 554 if (unlikely(ret)) {
557 objio_free_result(&objios->oir); 555 objio_free_result(&objios->oir);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 765d3f54e986..697a16d11fac 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
229static void _rpc_read_complete(struct work_struct *work) 229static void _rpc_read_complete(struct work_struct *work)
230{ 230{
231 struct rpc_task *task; 231 struct rpc_task *task;
232 struct nfs_pgio_data *rdata; 232 struct nfs_pgio_header *hdr;
233 233
234 dprintk("%s enter\n", __func__); 234 dprintk("%s enter\n", __func__);
235 task = container_of(work, struct rpc_task, u.tk_work); 235 task = container_of(work, struct rpc_task, u.tk_work);
236 rdata = container_of(task, struct nfs_pgio_data, task); 236 hdr = container_of(task, struct nfs_pgio_header, task);
237 237
238 pnfs_ld_read_done(rdata); 238 pnfs_ld_read_done(hdr);
239} 239}
240 240
241void 241void
242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243{ 243{
244 struct nfs_pgio_data *rdata = oir->rpcdata; 244 struct nfs_pgio_header *hdr = oir->rpcdata;
245 245
246 oir->status = rdata->task.tk_status = status; 246 oir->status = hdr->task.tk_status = status;
247 if (status >= 0) 247 if (status >= 0)
248 rdata->res.count = status; 248 hdr->res.count = status;
249 else 249 else
250 rdata->header->pnfs_error = status; 250 hdr->pnfs_error = status;
251 objlayout_iodone(oir); 251 objlayout_iodone(oir);
252 /* must not use oir after this point */ 252 /* must not use oir after this point */
253 253
254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, 254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
255 status, rdata->res.eof, sync); 255 status, hdr->res.eof, sync);
256 256
257 if (sync) 257 if (sync)
258 pnfs_ld_read_done(rdata); 258 pnfs_ld_read_done(hdr);
259 else { 259 else {
260 INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); 260 INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
261 schedule_work(&rdata->task.u.tk_work); 261 schedule_work(&hdr->task.u.tk_work);
262 } 262 }
263} 263}
264 264
@@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
266 * Perform sync or async reads. 266 * Perform sync or async reads.
267 */ 267 */
268enum pnfs_try_status 268enum pnfs_try_status
269objlayout_read_pagelist(struct nfs_pgio_data *rdata) 269objlayout_read_pagelist(struct nfs_pgio_header *hdr)
270{ 270{
271 struct nfs_pgio_header *hdr = rdata->header;
272 struct inode *inode = hdr->inode; 271 struct inode *inode = hdr->inode;
273 loff_t offset = rdata->args.offset; 272 loff_t offset = hdr->args.offset;
274 size_t count = rdata->args.count; 273 size_t count = hdr->args.count;
275 int err; 274 int err;
276 loff_t eof; 275 loff_t eof;
277 276
@@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
279 if (unlikely(offset + count > eof)) { 278 if (unlikely(offset + count > eof)) {
280 if (offset >= eof) { 279 if (offset >= eof) {
281 err = 0; 280 err = 0;
282 rdata->res.count = 0; 281 hdr->res.count = 0;
283 rdata->res.eof = 1; 282 hdr->res.eof = 1;
284 /*FIXME: do we need to call pnfs_ld_read_done() */ 283 /*FIXME: do we need to call pnfs_ld_read_done() */
285 goto out; 284 goto out;
286 } 285 }
287 count = eof - offset; 286 count = eof - offset;
288 } 287 }
289 288
290 rdata->res.eof = (offset + count) >= eof; 289 hdr->res.eof = (offset + count) >= eof;
291 _fix_verify_io_params(hdr->lseg, &rdata->args.pages, 290 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
292 &rdata->args.pgbase, 291 &hdr->args.pgbase,
293 rdata->args.offset, rdata->args.count); 292 hdr->args.offset, hdr->args.count);
294 293
295 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", 294 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
296 __func__, inode->i_ino, offset, count, rdata->res.eof); 295 __func__, inode->i_ino, offset, count, hdr->res.eof);
297 296
298 err = objio_read_pagelist(rdata); 297 err = objio_read_pagelist(hdr);
299 out: 298 out:
300 if (unlikely(err)) { 299 if (unlikely(err)) {
301 hdr->pnfs_error = err; 300 hdr->pnfs_error = err;
@@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
312static void _rpc_write_complete(struct work_struct *work) 311static void _rpc_write_complete(struct work_struct *work)
313{ 312{
314 struct rpc_task *task; 313 struct rpc_task *task;
315 struct nfs_pgio_data *wdata; 314 struct nfs_pgio_header *hdr;
316 315
317 dprintk("%s enter\n", __func__); 316 dprintk("%s enter\n", __func__);
318 task = container_of(work, struct rpc_task, u.tk_work); 317 task = container_of(work, struct rpc_task, u.tk_work);
319 wdata = container_of(task, struct nfs_pgio_data, task); 318 hdr = container_of(task, struct nfs_pgio_header, task);
320 319
321 pnfs_ld_write_done(wdata); 320 pnfs_ld_write_done(hdr);
322} 321}
323 322
324void 323void
325objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) 324objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
326{ 325{
327 struct nfs_pgio_data *wdata = oir->rpcdata; 326 struct nfs_pgio_header *hdr = oir->rpcdata;
328 327
329 oir->status = wdata->task.tk_status = status; 328 oir->status = hdr->task.tk_status = status;
330 if (status >= 0) { 329 if (status >= 0) {
331 wdata->res.count = status; 330 hdr->res.count = status;
332 wdata->verf.committed = oir->committed; 331 hdr->verf.committed = oir->committed;
333 } else { 332 } else {
334 wdata->header->pnfs_error = status; 333 hdr->pnfs_error = status;
335 } 334 }
336 objlayout_iodone(oir); 335 objlayout_iodone(oir);
337 /* must not use oir after this point */ 336 /* must not use oir after this point */
338 337
339 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, 338 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
340 status, wdata->verf.committed, sync); 339 status, hdr->verf.committed, sync);
341 340
342 if (sync) 341 if (sync)
343 pnfs_ld_write_done(wdata); 342 pnfs_ld_write_done(hdr);
344 else { 343 else {
345 INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); 344 INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
346 schedule_work(&wdata->task.u.tk_work); 345 schedule_work(&hdr->task.u.tk_work);
347 } 346 }
348} 347}
349 348
@@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
351 * Perform sync or async writes. 350 * Perform sync or async writes.
352 */ 351 */
353enum pnfs_try_status 352enum pnfs_try_status
354objlayout_write_pagelist(struct nfs_pgio_data *wdata, 353objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
355 int how)
356{ 354{
357 struct nfs_pgio_header *hdr = wdata->header;
358 int err; 355 int err;
359 356
360 _fix_verify_io_params(hdr->lseg, &wdata->args.pages, 357 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
361 &wdata->args.pgbase, 358 &hdr->args.pgbase,
362 wdata->args.offset, wdata->args.count); 359 hdr->args.offset, hdr->args.count);
363 360
364 err = objio_write_pagelist(wdata, how); 361 err = objio_write_pagelist(hdr, how);
365 if (unlikely(err)) { 362 if (unlikely(err)) {
366 hdr->pnfs_error = err; 363 hdr->pnfs_error = err;
367 dprintk("%s: Returned Error %d\n", __func__, err); 364 dprintk("%s: Returned Error %d\n", __func__, err);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 01e041029a6c..fd13f1d2f136 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
119 */ 119 */
120extern void objio_free_result(struct objlayout_io_res *oir); 120extern void objio_free_result(struct objlayout_io_res *oir);
121 121
122extern int objio_read_pagelist(struct nfs_pgio_data *rdata); 122extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
123extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); 123extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
124 124
125/* 125/*
126 * callback API 126 * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
168extern void objlayout_free_lseg(struct pnfs_layout_segment *); 168extern void objlayout_free_lseg(struct pnfs_layout_segment *);
169 169
170extern enum pnfs_try_status objlayout_read_pagelist( 170extern enum pnfs_try_status objlayout_read_pagelist(
171 struct nfs_pgio_data *); 171 struct nfs_pgio_header *);
172 172
173extern enum pnfs_try_status objlayout_write_pagelist( 173extern enum pnfs_try_status objlayout_write_pagelist(
174 struct nfs_pgio_data *, 174 struct nfs_pgio_header *,
175 int how); 175 int how);
176 176
177extern void objlayout_encode_layoutcommit( 177extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 0be5050638f7..ba491926df5f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -141,16 +141,24 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
141 * @req - request in group that is to be locked 141 * @req - request in group that is to be locked
142 * 142 *
143 * this lock must be held if modifying the page group list 143 * this lock must be held if modifying the page group list
144 *
145 * returns result from wait_on_bit_lock: 0 on success, < 0 on error
144 */ 146 */
145void 147int
146nfs_page_group_lock(struct nfs_page *req) 148nfs_page_group_lock(struct nfs_page *req, bool wait)
147{ 149{
148 struct nfs_page *head = req->wb_head; 150 struct nfs_page *head = req->wb_head;
151 int ret;
149 152
150 WARN_ON_ONCE(head != head->wb_head); 153 WARN_ON_ONCE(head != head->wb_head);
151 154
152 wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, 155 do {
156 ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
153 TASK_UNINTERRUPTIBLE); 157 TASK_UNINTERRUPTIBLE);
158 } while (wait && ret != 0);
159
160 WARN_ON_ONCE(ret > 0);
161 return ret;
154} 162}
155 163
156/* 164/*
@@ -211,7 +219,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
211{ 219{
212 bool ret; 220 bool ret;
213 221
214 nfs_page_group_lock(req); 222 nfs_page_group_lock(req, true);
215 ret = nfs_page_group_sync_on_bit_locked(req, bit); 223 ret = nfs_page_group_sync_on_bit_locked(req, bit);
216 nfs_page_group_unlock(req); 224 nfs_page_group_unlock(req);
217 225
@@ -454,123 +462,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
454} 462}
455EXPORT_SYMBOL_GPL(nfs_generic_pg_test); 463EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
456 464
457static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) 465struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
458{
459 return container_of(hdr, struct nfs_rw_header, header);
460}
461
462/**
463 * nfs_rw_header_alloc - Allocate a header for a read or write
464 * @ops: Read or write function vector
465 */
466struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
467{ 466{
468 struct nfs_rw_header *header = ops->rw_alloc_header(); 467 struct nfs_pgio_header *hdr = ops->rw_alloc_header();
469
470 if (header) {
471 struct nfs_pgio_header *hdr = &header->header;
472 468
469 if (hdr) {
473 INIT_LIST_HEAD(&hdr->pages); 470 INIT_LIST_HEAD(&hdr->pages);
474 spin_lock_init(&hdr->lock); 471 spin_lock_init(&hdr->lock);
475 atomic_set(&hdr->refcnt, 0);
476 hdr->rw_ops = ops; 472 hdr->rw_ops = ops;
477 } 473 }
478 return header; 474 return hdr;
479} 475}
480EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); 476EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
481 477
482/* 478/*
483 * nfs_rw_header_free - Free a read or write header 479 * nfs_pgio_header_free - Free a read or write header
484 * @hdr: The header to free 480 * @hdr: The header to free
485 */ 481 */
486void nfs_rw_header_free(struct nfs_pgio_header *hdr) 482void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
487{ 483{
488 hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); 484 hdr->rw_ops->rw_free_header(hdr);
489} 485}
490EXPORT_SYMBOL_GPL(nfs_rw_header_free); 486EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
491 487
492/** 488/**
493 * nfs_pgio_data_alloc - Allocate pageio data 489 * nfs_pgio_data_destroy - make @hdr suitable for reuse
494 * @hdr: The header making a request 490 *
495 * @pagecount: Number of pages to create 491 * Frees memory and releases refs from nfs_generic_pgio, so that it may
496 */ 492 * be called again.
497static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, 493 *
498 unsigned int pagecount) 494 * @hdr: A header that has had nfs_generic_pgio called
499{
500 struct nfs_pgio_data *data, *prealloc;
501
502 prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
503 if (prealloc->header == NULL)
504 data = prealloc;
505 else
506 data = kzalloc(sizeof(*data), GFP_KERNEL);
507 if (!data)
508 goto out;
509
510 if (nfs_pgarray_set(&data->pages, pagecount)) {
511 data->header = hdr;
512 atomic_inc(&hdr->refcnt);
513 } else {
514 if (data != prealloc)
515 kfree(data);
516 data = NULL;
517 }
518out:
519 return data;
520}
521
522/**
523 * nfs_pgio_data_release - Properly free pageio data
524 * @data: The data to release
525 */ 495 */
526void nfs_pgio_data_release(struct nfs_pgio_data *data) 496void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
527{ 497{
528 struct nfs_pgio_header *hdr = data->header; 498 put_nfs_open_context(hdr->args.context);
529 struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); 499 if (hdr->page_array.pagevec != hdr->page_array.page_array)
530 500 kfree(hdr->page_array.pagevec);
531 put_nfs_open_context(data->args.context);
532 if (data->pages.pagevec != data->pages.page_array)
533 kfree(data->pages.pagevec);
534 if (data == &pageio_header->rpc_data) {
535 data->header = NULL;
536 data = NULL;
537 }
538 if (atomic_dec_and_test(&hdr->refcnt))
539 hdr->completion_ops->completion(hdr);
540 /* Note: we only free the rpc_task after callbacks are done.
541 * See the comment in rpc_free_task() for why
542 */
543 kfree(data);
544} 501}
545EXPORT_SYMBOL_GPL(nfs_pgio_data_release); 502EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
546 503
547/** 504/**
548 * nfs_pgio_rpcsetup - Set up arguments for a pageio call 505 * nfs_pgio_rpcsetup - Set up arguments for a pageio call
549 * @data: The pageio data 506 * @hdr: The pageio hdr
550 * @count: Number of bytes to read 507 * @count: Number of bytes to read
551 * @offset: Initial offset 508 * @offset: Initial offset
552 * @how: How to commit data (writes only) 509 * @how: How to commit data (writes only)
553 * @cinfo: Commit information for the call (writes only) 510 * @cinfo: Commit information for the call (writes only)
554 */ 511 */
555static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, 512static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
556 unsigned int count, unsigned int offset, 513 unsigned int count, unsigned int offset,
557 int how, struct nfs_commit_info *cinfo) 514 int how, struct nfs_commit_info *cinfo)
558{ 515{
559 struct nfs_page *req = data->header->req; 516 struct nfs_page *req = hdr->req;
560 517
561 /* Set up the RPC argument and reply structs 518 /* Set up the RPC argument and reply structs
562 * NB: take care not to mess about with data->commit et al. */ 519 * NB: take care not to mess about with hdr->commit et al. */
563 520
564 data->args.fh = NFS_FH(data->header->inode); 521 hdr->args.fh = NFS_FH(hdr->inode);
565 data->args.offset = req_offset(req) + offset; 522 hdr->args.offset = req_offset(req) + offset;
566 /* pnfs_set_layoutcommit needs this */ 523 /* pnfs_set_layoutcommit needs this */
567 data->mds_offset = data->args.offset; 524 hdr->mds_offset = hdr->args.offset;
568 data->args.pgbase = req->wb_pgbase + offset; 525 hdr->args.pgbase = req->wb_pgbase + offset;
569 data->args.pages = data->pages.pagevec; 526 hdr->args.pages = hdr->page_array.pagevec;
570 data->args.count = count; 527 hdr->args.count = count;
571 data->args.context = get_nfs_open_context(req->wb_context); 528 hdr->args.context = get_nfs_open_context(req->wb_context);
572 data->args.lock_context = req->wb_lock_context; 529 hdr->args.lock_context = req->wb_lock_context;
573 data->args.stable = NFS_UNSTABLE; 530 hdr->args.stable = NFS_UNSTABLE;
574 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 531 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
575 case 0: 532 case 0:
576 break; 533 break;
@@ -578,59 +535,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
578 if (nfs_reqs_to_commit(cinfo)) 535 if (nfs_reqs_to_commit(cinfo))
579 break; 536 break;
580 default: 537 default:
581 data->args.stable = NFS_FILE_SYNC; 538 hdr->args.stable = NFS_FILE_SYNC;
582 } 539 }
583 540
584 data->res.fattr = &data->fattr; 541 hdr->res.fattr = &hdr->fattr;
585 data->res.count = count; 542 hdr->res.count = count;
586 data->res.eof = 0; 543 hdr->res.eof = 0;
587 data->res.verf = &data->verf; 544 hdr->res.verf = &hdr->verf;
588 nfs_fattr_init(&data->fattr); 545 nfs_fattr_init(&hdr->fattr);
589} 546}
590 547
591/** 548/**
592 * nfs_pgio_prepare - Prepare pageio data to go over the wire 549 * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
593 * @task: The current task 550 * @task: The current task
594 * @calldata: pageio data to prepare 551 * @calldata: pageio header to prepare
595 */ 552 */
596static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) 553static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
597{ 554{
598 struct nfs_pgio_data *data = calldata; 555 struct nfs_pgio_header *hdr = calldata;
599 int err; 556 int err;
600 err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); 557 err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
601 if (err) 558 if (err)
602 rpc_exit(task, err); 559 rpc_exit(task, err);
603} 560}
604 561
605int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, 562int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
606 const struct rpc_call_ops *call_ops, int how, int flags) 563 const struct rpc_call_ops *call_ops, int how, int flags)
607{ 564{
608 struct rpc_task *task; 565 struct rpc_task *task;
609 struct rpc_message msg = { 566 struct rpc_message msg = {
610 .rpc_argp = &data->args, 567 .rpc_argp = &hdr->args,
611 .rpc_resp = &data->res, 568 .rpc_resp = &hdr->res,
612 .rpc_cred = data->header->cred, 569 .rpc_cred = hdr->cred,
613 }; 570 };
614 struct rpc_task_setup task_setup_data = { 571 struct rpc_task_setup task_setup_data = {
615 .rpc_client = clnt, 572 .rpc_client = clnt,
616 .task = &data->task, 573 .task = &hdr->task,
617 .rpc_message = &msg, 574 .rpc_message = &msg,
618 .callback_ops = call_ops, 575 .callback_ops = call_ops,
619 .callback_data = data, 576 .callback_data = hdr,
620 .workqueue = nfsiod_workqueue, 577 .workqueue = nfsiod_workqueue,
621 .flags = RPC_TASK_ASYNC | flags, 578 .flags = RPC_TASK_ASYNC | flags,
622 }; 579 };
623 int ret = 0; 580 int ret = 0;
624 581
625 data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); 582 hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
626 583
627 dprintk("NFS: %5u initiated pgio call " 584 dprintk("NFS: %5u initiated pgio call "
628 "(req %s/%llu, %u bytes @ offset %llu)\n", 585 "(req %s/%llu, %u bytes @ offset %llu)\n",
629 data->task.tk_pid, 586 hdr->task.tk_pid,
630 data->header->inode->i_sb->s_id, 587 hdr->inode->i_sb->s_id,
631 (unsigned long long)NFS_FILEID(data->header->inode), 588 (unsigned long long)NFS_FILEID(hdr->inode),
632 data->args.count, 589 hdr->args.count,
633 (unsigned long long)data->args.offset); 590 (unsigned long long)hdr->args.offset);
634 591
635 task = rpc_run_task(&task_setup_data); 592 task = rpc_run_task(&task_setup_data);
636 if (IS_ERR(task)) { 593 if (IS_ERR(task)) {
@@ -657,22 +614,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
657 struct nfs_pgio_header *hdr) 614 struct nfs_pgio_header *hdr)
658{ 615{
659 set_bit(NFS_IOHDR_REDO, &hdr->flags); 616 set_bit(NFS_IOHDR_REDO, &hdr->flags);
660 nfs_pgio_data_release(hdr->data); 617 nfs_pgio_data_destroy(hdr);
661 hdr->data = NULL; 618 hdr->completion_ops->completion(hdr);
662 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 619 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
663 return -ENOMEM; 620 return -ENOMEM;
664} 621}
665 622
666/** 623/**
667 * nfs_pgio_release - Release pageio data 624 * nfs_pgio_release - Release pageio data
668 * @calldata: The pageio data to release 625 * @calldata: The pageio header to release
669 */ 626 */
670static void nfs_pgio_release(void *calldata) 627static void nfs_pgio_release(void *calldata)
671{ 628{
672 struct nfs_pgio_data *data = calldata; 629 struct nfs_pgio_header *hdr = calldata;
673 if (data->header->rw_ops->rw_release) 630 if (hdr->rw_ops->rw_release)
674 data->header->rw_ops->rw_release(data); 631 hdr->rw_ops->rw_release(hdr);
675 nfs_pgio_data_release(data); 632 nfs_pgio_data_destroy(hdr);
633 hdr->completion_ops->completion(hdr);
676} 634}
677 635
678/** 636/**
@@ -713,22 +671,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
713/** 671/**
714 * nfs_pgio_result - Basic pageio error handling 672 * nfs_pgio_result - Basic pageio error handling
715 * @task: The task that ran 673 * @task: The task that ran
716 * @calldata: Pageio data to check 674 * @calldata: Pageio header to check
717 */ 675 */
718static void nfs_pgio_result(struct rpc_task *task, void *calldata) 676static void nfs_pgio_result(struct rpc_task *task, void *calldata)
719{ 677{
720 struct nfs_pgio_data *data = calldata; 678 struct nfs_pgio_header *hdr = calldata;
721 struct inode *inode = data->header->inode; 679 struct inode *inode = hdr->inode;
722 680
723 dprintk("NFS: %s: %5u, (status %d)\n", __func__, 681 dprintk("NFS: %s: %5u, (status %d)\n", __func__,
724 task->tk_pid, task->tk_status); 682 task->tk_pid, task->tk_status);
725 683
726 if (data->header->rw_ops->rw_done(task, data, inode) != 0) 684 if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
727 return; 685 return;
728 if (task->tk_status < 0) 686 if (task->tk_status < 0)
729 nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); 687 nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
730 else 688 else
731 data->header->rw_ops->rw_result(task, data); 689 hdr->rw_ops->rw_result(task, hdr);
732} 690}
733 691
734/* 692/*
@@ -744,17 +702,16 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
744{ 702{
745 struct nfs_page *req; 703 struct nfs_page *req;
746 struct page **pages; 704 struct page **pages;
747 struct nfs_pgio_data *data;
748 struct list_head *head = &desc->pg_list; 705 struct list_head *head = &desc->pg_list;
749 struct nfs_commit_info cinfo; 706 struct nfs_commit_info cinfo;
707 unsigned int pagecount;
750 708
751 data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, 709 pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
752 desc->pg_count)); 710 if (!nfs_pgarray_set(&hdr->page_array, pagecount))
753 if (!data)
754 return nfs_pgio_error(desc, hdr); 711 return nfs_pgio_error(desc, hdr);
755 712
756 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 713 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
757 pages = data->pages.pagevec; 714 pages = hdr->page_array.pagevec;
758 while (!list_empty(head)) { 715 while (!list_empty(head)) {
759 req = nfs_list_entry(head->next); 716 req = nfs_list_entry(head->next);
760 nfs_list_remove_request(req); 717 nfs_list_remove_request(req);
@@ -767,8 +724,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
767 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 724 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
768 725
769 /* Set up the argument struct */ 726 /* Set up the argument struct */
770 nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); 727 nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
771 hdr->data = data;
772 desc->pg_rpc_callops = &nfs_pgio_common_ops; 728 desc->pg_rpc_callops = &nfs_pgio_common_ops;
773 return 0; 729 return 0;
774} 730}
@@ -776,25 +732,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
776 732
777static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) 733static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
778{ 734{
779 struct nfs_rw_header *rw_hdr;
780 struct nfs_pgio_header *hdr; 735 struct nfs_pgio_header *hdr;
781 int ret; 736 int ret;
782 737
783 rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); 738 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
784 if (!rw_hdr) { 739 if (!hdr) {
785 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 740 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
786 return -ENOMEM; 741 return -ENOMEM;
787 } 742 }
788 hdr = &rw_hdr->header; 743 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
789 nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
790 atomic_inc(&hdr->refcnt);
791 ret = nfs_generic_pgio(desc, hdr); 744 ret = nfs_generic_pgio(desc, hdr);
792 if (ret == 0) 745 if (ret == 0)
793 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), 746 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
794 hdr->data, desc->pg_rpc_callops, 747 hdr, desc->pg_rpc_callops,
795 desc->pg_ioflags, 0); 748 desc->pg_ioflags, 0);
796 if (atomic_dec_and_test(&hdr->refcnt))
797 hdr->completion_ops->completion(hdr);
798 return ret; 749 return ret;
799} 750}
800 751
@@ -907,8 +858,13 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
907 struct nfs_page *subreq; 858 struct nfs_page *subreq;
908 unsigned int bytes_left = 0; 859 unsigned int bytes_left = 0;
909 unsigned int offset, pgbase; 860 unsigned int offset, pgbase;
861 int ret;
910 862
911 nfs_page_group_lock(req); 863 ret = nfs_page_group_lock(req, false);
864 if (ret < 0) {
865 desc->pg_error = ret;
866 return 0;
867 }
912 868
913 subreq = req; 869 subreq = req;
914 bytes_left = subreq->wb_bytes; 870 bytes_left = subreq->wb_bytes;
@@ -930,7 +886,11 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
930 if (desc->pg_recoalesce) 886 if (desc->pg_recoalesce)
931 return 0; 887 return 0;
932 /* retry add_request for this subreq */ 888 /* retry add_request for this subreq */
933 nfs_page_group_lock(req); 889 ret = nfs_page_group_lock(req, false);
890 if (ret < 0) {
891 desc->pg_error = ret;
892 return 0;
893 }
934 continue; 894 continue;
935 } 895 }
936 896
@@ -1005,7 +965,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1005 } while (ret); 965 } while (ret);
1006 return ret; 966 return ret;
1007} 967}
1008EXPORT_SYMBOL_GPL(nfs_pageio_add_request); 968
969/*
970 * nfs_pageio_resend - Transfer requests to new descriptor and resend
971 * @hdr - the pgio header to move request from
972 * @desc - the pageio descriptor to add requests to
973 *
974 * Try to move each request (nfs_page) from @hdr to @desc then attempt
975 * to send them.
976 *
977 * Returns 0 on success and < 0 on error.
978 */
979int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
980 struct nfs_pgio_header *hdr)
981{
982 LIST_HEAD(failed);
983
984 desc->pg_dreq = hdr->dreq;
985 while (!list_empty(&hdr->pages)) {
986 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
987
988 nfs_list_remove_request(req);
989 if (!nfs_pageio_add_request(desc, req))
990 nfs_list_add_request(req, &failed);
991 }
992 nfs_pageio_complete(desc);
993 if (!list_empty(&failed)) {
994 list_move(&failed, &hdr->pages);
995 return -EIO;
996 }
997 return 0;
998}
999EXPORT_SYMBOL_GPL(nfs_pageio_resend);
1009 1000
1010/** 1001/**
1011 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor 1002 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@@ -1021,7 +1012,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
1021 break; 1012 break;
1022 } 1013 }
1023} 1014}
1024EXPORT_SYMBOL_GPL(nfs_pageio_complete);
1025 1015
1026/** 1016/**
1027 * nfs_pageio_cond_complete - Conditional I/O completion 1017 * nfs_pageio_cond_complete - Conditional I/O completion
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a8914b335617..a3851debf8a2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
361} 361}
362EXPORT_SYMBOL_GPL(pnfs_put_lseg); 362EXPORT_SYMBOL_GPL(pnfs_put_lseg);
363 363
364static void pnfs_put_lseg_async_work(struct work_struct *work)
365{
366 struct pnfs_layout_segment *lseg;
367
368 lseg = container_of(work, struct pnfs_layout_segment, pls_work);
369
370 pnfs_put_lseg(lseg);
371}
372
373void
374pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
375{
376 INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work);
377 schedule_work(&lseg->pls_work);
378}
379EXPORT_SYMBOL_GPL(pnfs_put_lseg_async);
380
364static u64 381static u64
365end_offset(u64 start, u64 len) 382end_offset(u64 start, u64 len)
366{ 383{
@@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1470} 1487}
1471EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); 1488EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
1472 1489
1473int pnfs_write_done_resend_to_mds(struct inode *inode, 1490int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
1474 struct list_head *head,
1475 const struct nfs_pgio_completion_ops *compl_ops,
1476 struct nfs_direct_req *dreq)
1477{ 1491{
1478 struct nfs_pageio_descriptor pgio; 1492 struct nfs_pageio_descriptor pgio;
1479 LIST_HEAD(failed);
1480 1493
1481 /* Resend all requests through the MDS */ 1494 /* Resend all requests through the MDS */
1482 nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); 1495 nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
1483 pgio.pg_dreq = dreq; 1496 hdr->completion_ops);
1484 while (!list_empty(head)) { 1497 return nfs_pageio_resend(&pgio, hdr);
1485 struct nfs_page *req = nfs_list_entry(head->next);
1486
1487 nfs_list_remove_request(req);
1488 if (!nfs_pageio_add_request(&pgio, req))
1489 nfs_list_add_request(req, &failed);
1490 }
1491 nfs_pageio_complete(&pgio);
1492
1493 if (!list_empty(&failed)) {
1494 /* For some reason our attempt to resend pages. Mark the
1495 * overall send request as having failed, and let
1496 * nfs_writeback_release_full deal with the error.
1497 */
1498 list_move(&failed, head);
1499 return -EIO;
1500 }
1501 return 0;
1502} 1498}
1503EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); 1499EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
1504 1500
1505static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) 1501static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
1506{ 1502{
1507 struct nfs_pgio_header *hdr = data->header;
1508 1503
1509 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 1504 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1510 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1505 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
1512 pnfs_return_layout(hdr->inode); 1507 pnfs_return_layout(hdr->inode);
1513 } 1508 }
1514 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1509 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1515 data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, 1510 hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
1516 &hdr->pages,
1517 hdr->completion_ops,
1518 hdr->dreq);
1519} 1511}
1520 1512
1521/* 1513/*
1522 * Called by non rpc-based layout drivers 1514 * Called by non rpc-based layout drivers
1523 */ 1515 */
1524void pnfs_ld_write_done(struct nfs_pgio_data *data) 1516void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
1525{ 1517{
1526 struct nfs_pgio_header *hdr = data->header; 1518 trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
1527
1528 trace_nfs4_pnfs_write(data, hdr->pnfs_error);
1529 if (!hdr->pnfs_error) { 1519 if (!hdr->pnfs_error) {
1530 pnfs_set_layoutcommit(data); 1520 pnfs_set_layoutcommit(hdr);
1531 hdr->mds_ops->rpc_call_done(&data->task, data); 1521 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
1532 } else 1522 } else
1533 pnfs_ld_handle_write_error(data); 1523 pnfs_ld_handle_write_error(hdr);
1534 hdr->mds_ops->rpc_release(data); 1524 hdr->mds_ops->rpc_release(hdr);
1535} 1525}
1536EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1526EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1537 1527
1538static void 1528static void
1539pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, 1529pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1540 struct nfs_pgio_data *data) 1530 struct nfs_pgio_header *hdr)
1541{ 1531{
1542 struct nfs_pgio_header *hdr = data->header;
1543
1544 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1532 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1545 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1533 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1546 nfs_pageio_reset_write_mds(desc); 1534 nfs_pageio_reset_write_mds(desc);
1547 desc->pg_recoalesce = 1; 1535 desc->pg_recoalesce = 1;
1548 } 1536 }
1549 nfs_pgio_data_release(data); 1537 nfs_pgio_data_destroy(hdr);
1550} 1538}
1551 1539
1552static enum pnfs_try_status 1540static enum pnfs_try_status
1553pnfs_try_to_write_data(struct nfs_pgio_data *wdata, 1541pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
1554 const struct rpc_call_ops *call_ops, 1542 const struct rpc_call_ops *call_ops,
1555 struct pnfs_layout_segment *lseg, 1543 struct pnfs_layout_segment *lseg,
1556 int how) 1544 int how)
1557{ 1545{
1558 struct nfs_pgio_header *hdr = wdata->header;
1559 struct inode *inode = hdr->inode; 1546 struct inode *inode = hdr->inode;
1560 enum pnfs_try_status trypnfs; 1547 enum pnfs_try_status trypnfs;
1561 struct nfs_server *nfss = NFS_SERVER(inode); 1548 struct nfs_server *nfss = NFS_SERVER(inode);
@@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
1563 hdr->mds_ops = call_ops; 1550 hdr->mds_ops = call_ops;
1564 1551
1565 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1552 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1566 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1553 inode->i_ino, hdr->args.count, hdr->args.offset, how);
1567 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); 1554 trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
1568 if (trypnfs != PNFS_NOT_ATTEMPTED) 1555 if (trypnfs != PNFS_NOT_ATTEMPTED)
1569 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); 1556 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
1570 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1557 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1575,139 +1562,105 @@ static void
1575pnfs_do_write(struct nfs_pageio_descriptor *desc, 1562pnfs_do_write(struct nfs_pageio_descriptor *desc,
1576 struct nfs_pgio_header *hdr, int how) 1563 struct nfs_pgio_header *hdr, int how)
1577{ 1564{
1578 struct nfs_pgio_data *data = hdr->data;
1579 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1565 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1580 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1566 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1581 enum pnfs_try_status trypnfs; 1567 enum pnfs_try_status trypnfs;
1582 1568
1583 desc->pg_lseg = NULL; 1569 desc->pg_lseg = NULL;
1584 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); 1570 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
1585 if (trypnfs == PNFS_NOT_ATTEMPTED) 1571 if (trypnfs == PNFS_NOT_ATTEMPTED)
1586 pnfs_write_through_mds(desc, data); 1572 pnfs_write_through_mds(desc, hdr);
1587 pnfs_put_lseg(lseg); 1573 pnfs_put_lseg(lseg);
1588} 1574}
1589 1575
1590static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 1576static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1591{ 1577{
1592 pnfs_put_lseg(hdr->lseg); 1578 pnfs_put_lseg(hdr->lseg);
1593 nfs_rw_header_free(hdr); 1579 nfs_pgio_header_free(hdr);
1594} 1580}
1595EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1581EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
1596 1582
1597int 1583int
1598pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 1584pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1599{ 1585{
1600 struct nfs_rw_header *whdr;
1601 struct nfs_pgio_header *hdr; 1586 struct nfs_pgio_header *hdr;
1602 int ret; 1587 int ret;
1603 1588
1604 whdr = nfs_rw_header_alloc(desc->pg_rw_ops); 1589 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1605 if (!whdr) { 1590 if (!hdr) {
1606 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1591 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1607 pnfs_put_lseg(desc->pg_lseg); 1592 pnfs_put_lseg(desc->pg_lseg);
1608 desc->pg_lseg = NULL; 1593 desc->pg_lseg = NULL;
1609 return -ENOMEM; 1594 return -ENOMEM;
1610 } 1595 }
1611 hdr = &whdr->header;
1612 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1596 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1613 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1597 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1614 atomic_inc(&hdr->refcnt);
1615 ret = nfs_generic_pgio(desc, hdr); 1598 ret = nfs_generic_pgio(desc, hdr);
1616 if (ret != 0) { 1599 if (ret != 0) {
1617 pnfs_put_lseg(desc->pg_lseg); 1600 pnfs_put_lseg(desc->pg_lseg);
1618 desc->pg_lseg = NULL; 1601 desc->pg_lseg = NULL;
1619 } else 1602 } else
1620 pnfs_do_write(desc, hdr, desc->pg_ioflags); 1603 pnfs_do_write(desc, hdr, desc->pg_ioflags);
1621 if (atomic_dec_and_test(&hdr->refcnt))
1622 hdr->completion_ops->completion(hdr);
1623 return ret; 1604 return ret;
1624} 1605}
1625EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1606EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1626 1607
1627int pnfs_read_done_resend_to_mds(struct inode *inode, 1608int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
1628 struct list_head *head,
1629 const struct nfs_pgio_completion_ops *compl_ops,
1630 struct nfs_direct_req *dreq)
1631{ 1609{
1632 struct nfs_pageio_descriptor pgio; 1610 struct nfs_pageio_descriptor pgio;
1633 LIST_HEAD(failed);
1634 1611
1635 /* Resend all requests through the MDS */ 1612 /* Resend all requests through the MDS */
1636 nfs_pageio_init_read(&pgio, inode, true, compl_ops); 1613 nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
1637 pgio.pg_dreq = dreq; 1614 return nfs_pageio_resend(&pgio, hdr);
1638 while (!list_empty(head)) {
1639 struct nfs_page *req = nfs_list_entry(head->next);
1640
1641 nfs_list_remove_request(req);
1642 if (!nfs_pageio_add_request(&pgio, req))
1643 nfs_list_add_request(req, &failed);
1644 }
1645 nfs_pageio_complete(&pgio);
1646
1647 if (!list_empty(&failed)) {
1648 list_move(&failed, head);
1649 return -EIO;
1650 }
1651 return 0;
1652} 1615}
1653EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); 1616EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
1654 1617
1655static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) 1618static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
1656{ 1619{
1657 struct nfs_pgio_header *hdr = data->header;
1658
1659 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 1620 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1660 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1621 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1661 PNFS_LAYOUTRET_ON_ERROR) { 1622 PNFS_LAYOUTRET_ON_ERROR) {
1662 pnfs_return_layout(hdr->inode); 1623 pnfs_return_layout(hdr->inode);
1663 } 1624 }
1664 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1625 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1665 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, 1626 hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
1666 &hdr->pages,
1667 hdr->completion_ops,
1668 hdr->dreq);
1669} 1627}
1670 1628
1671/* 1629/*
1672 * Called by non rpc-based layout drivers 1630 * Called by non rpc-based layout drivers
1673 */ 1631 */
1674void pnfs_ld_read_done(struct nfs_pgio_data *data) 1632void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
1675{ 1633{
1676 struct nfs_pgio_header *hdr = data->header; 1634 trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
1677
1678 trace_nfs4_pnfs_read(data, hdr->pnfs_error);
1679 if (likely(!hdr->pnfs_error)) { 1635 if (likely(!hdr->pnfs_error)) {
1680 __nfs4_read_done_cb(data); 1636 __nfs4_read_done_cb(hdr);
1681 hdr->mds_ops->rpc_call_done(&data->task, data); 1637 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
1682 } else 1638 } else
1683 pnfs_ld_handle_read_error(data); 1639 pnfs_ld_handle_read_error(hdr);
1684 hdr->mds_ops->rpc_release(data); 1640 hdr->mds_ops->rpc_release(hdr);
1685} 1641}
1686EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1642EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1687 1643
1688static void 1644static void
1689pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, 1645pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1690 struct nfs_pgio_data *data) 1646 struct nfs_pgio_header *hdr)
1691{ 1647{
1692 struct nfs_pgio_header *hdr = data->header;
1693
1694 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { 1648 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1695 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1649 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1696 nfs_pageio_reset_read_mds(desc); 1650 nfs_pageio_reset_read_mds(desc);
1697 desc->pg_recoalesce = 1; 1651 desc->pg_recoalesce = 1;
1698 } 1652 }
1699 nfs_pgio_data_release(data); 1653 nfs_pgio_data_destroy(hdr);
1700} 1654}
1701 1655
1702/* 1656/*
1703 * Call the appropriate parallel I/O subsystem read function. 1657 * Call the appropriate parallel I/O subsystem read function.
1704 */ 1658 */
1705static enum pnfs_try_status 1659static enum pnfs_try_status
1706pnfs_try_to_read_data(struct nfs_pgio_data *rdata, 1660pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
1707 const struct rpc_call_ops *call_ops, 1661 const struct rpc_call_ops *call_ops,
1708 struct pnfs_layout_segment *lseg) 1662 struct pnfs_layout_segment *lseg)
1709{ 1663{
1710 struct nfs_pgio_header *hdr = rdata->header;
1711 struct inode *inode = hdr->inode; 1664 struct inode *inode = hdr->inode;
1712 struct nfs_server *nfss = NFS_SERVER(inode); 1665 struct nfs_server *nfss = NFS_SERVER(inode);
1713 enum pnfs_try_status trypnfs; 1666 enum pnfs_try_status trypnfs;
@@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1715 hdr->mds_ops = call_ops; 1668 hdr->mds_ops = call_ops;
1716 1669
1717 dprintk("%s: Reading ino:%lu %u@%llu\n", 1670 dprintk("%s: Reading ino:%lu %u@%llu\n",
1718 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1671 __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
1719 1672
1720 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1673 trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
1721 if (trypnfs != PNFS_NOT_ATTEMPTED) 1674 if (trypnfs != PNFS_NOT_ATTEMPTED)
1722 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1675 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1723 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1676 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
1727static void 1680static void
1728pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) 1681pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
1729{ 1682{
1730 struct nfs_pgio_data *data = hdr->data;
1731 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; 1683 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1732 struct pnfs_layout_segment *lseg = desc->pg_lseg; 1684 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1733 enum pnfs_try_status trypnfs; 1685 enum pnfs_try_status trypnfs;
1734 1686
1735 desc->pg_lseg = NULL; 1687 desc->pg_lseg = NULL;
1736 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1688 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
1737 if (trypnfs == PNFS_NOT_ATTEMPTED) 1689 if (trypnfs == PNFS_NOT_ATTEMPTED)
1738 pnfs_read_through_mds(desc, data); 1690 pnfs_read_through_mds(desc, hdr);
1739 pnfs_put_lseg(lseg); 1691 pnfs_put_lseg(lseg);
1740} 1692}
1741 1693
1742static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 1694static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1743{ 1695{
1744 pnfs_put_lseg(hdr->lseg); 1696 pnfs_put_lseg(hdr->lseg);
1745 nfs_rw_header_free(hdr); 1697 nfs_pgio_header_free(hdr);
1746} 1698}
1747EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 1699EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
1748 1700
1749int 1701int
1750pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1702pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1751{ 1703{
1752 struct nfs_rw_header *rhdr;
1753 struct nfs_pgio_header *hdr; 1704 struct nfs_pgio_header *hdr;
1754 int ret; 1705 int ret;
1755 1706
1756 rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); 1707 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
1757 if (!rhdr) { 1708 if (!hdr) {
1758 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1709 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1759 ret = -ENOMEM; 1710 ret = -ENOMEM;
1760 pnfs_put_lseg(desc->pg_lseg); 1711 pnfs_put_lseg(desc->pg_lseg);
1761 desc->pg_lseg = NULL; 1712 desc->pg_lseg = NULL;
1762 return ret; 1713 return ret;
1763 } 1714 }
1764 hdr = &rhdr->header;
1765 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 1715 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1766 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 1716 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1767 atomic_inc(&hdr->refcnt);
1768 ret = nfs_generic_pgio(desc, hdr); 1717 ret = nfs_generic_pgio(desc, hdr);
1769 if (ret != 0) { 1718 if (ret != 0) {
1770 pnfs_put_lseg(desc->pg_lseg); 1719 pnfs_put_lseg(desc->pg_lseg);
1771 desc->pg_lseg = NULL; 1720 desc->pg_lseg = NULL;
1772 } else 1721 } else
1773 pnfs_do_read(desc, hdr); 1722 pnfs_do_read(desc, hdr);
1774 if (atomic_dec_and_test(&hdr->refcnt))
1775 hdr->completion_ops->completion(hdr);
1776 return ret; 1723 return ret;
1777} 1724}
1778EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1725EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1820EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 1767EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1821 1768
1822void 1769void
1823pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) 1770pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
1824{ 1771{
1825 struct nfs_pgio_header *hdr = wdata->header;
1826 struct inode *inode = hdr->inode; 1772 struct inode *inode = hdr->inode;
1827 struct nfs_inode *nfsi = NFS_I(inode); 1773 struct nfs_inode *nfsi = NFS_I(inode);
1828 loff_t end_pos = wdata->mds_offset + wdata->res.count; 1774 loff_t end_pos = hdr->mds_offset + hdr->res.count;
1829 bool mark_as_dirty = false; 1775 bool mark_as_dirty = false;
1830 1776
1831 spin_lock(&inode->i_lock); 1777 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4fb309a2b4c4..aca3dff5dae6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
32 32
33#include <linux/nfs_fs.h> 33#include <linux/nfs_fs.h>
34#include <linux/nfs_page.h> 34#include <linux/nfs_page.h>
35#include <linux/workqueue.h>
35 36
36enum { 37enum {
37 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 38 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
@@ -46,6 +47,7 @@ struct pnfs_layout_segment {
46 atomic_t pls_refcount; 47 atomic_t pls_refcount;
47 unsigned long pls_flags; 48 unsigned long pls_flags;
48 struct pnfs_layout_hdr *pls_layout; 49 struct pnfs_layout_hdr *pls_layout;
50 struct work_struct pls_work;
49}; 51};
50 52
51enum pnfs_try_status { 53enum pnfs_try_status {
@@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type {
104 int max); 106 int max);
105 void (*recover_commit_reqs) (struct list_head *list, 107 void (*recover_commit_reqs) (struct list_head *list,
106 struct nfs_commit_info *cinfo); 108 struct nfs_commit_info *cinfo);
109 struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
110 struct page *page);
107 int (*commit_pagelist)(struct inode *inode, 111 int (*commit_pagelist)(struct inode *inode,
108 struct list_head *mds_pages, 112 struct list_head *mds_pages,
109 int how, 113 int how,
@@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type {
113 * Return PNFS_ATTEMPTED to indicate the layout code has attempted 117 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
114 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS 118 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
115 */ 119 */
116 enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); 120 enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
117 enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); 121 enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
118 122
119 void (*free_deviceid_node) (struct nfs4_deviceid_node *); 123 void (*free_deviceid_node) (struct nfs4_deviceid_node *);
120 124
@@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
179/* pnfs.c */ 183/* pnfs.c */
180void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); 184void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
181void pnfs_put_lseg(struct pnfs_layout_segment *lseg); 185void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
186void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg);
182 187
183void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); 188void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
184void unset_pnfs_layoutdriver(struct nfs_server *); 189void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino);
213void pnfs_roc_release(struct inode *ino); 218void pnfs_roc_release(struct inode *ino);
214void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 219void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
215bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); 220bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
216void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); 221void pnfs_set_layoutcommit(struct nfs_pgio_header *);
217void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 222void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
218int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 223int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
219int _pnfs_return_layout(struct inode *); 224int _pnfs_return_layout(struct inode *);
220int pnfs_commit_and_return_layout(struct inode *); 225int pnfs_commit_and_return_layout(struct inode *);
221void pnfs_ld_write_done(struct nfs_pgio_data *); 226void pnfs_ld_write_done(struct nfs_pgio_header *);
222void pnfs_ld_read_done(struct nfs_pgio_data *); 227void pnfs_ld_read_done(struct nfs_pgio_header *);
223struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 228struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
224 struct nfs_open_context *ctx, 229 struct nfs_open_context *ctx,
225 loff_t pos, 230 loff_t pos,
@@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
228 gfp_t gfp_flags); 233 gfp_t gfp_flags);
229 234
230void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); 235void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
231int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, 236int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
232 const struct nfs_pgio_completion_ops *compl_ops, 237int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
233 struct nfs_direct_req *dreq);
234int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
235 const struct nfs_pgio_completion_ops *compl_ops,
236 struct nfs_direct_req *dreq);
237struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); 238struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
238 239
239/* nfs4_deviceid_flags */ 240/* nfs4_deviceid_flags */
@@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
345 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); 346 NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
346} 347}
347 348
349static inline struct nfs_page *
350pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
351 struct page *page)
352{
353 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
354
355 if (ld == NULL || ld->search_commit_reqs == NULL)
356 return NULL;
357 return ld->search_commit_reqs(cinfo, page);
358}
359
348/* Should the pNFS client commit and return the layout upon a setattr */ 360/* Should the pNFS client commit and return the layout upon a setattr */
349static inline bool 361static inline bool
350pnfs_ld_layoutret_on_setattr(struct inode *inode) 362pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
410{ 422{
411} 423}
412 424
425static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
426{
427}
428
413static inline int pnfs_return_layout(struct inode *ino) 429static inline int pnfs_return_layout(struct inode *ino)
414{ 430{
415 return 0; 431 return 0;
@@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
496{ 512{
497} 513}
498 514
515static inline struct nfs_page *
516pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
517 struct page *page)
518{
519 return NULL;
520}
521
499static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 522static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
500{ 523{
501 return 0; 524 return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c171ce1a8a30..b09cc23d6f43 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
578 return 0; 578 return 0;
579} 579}
580 580
581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) 581static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
582{ 582{
583 struct inode *inode = data->header->inode; 583 struct inode *inode = hdr->inode;
584 584
585 nfs_invalidate_atime(inode); 585 nfs_invalidate_atime(inode);
586 if (task->tk_status >= 0) { 586 if (task->tk_status >= 0) {
587 nfs_refresh_inode(inode, data->res.fattr); 587 nfs_refresh_inode(inode, hdr->res.fattr);
588 /* Emulate the eof flag, which isn't normally needed in NFSv2 588 /* Emulate the eof flag, which isn't normally needed in NFSv2
589 * as it is guaranteed to always return the file attributes 589 * as it is guaranteed to always return the file attributes
590 */ 590 */
591 if (data->args.offset + data->res.count >= data->res.fattr->size) 591 if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
592 data->res.eof = 1; 592 hdr->res.eof = 1;
593 } 593 }
594 return 0; 594 return 0;
595} 595}
596 596
597static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 597static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
598 struct rpc_message *msg)
598{ 599{
599 msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; 600 msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
600} 601}
601 602
602static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) 603static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
604 struct nfs_pgio_header *hdr)
603{ 605{
604 rpc_call_start(task); 606 rpc_call_start(task);
605 return 0; 607 return 0;
606} 608}
607 609
608static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) 610static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
609{ 611{
610 struct inode *inode = data->header->inode; 612 struct inode *inode = hdr->inode;
611 613
612 if (task->tk_status >= 0) 614 if (task->tk_status >= 0)
613 nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); 615 nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
614 return 0; 616 return 0;
615} 617}
616 618
617static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) 619static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
620 struct rpc_message *msg)
618{ 621{
619 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ 622 /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
620 data->args.stable = NFS_FILE_SYNC; 623 hdr->args.stable = NFS_FILE_SYNC;
621 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; 624 msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
622} 625}
623 626
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e818a475ca64..beff2769c5c5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
33 33
34static struct kmem_cache *nfs_rdata_cachep; 34static struct kmem_cache *nfs_rdata_cachep;
35 35
36static struct nfs_rw_header *nfs_readhdr_alloc(void) 36static struct nfs_pgio_header *nfs_readhdr_alloc(void)
37{ 37{
38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
39} 39}
40 40
41static void nfs_readhdr_free(struct nfs_rw_header *rhdr) 41static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
42{ 42{
43 kmem_cache_free(nfs_rdata_cachep, rhdr); 43 kmem_cache_free(nfs_rdata_cachep, rhdr);
44} 44}
@@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req)
115 115
116 unlock_page(req->wb_page); 116 unlock_page(req->wb_page);
117 } 117 }
118
119 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
120 req->wb_context->dentry->d_inode->i_sb->s_id,
121 (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
122 req->wb_bytes,
123 (long long)req_offset(req));
124 nfs_release_request(req); 118 nfs_release_request(req);
125} 119}
126 120
@@ -172,14 +166,15 @@ out:
172 hdr->release(hdr); 166 hdr->release(hdr);
173} 167}
174 168
175static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, 169static void nfs_initiate_read(struct nfs_pgio_header *hdr,
170 struct rpc_message *msg,
176 struct rpc_task_setup *task_setup_data, int how) 171 struct rpc_task_setup *task_setup_data, int how)
177{ 172{
178 struct inode *inode = data->header->inode; 173 struct inode *inode = hdr->inode;
179 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 174 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
180 175
181 task_setup_data->flags |= swap_flags; 176 task_setup_data->flags |= swap_flags;
182 NFS_PROTO(inode)->read_setup(data, msg); 177 NFS_PROTO(inode)->read_setup(hdr, msg);
183} 178}
184 179
185static void 180static void
@@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
203 * This is the callback from RPC telling us whether a reply was 198 * This is the callback from RPC telling us whether a reply was
204 * received or some error occurred (timeout or socket shutdown). 199 * received or some error occurred (timeout or socket shutdown).
205 */ 200 */
206static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, 201static int nfs_readpage_done(struct rpc_task *task,
202 struct nfs_pgio_header *hdr,
207 struct inode *inode) 203 struct inode *inode)
208{ 204{
209 int status = NFS_PROTO(inode)->read_done(task, data); 205 int status = NFS_PROTO(inode)->read_done(task, hdr);
210 if (status != 0) 206 if (status != 0)
211 return status; 207 return status;
212 208
213 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); 209 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
214 210
215 if (task->tk_status == -ESTALE) { 211 if (task->tk_status == -ESTALE) {
216 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); 212 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
219 return 0; 215 return 0;
220} 216}
221 217
222static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) 218static void nfs_readpage_retry(struct rpc_task *task,
219 struct nfs_pgio_header *hdr)
223{ 220{
224 struct nfs_pgio_args *argp = &data->args; 221 struct nfs_pgio_args *argp = &hdr->args;
225 struct nfs_pgio_res *resp = &data->res; 222 struct nfs_pgio_res *resp = &hdr->res;
226 223
227 /* This is a short read! */ 224 /* This is a short read! */
228 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 225 nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
229 /* Has the server at least made some progress? */ 226 /* Has the server at least made some progress? */
230 if (resp->count == 0) { 227 if (resp->count == 0) {
231 nfs_set_pgio_error(data->header, -EIO, argp->offset); 228 nfs_set_pgio_error(hdr, -EIO, argp->offset);
232 return; 229 return;
233 } 230 }
234 /* Yes, so retry the read at the end of the data */ 231 /* Yes, so retry the read at the end of the hdr */
235 data->mds_offset += resp->count; 232 hdr->mds_offset += resp->count;
236 argp->offset += resp->count; 233 argp->offset += resp->count;
237 argp->pgbase += resp->count; 234 argp->pgbase += resp->count;
238 argp->count -= resp->count; 235 argp->count -= resp->count;
239 rpc_restart_call_prepare(task); 236 rpc_restart_call_prepare(task);
240} 237}
241 238
242static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) 239static void nfs_readpage_result(struct rpc_task *task,
240 struct nfs_pgio_header *hdr)
243{ 241{
244 struct nfs_pgio_header *hdr = data->header; 242 if (hdr->res.eof) {
245
246 if (data->res.eof) {
247 loff_t bound; 243 loff_t bound;
248 244
249 bound = data->args.offset + data->res.count; 245 bound = hdr->args.offset + hdr->res.count;
250 spin_lock(&hdr->lock); 246 spin_lock(&hdr->lock);
251 if (bound < hdr->io_start + hdr->good_bytes) { 247 if (bound < hdr->io_start + hdr->good_bytes) {
252 set_bit(NFS_IOHDR_EOF, &hdr->flags); 248 set_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
254 hdr->good_bytes = bound - hdr->io_start; 250 hdr->good_bytes = bound - hdr->io_start;
255 } 251 }
256 spin_unlock(&hdr->lock); 252 spin_unlock(&hdr->lock);
257 } else if (data->res.count != data->args.count) 253 } else if (hdr->res.count != hdr->args.count)
258 nfs_readpage_retry(task, data); 254 nfs_readpage_retry(task, hdr);
259} 255}
260 256
261/* 257/*
@@ -404,7 +400,7 @@ out:
404int __init nfs_init_readpagecache(void) 400int __init nfs_init_readpagecache(void)
405{ 401{
406 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 402 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
407 sizeof(struct nfs_rw_header), 403 sizeof(struct nfs_pgio_header),
408 0, SLAB_HWCACHE_ALIGN, 404 0, SLAB_HWCACHE_ALIGN,
409 NULL); 405 NULL);
410 if (nfs_rdata_cachep == NULL) 406 if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 084af1060d79..e4499d5b51e8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
1027 rpc_authflavor_t flavor) 1027 rpc_authflavor_t flavor)
1028{ 1028{
1029 unsigned int i; 1029 unsigned int i;
1030 unsigned int max_flavor_len = (sizeof(auth_info->flavors) / 1030 unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
1031 sizeof(auth_info->flavors[0]));
1032 1031
1033 /* make sure this flavor isn't already in the list */ 1032 /* make sure this flavor isn't already in the list */
1034 for (i = 0; i < auth_info->flavor_len; i++) { 1033 for (i = 0; i < auth_info->flavor_len; i++) {
@@ -2180,7 +2179,7 @@ out_no_address:
2180 return -EINVAL; 2179 return -EINVAL;
2181} 2180}
2182 2181
2183#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ 2182#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
2184 | NFS_MOUNT_SECURE \ 2183 | NFS_MOUNT_SECURE \
2185 | NFS_MOUNT_TCP \ 2184 | NFS_MOUNT_TCP \
2186 | NFS_MOUNT_VER3 \ 2185 | NFS_MOUNT_VER3 \
@@ -2188,15 +2187,16 @@ out_no_address:
2188 | NFS_MOUNT_NONLM \ 2187 | NFS_MOUNT_NONLM \
2189 | NFS_MOUNT_BROKEN_SUID \ 2188 | NFS_MOUNT_BROKEN_SUID \
2190 | NFS_MOUNT_STRICTLOCK \ 2189 | NFS_MOUNT_STRICTLOCK \
2191 | NFS_MOUNT_UNSHARED \
2192 | NFS_MOUNT_NORESVPORT \
2193 | NFS_MOUNT_LEGACY_INTERFACE) 2190 | NFS_MOUNT_LEGACY_INTERFACE)
2194 2191
2192#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \
2193 ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT))
2194
2195static int 2195static int
2196nfs_compare_remount_data(struct nfs_server *nfss, 2196nfs_compare_remount_data(struct nfs_server *nfss,
2197 struct nfs_parsed_mount_data *data) 2197 struct nfs_parsed_mount_data *data)
2198{ 2198{
2199 if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || 2199 if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
2200 data->rsize != nfss->rsize || 2200 data->rsize != nfss->rsize ||
2201 data->wsize != nfss->wsize || 2201 data->wsize != nfss->wsize ||
2202 data->version != nfss->nfs_client->rpc_ops->version || 2202 data->version != nfss->nfs_client->rpc_ops->version ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 962c9ee758be..e3b5cf28bdc5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47static const struct nfs_commit_completion_ops nfs_commit_completion_ops; 47static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48static const struct nfs_rw_ops nfs_rw_write_ops; 48static const struct nfs_rw_ops nfs_rw_write_ops;
49static void nfs_clear_request_commit(struct nfs_page *req); 49static void nfs_clear_request_commit(struct nfs_page *req);
50static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
51 struct inode *inode);
50 52
51static struct kmem_cache *nfs_wdata_cachep; 53static struct kmem_cache *nfs_wdata_cachep;
52static mempool_t *nfs_wdata_mempool; 54static mempool_t *nfs_wdata_mempool;
@@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
71} 73}
72EXPORT_SYMBOL_GPL(nfs_commit_free); 74EXPORT_SYMBOL_GPL(nfs_commit_free);
73 75
74static struct nfs_rw_header *nfs_writehdr_alloc(void) 76static struct nfs_pgio_header *nfs_writehdr_alloc(void)
75{ 77{
76 struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 78 struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
77 79
78 if (p) 80 if (p)
79 memset(p, 0, sizeof(*p)); 81 memset(p, 0, sizeof(*p));
80 return p; 82 return p;
81} 83}
82 84
83static void nfs_writehdr_free(struct nfs_rw_header *whdr) 85static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
84{ 86{
85 mempool_free(whdr, nfs_wdata_mempool); 87 mempool_free(hdr, nfs_wdata_mempool);
86} 88}
87 89
88static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) 90static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
93} 95}
94 96
95/* 97/*
98 * nfs_page_search_commits_for_head_request_locked
99 *
100 * Search through commit lists on @inode for the head request for @page.
101 * Must be called while holding the inode (which is cinfo) lock.
102 *
103 * Returns the head request if found, or NULL if not found.
104 */
105static struct nfs_page *
106nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
107 struct page *page)
108{
109 struct nfs_page *freq, *t;
110 struct nfs_commit_info cinfo;
111 struct inode *inode = &nfsi->vfs_inode;
112
113 nfs_init_cinfo_from_inode(&cinfo, inode);
114
115 /* search through pnfs commit lists */
116 freq = pnfs_search_commit_reqs(inode, &cinfo, page);
117 if (freq)
118 return freq->wb_head;
119
120 /* Linearly search the commit list for the correct request */
121 list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
122 if (freq->wb_page == page)
123 return freq->wb_head;
124 }
125
126 return NULL;
127}
128
129/*
96 * nfs_page_find_head_request_locked - find head request associated with @page 130 * nfs_page_find_head_request_locked - find head request associated with @page
97 * 131 *
98 * must be called while holding the inode lock. 132 * must be called while holding the inode lock.
@@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
106 140
107 if (PagePrivate(page)) 141 if (PagePrivate(page))
108 req = (struct nfs_page *)page_private(page); 142 req = (struct nfs_page *)page_private(page);
109 else if (unlikely(PageSwapCache(page))) { 143 else if (unlikely(PageSwapCache(page)))
110 struct nfs_page *freq, *t; 144 req = nfs_page_search_commits_for_head_request_locked(nfsi,
111 145 page);
112 /* Linearly search the commit list for the correct req */
113 list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
114 if (freq->wb_page == page) {
115 req = freq->wb_head;
116 break;
117 }
118 }
119 }
120 146
121 if (req) { 147 if (req) {
122 WARN_ON_ONCE(req->wb_head != req); 148 WARN_ON_ONCE(req->wb_head != req);
123
124 kref_get(&req->wb_kref); 149 kref_get(&req->wb_kref);
125 } 150 }
126 151
@@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
216 unsigned int pos = 0; 241 unsigned int pos = 0;
217 unsigned int len = nfs_page_length(req->wb_page); 242 unsigned int len = nfs_page_length(req->wb_page);
218 243
219 nfs_page_group_lock(req); 244 nfs_page_group_lock(req, true);
220 245
221 do { 246 do {
222 tmp = nfs_page_group_search_locked(req->wb_head, pos); 247 tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
379 subreq->wb_head = subreq; 404 subreq->wb_head = subreq;
380 subreq->wb_this_page = subreq; 405 subreq->wb_this_page = subreq;
381 406
382 nfs_clear_request_commit(subreq);
383
384 /* subreq is now totally disconnected from page group or any 407 /* subreq is now totally disconnected from page group or any
385 * write / commit lists. last chance to wake any waiters */ 408 * write / commit lists. last chance to wake any waiters */
386 nfs_unlock_request(subreq); 409 nfs_unlock_request(subreq);
@@ -456,7 +479,9 @@ try_again:
456 } 479 }
457 480
458 /* lock each request in the page group */ 481 /* lock each request in the page group */
459 nfs_page_group_lock(head); 482 ret = nfs_page_group_lock(head, false);
483 if (ret < 0)
484 return ERR_PTR(ret);
460 subreq = head; 485 subreq = head;
461 do { 486 do {
462 /* 487 /*
@@ -488,7 +513,7 @@ try_again:
488 * Commit list removal accounting is done after locks are dropped */ 513 * Commit list removal accounting is done after locks are dropped */
489 subreq = head; 514 subreq = head;
490 do { 515 do {
491 nfs_list_remove_request(subreq); 516 nfs_clear_request_commit(subreq);
492 subreq = subreq->wb_this_page; 517 subreq = subreq->wb_this_page;
493 } while (subreq != head); 518 } while (subreq != head);
494 519
@@ -518,15 +543,11 @@ try_again:
518 543
519 nfs_page_group_unlock(head); 544 nfs_page_group_unlock(head);
520 545
521 /* drop lock to clear_request_commit the head req and clean up 546 /* drop lock to clean uprequests on destroy list */
522 * requests on destroy list */
523 spin_unlock(&inode->i_lock); 547 spin_unlock(&inode->i_lock);
524 548
525 nfs_destroy_unlinked_subrequests(destroy_list, head); 549 nfs_destroy_unlinked_subrequests(destroy_list, head);
526 550
527 /* clean up commit list state */
528 nfs_clear_request_commit(head);
529
530 /* still holds ref on head from nfs_page_find_head_request_locked 551 /* still holds ref on head from nfs_page_find_head_request_locked
531 * and still has lock on head from lock loop */ 552 * and still has lock on head from lock loop */
532 return head; 553 return head;
@@ -705,6 +726,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
705 726
706 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) 727 if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
707 nfs_release_request(req); 728 nfs_release_request(req);
729 else
730 WARN_ON_ONCE(1);
708} 731}
709 732
710static void 733static void
@@ -808,6 +831,7 @@ nfs_clear_page_commit(struct page *page)
808 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); 831 dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
809} 832}
810 833
834/* Called holding inode (/cinfo) lock */
811static void 835static void
812nfs_clear_request_commit(struct nfs_page *req) 836nfs_clear_request_commit(struct nfs_page *req)
813{ 837{
@@ -817,20 +841,17 @@ nfs_clear_request_commit(struct nfs_page *req)
817 841
818 nfs_init_cinfo_from_inode(&cinfo, inode); 842 nfs_init_cinfo_from_inode(&cinfo, inode);
819 if (!pnfs_clear_request_commit(req, &cinfo)) { 843 if (!pnfs_clear_request_commit(req, &cinfo)) {
820 spin_lock(cinfo.lock);
821 nfs_request_remove_commit_list(req, &cinfo); 844 nfs_request_remove_commit_list(req, &cinfo);
822 spin_unlock(cinfo.lock);
823 } 845 }
824 nfs_clear_page_commit(req->wb_page); 846 nfs_clear_page_commit(req->wb_page);
825 } 847 }
826} 848}
827 849
828static inline 850int nfs_write_need_commit(struct nfs_pgio_header *hdr)
829int nfs_write_need_commit(struct nfs_pgio_data *data)
830{ 851{
831 if (data->verf.committed == NFS_DATA_SYNC) 852 if (hdr->verf.committed == NFS_DATA_SYNC)
832 return data->header->lseg == NULL; 853 return hdr->lseg == NULL;
833 return data->verf.committed != NFS_FILE_SYNC; 854 return hdr->verf.committed != NFS_FILE_SYNC;
834} 855}
835 856
836#else 857#else
@@ -856,8 +877,7 @@ nfs_clear_request_commit(struct nfs_page *req)
856{ 877{
857} 878}
858 879
859static inline 880int nfs_write_need_commit(struct nfs_pgio_header *hdr)
860int nfs_write_need_commit(struct nfs_pgio_data *data)
861{ 881{
862 return 0; 882 return 0;
863} 883}
@@ -883,11 +903,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
883 nfs_context_set_write_error(req->wb_context, hdr->error); 903 nfs_context_set_write_error(req->wb_context, hdr->error);
884 goto remove_req; 904 goto remove_req;
885 } 905 }
886 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { 906 if (nfs_write_need_commit(hdr)) {
887 nfs_mark_request_dirty(req);
888 goto next;
889 }
890 if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
891 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); 907 memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
892 nfs_mark_request_commit(req, hdr->lseg, &cinfo); 908 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
893 goto next; 909 goto next;
@@ -1038,9 +1054,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
1038 else 1054 else
1039 req->wb_bytes = rqend - req->wb_offset; 1055 req->wb_bytes = rqend - req->wb_offset;
1040out_unlock: 1056out_unlock:
1041 spin_unlock(&inode->i_lock);
1042 if (req) 1057 if (req)
1043 nfs_clear_request_commit(req); 1058 nfs_clear_request_commit(req);
1059 spin_unlock(&inode->i_lock);
1044 return req; 1060 return req;
1045out_flushme: 1061out_flushme:
1046 spin_unlock(&inode->i_lock); 1062 spin_unlock(&inode->i_lock);
@@ -1241,17 +1257,18 @@ static int flush_task_priority(int how)
1241 return RPC_PRIORITY_NORMAL; 1257 return RPC_PRIORITY_NORMAL;
1242} 1258}
1243 1259
1244static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, 1260static void nfs_initiate_write(struct nfs_pgio_header *hdr,
1261 struct rpc_message *msg,
1245 struct rpc_task_setup *task_setup_data, int how) 1262 struct rpc_task_setup *task_setup_data, int how)
1246{ 1263{
1247 struct inode *inode = data->header->inode; 1264 struct inode *inode = hdr->inode;
1248 int priority = flush_task_priority(how); 1265 int priority = flush_task_priority(how);
1249 1266
1250 task_setup_data->priority = priority; 1267 task_setup_data->priority = priority;
1251 NFS_PROTO(inode)->write_setup(data, msg); 1268 NFS_PROTO(inode)->write_setup(hdr, msg);
1252 1269
1253 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, 1270 nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
1254 &task_setup_data->rpc_client, msg, data); 1271 &task_setup_data->rpc_client, msg, hdr);
1255} 1272}
1256 1273
1257/* If a nfs_flush_* function fails, it should remove reqs from @head and 1274/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1313,21 +1330,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1313 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); 1330 NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1314} 1331}
1315 1332
1316static void nfs_writeback_release_common(struct nfs_pgio_data *data) 1333static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
1317{ 1334{
1318 struct nfs_pgio_header *hdr = data->header; 1335 /* do nothing! */
1319 int status = data->task.tk_status;
1320
1321 if ((status >= 0) && nfs_write_need_commit(data)) {
1322 spin_lock(&hdr->lock);
1323 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
1324 ; /* Do nothing */
1325 else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
1326 memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
1327 else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
1328 set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
1329 spin_unlock(&hdr->lock);
1330 }
1331} 1336}
1332 1337
1333/* 1338/*
@@ -1358,7 +1363,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
1358/* 1363/*
1359 * This function is called when the WRITE call is complete. 1364 * This function is called when the WRITE call is complete.
1360 */ 1365 */
1361static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, 1366static int nfs_writeback_done(struct rpc_task *task,
1367 struct nfs_pgio_header *hdr,
1362 struct inode *inode) 1368 struct inode *inode)
1363{ 1369{
1364 int status; 1370 int status;
@@ -1370,13 +1376,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1370 * another writer had changed the file, but some applications 1376 * another writer had changed the file, but some applications
1371 * depend on tighter cache coherency when writing. 1377 * depend on tighter cache coherency when writing.
1372 */ 1378 */
1373 status = NFS_PROTO(inode)->write_done(task, data); 1379 status = NFS_PROTO(inode)->write_done(task, hdr);
1374 if (status != 0) 1380 if (status != 0)
1375 return status; 1381 return status;
1376 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); 1382 nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1377 1383
1378#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) 1384#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1379 if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { 1385 if (hdr->res.verf->committed < hdr->args.stable &&
1386 task->tk_status >= 0) {
1380 /* We tried a write call, but the server did not 1387 /* We tried a write call, but the server did not
1381 * commit data to stable storage even though we 1388 * commit data to stable storage even though we
1382 * requested it. 1389 * requested it.
@@ -1392,7 +1399,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1392 dprintk("NFS: faulty NFS server %s:" 1399 dprintk("NFS: faulty NFS server %s:"
1393 " (committed = %d) != (stable = %d)\n", 1400 " (committed = %d) != (stable = %d)\n",
1394 NFS_SERVER(inode)->nfs_client->cl_hostname, 1401 NFS_SERVER(inode)->nfs_client->cl_hostname,
1395 data->res.verf->committed, data->args.stable); 1402 hdr->res.verf->committed, hdr->args.stable);
1396 complain = jiffies + 300 * HZ; 1403 complain = jiffies + 300 * HZ;
1397 } 1404 }
1398 } 1405 }
@@ -1407,16 +1414,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
1407/* 1414/*
1408 * This function is called when the WRITE call is complete. 1415 * This function is called when the WRITE call is complete.
1409 */ 1416 */
1410static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) 1417static void nfs_writeback_result(struct rpc_task *task,
1418 struct nfs_pgio_header *hdr)
1411{ 1419{
1412 struct nfs_pgio_args *argp = &data->args; 1420 struct nfs_pgio_args *argp = &hdr->args;
1413 struct nfs_pgio_res *resp = &data->res; 1421 struct nfs_pgio_res *resp = &hdr->res;
1414 1422
1415 if (resp->count < argp->count) { 1423 if (resp->count < argp->count) {
1416 static unsigned long complain; 1424 static unsigned long complain;
1417 1425
1418 /* This a short write! */ 1426 /* This a short write! */
1419 nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); 1427 nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1420 1428
1421 /* Has the server at least made some progress? */ 1429 /* Has the server at least made some progress? */
1422 if (resp->count == 0) { 1430 if (resp->count == 0) {
@@ -1426,14 +1434,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
1426 argp->count); 1434 argp->count);
1427 complain = jiffies + 300 * HZ; 1435 complain = jiffies + 300 * HZ;
1428 } 1436 }
1429 nfs_set_pgio_error(data->header, -EIO, argp->offset); 1437 nfs_set_pgio_error(hdr, -EIO, argp->offset);
1430 task->tk_status = -EIO; 1438 task->tk_status = -EIO;
1431 return; 1439 return;
1432 } 1440 }
1433 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1441 /* Was this an NFSv2 write or an NFSv3 stable write? */
1434 if (resp->verf->committed != NFS_UNSTABLE) { 1442 if (resp->verf->committed != NFS_UNSTABLE) {
1435 /* Resend from where the server left off */ 1443 /* Resend from where the server left off */
1436 data->mds_offset += resp->count; 1444 hdr->mds_offset += resp->count;
1437 argp->offset += resp->count; 1445 argp->offset += resp->count;
1438 argp->pgbase += resp->count; 1446 argp->pgbase += resp->count;
1439 argp->count -= resp->count; 1447 argp->count -= resp->count;
@@ -1884,7 +1892,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1884int __init nfs_init_writepagecache(void) 1892int __init nfs_init_writepagecache(void)
1885{ 1893{
1886 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1894 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1887 sizeof(struct nfs_rw_header), 1895 sizeof(struct nfs_pgio_header),
1888 0, SLAB_HWCACHE_ALIGN, 1896 0, SLAB_HWCACHE_ALIGN,
1889 NULL); 1897 NULL);
1890 if (nfs_wdata_cachep == NULL) 1898 if (nfs_wdata_cachep == NULL)
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index ed628f71274c..538f142935ea 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -30,9 +30,6 @@
30 30
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32 32
33EXPORT_SYMBOL_GPL(nfsacl_encode);
34EXPORT_SYMBOL_GPL(nfsacl_decode);
35
36struct nfsacl_encode_desc { 33struct nfsacl_encode_desc {
37 struct xdr_array2_desc desc; 34 struct xdr_array2_desc desc;
38 unsigned int count; 35 unsigned int count;
@@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
136 nfsacl_desc.desc.array_len; 133 nfsacl_desc.desc.array_len;
137 return err; 134 return err;
138} 135}
136EXPORT_SYMBOL_GPL(nfsacl_encode);
139 137
140struct nfsacl_decode_desc { 138struct nfsacl_decode_desc {
141 struct xdr_array2_desc desc; 139 struct xdr_array2_desc desc;
@@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
295 return 8 + nfsacl_desc.desc.elem_size * 293 return 8 + nfsacl_desc.desc.elem_size *
296 nfsacl_desc.desc.array_len; 294 nfsacl_desc.desc.array_len;
297} 295}
296EXPORT_SYMBOL_GPL(nfsacl_decode);
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index a986ceb6fd0d..4cd7c69a6cb9 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -47,7 +47,7 @@ struct svc_rqst;
47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ 47#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
48 / sizeof(struct nfs4_ace)) 48 / sizeof(struct nfs4_ace))
49 49
50struct nfs4_acl *nfs4_acl_new(int); 50int nfs4_acl_bytes(int entries);
51int nfs4_acl_get_whotype(char *, u32); 51int nfs4_acl_get_whotype(char *, u32);
52__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); 52__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
53 53
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 72f44823adbb..9d46a0bdd9f9 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
28 validate_process_creds(); 28 validate_process_creds();
29 29
30 /* discard any old override before preparing the new set */ 30 /* discard any old override before preparing the new set */
31 revert_creds(get_cred(current->real_cred)); 31 revert_creds(get_cred(current_real_cred()));
32 new = prepare_creds(); 32 new = prepare_creds();
33 if (!new) 33 if (!new)
34 return -ENOMEM; 34 return -ENOMEM;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 13b85f94d9e2..72ffd7cce3c3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
698 698
699 kref_get(&item->ex_client->ref); 699 kref_get(&item->ex_client->ref);
700 new->ex_client = item->ex_client; 700 new->ex_client = item->ex_client;
701 new->ex_path.dentry = dget(item->ex_path.dentry); 701 new->ex_path = item->ex_path;
702 new->ex_path.mnt = mntget(item->ex_path.mnt); 702 path_get(&item->ex_path);
703 new->ex_fslocs.locations = NULL; 703 new->ex_fslocs.locations = NULL;
704 new->ex_fslocs.locations_count = 0; 704 new->ex_fslocs.locations_count = 0;
705 new->ex_fslocs.migrated = 0; 705 new->ex_fslocs.migrated = 0;
@@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p)
1253 return 0; 1253 return 0;
1254 } 1254 }
1255 1255
1256 cache_get(&exp->h); 1256 exp_get(exp);
1257 if (cache_check(cd, &exp->h, NULL)) 1257 if (cache_check(cd, &exp->h, NULL))
1258 return 0; 1258 return 0;
1259 exp_put(exp); 1259 exp_put(exp);
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index cfeea85c5bed..04dc8c167b0c 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp)
101 cache_put(&exp->h, exp->cd); 101 cache_put(&exp->h, exp->cd);
102} 102}
103 103
104static inline void exp_get(struct svc_export *exp) 104static inline struct svc_export *exp_get(struct svc_export *exp)
105{ 105{
106 cache_get(&exp->h); 106 cache_get(&exp->h);
107 return exp;
107} 108}
108struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); 109struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
109 110
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 2ed05c3cd43d..c16bf5af6831 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -17,81 +17,13 @@
17 17
18struct nfsd_fault_inject_op { 18struct nfsd_fault_inject_op {
19 char *file; 19 char *file;
20 u64 (*forget)(struct nfs4_client *, u64); 20 u64 (*get)(void);
21 u64 (*print)(struct nfs4_client *, u64); 21 u64 (*set_val)(u64);
22 u64 (*set_clnt)(struct sockaddr_storage *, size_t);
22}; 23};
23 24
24static struct nfsd_fault_inject_op inject_ops[] = {
25 {
26 .file = "forget_clients",
27 .forget = nfsd_forget_client,
28 .print = nfsd_print_client,
29 },
30 {
31 .file = "forget_locks",
32 .forget = nfsd_forget_client_locks,
33 .print = nfsd_print_client_locks,
34 },
35 {
36 .file = "forget_openowners",
37 .forget = nfsd_forget_client_openowners,
38 .print = nfsd_print_client_openowners,
39 },
40 {
41 .file = "forget_delegations",
42 .forget = nfsd_forget_client_delegations,
43 .print = nfsd_print_client_delegations,
44 },
45 {
46 .file = "recall_delegations",
47 .forget = nfsd_recall_client_delegations,
48 .print = nfsd_print_client_delegations,
49 },
50};
51
52static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
53static struct dentry *debug_dir; 25static struct dentry *debug_dir;
54 26
55static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
56{
57 u64 count = 0;
58
59 if (val == 0)
60 printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
61 else
62 printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
63
64 nfs4_lock_state();
65 count = nfsd_for_n_state(val, op->forget);
66 nfs4_unlock_state();
67 printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
68}
69
70static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
71 struct sockaddr_storage *addr,
72 size_t addr_size)
73{
74 char buf[INET6_ADDRSTRLEN];
75 struct nfs4_client *clp;
76 u64 count;
77
78 nfs4_lock_state();
79 clp = nfsd_find_client(addr, addr_size);
80 if (clp) {
81 count = op->forget(clp, 0);
82 rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
83 printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
84 }
85 nfs4_unlock_state();
86}
87
88static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
89{
90 nfs4_lock_state();
91 *val = nfsd_for_n_state(0, op->print);
92 nfs4_unlock_state();
93}
94
95static ssize_t fault_inject_read(struct file *file, char __user *buf, 27static ssize_t fault_inject_read(struct file *file, char __user *buf,
96 size_t len, loff_t *ppos) 28 size_t len, loff_t *ppos)
97{ 29{
@@ -99,9 +31,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
99 char read_buf[25]; 31 char read_buf[25];
100 size_t size; 32 size_t size;
101 loff_t pos = *ppos; 33 loff_t pos = *ppos;
34 struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
102 35
103 if (!pos) 36 if (!pos)
104 nfsd_inject_get(file_inode(file)->i_private, &val); 37 val = op->get();
105 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); 38 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
106 39
107 return simple_read_from_buffer(buf, len, ppos, read_buf, size); 40 return simple_read_from_buffer(buf, len, ppos, read_buf, size);
@@ -114,18 +47,36 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
114 size_t size = min(sizeof(write_buf) - 1, len); 47 size_t size = min(sizeof(write_buf) - 1, len);
115 struct net *net = current->nsproxy->net_ns; 48 struct net *net = current->nsproxy->net_ns;
116 struct sockaddr_storage sa; 49 struct sockaddr_storage sa;
50 struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
117 u64 val; 51 u64 val;
52 char *nl;
118 53
119 if (copy_from_user(write_buf, buf, size)) 54 if (copy_from_user(write_buf, buf, size))
120 return -EFAULT; 55 return -EFAULT;
121 write_buf[size] = '\0'; 56 write_buf[size] = '\0';
122 57
58 /* Deal with any embedded newlines in the string */
59 nl = strchr(write_buf, '\n');
60 if (nl) {
61 size = nl - write_buf;
62 *nl = '\0';
63 }
64
123 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); 65 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
124 if (size > 0) 66 if (size > 0) {
125 nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); 67 val = op->set_clnt(&sa, size);
126 else { 68 if (val)
69 pr_info("NFSD [%s]: Client %s had %llu state object(s)\n",
70 op->file, write_buf, val);
71 } else {
127 val = simple_strtoll(write_buf, NULL, 0); 72 val = simple_strtoll(write_buf, NULL, 0);
128 nfsd_inject_set(file_inode(file)->i_private, val); 73 if (val == 0)
74 pr_info("NFSD Fault Injection: %s (all)", op->file);
75 else
76 pr_info("NFSD Fault Injection: %s (n = %llu)",
77 op->file, val);
78 val = op->set_val(val);
79 pr_info("NFSD: %s: found %llu", op->file, val);
129 } 80 }
130 return len; /* on success, claim we got the whole input */ 81 return len; /* on success, claim we got the whole input */
131} 82}
@@ -141,6 +92,41 @@ void nfsd_fault_inject_cleanup(void)
141 debugfs_remove_recursive(debug_dir); 92 debugfs_remove_recursive(debug_dir);
142} 93}
143 94
95static struct nfsd_fault_inject_op inject_ops[] = {
96 {
97 .file = "forget_clients",
98 .get = nfsd_inject_print_clients,
99 .set_val = nfsd_inject_forget_clients,
100 .set_clnt = nfsd_inject_forget_client,
101 },
102 {
103 .file = "forget_locks",
104 .get = nfsd_inject_print_locks,
105 .set_val = nfsd_inject_forget_locks,
106 .set_clnt = nfsd_inject_forget_client_locks,
107 },
108 {
109 .file = "forget_openowners",
110 .get = nfsd_inject_print_openowners,
111 .set_val = nfsd_inject_forget_openowners,
112 .set_clnt = nfsd_inject_forget_client_openowners,
113 },
114 {
115 .file = "forget_delegations",
116 .get = nfsd_inject_print_delegations,
117 .set_val = nfsd_inject_forget_delegations,
118 .set_clnt = nfsd_inject_forget_client_delegations,
119 },
120 {
121 .file = "recall_delegations",
122 .get = nfsd_inject_print_delegations,
123 .set_val = nfsd_inject_recall_delegations,
124 .set_clnt = nfsd_inject_recall_client_delegations,
125 },
126};
127
128#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
129
144int nfsd_fault_inject_init(void) 130int nfsd_fault_inject_init(void)
145{ 131{
146 unsigned int i; 132 unsigned int i;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d32b3aa6600d..ea6749a32760 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -29,14 +29,19 @@
29#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) 29#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
30#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) 30#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
31 31
32#define LOCKOWNER_INO_HASH_BITS 8
33#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
34
35#define SESSION_HASH_SIZE 512 32#define SESSION_HASH_SIZE 512
36 33
37struct cld_net; 34struct cld_net;
38struct nfsd4_client_tracking_ops; 35struct nfsd4_client_tracking_ops;
39 36
37/*
38 * Represents a nfsd "container". With respect to nfsv4 state tracking, the
39 * fields of interest are the *_id_hashtbls and the *_name_tree. These track
40 * the nfs4_client objects by either short or long form clientid.
41 *
42 * Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean
43 * up expired clients and delegations within the container.
44 */
40struct nfsd_net { 45struct nfsd_net {
41 struct cld_net *cld_net; 46 struct cld_net *cld_net;
42 47
@@ -66,8 +71,6 @@ struct nfsd_net {
66 struct rb_root conf_name_tree; 71 struct rb_root conf_name_tree;
67 struct list_head *unconf_id_hashtbl; 72 struct list_head *unconf_id_hashtbl;
68 struct rb_root unconf_name_tree; 73 struct rb_root unconf_name_tree;
69 struct list_head *ownerstr_hashtbl;
70 struct list_head *lockowner_ino_hashtbl;
71 struct list_head *sessionid_hashtbl; 74 struct list_head *sessionid_hashtbl;
72 /* 75 /*
73 * client_lru holds client queue ordered by nfs4_client.cl_time 76 * client_lru holds client queue ordered by nfs4_client.cl_time
@@ -97,10 +100,16 @@ struct nfsd_net {
97 bool nfsd_net_up; 100 bool nfsd_net_up;
98 bool lockd_up; 101 bool lockd_up;
99 102
103 /* Time of server startup */
104 struct timeval nfssvc_boot;
105
100 /* 106 /*
101 * Time of server startup 107 * Max number of connections this nfsd container will allow. Defaults
108 * to '0' which is means that it bases this on the number of threads.
102 */ 109 */
103 struct timeval nfssvc_boot; 110 unsigned int max_connections;
111
112 u32 clientid_counter;
104 113
105 struct svc_serv *nfsd_serv; 114 struct svc_serv *nfsd_serv;
106}; 115};
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12b023a7ab7d..ac54ea60b3f6 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
54 54
55 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 55 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
56 acl = get_acl(inode, ACL_TYPE_ACCESS); 56 acl = get_acl(inode, ACL_TYPE_ACCESS);
57 if (IS_ERR(acl)) {
58 nfserr = nfserrno(PTR_ERR(acl));
59 goto fail;
60 }
61 if (acl == NULL) { 57 if (acl == NULL) {
62 /* Solaris returns the inode's minimum ACL. */ 58 /* Solaris returns the inode's minimum ACL. */
63 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 59 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
64 } 60 }
61 if (IS_ERR(acl)) {
62 nfserr = nfserrno(PTR_ERR(acl));
63 goto fail;
64 }
65 resp->acl_access = acl; 65 resp->acl_access = acl;
66 } 66 }
67 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { 67 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 2a514e21dc74..34cbbab6abd7 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
47 47
48 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 48 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
49 acl = get_acl(inode, ACL_TYPE_ACCESS); 49 acl = get_acl(inode, ACL_TYPE_ACCESS);
50 if (IS_ERR(acl)) {
51 nfserr = nfserrno(PTR_ERR(acl));
52 goto fail;
53 }
54 if (acl == NULL) { 50 if (acl == NULL) {
55 /* Solaris returns the inode's minimum ACL. */ 51 /* Solaris returns the inode's minimum ACL. */
56 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 52 acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
57 } 53 }
54 if (IS_ERR(acl)) {
55 nfserr = nfserrno(PTR_ERR(acl));
56 goto fail;
57 }
58 resp->acl_access = acl; 58 resp->acl_access = acl;
59 } 59 }
60 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { 60 if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 401289913130..fa2525b2e9d7 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
157 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) 157 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
158 * + 1 (xdr opaque byte count) = 26 158 * + 1 (xdr opaque byte count) = 26
159 */ 159 */
160 160 resp->count = min(argp->count, max_blocksize);
161 resp->count = argp->count;
162 if (max_blocksize < resp->count)
163 resp->count = max_blocksize;
164
165 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); 161 svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
166 162
167 fh_copy(&resp->fh, &argp->fh); 163 fh_copy(&resp->fh, &argp->fh);
@@ -286,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
286 fh_copy(&resp->dirfh, &argp->ffh); 282 fh_copy(&resp->dirfh, &argp->ffh);
287 fh_init(&resp->fh, NFS3_FHSIZE); 283 fh_init(&resp->fh, NFS3_FHSIZE);
288 nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, 284 nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
289 argp->tname, argp->tlen, 285 argp->tname, &resp->fh);
290 &resp->fh, &argp->attrs);
291 RETURN_STATUS(nfserr); 286 RETURN_STATUS(nfserr);
292} 287}
293 288
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e6c01e80325e..39c5eb3ad33a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap)
120 120
121 iap->ia_valid |= ATTR_SIZE; 121 iap->ia_valid |= ATTR_SIZE;
122 p = xdr_decode_hyper(p, &newsize); 122 p = xdr_decode_hyper(p, &newsize);
123 if (newsize <= NFS_OFFSET_MAX) 123 iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
124 iap->ia_size = newsize;
125 else
126 iap->ia_size = NFS_OFFSET_MAX;
127 } 124 }
128 if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ 125 if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
129 iap->ia_valid |= ATTR_ATIME; 126 iap->ia_valid |= ATTR_ATIME;
@@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
338 return 0; 335 return 0;
339 p = xdr_decode_hyper(p, &args->offset); 336 p = xdr_decode_hyper(p, &args->offset);
340 337
341 len = args->count = ntohl(*p++); 338 args->count = ntohl(*p++);
342 339 len = min(args->count, max_blocksize);
343 if (len > max_blocksize)
344 len = max_blocksize;
345 340
346 /* set up the kvec */ 341 /* set up the kvec */
347 v=0; 342 v=0;
@@ -349,7 +344,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
349 struct page *p = *(rqstp->rq_next_page++); 344 struct page *p = *(rqstp->rq_next_page++);
350 345
351 rqstp->rq_vec[v].iov_base = page_address(p); 346 rqstp->rq_vec[v].iov_base = page_address(p);
352 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; 347 rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
353 len -= rqstp->rq_vec[v].iov_len; 348 len -= rqstp->rq_vec[v].iov_len;
354 v++; 349 v++;
355 } 350 }
@@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
484 } 479 }
485 /* now copy next page if there is one */ 480 /* now copy next page if there is one */
486 if (len && !avail && rqstp->rq_arg.page_len) { 481 if (len && !avail && rqstp->rq_arg.page_len) {
487 avail = rqstp->rq_arg.page_len; 482 avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
488 if (avail > PAGE_SIZE)
489 avail = PAGE_SIZE;
490 old = page_address(rqstp->rq_arg.pages[0]); 483 old = page_address(rqstp->rq_arg.pages[0]);
491 } 484 }
492 while (len && avail && *old) { 485 while (len && avail && *old) {
@@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
571 args->verf = p; p += 2; 564 args->verf = p; p += 2;
572 args->dircount = ~0; 565 args->dircount = ~0;
573 args->count = ntohl(*p++); 566 args->count = ntohl(*p++);
574 567 args->count = min_t(u32, args->count, PAGE_SIZE);
575 if (args->count > PAGE_SIZE)
576 args->count = PAGE_SIZE;
577
578 args->buffer = page_address(*(rqstp->rq_next_page++)); 568 args->buffer = page_address(*(rqstp->rq_next_page++));
579 569
580 return xdr_argsize_check(rqstp, p); 570 return xdr_argsize_check(rqstp, p);
@@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
595 args->dircount = ntohl(*p++); 585 args->dircount = ntohl(*p++);
596 args->count = ntohl(*p++); 586 args->count = ntohl(*p++);
597 587
598 len = (args->count > max_blocksize) ? max_blocksize : 588 len = args->count = min(args->count, max_blocksize);
599 args->count;
600 args->count = len;
601
602 while (len > 0) { 589 while (len > 0) {
603 struct page *p = *(rqstp->rq_next_page++); 590 struct page *p = *(rqstp->rq_next_page++);
604 if (!args->buffer) 591 if (!args->buffer)
@@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
913 */ 900 */
914 901
915 /* truncate filename if too long */ 902 /* truncate filename if too long */
916 if (namlen > NFS3_MAXNAMLEN) 903 namlen = min(namlen, NFS3_MAXNAMLEN);
917 namlen = NFS3_MAXNAMLEN;
918 904
919 slen = XDR_QUADLEN(namlen); 905 slen = XDR_QUADLEN(namlen);
920 elen = slen + NFS3_ENTRY_BAGGAGE 906 elen = slen + NFS3_ENTRY_BAGGAGE
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index d714156a19fd..59fd76651781 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -146,35 +146,43 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
146 int size = 0; 146 int size = 0;
147 147
148 pacl = get_acl(inode, ACL_TYPE_ACCESS); 148 pacl = get_acl(inode, ACL_TYPE_ACCESS);
149 if (!pacl) { 149 if (!pacl)
150 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 150 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
151 if (IS_ERR(pacl)) 151
152 return PTR_ERR(pacl); 152 if (IS_ERR(pacl))
153 } 153 return PTR_ERR(pacl);
154
154 /* allocate for worst case: one (deny, allow) pair each: */ 155 /* allocate for worst case: one (deny, allow) pair each: */
155 size += 2 * pacl->a_count; 156 size += 2 * pacl->a_count;
156 157
157 if (S_ISDIR(inode->i_mode)) { 158 if (S_ISDIR(inode->i_mode)) {
158 flags = NFS4_ACL_DIR; 159 flags = NFS4_ACL_DIR;
159 dpacl = get_acl(inode, ACL_TYPE_DEFAULT); 160 dpacl = get_acl(inode, ACL_TYPE_DEFAULT);
161 if (IS_ERR(dpacl)) {
162 error = PTR_ERR(dpacl);
163 goto rel_pacl;
164 }
165
160 if (dpacl) 166 if (dpacl)
161 size += 2 * dpacl->a_count; 167 size += 2 * dpacl->a_count;
162 } 168 }
163 169
164 *acl = nfs4_acl_new(size); 170 *acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL);
165 if (*acl == NULL) { 171 if (*acl == NULL) {
166 error = -ENOMEM; 172 error = -ENOMEM;
167 goto out; 173 goto out;
168 } 174 }
175 (*acl)->naces = 0;
169 176
170 _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); 177 _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT);
171 178
172 if (dpacl) 179 if (dpacl)
173 _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); 180 _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT);
174 181
175 out: 182out:
176 posix_acl_release(pacl);
177 posix_acl_release(dpacl); 183 posix_acl_release(dpacl);
184rel_pacl:
185 posix_acl_release(pacl);
178 return error; 186 return error;
179} 187}
180 188
@@ -872,16 +880,13 @@ ace2type(struct nfs4_ace *ace)
872 return -1; 880 return -1;
873} 881}
874 882
875struct nfs4_acl * 883/*
876nfs4_acl_new(int n) 884 * return the size of the struct nfs4_acl required to represent an acl
885 * with @entries entries.
886 */
887int nfs4_acl_bytes(int entries)
877{ 888{
878 struct nfs4_acl *acl; 889 return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace);
879
880 acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL);
881 if (acl == NULL)
882 return NULL;
883 acl->naces = 0;
884 return acl;
885} 890}
886 891
887static struct { 892static struct {
@@ -935,5 +940,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
935 return 0; 940 return 0;
936 } 941 }
937 WARN_ON_ONCE(1); 942 WARN_ON_ONCE(1);
938 return -1; 943 return nfserr_serverfault;
939} 944}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 2c73cae9899d..e0be57b0f79b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
337 p = xdr_reserve_space(xdr, 4); 337 p = xdr_reserve_space(xdr, 4);
338 *p++ = xdr_zero; /* truncate */ 338 *p++ = xdr_zero; /* truncate */
339 339
340 encode_nfs_fh4(xdr, &dp->dl_fh); 340 encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle);
341 341
342 hdr->nops++; 342 hdr->nops++;
343} 343}
@@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
678 (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) 678 (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
679 return -EINVAL; 679 return -EINVAL;
680 args.client_name = clp->cl_cred.cr_principal; 680 args.client_name = clp->cl_cred.cr_principal;
681 args.prognumber = conn->cb_prog, 681 args.prognumber = conn->cb_prog;
682 args.protocol = XPRT_TRANSPORT_TCP; 682 args.protocol = XPRT_TRANSPORT_TCP;
683 args.authflavor = clp->cl_cred.cr_flavor; 683 args.authflavor = clp->cl_cred.cr_flavor;
684 clp->cl_cb_ident = conn->cb_ident; 684 clp->cl_cb_ident = conn->cb_ident;
@@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
689 clp->cl_cb_session = ses; 689 clp->cl_cb_session = ses;
690 args.bc_xprt = conn->cb_xprt; 690 args.bc_xprt = conn->cb_xprt;
691 args.prognumber = clp->cl_cb_session->se_cb_prog; 691 args.prognumber = clp->cl_cb_session->se_cb_prog;
692 args.protocol = XPRT_TRANSPORT_BC_TCP; 692 args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
693 XPRT_TRANSPORT_BC;
693 args.authflavor = ses->se_cb_sec.flavor; 694 args.authflavor = ses->se_cb_sec.flavor;
694 } 695 }
695 /* Create RPC client */ 696 /* Create RPC client */
@@ -904,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata)
904 spin_lock(&clp->cl_lock); 905 spin_lock(&clp->cl_lock);
905 list_del(&cb->cb_per_client); 906 list_del(&cb->cb_per_client);
906 spin_unlock(&clp->cl_lock); 907 spin_unlock(&clp->cl_lock);
907 nfs4_put_delegation(dp); 908 nfs4_put_stid(&dp->dl_stid);
908 } 909 }
909} 910}
910 911
@@ -933,7 +934,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
933 set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); 934 set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
934 /* 935 /*
935 * Note this won't actually result in a null callback; 936 * Note this won't actually result in a null callback;
936 * instead, nfsd4_do_callback_rpc() will detect the killed 937 * instead, nfsd4_run_cb_null() will detect the killed
937 * client, destroy the rpc client, and stop: 938 * client, destroy the rpc client, and stop:
938 */ 939 */
939 do_probe_callback(clp); 940 do_probe_callback(clp);
@@ -1011,9 +1012,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
1011 run_nfsd4_cb(cb); 1012 run_nfsd4_cb(cb);
1012} 1013}
1013 1014
1014static void nfsd4_do_callback_rpc(struct work_struct *w) 1015static void
1016nfsd4_run_callback_rpc(struct nfsd4_callback *cb)
1015{ 1017{
1016 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
1017 struct nfs4_client *clp = cb->cb_clp; 1018 struct nfs4_client *clp = cb->cb_clp;
1018 struct rpc_clnt *clnt; 1019 struct rpc_clnt *clnt;
1019 1020
@@ -1031,9 +1032,22 @@ static void nfsd4_do_callback_rpc(struct work_struct *w)
1031 cb->cb_ops, cb); 1032 cb->cb_ops, cb);
1032} 1033}
1033 1034
1034void nfsd4_init_callback(struct nfsd4_callback *cb) 1035void
1036nfsd4_run_cb_null(struct work_struct *w)
1035{ 1037{
1036 INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); 1038 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1039 cb_work);
1040 nfsd4_run_callback_rpc(cb);
1041}
1042
1043void
1044nfsd4_run_cb_recall(struct work_struct *w)
1045{
1046 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
1047 cb_work);
1048
1049 nfsd4_prepare_cb_recall(cb->cb_op);
1050 nfsd4_run_callback_rpc(cb);
1037} 1051}
1038 1052
1039void nfsd4_cb_recall(struct nfs4_delegation *dp) 1053void nfsd4_cb_recall(struct nfs4_delegation *dp)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8f029db5d271..5e0dc528a0e8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
177 fh_put(dst); 177 fh_put(dst);
178 dget(src->fh_dentry); 178 dget(src->fh_dentry);
179 if (src->fh_export) 179 if (src->fh_export)
180 cache_get(&src->fh_export->h); 180 exp_get(src->fh_export);
181 *dst = *src; 181 *dst = *src;
182} 182}
183 183
@@ -385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
385 if (nfsd4_has_session(cstate)) 385 if (nfsd4_has_session(cstate))
386 copy_clientid(&open->op_clientid, cstate->session); 386 copy_clientid(&open->op_clientid, cstate->session);
387 387
388 nfs4_lock_state();
389
390 /* check seqid for replay. set nfs4_owner */ 388 /* check seqid for replay. set nfs4_owner */
391 resp = rqstp->rq_resp; 389 resp = rqstp->rq_resp;
392 status = nfsd4_process_open1(&resp->cstate, open, nn); 390 status = nfsd4_process_open1(&resp->cstate, open, nn);
@@ -431,8 +429,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
431 break; 429 break;
432 case NFS4_OPEN_CLAIM_PREVIOUS: 430 case NFS4_OPEN_CLAIM_PREVIOUS:
433 status = nfs4_check_open_reclaim(&open->op_clientid, 431 status = nfs4_check_open_reclaim(&open->op_clientid,
434 cstate->minorversion, 432 cstate, nn);
435 nn);
436 if (status) 433 if (status)
437 goto out; 434 goto out;
438 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; 435 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
@@ -461,19 +458,17 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
461 * set, (2) sets open->op_stateid, (3) sets open->op_delegation. 458 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
462 */ 459 */
463 status = nfsd4_process_open2(rqstp, resfh, open); 460 status = nfsd4_process_open2(rqstp, resfh, open);
464 WARN_ON(status && open->op_created); 461 WARN(status && open->op_created,
462 "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
463 be32_to_cpu(status));
465out: 464out:
466 if (resfh && resfh != &cstate->current_fh) { 465 if (resfh && resfh != &cstate->current_fh) {
467 fh_dup2(&cstate->current_fh, resfh); 466 fh_dup2(&cstate->current_fh, resfh);
468 fh_put(resfh); 467 fh_put(resfh);
469 kfree(resfh); 468 kfree(resfh);
470 } 469 }
471 nfsd4_cleanup_open_state(open, status); 470 nfsd4_cleanup_open_state(cstate, open, status);
472 if (open->op_openowner && !nfsd4_has_session(cstate))
473 cstate->replay_owner = &open->op_openowner->oo_owner;
474 nfsd4_bump_seqid(cstate, status); 471 nfsd4_bump_seqid(cstate, status);
475 if (!cstate->replay_owner)
476 nfs4_unlock_state();
477 return status; 472 return status;
478} 473}
479 474
@@ -581,8 +576,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
581 __be32 verf[2]; 576 __be32 verf[2];
582 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 577 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
583 578
584 verf[0] = (__be32)nn->nfssvc_boot.tv_sec; 579 /*
585 verf[1] = (__be32)nn->nfssvc_boot.tv_usec; 580 * This is opaque to client, so no need to byte-swap. Use
581 * __force to keep sparse happy
582 */
583 verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
584 verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
586 memcpy(verifier->data, verf, sizeof(verifier->data)); 585 memcpy(verifier->data, verf, sizeof(verifier->data));
587} 586}
588 587
@@ -619,8 +618,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
619 case NF4LNK: 618 case NF4LNK:
620 status = nfsd_symlink(rqstp, &cstate->current_fh, 619 status = nfsd_symlink(rqstp, &cstate->current_fh,
621 create->cr_name, create->cr_namelen, 620 create->cr_name, create->cr_namelen,
622 create->cr_linkname, create->cr_linklen, 621 create->cr_data, &resfh);
623 &resfh, &create->cr_iattr);
624 break; 622 break;
625 623
626 case NF4BLK: 624 case NF4BLK:
@@ -909,8 +907,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
909 default: 907 default:
910 return nfserr_inval; 908 return nfserr_inval;
911 } 909 }
912 exp_get(cstate->current_fh.fh_export); 910
913 sin->sin_exp = cstate->current_fh.fh_export; 911 sin->sin_exp = exp_get(cstate->current_fh.fh_export);
914 fh_put(&cstate->current_fh); 912 fh_put(&cstate->current_fh);
915 return nfs_ok; 913 return nfs_ok;
916} 914}
@@ -1289,7 +1287,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1289 * Don't use the deferral mechanism for NFSv4; compounds make it 1287 * Don't use the deferral mechanism for NFSv4; compounds make it
1290 * too hard to avoid non-idempotency problems. 1288 * too hard to avoid non-idempotency problems.
1291 */ 1289 */
1292 rqstp->rq_usedeferral = 0; 1290 rqstp->rq_usedeferral = false;
1293 1291
1294 /* 1292 /*
1295 * According to RFC3010, this takes precedence over all other errors. 1293 * According to RFC3010, this takes precedence over all other errors.
@@ -1391,10 +1389,7 @@ encode_op:
1391 args->ops, args->opcnt, resp->opcnt, op->opnum, 1389 args->ops, args->opcnt, resp->opcnt, op->opnum,
1392 be32_to_cpu(status)); 1390 be32_to_cpu(status));
1393 1391
1394 if (cstate->replay_owner) { 1392 nfsd4_cstate_clear_replay(cstate);
1395 nfs4_unlock_state();
1396 cstate->replay_owner = NULL;
1397 }
1398 /* XXX Ugh, we need to get rid of this kind of special case: */ 1393 /* XXX Ugh, we need to get rid of this kind of special case: */
1399 if (op->opnum == OP_READ && op->u.read.rd_filp) 1394 if (op->opnum == OP_READ && op->u.read.rd_filp)
1400 fput(op->u.read.rd_filp); 1395 fput(op->u.read.rd_filp);
@@ -1408,7 +1403,7 @@ encode_op:
1408 BUG_ON(cstate->replay_owner); 1403 BUG_ON(cstate->replay_owner);
1409out: 1404out:
1410 /* Reset deferral mechanism for RPC deferrals */ 1405 /* Reset deferral mechanism for RPC deferrals */
1411 rqstp->rq_usedeferral = 1; 1406 rqstp->rq_usedeferral = true;
1412 dprintk("nfsv4 compound returned %d\n", ntohl(status)); 1407 dprintk("nfsv4 compound returned %d\n", ntohl(status));
1413 return status; 1408 return status;
1414} 1409}
@@ -1520,21 +1515,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1520 u32 maxcount = 0, rlen = 0; 1515 u32 maxcount = 0, rlen = 0;
1521 1516
1522 maxcount = svc_max_payload(rqstp); 1517 maxcount = svc_max_payload(rqstp);
1523 rlen = op->u.read.rd_length; 1518 rlen = min(op->u.read.rd_length, maxcount);
1524
1525 if (rlen > maxcount)
1526 rlen = maxcount;
1527 1519
1528 return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); 1520 return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
1529} 1521}
1530 1522
1531static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1523static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1532{ 1524{
1533 u32 maxcount = svc_max_payload(rqstp); 1525 u32 maxcount = 0, rlen = 0;
1534 u32 rlen = op->u.readdir.rd_maxcount;
1535 1526
1536 if (rlen > maxcount) 1527 maxcount = svc_max_payload(rqstp);
1537 rlen = maxcount; 1528 rlen = min(op->u.readdir.rd_maxcount, maxcount);
1538 1529
1539 return (op_encode_hdr_size + op_encode_verifier_maxsz + 1530 return (op_encode_hdr_size + op_encode_verifier_maxsz +
1540 XDR_QUADLEN(rlen)) * sizeof(__be32); 1531 XDR_QUADLEN(rlen)) * sizeof(__be32);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2204e1fe5725..2e80a59e7e91 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -70,13 +70,11 @@ static u64 current_sessionid = 1;
70#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t))) 70#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
71 71
72/* forward declarations */ 72/* forward declarations */
73static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); 73static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
74static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
74 75
75/* Locking: */ 76/* Locking: */
76 77
77/* Currently used for almost all code touching nfsv4 state: */
78static DEFINE_MUTEX(client_mutex);
79
80/* 78/*
81 * Currently used for the del_recall_lru and file hash table. In an 79 * Currently used for the del_recall_lru and file hash table. In an
82 * effort to decrease the scope of the client_mutex, this spinlock may 80 * effort to decrease the scope of the client_mutex, this spinlock may
@@ -84,18 +82,18 @@ static DEFINE_MUTEX(client_mutex);
84 */ 82 */
85static DEFINE_SPINLOCK(state_lock); 83static DEFINE_SPINLOCK(state_lock);
86 84
85/*
86 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
87 * the refcount on the open stateid to drop.
88 */
89static DECLARE_WAIT_QUEUE_HEAD(close_wq);
90
87static struct kmem_cache *openowner_slab; 91static struct kmem_cache *openowner_slab;
88static struct kmem_cache *lockowner_slab; 92static struct kmem_cache *lockowner_slab;
89static struct kmem_cache *file_slab; 93static struct kmem_cache *file_slab;
90static struct kmem_cache *stateid_slab; 94static struct kmem_cache *stateid_slab;
91static struct kmem_cache *deleg_slab; 95static struct kmem_cache *deleg_slab;
92 96
93void
94nfs4_lock_state(void)
95{
96 mutex_lock(&client_mutex);
97}
98
99static void free_session(struct nfsd4_session *); 97static void free_session(struct nfsd4_session *);
100 98
101static bool is_session_dead(struct nfsd4_session *ses) 99static bool is_session_dead(struct nfsd4_session *ses)
@@ -103,12 +101,6 @@ static bool is_session_dead(struct nfsd4_session *ses)
103 return ses->se_flags & NFS4_SESSION_DEAD; 101 return ses->se_flags & NFS4_SESSION_DEAD;
104} 102}
105 103
106void nfsd4_put_session(struct nfsd4_session *ses)
107{
108 if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
109 free_session(ses);
110}
111
112static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) 104static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
113{ 105{
114 if (atomic_read(&ses->se_ref) > ref_held_by_me) 106 if (atomic_read(&ses->se_ref) > ref_held_by_me)
@@ -117,46 +109,17 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b
117 return nfs_ok; 109 return nfs_ok;
118} 110}
119 111
120static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
121{
122 if (is_session_dead(ses))
123 return nfserr_badsession;
124 atomic_inc(&ses->se_ref);
125 return nfs_ok;
126}
127
128void
129nfs4_unlock_state(void)
130{
131 mutex_unlock(&client_mutex);
132}
133
134static bool is_client_expired(struct nfs4_client *clp) 112static bool is_client_expired(struct nfs4_client *clp)
135{ 113{
136 return clp->cl_time == 0; 114 return clp->cl_time == 0;
137} 115}
138 116
139static __be32 mark_client_expired_locked(struct nfs4_client *clp) 117static __be32 get_client_locked(struct nfs4_client *clp)
140{
141 if (atomic_read(&clp->cl_refcount))
142 return nfserr_jukebox;
143 clp->cl_time = 0;
144 return nfs_ok;
145}
146
147static __be32 mark_client_expired(struct nfs4_client *clp)
148{ 118{
149 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 119 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
150 __be32 ret;
151 120
152 spin_lock(&nn->client_lock); 121 lockdep_assert_held(&nn->client_lock);
153 ret = mark_client_expired_locked(clp);
154 spin_unlock(&nn->client_lock);
155 return ret;
156}
157 122
158static __be32 get_client_locked(struct nfs4_client *clp)
159{
160 if (is_client_expired(clp)) 123 if (is_client_expired(clp))
161 return nfserr_expired; 124 return nfserr_expired;
162 atomic_inc(&clp->cl_refcount); 125 atomic_inc(&clp->cl_refcount);
@@ -197,13 +160,17 @@ renew_client(struct nfs4_client *clp)
197 160
198static void put_client_renew_locked(struct nfs4_client *clp) 161static void put_client_renew_locked(struct nfs4_client *clp)
199{ 162{
163 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
164
165 lockdep_assert_held(&nn->client_lock);
166
200 if (!atomic_dec_and_test(&clp->cl_refcount)) 167 if (!atomic_dec_and_test(&clp->cl_refcount))
201 return; 168 return;
202 if (!is_client_expired(clp)) 169 if (!is_client_expired(clp))
203 renew_client_locked(clp); 170 renew_client_locked(clp);
204} 171}
205 172
206void put_client_renew(struct nfs4_client *clp) 173static void put_client_renew(struct nfs4_client *clp)
207{ 174{
208 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 175 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
209 176
@@ -214,6 +181,79 @@ void put_client_renew(struct nfs4_client *clp)
214 spin_unlock(&nn->client_lock); 181 spin_unlock(&nn->client_lock);
215} 182}
216 183
184static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
185{
186 __be32 status;
187
188 if (is_session_dead(ses))
189 return nfserr_badsession;
190 status = get_client_locked(ses->se_client);
191 if (status)
192 return status;
193 atomic_inc(&ses->se_ref);
194 return nfs_ok;
195}
196
197static void nfsd4_put_session_locked(struct nfsd4_session *ses)
198{
199 struct nfs4_client *clp = ses->se_client;
200 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
201
202 lockdep_assert_held(&nn->client_lock);
203
204 if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
205 free_session(ses);
206 put_client_renew_locked(clp);
207}
208
209static void nfsd4_put_session(struct nfsd4_session *ses)
210{
211 struct nfs4_client *clp = ses->se_client;
212 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
213
214 spin_lock(&nn->client_lock);
215 nfsd4_put_session_locked(ses);
216 spin_unlock(&nn->client_lock);
217}
218
219static int
220same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
221{
222 return (sop->so_owner.len == owner->len) &&
223 0 == memcmp(sop->so_owner.data, owner->data, owner->len);
224}
225
226static struct nfs4_openowner *
227find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
228 struct nfs4_client *clp)
229{
230 struct nfs4_stateowner *so;
231
232 lockdep_assert_held(&clp->cl_lock);
233
234 list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval],
235 so_strhash) {
236 if (!so->so_is_open_owner)
237 continue;
238 if (same_owner_str(so, &open->op_owner)) {
239 atomic_inc(&so->so_count);
240 return openowner(so);
241 }
242 }
243 return NULL;
244}
245
246static struct nfs4_openowner *
247find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
248 struct nfs4_client *clp)
249{
250 struct nfs4_openowner *oo;
251
252 spin_lock(&clp->cl_lock);
253 oo = find_openstateowner_str_locked(hashval, open, clp);
254 spin_unlock(&clp->cl_lock);
255 return oo;
256}
217 257
218static inline u32 258static inline u32
219opaque_hashval(const void *ptr, int nbytes) 259opaque_hashval(const void *ptr, int nbytes)
@@ -236,10 +276,11 @@ static void nfsd4_free_file(struct nfs4_file *f)
236static inline void 276static inline void
237put_nfs4_file(struct nfs4_file *fi) 277put_nfs4_file(struct nfs4_file *fi)
238{ 278{
279 might_lock(&state_lock);
280
239 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { 281 if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
240 hlist_del(&fi->fi_hash); 282 hlist_del(&fi->fi_hash);
241 spin_unlock(&state_lock); 283 spin_unlock(&state_lock);
242 iput(fi->fi_inode);
243 nfsd4_free_file(fi); 284 nfsd4_free_file(fi);
244 } 285 }
245} 286}
@@ -250,7 +291,80 @@ get_nfs4_file(struct nfs4_file *fi)
250 atomic_inc(&fi->fi_ref); 291 atomic_inc(&fi->fi_ref);
251} 292}
252 293
253static int num_delegations; 294static struct file *
295__nfs4_get_fd(struct nfs4_file *f, int oflag)
296{
297 if (f->fi_fds[oflag])
298 return get_file(f->fi_fds[oflag]);
299 return NULL;
300}
301
302static struct file *
303find_writeable_file_locked(struct nfs4_file *f)
304{
305 struct file *ret;
306
307 lockdep_assert_held(&f->fi_lock);
308
309 ret = __nfs4_get_fd(f, O_WRONLY);
310 if (!ret)
311 ret = __nfs4_get_fd(f, O_RDWR);
312 return ret;
313}
314
315static struct file *
316find_writeable_file(struct nfs4_file *f)
317{
318 struct file *ret;
319
320 spin_lock(&f->fi_lock);
321 ret = find_writeable_file_locked(f);
322 spin_unlock(&f->fi_lock);
323
324 return ret;
325}
326
327static struct file *find_readable_file_locked(struct nfs4_file *f)
328{
329 struct file *ret;
330
331 lockdep_assert_held(&f->fi_lock);
332
333 ret = __nfs4_get_fd(f, O_RDONLY);
334 if (!ret)
335 ret = __nfs4_get_fd(f, O_RDWR);
336 return ret;
337}
338
339static struct file *
340find_readable_file(struct nfs4_file *f)
341{
342 struct file *ret;
343
344 spin_lock(&f->fi_lock);
345 ret = find_readable_file_locked(f);
346 spin_unlock(&f->fi_lock);
347
348 return ret;
349}
350
351static struct file *
352find_any_file(struct nfs4_file *f)
353{
354 struct file *ret;
355
356 spin_lock(&f->fi_lock);
357 ret = __nfs4_get_fd(f, O_RDWR);
358 if (!ret) {
359 ret = __nfs4_get_fd(f, O_WRONLY);
360 if (!ret)
361 ret = __nfs4_get_fd(f, O_RDONLY);
362 }
363 spin_unlock(&f->fi_lock);
364 return ret;
365}
366
367static atomic_long_t num_delegations;
254unsigned long max_delegations; 368unsigned long max_delegations;
255 369
256/* 370/*
@@ -262,12 +376,11 @@ unsigned long max_delegations;
262#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) 376#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
263#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) 377#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
264 378
265static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) 379static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
266{ 380{
267 unsigned int ret; 381 unsigned int ret;
268 382
269 ret = opaque_hashval(ownername->data, ownername->len); 383 ret = opaque_hashval(ownername->data, ownername->len);
270 ret += clientid;
271 return ret & OWNER_HASH_MASK; 384 return ret & OWNER_HASH_MASK;
272} 385}
273 386
@@ -275,75 +388,124 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
275#define FILE_HASH_BITS 8 388#define FILE_HASH_BITS 8
276#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) 389#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
277 390
278static unsigned int file_hashval(struct inode *ino) 391static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
392{
393 return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
394}
395
396static unsigned int file_hashval(struct knfsd_fh *fh)
397{
398 return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
399}
400
401static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
279{ 402{
280 /* XXX: why are we hashing on inode pointer, anyway? */ 403 return fh1->fh_size == fh2->fh_size &&
281 return hash_ptr(ino, FILE_HASH_BITS); 404 !memcmp(fh1->fh_base.fh_pad,
405 fh2->fh_base.fh_pad,
406 fh1->fh_size);
282} 407}
283 408
284static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; 409static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
285 410
286static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) 411static void
412__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
287{ 413{
288 WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); 414 lockdep_assert_held(&fp->fi_lock);
289 atomic_inc(&fp->fi_access[oflag]); 415
416 if (access & NFS4_SHARE_ACCESS_WRITE)
417 atomic_inc(&fp->fi_access[O_WRONLY]);
418 if (access & NFS4_SHARE_ACCESS_READ)
419 atomic_inc(&fp->fi_access[O_RDONLY]);
290} 420}
291 421
292static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) 422static __be32
423nfs4_file_get_access(struct nfs4_file *fp, u32 access)
293{ 424{
294 if (oflag == O_RDWR) { 425 lockdep_assert_held(&fp->fi_lock);
295 __nfs4_file_get_access(fp, O_RDONLY); 426
296 __nfs4_file_get_access(fp, O_WRONLY); 427 /* Does this access mode make sense? */
297 } else 428 if (access & ~NFS4_SHARE_ACCESS_BOTH)
298 __nfs4_file_get_access(fp, oflag); 429 return nfserr_inval;
430
431 /* Does it conflict with a deny mode already set? */
432 if ((access & fp->fi_share_deny) != 0)
433 return nfserr_share_denied;
434
435 __nfs4_file_get_access(fp, access);
436 return nfs_ok;
299} 437}
300 438
301static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) 439static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
302{ 440{
303 if (fp->fi_fds[oflag]) { 441 /* Common case is that there is no deny mode. */
304 fput(fp->fi_fds[oflag]); 442 if (deny) {
305 fp->fi_fds[oflag] = NULL; 443 /* Does this deny mode make sense? */
444 if (deny & ~NFS4_SHARE_DENY_BOTH)
445 return nfserr_inval;
446
447 if ((deny & NFS4_SHARE_DENY_READ) &&
448 atomic_read(&fp->fi_access[O_RDONLY]))
449 return nfserr_share_denied;
450
451 if ((deny & NFS4_SHARE_DENY_WRITE) &&
452 atomic_read(&fp->fi_access[O_WRONLY]))
453 return nfserr_share_denied;
306 } 454 }
455 return nfs_ok;
307} 456}
308 457
309static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) 458static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
310{ 459{
311 if (atomic_dec_and_test(&fp->fi_access[oflag])) { 460 might_lock(&fp->fi_lock);
312 nfs4_file_put_fd(fp, oflag); 461
462 if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
463 struct file *f1 = NULL;
464 struct file *f2 = NULL;
465
466 swap(f1, fp->fi_fds[oflag]);
313 if (atomic_read(&fp->fi_access[1 - oflag]) == 0) 467 if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
314 nfs4_file_put_fd(fp, O_RDWR); 468 swap(f2, fp->fi_fds[O_RDWR]);
469 spin_unlock(&fp->fi_lock);
470 if (f1)
471 fput(f1);
472 if (f2)
473 fput(f2);
315 } 474 }
316} 475}
317 476
318static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) 477static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
319{ 478{
320 if (oflag == O_RDWR) { 479 WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);
321 __nfs4_file_put_access(fp, O_RDONLY); 480
481 if (access & NFS4_SHARE_ACCESS_WRITE)
322 __nfs4_file_put_access(fp, O_WRONLY); 482 __nfs4_file_put_access(fp, O_WRONLY);
323 } else 483 if (access & NFS4_SHARE_ACCESS_READ)
324 __nfs4_file_put_access(fp, oflag); 484 __nfs4_file_put_access(fp, O_RDONLY);
325} 485}
326 486
327static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct 487static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
328kmem_cache *slab) 488 struct kmem_cache *slab)
329{ 489{
330 struct idr *stateids = &cl->cl_stateids;
331 struct nfs4_stid *stid; 490 struct nfs4_stid *stid;
332 int new_id; 491 int new_id;
333 492
334 stid = kmem_cache_alloc(slab, GFP_KERNEL); 493 stid = kmem_cache_zalloc(slab, GFP_KERNEL);
335 if (!stid) 494 if (!stid)
336 return NULL; 495 return NULL;
337 496
338 new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); 497 idr_preload(GFP_KERNEL);
498 spin_lock(&cl->cl_lock);
499 new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
500 spin_unlock(&cl->cl_lock);
501 idr_preload_end();
339 if (new_id < 0) 502 if (new_id < 0)
340 goto out_free; 503 goto out_free;
341 stid->sc_client = cl; 504 stid->sc_client = cl;
342 stid->sc_type = 0;
343 stid->sc_stateid.si_opaque.so_id = new_id; 505 stid->sc_stateid.si_opaque.so_id = new_id;
344 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; 506 stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
345 /* Will be incremented before return to client: */ 507 /* Will be incremented before return to client: */
346 stid->sc_stateid.si_generation = 0; 508 atomic_set(&stid->sc_count, 1);
347 509
348 /* 510 /*
349 * It shouldn't be a problem to reuse an opaque stateid value. 511 * It shouldn't be a problem to reuse an opaque stateid value.
@@ -360,9 +522,24 @@ out_free:
360 return NULL; 522 return NULL;
361} 523}
362 524
363static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) 525static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
364{ 526{
365 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); 527 struct nfs4_stid *stid;
528 struct nfs4_ol_stateid *stp;
529
530 stid = nfs4_alloc_stid(clp, stateid_slab);
531 if (!stid)
532 return NULL;
533
534 stp = openlockstateid(stid);
535 stp->st_stid.sc_free = nfs4_free_ol_stateid;
536 return stp;
537}
538
539static void nfs4_free_deleg(struct nfs4_stid *stid)
540{
541 kmem_cache_free(deleg_slab, stid);
542 atomic_long_dec(&num_delegations);
366} 543}
367 544
368/* 545/*
@@ -379,10 +556,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
379 * Each filter is 256 bits. We hash the filehandle to 32bit and use the 556 * Each filter is 256 bits. We hash the filehandle to 32bit and use the
380 * low 3 bytes as hash-table indices. 557 * low 3 bytes as hash-table indices.
381 * 558 *
382 * 'state_lock', which is always held when block_delegations() is called, 559 * 'blocked_delegations_lock', which is always taken in block_delegations(),
383 * is used to manage concurrent access. Testing does not need the lock 560 * is used to manage concurrent access. Testing does not need the lock
384 * except when swapping the two filters. 561 * except when swapping the two filters.
385 */ 562 */
563static DEFINE_SPINLOCK(blocked_delegations_lock);
386static struct bloom_pair { 564static struct bloom_pair {
387 int entries, old_entries; 565 int entries, old_entries;
388 time_t swap_time; 566 time_t swap_time;
@@ -398,7 +576,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
398 if (bd->entries == 0) 576 if (bd->entries == 0)
399 return 0; 577 return 0;
400 if (seconds_since_boot() - bd->swap_time > 30) { 578 if (seconds_since_boot() - bd->swap_time > 30) {
401 spin_lock(&state_lock); 579 spin_lock(&blocked_delegations_lock);
402 if (seconds_since_boot() - bd->swap_time > 30) { 580 if (seconds_since_boot() - bd->swap_time > 30) {
403 bd->entries -= bd->old_entries; 581 bd->entries -= bd->old_entries;
404 bd->old_entries = bd->entries; 582 bd->old_entries = bd->entries;
@@ -407,7 +585,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
407 bd->new = 1-bd->new; 585 bd->new = 1-bd->new;
408 bd->swap_time = seconds_since_boot(); 586 bd->swap_time = seconds_since_boot();
409 } 587 }
410 spin_unlock(&state_lock); 588 spin_unlock(&blocked_delegations_lock);
411 } 589 }
412 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); 590 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
413 if (test_bit(hash&255, bd->set[0]) && 591 if (test_bit(hash&255, bd->set[0]) &&
@@ -430,69 +608,73 @@ static void block_delegations(struct knfsd_fh *fh)
430 608
431 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); 609 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
432 610
611 spin_lock(&blocked_delegations_lock);
433 __set_bit(hash&255, bd->set[bd->new]); 612 __set_bit(hash&255, bd->set[bd->new]);
434 __set_bit((hash>>8)&255, bd->set[bd->new]); 613 __set_bit((hash>>8)&255, bd->set[bd->new]);
435 __set_bit((hash>>16)&255, bd->set[bd->new]); 614 __set_bit((hash>>16)&255, bd->set[bd->new]);
436 if (bd->entries == 0) 615 if (bd->entries == 0)
437 bd->swap_time = seconds_since_boot(); 616 bd->swap_time = seconds_since_boot();
438 bd->entries += 1; 617 bd->entries += 1;
618 spin_unlock(&blocked_delegations_lock);
439} 619}
440 620
441static struct nfs4_delegation * 621static struct nfs4_delegation *
442alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) 622alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
443{ 623{
444 struct nfs4_delegation *dp; 624 struct nfs4_delegation *dp;
625 long n;
445 626
446 dprintk("NFSD alloc_init_deleg\n"); 627 dprintk("NFSD alloc_init_deleg\n");
447 if (num_delegations > max_delegations) 628 n = atomic_long_inc_return(&num_delegations);
448 return NULL; 629 if (n < 0 || n > max_delegations)
630 goto out_dec;
449 if (delegation_blocked(&current_fh->fh_handle)) 631 if (delegation_blocked(&current_fh->fh_handle))
450 return NULL; 632 goto out_dec;
451 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); 633 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
452 if (dp == NULL) 634 if (dp == NULL)
453 return dp; 635 goto out_dec;
636
637 dp->dl_stid.sc_free = nfs4_free_deleg;
454 /* 638 /*
455 * delegation seqid's are never incremented. The 4.1 special 639 * delegation seqid's are never incremented. The 4.1 special
456 * meaning of seqid 0 isn't meaningful, really, but let's avoid 640 * meaning of seqid 0 isn't meaningful, really, but let's avoid
457 * 0 anyway just for consistency and use 1: 641 * 0 anyway just for consistency and use 1:
458 */ 642 */
459 dp->dl_stid.sc_stateid.si_generation = 1; 643 dp->dl_stid.sc_stateid.si_generation = 1;
460 num_delegations++;
461 INIT_LIST_HEAD(&dp->dl_perfile); 644 INIT_LIST_HEAD(&dp->dl_perfile);
462 INIT_LIST_HEAD(&dp->dl_perclnt); 645 INIT_LIST_HEAD(&dp->dl_perclnt);
463 INIT_LIST_HEAD(&dp->dl_recall_lru); 646 INIT_LIST_HEAD(&dp->dl_recall_lru);
464 dp->dl_file = NULL;
465 dp->dl_type = NFS4_OPEN_DELEGATE_READ; 647 dp->dl_type = NFS4_OPEN_DELEGATE_READ;
466 fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); 648 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall);
467 dp->dl_time = 0;
468 atomic_set(&dp->dl_count, 1);
469 nfsd4_init_callback(&dp->dl_recall);
470 return dp; 649 return dp;
650out_dec:
651 atomic_long_dec(&num_delegations);
652 return NULL;
471} 653}
472 654
473static void remove_stid(struct nfs4_stid *s) 655void
656nfs4_put_stid(struct nfs4_stid *s)
474{ 657{
475 struct idr *stateids = &s->sc_client->cl_stateids; 658 struct nfs4_file *fp = s->sc_file;
659 struct nfs4_client *clp = s->sc_client;
476 660
477 idr_remove(stateids, s->sc_stateid.si_opaque.so_id); 661 might_lock(&clp->cl_lock);
478}
479 662
480static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) 663 if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
481{ 664 wake_up_all(&close_wq);
482 kmem_cache_free(slab, s); 665 return;
483}
484
485void
486nfs4_put_delegation(struct nfs4_delegation *dp)
487{
488 if (atomic_dec_and_test(&dp->dl_count)) {
489 nfs4_free_stid(deleg_slab, &dp->dl_stid);
490 num_delegations--;
491 } 666 }
667 idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
668 spin_unlock(&clp->cl_lock);
669 s->sc_free(s);
670 if (fp)
671 put_nfs4_file(fp);
492} 672}
493 673
494static void nfs4_put_deleg_lease(struct nfs4_file *fp) 674static void nfs4_put_deleg_lease(struct nfs4_file *fp)
495{ 675{
676 lockdep_assert_held(&state_lock);
677
496 if (!fp->fi_lease) 678 if (!fp->fi_lease)
497 return; 679 return;
498 if (atomic_dec_and_test(&fp->fi_delegees)) { 680 if (atomic_dec_and_test(&fp->fi_delegees)) {
@@ -512,54 +694,54 @@ static void
512hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) 694hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
513{ 695{
514 lockdep_assert_held(&state_lock); 696 lockdep_assert_held(&state_lock);
697 lockdep_assert_held(&fp->fi_lock);
515 698
699 atomic_inc(&dp->dl_stid.sc_count);
516 dp->dl_stid.sc_type = NFS4_DELEG_STID; 700 dp->dl_stid.sc_type = NFS4_DELEG_STID;
517 list_add(&dp->dl_perfile, &fp->fi_delegations); 701 list_add(&dp->dl_perfile, &fp->fi_delegations);
518 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); 702 list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
519} 703}
520 704
521/* Called under the state lock. */
522static void 705static void
523unhash_delegation(struct nfs4_delegation *dp) 706unhash_delegation_locked(struct nfs4_delegation *dp)
524{ 707{
525 spin_lock(&state_lock); 708 struct nfs4_file *fp = dp->dl_stid.sc_file;
526 list_del_init(&dp->dl_perclnt);
527 list_del_init(&dp->dl_perfile);
528 list_del_init(&dp->dl_recall_lru);
529 spin_unlock(&state_lock);
530 if (dp->dl_file) {
531 nfs4_put_deleg_lease(dp->dl_file);
532 put_nfs4_file(dp->dl_file);
533 dp->dl_file = NULL;
534 }
535}
536
537 709
710 lockdep_assert_held(&state_lock);
538 711
539static void destroy_revoked_delegation(struct nfs4_delegation *dp) 712 dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
540{ 713 /* Ensure that deleg break won't try to requeue it */
714 ++dp->dl_time;
715 spin_lock(&fp->fi_lock);
716 list_del_init(&dp->dl_perclnt);
541 list_del_init(&dp->dl_recall_lru); 717 list_del_init(&dp->dl_recall_lru);
542 remove_stid(&dp->dl_stid); 718 list_del_init(&dp->dl_perfile);
543 nfs4_put_delegation(dp); 719 spin_unlock(&fp->fi_lock);
720 if (fp)
721 nfs4_put_deleg_lease(fp);
544} 722}
545 723
546static void destroy_delegation(struct nfs4_delegation *dp) 724static void destroy_delegation(struct nfs4_delegation *dp)
547{ 725{
548 unhash_delegation(dp); 726 spin_lock(&state_lock);
549 remove_stid(&dp->dl_stid); 727 unhash_delegation_locked(dp);
550 nfs4_put_delegation(dp); 728 spin_unlock(&state_lock);
729 nfs4_put_stid(&dp->dl_stid);
551} 730}
552 731
553static void revoke_delegation(struct nfs4_delegation *dp) 732static void revoke_delegation(struct nfs4_delegation *dp)
554{ 733{
555 struct nfs4_client *clp = dp->dl_stid.sc_client; 734 struct nfs4_client *clp = dp->dl_stid.sc_client;
556 735
736 WARN_ON(!list_empty(&dp->dl_recall_lru));
737
557 if (clp->cl_minorversion == 0) 738 if (clp->cl_minorversion == 0)
558 destroy_delegation(dp); 739 nfs4_put_stid(&dp->dl_stid);
559 else { 740 else {
560 unhash_delegation(dp);
561 dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; 741 dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
742 spin_lock(&clp->cl_lock);
562 list_add(&dp->dl_recall_lru, &clp->cl_revoked); 743 list_add(&dp->dl_recall_lru, &clp->cl_revoked);
744 spin_unlock(&clp->cl_lock);
563 } 745 }
564} 746}
565 747
@@ -607,57 +789,62 @@ bmap_to_share_mode(unsigned long bmap) {
607 return access; 789 return access;
608} 790}
609 791
610static bool
611test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
612 unsigned int access, deny;
613
614 access = bmap_to_share_mode(stp->st_access_bmap);
615 deny = bmap_to_share_mode(stp->st_deny_bmap);
616 if ((access & open->op_share_deny) || (deny & open->op_share_access))
617 return false;
618 return true;
619}
620
621/* set share access for a given stateid */ 792/* set share access for a given stateid */
622static inline void 793static inline void
623set_access(u32 access, struct nfs4_ol_stateid *stp) 794set_access(u32 access, struct nfs4_ol_stateid *stp)
624{ 795{
625 __set_bit(access, &stp->st_access_bmap); 796 unsigned char mask = 1 << access;
797
798 WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
799 stp->st_access_bmap |= mask;
626} 800}
627 801
628/* clear share access for a given stateid */ 802/* clear share access for a given stateid */
629static inline void 803static inline void
630clear_access(u32 access, struct nfs4_ol_stateid *stp) 804clear_access(u32 access, struct nfs4_ol_stateid *stp)
631{ 805{
632 __clear_bit(access, &stp->st_access_bmap); 806 unsigned char mask = 1 << access;
807
808 WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
809 stp->st_access_bmap &= ~mask;
633} 810}
634 811
635/* test whether a given stateid has access */ 812/* test whether a given stateid has access */
636static inline bool 813static inline bool
637test_access(u32 access, struct nfs4_ol_stateid *stp) 814test_access(u32 access, struct nfs4_ol_stateid *stp)
638{ 815{
639 return test_bit(access, &stp->st_access_bmap); 816 unsigned char mask = 1 << access;
817
818 return (bool)(stp->st_access_bmap & mask);
640} 819}
641 820
642/* set share deny for a given stateid */ 821/* set share deny for a given stateid */
643static inline void 822static inline void
644set_deny(u32 access, struct nfs4_ol_stateid *stp) 823set_deny(u32 deny, struct nfs4_ol_stateid *stp)
645{ 824{
646 __set_bit(access, &stp->st_deny_bmap); 825 unsigned char mask = 1 << deny;
826
827 WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
828 stp->st_deny_bmap |= mask;
647} 829}
648 830
649/* clear share deny for a given stateid */ 831/* clear share deny for a given stateid */
650static inline void 832static inline void
651clear_deny(u32 access, struct nfs4_ol_stateid *stp) 833clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
652{ 834{
653 __clear_bit(access, &stp->st_deny_bmap); 835 unsigned char mask = 1 << deny;
836
837 WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
838 stp->st_deny_bmap &= ~mask;
654} 839}
655 840
656/* test whether a given stateid is denying specific access */ 841/* test whether a given stateid is denying specific access */
657static inline bool 842static inline bool
658test_deny(u32 access, struct nfs4_ol_stateid *stp) 843test_deny(u32 deny, struct nfs4_ol_stateid *stp)
659{ 844{
660 return test_bit(access, &stp->st_deny_bmap); 845 unsigned char mask = 1 << deny;
846
847 return (bool)(stp->st_deny_bmap & mask);
661} 848}
662 849
663static int nfs4_access_to_omode(u32 access) 850static int nfs4_access_to_omode(u32 access)
@@ -674,138 +861,283 @@ static int nfs4_access_to_omode(u32 access)
674 return O_RDONLY; 861 return O_RDONLY;
675} 862}
676 863
864/*
865 * A stateid that had a deny mode associated with it is being released
866 * or downgraded. Recalculate the deny mode on the file.
867 */
868static void
869recalculate_deny_mode(struct nfs4_file *fp)
870{
871 struct nfs4_ol_stateid *stp;
872
873 spin_lock(&fp->fi_lock);
874 fp->fi_share_deny = 0;
875 list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
876 fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
877 spin_unlock(&fp->fi_lock);
878}
879
880static void
881reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
882{
883 int i;
884 bool change = false;
885
886 for (i = 1; i < 4; i++) {
887 if ((i & deny) != i) {
888 change = true;
889 clear_deny(i, stp);
890 }
891 }
892
893 /* Recalculate per-file deny mode if there was a change */
894 if (change)
895 recalculate_deny_mode(stp->st_stid.sc_file);
896}
897
677/* release all access and file references for a given stateid */ 898/* release all access and file references for a given stateid */
678static void 899static void
679release_all_access(struct nfs4_ol_stateid *stp) 900release_all_access(struct nfs4_ol_stateid *stp)
680{ 901{
681 int i; 902 int i;
903 struct nfs4_file *fp = stp->st_stid.sc_file;
904
905 if (fp && stp->st_deny_bmap != 0)
906 recalculate_deny_mode(fp);
682 907
683 for (i = 1; i < 4; i++) { 908 for (i = 1; i < 4; i++) {
684 if (test_access(i, stp)) 909 if (test_access(i, stp))
685 nfs4_file_put_access(stp->st_file, 910 nfs4_file_put_access(stp->st_stid.sc_file, i);
686 nfs4_access_to_omode(i));
687 clear_access(i, stp); 911 clear_access(i, stp);
688 } 912 }
689} 913}
690 914
691static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) 915static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
692{ 916{
917 struct nfs4_client *clp = sop->so_client;
918
919 might_lock(&clp->cl_lock);
920
921 if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock))
922 return;
923 sop->so_ops->so_unhash(sop);
924 spin_unlock(&clp->cl_lock);
925 kfree(sop->so_owner.data);
926 sop->so_ops->so_free(sop);
927}
928
929static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
930{
931 struct nfs4_file *fp = stp->st_stid.sc_file;
932
933 lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
934
935 spin_lock(&fp->fi_lock);
693 list_del(&stp->st_perfile); 936 list_del(&stp->st_perfile);
937 spin_unlock(&fp->fi_lock);
694 list_del(&stp->st_perstateowner); 938 list_del(&stp->st_perstateowner);
695} 939}
696 940
697static void close_generic_stateid(struct nfs4_ol_stateid *stp) 941static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
698{ 942{
943 struct nfs4_ol_stateid *stp = openlockstateid(stid);
944
699 release_all_access(stp); 945 release_all_access(stp);
700 put_nfs4_file(stp->st_file); 946 if (stp->st_stateowner)
701 stp->st_file = NULL; 947 nfs4_put_stateowner(stp->st_stateowner);
948 kmem_cache_free(stateid_slab, stid);
702} 949}
703 950
704static void free_generic_stateid(struct nfs4_ol_stateid *stp) 951static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
705{ 952{
706 remove_stid(&stp->st_stid); 953 struct nfs4_ol_stateid *stp = openlockstateid(stid);
707 nfs4_free_stid(stateid_slab, &stp->st_stid); 954 struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
955 struct file *file;
956
957 file = find_any_file(stp->st_stid.sc_file);
958 if (file)
959 filp_close(file, (fl_owner_t)lo);
960 nfs4_free_ol_stateid(stid);
708} 961}
709 962
710static void release_lock_stateid(struct nfs4_ol_stateid *stp) 963/*
964 * Put the persistent reference to an already unhashed generic stateid, while
965 * holding the cl_lock. If it's the last reference, then put it onto the
966 * reaplist for later destruction.
967 */
968static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
969 struct list_head *reaplist)
711{ 970{
712 struct file *file; 971 struct nfs4_stid *s = &stp->st_stid;
972 struct nfs4_client *clp = s->sc_client;
973
974 lockdep_assert_held(&clp->cl_lock);
713 975
714 unhash_generic_stateid(stp); 976 WARN_ON_ONCE(!list_empty(&stp->st_locks));
977
978 if (!atomic_dec_and_test(&s->sc_count)) {
979 wake_up_all(&close_wq);
980 return;
981 }
982
983 idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
984 list_add(&stp->st_locks, reaplist);
985}
986
987static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
988{
989 struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
990
991 lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
992
993 list_del_init(&stp->st_locks);
994 unhash_ol_stateid(stp);
715 unhash_stid(&stp->st_stid); 995 unhash_stid(&stp->st_stid);
716 file = find_any_file(stp->st_file);
717 if (file)
718 locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
719 close_generic_stateid(stp);
720 free_generic_stateid(stp);
721} 996}
722 997
723static void unhash_lockowner(struct nfs4_lockowner *lo) 998static void release_lock_stateid(struct nfs4_ol_stateid *stp)
724{ 999{
725 struct nfs4_ol_stateid *stp; 1000 struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
726 1001
727 list_del(&lo->lo_owner.so_strhash); 1002 spin_lock(&oo->oo_owner.so_client->cl_lock);
728 list_del(&lo->lo_perstateid); 1003 unhash_lock_stateid(stp);
729 list_del(&lo->lo_owner_ino_hash); 1004 spin_unlock(&oo->oo_owner.so_client->cl_lock);
730 while (!list_empty(&lo->lo_owner.so_stateids)) { 1005 nfs4_put_stid(&stp->st_stid);
731 stp = list_first_entry(&lo->lo_owner.so_stateids,
732 struct nfs4_ol_stateid, st_perstateowner);
733 release_lock_stateid(stp);
734 }
735} 1006}
736 1007
737static void nfs4_free_lockowner(struct nfs4_lockowner *lo) 1008static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
738{ 1009{
739 kfree(lo->lo_owner.so_owner.data); 1010 struct nfs4_client *clp = lo->lo_owner.so_client;
740 kmem_cache_free(lockowner_slab, lo); 1011
1012 lockdep_assert_held(&clp->cl_lock);
1013
1014 list_del_init(&lo->lo_owner.so_strhash);
1015}
1016
1017/*
1018 * Free a list of generic stateids that were collected earlier after being
1019 * fully unhashed.
1020 */
1021static void
1022free_ol_stateid_reaplist(struct list_head *reaplist)
1023{
1024 struct nfs4_ol_stateid *stp;
1025 struct nfs4_file *fp;
1026
1027 might_sleep();
1028
1029 while (!list_empty(reaplist)) {
1030 stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
1031 st_locks);
1032 list_del(&stp->st_locks);
1033 fp = stp->st_stid.sc_file;
1034 stp->st_stid.sc_free(&stp->st_stid);
1035 if (fp)
1036 put_nfs4_file(fp);
1037 }
741} 1038}
742 1039
743static void release_lockowner(struct nfs4_lockowner *lo) 1040static void release_lockowner(struct nfs4_lockowner *lo)
744{ 1041{
745 unhash_lockowner(lo); 1042 struct nfs4_client *clp = lo->lo_owner.so_client;
746 nfs4_free_lockowner(lo); 1043 struct nfs4_ol_stateid *stp;
1044 struct list_head reaplist;
1045
1046 INIT_LIST_HEAD(&reaplist);
1047
1048 spin_lock(&clp->cl_lock);
1049 unhash_lockowner_locked(lo);
1050 while (!list_empty(&lo->lo_owner.so_stateids)) {
1051 stp = list_first_entry(&lo->lo_owner.so_stateids,
1052 struct nfs4_ol_stateid, st_perstateowner);
1053 unhash_lock_stateid(stp);
1054 put_ol_stateid_locked(stp, &reaplist);
1055 }
1056 spin_unlock(&clp->cl_lock);
1057 free_ol_stateid_reaplist(&reaplist);
1058 nfs4_put_stateowner(&lo->lo_owner);
747} 1059}
748 1060
749static void 1061static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
750release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) 1062 struct list_head *reaplist)
751{ 1063{
752 struct nfs4_lockowner *lo; 1064 struct nfs4_ol_stateid *stp;
753 1065
754 while (!list_empty(&open_stp->st_lockowners)) { 1066 while (!list_empty(&open_stp->st_locks)) {
755 lo = list_entry(open_stp->st_lockowners.next, 1067 stp = list_entry(open_stp->st_locks.next,
756 struct nfs4_lockowner, lo_perstateid); 1068 struct nfs4_ol_stateid, st_locks);
757 release_lockowner(lo); 1069 unhash_lock_stateid(stp);
1070 put_ol_stateid_locked(stp, reaplist);
758 } 1071 }
759} 1072}
760 1073
761static void unhash_open_stateid(struct nfs4_ol_stateid *stp) 1074static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
1075 struct list_head *reaplist)
762{ 1076{
763 unhash_generic_stateid(stp); 1077 lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
764 release_stateid_lockowners(stp); 1078
765 close_generic_stateid(stp); 1079 unhash_ol_stateid(stp);
1080 release_open_stateid_locks(stp, reaplist);
766} 1081}
767 1082
768static void release_open_stateid(struct nfs4_ol_stateid *stp) 1083static void release_open_stateid(struct nfs4_ol_stateid *stp)
769{ 1084{
770 unhash_open_stateid(stp); 1085 LIST_HEAD(reaplist);
771 free_generic_stateid(stp); 1086
1087 spin_lock(&stp->st_stid.sc_client->cl_lock);
1088 unhash_open_stateid(stp, &reaplist);
1089 put_ol_stateid_locked(stp, &reaplist);
1090 spin_unlock(&stp->st_stid.sc_client->cl_lock);
1091 free_ol_stateid_reaplist(&reaplist);
772} 1092}
773 1093
774static void unhash_openowner(struct nfs4_openowner *oo) 1094static void unhash_openowner_locked(struct nfs4_openowner *oo)
775{ 1095{
776 struct nfs4_ol_stateid *stp; 1096 struct nfs4_client *clp = oo->oo_owner.so_client;
777 1097
778 list_del(&oo->oo_owner.so_strhash); 1098 lockdep_assert_held(&clp->cl_lock);
779 list_del(&oo->oo_perclient); 1099
780 while (!list_empty(&oo->oo_owner.so_stateids)) { 1100 list_del_init(&oo->oo_owner.so_strhash);
781 stp = list_first_entry(&oo->oo_owner.so_stateids, 1101 list_del_init(&oo->oo_perclient);
782 struct nfs4_ol_stateid, st_perstateowner);
783 release_open_stateid(stp);
784 }
785} 1102}
786 1103
787static void release_last_closed_stateid(struct nfs4_openowner *oo) 1104static void release_last_closed_stateid(struct nfs4_openowner *oo)
788{ 1105{
789 struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; 1106 struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net,
1107 nfsd_net_id);
1108 struct nfs4_ol_stateid *s;
790 1109
1110 spin_lock(&nn->client_lock);
1111 s = oo->oo_last_closed_stid;
791 if (s) { 1112 if (s) {
792 free_generic_stateid(s); 1113 list_del_init(&oo->oo_close_lru);
793 oo->oo_last_closed_stid = NULL; 1114 oo->oo_last_closed_stid = NULL;
794 } 1115 }
795} 1116 spin_unlock(&nn->client_lock);
796 1117 if (s)
797static void nfs4_free_openowner(struct nfs4_openowner *oo) 1118 nfs4_put_stid(&s->st_stid);
798{
799 kfree(oo->oo_owner.so_owner.data);
800 kmem_cache_free(openowner_slab, oo);
801} 1119}
802 1120
803static void release_openowner(struct nfs4_openowner *oo) 1121static void release_openowner(struct nfs4_openowner *oo)
804{ 1122{
805 unhash_openowner(oo); 1123 struct nfs4_ol_stateid *stp;
806 list_del(&oo->oo_close_lru); 1124 struct nfs4_client *clp = oo->oo_owner.so_client;
1125 struct list_head reaplist;
1126
1127 INIT_LIST_HEAD(&reaplist);
1128
1129 spin_lock(&clp->cl_lock);
1130 unhash_openowner_locked(oo);
1131 while (!list_empty(&oo->oo_owner.so_stateids)) {
1132 stp = list_first_entry(&oo->oo_owner.so_stateids,
1133 struct nfs4_ol_stateid, st_perstateowner);
1134 unhash_open_stateid(stp, &reaplist);
1135 put_ol_stateid_locked(stp, &reaplist);
1136 }
1137 spin_unlock(&clp->cl_lock);
1138 free_ol_stateid_reaplist(&reaplist);
807 release_last_closed_stateid(oo); 1139 release_last_closed_stateid(oo);
808 nfs4_free_openowner(oo); 1140 nfs4_put_stateowner(&oo->oo_owner);
809} 1141}
810 1142
811static inline int 1143static inline int
@@ -842,7 +1174,7 @@ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
842 return; 1174 return;
843 1175
844 if (!seqid_mutating_err(ntohl(nfserr))) { 1176 if (!seqid_mutating_err(ntohl(nfserr))) {
845 cstate->replay_owner = NULL; 1177 nfsd4_cstate_clear_replay(cstate);
846 return; 1178 return;
847 } 1179 }
848 if (!so) 1180 if (!so)
@@ -1030,10 +1362,8 @@ static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, str
1030 if (ret) 1362 if (ret)
1031 /* oops; xprt is already down: */ 1363 /* oops; xprt is already down: */
1032 nfsd4_conn_lost(&conn->cn_xpt_user); 1364 nfsd4_conn_lost(&conn->cn_xpt_user);
1033 if (conn->cn_flags & NFS4_CDFC4_BACK) { 1365 /* We may have gained or lost a callback channel: */
1034 /* callback channel may be back up */ 1366 nfsd4_probe_callback_sync(ses->se_client);
1035 nfsd4_probe_callback(ses->se_client);
1036 }
1037} 1367}
1038 1368
1039static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) 1369static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses)
@@ -1073,9 +1403,6 @@ static void __free_session(struct nfsd4_session *ses)
1073 1403
1074static void free_session(struct nfsd4_session *ses) 1404static void free_session(struct nfsd4_session *ses)
1075{ 1405{
1076 struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
1077
1078 lockdep_assert_held(&nn->client_lock);
1079 nfsd4_del_conns(ses); 1406 nfsd4_del_conns(ses);
1080 nfsd4_put_drc_mem(&ses->se_fchannel); 1407 nfsd4_put_drc_mem(&ses->se_fchannel);
1081 __free_session(ses); 1408 __free_session(ses);
@@ -1097,12 +1424,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
1097 new->se_cb_sec = cses->cb_sec; 1424 new->se_cb_sec = cses->cb_sec;
1098 atomic_set(&new->se_ref, 0); 1425 atomic_set(&new->se_ref, 0);
1099 idx = hash_sessionid(&new->se_sessionid); 1426 idx = hash_sessionid(&new->se_sessionid);
1100 spin_lock(&nn->client_lock);
1101 list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); 1427 list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
1102 spin_lock(&clp->cl_lock); 1428 spin_lock(&clp->cl_lock);
1103 list_add(&new->se_perclnt, &clp->cl_sessions); 1429 list_add(&new->se_perclnt, &clp->cl_sessions);
1104 spin_unlock(&clp->cl_lock); 1430 spin_unlock(&clp->cl_lock);
1105 spin_unlock(&nn->client_lock);
1106 1431
1107 if (cses->flags & SESSION4_BACK_CHAN) { 1432 if (cses->flags & SESSION4_BACK_CHAN) {
1108 struct sockaddr *sa = svc_addr(rqstp); 1433 struct sockaddr *sa = svc_addr(rqstp);
@@ -1120,12 +1445,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
1120 1445
1121/* caller must hold client_lock */ 1446/* caller must hold client_lock */
1122static struct nfsd4_session * 1447static struct nfsd4_session *
1123find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) 1448__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
1124{ 1449{
1125 struct nfsd4_session *elem; 1450 struct nfsd4_session *elem;
1126 int idx; 1451 int idx;
1127 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 1452 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1128 1453
1454 lockdep_assert_held(&nn->client_lock);
1455
1129 dump_sessionid(__func__, sessionid); 1456 dump_sessionid(__func__, sessionid);
1130 idx = hash_sessionid(sessionid); 1457 idx = hash_sessionid(sessionid);
1131 /* Search in the appropriate list */ 1458 /* Search in the appropriate list */
@@ -1140,10 +1467,33 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
1140 return NULL; 1467 return NULL;
1141} 1468}
1142 1469
1470static struct nfsd4_session *
1471find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
1472 __be32 *ret)
1473{
1474 struct nfsd4_session *session;
1475 __be32 status = nfserr_badsession;
1476
1477 session = __find_in_sessionid_hashtbl(sessionid, net);
1478 if (!session)
1479 goto out;
1480 status = nfsd4_get_session_locked(session);
1481 if (status)
1482 session = NULL;
1483out:
1484 *ret = status;
1485 return session;
1486}
1487
1143/* caller must hold client_lock */ 1488/* caller must hold client_lock */
1144static void 1489static void
1145unhash_session(struct nfsd4_session *ses) 1490unhash_session(struct nfsd4_session *ses)
1146{ 1491{
1492 struct nfs4_client *clp = ses->se_client;
1493 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1494
1495 lockdep_assert_held(&nn->client_lock);
1496
1147 list_del(&ses->se_hash); 1497 list_del(&ses->se_hash);
1148 spin_lock(&ses->se_client->cl_lock); 1498 spin_lock(&ses->se_client->cl_lock);
1149 list_del(&ses->se_perclnt); 1499 list_del(&ses->se_perclnt);
@@ -1169,15 +1519,20 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
1169static struct nfs4_client *alloc_client(struct xdr_netobj name) 1519static struct nfs4_client *alloc_client(struct xdr_netobj name)
1170{ 1520{
1171 struct nfs4_client *clp; 1521 struct nfs4_client *clp;
1522 int i;
1172 1523
1173 clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); 1524 clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
1174 if (clp == NULL) 1525 if (clp == NULL)
1175 return NULL; 1526 return NULL;
1176 clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); 1527 clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
1177 if (clp->cl_name.data == NULL) { 1528 if (clp->cl_name.data == NULL)
1178 kfree(clp); 1529 goto err_no_name;
1179 return NULL; 1530 clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
1180 } 1531 OWNER_HASH_SIZE, GFP_KERNEL);
1532 if (!clp->cl_ownerstr_hashtbl)
1533 goto err_no_hashtbl;
1534 for (i = 0; i < OWNER_HASH_SIZE; i++)
1535 INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
1181 clp->cl_name.len = name.len; 1536 clp->cl_name.len = name.len;
1182 INIT_LIST_HEAD(&clp->cl_sessions); 1537 INIT_LIST_HEAD(&clp->cl_sessions);
1183 idr_init(&clp->cl_stateids); 1538 idr_init(&clp->cl_stateids);
@@ -1192,14 +1547,16 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1192 spin_lock_init(&clp->cl_lock); 1547 spin_lock_init(&clp->cl_lock);
1193 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1548 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
1194 return clp; 1549 return clp;
1550err_no_hashtbl:
1551 kfree(clp->cl_name.data);
1552err_no_name:
1553 kfree(clp);
1554 return NULL;
1195} 1555}
1196 1556
1197static void 1557static void
1198free_client(struct nfs4_client *clp) 1558free_client(struct nfs4_client *clp)
1199{ 1559{
1200 struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
1201
1202 lockdep_assert_held(&nn->client_lock);
1203 while (!list_empty(&clp->cl_sessions)) { 1560 while (!list_empty(&clp->cl_sessions)) {
1204 struct nfsd4_session *ses; 1561 struct nfsd4_session *ses;
1205 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, 1562 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
@@ -1210,18 +1567,32 @@ free_client(struct nfs4_client *clp)
1210 } 1567 }
1211 rpc_destroy_wait_queue(&clp->cl_cb_waitq); 1568 rpc_destroy_wait_queue(&clp->cl_cb_waitq);
1212 free_svc_cred(&clp->cl_cred); 1569 free_svc_cred(&clp->cl_cred);
1570 kfree(clp->cl_ownerstr_hashtbl);
1213 kfree(clp->cl_name.data); 1571 kfree(clp->cl_name.data);
1214 idr_destroy(&clp->cl_stateids); 1572 idr_destroy(&clp->cl_stateids);
1215 kfree(clp); 1573 kfree(clp);
1216} 1574}
1217 1575
1218/* must be called under the client_lock */ 1576/* must be called under the client_lock */
1219static inline void 1577static void
1220unhash_client_locked(struct nfs4_client *clp) 1578unhash_client_locked(struct nfs4_client *clp)
1221{ 1579{
1580 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1222 struct nfsd4_session *ses; 1581 struct nfsd4_session *ses;
1223 1582
1224 list_del(&clp->cl_lru); 1583 lockdep_assert_held(&nn->client_lock);
1584
1585 /* Mark the client as expired! */
1586 clp->cl_time = 0;
1587 /* Make it invisible */
1588 if (!list_empty(&clp->cl_idhash)) {
1589 list_del_init(&clp->cl_idhash);
1590 if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
1591 rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
1592 else
1593 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1594 }
1595 list_del_init(&clp->cl_lru);
1225 spin_lock(&clp->cl_lock); 1596 spin_lock(&clp->cl_lock);
1226 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) 1597 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
1227 list_del_init(&ses->se_hash); 1598 list_del_init(&ses->se_hash);
@@ -1229,53 +1600,71 @@ unhash_client_locked(struct nfs4_client *clp)
1229} 1600}
1230 1601
1231static void 1602static void
1232destroy_client(struct nfs4_client *clp) 1603unhash_client(struct nfs4_client *clp)
1604{
1605 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1606
1607 spin_lock(&nn->client_lock);
1608 unhash_client_locked(clp);
1609 spin_unlock(&nn->client_lock);
1610}
1611
1612static __be32 mark_client_expired_locked(struct nfs4_client *clp)
1613{
1614 if (atomic_read(&clp->cl_refcount))
1615 return nfserr_jukebox;
1616 unhash_client_locked(clp);
1617 return nfs_ok;
1618}
1619
1620static void
1621__destroy_client(struct nfs4_client *clp)
1233{ 1622{
1234 struct nfs4_openowner *oo; 1623 struct nfs4_openowner *oo;
1235 struct nfs4_delegation *dp; 1624 struct nfs4_delegation *dp;
1236 struct list_head reaplist; 1625 struct list_head reaplist;
1237 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1238 1626
1239 INIT_LIST_HEAD(&reaplist); 1627 INIT_LIST_HEAD(&reaplist);
1240 spin_lock(&state_lock); 1628 spin_lock(&state_lock);
1241 while (!list_empty(&clp->cl_delegations)) { 1629 while (!list_empty(&clp->cl_delegations)) {
1242 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); 1630 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
1243 list_del_init(&dp->dl_perclnt); 1631 unhash_delegation_locked(dp);
1244 list_move(&dp->dl_recall_lru, &reaplist); 1632 list_add(&dp->dl_recall_lru, &reaplist);
1245 } 1633 }
1246 spin_unlock(&state_lock); 1634 spin_unlock(&state_lock);
1247 while (!list_empty(&reaplist)) { 1635 while (!list_empty(&reaplist)) {
1248 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1636 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1249 destroy_delegation(dp); 1637 list_del_init(&dp->dl_recall_lru);
1638 nfs4_put_stid(&dp->dl_stid);
1250 } 1639 }
1251 list_splice_init(&clp->cl_revoked, &reaplist); 1640 while (!list_empty(&clp->cl_revoked)) {
1252 while (!list_empty(&reaplist)) {
1253 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); 1641 dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1254 destroy_revoked_delegation(dp); 1642 list_del_init(&dp->dl_recall_lru);
1643 nfs4_put_stid(&dp->dl_stid);
1255 } 1644 }
1256 while (!list_empty(&clp->cl_openowners)) { 1645 while (!list_empty(&clp->cl_openowners)) {
1257 oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); 1646 oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
1647 atomic_inc(&oo->oo_owner.so_count);
1258 release_openowner(oo); 1648 release_openowner(oo);
1259 } 1649 }
1260 nfsd4_shutdown_callback(clp); 1650 nfsd4_shutdown_callback(clp);
1261 if (clp->cl_cb_conn.cb_xprt) 1651 if (clp->cl_cb_conn.cb_xprt)
1262 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 1652 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
1263 list_del(&clp->cl_idhash);
1264 if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
1265 rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
1266 else
1267 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1268 spin_lock(&nn->client_lock);
1269 unhash_client_locked(clp);
1270 WARN_ON_ONCE(atomic_read(&clp->cl_refcount));
1271 free_client(clp); 1653 free_client(clp);
1272 spin_unlock(&nn->client_lock); 1654}
1655
1656static void
1657destroy_client(struct nfs4_client *clp)
1658{
1659 unhash_client(clp);
1660 __destroy_client(clp);
1273} 1661}
1274 1662
1275static void expire_client(struct nfs4_client *clp) 1663static void expire_client(struct nfs4_client *clp)
1276{ 1664{
1665 unhash_client(clp);
1277 nfsd4_client_record_remove(clp); 1666 nfsd4_client_record_remove(clp);
1278 destroy_client(clp); 1667 __destroy_client(clp);
1279} 1668}
1280 1669
1281static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 1670static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -1408,25 +1797,28 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
1408 return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); 1797 return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
1409} 1798}
1410 1799
1411static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) 1800static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
1412{ 1801{
1413 static u32 current_clientid = 1; 1802 __be32 verf[2];
1414 1803
1415 clp->cl_clientid.cl_boot = nn->boot_time; 1804 /*
1416 clp->cl_clientid.cl_id = current_clientid++; 1805 * This is opaque to client, so no need to byte-swap. Use
1806 * __force to keep sparse happy
1807 */
1808 verf[0] = (__force __be32)get_seconds();
1809 verf[1] = (__force __be32)nn->clientid_counter;
1810 memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
1417} 1811}
1418 1812
1419static void gen_confirm(struct nfs4_client *clp) 1813static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
1420{ 1814{
1421 __be32 verf[2]; 1815 clp->cl_clientid.cl_boot = nn->boot_time;
1422 static u32 i; 1816 clp->cl_clientid.cl_id = nn->clientid_counter++;
1423 1817 gen_confirm(clp, nn);
1424 verf[0] = (__be32)get_seconds();
1425 verf[1] = (__be32)i++;
1426 memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
1427} 1818}
1428 1819
1429static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) 1820static struct nfs4_stid *
1821find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
1430{ 1822{
1431 struct nfs4_stid *ret; 1823 struct nfs4_stid *ret;
1432 1824
@@ -1436,16 +1828,21 @@ static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
1436 return ret; 1828 return ret;
1437} 1829}
1438 1830
1439static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) 1831static struct nfs4_stid *
1832find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
1440{ 1833{
1441 struct nfs4_stid *s; 1834 struct nfs4_stid *s;
1442 1835
1443 s = find_stateid(cl, t); 1836 spin_lock(&cl->cl_lock);
1444 if (!s) 1837 s = find_stateid_locked(cl, t);
1445 return NULL; 1838 if (s != NULL) {
1446 if (typemask & s->sc_type) 1839 if (typemask & s->sc_type)
1447 return s; 1840 atomic_inc(&s->sc_count);
1448 return NULL; 1841 else
1842 s = NULL;
1843 }
1844 spin_unlock(&cl->cl_lock);
1845 return s;
1449} 1846}
1450 1847
1451static struct nfs4_client *create_client(struct xdr_netobj name, 1848static struct nfs4_client *create_client(struct xdr_netobj name,
@@ -1455,7 +1852,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
1455 struct sockaddr *sa = svc_addr(rqstp); 1852 struct sockaddr *sa = svc_addr(rqstp);
1456 int ret; 1853 int ret;
1457 struct net *net = SVC_NET(rqstp); 1854 struct net *net = SVC_NET(rqstp);
1458 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1459 1855
1460 clp = alloc_client(name); 1856 clp = alloc_client(name);
1461 if (clp == NULL) 1857 if (clp == NULL)
@@ -1463,17 +1859,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
1463 1859
1464 ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); 1860 ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
1465 if (ret) { 1861 if (ret) {
1466 spin_lock(&nn->client_lock);
1467 free_client(clp); 1862 free_client(clp);
1468 spin_unlock(&nn->client_lock);
1469 return NULL; 1863 return NULL;
1470 } 1864 }
1471 nfsd4_init_callback(&clp->cl_cb_null); 1865 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null);
1472 clp->cl_time = get_seconds(); 1866 clp->cl_time = get_seconds();
1473 clear_bit(0, &clp->cl_cb_slot_busy); 1867 clear_bit(0, &clp->cl_cb_slot_busy);
1474 copy_verf(clp, verf); 1868 copy_verf(clp, verf);
1475 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); 1869 rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
1476 gen_confirm(clp);
1477 clp->cl_cb_session = NULL; 1870 clp->cl_cb_session = NULL;
1478 clp->net = net; 1871 clp->net = net;
1479 return clp; 1872 return clp;
@@ -1525,11 +1918,13 @@ add_to_unconfirmed(struct nfs4_client *clp)
1525 unsigned int idhashval; 1918 unsigned int idhashval;
1526 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 1919 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1527 1920
1921 lockdep_assert_held(&nn->client_lock);
1922
1528 clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); 1923 clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
1529 add_clp_to_name_tree(clp, &nn->unconf_name_tree); 1924 add_clp_to_name_tree(clp, &nn->unconf_name_tree);
1530 idhashval = clientid_hashval(clp->cl_clientid.cl_id); 1925 idhashval = clientid_hashval(clp->cl_clientid.cl_id);
1531 list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); 1926 list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
1532 renew_client(clp); 1927 renew_client_locked(clp);
1533} 1928}
1534 1929
1535static void 1930static void
@@ -1538,12 +1933,14 @@ move_to_confirmed(struct nfs4_client *clp)
1538 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); 1933 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
1539 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 1934 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1540 1935
1936 lockdep_assert_held(&nn->client_lock);
1937
1541 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 1938 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
1542 list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); 1939 list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
1543 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); 1940 rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1544 add_clp_to_name_tree(clp, &nn->conf_name_tree); 1941 add_clp_to_name_tree(clp, &nn->conf_name_tree);
1545 set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); 1942 set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
1546 renew_client(clp); 1943 renew_client_locked(clp);
1547} 1944}
1548 1945
1549static struct nfs4_client * 1946static struct nfs4_client *
@@ -1556,7 +1953,7 @@ find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
1556 if (same_clid(&clp->cl_clientid, clid)) { 1953 if (same_clid(&clp->cl_clientid, clid)) {
1557 if ((bool)clp->cl_minorversion != sessions) 1954 if ((bool)clp->cl_minorversion != sessions)
1558 return NULL; 1955 return NULL;
1559 renew_client(clp); 1956 renew_client_locked(clp);
1560 return clp; 1957 return clp;
1561 } 1958 }
1562 } 1959 }
@@ -1568,6 +1965,7 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
1568{ 1965{
1569 struct list_head *tbl = nn->conf_id_hashtbl; 1966 struct list_head *tbl = nn->conf_id_hashtbl;
1570 1967
1968 lockdep_assert_held(&nn->client_lock);
1571 return find_client_in_id_table(tbl, clid, sessions); 1969 return find_client_in_id_table(tbl, clid, sessions);
1572} 1970}
1573 1971
@@ -1576,6 +1974,7 @@ find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
1576{ 1974{
1577 struct list_head *tbl = nn->unconf_id_hashtbl; 1975 struct list_head *tbl = nn->unconf_id_hashtbl;
1578 1976
1977 lockdep_assert_held(&nn->client_lock);
1579 return find_client_in_id_table(tbl, clid, sessions); 1978 return find_client_in_id_table(tbl, clid, sessions);
1580} 1979}
1581 1980
@@ -1587,12 +1986,14 @@ static bool clp_used_exchangeid(struct nfs4_client *clp)
1587static struct nfs4_client * 1986static struct nfs4_client *
1588find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) 1987find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
1589{ 1988{
1989 lockdep_assert_held(&nn->client_lock);
1590 return find_clp_in_name_tree(name, &nn->conf_name_tree); 1990 return find_clp_in_name_tree(name, &nn->conf_name_tree);
1591} 1991}
1592 1992
1593static struct nfs4_client * 1993static struct nfs4_client *
1594find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) 1994find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
1595{ 1995{
1996 lockdep_assert_held(&nn->client_lock);
1596 return find_clp_in_name_tree(name, &nn->unconf_name_tree); 1997 return find_clp_in_name_tree(name, &nn->unconf_name_tree);
1597} 1998}
1598 1999
@@ -1642,7 +2043,7 @@ out_err:
1642/* 2043/*
1643 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. 2044 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
1644 */ 2045 */
1645void 2046static void
1646nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) 2047nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
1647{ 2048{
1648 struct xdr_buf *buf = resp->xdr.buf; 2049 struct xdr_buf *buf = resp->xdr.buf;
@@ -1758,7 +2159,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1758 struct nfsd4_compound_state *cstate, 2159 struct nfsd4_compound_state *cstate,
1759 struct nfsd4_exchange_id *exid) 2160 struct nfsd4_exchange_id *exid)
1760{ 2161{
1761 struct nfs4_client *unconf, *conf, *new; 2162 struct nfs4_client *conf, *new;
2163 struct nfs4_client *unconf = NULL;
1762 __be32 status; 2164 __be32 status;
1763 char addr_str[INET6_ADDRSTRLEN]; 2165 char addr_str[INET6_ADDRSTRLEN];
1764 nfs4_verifier verf = exid->verifier; 2166 nfs4_verifier verf = exid->verifier;
@@ -1787,8 +2189,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1787 return nfserr_encr_alg_unsupp; 2189 return nfserr_encr_alg_unsupp;
1788 } 2190 }
1789 2191
2192 new = create_client(exid->clname, rqstp, &verf);
2193 if (new == NULL)
2194 return nfserr_jukebox;
2195
1790 /* Cases below refer to rfc 5661 section 18.35.4: */ 2196 /* Cases below refer to rfc 5661 section 18.35.4: */
1791 nfs4_lock_state(); 2197 spin_lock(&nn->client_lock);
1792 conf = find_confirmed_client_by_name(&exid->clname, nn); 2198 conf = find_confirmed_client_by_name(&exid->clname, nn);
1793 if (conf) { 2199 if (conf) {
1794 bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); 2200 bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
@@ -1813,7 +2219,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1813 } 2219 }
1814 /* case 6 */ 2220 /* case 6 */
1815 exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; 2221 exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
1816 new = conf;
1817 goto out_copy; 2222 goto out_copy;
1818 } 2223 }
1819 if (!creds_match) { /* case 3 */ 2224 if (!creds_match) { /* case 3 */
@@ -1821,15 +2226,14 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1821 status = nfserr_clid_inuse; 2226 status = nfserr_clid_inuse;
1822 goto out; 2227 goto out;
1823 } 2228 }
1824 expire_client(conf);
1825 goto out_new; 2229 goto out_new;
1826 } 2230 }
1827 if (verfs_match) { /* case 2 */ 2231 if (verfs_match) { /* case 2 */
1828 conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; 2232 conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
1829 new = conf;
1830 goto out_copy; 2233 goto out_copy;
1831 } 2234 }
1832 /* case 5, client reboot */ 2235 /* case 5, client reboot */
2236 conf = NULL;
1833 goto out_new; 2237 goto out_new;
1834 } 2238 }
1835 2239
@@ -1840,33 +2244,38 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
1840 2244
1841 unconf = find_unconfirmed_client_by_name(&exid->clname, nn); 2245 unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
1842 if (unconf) /* case 4, possible retry or client restart */ 2246 if (unconf) /* case 4, possible retry or client restart */
1843 expire_client(unconf); 2247 unhash_client_locked(unconf);
1844 2248
1845 /* case 1 (normal case) */ 2249 /* case 1 (normal case) */
1846out_new: 2250out_new:
1847 new = create_client(exid->clname, rqstp, &verf); 2251 if (conf) {
1848 if (new == NULL) { 2252 status = mark_client_expired_locked(conf);
1849 status = nfserr_jukebox; 2253 if (status)
1850 goto out; 2254 goto out;
1851 } 2255 }
1852 new->cl_minorversion = cstate->minorversion; 2256 new->cl_minorversion = cstate->minorversion;
1853 new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); 2257 new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
1854 2258
1855 gen_clid(new, nn); 2259 gen_clid(new, nn);
1856 add_to_unconfirmed(new); 2260 add_to_unconfirmed(new);
2261 swap(new, conf);
1857out_copy: 2262out_copy:
1858 exid->clientid.cl_boot = new->cl_clientid.cl_boot; 2263 exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
1859 exid->clientid.cl_id = new->cl_clientid.cl_id; 2264 exid->clientid.cl_id = conf->cl_clientid.cl_id;
1860 2265
1861 exid->seqid = new->cl_cs_slot.sl_seqid + 1; 2266 exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
1862 nfsd4_set_ex_flags(new, exid); 2267 nfsd4_set_ex_flags(conf, exid);
1863 2268
1864 dprintk("nfsd4_exchange_id seqid %d flags %x\n", 2269 dprintk("nfsd4_exchange_id seqid %d flags %x\n",
1865 new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); 2270 conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
1866 status = nfs_ok; 2271 status = nfs_ok;
1867 2272
1868out: 2273out:
1869 nfs4_unlock_state(); 2274 spin_unlock(&nn->client_lock);
2275 if (new)
2276 expire_client(new);
2277 if (unconf)
2278 expire_client(unconf);
1870 return status; 2279 return status;
1871} 2280}
1872 2281
@@ -2010,6 +2419,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2010{ 2419{
2011 struct sockaddr *sa = svc_addr(rqstp); 2420 struct sockaddr *sa = svc_addr(rqstp);
2012 struct nfs4_client *conf, *unconf; 2421 struct nfs4_client *conf, *unconf;
2422 struct nfs4_client *old = NULL;
2013 struct nfsd4_session *new; 2423 struct nfsd4_session *new;
2014 struct nfsd4_conn *conn; 2424 struct nfsd4_conn *conn;
2015 struct nfsd4_clid_slot *cs_slot = NULL; 2425 struct nfsd4_clid_slot *cs_slot = NULL;
@@ -2035,7 +2445,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2035 if (!conn) 2445 if (!conn)
2036 goto out_free_session; 2446 goto out_free_session;
2037 2447
2038 nfs4_lock_state(); 2448 spin_lock(&nn->client_lock);
2039 unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); 2449 unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
2040 conf = find_confirmed_client(&cr_ses->clientid, true, nn); 2450 conf = find_confirmed_client(&cr_ses->clientid, true, nn);
2041 WARN_ON_ONCE(conf && unconf); 2451 WARN_ON_ONCE(conf && unconf);
@@ -2054,7 +2464,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2054 goto out_free_conn; 2464 goto out_free_conn;
2055 } 2465 }
2056 } else if (unconf) { 2466 } else if (unconf) {
2057 struct nfs4_client *old;
2058 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 2467 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
2059 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 2468 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
2060 status = nfserr_clid_inuse; 2469 status = nfserr_clid_inuse;
@@ -2072,10 +2481,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2072 } 2481 }
2073 old = find_confirmed_client_by_name(&unconf->cl_name, nn); 2482 old = find_confirmed_client_by_name(&unconf->cl_name, nn);
2074 if (old) { 2483 if (old) {
2075 status = mark_client_expired(old); 2484 status = mark_client_expired_locked(old);
2076 if (status) 2485 if (status) {
2486 old = NULL;
2077 goto out_free_conn; 2487 goto out_free_conn;
2078 expire_client(old); 2488 }
2079 } 2489 }
2080 move_to_confirmed(unconf); 2490 move_to_confirmed(unconf);
2081 conf = unconf; 2491 conf = unconf;
@@ -2091,20 +2501,27 @@ nfsd4_create_session(struct svc_rqst *rqstp,
2091 cr_ses->flags &= ~SESSION4_RDMA; 2501 cr_ses->flags &= ~SESSION4_RDMA;
2092 2502
2093 init_session(rqstp, new, conf, cr_ses); 2503 init_session(rqstp, new, conf, cr_ses);
2094 nfsd4_init_conn(rqstp, conn, new); 2504 nfsd4_get_session_locked(new);
2095 2505
2096 memcpy(cr_ses->sessionid.data, new->se_sessionid.data, 2506 memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
2097 NFS4_MAX_SESSIONID_LEN); 2507 NFS4_MAX_SESSIONID_LEN);
2098 cs_slot->sl_seqid++; 2508 cs_slot->sl_seqid++;
2099 cr_ses->seqid = cs_slot->sl_seqid; 2509 cr_ses->seqid = cs_slot->sl_seqid;
2100 2510
2101 /* cache solo and embedded create sessions under the state lock */ 2511 /* cache solo and embedded create sessions under the client_lock */
2102 nfsd4_cache_create_session(cr_ses, cs_slot, status); 2512 nfsd4_cache_create_session(cr_ses, cs_slot, status);
2103 nfs4_unlock_state(); 2513 spin_unlock(&nn->client_lock);
2514 /* init connection and backchannel */
2515 nfsd4_init_conn(rqstp, conn, new);
2516 nfsd4_put_session(new);
2517 if (old)
2518 expire_client(old);
2104 return status; 2519 return status;
2105out_free_conn: 2520out_free_conn:
2106 nfs4_unlock_state(); 2521 spin_unlock(&nn->client_lock);
2107 free_conn(conn); 2522 free_conn(conn);
2523 if (old)
2524 expire_client(old);
2108out_free_session: 2525out_free_session:
2109 __free_session(new); 2526 __free_session(new);
2110out_release_drc_mem: 2527out_release_drc_mem:
@@ -2152,17 +2569,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
2152 __be32 status; 2569 __be32 status;
2153 struct nfsd4_conn *conn; 2570 struct nfsd4_conn *conn;
2154 struct nfsd4_session *session; 2571 struct nfsd4_session *session;
2155 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2572 struct net *net = SVC_NET(rqstp);
2573 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2156 2574
2157 if (!nfsd4_last_compound_op(rqstp)) 2575 if (!nfsd4_last_compound_op(rqstp))
2158 return nfserr_not_only_op; 2576 return nfserr_not_only_op;
2159 nfs4_lock_state();
2160 spin_lock(&nn->client_lock); 2577 spin_lock(&nn->client_lock);
2161 session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); 2578 session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
2162 spin_unlock(&nn->client_lock); 2579 spin_unlock(&nn->client_lock);
2163 status = nfserr_badsession;
2164 if (!session) 2580 if (!session)
2165 goto out; 2581 goto out_no_session;
2166 status = nfserr_wrong_cred; 2582 status = nfserr_wrong_cred;
2167 if (!mach_creds_match(session->se_client, rqstp)) 2583 if (!mach_creds_match(session->se_client, rqstp))
2168 goto out; 2584 goto out;
@@ -2176,7 +2592,8 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
2176 nfsd4_init_conn(rqstp, conn, session); 2592 nfsd4_init_conn(rqstp, conn, session);
2177 status = nfs_ok; 2593 status = nfs_ok;
2178out: 2594out:
2179 nfs4_unlock_state(); 2595 nfsd4_put_session(session);
2596out_no_session:
2180 return status; 2597 return status;
2181} 2598}
2182 2599
@@ -2195,9 +2612,9 @@ nfsd4_destroy_session(struct svc_rqst *r,
2195 struct nfsd4_session *ses; 2612 struct nfsd4_session *ses;
2196 __be32 status; 2613 __be32 status;
2197 int ref_held_by_me = 0; 2614 int ref_held_by_me = 0;
2198 struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); 2615 struct net *net = SVC_NET(r);
2616 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2199 2617
2200 nfs4_lock_state();
2201 status = nfserr_not_only_op; 2618 status = nfserr_not_only_op;
2202 if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { 2619 if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
2203 if (!nfsd4_last_compound_op(r)) 2620 if (!nfsd4_last_compound_op(r))
@@ -2206,14 +2623,12 @@ nfsd4_destroy_session(struct svc_rqst *r,
2206 } 2623 }
2207 dump_sessionid(__func__, &sessionid->sessionid); 2624 dump_sessionid(__func__, &sessionid->sessionid);
2208 spin_lock(&nn->client_lock); 2625 spin_lock(&nn->client_lock);
2209 ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); 2626 ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
2210 status = nfserr_badsession;
2211 if (!ses) 2627 if (!ses)
2212 goto out_client_lock; 2628 goto out_client_lock;
2213 status = nfserr_wrong_cred; 2629 status = nfserr_wrong_cred;
2214 if (!mach_creds_match(ses->se_client, r)) 2630 if (!mach_creds_match(ses->se_client, r))
2215 goto out_client_lock; 2631 goto out_put_session;
2216 nfsd4_get_session_locked(ses);
2217 status = mark_session_dead_locked(ses, 1 + ref_held_by_me); 2632 status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
2218 if (status) 2633 if (status)
2219 goto out_put_session; 2634 goto out_put_session;
@@ -2225,11 +2640,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
2225 spin_lock(&nn->client_lock); 2640 spin_lock(&nn->client_lock);
2226 status = nfs_ok; 2641 status = nfs_ok;
2227out_put_session: 2642out_put_session:
2228 nfsd4_put_session(ses); 2643 nfsd4_put_session_locked(ses);
2229out_client_lock: 2644out_client_lock:
2230 spin_unlock(&nn->client_lock); 2645 spin_unlock(&nn->client_lock);
2231out: 2646out:
2232 nfs4_unlock_state();
2233 return status; 2647 return status;
2234} 2648}
2235 2649
@@ -2300,7 +2714,8 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2300 struct nfsd4_conn *conn; 2714 struct nfsd4_conn *conn;
2301 __be32 status; 2715 __be32 status;
2302 int buflen; 2716 int buflen;
2303 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2717 struct net *net = SVC_NET(rqstp);
2718 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2304 2719
2305 if (resp->opcnt != 1) 2720 if (resp->opcnt != 1)
2306 return nfserr_sequence_pos; 2721 return nfserr_sequence_pos;
@@ -2314,17 +2729,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2314 return nfserr_jukebox; 2729 return nfserr_jukebox;
2315 2730
2316 spin_lock(&nn->client_lock); 2731 spin_lock(&nn->client_lock);
2317 status = nfserr_badsession; 2732 session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
2318 session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
2319 if (!session) 2733 if (!session)
2320 goto out_no_session; 2734 goto out_no_session;
2321 clp = session->se_client; 2735 clp = session->se_client;
2322 status = get_client_locked(clp);
2323 if (status)
2324 goto out_no_session;
2325 status = nfsd4_get_session_locked(session);
2326 if (status)
2327 goto out_put_client;
2328 2736
2329 status = nfserr_too_many_ops; 2737 status = nfserr_too_many_ops;
2330 if (nfsd4_session_too_many_ops(rqstp, session)) 2738 if (nfsd4_session_too_many_ops(rqstp, session))
@@ -2354,6 +2762,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2354 goto out_put_session; 2762 goto out_put_session;
2355 cstate->slot = slot; 2763 cstate->slot = slot;
2356 cstate->session = session; 2764 cstate->session = session;
2765 cstate->clp = clp;
2357 /* Return the cached reply status and set cstate->status 2766 /* Return the cached reply status and set cstate->status
2358 * for nfsd4_proc_compound processing */ 2767 * for nfsd4_proc_compound processing */
2359 status = nfsd4_replay_cache_entry(resp, seq); 2768 status = nfsd4_replay_cache_entry(resp, seq);
@@ -2388,6 +2797,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
2388 2797
2389 cstate->slot = slot; 2798 cstate->slot = slot;
2390 cstate->session = session; 2799 cstate->session = session;
2800 cstate->clp = clp;
2391 2801
2392out: 2802out:
2393 switch (clp->cl_cb_state) { 2803 switch (clp->cl_cb_state) {
@@ -2408,31 +2818,48 @@ out_no_session:
2408 spin_unlock(&nn->client_lock); 2818 spin_unlock(&nn->client_lock);
2409 return status; 2819 return status;
2410out_put_session: 2820out_put_session:
2411 nfsd4_put_session(session); 2821 nfsd4_put_session_locked(session);
2412out_put_client:
2413 put_client_renew_locked(clp);
2414 goto out_no_session; 2822 goto out_no_session;
2415} 2823}
2416 2824
2825void
2826nfsd4_sequence_done(struct nfsd4_compoundres *resp)
2827{
2828 struct nfsd4_compound_state *cs = &resp->cstate;
2829
2830 if (nfsd4_has_session(cs)) {
2831 if (cs->status != nfserr_replay_cache) {
2832 nfsd4_store_cache_entry(resp);
2833 cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
2834 }
2835 /* Drop session reference that was taken in nfsd4_sequence() */
2836 nfsd4_put_session(cs->session);
2837 } else if (cs->clp)
2838 put_client_renew(cs->clp);
2839}
2840
2417__be32 2841__be32
2418nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) 2842nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
2419{ 2843{
2420 struct nfs4_client *conf, *unconf, *clp; 2844 struct nfs4_client *conf, *unconf;
2845 struct nfs4_client *clp = NULL;
2421 __be32 status = 0; 2846 __be32 status = 0;
2422 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2847 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2423 2848
2424 nfs4_lock_state(); 2849 spin_lock(&nn->client_lock);
2425 unconf = find_unconfirmed_client(&dc->clientid, true, nn); 2850 unconf = find_unconfirmed_client(&dc->clientid, true, nn);
2426 conf = find_confirmed_client(&dc->clientid, true, nn); 2851 conf = find_confirmed_client(&dc->clientid, true, nn);
2427 WARN_ON_ONCE(conf && unconf); 2852 WARN_ON_ONCE(conf && unconf);
2428 2853
2429 if (conf) { 2854 if (conf) {
2430 clp = conf;
2431
2432 if (client_has_state(conf)) { 2855 if (client_has_state(conf)) {
2433 status = nfserr_clientid_busy; 2856 status = nfserr_clientid_busy;
2434 goto out; 2857 goto out;
2435 } 2858 }
2859 status = mark_client_expired_locked(conf);
2860 if (status)
2861 goto out;
2862 clp = conf;
2436 } else if (unconf) 2863 } else if (unconf)
2437 clp = unconf; 2864 clp = unconf;
2438 else { 2865 else {
@@ -2440,12 +2867,15 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
2440 goto out; 2867 goto out;
2441 } 2868 }
2442 if (!mach_creds_match(clp, rqstp)) { 2869 if (!mach_creds_match(clp, rqstp)) {
2870 clp = NULL;
2443 status = nfserr_wrong_cred; 2871 status = nfserr_wrong_cred;
2444 goto out; 2872 goto out;
2445 } 2873 }
2446 expire_client(clp); 2874 unhash_client_locked(clp);
2447out: 2875out:
2448 nfs4_unlock_state(); 2876 spin_unlock(&nn->client_lock);
2877 if (clp)
2878 expire_client(clp);
2449 return status; 2879 return status;
2450} 2880}
2451 2881
@@ -2464,7 +2894,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
2464 return nfs_ok; 2894 return nfs_ok;
2465 } 2895 }
2466 2896
2467 nfs4_lock_state();
2468 status = nfserr_complete_already; 2897 status = nfserr_complete_already;
2469 if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, 2898 if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
2470 &cstate->session->se_client->cl_flags)) 2899 &cstate->session->se_client->cl_flags))
@@ -2484,7 +2913,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
2484 status = nfs_ok; 2913 status = nfs_ok;
2485 nfsd4_client_record_create(cstate->session->se_client); 2914 nfsd4_client_record_create(cstate->session->se_client);
2486out: 2915out:
2487 nfs4_unlock_state();
2488 return status; 2916 return status;
2489} 2917}
2490 2918
@@ -2494,12 +2922,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2494{ 2922{
2495 struct xdr_netobj clname = setclid->se_name; 2923 struct xdr_netobj clname = setclid->se_name;
2496 nfs4_verifier clverifier = setclid->se_verf; 2924 nfs4_verifier clverifier = setclid->se_verf;
2497 struct nfs4_client *conf, *unconf, *new; 2925 struct nfs4_client *conf, *new;
2926 struct nfs4_client *unconf = NULL;
2498 __be32 status; 2927 __be32 status;
2499 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 2928 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2500 2929
2930 new = create_client(clname, rqstp, &clverifier);
2931 if (new == NULL)
2932 return nfserr_jukebox;
2501 /* Cases below refer to rfc 3530 section 14.2.33: */ 2933 /* Cases below refer to rfc 3530 section 14.2.33: */
2502 nfs4_lock_state(); 2934 spin_lock(&nn->client_lock);
2503 conf = find_confirmed_client_by_name(&clname, nn); 2935 conf = find_confirmed_client_by_name(&clname, nn);
2504 if (conf) { 2936 if (conf) {
2505 /* case 0: */ 2937 /* case 0: */
@@ -2517,11 +2949,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2517 } 2949 }
2518 unconf = find_unconfirmed_client_by_name(&clname, nn); 2950 unconf = find_unconfirmed_client_by_name(&clname, nn);
2519 if (unconf) 2951 if (unconf)
2520 expire_client(unconf); 2952 unhash_client_locked(unconf);
2521 status = nfserr_jukebox;
2522 new = create_client(clname, rqstp, &clverifier);
2523 if (new == NULL)
2524 goto out;
2525 if (conf && same_verf(&conf->cl_verifier, &clverifier)) 2953 if (conf && same_verf(&conf->cl_verifier, &clverifier))
2526 /* case 1: probable callback update */ 2954 /* case 1: probable callback update */
2527 copy_clid(new, conf); 2955 copy_clid(new, conf);
@@ -2533,9 +2961,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2533 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 2961 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
2534 setclid->se_clientid.cl_id = new->cl_clientid.cl_id; 2962 setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
2535 memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); 2963 memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
2964 new = NULL;
2536 status = nfs_ok; 2965 status = nfs_ok;
2537out: 2966out:
2538 nfs4_unlock_state(); 2967 spin_unlock(&nn->client_lock);
2968 if (new)
2969 free_client(new);
2970 if (unconf)
2971 expire_client(unconf);
2539 return status; 2972 return status;
2540} 2973}
2541 2974
@@ -2546,6 +2979,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
2546 struct nfsd4_setclientid_confirm *setclientid_confirm) 2979 struct nfsd4_setclientid_confirm *setclientid_confirm)
2547{ 2980{
2548 struct nfs4_client *conf, *unconf; 2981 struct nfs4_client *conf, *unconf;
2982 struct nfs4_client *old = NULL;
2549 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 2983 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
2550 clientid_t * clid = &setclientid_confirm->sc_clientid; 2984 clientid_t * clid = &setclientid_confirm->sc_clientid;
2551 __be32 status; 2985 __be32 status;
@@ -2553,8 +2987,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
2553 2987
2554 if (STALE_CLIENTID(clid, nn)) 2988 if (STALE_CLIENTID(clid, nn))
2555 return nfserr_stale_clientid; 2989 return nfserr_stale_clientid;
2556 nfs4_lock_state();
2557 2990
2991 spin_lock(&nn->client_lock);
2558 conf = find_confirmed_client(clid, false, nn); 2992 conf = find_confirmed_client(clid, false, nn);
2559 unconf = find_unconfirmed_client(clid, false, nn); 2993 unconf = find_unconfirmed_client(clid, false, nn);
2560 /* 2994 /*
@@ -2578,22 +3012,30 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
2578 } 3012 }
2579 status = nfs_ok; 3013 status = nfs_ok;
2580 if (conf) { /* case 1: callback update */ 3014 if (conf) { /* case 1: callback update */
3015 old = unconf;
3016 unhash_client_locked(old);
2581 nfsd4_change_callback(conf, &unconf->cl_cb_conn); 3017 nfsd4_change_callback(conf, &unconf->cl_cb_conn);
2582 nfsd4_probe_callback(conf);
2583 expire_client(unconf);
2584 } else { /* case 3: normal case; new or rebooted client */ 3018 } else { /* case 3: normal case; new or rebooted client */
2585 conf = find_confirmed_client_by_name(&unconf->cl_name, nn); 3019 old = find_confirmed_client_by_name(&unconf->cl_name, nn);
2586 if (conf) { 3020 if (old) {
2587 status = mark_client_expired(conf); 3021 status = mark_client_expired_locked(old);
2588 if (status) 3022 if (status) {
3023 old = NULL;
2589 goto out; 3024 goto out;
2590 expire_client(conf); 3025 }
2591 } 3026 }
2592 move_to_confirmed(unconf); 3027 move_to_confirmed(unconf);
2593 nfsd4_probe_callback(unconf); 3028 conf = unconf;
2594 } 3029 }
3030 get_client_locked(conf);
3031 spin_unlock(&nn->client_lock);
3032 nfsd4_probe_callback(conf);
3033 spin_lock(&nn->client_lock);
3034 put_client_renew_locked(conf);
2595out: 3035out:
2596 nfs4_unlock_state(); 3036 spin_unlock(&nn->client_lock);
3037 if (old)
3038 expire_client(old);
2597 return status; 3039 return status;
2598} 3040}
2599 3041
@@ -2603,21 +3045,23 @@ static struct nfs4_file *nfsd4_alloc_file(void)
2603} 3045}
2604 3046
2605/* OPEN Share state helper functions */ 3047/* OPEN Share state helper functions */
2606static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) 3048static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
2607{ 3049{
2608 unsigned int hashval = file_hashval(ino); 3050 unsigned int hashval = file_hashval(fh);
3051
3052 lockdep_assert_held(&state_lock);
2609 3053
2610 atomic_set(&fp->fi_ref, 1); 3054 atomic_set(&fp->fi_ref, 1);
3055 spin_lock_init(&fp->fi_lock);
2611 INIT_LIST_HEAD(&fp->fi_stateids); 3056 INIT_LIST_HEAD(&fp->fi_stateids);
2612 INIT_LIST_HEAD(&fp->fi_delegations); 3057 INIT_LIST_HEAD(&fp->fi_delegations);
2613 fp->fi_inode = igrab(ino); 3058 fh_copy_shallow(&fp->fi_fhandle, fh);
2614 fp->fi_had_conflict = false; 3059 fp->fi_had_conflict = false;
2615 fp->fi_lease = NULL; 3060 fp->fi_lease = NULL;
3061 fp->fi_share_deny = 0;
2616 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 3062 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
2617 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 3063 memset(fp->fi_access, 0, sizeof(fp->fi_access));
2618 spin_lock(&state_lock);
2619 hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); 3064 hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
2620 spin_unlock(&state_lock);
2621} 3065}
2622 3066
2623void 3067void
@@ -2673,6 +3117,28 @@ static void init_nfs4_replay(struct nfs4_replay *rp)
2673 rp->rp_status = nfserr_serverfault; 3117 rp->rp_status = nfserr_serverfault;
2674 rp->rp_buflen = 0; 3118 rp->rp_buflen = 0;
2675 rp->rp_buf = rp->rp_ibuf; 3119 rp->rp_buf = rp->rp_ibuf;
3120 mutex_init(&rp->rp_mutex);
3121}
3122
3123static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
3124 struct nfs4_stateowner *so)
3125{
3126 if (!nfsd4_has_session(cstate)) {
3127 mutex_lock(&so->so_replay.rp_mutex);
3128 cstate->replay_owner = so;
3129 atomic_inc(&so->so_count);
3130 }
3131}
3132
3133void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
3134{
3135 struct nfs4_stateowner *so = cstate->replay_owner;
3136
3137 if (so != NULL) {
3138 cstate->replay_owner = NULL;
3139 mutex_unlock(&so->so_replay.rp_mutex);
3140 nfs4_put_stateowner(so);
3141 }
2676} 3142}
2677 3143
2678static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) 3144static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
@@ -2693,111 +3159,172 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
2693 INIT_LIST_HEAD(&sop->so_stateids); 3159 INIT_LIST_HEAD(&sop->so_stateids);
2694 sop->so_client = clp; 3160 sop->so_client = clp;
2695 init_nfs4_replay(&sop->so_replay); 3161 init_nfs4_replay(&sop->so_replay);
3162 atomic_set(&sop->so_count, 1);
2696 return sop; 3163 return sop;
2697} 3164}
2698 3165
2699static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) 3166static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
2700{ 3167{
2701 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 3168 lockdep_assert_held(&clp->cl_lock);
2702 3169
2703 list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); 3170 list_add(&oo->oo_owner.so_strhash,
3171 &clp->cl_ownerstr_hashtbl[strhashval]);
2704 list_add(&oo->oo_perclient, &clp->cl_openowners); 3172 list_add(&oo->oo_perclient, &clp->cl_openowners);
2705} 3173}
2706 3174
3175static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
3176{
3177 unhash_openowner_locked(openowner(so));
3178}
3179
3180static void nfs4_free_openowner(struct nfs4_stateowner *so)
3181{
3182 struct nfs4_openowner *oo = openowner(so);
3183
3184 kmem_cache_free(openowner_slab, oo);
3185}
3186
3187static const struct nfs4_stateowner_operations openowner_ops = {
3188 .so_unhash = nfs4_unhash_openowner,
3189 .so_free = nfs4_free_openowner,
3190};
3191
2707static struct nfs4_openowner * 3192static struct nfs4_openowner *
2708alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { 3193alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
2709 struct nfs4_openowner *oo; 3194 struct nfsd4_compound_state *cstate)
3195{
3196 struct nfs4_client *clp = cstate->clp;
3197 struct nfs4_openowner *oo, *ret;
2710 3198
2711 oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); 3199 oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
2712 if (!oo) 3200 if (!oo)
2713 return NULL; 3201 return NULL;
3202 oo->oo_owner.so_ops = &openowner_ops;
2714 oo->oo_owner.so_is_open_owner = 1; 3203 oo->oo_owner.so_is_open_owner = 1;
2715 oo->oo_owner.so_seqid = open->op_seqid; 3204 oo->oo_owner.so_seqid = open->op_seqid;
2716 oo->oo_flags = NFS4_OO_NEW; 3205 oo->oo_flags = 0;
3206 if (nfsd4_has_session(cstate))
3207 oo->oo_flags |= NFS4_OO_CONFIRMED;
2717 oo->oo_time = 0; 3208 oo->oo_time = 0;
2718 oo->oo_last_closed_stid = NULL; 3209 oo->oo_last_closed_stid = NULL;
2719 INIT_LIST_HEAD(&oo->oo_close_lru); 3210 INIT_LIST_HEAD(&oo->oo_close_lru);
2720 hash_openowner(oo, clp, strhashval); 3211 spin_lock(&clp->cl_lock);
3212 ret = find_openstateowner_str_locked(strhashval, open, clp);
3213 if (ret == NULL) {
3214 hash_openowner(oo, clp, strhashval);
3215 ret = oo;
3216 } else
3217 nfs4_free_openowner(&oo->oo_owner);
3218 spin_unlock(&clp->cl_lock);
2721 return oo; 3219 return oo;
2722} 3220}
2723 3221
2724static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { 3222static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
2725 struct nfs4_openowner *oo = open->op_openowner; 3223 struct nfs4_openowner *oo = open->op_openowner;
2726 3224
3225 atomic_inc(&stp->st_stid.sc_count);
2727 stp->st_stid.sc_type = NFS4_OPEN_STID; 3226 stp->st_stid.sc_type = NFS4_OPEN_STID;
2728 INIT_LIST_HEAD(&stp->st_lockowners); 3227 INIT_LIST_HEAD(&stp->st_locks);
2729 list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
2730 list_add(&stp->st_perfile, &fp->fi_stateids);
2731 stp->st_stateowner = &oo->oo_owner; 3228 stp->st_stateowner = &oo->oo_owner;
3229 atomic_inc(&stp->st_stateowner->so_count);
2732 get_nfs4_file(fp); 3230 get_nfs4_file(fp);
2733 stp->st_file = fp; 3231 stp->st_stid.sc_file = fp;
2734 stp->st_access_bmap = 0; 3232 stp->st_access_bmap = 0;
2735 stp->st_deny_bmap = 0; 3233 stp->st_deny_bmap = 0;
2736 set_access(open->op_share_access, stp);
2737 set_deny(open->op_share_deny, stp);
2738 stp->st_openstp = NULL; 3234 stp->st_openstp = NULL;
3235 spin_lock(&oo->oo_owner.so_client->cl_lock);
3236 list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
3237 spin_lock(&fp->fi_lock);
3238 list_add(&stp->st_perfile, &fp->fi_stateids);
3239 spin_unlock(&fp->fi_lock);
3240 spin_unlock(&oo->oo_owner.so_client->cl_lock);
2739} 3241}
2740 3242
3243/*
3244 * In the 4.0 case we need to keep the owners around a little while to handle
3245 * CLOSE replay. We still do need to release any file access that is held by
3246 * them before returning however.
3247 */
2741static void 3248static void
2742move_to_close_lru(struct nfs4_openowner *oo, struct net *net) 3249move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
2743{ 3250{
2744 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 3251 struct nfs4_ol_stateid *last;
3252 struct nfs4_openowner *oo = openowner(s->st_stateowner);
3253 struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
3254 nfsd_net_id);
2745 3255
2746 dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); 3256 dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
2747 3257
3258 /*
3259 * We know that we hold one reference via nfsd4_close, and another
3260 * "persistent" reference for the client. If the refcount is higher
3261 * than 2, then there are still calls in progress that are using this
3262 * stateid. We can't put the sc_file reference until they are finished.
3263 * Wait for the refcount to drop to 2. Since it has been unhashed,
3264 * there should be no danger of the refcount going back up again at
3265 * this point.
3266 */
3267 wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
3268
3269 release_all_access(s);
3270 if (s->st_stid.sc_file) {
3271 put_nfs4_file(s->st_stid.sc_file);
3272 s->st_stid.sc_file = NULL;
3273 }
3274
3275 spin_lock(&nn->client_lock);
3276 last = oo->oo_last_closed_stid;
3277 oo->oo_last_closed_stid = s;
2748 list_move_tail(&oo->oo_close_lru, &nn->close_lru); 3278 list_move_tail(&oo->oo_close_lru, &nn->close_lru);
2749 oo->oo_time = get_seconds(); 3279 oo->oo_time = get_seconds();
3280 spin_unlock(&nn->client_lock);
3281 if (last)
3282 nfs4_put_stid(&last->st_stid);
2750} 3283}
2751 3284
2752static int 3285/* search file_hashtbl[] for file */
2753same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, 3286static struct nfs4_file *
2754 clientid_t *clid) 3287find_file_locked(struct knfsd_fh *fh)
2755{ 3288{
2756 return (sop->so_owner.len == owner->len) && 3289 unsigned int hashval = file_hashval(fh);
2757 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && 3290 struct nfs4_file *fp;
2758 (sop->so_client->cl_clientid.cl_id == clid->cl_id);
2759}
2760 3291
2761static struct nfs4_openowner * 3292 lockdep_assert_held(&state_lock);
2762find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
2763 bool sessions, struct nfsd_net *nn)
2764{
2765 struct nfs4_stateowner *so;
2766 struct nfs4_openowner *oo;
2767 struct nfs4_client *clp;
2768 3293
2769 list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { 3294 hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
2770 if (!so->so_is_open_owner) 3295 if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
2771 continue; 3296 get_nfs4_file(fp);
2772 if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { 3297 return fp;
2773 oo = openowner(so);
2774 clp = oo->oo_owner.so_client;
2775 if ((bool)clp->cl_minorversion != sessions)
2776 return NULL;
2777 renew_client(oo->oo_owner.so_client);
2778 return oo;
2779 } 3298 }
2780 } 3299 }
2781 return NULL; 3300 return NULL;
2782} 3301}
2783 3302
2784/* search file_hashtbl[] for file */
2785static struct nfs4_file * 3303static struct nfs4_file *
2786find_file(struct inode *ino) 3304find_file(struct knfsd_fh *fh)
2787{ 3305{
2788 unsigned int hashval = file_hashval(ino);
2789 struct nfs4_file *fp; 3306 struct nfs4_file *fp;
2790 3307
2791 spin_lock(&state_lock); 3308 spin_lock(&state_lock);
2792 hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { 3309 fp = find_file_locked(fh);
2793 if (fp->fi_inode == ino) { 3310 spin_unlock(&state_lock);
2794 get_nfs4_file(fp); 3311 return fp;
2795 spin_unlock(&state_lock); 3312}
2796 return fp; 3313
2797 } 3314static struct nfs4_file *
3315find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
3316{
3317 struct nfs4_file *fp;
3318
3319 spin_lock(&state_lock);
3320 fp = find_file_locked(fh);
3321 if (fp == NULL) {
3322 nfsd4_init_file(new, fh);
3323 fp = new;
2798 } 3324 }
2799 spin_unlock(&state_lock); 3325 spin_unlock(&state_lock);
2800 return NULL; 3326
3327 return fp;
2801} 3328}
2802 3329
2803/* 3330/*
@@ -2807,47 +3334,53 @@ find_file(struct inode *ino)
2807static __be32 3334static __be32
2808nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) 3335nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
2809{ 3336{
2810 struct inode *ino = current_fh->fh_dentry->d_inode;
2811 struct nfs4_file *fp; 3337 struct nfs4_file *fp;
2812 struct nfs4_ol_stateid *stp; 3338 __be32 ret = nfs_ok;
2813 __be32 ret;
2814 3339
2815 fp = find_file(ino); 3340 fp = find_file(&current_fh->fh_handle);
2816 if (!fp) 3341 if (!fp)
2817 return nfs_ok; 3342 return ret;
2818 ret = nfserr_locked; 3343 /* Check for conflicting share reservations */
2819 /* Search for conflicting share reservations */ 3344 spin_lock(&fp->fi_lock);
2820 list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { 3345 if (fp->fi_share_deny & deny_type)
2821 if (test_deny(deny_type, stp) || 3346 ret = nfserr_locked;
2822 test_deny(NFS4_SHARE_DENY_BOTH, stp)) 3347 spin_unlock(&fp->fi_lock);
2823 goto out;
2824 }
2825 ret = nfs_ok;
2826out:
2827 put_nfs4_file(fp); 3348 put_nfs4_file(fp);
2828 return ret; 3349 return ret;
2829} 3350}
2830 3351
2831static void nfsd_break_one_deleg(struct nfs4_delegation *dp) 3352void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
2832{ 3353{
2833 struct nfs4_client *clp = dp->dl_stid.sc_client; 3354 struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
2834 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 3355 nfsd_net_id);
2835 3356
2836 lockdep_assert_held(&state_lock); 3357 block_delegations(&dp->dl_stid.sc_file->fi_fhandle);
2837 /* We're assuming the state code never drops its reference 3358
3359 /*
3360 * We can't do this in nfsd_break_deleg_cb because it is
3361 * already holding inode->i_lock.
3362 *
3363 * If the dl_time != 0, then we know that it has already been
3364 * queued for a lease break. Don't queue it again.
3365 */
3366 spin_lock(&state_lock);
3367 if (dp->dl_time == 0) {
3368 dp->dl_time = get_seconds();
3369 list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
3370 }
3371 spin_unlock(&state_lock);
3372}
3373
3374static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
3375{
3376 /*
3377 * We're assuming the state code never drops its reference
2838 * without first removing the lease. Since we're in this lease 3378 * without first removing the lease. Since we're in this lease
2839 * callback (and since the lease code is serialized by the kernel 3379 * callback (and since the lease code is serialized by the kernel
2840 * lock) we know the server hasn't removed the lease yet, we know 3380 * lock) we know the server hasn't removed the lease yet, we know
2841 * it's safe to take a reference: */ 3381 * it's safe to take a reference.
2842 atomic_inc(&dp->dl_count); 3382 */
2843 3383 atomic_inc(&dp->dl_stid.sc_count);
2844 list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
2845
2846 /* Only place dl_time is set; protected by i_lock: */
2847 dp->dl_time = get_seconds();
2848
2849 block_delegations(&dp->dl_fh);
2850
2851 nfsd4_cb_recall(dp); 3384 nfsd4_cb_recall(dp);
2852} 3385}
2853 3386
@@ -2872,11 +3405,20 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
2872 */ 3405 */
2873 fl->fl_break_time = 0; 3406 fl->fl_break_time = 0;
2874 3407
2875 spin_lock(&state_lock); 3408 spin_lock(&fp->fi_lock);
2876 fp->fi_had_conflict = true; 3409 fp->fi_had_conflict = true;
2877 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) 3410 /*
2878 nfsd_break_one_deleg(dp); 3411 * If there are no delegations on the list, then we can't count on this
2879 spin_unlock(&state_lock); 3412 * lease ever being cleaned up. Set the fl_break_time to jiffies so that
3413 * time_out_leases will do it ASAP. The fact that fi_had_conflict is now
3414 * true should keep any new delegations from being hashed.
3415 */
3416 if (list_empty(&fp->fi_delegations))
3417 fl->fl_break_time = jiffies;
3418 else
3419 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
3420 nfsd_break_one_deleg(dp);
3421 spin_unlock(&fp->fi_lock);
2880} 3422}
2881 3423
2882static 3424static
@@ -2904,6 +3446,42 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
2904 return nfserr_bad_seqid; 3446 return nfserr_bad_seqid;
2905} 3447}
2906 3448
3449static __be32 lookup_clientid(clientid_t *clid,
3450 struct nfsd4_compound_state *cstate,
3451 struct nfsd_net *nn)
3452{
3453 struct nfs4_client *found;
3454
3455 if (cstate->clp) {
3456 found = cstate->clp;
3457 if (!same_clid(&found->cl_clientid, clid))
3458 return nfserr_stale_clientid;
3459 return nfs_ok;
3460 }
3461
3462 if (STALE_CLIENTID(clid, nn))
3463 return nfserr_stale_clientid;
3464
3465 /*
3466 * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
3467 * cached already then we know this is for is for v4.0 and "sessions"
3468 * will be false.
3469 */
3470 WARN_ON_ONCE(cstate->session);
3471 spin_lock(&nn->client_lock);
3472 found = find_confirmed_client(clid, false, nn);
3473 if (!found) {
3474 spin_unlock(&nn->client_lock);
3475 return nfserr_expired;
3476 }
3477 atomic_inc(&found->cl_refcount);
3478 spin_unlock(&nn->client_lock);
3479
3480 /* Cache the nfs4_client in cstate! */
3481 cstate->clp = found;
3482 return nfs_ok;
3483}
3484
2907__be32 3485__be32
2908nfsd4_process_open1(struct nfsd4_compound_state *cstate, 3486nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2909 struct nfsd4_open *open, struct nfsd_net *nn) 3487 struct nfsd4_open *open, struct nfsd_net *nn)
@@ -2924,19 +3502,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2924 if (open->op_file == NULL) 3502 if (open->op_file == NULL)
2925 return nfserr_jukebox; 3503 return nfserr_jukebox;
2926 3504
2927 strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); 3505 status = lookup_clientid(clientid, cstate, nn);
2928 oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); 3506 if (status)
3507 return status;
3508 clp = cstate->clp;
3509
3510 strhashval = ownerstr_hashval(&open->op_owner);
3511 oo = find_openstateowner_str(strhashval, open, clp);
2929 open->op_openowner = oo; 3512 open->op_openowner = oo;
2930 if (!oo) { 3513 if (!oo) {
2931 clp = find_confirmed_client(clientid, cstate->minorversion,
2932 nn);
2933 if (clp == NULL)
2934 return nfserr_expired;
2935 goto new_owner; 3514 goto new_owner;
2936 } 3515 }
2937 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { 3516 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
2938 /* Replace unconfirmed owners without checking for replay. */ 3517 /* Replace unconfirmed owners without checking for replay. */
2939 clp = oo->oo_owner.so_client;
2940 release_openowner(oo); 3518 release_openowner(oo);
2941 open->op_openowner = NULL; 3519 open->op_openowner = NULL;
2942 goto new_owner; 3520 goto new_owner;
@@ -2944,15 +3522,14 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
2944 status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); 3522 status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
2945 if (status) 3523 if (status)
2946 return status; 3524 return status;
2947 clp = oo->oo_owner.so_client;
2948 goto alloc_stateid; 3525 goto alloc_stateid;
2949new_owner: 3526new_owner:
2950 oo = alloc_init_open_stateowner(strhashval, clp, open); 3527 oo = alloc_init_open_stateowner(strhashval, open, cstate);
2951 if (oo == NULL) 3528 if (oo == NULL)
2952 return nfserr_jukebox; 3529 return nfserr_jukebox;
2953 open->op_openowner = oo; 3530 open->op_openowner = oo;
2954alloc_stateid: 3531alloc_stateid:
2955 open->op_stp = nfs4_alloc_stateid(clp); 3532 open->op_stp = nfs4_alloc_open_stateid(clp);
2956 if (!open->op_stp) 3533 if (!open->op_stp)
2957 return nfserr_jukebox; 3534 return nfserr_jukebox;
2958 return nfs_ok; 3535 return nfs_ok;
@@ -2994,14 +3571,18 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
2994{ 3571{
2995 int flags; 3572 int flags;
2996 __be32 status = nfserr_bad_stateid; 3573 __be32 status = nfserr_bad_stateid;
3574 struct nfs4_delegation *deleg;
2997 3575
2998 *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); 3576 deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
2999 if (*dp == NULL) 3577 if (deleg == NULL)
3000 goto out; 3578 goto out;
3001 flags = share_access_to_flags(open->op_share_access); 3579 flags = share_access_to_flags(open->op_share_access);
3002 status = nfs4_check_delegmode(*dp, flags); 3580 status = nfs4_check_delegmode(deleg, flags);
3003 if (status) 3581 if (status) {
3004 *dp = NULL; 3582 nfs4_put_stid(&deleg->dl_stid);
3583 goto out;
3584 }
3585 *dp = deleg;
3005out: 3586out:
3006 if (!nfsd4_is_deleg_cur(open)) 3587 if (!nfsd4_is_deleg_cur(open))
3007 return nfs_ok; 3588 return nfs_ok;
@@ -3011,24 +3592,25 @@ out:
3011 return nfs_ok; 3592 return nfs_ok;
3012} 3593}
3013 3594
3014static __be32 3595static struct nfs4_ol_stateid *
3015nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) 3596nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
3016{ 3597{
3017 struct nfs4_ol_stateid *local; 3598 struct nfs4_ol_stateid *local, *ret = NULL;
3018 struct nfs4_openowner *oo = open->op_openowner; 3599 struct nfs4_openowner *oo = open->op_openowner;
3019 3600
3601 spin_lock(&fp->fi_lock);
3020 list_for_each_entry(local, &fp->fi_stateids, st_perfile) { 3602 list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
3021 /* ignore lock owners */ 3603 /* ignore lock owners */
3022 if (local->st_stateowner->so_is_open_owner == 0) 3604 if (local->st_stateowner->so_is_open_owner == 0)
3023 continue; 3605 continue;
3024 /* remember if we have seen this open owner */ 3606 if (local->st_stateowner == &oo->oo_owner) {
3025 if (local->st_stateowner == &oo->oo_owner) 3607 ret = local;
3026 *stpp = local; 3608 atomic_inc(&ret->st_stid.sc_count);
3027 /* check for conflicting share reservations */ 3609 break;
3028 if (!test_share(local, open)) 3610 }
3029 return nfserr_share_denied;
3030 } 3611 }
3031 return nfs_ok; 3612 spin_unlock(&fp->fi_lock);
3613 return ret;
3032} 3614}
3033 3615
3034static inline int nfs4_access_to_access(u32 nfs4_access) 3616static inline int nfs4_access_to_access(u32 nfs4_access)
@@ -3042,24 +3624,6 @@ static inline int nfs4_access_to_access(u32 nfs4_access)
3042 return flags; 3624 return flags;
3043} 3625}
3044 3626
3045static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
3046 struct svc_fh *cur_fh, struct nfsd4_open *open)
3047{
3048 __be32 status;
3049 int oflag = nfs4_access_to_omode(open->op_share_access);
3050 int access = nfs4_access_to_access(open->op_share_access);
3051
3052 if (!fp->fi_fds[oflag]) {
3053 status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
3054 &fp->fi_fds[oflag]);
3055 if (status)
3056 return status;
3057 }
3058 nfs4_file_get_access(fp, oflag);
3059
3060 return nfs_ok;
3061}
3062
3063static inline __be32 3627static inline __be32
3064nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, 3628nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
3065 struct nfsd4_open *open) 3629 struct nfsd4_open *open)
@@ -3075,34 +3639,99 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
3075 return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); 3639 return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
3076} 3640}
3077 3641
3078static __be32 3642static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
3079nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) 3643 struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
3644 struct nfsd4_open *open)
3080{ 3645{
3081 u32 op_share_access = open->op_share_access; 3646 struct file *filp = NULL;
3082 bool new_access;
3083 __be32 status; 3647 __be32 status;
3648 int oflag = nfs4_access_to_omode(open->op_share_access);
3649 int access = nfs4_access_to_access(open->op_share_access);
3650 unsigned char old_access_bmap, old_deny_bmap;
3084 3651
3085 new_access = !test_access(op_share_access, stp); 3652 spin_lock(&fp->fi_lock);
3086 if (new_access) { 3653
3087 status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); 3654 /*
3088 if (status) 3655 * Are we trying to set a deny mode that would conflict with
3089 return status; 3656 * current access?
3657 */
3658 status = nfs4_file_check_deny(fp, open->op_share_deny);
3659 if (status != nfs_ok) {
3660 spin_unlock(&fp->fi_lock);
3661 goto out;
3090 } 3662 }
3091 status = nfsd4_truncate(rqstp, cur_fh, open); 3663
3092 if (status) { 3664 /* set access to the file */
3093 if (new_access) { 3665 status = nfs4_file_get_access(fp, open->op_share_access);
3094 int oflag = nfs4_access_to_omode(op_share_access); 3666 if (status != nfs_ok) {
3095 nfs4_file_put_access(fp, oflag); 3667 spin_unlock(&fp->fi_lock);
3096 } 3668 goto out;
3097 return status;
3098 } 3669 }
3099 /* remember the open */ 3670
3100 set_access(op_share_access, stp); 3671 /* Set access bits in stateid */
3672 old_access_bmap = stp->st_access_bmap;
3673 set_access(open->op_share_access, stp);
3674
3675 /* Set new deny mask */
3676 old_deny_bmap = stp->st_deny_bmap;
3101 set_deny(open->op_share_deny, stp); 3677 set_deny(open->op_share_deny, stp);
3678 fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
3102 3679
3103 return nfs_ok; 3680 if (!fp->fi_fds[oflag]) {
3681 spin_unlock(&fp->fi_lock);
3682 status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
3683 if (status)
3684 goto out_put_access;
3685 spin_lock(&fp->fi_lock);
3686 if (!fp->fi_fds[oflag]) {
3687 fp->fi_fds[oflag] = filp;
3688 filp = NULL;
3689 }
3690 }
3691 spin_unlock(&fp->fi_lock);
3692 if (filp)
3693 fput(filp);
3694
3695 status = nfsd4_truncate(rqstp, cur_fh, open);
3696 if (status)
3697 goto out_put_access;
3698out:
3699 return status;
3700out_put_access:
3701 stp->st_access_bmap = old_access_bmap;
3702 nfs4_file_put_access(fp, open->op_share_access);
3703 reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
3704 goto out;
3104} 3705}
3105 3706
3707static __be32
3708nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
3709{
3710 __be32 status;
3711 unsigned char old_deny_bmap;
3712
3713 if (!test_access(open->op_share_access, stp))
3714 return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
3715
3716 /* test and set deny mode */
3717 spin_lock(&fp->fi_lock);
3718 status = nfs4_file_check_deny(fp, open->op_share_deny);
3719 if (status == nfs_ok) {
3720 old_deny_bmap = stp->st_deny_bmap;
3721 set_deny(open->op_share_deny, stp);
3722 fp->fi_share_deny |=
3723 (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
3724 }
3725 spin_unlock(&fp->fi_lock);
3726
3727 if (status != nfs_ok)
3728 return status;
3729
3730 status = nfsd4_truncate(rqstp, cur_fh, open);
3731 if (status != nfs_ok)
3732 reset_union_bmap_deny(old_deny_bmap, stp);
3733 return status;
3734}
3106 3735
3107static void 3736static void
3108nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) 3737nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
@@ -3123,7 +3752,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
3123 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; 3752 return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
3124} 3753}
3125 3754
3126static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) 3755static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
3127{ 3756{
3128 struct file_lock *fl; 3757 struct file_lock *fl;
3129 3758
@@ -3135,53 +3764,101 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
3135 fl->fl_flags = FL_DELEG; 3764 fl->fl_flags = FL_DELEG;
3136 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; 3765 fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
3137 fl->fl_end = OFFSET_MAX; 3766 fl->fl_end = OFFSET_MAX;
3138 fl->fl_owner = (fl_owner_t)(dp->dl_file); 3767 fl->fl_owner = (fl_owner_t)fp;
3139 fl->fl_pid = current->tgid; 3768 fl->fl_pid = current->tgid;
3140 return fl; 3769 return fl;
3141} 3770}
3142 3771
3143static int nfs4_setlease(struct nfs4_delegation *dp) 3772static int nfs4_setlease(struct nfs4_delegation *dp)
3144{ 3773{
3145 struct nfs4_file *fp = dp->dl_file; 3774 struct nfs4_file *fp = dp->dl_stid.sc_file;
3146 struct file_lock *fl; 3775 struct file_lock *fl;
3147 int status; 3776 struct file *filp;
3777 int status = 0;
3148 3778
3149 fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); 3779 fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
3150 if (!fl) 3780 if (!fl)
3151 return -ENOMEM; 3781 return -ENOMEM;
3152 fl->fl_file = find_readable_file(fp); 3782 filp = find_readable_file(fp);
3153 status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); 3783 if (!filp) {
3154 if (status) 3784 /* We should always have a readable file here */
3155 goto out_free; 3785 WARN_ON_ONCE(1);
3786 return -EBADF;
3787 }
3788 fl->fl_file = filp;
3789 status = vfs_setlease(filp, fl->fl_type, &fl);
3790 if (status) {
3791 locks_free_lock(fl);
3792 goto out_fput;
3793 }
3794 spin_lock(&state_lock);
3795 spin_lock(&fp->fi_lock);
3796 /* Did the lease get broken before we took the lock? */
3797 status = -EAGAIN;
3798 if (fp->fi_had_conflict)
3799 goto out_unlock;
3800 /* Race breaker */
3801 if (fp->fi_lease) {
3802 status = 0;
3803 atomic_inc(&fp->fi_delegees);
3804 hash_delegation_locked(dp, fp);
3805 goto out_unlock;
3806 }
3156 fp->fi_lease = fl; 3807 fp->fi_lease = fl;
3157 fp->fi_deleg_file = get_file(fl->fl_file); 3808 fp->fi_deleg_file = filp;
3158 atomic_set(&fp->fi_delegees, 1); 3809 atomic_set(&fp->fi_delegees, 1);
3159 spin_lock(&state_lock);
3160 hash_delegation_locked(dp, fp); 3810 hash_delegation_locked(dp, fp);
3811 spin_unlock(&fp->fi_lock);
3161 spin_unlock(&state_lock); 3812 spin_unlock(&state_lock);
3162 return 0; 3813 return 0;
3163out_free: 3814out_unlock:
3164 locks_free_lock(fl); 3815 spin_unlock(&fp->fi_lock);
3816 spin_unlock(&state_lock);
3817out_fput:
3818 fput(filp);
3165 return status; 3819 return status;
3166} 3820}
3167 3821
3168static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) 3822static struct nfs4_delegation *
3823nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
3824 struct nfs4_file *fp)
3169{ 3825{
3826 int status;
3827 struct nfs4_delegation *dp;
3828
3170 if (fp->fi_had_conflict) 3829 if (fp->fi_had_conflict)
3171 return -EAGAIN; 3830 return ERR_PTR(-EAGAIN);
3831
3832 dp = alloc_init_deleg(clp, fh);
3833 if (!dp)
3834 return ERR_PTR(-ENOMEM);
3835
3172 get_nfs4_file(fp); 3836 get_nfs4_file(fp);
3173 dp->dl_file = fp;
3174 if (!fp->fi_lease)
3175 return nfs4_setlease(dp);
3176 spin_lock(&state_lock); 3837 spin_lock(&state_lock);
3838 spin_lock(&fp->fi_lock);
3839 dp->dl_stid.sc_file = fp;
3840 if (!fp->fi_lease) {
3841 spin_unlock(&fp->fi_lock);
3842 spin_unlock(&state_lock);
3843 status = nfs4_setlease(dp);
3844 goto out;
3845 }
3177 atomic_inc(&fp->fi_delegees); 3846 atomic_inc(&fp->fi_delegees);
3178 if (fp->fi_had_conflict) { 3847 if (fp->fi_had_conflict) {
3179 spin_unlock(&state_lock); 3848 status = -EAGAIN;
3180 return -EAGAIN; 3849 goto out_unlock;
3181 } 3850 }
3182 hash_delegation_locked(dp, fp); 3851 hash_delegation_locked(dp, fp);
3852 status = 0;
3853out_unlock:
3854 spin_unlock(&fp->fi_lock);
3183 spin_unlock(&state_lock); 3855 spin_unlock(&state_lock);
3184 return 0; 3856out:
3857 if (status) {
3858 nfs4_put_stid(&dp->dl_stid);
3859 return ERR_PTR(status);
3860 }
3861 return dp;
3185} 3862}
3186 3863
3187static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) 3864static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3212,11 +3889,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
3212 * proper support for them. 3889 * proper support for them.
3213 */ 3890 */
3214static void 3891static void
3215nfs4_open_delegation(struct net *net, struct svc_fh *fh, 3892nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
3216 struct nfsd4_open *open, struct nfs4_ol_stateid *stp) 3893 struct nfs4_ol_stateid *stp)
3217{ 3894{
3218 struct nfs4_delegation *dp; 3895 struct nfs4_delegation *dp;
3219 struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); 3896 struct nfs4_openowner *oo = openowner(stp->st_stateowner);
3897 struct nfs4_client *clp = stp->st_stid.sc_client;
3220 int cb_up; 3898 int cb_up;
3221 int status = 0; 3899 int status = 0;
3222 3900
@@ -3235,7 +3913,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
3235 * Let's not give out any delegations till everyone's 3913 * Let's not give out any delegations till everyone's
3236 * had the chance to reclaim theirs.... 3914 * had the chance to reclaim theirs....
3237 */ 3915 */
3238 if (locks_in_grace(net)) 3916 if (locks_in_grace(clp->net))
3239 goto out_no_deleg; 3917 goto out_no_deleg;
3240 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) 3918 if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
3241 goto out_no_deleg; 3919 goto out_no_deleg;
@@ -3254,21 +3932,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
3254 default: 3932 default:
3255 goto out_no_deleg; 3933 goto out_no_deleg;
3256 } 3934 }
3257 dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); 3935 dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
3258 if (dp == NULL) 3936 if (IS_ERR(dp))
3259 goto out_no_deleg; 3937 goto out_no_deleg;
3260 status = nfs4_set_delegation(dp, stp->st_file);
3261 if (status)
3262 goto out_free;
3263 3938
3264 memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); 3939 memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
3265 3940
3266 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", 3941 dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
3267 STATEID_VAL(&dp->dl_stid.sc_stateid)); 3942 STATEID_VAL(&dp->dl_stid.sc_stateid));
3268 open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; 3943 open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
3944 nfs4_put_stid(&dp->dl_stid);
3269 return; 3945 return;
3270out_free:
3271 destroy_delegation(dp);
3272out_no_deleg: 3946out_no_deleg:
3273 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; 3947 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
3274 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && 3948 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
@@ -3301,16 +3975,12 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
3301 */ 3975 */
3302} 3976}
3303 3977
3304/*
3305 * called with nfs4_lock_state() held.
3306 */
3307__be32 3978__be32
3308nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) 3979nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
3309{ 3980{
3310 struct nfsd4_compoundres *resp = rqstp->rq_resp; 3981 struct nfsd4_compoundres *resp = rqstp->rq_resp;
3311 struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; 3982 struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
3312 struct nfs4_file *fp = NULL; 3983 struct nfs4_file *fp = NULL;
3313 struct inode *ino = current_fh->fh_dentry->d_inode;
3314 struct nfs4_ol_stateid *stp = NULL; 3984 struct nfs4_ol_stateid *stp = NULL;
3315 struct nfs4_delegation *dp = NULL; 3985 struct nfs4_delegation *dp = NULL;
3316 __be32 status; 3986 __be32 status;
@@ -3320,21 +3990,18 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
3320 * and check for delegations in the process of being recalled. 3990 * and check for delegations in the process of being recalled.
3321 * If not found, create the nfs4_file struct 3991 * If not found, create the nfs4_file struct
3322 */ 3992 */
3323 fp = find_file(ino); 3993 fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
3324 if (fp) { 3994 if (fp != open->op_file) {
3325 if ((status = nfs4_check_open(fp, open, &stp)))
3326 goto out;
3327 status = nfs4_check_deleg(cl, open, &dp); 3995 status = nfs4_check_deleg(cl, open, &dp);
3328 if (status) 3996 if (status)
3329 goto out; 3997 goto out;
3998 stp = nfsd4_find_existing_open(fp, open);
3330 } else { 3999 } else {
4000 open->op_file = NULL;
3331 status = nfserr_bad_stateid; 4001 status = nfserr_bad_stateid;
3332 if (nfsd4_is_deleg_cur(open)) 4002 if (nfsd4_is_deleg_cur(open))
3333 goto out; 4003 goto out;
3334 status = nfserr_jukebox; 4004 status = nfserr_jukebox;
3335 fp = open->op_file;
3336 open->op_file = NULL;
3337 nfsd4_init_file(fp, ino);
3338 } 4005 }
3339 4006
3340 /* 4007 /*
@@ -3347,22 +4014,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
3347 if (status) 4014 if (status)
3348 goto out; 4015 goto out;
3349 } else { 4016 } else {
3350 status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
3351 if (status)
3352 goto out;
3353 status = nfsd4_truncate(rqstp, current_fh, open);
3354 if (status)
3355 goto out;
3356 stp = open->op_stp; 4017 stp = open->op_stp;
3357 open->op_stp = NULL; 4018 open->op_stp = NULL;
3358 init_open_stateid(stp, fp, open); 4019 init_open_stateid(stp, fp, open);
4020 status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
4021 if (status) {
4022 release_open_stateid(stp);
4023 goto out;
4024 }
3359 } 4025 }
3360 update_stateid(&stp->st_stid.sc_stateid); 4026 update_stateid(&stp->st_stid.sc_stateid);
3361 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4027 memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
3362 4028
3363 if (nfsd4_has_session(&resp->cstate)) { 4029 if (nfsd4_has_session(&resp->cstate)) {
3364 open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
3365
3366 if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { 4030 if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
3367 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; 4031 open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
3368 open->op_why_no_deleg = WND4_NOT_WANTED; 4032 open->op_why_no_deleg = WND4_NOT_WANTED;
@@ -3374,7 +4038,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
3374 * Attempt to hand out a delegation. No error return, because the 4038 * Attempt to hand out a delegation. No error return, because the
3375 * OPEN succeeds even if we fail. 4039 * OPEN succeeds even if we fail.
3376 */ 4040 */
3377 nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); 4041 nfs4_open_delegation(current_fh, open, stp);
3378nodeleg: 4042nodeleg:
3379 status = nfs_ok; 4043 status = nfs_ok;
3380 4044
@@ -3397,41 +4061,27 @@ out:
3397 if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && 4061 if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
3398 !nfsd4_has_session(&resp->cstate)) 4062 !nfsd4_has_session(&resp->cstate))
3399 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; 4063 open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
4064 if (dp)
4065 nfs4_put_stid(&dp->dl_stid);
4066 if (stp)
4067 nfs4_put_stid(&stp->st_stid);
3400 4068
3401 return status; 4069 return status;
3402} 4070}
3403 4071
3404void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) 4072void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
4073 struct nfsd4_open *open, __be32 status)
3405{ 4074{
3406 if (open->op_openowner) { 4075 if (open->op_openowner) {
3407 struct nfs4_openowner *oo = open->op_openowner; 4076 struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
3408 4077
3409 if (!list_empty(&oo->oo_owner.so_stateids)) 4078 nfsd4_cstate_assign_replay(cstate, so);
3410 list_del_init(&oo->oo_close_lru); 4079 nfs4_put_stateowner(so);
3411 if (oo->oo_flags & NFS4_OO_NEW) {
3412 if (status) {
3413 release_openowner(oo);
3414 open->op_openowner = NULL;
3415 } else
3416 oo->oo_flags &= ~NFS4_OO_NEW;
3417 }
3418 } 4080 }
3419 if (open->op_file) 4081 if (open->op_file)
3420 nfsd4_free_file(open->op_file); 4082 nfsd4_free_file(open->op_file);
3421 if (open->op_stp) 4083 if (open->op_stp)
3422 free_generic_stateid(open->op_stp); 4084 nfs4_put_stid(&open->op_stp->st_stid);
3423}
3424
3425static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp)
3426{
3427 struct nfs4_client *found;
3428
3429 if (STALE_CLIENTID(clid, nn))
3430 return nfserr_stale_clientid;
3431 found = find_confirmed_client(clid, session, nn);
3432 if (clp)
3433 *clp = found;
3434 return found ? nfs_ok : nfserr_expired;
3435} 4085}
3436 4086
3437__be32 4087__be32
@@ -3442,19 +4092,18 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3442 __be32 status; 4092 __be32 status;
3443 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 4093 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
3444 4094
3445 nfs4_lock_state();
3446 dprintk("process_renew(%08x/%08x): starting\n", 4095 dprintk("process_renew(%08x/%08x): starting\n",
3447 clid->cl_boot, clid->cl_id); 4096 clid->cl_boot, clid->cl_id);
3448 status = lookup_clientid(clid, cstate->minorversion, nn, &clp); 4097 status = lookup_clientid(clid, cstate, nn);
3449 if (status) 4098 if (status)
3450 goto out; 4099 goto out;
4100 clp = cstate->clp;
3451 status = nfserr_cb_path_down; 4101 status = nfserr_cb_path_down;
3452 if (!list_empty(&clp->cl_delegations) 4102 if (!list_empty(&clp->cl_delegations)
3453 && clp->cl_cb_state != NFSD4_CB_UP) 4103 && clp->cl_cb_state != NFSD4_CB_UP)
3454 goto out; 4104 goto out;
3455 status = nfs_ok; 4105 status = nfs_ok;
3456out: 4106out:
3457 nfs4_unlock_state();
3458 return status; 4107 return status;
3459} 4108}
3460 4109
@@ -3483,12 +4132,11 @@ nfs4_laundromat(struct nfsd_net *nn)
3483 struct nfs4_client *clp; 4132 struct nfs4_client *clp;
3484 struct nfs4_openowner *oo; 4133 struct nfs4_openowner *oo;
3485 struct nfs4_delegation *dp; 4134 struct nfs4_delegation *dp;
4135 struct nfs4_ol_stateid *stp;
3486 struct list_head *pos, *next, reaplist; 4136 struct list_head *pos, *next, reaplist;
3487 time_t cutoff = get_seconds() - nn->nfsd4_lease; 4137 time_t cutoff = get_seconds() - nn->nfsd4_lease;
3488 time_t t, new_timeo = nn->nfsd4_lease; 4138 time_t t, new_timeo = nn->nfsd4_lease;
3489 4139
3490 nfs4_lock_state();
3491
3492 dprintk("NFSD: laundromat service - starting\n"); 4140 dprintk("NFSD: laundromat service - starting\n");
3493 nfsd4_end_grace(nn); 4141 nfsd4_end_grace(nn);
3494 INIT_LIST_HEAD(&reaplist); 4142 INIT_LIST_HEAD(&reaplist);
@@ -3505,13 +4153,14 @@ nfs4_laundromat(struct nfsd_net *nn)
3505 clp->cl_clientid.cl_id); 4153 clp->cl_clientid.cl_id);
3506 continue; 4154 continue;
3507 } 4155 }
3508 list_move(&clp->cl_lru, &reaplist); 4156 list_add(&clp->cl_lru, &reaplist);
3509 } 4157 }
3510 spin_unlock(&nn->client_lock); 4158 spin_unlock(&nn->client_lock);
3511 list_for_each_safe(pos, next, &reaplist) { 4159 list_for_each_safe(pos, next, &reaplist) {
3512 clp = list_entry(pos, struct nfs4_client, cl_lru); 4160 clp = list_entry(pos, struct nfs4_client, cl_lru);
3513 dprintk("NFSD: purging unused client (clientid %08x)\n", 4161 dprintk("NFSD: purging unused client (clientid %08x)\n",
3514 clp->cl_clientid.cl_id); 4162 clp->cl_clientid.cl_id);
4163 list_del_init(&clp->cl_lru);
3515 expire_client(clp); 4164 expire_client(clp);
3516 } 4165 }
3517 spin_lock(&state_lock); 4166 spin_lock(&state_lock);
@@ -3524,24 +4173,37 @@ nfs4_laundromat(struct nfsd_net *nn)
3524 new_timeo = min(new_timeo, t); 4173 new_timeo = min(new_timeo, t);
3525 break; 4174 break;
3526 } 4175 }
3527 list_move(&dp->dl_recall_lru, &reaplist); 4176 unhash_delegation_locked(dp);
4177 list_add(&dp->dl_recall_lru, &reaplist);
3528 } 4178 }
3529 spin_unlock(&state_lock); 4179 spin_unlock(&state_lock);
3530 list_for_each_safe(pos, next, &reaplist) { 4180 while (!list_empty(&reaplist)) {
3531 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 4181 dp = list_first_entry(&reaplist, struct nfs4_delegation,
4182 dl_recall_lru);
4183 list_del_init(&dp->dl_recall_lru);
3532 revoke_delegation(dp); 4184 revoke_delegation(dp);
3533 } 4185 }
3534 list_for_each_safe(pos, next, &nn->close_lru) { 4186
3535 oo = container_of(pos, struct nfs4_openowner, oo_close_lru); 4187 spin_lock(&nn->client_lock);
3536 if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { 4188 while (!list_empty(&nn->close_lru)) {
4189 oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
4190 oo_close_lru);
4191 if (time_after((unsigned long)oo->oo_time,
4192 (unsigned long)cutoff)) {
3537 t = oo->oo_time - cutoff; 4193 t = oo->oo_time - cutoff;
3538 new_timeo = min(new_timeo, t); 4194 new_timeo = min(new_timeo, t);
3539 break; 4195 break;
3540 } 4196 }
3541 release_openowner(oo); 4197 list_del_init(&oo->oo_close_lru);
4198 stp = oo->oo_last_closed_stid;
4199 oo->oo_last_closed_stid = NULL;
4200 spin_unlock(&nn->client_lock);
4201 nfs4_put_stid(&stp->st_stid);
4202 spin_lock(&nn->client_lock);
3542 } 4203 }
4204 spin_unlock(&nn->client_lock);
4205
3543 new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); 4206 new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
3544 nfs4_unlock_state();
3545 return new_timeo; 4207 return new_timeo;
3546} 4208}
3547 4209
@@ -3564,7 +4226,7 @@ laundromat_main(struct work_struct *laundry)
3564 4226
3565static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) 4227static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
3566{ 4228{
3567 if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) 4229 if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle))
3568 return nfserr_bad_stateid; 4230 return nfserr_bad_stateid;
3569 return nfs_ok; 4231 return nfs_ok;
3570} 4232}
@@ -3666,10 +4328,10 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
3666{ 4328{
3667 struct nfs4_stid *s; 4329 struct nfs4_stid *s;
3668 struct nfs4_ol_stateid *ols; 4330 struct nfs4_ol_stateid *ols;
3669 __be32 status; 4331 __be32 status = nfserr_bad_stateid;
3670 4332
3671 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4333 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3672 return nfserr_bad_stateid; 4334 return status;
3673 /* Client debugging aid. */ 4335 /* Client debugging aid. */
3674 if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { 4336 if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
3675 char addr_str[INET6_ADDRSTRLEN]; 4337 char addr_str[INET6_ADDRSTRLEN];
@@ -3677,53 +4339,62 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
3677 sizeof(addr_str)); 4339 sizeof(addr_str));
3678 pr_warn_ratelimited("NFSD: client %s testing state ID " 4340 pr_warn_ratelimited("NFSD: client %s testing state ID "
3679 "with incorrect client ID\n", addr_str); 4341 "with incorrect client ID\n", addr_str);
3680 return nfserr_bad_stateid; 4342 return status;
3681 } 4343 }
3682 s = find_stateid(cl, stateid); 4344 spin_lock(&cl->cl_lock);
4345 s = find_stateid_locked(cl, stateid);
3683 if (!s) 4346 if (!s)
3684 return nfserr_bad_stateid; 4347 goto out_unlock;
3685 status = check_stateid_generation(stateid, &s->sc_stateid, 1); 4348 status = check_stateid_generation(stateid, &s->sc_stateid, 1);
3686 if (status) 4349 if (status)
3687 return status; 4350 goto out_unlock;
3688 switch (s->sc_type) { 4351 switch (s->sc_type) {
3689 case NFS4_DELEG_STID: 4352 case NFS4_DELEG_STID:
3690 return nfs_ok; 4353 status = nfs_ok;
4354 break;
3691 case NFS4_REVOKED_DELEG_STID: 4355 case NFS4_REVOKED_DELEG_STID:
3692 return nfserr_deleg_revoked; 4356 status = nfserr_deleg_revoked;
4357 break;
3693 case NFS4_OPEN_STID: 4358 case NFS4_OPEN_STID:
3694 case NFS4_LOCK_STID: 4359 case NFS4_LOCK_STID:
3695 ols = openlockstateid(s); 4360 ols = openlockstateid(s);
3696 if (ols->st_stateowner->so_is_open_owner 4361 if (ols->st_stateowner->so_is_open_owner
3697 && !(openowner(ols->st_stateowner)->oo_flags 4362 && !(openowner(ols->st_stateowner)->oo_flags
3698 & NFS4_OO_CONFIRMED)) 4363 & NFS4_OO_CONFIRMED))
3699 return nfserr_bad_stateid; 4364 status = nfserr_bad_stateid;
3700 return nfs_ok; 4365 else
4366 status = nfs_ok;
4367 break;
3701 default: 4368 default:
3702 printk("unknown stateid type %x\n", s->sc_type); 4369 printk("unknown stateid type %x\n", s->sc_type);
4370 /* Fallthrough */
3703 case NFS4_CLOSED_STID: 4371 case NFS4_CLOSED_STID:
3704 return nfserr_bad_stateid; 4372 case NFS4_CLOSED_DELEG_STID:
4373 status = nfserr_bad_stateid;
3705 } 4374 }
4375out_unlock:
4376 spin_unlock(&cl->cl_lock);
4377 return status;
3706} 4378}
3707 4379
3708static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, 4380static __be32
3709 struct nfs4_stid **s, bool sessions, 4381nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
3710 struct nfsd_net *nn) 4382 stateid_t *stateid, unsigned char typemask,
4383 struct nfs4_stid **s, struct nfsd_net *nn)
3711{ 4384{
3712 struct nfs4_client *cl;
3713 __be32 status; 4385 __be32 status;
3714 4386
3715 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4387 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3716 return nfserr_bad_stateid; 4388 return nfserr_bad_stateid;
3717 status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, 4389 status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
3718 nn, &cl);
3719 if (status == nfserr_stale_clientid) { 4390 if (status == nfserr_stale_clientid) {
3720 if (sessions) 4391 if (cstate->session)
3721 return nfserr_bad_stateid; 4392 return nfserr_bad_stateid;
3722 return nfserr_stale_stateid; 4393 return nfserr_stale_stateid;
3723 } 4394 }
3724 if (status) 4395 if (status)
3725 return status; 4396 return status;
3726 *s = find_stateid_by_type(cl, stateid, typemask); 4397 *s = find_stateid_by_type(cstate->clp, stateid, typemask);
3727 if (!*s) 4398 if (!*s)
3728 return nfserr_bad_stateid; 4399 return nfserr_bad_stateid;
3729 return nfs_ok; 4400 return nfs_ok;
@@ -3754,12 +4425,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3754 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) 4425 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
3755 return check_special_stateids(net, current_fh, stateid, flags); 4426 return check_special_stateids(net, current_fh, stateid, flags);
3756 4427
3757 nfs4_lock_state(); 4428 status = nfsd4_lookup_stateid(cstate, stateid,
3758 4429 NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
3759 status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, 4430 &s, nn);
3760 &s, cstate->minorversion, nn);
3761 if (status) 4431 if (status)
3762 goto out; 4432 return status;
3763 status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); 4433 status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
3764 if (status) 4434 if (status)
3765 goto out; 4435 goto out;
@@ -3770,12 +4440,13 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3770 if (status) 4440 if (status)
3771 goto out; 4441 goto out;
3772 if (filpp) { 4442 if (filpp) {
3773 file = dp->dl_file->fi_deleg_file; 4443 file = dp->dl_stid.sc_file->fi_deleg_file;
3774 if (!file) { 4444 if (!file) {
3775 WARN_ON_ONCE(1); 4445 WARN_ON_ONCE(1);
3776 status = nfserr_serverfault; 4446 status = nfserr_serverfault;
3777 goto out; 4447 goto out;
3778 } 4448 }
4449 get_file(file);
3779 } 4450 }
3780 break; 4451 break;
3781 case NFS4_OPEN_STID: 4452 case NFS4_OPEN_STID:
@@ -3791,10 +4462,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3791 if (status) 4462 if (status)
3792 goto out; 4463 goto out;
3793 if (filpp) { 4464 if (filpp) {
4465 struct nfs4_file *fp = stp->st_stid.sc_file;
4466
3794 if (flags & RD_STATE) 4467 if (flags & RD_STATE)
3795 file = find_readable_file(stp->st_file); 4468 file = find_readable_file(fp);
3796 else 4469 else
3797 file = find_writeable_file(stp->st_file); 4470 file = find_writeable_file(fp);
3798 } 4471 }
3799 break; 4472 break;
3800 default: 4473 default:
@@ -3803,28 +4476,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
3803 } 4476 }
3804 status = nfs_ok; 4477 status = nfs_ok;
3805 if (file) 4478 if (file)
3806 *filpp = get_file(file); 4479 *filpp = file;
3807out: 4480out:
3808 nfs4_unlock_state(); 4481 nfs4_put_stid(s);
3809 return status; 4482 return status;
3810} 4483}
3811 4484
3812static __be32
3813nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
3814{
3815 struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
3816
3817 if (check_for_locks(stp->st_file, lo))
3818 return nfserr_locks_held;
3819 /*
3820 * Currently there's a 1-1 lock stateid<->lockowner
3821 * correspondance, and we have to delete the lockowner when we
3822 * delete the lock stateid:
3823 */
3824 release_lockowner(lo);
3825 return nfs_ok;
3826}
3827
3828/* 4485/*
3829 * Test if the stateid is valid 4486 * Test if the stateid is valid
3830 */ 4487 */
@@ -3835,11 +4492,9 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3835 struct nfsd4_test_stateid_id *stateid; 4492 struct nfsd4_test_stateid_id *stateid;
3836 struct nfs4_client *cl = cstate->session->se_client; 4493 struct nfs4_client *cl = cstate->session->se_client;
3837 4494
3838 nfs4_lock_state();
3839 list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) 4495 list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
3840 stateid->ts_id_status = 4496 stateid->ts_id_status =
3841 nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); 4497 nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
3842 nfs4_unlock_state();
3843 4498
3844 return nfs_ok; 4499 return nfs_ok;
3845} 4500}
@@ -3851,37 +4506,50 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3851 stateid_t *stateid = &free_stateid->fr_stateid; 4506 stateid_t *stateid = &free_stateid->fr_stateid;
3852 struct nfs4_stid *s; 4507 struct nfs4_stid *s;
3853 struct nfs4_delegation *dp; 4508 struct nfs4_delegation *dp;
4509 struct nfs4_ol_stateid *stp;
3854 struct nfs4_client *cl = cstate->session->se_client; 4510 struct nfs4_client *cl = cstate->session->se_client;
3855 __be32 ret = nfserr_bad_stateid; 4511 __be32 ret = nfserr_bad_stateid;
3856 4512
3857 nfs4_lock_state(); 4513 spin_lock(&cl->cl_lock);
3858 s = find_stateid(cl, stateid); 4514 s = find_stateid_locked(cl, stateid);
3859 if (!s) 4515 if (!s)
3860 goto out; 4516 goto out_unlock;
3861 switch (s->sc_type) { 4517 switch (s->sc_type) {
3862 case NFS4_DELEG_STID: 4518 case NFS4_DELEG_STID:
3863 ret = nfserr_locks_held; 4519 ret = nfserr_locks_held;
3864 goto out; 4520 break;
3865 case NFS4_OPEN_STID: 4521 case NFS4_OPEN_STID:
3866 case NFS4_LOCK_STID:
3867 ret = check_stateid_generation(stateid, &s->sc_stateid, 1); 4522 ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
3868 if (ret) 4523 if (ret)
3869 goto out; 4524 break;
3870 if (s->sc_type == NFS4_LOCK_STID) 4525 ret = nfserr_locks_held;
3871 ret = nfsd4_free_lock_stateid(openlockstateid(s));
3872 else
3873 ret = nfserr_locks_held;
3874 break; 4526 break;
4527 case NFS4_LOCK_STID:
4528 ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
4529 if (ret)
4530 break;
4531 stp = openlockstateid(s);
4532 ret = nfserr_locks_held;
4533 if (check_for_locks(stp->st_stid.sc_file,
4534 lockowner(stp->st_stateowner)))
4535 break;
4536 unhash_lock_stateid(stp);
4537 spin_unlock(&cl->cl_lock);
4538 nfs4_put_stid(s);
4539 ret = nfs_ok;
4540 goto out;
3875 case NFS4_REVOKED_DELEG_STID: 4541 case NFS4_REVOKED_DELEG_STID:
3876 dp = delegstateid(s); 4542 dp = delegstateid(s);
3877 destroy_revoked_delegation(dp); 4543 list_del_init(&dp->dl_recall_lru);
4544 spin_unlock(&cl->cl_lock);
4545 nfs4_put_stid(s);
3878 ret = nfs_ok; 4546 ret = nfs_ok;
3879 break; 4547 goto out;
3880 default: 4548 /* Default falls through and returns nfserr_bad_stateid */
3881 ret = nfserr_bad_stateid;
3882 } 4549 }
4550out_unlock:
4551 spin_unlock(&cl->cl_lock);
3883out: 4552out:
3884 nfs4_unlock_state();
3885 return ret; 4553 return ret;
3886} 4554}
3887 4555
@@ -3926,20 +4594,24 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3926{ 4594{
3927 __be32 status; 4595 __be32 status;
3928 struct nfs4_stid *s; 4596 struct nfs4_stid *s;
4597 struct nfs4_ol_stateid *stp = NULL;
3929 4598
3930 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, 4599 dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
3931 seqid, STATEID_VAL(stateid)); 4600 seqid, STATEID_VAL(stateid));
3932 4601
3933 *stpp = NULL; 4602 *stpp = NULL;
3934 status = nfsd4_lookup_stateid(stateid, typemask, &s, 4603 status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
3935 cstate->minorversion, nn);
3936 if (status) 4604 if (status)
3937 return status; 4605 return status;
3938 *stpp = openlockstateid(s); 4606 stp = openlockstateid(s);
3939 if (!nfsd4_has_session(cstate)) 4607 nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
3940 cstate->replay_owner = (*stpp)->st_stateowner;
3941 4608
3942 return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); 4609 status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
4610 if (!status)
4611 *stpp = stp;
4612 else
4613 nfs4_put_stid(&stp->st_stid);
4614 return status;
3943} 4615}
3944 4616
3945static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, 4617static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
@@ -3947,14 +4619,18 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
3947{ 4619{
3948 __be32 status; 4620 __be32 status;
3949 struct nfs4_openowner *oo; 4621 struct nfs4_openowner *oo;
4622 struct nfs4_ol_stateid *stp;
3950 4623
3951 status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, 4624 status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
3952 NFS4_OPEN_STID, stpp, nn); 4625 NFS4_OPEN_STID, &stp, nn);
3953 if (status) 4626 if (status)
3954 return status; 4627 return status;
3955 oo = openowner((*stpp)->st_stateowner); 4628 oo = openowner(stp->st_stateowner);
3956 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) 4629 if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
4630 nfs4_put_stid(&stp->st_stid);
3957 return nfserr_bad_stateid; 4631 return nfserr_bad_stateid;
4632 }
4633 *stpp = stp;
3958 return nfs_ok; 4634 return nfs_ok;
3959} 4635}
3960 4636
@@ -3974,8 +4650,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3974 if (status) 4650 if (status)
3975 return status; 4651 return status;
3976 4652
3977 nfs4_lock_state();
3978
3979 status = nfs4_preprocess_seqid_op(cstate, 4653 status = nfs4_preprocess_seqid_op(cstate,
3980 oc->oc_seqid, &oc->oc_req_stateid, 4654 oc->oc_seqid, &oc->oc_req_stateid,
3981 NFS4_OPEN_STID, &stp, nn); 4655 NFS4_OPEN_STID, &stp, nn);
@@ -3984,7 +4658,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3984 oo = openowner(stp->st_stateowner); 4658 oo = openowner(stp->st_stateowner);
3985 status = nfserr_bad_stateid; 4659 status = nfserr_bad_stateid;
3986 if (oo->oo_flags & NFS4_OO_CONFIRMED) 4660 if (oo->oo_flags & NFS4_OO_CONFIRMED)
3987 goto out; 4661 goto put_stateid;
3988 oo->oo_flags |= NFS4_OO_CONFIRMED; 4662 oo->oo_flags |= NFS4_OO_CONFIRMED;
3989 update_stateid(&stp->st_stid.sc_stateid); 4663 update_stateid(&stp->st_stid.sc_stateid);
3990 memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4664 memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -3993,10 +4667,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3993 4667
3994 nfsd4_client_record_create(oo->oo_owner.so_client); 4668 nfsd4_client_record_create(oo->oo_owner.so_client);
3995 status = nfs_ok; 4669 status = nfs_ok;
4670put_stateid:
4671 nfs4_put_stid(&stp->st_stid);
3996out: 4672out:
3997 nfsd4_bump_seqid(cstate, status); 4673 nfsd4_bump_seqid(cstate, status);
3998 if (!cstate->replay_owner)
3999 nfs4_unlock_state();
4000 return status; 4674 return status;
4001} 4675}
4002 4676
@@ -4004,7 +4678,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a
4004{ 4678{
4005 if (!test_access(access, stp)) 4679 if (!test_access(access, stp))
4006 return; 4680 return;
4007 nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); 4681 nfs4_file_put_access(stp->st_stid.sc_file, access);
4008 clear_access(access, stp); 4682 clear_access(access, stp);
4009} 4683}
4010 4684
@@ -4026,16 +4700,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
4026 } 4700 }
4027} 4701}
4028 4702
4029static void
4030reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp)
4031{
4032 int i;
4033 for (i = 0; i < 4; i++) {
4034 if ((i & deny) != i)
4035 clear_deny(i, stp);
4036 }
4037}
4038
4039__be32 4703__be32
4040nfsd4_open_downgrade(struct svc_rqst *rqstp, 4704nfsd4_open_downgrade(struct svc_rqst *rqstp,
4041 struct nfsd4_compound_state *cstate, 4705 struct nfsd4_compound_state *cstate,
@@ -4053,21 +4717,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
4053 dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, 4717 dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
4054 od->od_deleg_want); 4718 od->od_deleg_want);
4055 4719
4056 nfs4_lock_state();
4057 status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, 4720 status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
4058 &od->od_stateid, &stp, nn); 4721 &od->od_stateid, &stp, nn);
4059 if (status) 4722 if (status)
4060 goto out; 4723 goto out;
4061 status = nfserr_inval; 4724 status = nfserr_inval;
4062 if (!test_access(od->od_share_access, stp)) { 4725 if (!test_access(od->od_share_access, stp)) {
4063 dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", 4726 dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
4064 stp->st_access_bmap, od->od_share_access); 4727 stp->st_access_bmap, od->od_share_access);
4065 goto out; 4728 goto put_stateid;
4066 } 4729 }
4067 if (!test_deny(od->od_share_deny, stp)) { 4730 if (!test_deny(od->od_share_deny, stp)) {
4068 dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", 4731 dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
4069 stp->st_deny_bmap, od->od_share_deny); 4732 stp->st_deny_bmap, od->od_share_deny);
4070 goto out; 4733 goto put_stateid;
4071 } 4734 }
4072 nfs4_stateid_downgrade(stp, od->od_share_access); 4735 nfs4_stateid_downgrade(stp, od->od_share_access);
4073 4736
@@ -4076,17 +4739,31 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
4076 update_stateid(&stp->st_stid.sc_stateid); 4739 update_stateid(&stp->st_stid.sc_stateid);
4077 memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4740 memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4078 status = nfs_ok; 4741 status = nfs_ok;
4742put_stateid:
4743 nfs4_put_stid(&stp->st_stid);
4079out: 4744out:
4080 nfsd4_bump_seqid(cstate, status); 4745 nfsd4_bump_seqid(cstate, status);
4081 if (!cstate->replay_owner)
4082 nfs4_unlock_state();
4083 return status; 4746 return status;
4084} 4747}
4085 4748
4086static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) 4749static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
4087{ 4750{
4088 unhash_open_stateid(s); 4751 struct nfs4_client *clp = s->st_stid.sc_client;
4752 LIST_HEAD(reaplist);
4753
4089 s->st_stid.sc_type = NFS4_CLOSED_STID; 4754 s->st_stid.sc_type = NFS4_CLOSED_STID;
4755 spin_lock(&clp->cl_lock);
4756 unhash_open_stateid(s, &reaplist);
4757
4758 if (clp->cl_minorversion) {
4759 put_ol_stateid_locked(s, &reaplist);
4760 spin_unlock(&clp->cl_lock);
4761 free_ol_stateid_reaplist(&reaplist);
4762 } else {
4763 spin_unlock(&clp->cl_lock);
4764 free_ol_stateid_reaplist(&reaplist);
4765 move_to_close_lru(s, clp->net);
4766 }
4090} 4767}
4091 4768
4092/* 4769/*
@@ -4097,7 +4774,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4097 struct nfsd4_close *close) 4774 struct nfsd4_close *close)
4098{ 4775{
4099 __be32 status; 4776 __be32 status;
4100 struct nfs4_openowner *oo;
4101 struct nfs4_ol_stateid *stp; 4777 struct nfs4_ol_stateid *stp;
4102 struct net *net = SVC_NET(rqstp); 4778 struct net *net = SVC_NET(rqstp);
4103 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 4779 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -4105,7 +4781,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4105 dprintk("NFSD: nfsd4_close on file %pd\n", 4781 dprintk("NFSD: nfsd4_close on file %pd\n",
4106 cstate->current_fh.fh_dentry); 4782 cstate->current_fh.fh_dentry);
4107 4783
4108 nfs4_lock_state();
4109 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, 4784 status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
4110 &close->cl_stateid, 4785 &close->cl_stateid,
4111 NFS4_OPEN_STID|NFS4_CLOSED_STID, 4786 NFS4_OPEN_STID|NFS4_CLOSED_STID,
@@ -4113,31 +4788,14 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4113 nfsd4_bump_seqid(cstate, status); 4788 nfsd4_bump_seqid(cstate, status);
4114 if (status) 4789 if (status)
4115 goto out; 4790 goto out;
4116 oo = openowner(stp->st_stateowner);
4117 update_stateid(&stp->st_stid.sc_stateid); 4791 update_stateid(&stp->st_stid.sc_stateid);
4118 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4792 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4119 4793
4120 nfsd4_close_open_stateid(stp); 4794 nfsd4_close_open_stateid(stp);
4121 4795
4122 if (cstate->minorversion) 4796 /* put reference from nfs4_preprocess_seqid_op */
4123 free_generic_stateid(stp); 4797 nfs4_put_stid(&stp->st_stid);
4124 else
4125 oo->oo_last_closed_stid = stp;
4126
4127 if (list_empty(&oo->oo_owner.so_stateids)) {
4128 if (cstate->minorversion)
4129 release_openowner(oo);
4130 else {
4131 /*
4132 * In the 4.0 case we need to keep the owners around a
4133 * little while to handle CLOSE replay.
4134 */
4135 move_to_close_lru(oo, SVC_NET(rqstp));
4136 }
4137 }
4138out: 4798out:
4139 if (!cstate->replay_owner)
4140 nfs4_unlock_state();
4141 return status; 4799 return status;
4142} 4800}
4143 4801
@@ -4154,28 +4812,24 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4154 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) 4812 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
4155 return status; 4813 return status;
4156 4814
4157 nfs4_lock_state(); 4815 status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
4158 status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s,
4159 cstate->minorversion, nn);
4160 if (status) 4816 if (status)
4161 goto out; 4817 goto out;
4162 dp = delegstateid(s); 4818 dp = delegstateid(s);
4163 status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); 4819 status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
4164 if (status) 4820 if (status)
4165 goto out; 4821 goto put_stateid;
4166 4822
4167 destroy_delegation(dp); 4823 destroy_delegation(dp);
4824put_stateid:
4825 nfs4_put_stid(&dp->dl_stid);
4168out: 4826out:
4169 nfs4_unlock_state();
4170
4171 return status; 4827 return status;
4172} 4828}
4173 4829
4174 4830
4175#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) 4831#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
4176 4832
4177#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
4178
4179static inline u64 4833static inline u64
4180end_offset(u64 start, u64 len) 4834end_offset(u64 start, u64 len)
4181{ 4835{
@@ -4196,13 +4850,6 @@ last_byte_offset(u64 start, u64 len)
4196 return end > start ? end - 1: NFS4_MAX_UINT64; 4850 return end > start ? end - 1: NFS4_MAX_UINT64;
4197} 4851}
4198 4852
4199static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
4200{
4201 return (file_hashval(inode) + cl_id
4202 + opaque_hashval(ownername->data, ownername->len))
4203 & LOCKOWNER_INO_HASH_MASK;
4204}
4205
4206/* 4853/*
4207 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that 4854 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
4208 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th 4855 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
@@ -4255,47 +4902,56 @@ nevermind:
4255 deny->ld_type = NFS4_WRITE_LT; 4902 deny->ld_type = NFS4_WRITE_LT;
4256} 4903}
4257 4904
4258static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) 4905static struct nfs4_lockowner *
4906find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
4907 struct nfs4_client *clp)
4259{ 4908{
4260 struct nfs4_ol_stateid *lst; 4909 unsigned int strhashval = ownerstr_hashval(owner);
4910 struct nfs4_stateowner *so;
4261 4911
4262 if (!same_owner_str(&lo->lo_owner, owner, clid)) 4912 lockdep_assert_held(&clp->cl_lock);
4263 return false; 4913
4264 if (list_empty(&lo->lo_owner.so_stateids)) { 4914 list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval],
4265 WARN_ON_ONCE(1); 4915 so_strhash) {
4266 return false; 4916 if (so->so_is_open_owner)
4917 continue;
4918 if (!same_owner_str(so, owner))
4919 continue;
4920 atomic_inc(&so->so_count);
4921 return lockowner(so);
4267 } 4922 }
4268 lst = list_first_entry(&lo->lo_owner.so_stateids, 4923 return NULL;
4269 struct nfs4_ol_stateid, st_perstateowner);
4270 return lst->st_file->fi_inode == inode;
4271} 4924}
4272 4925
4273static struct nfs4_lockowner * 4926static struct nfs4_lockowner *
4274find_lockowner_str(struct inode *inode, clientid_t *clid, 4927find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
4275 struct xdr_netobj *owner, struct nfsd_net *nn) 4928 struct nfs4_client *clp)
4276{ 4929{
4277 unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
4278 struct nfs4_lockowner *lo; 4930 struct nfs4_lockowner *lo;
4279 4931
4280 list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { 4932 spin_lock(&clp->cl_lock);
4281 if (same_lockowner_ino(lo, inode, clid, owner)) 4933 lo = find_lockowner_str_locked(clid, owner, clp);
4282 return lo; 4934 spin_unlock(&clp->cl_lock);
4283 } 4935 return lo;
4284 return NULL;
4285} 4936}
4286 4937
4287static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) 4938static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop)
4288{ 4939{
4289 struct inode *inode = open_stp->st_file->fi_inode; 4940 unhash_lockowner_locked(lockowner(sop));
4290 unsigned int inohash = lockowner_ino_hashval(inode, 4941}
4291 clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); 4942
4292 struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); 4943static void nfs4_free_lockowner(struct nfs4_stateowner *sop)
4944{
4945 struct nfs4_lockowner *lo = lockowner(sop);
4293 4946
4294 list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); 4947 kmem_cache_free(lockowner_slab, lo);
4295 list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]);
4296 list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
4297} 4948}
4298 4949
4950static const struct nfs4_stateowner_operations lockowner_ops = {
4951 .so_unhash = nfs4_unhash_lockowner,
4952 .so_free = nfs4_free_lockowner,
4953};
4954
4299/* 4955/*
4300 * Alloc a lock owner structure. 4956 * Alloc a lock owner structure.
4301 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 4957 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
@@ -4303,42 +4959,107 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
4303 * 4959 *
4304 * strhashval = ownerstr_hashval 4960 * strhashval = ownerstr_hashval
4305 */ 4961 */
4306
4307static struct nfs4_lockowner * 4962static struct nfs4_lockowner *
4308alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { 4963alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
4309 struct nfs4_lockowner *lo; 4964 struct nfs4_ol_stateid *open_stp,
4965 struct nfsd4_lock *lock)
4966{
4967 struct nfs4_lockowner *lo, *ret;
4310 4968
4311 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); 4969 lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
4312 if (!lo) 4970 if (!lo)
4313 return NULL; 4971 return NULL;
4314 INIT_LIST_HEAD(&lo->lo_owner.so_stateids); 4972 INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
4315 lo->lo_owner.so_is_open_owner = 0; 4973 lo->lo_owner.so_is_open_owner = 0;
4316 /* It is the openowner seqid that will be incremented in encode in the 4974 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
4317 * case of new lockowners; so increment the lock seqid manually: */ 4975 lo->lo_owner.so_ops = &lockowner_ops;
4318 lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; 4976 spin_lock(&clp->cl_lock);
4319 hash_lockowner(lo, strhashval, clp, open_stp); 4977 ret = find_lockowner_str_locked(&clp->cl_clientid,
4978 &lock->lk_new_owner, clp);
4979 if (ret == NULL) {
4980 list_add(&lo->lo_owner.so_strhash,
4981 &clp->cl_ownerstr_hashtbl[strhashval]);
4982 ret = lo;
4983 } else
4984 nfs4_free_lockowner(&lo->lo_owner);
4985 spin_unlock(&clp->cl_lock);
4320 return lo; 4986 return lo;
4321} 4987}
4322 4988
4323static struct nfs4_ol_stateid * 4989static void
4324alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) 4990init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
4991 struct nfs4_file *fp, struct inode *inode,
4992 struct nfs4_ol_stateid *open_stp)
4325{ 4993{
4326 struct nfs4_ol_stateid *stp;
4327 struct nfs4_client *clp = lo->lo_owner.so_client; 4994 struct nfs4_client *clp = lo->lo_owner.so_client;
4328 4995
4329 stp = nfs4_alloc_stateid(clp); 4996 lockdep_assert_held(&clp->cl_lock);
4330 if (stp == NULL) 4997
4331 return NULL; 4998 atomic_inc(&stp->st_stid.sc_count);
4332 stp->st_stid.sc_type = NFS4_LOCK_STID; 4999 stp->st_stid.sc_type = NFS4_LOCK_STID;
4333 list_add(&stp->st_perfile, &fp->fi_stateids);
4334 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
4335 stp->st_stateowner = &lo->lo_owner; 5000 stp->st_stateowner = &lo->lo_owner;
5001 atomic_inc(&lo->lo_owner.so_count);
4336 get_nfs4_file(fp); 5002 get_nfs4_file(fp);
4337 stp->st_file = fp; 5003 stp->st_stid.sc_file = fp;
5004 stp->st_stid.sc_free = nfs4_free_lock_stateid;
4338 stp->st_access_bmap = 0; 5005 stp->st_access_bmap = 0;
4339 stp->st_deny_bmap = open_stp->st_deny_bmap; 5006 stp->st_deny_bmap = open_stp->st_deny_bmap;
4340 stp->st_openstp = open_stp; 5007 stp->st_openstp = open_stp;
4341 return stp; 5008 list_add(&stp->st_locks, &open_stp->st_locks);
5009 list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
5010 spin_lock(&fp->fi_lock);
5011 list_add(&stp->st_perfile, &fp->fi_stateids);
5012 spin_unlock(&fp->fi_lock);
5013}
5014
5015static struct nfs4_ol_stateid *
5016find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
5017{
5018 struct nfs4_ol_stateid *lst;
5019 struct nfs4_client *clp = lo->lo_owner.so_client;
5020
5021 lockdep_assert_held(&clp->cl_lock);
5022
5023 list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
5024 if (lst->st_stid.sc_file == fp) {
5025 atomic_inc(&lst->st_stid.sc_count);
5026 return lst;
5027 }
5028 }
5029 return NULL;
5030}
5031
5032static struct nfs4_ol_stateid *
5033find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
5034 struct inode *inode, struct nfs4_ol_stateid *ost,
5035 bool *new)
5036{
5037 struct nfs4_stid *ns = NULL;
5038 struct nfs4_ol_stateid *lst;
5039 struct nfs4_openowner *oo = openowner(ost->st_stateowner);
5040 struct nfs4_client *clp = oo->oo_owner.so_client;
5041
5042 spin_lock(&clp->cl_lock);
5043 lst = find_lock_stateid(lo, fi);
5044 if (lst == NULL) {
5045 spin_unlock(&clp->cl_lock);
5046 ns = nfs4_alloc_stid(clp, stateid_slab);
5047 if (ns == NULL)
5048 return NULL;
5049
5050 spin_lock(&clp->cl_lock);
5051 lst = find_lock_stateid(lo, fi);
5052 if (likely(!lst)) {
5053 lst = openlockstateid(ns);
5054 init_lock_stateid(lst, lo, fi, inode, ost);
5055 ns = NULL;
5056 *new = true;
5057 }
5058 }
5059 spin_unlock(&clp->cl_lock);
5060 if (ns)
5061 nfs4_put_stid(ns);
5062 return lst;
4342} 5063}
4343 5064
4344static int 5065static int
@@ -4350,46 +5071,53 @@ check_lock_length(u64 offset, u64 length)
4350 5071
4351static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) 5072static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
4352{ 5073{
4353 struct nfs4_file *fp = lock_stp->st_file; 5074 struct nfs4_file *fp = lock_stp->st_stid.sc_file;
4354 int oflag = nfs4_access_to_omode(access); 5075
5076 lockdep_assert_held(&fp->fi_lock);
4355 5077
4356 if (test_access(access, lock_stp)) 5078 if (test_access(access, lock_stp))
4357 return; 5079 return;
4358 nfs4_file_get_access(fp, oflag); 5080 __nfs4_file_get_access(fp, access);
4359 set_access(access, lock_stp); 5081 set_access(access, lock_stp);
4360} 5082}
4361 5083
4362static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) 5084static __be32
5085lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
5086 struct nfs4_ol_stateid *ost,
5087 struct nfsd4_lock *lock,
5088 struct nfs4_ol_stateid **lst, bool *new)
4363{ 5089{
4364 struct nfs4_file *fi = ost->st_file; 5090 __be32 status;
5091 struct nfs4_file *fi = ost->st_stid.sc_file;
4365 struct nfs4_openowner *oo = openowner(ost->st_stateowner); 5092 struct nfs4_openowner *oo = openowner(ost->st_stateowner);
4366 struct nfs4_client *cl = oo->oo_owner.so_client; 5093 struct nfs4_client *cl = oo->oo_owner.so_client;
5094 struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
4367 struct nfs4_lockowner *lo; 5095 struct nfs4_lockowner *lo;
4368 unsigned int strhashval; 5096 unsigned int strhashval;
4369 struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); 5097
4370 5098 lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
4371 lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, 5099 if (!lo) {
4372 &lock->v.new.owner, nn); 5100 strhashval = ownerstr_hashval(&lock->v.new.owner);
4373 if (lo) { 5101 lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
4374 if (!cstate->minorversion) 5102 if (lo == NULL)
4375 return nfserr_bad_seqid; 5103 return nfserr_jukebox;
4376 /* XXX: a lockowner always has exactly one stateid: */ 5104 } else {
4377 *lst = list_first_entry(&lo->lo_owner.so_stateids, 5105 /* with an existing lockowner, seqids must be the same */
4378 struct nfs4_ol_stateid, st_perstateowner); 5106 status = nfserr_bad_seqid;
4379 return nfs_ok; 5107 if (!cstate->minorversion &&
5108 lock->lk_new_lock_seqid != lo->lo_owner.so_seqid)
5109 goto out;
4380 } 5110 }
4381 strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, 5111
4382 &lock->v.new.owner); 5112 *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
4383 lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
4384 if (lo == NULL)
4385 return nfserr_jukebox;
4386 *lst = alloc_init_lock_stateid(lo, fi, ost);
4387 if (*lst == NULL) { 5113 if (*lst == NULL) {
4388 release_lockowner(lo); 5114 status = nfserr_jukebox;
4389 return nfserr_jukebox; 5115 goto out;
4390 } 5116 }
4391 *new = true; 5117 status = nfs_ok;
4392 return nfs_ok; 5118out:
5119 nfs4_put_stateowner(&lo->lo_owner);
5120 return status;
4393} 5121}
4394 5122
4395/* 5123/*
@@ -4401,14 +5129,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4401{ 5129{
4402 struct nfs4_openowner *open_sop = NULL; 5130 struct nfs4_openowner *open_sop = NULL;
4403 struct nfs4_lockowner *lock_sop = NULL; 5131 struct nfs4_lockowner *lock_sop = NULL;
4404 struct nfs4_ol_stateid *lock_stp; 5132 struct nfs4_ol_stateid *lock_stp = NULL;
5133 struct nfs4_ol_stateid *open_stp = NULL;
5134 struct nfs4_file *fp;
4405 struct file *filp = NULL; 5135 struct file *filp = NULL;
4406 struct file_lock *file_lock = NULL; 5136 struct file_lock *file_lock = NULL;
4407 struct file_lock *conflock = NULL; 5137 struct file_lock *conflock = NULL;
4408 __be32 status = 0; 5138 __be32 status = 0;
4409 bool new_state = false;
4410 int lkflg; 5139 int lkflg;
4411 int err; 5140 int err;
5141 bool new = false;
4412 struct net *net = SVC_NET(rqstp); 5142 struct net *net = SVC_NET(rqstp);
4413 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 5143 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
4414 5144
@@ -4425,11 +5155,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4425 return status; 5155 return status;
4426 } 5156 }
4427 5157
4428 nfs4_lock_state();
4429
4430 if (lock->lk_is_new) { 5158 if (lock->lk_is_new) {
4431 struct nfs4_ol_stateid *open_stp = NULL;
4432
4433 if (nfsd4_has_session(cstate)) 5159 if (nfsd4_has_session(cstate))
4434 /* See rfc 5661 18.10.3: given clientid is ignored: */ 5160 /* See rfc 5661 18.10.3: given clientid is ignored: */
4435 memcpy(&lock->v.new.clientid, 5161 memcpy(&lock->v.new.clientid,
@@ -4453,12 +5179,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4453 &lock->v.new.clientid)) 5179 &lock->v.new.clientid))
4454 goto out; 5180 goto out;
4455 status = lookup_or_create_lock_state(cstate, open_stp, lock, 5181 status = lookup_or_create_lock_state(cstate, open_stp, lock,
4456 &lock_stp, &new_state); 5182 &lock_stp, &new);
4457 } else 5183 } else {
4458 status = nfs4_preprocess_seqid_op(cstate, 5184 status = nfs4_preprocess_seqid_op(cstate,
4459 lock->lk_old_lock_seqid, 5185 lock->lk_old_lock_seqid,
4460 &lock->lk_old_lock_stateid, 5186 &lock->lk_old_lock_stateid,
4461 NFS4_LOCK_STID, &lock_stp, nn); 5187 NFS4_LOCK_STID, &lock_stp, nn);
5188 }
4462 if (status) 5189 if (status)
4463 goto out; 5190 goto out;
4464 lock_sop = lockowner(lock_stp->st_stateowner); 5191 lock_sop = lockowner(lock_stp->st_stateowner);
@@ -4482,20 +5209,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4482 goto out; 5209 goto out;
4483 } 5210 }
4484 5211
5212 fp = lock_stp->st_stid.sc_file;
4485 locks_init_lock(file_lock); 5213 locks_init_lock(file_lock);
4486 switch (lock->lk_type) { 5214 switch (lock->lk_type) {
4487 case NFS4_READ_LT: 5215 case NFS4_READ_LT:
4488 case NFS4_READW_LT: 5216 case NFS4_READW_LT:
4489 filp = find_readable_file(lock_stp->st_file); 5217 spin_lock(&fp->fi_lock);
5218 filp = find_readable_file_locked(fp);
4490 if (filp) 5219 if (filp)
4491 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); 5220 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
5221 spin_unlock(&fp->fi_lock);
4492 file_lock->fl_type = F_RDLCK; 5222 file_lock->fl_type = F_RDLCK;
4493 break; 5223 break;
4494 case NFS4_WRITE_LT: 5224 case NFS4_WRITE_LT:
4495 case NFS4_WRITEW_LT: 5225 case NFS4_WRITEW_LT:
4496 filp = find_writeable_file(lock_stp->st_file); 5226 spin_lock(&fp->fi_lock);
5227 filp = find_writeable_file_locked(fp);
4497 if (filp) 5228 if (filp)
4498 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); 5229 get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
5230 spin_unlock(&fp->fi_lock);
4499 file_lock->fl_type = F_WRLCK; 5231 file_lock->fl_type = F_WRLCK;
4500 break; 5232 break;
4501 default: 5233 default:
@@ -4544,11 +5276,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4544 break; 5276 break;
4545 } 5277 }
4546out: 5278out:
4547 if (status && new_state) 5279 if (filp)
4548 release_lockowner(lock_sop); 5280 fput(filp);
5281 if (lock_stp) {
5282 /* Bump seqid manually if the 4.0 replay owner is openowner */
5283 if (cstate->replay_owner &&
5284 cstate->replay_owner != &lock_sop->lo_owner &&
5285 seqid_mutating_err(ntohl(status)))
5286 lock_sop->lo_owner.so_seqid++;
5287
5288 /*
5289 * If this is a new, never-before-used stateid, and we are
5290 * returning an error, then just go ahead and release it.
5291 */
5292 if (status && new)
5293 release_lock_stateid(lock_stp);
5294
5295 nfs4_put_stid(&lock_stp->st_stid);
5296 }
5297 if (open_stp)
5298 nfs4_put_stid(&open_stp->st_stid);
4549 nfsd4_bump_seqid(cstate, status); 5299 nfsd4_bump_seqid(cstate, status);
4550 if (!cstate->replay_owner)
4551 nfs4_unlock_state();
4552 if (file_lock) 5300 if (file_lock)
4553 locks_free_lock(file_lock); 5301 locks_free_lock(file_lock);
4554 if (conflock) 5302 if (conflock)
@@ -4580,9 +5328,8 @@ __be32
4580nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 5328nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4581 struct nfsd4_lockt *lockt) 5329 struct nfsd4_lockt *lockt)
4582{ 5330{
4583 struct inode *inode;
4584 struct file_lock *file_lock = NULL; 5331 struct file_lock *file_lock = NULL;
4585 struct nfs4_lockowner *lo; 5332 struct nfs4_lockowner *lo = NULL;
4586 __be32 status; 5333 __be32 status;
4587 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 5334 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4588 5335
@@ -4592,10 +5339,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4592 if (check_lock_length(lockt->lt_offset, lockt->lt_length)) 5339 if (check_lock_length(lockt->lt_offset, lockt->lt_length))
4593 return nfserr_inval; 5340 return nfserr_inval;
4594 5341
4595 nfs4_lock_state();
4596
4597 if (!nfsd4_has_session(cstate)) { 5342 if (!nfsd4_has_session(cstate)) {
4598 status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); 5343 status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
4599 if (status) 5344 if (status)
4600 goto out; 5345 goto out;
4601 } 5346 }
@@ -4603,7 +5348,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4603 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) 5348 if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
4604 goto out; 5349 goto out;
4605 5350
4606 inode = cstate->current_fh.fh_dentry->d_inode;
4607 file_lock = locks_alloc_lock(); 5351 file_lock = locks_alloc_lock();
4608 if (!file_lock) { 5352 if (!file_lock) {
4609 dprintk("NFSD: %s: unable to allocate lock!\n", __func__); 5353 dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
@@ -4626,7 +5370,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4626 goto out; 5370 goto out;
4627 } 5371 }
4628 5372
4629 lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); 5373 lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
5374 cstate->clp);
4630 if (lo) 5375 if (lo)
4631 file_lock->fl_owner = (fl_owner_t)lo; 5376 file_lock->fl_owner = (fl_owner_t)lo;
4632 file_lock->fl_pid = current->tgid; 5377 file_lock->fl_pid = current->tgid;
@@ -4646,7 +5391,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4646 nfs4_set_lock_denied(file_lock, &lockt->lt_denied); 5391 nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
4647 } 5392 }
4648out: 5393out:
4649 nfs4_unlock_state(); 5394 if (lo)
5395 nfs4_put_stateowner(&lo->lo_owner);
4650 if (file_lock) 5396 if (file_lock)
4651 locks_free_lock(file_lock); 5397 locks_free_lock(file_lock);
4652 return status; 5398 return status;
@@ -4670,23 +5416,21 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4670 if (check_lock_length(locku->lu_offset, locku->lu_length)) 5416 if (check_lock_length(locku->lu_offset, locku->lu_length))
4671 return nfserr_inval; 5417 return nfserr_inval;
4672 5418
4673 nfs4_lock_state();
4674
4675 status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, 5419 status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
4676 &locku->lu_stateid, NFS4_LOCK_STID, 5420 &locku->lu_stateid, NFS4_LOCK_STID,
4677 &stp, nn); 5421 &stp, nn);
4678 if (status) 5422 if (status)
4679 goto out; 5423 goto out;
4680 filp = find_any_file(stp->st_file); 5424 filp = find_any_file(stp->st_stid.sc_file);
4681 if (!filp) { 5425 if (!filp) {
4682 status = nfserr_lock_range; 5426 status = nfserr_lock_range;
4683 goto out; 5427 goto put_stateid;
4684 } 5428 }
4685 file_lock = locks_alloc_lock(); 5429 file_lock = locks_alloc_lock();
4686 if (!file_lock) { 5430 if (!file_lock) {
4687 dprintk("NFSD: %s: unable to allocate lock!\n", __func__); 5431 dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
4688 status = nfserr_jukebox; 5432 status = nfserr_jukebox;
4689 goto out; 5433 goto fput;
4690 } 5434 }
4691 locks_init_lock(file_lock); 5435 locks_init_lock(file_lock);
4692 file_lock->fl_type = F_UNLCK; 5436 file_lock->fl_type = F_UNLCK;
@@ -4708,41 +5452,51 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4708 } 5452 }
4709 update_stateid(&stp->st_stid.sc_stateid); 5453 update_stateid(&stp->st_stid.sc_stateid);
4710 memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 5454 memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4711 5455fput:
5456 fput(filp);
5457put_stateid:
5458 nfs4_put_stid(&stp->st_stid);
4712out: 5459out:
4713 nfsd4_bump_seqid(cstate, status); 5460 nfsd4_bump_seqid(cstate, status);
4714 if (!cstate->replay_owner)
4715 nfs4_unlock_state();
4716 if (file_lock) 5461 if (file_lock)
4717 locks_free_lock(file_lock); 5462 locks_free_lock(file_lock);
4718 return status; 5463 return status;
4719 5464
4720out_nfserr: 5465out_nfserr:
4721 status = nfserrno(err); 5466 status = nfserrno(err);
4722 goto out; 5467 goto fput;
4723} 5468}
4724 5469
4725/* 5470/*
4726 * returns 5471 * returns
4727 * 1: locks held by lockowner 5472 * true: locks held by lockowner
4728 * 0: no locks held by lockowner 5473 * false: no locks held by lockowner
4729 */ 5474 */
4730static int 5475static bool
4731check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) 5476check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
4732{ 5477{
4733 struct file_lock **flpp; 5478 struct file_lock **flpp;
4734 struct inode *inode = filp->fi_inode; 5479 int status = false;
4735 int status = 0; 5480 struct file *filp = find_any_file(fp);
5481 struct inode *inode;
5482
5483 if (!filp) {
5484 /* Any valid lock stateid should have some sort of access */
5485 WARN_ON_ONCE(1);
5486 return status;
5487 }
5488
5489 inode = file_inode(filp);
4736 5490
4737 spin_lock(&inode->i_lock); 5491 spin_lock(&inode->i_lock);
4738 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { 5492 for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
4739 if ((*flpp)->fl_owner == (fl_owner_t)lowner) { 5493 if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
4740 status = 1; 5494 status = true;
4741 goto out; 5495 break;
4742 } 5496 }
4743 } 5497 }
4744out:
4745 spin_unlock(&inode->i_lock); 5498 spin_unlock(&inode->i_lock);
5499 fput(filp);
4746 return status; 5500 return status;
4747} 5501}
4748 5502
@@ -4753,53 +5507,46 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
4753{ 5507{
4754 clientid_t *clid = &rlockowner->rl_clientid; 5508 clientid_t *clid = &rlockowner->rl_clientid;
4755 struct nfs4_stateowner *sop; 5509 struct nfs4_stateowner *sop;
4756 struct nfs4_lockowner *lo; 5510 struct nfs4_lockowner *lo = NULL;
4757 struct nfs4_ol_stateid *stp; 5511 struct nfs4_ol_stateid *stp;
4758 struct xdr_netobj *owner = &rlockowner->rl_owner; 5512 struct xdr_netobj *owner = &rlockowner->rl_owner;
4759 struct list_head matches; 5513 unsigned int hashval = ownerstr_hashval(owner);
4760 unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
4761 __be32 status; 5514 __be32 status;
4762 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); 5515 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
5516 struct nfs4_client *clp;
4763 5517
4764 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", 5518 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
4765 clid->cl_boot, clid->cl_id); 5519 clid->cl_boot, clid->cl_id);
4766 5520
4767 nfs4_lock_state(); 5521 status = lookup_clientid(clid, cstate, nn);
4768
4769 status = lookup_clientid(clid, cstate->minorversion, nn, NULL);
4770 if (status) 5522 if (status)
4771 goto out; 5523 return status;
4772 5524
4773 status = nfserr_locks_held; 5525 clp = cstate->clp;
4774 INIT_LIST_HEAD(&matches); 5526 /* Find the matching lock stateowner */
5527 spin_lock(&clp->cl_lock);
5528 list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
5529 so_strhash) {
4775 5530
4776 list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { 5531 if (sop->so_is_open_owner || !same_owner_str(sop, owner))
4777 if (sop->so_is_open_owner)
4778 continue; 5532 continue;
4779 if (!same_owner_str(sop, owner, clid)) 5533
4780 continue; 5534 /* see if there are still any locks associated with it */
4781 list_for_each_entry(stp, &sop->so_stateids, 5535 lo = lockowner(sop);
4782 st_perstateowner) { 5536 list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
4783 lo = lockowner(sop); 5537 if (check_for_locks(stp->st_stid.sc_file, lo)) {
4784 if (check_for_locks(stp->st_file, lo)) 5538 status = nfserr_locks_held;
4785 goto out; 5539 spin_unlock(&clp->cl_lock);
4786 list_add(&lo->lo_list, &matches); 5540 return status;
5541 }
4787 } 5542 }
5543
5544 atomic_inc(&sop->so_count);
5545 break;
4788 } 5546 }
4789 /* Clients probably won't expect us to return with some (but not all) 5547 spin_unlock(&clp->cl_lock);
4790 * of the lockowner state released; so don't release any until all 5548 if (lo)
4791 * have been checked. */
4792 status = nfs_ok;
4793 while (!list_empty(&matches)) {
4794 lo = list_entry(matches.next, struct nfs4_lockowner,
4795 lo_list);
4796 /* unhash_stateowner deletes so_perclient only
4797 * for openowners. */
4798 list_del(&lo->lo_list);
4799 release_lockowner(lo); 5549 release_lockowner(lo);
4800 }
4801out:
4802 nfs4_unlock_state();
4803 return status; 5550 return status;
4804} 5551}
4805 5552
@@ -4887,34 +5634,123 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
4887* Called from OPEN. Look for clientid in reclaim list. 5634* Called from OPEN. Look for clientid in reclaim list.
4888*/ 5635*/
4889__be32 5636__be32
4890nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) 5637nfs4_check_open_reclaim(clientid_t *clid,
5638 struct nfsd4_compound_state *cstate,
5639 struct nfsd_net *nn)
4891{ 5640{
4892 struct nfs4_client *clp; 5641 __be32 status;
4893 5642
4894 /* find clientid in conf_id_hashtbl */ 5643 /* find clientid in conf_id_hashtbl */
4895 clp = find_confirmed_client(clid, sessions, nn); 5644 status = lookup_clientid(clid, cstate, nn);
4896 if (clp == NULL) 5645 if (status)
4897 return nfserr_reclaim_bad; 5646 return nfserr_reclaim_bad;
4898 5647
4899 return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok; 5648 if (nfsd4_client_record_check(cstate->clp))
5649 return nfserr_reclaim_bad;
5650
5651 return nfs_ok;
4900} 5652}
4901 5653
4902#ifdef CONFIG_NFSD_FAULT_INJECTION 5654#ifdef CONFIG_NFSD_FAULT_INJECTION
5655static inline void
5656put_client(struct nfs4_client *clp)
5657{
5658 atomic_dec(&clp->cl_refcount);
5659}
4903 5660
4904u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) 5661static struct nfs4_client *
5662nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
4905{ 5663{
4906 if (mark_client_expired(clp)) 5664 struct nfs4_client *clp;
4907 return 0; 5665 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
4908 expire_client(clp); 5666 nfsd_net_id);
4909 return 1; 5667
5668 if (!nfsd_netns_ready(nn))
5669 return NULL;
5670
5671 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5672 if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
5673 return clp;
5674 }
5675 return NULL;
4910} 5676}
4911 5677
4912u64 nfsd_print_client(struct nfs4_client *clp, u64 num) 5678u64
5679nfsd_inject_print_clients(void)
4913{ 5680{
5681 struct nfs4_client *clp;
5682 u64 count = 0;
5683 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5684 nfsd_net_id);
4914 char buf[INET6_ADDRSTRLEN]; 5685 char buf[INET6_ADDRSTRLEN];
4915 rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); 5686
4916 printk(KERN_INFO "NFS Client: %s\n", buf); 5687 if (!nfsd_netns_ready(nn))
4917 return 1; 5688 return 0;
5689
5690 spin_lock(&nn->client_lock);
5691 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5692 rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
5693 pr_info("NFS Client: %s\n", buf);
5694 ++count;
5695 }
5696 spin_unlock(&nn->client_lock);
5697
5698 return count;
5699}
5700
5701u64
5702nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size)
5703{
5704 u64 count = 0;
5705 struct nfs4_client *clp;
5706 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5707 nfsd_net_id);
5708
5709 if (!nfsd_netns_ready(nn))
5710 return count;
5711
5712 spin_lock(&nn->client_lock);
5713 clp = nfsd_find_client(addr, addr_size);
5714 if (clp) {
5715 if (mark_client_expired_locked(clp) == nfs_ok)
5716 ++count;
5717 else
5718 clp = NULL;
5719 }
5720 spin_unlock(&nn->client_lock);
5721
5722 if (clp)
5723 expire_client(clp);
5724
5725 return count;
5726}
5727
5728u64
5729nfsd_inject_forget_clients(u64 max)
5730{
5731 u64 count = 0;
5732 struct nfs4_client *clp, *next;
5733 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5734 nfsd_net_id);
5735 LIST_HEAD(reaplist);
5736
5737 if (!nfsd_netns_ready(nn))
5738 return count;
5739
5740 spin_lock(&nn->client_lock);
5741 list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
5742 if (mark_client_expired_locked(clp) == nfs_ok) {
5743 list_add(&clp->cl_lru, &reaplist);
5744 if (max != 0 && ++count >= max)
5745 break;
5746 }
5747 }
5748 spin_unlock(&nn->client_lock);
5749
5750 list_for_each_entry_safe(clp, next, &reaplist, cl_lru)
5751 expire_client(clp);
5752
5753 return count;
4918} 5754}
4919 5755
4920static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, 5756static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
@@ -4925,158 +5761,484 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
4925 printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); 5761 printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
4926} 5762}
4927 5763
4928static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) 5764static void
5765nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
5766 struct list_head *collect)
5767{
5768 struct nfs4_client *clp = lst->st_stid.sc_client;
5769 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5770 nfsd_net_id);
5771
5772 if (!collect)
5773 return;
5774
5775 lockdep_assert_held(&nn->client_lock);
5776 atomic_inc(&clp->cl_refcount);
5777 list_add(&lst->st_locks, collect);
5778}
5779
5780static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
5781 struct list_head *collect,
5782 void (*func)(struct nfs4_ol_stateid *))
4929{ 5783{
4930 struct nfs4_openowner *oop; 5784 struct nfs4_openowner *oop;
4931 struct nfs4_lockowner *lop, *lo_next;
4932 struct nfs4_ol_stateid *stp, *st_next; 5785 struct nfs4_ol_stateid *stp, *st_next;
5786 struct nfs4_ol_stateid *lst, *lst_next;
4933 u64 count = 0; 5787 u64 count = 0;
4934 5788
5789 spin_lock(&clp->cl_lock);
4935 list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { 5790 list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
4936 list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { 5791 list_for_each_entry_safe(stp, st_next,
4937 list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { 5792 &oop->oo_owner.so_stateids, st_perstateowner) {
4938 if (func) 5793 list_for_each_entry_safe(lst, lst_next,
4939 func(lop); 5794 &stp->st_locks, st_locks) {
4940 if (++count == max) 5795 if (func) {
4941 return count; 5796 func(lst);
5797 nfsd_inject_add_lock_to_list(lst,
5798 collect);
5799 }
5800 ++count;
5801 /*
5802 * Despite the fact that these functions deal
5803 * with 64-bit integers for "count", we must
5804 * ensure that it doesn't blow up the
5805 * clp->cl_refcount. Throw a warning if we
5806 * start to approach INT_MAX here.
5807 */
5808 WARN_ON_ONCE(count == (INT_MAX / 2));
5809 if (count == max)
5810 goto out;
4942 } 5811 }
4943 } 5812 }
4944 } 5813 }
5814out:
5815 spin_unlock(&clp->cl_lock);
4945 5816
4946 return count; 5817 return count;
4947} 5818}
4948 5819
4949u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) 5820static u64
5821nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect,
5822 u64 max)
4950{ 5823{
4951 return nfsd_foreach_client_lock(clp, max, release_lockowner); 5824 return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid);
4952} 5825}
4953 5826
4954u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) 5827static u64
5828nfsd_print_client_locks(struct nfs4_client *clp)
4955{ 5829{
4956 u64 count = nfsd_foreach_client_lock(clp, max, NULL); 5830 u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL);
4957 nfsd_print_count(clp, count, "locked files"); 5831 nfsd_print_count(clp, count, "locked files");
4958 return count; 5832 return count;
4959} 5833}
4960 5834
4961static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) 5835u64
5836nfsd_inject_print_locks(void)
5837{
5838 struct nfs4_client *clp;
5839 u64 count = 0;
5840 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5841 nfsd_net_id);
5842
5843 if (!nfsd_netns_ready(nn))
5844 return 0;
5845
5846 spin_lock(&nn->client_lock);
5847 list_for_each_entry(clp, &nn->client_lru, cl_lru)
5848 count += nfsd_print_client_locks(clp);
5849 spin_unlock(&nn->client_lock);
5850
5851 return count;
5852}
5853
5854static void
5855nfsd_reap_locks(struct list_head *reaplist)
5856{
5857 struct nfs4_client *clp;
5858 struct nfs4_ol_stateid *stp, *next;
5859
5860 list_for_each_entry_safe(stp, next, reaplist, st_locks) {
5861 list_del_init(&stp->st_locks);
5862 clp = stp->st_stid.sc_client;
5863 nfs4_put_stid(&stp->st_stid);
5864 put_client(clp);
5865 }
5866}
5867
5868u64
5869nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size)
5870{
5871 unsigned int count = 0;
5872 struct nfs4_client *clp;
5873 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5874 nfsd_net_id);
5875 LIST_HEAD(reaplist);
5876
5877 if (!nfsd_netns_ready(nn))
5878 return count;
5879
5880 spin_lock(&nn->client_lock);
5881 clp = nfsd_find_client(addr, addr_size);
5882 if (clp)
5883 count = nfsd_collect_client_locks(clp, &reaplist, 0);
5884 spin_unlock(&nn->client_lock);
5885 nfsd_reap_locks(&reaplist);
5886 return count;
5887}
5888
5889u64
5890nfsd_inject_forget_locks(u64 max)
5891{
5892 u64 count = 0;
5893 struct nfs4_client *clp;
5894 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5895 nfsd_net_id);
5896 LIST_HEAD(reaplist);
5897
5898 if (!nfsd_netns_ready(nn))
5899 return count;
5900
5901 spin_lock(&nn->client_lock);
5902 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5903 count += nfsd_collect_client_locks(clp, &reaplist, max - count);
5904 if (max != 0 && count >= max)
5905 break;
5906 }
5907 spin_unlock(&nn->client_lock);
5908 nfsd_reap_locks(&reaplist);
5909 return count;
5910}
5911
5912static u64
5913nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
5914 struct list_head *collect,
5915 void (*func)(struct nfs4_openowner *))
4962{ 5916{
4963 struct nfs4_openowner *oop, *next; 5917 struct nfs4_openowner *oop, *next;
5918 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5919 nfsd_net_id);
4964 u64 count = 0; 5920 u64 count = 0;
4965 5921
5922 lockdep_assert_held(&nn->client_lock);
5923
5924 spin_lock(&clp->cl_lock);
4966 list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { 5925 list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
4967 if (func) 5926 if (func) {
4968 func(oop); 5927 func(oop);
4969 if (++count == max) 5928 if (collect) {
5929 atomic_inc(&clp->cl_refcount);
5930 list_add(&oop->oo_perclient, collect);
5931 }
5932 }
5933 ++count;
5934 /*
5935 * Despite the fact that these functions deal with
5936 * 64-bit integers for "count", we must ensure that
5937 * it doesn't blow up the clp->cl_refcount. Throw a
5938 * warning if we start to approach INT_MAX here.
5939 */
5940 WARN_ON_ONCE(count == (INT_MAX / 2));
5941 if (count == max)
4970 break; 5942 break;
4971 } 5943 }
5944 spin_unlock(&clp->cl_lock);
4972 5945
4973 return count; 5946 return count;
4974} 5947}
4975 5948
4976u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) 5949static u64
5950nfsd_print_client_openowners(struct nfs4_client *clp)
4977{ 5951{
4978 return nfsd_foreach_client_open(clp, max, release_openowner); 5952 u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL);
5953
5954 nfsd_print_count(clp, count, "openowners");
5955 return count;
4979} 5956}
4980 5957
4981u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) 5958static u64
5959nfsd_collect_client_openowners(struct nfs4_client *clp,
5960 struct list_head *collect, u64 max)
4982{ 5961{
4983 u64 count = nfsd_foreach_client_open(clp, max, NULL); 5962 return nfsd_foreach_client_openowner(clp, max, collect,
4984 nfsd_print_count(clp, count, "open files"); 5963 unhash_openowner_locked);
4985 return count;
4986} 5964}
4987 5965
4988static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, 5966u64
4989 struct list_head *victims) 5967nfsd_inject_print_openowners(void)
4990{ 5968{
4991 struct nfs4_delegation *dp, *next; 5969 struct nfs4_client *clp;
4992 u64 count = 0; 5970 u64 count = 0;
5971 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
5972 nfsd_net_id);
5973
5974 if (!nfsd_netns_ready(nn))
5975 return 0;
5976
5977 spin_lock(&nn->client_lock);
5978 list_for_each_entry(clp, &nn->client_lru, cl_lru)
5979 count += nfsd_print_client_openowners(clp);
5980 spin_unlock(&nn->client_lock);
4993 5981
4994 lockdep_assert_held(&state_lock);
4995 list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
4996 if (victims)
4997 list_move(&dp->dl_recall_lru, victims);
4998 if (++count == max)
4999 break;
5000 }
5001 return count; 5982 return count;
5002} 5983}
5003 5984
5004u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) 5985static void
5986nfsd_reap_openowners(struct list_head *reaplist)
5005{ 5987{
5006 struct nfs4_delegation *dp, *next; 5988 struct nfs4_client *clp;
5007 LIST_HEAD(victims); 5989 struct nfs4_openowner *oop, *next;
5008 u64 count;
5009 5990
5010 spin_lock(&state_lock); 5991 list_for_each_entry_safe(oop, next, reaplist, oo_perclient) {
5011 count = nfsd_find_all_delegations(clp, max, &victims); 5992 list_del_init(&oop->oo_perclient);
5012 spin_unlock(&state_lock); 5993 clp = oop->oo_owner.so_client;
5994 release_openowner(oop);
5995 put_client(clp);
5996 }
5997}
5013 5998
5014 list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) 5999u64
5015 revoke_delegation(dp); 6000nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr,
6001 size_t addr_size)
6002{
6003 unsigned int count = 0;
6004 struct nfs4_client *clp;
6005 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6006 nfsd_net_id);
6007 LIST_HEAD(reaplist);
5016 6008
6009 if (!nfsd_netns_ready(nn))
6010 return count;
6011
6012 spin_lock(&nn->client_lock);
6013 clp = nfsd_find_client(addr, addr_size);
6014 if (clp)
6015 count = nfsd_collect_client_openowners(clp, &reaplist, 0);
6016 spin_unlock(&nn->client_lock);
6017 nfsd_reap_openowners(&reaplist);
5017 return count; 6018 return count;
5018} 6019}
5019 6020
5020u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) 6021u64
6022nfsd_inject_forget_openowners(u64 max)
5021{ 6023{
5022 struct nfs4_delegation *dp, *next; 6024 u64 count = 0;
5023 LIST_HEAD(victims); 6025 struct nfs4_client *clp;
5024 u64 count; 6026 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6027 nfsd_net_id);
6028 LIST_HEAD(reaplist);
5025 6029
5026 spin_lock(&state_lock); 6030 if (!nfsd_netns_ready(nn))
5027 count = nfsd_find_all_delegations(clp, max, &victims); 6031 return count;
5028 list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
5029 nfsd_break_one_deleg(dp);
5030 spin_unlock(&state_lock);
5031 6032
6033 spin_lock(&nn->client_lock);
6034 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
6035 count += nfsd_collect_client_openowners(clp, &reaplist,
6036 max - count);
6037 if (max != 0 && count >= max)
6038 break;
6039 }
6040 spin_unlock(&nn->client_lock);
6041 nfsd_reap_openowners(&reaplist);
5032 return count; 6042 return count;
5033} 6043}
5034 6044
5035u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) 6045static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
6046 struct list_head *victims)
5036{ 6047{
6048 struct nfs4_delegation *dp, *next;
6049 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6050 nfsd_net_id);
5037 u64 count = 0; 6051 u64 count = 0;
5038 6052
6053 lockdep_assert_held(&nn->client_lock);
6054
5039 spin_lock(&state_lock); 6055 spin_lock(&state_lock);
5040 count = nfsd_find_all_delegations(clp, max, NULL); 6056 list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
6057 if (victims) {
6058 /*
6059 * It's not safe to mess with delegations that have a
6060 * non-zero dl_time. They might have already been broken
6061 * and could be processed by the laundromat outside of
6062 * the state_lock. Just leave them be.
6063 */
6064 if (dp->dl_time != 0)
6065 continue;
6066
6067 atomic_inc(&clp->cl_refcount);
6068 unhash_delegation_locked(dp);
6069 list_add(&dp->dl_recall_lru, victims);
6070 }
6071 ++count;
6072 /*
6073 * Despite the fact that these functions deal with
6074 * 64-bit integers for "count", we must ensure that
6075 * it doesn't blow up the clp->cl_refcount. Throw a
6076 * warning if we start to approach INT_MAX here.
6077 */
6078 WARN_ON_ONCE(count == (INT_MAX / 2));
6079 if (count == max)
6080 break;
6081 }
5041 spin_unlock(&state_lock); 6082 spin_unlock(&state_lock);
6083 return count;
6084}
6085
6086static u64
6087nfsd_print_client_delegations(struct nfs4_client *clp)
6088{
6089 u64 count = nfsd_find_all_delegations(clp, 0, NULL);
5042 6090
5043 nfsd_print_count(clp, count, "delegations"); 6091 nfsd_print_count(clp, count, "delegations");
5044 return count; 6092 return count;
5045} 6093}
5046 6094
5047u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) 6095u64
6096nfsd_inject_print_delegations(void)
5048{ 6097{
5049 struct nfs4_client *clp, *next; 6098 struct nfs4_client *clp;
5050 u64 count = 0; 6099 u64 count = 0;
5051 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); 6100 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6101 nfsd_net_id);
5052 6102
5053 if (!nfsd_netns_ready(nn)) 6103 if (!nfsd_netns_ready(nn))
5054 return 0; 6104 return 0;
5055 6105
5056 list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { 6106 spin_lock(&nn->client_lock);
5057 count += func(clp, max - count); 6107 list_for_each_entry(clp, &nn->client_lru, cl_lru)
5058 if ((max != 0) && (count >= max)) 6108 count += nfsd_print_client_delegations(clp);
5059 break; 6109 spin_unlock(&nn->client_lock);
6110
6111 return count;
6112}
6113
6114static void
6115nfsd_forget_delegations(struct list_head *reaplist)
6116{
6117 struct nfs4_client *clp;
6118 struct nfs4_delegation *dp, *next;
6119
6120 list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
6121 list_del_init(&dp->dl_recall_lru);
6122 clp = dp->dl_stid.sc_client;
6123 revoke_delegation(dp);
6124 put_client(clp);
5060 } 6125 }
6126}
5061 6127
6128u64
6129nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr,
6130 size_t addr_size)
6131{
6132 u64 count = 0;
6133 struct nfs4_client *clp;
6134 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6135 nfsd_net_id);
6136 LIST_HEAD(reaplist);
6137
6138 if (!nfsd_netns_ready(nn))
6139 return count;
6140
6141 spin_lock(&nn->client_lock);
6142 clp = nfsd_find_client(addr, addr_size);
6143 if (clp)
6144 count = nfsd_find_all_delegations(clp, 0, &reaplist);
6145 spin_unlock(&nn->client_lock);
6146
6147 nfsd_forget_delegations(&reaplist);
5062 return count; 6148 return count;
5063} 6149}
5064 6150
5065struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) 6151u64
6152nfsd_inject_forget_delegations(u64 max)
5066{ 6153{
6154 u64 count = 0;
5067 struct nfs4_client *clp; 6155 struct nfs4_client *clp;
5068 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); 6156 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6157 nfsd_net_id);
6158 LIST_HEAD(reaplist);
5069 6159
5070 if (!nfsd_netns_ready(nn)) 6160 if (!nfsd_netns_ready(nn))
5071 return NULL; 6161 return count;
5072 6162
6163 spin_lock(&nn->client_lock);
5073 list_for_each_entry(clp, &nn->client_lru, cl_lru) { 6164 list_for_each_entry(clp, &nn->client_lru, cl_lru) {
5074 if (memcmp(&clp->cl_addr, addr, addr_size) == 0) 6165 count += nfsd_find_all_delegations(clp, max - count, &reaplist);
5075 return clp; 6166 if (max != 0 && count >= max)
6167 break;
5076 } 6168 }
5077 return NULL; 6169 spin_unlock(&nn->client_lock);
6170 nfsd_forget_delegations(&reaplist);
6171 return count;
5078} 6172}
5079 6173
6174static void
6175nfsd_recall_delegations(struct list_head *reaplist)
6176{
6177 struct nfs4_client *clp;
6178 struct nfs4_delegation *dp, *next;
6179
6180 list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
6181 list_del_init(&dp->dl_recall_lru);
6182 clp = dp->dl_stid.sc_client;
6183 /*
6184 * We skipped all entries that had a zero dl_time before,
6185 * so we can now reset the dl_time back to 0. If a delegation
6186 * break comes in now, then it won't make any difference since
6187 * we're recalling it either way.
6188 */
6189 spin_lock(&state_lock);
6190 dp->dl_time = 0;
6191 spin_unlock(&state_lock);
6192 nfsd_break_one_deleg(dp);
6193 put_client(clp);
6194 }
6195}
6196
6197u64
6198nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr,
6199 size_t addr_size)
6200{
6201 u64 count = 0;
6202 struct nfs4_client *clp;
6203 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6204 nfsd_net_id);
6205 LIST_HEAD(reaplist);
6206
6207 if (!nfsd_netns_ready(nn))
6208 return count;
6209
6210 spin_lock(&nn->client_lock);
6211 clp = nfsd_find_client(addr, addr_size);
6212 if (clp)
6213 count = nfsd_find_all_delegations(clp, 0, &reaplist);
6214 spin_unlock(&nn->client_lock);
6215
6216 nfsd_recall_delegations(&reaplist);
6217 return count;
6218}
6219
6220u64
6221nfsd_inject_recall_delegations(u64 max)
6222{
6223 u64 count = 0;
6224 struct nfs4_client *clp, *next;
6225 struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
6226 nfsd_net_id);
6227 LIST_HEAD(reaplist);
6228
6229 if (!nfsd_netns_ready(nn))
6230 return count;
6231
6232 spin_lock(&nn->client_lock);
6233 list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
6234 count += nfsd_find_all_delegations(clp, max - count, &reaplist);
6235 if (max != 0 && ++count >= max)
6236 break;
6237 }
6238 spin_unlock(&nn->client_lock);
6239 nfsd_recall_delegations(&reaplist);
6240 return count;
6241}
5080#endif /* CONFIG_NFSD_FAULT_INJECTION */ 6242#endif /* CONFIG_NFSD_FAULT_INJECTION */
5081 6243
5082/* 6244/*
@@ -5113,14 +6275,6 @@ static int nfs4_state_create_net(struct net *net)
5113 CLIENT_HASH_SIZE, GFP_KERNEL); 6275 CLIENT_HASH_SIZE, GFP_KERNEL);
5114 if (!nn->unconf_id_hashtbl) 6276 if (!nn->unconf_id_hashtbl)
5115 goto err_unconf_id; 6277 goto err_unconf_id;
5116 nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
5117 OWNER_HASH_SIZE, GFP_KERNEL);
5118 if (!nn->ownerstr_hashtbl)
5119 goto err_ownerstr;
5120 nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) *
5121 LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL);
5122 if (!nn->lockowner_ino_hashtbl)
5123 goto err_lockowner_ino;
5124 nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * 6278 nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
5125 SESSION_HASH_SIZE, GFP_KERNEL); 6279 SESSION_HASH_SIZE, GFP_KERNEL);
5126 if (!nn->sessionid_hashtbl) 6280 if (!nn->sessionid_hashtbl)
@@ -5130,10 +6284,6 @@ static int nfs4_state_create_net(struct net *net)
5130 INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); 6284 INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
5131 INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); 6285 INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
5132 } 6286 }
5133 for (i = 0; i < OWNER_HASH_SIZE; i++)
5134 INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
5135 for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
5136 INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]);
5137 for (i = 0; i < SESSION_HASH_SIZE; i++) 6287 for (i = 0; i < SESSION_HASH_SIZE; i++)
5138 INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); 6288 INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
5139 nn->conf_name_tree = RB_ROOT; 6289 nn->conf_name_tree = RB_ROOT;
@@ -5149,10 +6299,6 @@ static int nfs4_state_create_net(struct net *net)
5149 return 0; 6299 return 0;
5150 6300
5151err_sessionid: 6301err_sessionid:
5152 kfree(nn->lockowner_ino_hashtbl);
5153err_lockowner_ino:
5154 kfree(nn->ownerstr_hashtbl);
5155err_ownerstr:
5156 kfree(nn->unconf_id_hashtbl); 6302 kfree(nn->unconf_id_hashtbl);
5157err_unconf_id: 6303err_unconf_id:
5158 kfree(nn->conf_id_hashtbl); 6304 kfree(nn->conf_id_hashtbl);
@@ -5182,8 +6328,6 @@ nfs4_state_destroy_net(struct net *net)
5182 } 6328 }
5183 6329
5184 kfree(nn->sessionid_hashtbl); 6330 kfree(nn->sessionid_hashtbl);
5185 kfree(nn->lockowner_ino_hashtbl);
5186 kfree(nn->ownerstr_hashtbl);
5187 kfree(nn->unconf_id_hashtbl); 6331 kfree(nn->unconf_id_hashtbl);
5188 kfree(nn->conf_id_hashtbl); 6332 kfree(nn->conf_id_hashtbl);
5189 put_net(net); 6333 put_net(net);
@@ -5247,22 +6391,22 @@ nfs4_state_shutdown_net(struct net *net)
5247 cancel_delayed_work_sync(&nn->laundromat_work); 6391 cancel_delayed_work_sync(&nn->laundromat_work);
5248 locks_end_grace(&nn->nfsd4_manager); 6392 locks_end_grace(&nn->nfsd4_manager);
5249 6393
5250 nfs4_lock_state();
5251 INIT_LIST_HEAD(&reaplist); 6394 INIT_LIST_HEAD(&reaplist);
5252 spin_lock(&state_lock); 6395 spin_lock(&state_lock);
5253 list_for_each_safe(pos, next, &nn->del_recall_lru) { 6396 list_for_each_safe(pos, next, &nn->del_recall_lru) {
5254 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6397 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
5255 list_move(&dp->dl_recall_lru, &reaplist); 6398 unhash_delegation_locked(dp);
6399 list_add(&dp->dl_recall_lru, &reaplist);
5256 } 6400 }
5257 spin_unlock(&state_lock); 6401 spin_unlock(&state_lock);
5258 list_for_each_safe(pos, next, &reaplist) { 6402 list_for_each_safe(pos, next, &reaplist) {
5259 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 6403 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
5260 destroy_delegation(dp); 6404 list_del_init(&dp->dl_recall_lru);
6405 nfs4_put_stid(&dp->dl_stid);
5261 } 6406 }
5262 6407
5263 nfsd4_client_tracking_exit(net); 6408 nfsd4_client_tracking_exit(net);
5264 nfs4_state_destroy_net(net); 6409 nfs4_state_destroy_net(net);
5265 nfs4_unlock_state();
5266} 6410}
5267 6411
5268void 6412void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 944275c8f56d..f9821ce6658a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -181,28 +181,43 @@ static int zero_clientid(clientid_t *clid)
181} 181}
182 182
183/** 183/**
184 * defer_free - mark an allocation as deferred freed 184 * svcxdr_tmpalloc - allocate memory to be freed after compound processing
185 * @argp: NFSv4 compound argument structure to be freed with 185 * @argp: NFSv4 compound argument structure
186 * @release: release callback to free @p, typically kfree() 186 * @p: pointer to be freed (with kfree())
187 * @p: pointer to be freed
188 * 187 *
189 * Marks @p to be freed when processing the compound operation 188 * Marks @p to be freed when processing the compound operation
190 * described in @argp finishes. 189 * described in @argp finishes.
191 */ 190 */
192static int 191static void *
193defer_free(struct nfsd4_compoundargs *argp, 192svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
194 void (*release)(const void *), void *p)
195{ 193{
196 struct tmpbuf *tb; 194 struct svcxdr_tmpbuf *tb;
197 195
198 tb = kmalloc(sizeof(*tb), GFP_KERNEL); 196 tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL);
199 if (!tb) 197 if (!tb)
200 return -ENOMEM; 198 return NULL;
201 tb->buf = p;
202 tb->release = release;
203 tb->next = argp->to_free; 199 tb->next = argp->to_free;
204 argp->to_free = tb; 200 argp->to_free = tb;
205 return 0; 201 return tb->buf;
202}
203
204/*
205 * For xdr strings that need to be passed to other kernel api's
206 * as null-terminated strings.
207 *
208 * Note null-terminating in place usually isn't safe since the
209 * buffer might end on a page boundary.
210 */
211static char *
212svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
213{
214 char *p = svcxdr_tmpalloc(argp, len + 1);
215
216 if (!p)
217 return NULL;
218 memcpy(p, buf, len);
219 p[len] = '\0';
220 return p;
206} 221}
207 222
208/** 223/**
@@ -217,19 +232,13 @@ defer_free(struct nfsd4_compoundargs *argp,
217 */ 232 */
218static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) 233static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
219{ 234{
220 if (p == argp->tmp) { 235 void *ret;
221 p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); 236
222 if (!p) 237 ret = svcxdr_tmpalloc(argp, nbytes);
223 return NULL; 238 if (!ret)
224 } else {
225 BUG_ON(p != argp->tmpp);
226 argp->tmpp = NULL;
227 }
228 if (defer_free(argp, kfree, p)) {
229 kfree(p);
230 return NULL; 239 return NULL;
231 } else 240 memcpy(ret, p, nbytes);
232 return (char *)p; 241 return ret;
233} 242}
234 243
235static __be32 244static __be32
@@ -292,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
292 if (nace > NFS4_ACL_MAX) 301 if (nace > NFS4_ACL_MAX)
293 return nfserr_fbig; 302 return nfserr_fbig;
294 303
295 *acl = nfs4_acl_new(nace); 304 *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
296 if (*acl == NULL) 305 if (*acl == NULL)
297 return nfserr_jukebox; 306 return nfserr_jukebox;
298 307
299 defer_free(argp, kfree, *acl);
300
301 (*acl)->naces = nace; 308 (*acl)->naces = nace;
302 for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { 309 for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
303 READ_BUF(16); len += 16; 310 READ_BUF(16); len += 16;
@@ -418,12 +425,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
418 return nfserr_badlabel; 425 return nfserr_badlabel;
419 len += (XDR_QUADLEN(dummy32) << 2); 426 len += (XDR_QUADLEN(dummy32) << 2);
420 READMEM(buf, dummy32); 427 READMEM(buf, dummy32);
421 label->data = kzalloc(dummy32 + 1, GFP_KERNEL); 428 label->len = dummy32;
429 label->data = svcxdr_dupstr(argp, buf, dummy32);
422 if (!label->data) 430 if (!label->data)
423 return nfserr_jukebox; 431 return nfserr_jukebox;
424 label->len = dummy32;
425 defer_free(argp, kfree, label->data);
426 memcpy(label->data, buf, dummy32);
427 } 432 }
428#endif 433#endif
429 434
@@ -598,20 +603,11 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
598 switch (create->cr_type) { 603 switch (create->cr_type) {
599 case NF4LNK: 604 case NF4LNK:
600 READ_BUF(4); 605 READ_BUF(4);
601 create->cr_linklen = be32_to_cpup(p++); 606 create->cr_datalen = be32_to_cpup(p++);
602 READ_BUF(create->cr_linklen); 607 READ_BUF(create->cr_datalen);
603 /* 608 create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
604 * The VFS will want a null-terminated string, and 609 if (!create->cr_data)
605 * null-terminating in place isn't safe since this might
606 * end on a page boundary:
607 */
608 create->cr_linkname =
609 kmalloc(create->cr_linklen + 1, GFP_KERNEL);
610 if (!create->cr_linkname)
611 return nfserr_jukebox; 610 return nfserr_jukebox;
612 memcpy(create->cr_linkname, p, create->cr_linklen);
613 create->cr_linkname[create->cr_linklen] = '\0';
614 defer_free(argp, kfree, create->cr_linkname);
615 break; 611 break;
616 case NF4BLK: 612 case NF4BLK:
617 case NF4CHR: 613 case NF4CHR:
@@ -1481,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
1481 INIT_LIST_HEAD(&test_stateid->ts_stateid_list); 1477 INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
1482 1478
1483 for (i = 0; i < test_stateid->ts_num_ids; i++) { 1479 for (i = 0; i < test_stateid->ts_num_ids; i++) {
1484 stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); 1480 stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
1485 if (!stateid) { 1481 if (!stateid) {
1486 status = nfserrno(-ENOMEM); 1482 status = nfserrno(-ENOMEM);
1487 goto out; 1483 goto out;
1488 } 1484 }
1489 1485
1490 defer_free(argp, kfree, stateid);
1491 INIT_LIST_HEAD(&stateid->ts_id_list); 1486 INIT_LIST_HEAD(&stateid->ts_id_list);
1492 list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); 1487 list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
1493 1488
@@ -1640,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1640 goto xdr_error; 1635 goto xdr_error;
1641 1636
1642 if (argp->opcnt > ARRAY_SIZE(argp->iops)) { 1637 if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
1643 argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); 1638 argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
1644 if (!argp->ops) { 1639 if (!argp->ops) {
1645 argp->ops = argp->iops; 1640 argp->ops = argp->iops;
1646 dprintk("nfsd: couldn't allocate room for COMPOUND\n"); 1641 dprintk("nfsd: couldn't allocate room for COMPOUND\n");
@@ -3077,11 +3072,8 @@ static __be32 nfsd4_encode_splice_read(
3077 __be32 nfserr; 3072 __be32 nfserr;
3078 __be32 *p = xdr->p - 2; 3073 __be32 *p = xdr->p - 2;
3079 3074
3080 /* 3075 /* Make sure there will be room for padding if needed */
3081 * Don't inline pages unless we know there's room for eof, 3076 if (xdr->end - xdr->p < 1)
3082 * count, and possible padding:
3083 */
3084 if (xdr->end - xdr->p < 3)
3085 return nfserr_resource; 3077 return nfserr_resource;
3086 3078
3087 nfserr = nfsd_splice_read(read->rd_rqstp, file, 3079 nfserr = nfsd_splice_read(read->rd_rqstp, file,
@@ -3147,9 +3139,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
3147 len = maxcount; 3139 len = maxcount;
3148 v = 0; 3140 v = 0;
3149 3141
3150 thislen = (void *)xdr->end - (void *)xdr->p; 3142 thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
3151 if (len < thislen)
3152 thislen = len;
3153 p = xdr_reserve_space(xdr, (thislen+3)&~3); 3143 p = xdr_reserve_space(xdr, (thislen+3)&~3);
3154 WARN_ON_ONCE(!p); 3144 WARN_ON_ONCE(!p);
3155 resp->rqstp->rq_vec[v].iov_base = p; 3145 resp->rqstp->rq_vec[v].iov_base = p;
@@ -3216,10 +3206,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
3216 xdr_commit_encode(xdr); 3206 xdr_commit_encode(xdr);
3217 3207
3218 maxcount = svc_max_payload(resp->rqstp); 3208 maxcount = svc_max_payload(resp->rqstp);
3219 if (maxcount > xdr->buf->buflen - xdr->buf->len) 3209 maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
3220 maxcount = xdr->buf->buflen - xdr->buf->len; 3210 maxcount = min_t(unsigned long, maxcount, read->rd_length);
3221 if (maxcount > read->rd_length)
3222 maxcount = read->rd_length;
3223 3211
3224 if (!read->rd_filp) { 3212 if (!read->rd_filp) {
3225 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, 3213 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
@@ -3937,8 +3925,6 @@ status:
3937 * 3925 *
3938 * XDR note: do not encode rp->rp_buflen: the buffer contains the 3926 * XDR note: do not encode rp->rp_buflen: the buffer contains the
3939 * previously sent already encoded operation. 3927 * previously sent already encoded operation.
3940 *
3941 * called with nfs4_lock_state() held
3942 */ 3928 */
3943void 3929void
3944nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) 3930nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
@@ -3977,9 +3963,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
3977 kfree(args->tmpp); 3963 kfree(args->tmpp);
3978 args->tmpp = NULL; 3964 args->tmpp = NULL;
3979 while (args->to_free) { 3965 while (args->to_free) {
3980 struct tmpbuf *tb = args->to_free; 3966 struct svcxdr_tmpbuf *tb = args->to_free;
3981 args->to_free = tb->next; 3967 args->to_free = tb->next;
3982 tb->release(tb->buf);
3983 kfree(tb); 3968 kfree(tb);
3984 } 3969 }
3985 return 1; 3970 return 1;
@@ -4012,7 +3997,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
4012 /* 3997 /*
4013 * All that remains is to write the tag and operation count... 3998 * All that remains is to write the tag and operation count...
4014 */ 3999 */
4015 struct nfsd4_compound_state *cs = &resp->cstate;
4016 struct xdr_buf *buf = resp->xdr.buf; 4000 struct xdr_buf *buf = resp->xdr.buf;
4017 4001
4018 WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + 4002 WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
@@ -4026,19 +4010,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
4026 p += XDR_QUADLEN(resp->taglen); 4010 p += XDR_QUADLEN(resp->taglen);
4027 *p++ = htonl(resp->opcnt); 4011 *p++ = htonl(resp->opcnt);
4028 4012
4029 if (nfsd4_has_session(cs)) { 4013 nfsd4_sequence_done(resp);
4030 struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
4031 struct nfs4_client *clp = cs->session->se_client;
4032 if (cs->status != nfserr_replay_cache) {
4033 nfsd4_store_cache_entry(resp);
4034 cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
4035 }
4036 /* Renew the clientid on success and on replay */
4037 spin_lock(&nn->client_lock);
4038 nfsd4_put_session(cs->session);
4039 spin_unlock(&nn->client_lock);
4040 put_client_renew(clp);
4041 }
4042 return 1; 4014 return 1;
4043} 4015}
4044 4016
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6040da8830ff..ff9567633245 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -221,7 +221,12 @@ static void
221hash_refile(struct svc_cacherep *rp) 221hash_refile(struct svc_cacherep *rp)
222{ 222{
223 hlist_del_init(&rp->c_hash); 223 hlist_del_init(&rp->c_hash);
224 hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); 224 /*
225 * No point in byte swapping c_xid since we're just using it to pick
226 * a hash bucket.
227 */
228 hlist_add_head(&rp->c_hash, cache_hash +
229 hash_32((__force u32)rp->c_xid, maskbits));
225} 230}
226 231
227/* 232/*
@@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
356 struct hlist_head *rh; 361 struct hlist_head *rh;
357 unsigned int entries = 0; 362 unsigned int entries = 0;
358 363
359 rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; 364 /*
365 * No point in byte swapping rq_xid since we're just using it to pick
366 * a hash bucket.
367 */
368 rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
360 hlist_for_each_entry(rp, rh, c_hash) { 369 hlist_for_each_entry(rp, rh, c_hash) {
361 ++entries; 370 ++entries;
362 if (nfsd_cache_match(rqstp, csum, rp)) { 371 if (nfsd_cache_match(rqstp, csum, rp)) {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51844048937f..4e042105fb6e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -39,6 +39,7 @@ enum {
39 NFSD_Versions, 39 NFSD_Versions,
40 NFSD_Ports, 40 NFSD_Ports,
41 NFSD_MaxBlkSize, 41 NFSD_MaxBlkSize,
42 NFSD_MaxConnections,
42 NFSD_SupportedEnctypes, 43 NFSD_SupportedEnctypes,
43 /* 44 /*
44 * The below MUST come last. Otherwise we leave a hole in nfsd_files[] 45 * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
@@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
62static ssize_t write_versions(struct file *file, char *buf, size_t size); 63static ssize_t write_versions(struct file *file, char *buf, size_t size);
63static ssize_t write_ports(struct file *file, char *buf, size_t size); 64static ssize_t write_ports(struct file *file, char *buf, size_t size);
64static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); 65static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
66static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
65#ifdef CONFIG_NFSD_V4 67#ifdef CONFIG_NFSD_V4
66static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 68static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
67static ssize_t write_gracetime(struct file *file, char *buf, size_t size); 69static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
@@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
77 [NFSD_Versions] = write_versions, 79 [NFSD_Versions] = write_versions,
78 [NFSD_Ports] = write_ports, 80 [NFSD_Ports] = write_ports,
79 [NFSD_MaxBlkSize] = write_maxblksize, 81 [NFSD_MaxBlkSize] = write_maxblksize,
82 [NFSD_MaxConnections] = write_maxconn,
80#ifdef CONFIG_NFSD_V4 83#ifdef CONFIG_NFSD_V4
81 [NFSD_Leasetime] = write_leasetime, 84 [NFSD_Leasetime] = write_leasetime,
82 [NFSD_Gracetime] = write_gracetime, 85 [NFSD_Gracetime] = write_gracetime,
@@ -369,8 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
369 372
370 if (maxsize < NFS_FHSIZE) 373 if (maxsize < NFS_FHSIZE)
371 return -EINVAL; 374 return -EINVAL;
372 if (maxsize > NFS3_FHSIZE) 375 maxsize = min(maxsize, NFS3_FHSIZE);
373 maxsize = NFS3_FHSIZE;
374 376
375 if (qword_get(&mesg, mesg, size)>0) 377 if (qword_get(&mesg, mesg, size)>0)
376 return -EINVAL; 378 return -EINVAL;
@@ -871,10 +873,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
871 /* force bsize into allowed range and 873 /* force bsize into allowed range and
872 * required alignment. 874 * required alignment.
873 */ 875 */
874 if (bsize < 1024) 876 bsize = max_t(int, bsize, 1024);
875 bsize = 1024; 877 bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE);
876 if (bsize > NFSSVC_MAXBLKSIZE)
877 bsize = NFSSVC_MAXBLKSIZE;
878 bsize &= ~(1024-1); 878 bsize &= ~(1024-1);
879 mutex_lock(&nfsd_mutex); 879 mutex_lock(&nfsd_mutex);
880 if (nn->nfsd_serv) { 880 if (nn->nfsd_serv) {
@@ -889,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
889 nfsd_max_blksize); 889 nfsd_max_blksize);
890} 890}
891 891
892/**
893 * write_maxconn - Set or report the current max number of connections
894 *
895 * Input:
896 * buf: ignored
897 * size: zero
898 * OR
899 *
900 * Input:
901 * buf: C string containing an unsigned
902 * integer value representing the new
903 * number of max connections
904 * size: non-zero length of C string in @buf
905 * Output:
906 * On success: passed-in buffer filled with '\n'-terminated C string
907 * containing numeric value of max_connections setting
908 * for this net namespace;
909 * return code is the size in bytes of the string
910 * On error: return code is zero or a negative errno value
911 */
912static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
913{
914 char *mesg = buf;
915 struct net *net = file->f_dentry->d_sb->s_fs_info;
916 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
917 unsigned int maxconn = nn->max_connections;
918
919 if (size > 0) {
920 int rv = get_uint(&mesg, &maxconn);
921
922 if (rv)
923 return rv;
924 nn->max_connections = maxconn;
925 }
926
927 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
928}
929
892#ifdef CONFIG_NFSD_V4 930#ifdef CONFIG_NFSD_V4
893static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, 931static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
894 time_t *time, struct nfsd_net *nn) 932 time_t *time, struct nfsd_net *nn)
@@ -1064,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1064 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, 1102 [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
1065 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, 1103 [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
1066 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1104 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1105 [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
1067#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) 1106#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
1068 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, 1107 [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
1069#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ 1108#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ec8393418154..e883a5868be6 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
162 /* deprecated, convert to type 3 */ 162 /* deprecated, convert to type 3 */
163 len = key_len(FSID_ENCODE_DEV)/4; 163 len = key_len(FSID_ENCODE_DEV)/4;
164 fh->fh_fsid_type = FSID_ENCODE_DEV; 164 fh->fh_fsid_type = FSID_ENCODE_DEV;
165 fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); 165 /*
166 * struct knfsd_fh uses host-endian fields, which are
167 * sometimes used to hold net-endian values. This
168 * confuses sparse, so we must use __force here to
169 * keep it from complaining.
170 */
171 fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
172 ntohl((__force __be32)fh->fh_fsid[1])));
166 fh->fh_fsid[1] = fh->fh_fsid[2]; 173 fh->fh_fsid[1] = fh->fh_fsid[2];
167 } 174 }
168 data_left -= len; 175 data_left -= len;
@@ -539,8 +546,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
539 dentry); 546 dentry);
540 547
541 fhp->fh_dentry = dget(dentry); /* our internal copy */ 548 fhp->fh_dentry = dget(dentry); /* our internal copy */
542 fhp->fh_export = exp; 549 fhp->fh_export = exp_get(exp);
543 cache_get(&exp->h);
544 550
545 if (fhp->fh_handle.fh_version == 0xca) { 551 if (fhp->fh_handle.fh_version == 0xca) {
546 /* old style filehandle please */ 552 /* old style filehandle please */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 2e89e70ac15c..08236d70c667 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -73,8 +73,15 @@ enum fsid_source {
73extern enum fsid_source fsid_source(struct svc_fh *fhp); 73extern enum fsid_source fsid_source(struct svc_fh *fhp);
74 74
75 75
76/* This might look a little large to "inline" but in all calls except 76/*
77 * This might look a little large to "inline" but in all calls except
 77 * one, 'vers' is constant so most of the function disappears. 78 * one, 'vers' is constant so most of the function disappears.
79 *
80 * In some cases the values are considered to be host endian and in
81 * others, net endian. fsidv is always considered to be u32 as the
82 * callers don't know which it will be. So we must use __force to keep
83 * sparse from complaining. Since these values are opaque to the
84 * client, that shouldn't be a problem.
78 */ 85 */
79static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, 86static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
80 u32 fsid, unsigned char *uuid) 87 u32 fsid, unsigned char *uuid)
@@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
82 u32 *up; 89 u32 *up;
83 switch(vers) { 90 switch(vers) {
84 case FSID_DEV: 91 case FSID_DEV:
85 fsidv[0] = htonl((MAJOR(dev)<<16) | 92 fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) |
86 MINOR(dev)); 93 MINOR(dev));
87 fsidv[1] = ino_t_to_u32(ino); 94 fsidv[1] = ino_t_to_u32(ino);
88 break; 95 break;
@@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
90 fsidv[0] = fsid; 97 fsidv[0] = fsid;
91 break; 98 break;
92 case FSID_MAJOR_MINOR: 99 case FSID_MAJOR_MINOR:
93 fsidv[0] = htonl(MAJOR(dev)); 100 fsidv[0] = (__force __u32)htonl(MAJOR(dev));
94 fsidv[1] = htonl(MINOR(dev)); 101 fsidv[1] = (__force __u32)htonl(MINOR(dev));
95 fsidv[2] = ino_t_to_u32(ino); 102 fsidv[2] = ino_t_to_u32(ino);
96 break; 103 break;
97 104
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 54c6b3d3cc79..b8680738f588 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -403,12 +403,13 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
403 403
404 fh_init(&newfh, NFS_FHSIZE); 404 fh_init(&newfh, NFS_FHSIZE);
405 /* 405 /*
406 * Create the link, look up new file and set attrs. 406 * Crazy hack: the request fits in a page, and already-decoded
407 * attributes follow argp->tname, so it's safe to just write a
408 * null to ensure it's null-terminated:
407 */ 409 */
410 argp->tname[argp->tlen] = '\0';
408 nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, 411 nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
409 argp->tname, argp->tlen, 412 argp->tname, &newfh);
410 &newfh, &argp->attrs);
411
412 413
413 fh_put(&argp->ffh); 414 fh_put(&argp->ffh);
414 fh_put(&newfh); 415 fh_put(&newfh);
@@ -716,6 +717,7 @@ nfserrno (int errno)
716 { nfserr_noent, -ENOENT }, 717 { nfserr_noent, -ENOENT },
717 { nfserr_io, -EIO }, 718 { nfserr_io, -EIO },
718 { nfserr_nxio, -ENXIO }, 719 { nfserr_nxio, -ENXIO },
720 { nfserr_fbig, -E2BIG },
719 { nfserr_acces, -EACCES }, 721 { nfserr_acces, -EACCES },
720 { nfserr_exist, -EEXIST }, 722 { nfserr_exist, -EEXIST },
721 { nfserr_xdev, -EXDEV }, 723 { nfserr_xdev, -EXDEV },
@@ -743,6 +745,7 @@ nfserrno (int errno)
743 { nfserr_notsupp, -EOPNOTSUPP }, 745 { nfserr_notsupp, -EOPNOTSUPP },
744 { nfserr_toosmall, -ETOOSMALL }, 746 { nfserr_toosmall, -ETOOSMALL },
745 { nfserr_serverfault, -ESERVERFAULT }, 747 { nfserr_serverfault, -ESERVERFAULT },
748 { nfserr_serverfault, -ENFILE },
746 }; 749 };
747 int i; 750 int i;
748 751
@@ -750,7 +753,7 @@ nfserrno (int errno)
750 if (nfs_errtbl[i].syserr == errno) 753 if (nfs_errtbl[i].syserr == errno)
751 return nfs_errtbl[i].nfserr; 754 return nfs_errtbl[i].nfserr;
752 } 755 }
753 printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); 756 WARN(1, "nfsd: non-standard errno: %d\n", errno);
754 return nfserr_io; 757 return nfserr_io;
755} 758}
756 759
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1879e43f2868..752d56bbe0ba 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs)
221 */ 221 */
222 ret = nfsd_racache_init(2*nrservs); 222 ret = nfsd_racache_init(2*nrservs);
223 if (ret) 223 if (ret)
224 return ret; 224 goto dec_users;
225
225 ret = nfs4_state_start(); 226 ret = nfs4_state_start();
226 if (ret) 227 if (ret)
227 goto out_racache; 228 goto out_racache;
@@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs)
229 230
230out_racache: 231out_racache:
231 nfsd_racache_shutdown(); 232 nfsd_racache_shutdown();
233dec_users:
234 nfsd_users--;
232 return ret; 235 return ret;
233} 236}
234 237
@@ -405,6 +408,7 @@ int nfsd_create_serv(struct net *net)
405 if (nn->nfsd_serv == NULL) 408 if (nn->nfsd_serv == NULL)
406 return -ENOMEM; 409 return -ENOMEM;
407 410
411 nn->nfsd_serv->sv_maxconn = nn->max_connections;
408 error = svc_bind(nn->nfsd_serv, net); 412 error = svc_bind(nn->nfsd_serv, net);
409 if (error < 0) { 413 if (error < 0) {
410 svc_destroy(nn->nfsd_serv); 414 svc_destroy(nn->nfsd_serv);
@@ -469,8 +473,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
469 /* enforce a global maximum number of threads */ 473 /* enforce a global maximum number of threads */
470 tot = 0; 474 tot = 0;
471 for (i = 0; i < n; i++) { 475 for (i = 0; i < n; i++) {
472 if (nthreads[i] > NFSD_MAXSERVS) 476 nthreads[i] = min(nthreads[i], NFSD_MAXSERVS);
473 nthreads[i] = NFSD_MAXSERVS;
474 tot += nthreads[i]; 477 tot += nthreads[i];
475 } 478 }
476 if (tot > NFSD_MAXSERVS) { 479 if (tot > NFSD_MAXSERVS) {
@@ -519,11 +522,11 @@ nfsd_svc(int nrservs, struct net *net)
519 522
520 mutex_lock(&nfsd_mutex); 523 mutex_lock(&nfsd_mutex);
521 dprintk("nfsd: creating service\n"); 524 dprintk("nfsd: creating service\n");
522 if (nrservs <= 0) 525
523 nrservs = 0; 526 nrservs = max(nrservs, 0);
524 if (nrservs > NFSD_MAXSERVS) 527 nrservs = min(nrservs, NFSD_MAXSERVS);
525 nrservs = NFSD_MAXSERVS;
526 error = 0; 528 error = 0;
529
527 if (nrservs == 0 && nn->nfsd_serv == NULL) 530 if (nrservs == 0 && nn->nfsd_serv == NULL)
528 goto out; 531 goto out;
529 532
@@ -564,6 +567,7 @@ nfsd(void *vrqstp)
564 struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; 567 struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
565 struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); 568 struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
566 struct net *net = perm_sock->xpt_net; 569 struct net *net = perm_sock->xpt_net;
570 struct nfsd_net *nn = net_generic(net, nfsd_net_id);
567 int err; 571 int err;
568 572
569 /* Lock module and set up kernel thread */ 573 /* Lock module and set up kernel thread */
@@ -597,6 +601,9 @@ nfsd(void *vrqstp)
597 * The main request loop 601 * The main request loop
598 */ 602 */
599 for (;;) { 603 for (;;) {
604 /* Update sv_maxconn if it has changed */
605 rqstp->rq_server->sv_maxconn = nn->max_connections;
606
600 /* 607 /*
601 * Find a socket with data available and call its 608 * Find a socket with data available and call its
602 * recvfrom routine. 609 * recvfrom routine.
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 1ac306b769df..412d7061f9e5 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
257 len = args->count = ntohl(*p++); 257 len = args->count = ntohl(*p++);
258 p++; /* totalcount - unused */ 258 p++; /* totalcount - unused */
259 259
260 if (len > NFSSVC_MAXBLKSIZE_V2) 260 len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
261 len = NFSSVC_MAXBLKSIZE_V2;
262 261
263 /* set up somewhere to store response. 262 /* set up somewhere to store response.
264 * We take pages, put them on reslist and include in iovec 263 * We take pages, put them on reslist and include in iovec
@@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
268 struct page *p = *(rqstp->rq_next_page++); 267 struct page *p = *(rqstp->rq_next_page++);
269 268
270 rqstp->rq_vec[v].iov_base = page_address(p); 269 rqstp->rq_vec[v].iov_base = page_address(p);
271 rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; 270 rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
272 len -= rqstp->rq_vec[v].iov_len; 271 len -= rqstp->rq_vec[v].iov_len;
273 v++; 272 v++;
274 } 273 }
@@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
400 return 0; 399 return 0;
401 args->cookie = ntohl(*p++); 400 args->cookie = ntohl(*p++);
402 args->count = ntohl(*p++); 401 args->count = ntohl(*p++);
403 if (args->count > PAGE_SIZE) 402 args->count = min_t(u32, args->count, PAGE_SIZE);
404 args->count = PAGE_SIZE;
405
406 args->buffer = page_address(*(rqstp->rq_next_page++)); 403 args->buffer = page_address(*(rqstp->rq_next_page++));
407 404
408 return xdr_argsize_check(rqstp, p); 405 return xdr_argsize_check(rqstp, p);
@@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name,
516 } 513 }
517 if (cd->offset) 514 if (cd->offset)
518 *cd->offset = htonl(offset); 515 *cd->offset = htonl(offset);
519 if (namlen > NFS2_MAXNAMLEN)
520 namlen = NFS2_MAXNAMLEN;/* truncate filename */
521 516
517 /* truncate filename */
518 namlen = min(namlen, NFS2_MAXNAMLEN);
522 slen = XDR_QUADLEN(namlen); 519 slen = XDR_QUADLEN(namlen);
520
523 if ((buflen = cd->buflen - slen - 4) < 0) { 521 if ((buflen = cd->buflen - slen - 4) < 0) {
524 cd->common.err = nfserr_toosmall; 522 cd->common.err = nfserr_toosmall;
525 return -EINVAL; 523 return -EINVAL;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 374c66283ac5..4a89e00d7461 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -72,7 +72,13 @@ struct nfsd4_callback {
72 bool cb_done; 72 bool cb_done;
73}; 73};
74 74
75/*
76 * A core object that represents a "common" stateid. These are generally
77 * embedded within the different (more specific) stateid objects and contain
78 * fields that are of general use to any stateid.
79 */
75struct nfs4_stid { 80struct nfs4_stid {
81 atomic_t sc_count;
76#define NFS4_OPEN_STID 1 82#define NFS4_OPEN_STID 1
77#define NFS4_LOCK_STID 2 83#define NFS4_LOCK_STID 2
78#define NFS4_DELEG_STID 4 84#define NFS4_DELEG_STID 4
@@ -80,22 +86,43 @@ struct nfs4_stid {
80#define NFS4_CLOSED_STID 8 86#define NFS4_CLOSED_STID 8
81/* For a deleg stateid kept around only to process free_stateid's: */ 87/* For a deleg stateid kept around only to process free_stateid's: */
82#define NFS4_REVOKED_DELEG_STID 16 88#define NFS4_REVOKED_DELEG_STID 16
89#define NFS4_CLOSED_DELEG_STID 32
83 unsigned char sc_type; 90 unsigned char sc_type;
84 stateid_t sc_stateid; 91 stateid_t sc_stateid;
85 struct nfs4_client *sc_client; 92 struct nfs4_client *sc_client;
93 struct nfs4_file *sc_file;
94 void (*sc_free)(struct nfs4_stid *);
86}; 95};
87 96
97/*
98 * Represents a delegation stateid. The nfs4_client holds references to these
99 * and they are put when it is being destroyed or when the delegation is
100 * returned by the client:
101 *
102 * o 1 reference as long as a delegation is still in force (taken when it's
103 * alloc'd, put when it's returned or revoked)
104 *
105 * o 1 reference as long as a recall rpc is in progress (taken when the lease
106 * is broken, put when the rpc exits)
107 *
108 * o 1 more ephemeral reference for each nfsd thread currently doing something
109 * with that delegation without holding the cl_lock
110 *
111 * If the server attempts to recall a delegation and the client doesn't do so
112 * before a timeout, the server may also revoke the delegation. In that case,
113 * the object will either be destroyed (v4.0) or moved to a per-client list of
114 * revoked delegations (v4.1+).
115 *
116 * This object is a superset of the nfs4_stid.
117 */
88struct nfs4_delegation { 118struct nfs4_delegation {
89 struct nfs4_stid dl_stid; /* must be first field */ 119 struct nfs4_stid dl_stid; /* must be first field */
90 struct list_head dl_perfile; 120 struct list_head dl_perfile;
91 struct list_head dl_perclnt; 121 struct list_head dl_perclnt;
92 struct list_head dl_recall_lru; /* delegation recalled */ 122 struct list_head dl_recall_lru; /* delegation recalled */
93 atomic_t dl_count; /* ref count */
94 struct nfs4_file *dl_file;
95 u32 dl_type; 123 u32 dl_type;
96 time_t dl_time; 124 time_t dl_time;
97/* For recall: */ 125/* For recall: */
98 struct knfsd_fh dl_fh;
99 int dl_retries; 126 int dl_retries;
100 struct nfsd4_callback dl_recall; 127 struct nfsd4_callback dl_recall;
101}; 128};
@@ -194,6 +221,11 @@ struct nfsd4_conn {
194 unsigned char cn_flags; 221 unsigned char cn_flags;
195}; 222};
196 223
224/*
225 * Representation of a v4.1+ session. These are refcounted in a similar fashion
226 * to the nfs4_client. References are only taken when the server is actively
227 * working on the object (primarily during the processing of compounds).
228 */
197struct nfsd4_session { 229struct nfsd4_session {
198 atomic_t se_ref; 230 atomic_t se_ref;
199 struct list_head se_hash; /* hash by sessionid */ 231 struct list_head se_hash; /* hash by sessionid */
@@ -212,8 +244,6 @@ struct nfsd4_session {
212 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 244 struct nfsd4_slot *se_slots[]; /* forward channel slots */
213}; 245};
214 246
215extern void nfsd4_put_session(struct nfsd4_session *ses);
216
217/* formatted contents of nfs4_sessionid */ 247/* formatted contents of nfs4_sessionid */
218struct nfsd4_sessionid { 248struct nfsd4_sessionid {
219 clientid_t clientid; 249 clientid_t clientid;
@@ -225,17 +255,35 @@ struct nfsd4_sessionid {
225 255
226/* 256/*
227 * struct nfs4_client - one per client. Clientids live here. 257 * struct nfs4_client - one per client. Clientids live here.
228 * o Each nfs4_client is hashed by clientid.
229 * 258 *
230 * o Each nfs4_clients is also hashed by name 259 * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0)
231 * (the opaque quantity initially sent by the client to identify itself). 260 * or EXCHANGE_ID (for NFSv4.1+). These objects are refcounted and timestamped.
261 * Each nfsd_net_ns object contains a set of these and they are tracked via
262 * short and long form clientid. They are hashed and searched for under the
263 * per-nfsd_net client_lock spinlock.
264 *
265 * References to it are only held during the processing of compounds, and in
266 * certain other operations. In their "resting state" they have a refcount of
267 * 0. If they are not renewed within a lease period, they become eligible for
268 * destruction by the laundromat.
269 *
270 * These objects can also be destroyed prematurely by the fault injection code,
271 * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
272 * Care is taken *not* to do this however when the objects have an elevated
273 * refcount.
274 *
275 * o Each nfs4_client is hashed by clientid
276 *
277 * o Each nfs4_clients is also hashed by name (the opaque quantity initially
278 * sent by the client to identify itself).
232 * 279 *
233 * o cl_perclient list is used to ensure no dangling stateowner references 280 * o cl_perclient list is used to ensure no dangling stateowner references
234 * when we expire the nfs4_client 281 * when we expire the nfs4_client
235 */ 282 */
236struct nfs4_client { 283struct nfs4_client {
237 struct list_head cl_idhash; /* hash by cl_clientid.id */ 284 struct list_head cl_idhash; /* hash by cl_clientid.id */
238 struct rb_node cl_namenode; /* link into by-name trees */ 285 struct rb_node cl_namenode; /* link into by-name trees */
286 struct list_head *cl_ownerstr_hashtbl;
239 struct list_head cl_openowners; 287 struct list_head cl_openowners;
240 struct idr cl_stateids; /* stateid lookup */ 288 struct idr cl_stateids; /* stateid lookup */
241 struct list_head cl_delegations; 289 struct list_head cl_delegations;
@@ -329,21 +377,43 @@ struct nfs4_replay {
329 unsigned int rp_buflen; 377 unsigned int rp_buflen;
330 char *rp_buf; 378 char *rp_buf;
331 struct knfsd_fh rp_openfh; 379 struct knfsd_fh rp_openfh;
380 struct mutex rp_mutex;
332 char rp_ibuf[NFSD4_REPLAY_ISIZE]; 381 char rp_ibuf[NFSD4_REPLAY_ISIZE];
333}; 382};
334 383
384struct nfs4_stateowner;
385
386struct nfs4_stateowner_operations {
387 void (*so_unhash)(struct nfs4_stateowner *);
388 void (*so_free)(struct nfs4_stateowner *);
389};
390
391/*
392 * A core object that represents either an open or lock owner. The object and
393 * lock owner objects have one of these embedded within them. Refcounts and
394 * other fields common to both owner types are contained within these
395 * structures.
396 */
335struct nfs4_stateowner { 397struct nfs4_stateowner {
336 struct list_head so_strhash; /* hash by op_name */ 398 struct list_head so_strhash;
337 struct list_head so_stateids; 399 struct list_head so_stateids;
338 struct nfs4_client * so_client; 400 struct nfs4_client *so_client;
339 /* after increment in ENCODE_SEQID_OP_TAIL, represents the next 401 const struct nfs4_stateowner_operations *so_ops;
402 /* after increment in nfsd4_bump_seqid, represents the next
340 * sequence id expected from the client: */ 403 * sequence id expected from the client: */
341 u32 so_seqid; 404 atomic_t so_count;
342 struct xdr_netobj so_owner; /* open owner name */ 405 u32 so_seqid;
343 struct nfs4_replay so_replay; 406 struct xdr_netobj so_owner; /* open owner name */
344 bool so_is_open_owner; 407 struct nfs4_replay so_replay;
408 bool so_is_open_owner;
345}; 409};
346 410
411/*
412 * When a file is opened, the client provides an open state owner opaque string
413 * that indicates the "owner" of that open. These objects are refcounted.
414 * References to it are held by each open state associated with it. This object
415 * is a superset of the nfs4_stateowner struct.
416 */
347struct nfs4_openowner { 417struct nfs4_openowner {
348 struct nfs4_stateowner oo_owner; /* must be first field */ 418 struct nfs4_stateowner oo_owner; /* must be first field */
349 struct list_head oo_perclient; 419 struct list_head oo_perclient;
@@ -358,15 +428,17 @@ struct nfs4_openowner {
358 struct nfs4_ol_stateid *oo_last_closed_stid; 428 struct nfs4_ol_stateid *oo_last_closed_stid;
359 time_t oo_time; /* time of placement on so_close_lru */ 429 time_t oo_time; /* time of placement on so_close_lru */
360#define NFS4_OO_CONFIRMED 1 430#define NFS4_OO_CONFIRMED 1
361#define NFS4_OO_NEW 4
362 unsigned char oo_flags; 431 unsigned char oo_flags;
363}; 432};
364 433
434/*
435 * Represents a generic "lockowner". Similar to an openowner. References to it
436 * are held by the lock stateids that are created on its behalf. This object is
437 * a superset of the nfs4_stateowner struct (or would be if it needed any extra
438 * fields).
439 */
365struct nfs4_lockowner { 440struct nfs4_lockowner {
366 struct nfs4_stateowner lo_owner; /* must be first element */ 441 struct nfs4_stateowner lo_owner; /* must be first element */
367 struct list_head lo_owner_ino_hash; /* hash by owner,file */
368 struct list_head lo_perstateid;
369 struct list_head lo_list; /* for temporary uses */
370}; 442};
371 443
372static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) 444static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -379,9 +451,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
379 return container_of(so, struct nfs4_lockowner, lo_owner); 451 return container_of(so, struct nfs4_lockowner, lo_owner);
380} 452}
381 453
382/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ 454/*
455 * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
456 *
457 * These objects are global. nfsd only keeps one instance of a nfs4_file per
458 * inode (though it may keep multiple file descriptors open per inode). These
459 * are tracked in the file_hashtbl which is protected by the state_lock
460 * spinlock.
461 */
383struct nfs4_file { 462struct nfs4_file {
384 atomic_t fi_ref; 463 atomic_t fi_ref;
464 spinlock_t fi_lock;
385 struct hlist_node fi_hash; /* hash by "struct inode *" */ 465 struct hlist_node fi_hash; /* hash by "struct inode *" */
386 struct list_head fi_stateids; 466 struct list_head fi_stateids;
387 struct list_head fi_delegations; 467 struct list_head fi_delegations;
@@ -395,49 +475,36 @@ struct nfs4_file {
395 * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. 475 * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set.
396 */ 476 */
397 atomic_t fi_access[2]; 477 atomic_t fi_access[2];
478 u32 fi_share_deny;
398 struct file *fi_deleg_file; 479 struct file *fi_deleg_file;
399 struct file_lock *fi_lease; 480 struct file_lock *fi_lease;
400 atomic_t fi_delegees; 481 atomic_t fi_delegees;
401 struct inode *fi_inode; 482 struct knfsd_fh fi_fhandle;
402 bool fi_had_conflict; 483 bool fi_had_conflict;
403}; 484};
404 485
405/* XXX: for first cut may fall back on returning file that doesn't work 486/*
406 * at all? */ 487 * A generic struct representing either a open or lock stateid. The nfs4_client
407static inline struct file *find_writeable_file(struct nfs4_file *f) 488 * holds a reference to each of these objects, and they in turn hold a
408{ 489 * reference to their respective stateowners. The client's reference is
409 if (f->fi_fds[O_WRONLY]) 490 * released in response to a close or unlock (depending on whether it's an open
410 return f->fi_fds[O_WRONLY]; 491 * or lock stateid) or when the client is being destroyed.
411 return f->fi_fds[O_RDWR]; 492 *
412} 493 * In the case of v4.0 open stateids, these objects are preserved for a little
413 494 * while after close in order to handle CLOSE replays. Those are eventually
414static inline struct file *find_readable_file(struct nfs4_file *f) 495 * reclaimed via a LRU scheme by the laundromat.
415{ 496 *
416 if (f->fi_fds[O_RDONLY]) 497 * This object is a superset of the nfs4_stid. "ol" stands for "Open or Lock".
417 return f->fi_fds[O_RDONLY]; 498 * Better suggestions welcome.
418 return f->fi_fds[O_RDWR]; 499 */
419}
420
421static inline struct file *find_any_file(struct nfs4_file *f)
422{
423 if (f->fi_fds[O_RDWR])
424 return f->fi_fds[O_RDWR];
425 else if (f->fi_fds[O_WRONLY])
426 return f->fi_fds[O_WRONLY];
427 else
428 return f->fi_fds[O_RDONLY];
429}
430
431/* "ol" stands for "Open or Lock". Better suggestions welcome. */
432struct nfs4_ol_stateid { 500struct nfs4_ol_stateid {
433 struct nfs4_stid st_stid; /* must be first field */ 501 struct nfs4_stid st_stid; /* must be first field */
434 struct list_head st_perfile; 502 struct list_head st_perfile;
435 struct list_head st_perstateowner; 503 struct list_head st_perstateowner;
436 struct list_head st_lockowners; 504 struct list_head st_locks;
437 struct nfs4_stateowner * st_stateowner; 505 struct nfs4_stateowner * st_stateowner;
438 struct nfs4_file * st_file; 506 unsigned char st_access_bmap;
439 unsigned long st_access_bmap; 507 unsigned char st_deny_bmap;
440 unsigned long st_deny_bmap;
441 struct nfs4_ol_stateid * st_openstp; 508 struct nfs4_ol_stateid * st_openstp;
442}; 509};
443 510
@@ -456,15 +523,16 @@ struct nfsd_net;
456extern __be32 nfs4_preprocess_stateid_op(struct net *net, 523extern __be32 nfs4_preprocess_stateid_op(struct net *net,
457 struct nfsd4_compound_state *cstate, 524 struct nfsd4_compound_state *cstate,
458 stateid_t *stateid, int flags, struct file **filp); 525 stateid_t *stateid, int flags, struct file **filp);
459extern void nfs4_lock_state(void); 526void nfs4_put_stid(struct nfs4_stid *s);
460extern void nfs4_unlock_state(void);
461void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); 527void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
462extern void nfs4_release_reclaim(struct nfsd_net *); 528extern void nfs4_release_reclaim(struct nfsd_net *);
463extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, 529extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
464 struct nfsd_net *nn); 530 struct nfsd_net *nn);
465extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); 531extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
532 struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
466extern int set_callback_cred(void); 533extern int set_callback_cred(void);
467extern void nfsd4_init_callback(struct nfsd4_callback *); 534void nfsd4_run_cb_null(struct work_struct *w);
535void nfsd4_run_cb_recall(struct work_struct *w);
468extern void nfsd4_probe_callback(struct nfs4_client *clp); 536extern void nfsd4_probe_callback(struct nfs4_client *clp);
469extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); 537extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
470extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 538extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
@@ -472,11 +540,10 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
472extern int nfsd4_create_callback_queue(void); 540extern int nfsd4_create_callback_queue(void);
473extern void nfsd4_destroy_callback_queue(void); 541extern void nfsd4_destroy_callback_queue(void);
474extern void nfsd4_shutdown_callback(struct nfs4_client *); 542extern void nfsd4_shutdown_callback(struct nfs4_client *);
475extern void nfs4_put_delegation(struct nfs4_delegation *dp); 543extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
476extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, 544extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
477 struct nfsd_net *nn); 545 struct nfsd_net *nn);
478extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); 546extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
479extern void put_client_renew(struct nfs4_client *clp);
480 547
481/* nfs4recover operations */ 548/* nfs4recover operations */
482extern int nfsd4_client_tracking_init(struct net *net); 549extern int nfsd4_client_tracking_init(struct net *net);
@@ -490,19 +557,24 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
490#ifdef CONFIG_NFSD_FAULT_INJECTION 557#ifdef CONFIG_NFSD_FAULT_INJECTION
491int nfsd_fault_inject_init(void); 558int nfsd_fault_inject_init(void);
492void nfsd_fault_inject_cleanup(void); 559void nfsd_fault_inject_cleanup(void);
493u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); 560
494struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); 561u64 nfsd_inject_print_clients(void);
495 562u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t);
496u64 nfsd_forget_client(struct nfs4_client *, u64); 563u64 nfsd_inject_forget_clients(u64);
497u64 nfsd_forget_client_locks(struct nfs4_client*, u64); 564
498u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); 565u64 nfsd_inject_print_locks(void);
499u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); 566u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t);
500u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); 567u64 nfsd_inject_forget_locks(u64);
501 568
502u64 nfsd_print_client(struct nfs4_client *, u64); 569u64 nfsd_inject_print_openowners(void);
503u64 nfsd_print_client_locks(struct nfs4_client *, u64); 570u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t);
504u64 nfsd_print_client_openowners(struct nfs4_client *, u64); 571u64 nfsd_inject_forget_openowners(u64);
505u64 nfsd_print_client_delegations(struct nfs4_client *, u64); 572
573u64 nfsd_inject_print_delegations(void);
574u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t);
575u64 nfsd_inject_forget_delegations(u64);
576u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t);
577u64 nfsd_inject_recall_delegations(u64);
506#else /* CONFIG_NFSD_FAULT_INJECTION */ 578#else /* CONFIG_NFSD_FAULT_INJECTION */
507static inline int nfsd_fault_inject_init(void) { return 0; } 579static inline int nfsd_fault_inject_init(void) { return 0; }
508static inline void nfsd_fault_inject_cleanup(void) {} 580static inline void nfsd_fault_inject_cleanup(void) {}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 140c496f612c..f501a9b5c9df 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
189 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 189 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
190 190
191 dparent = fhp->fh_dentry; 191 dparent = fhp->fh_dentry;
192 exp = fhp->fh_export; 192 exp = exp_get(fhp->fh_export);
193 exp_get(exp);
194 193
195 /* Lookup the name, but don't follow links */ 194 /* Lookup the name, but don't follow links */
196 if (isdotent(name, len)) { 195 if (isdotent(name, len)) {
@@ -464,7 +463,7 @@ out_put_write_access:
464 if (size_change) 463 if (size_change)
465 put_write_access(inode); 464 put_write_access(inode);
466 if (!err) 465 if (!err)
467 commit_metadata(fhp); 466 err = nfserrno(commit_metadata(fhp));
468out: 467out:
469 return err; 468 return err;
470} 469}
@@ -820,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
820 return __splice_from_pipe(pipe, sd, nfsd_splice_actor); 819 return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
821} 820}
822 821
823__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) 822static __be32
823nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
824{ 824{
825 if (host_err >= 0) { 825 if (host_err >= 0) {
826 nfsdstats.io_read += host_err; 826 nfsdstats.io_read += host_err;
@@ -831,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
831 return nfserrno(host_err); 831 return nfserrno(host_err);
832} 832}
833 833
834int nfsd_splice_read(struct svc_rqst *rqstp, 834__be32 nfsd_splice_read(struct svc_rqst *rqstp,
835 struct file *file, loff_t offset, unsigned long *count) 835 struct file *file, loff_t offset, unsigned long *count)
836{ 836{
837 struct splice_desc sd = { 837 struct splice_desc sd = {
@@ -847,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp,
847 return nfsd_finish_read(file, count, host_err); 847 return nfsd_finish_read(file, count, host_err);
848} 848}
849 849
850int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, 850__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
851 unsigned long *count) 851 unsigned long *count)
852{ 852{
853 mm_segment_t oldfs; 853 mm_segment_t oldfs;
@@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1121 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1121 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1122 if (iap->ia_valid) 1122 if (iap->ia_valid)
1123 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1123 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
1124 return 0; 1124 /* Callers expect file metadata to be committed here */
1125 return nfserrno(commit_metadata(resfhp));
1125} 1126}
1126 1127
1127/* HPUX client sometimes creates a file in mode 000, and sets size to 0. 1128/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
@@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1253 err = nfsd_create_setattr(rqstp, resfhp, iap); 1254 err = nfsd_create_setattr(rqstp, resfhp, iap);
1254 1255
1255 /* 1256 /*
1256 * nfsd_setattr already committed the child. Transactional filesystems 1257 * nfsd_create_setattr already committed the child. Transactional
1257 * had a chance to commit changes for both parent and child 1258 * filesystems had a chance to commit changes for both parent and
1258 * simultaneously making the following commit_metadata a noop. 1259 * child * simultaneously making the following commit_metadata a
1260 * noop.
1259 */ 1261 */
1260 err2 = nfserrno(commit_metadata(fhp)); 1262 err2 = nfserrno(commit_metadata(fhp));
1261 if (err2) 1263 if (err2)
@@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1426 err = nfsd_create_setattr(rqstp, resfhp, iap); 1428 err = nfsd_create_setattr(rqstp, resfhp, iap);
1427 1429
1428 /* 1430 /*
1429 * nfsd_setattr already committed the child (and possibly also the parent). 1431 * nfsd_create_setattr already committed the child
1432 * (and possibly also the parent).
1430 */ 1433 */
1431 if (!err) 1434 if (!err)
1432 err = nfserrno(commit_metadata(fhp)); 1435 err = nfserrno(commit_metadata(fhp));
@@ -1504,16 +1507,15 @@ out_nfserr:
1504__be32 1507__be32
1505nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, 1508nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1506 char *fname, int flen, 1509 char *fname, int flen,
1507 char *path, int plen, 1510 char *path,
1508 struct svc_fh *resfhp, 1511 struct svc_fh *resfhp)
1509 struct iattr *iap)
1510{ 1512{
1511 struct dentry *dentry, *dnew; 1513 struct dentry *dentry, *dnew;
1512 __be32 err, cerr; 1514 __be32 err, cerr;
1513 int host_err; 1515 int host_err;
1514 1516
1515 err = nfserr_noent; 1517 err = nfserr_noent;
1516 if (!flen || !plen) 1518 if (!flen || path[0] == '\0')
1517 goto out; 1519 goto out;
1518 err = nfserr_exist; 1520 err = nfserr_exist;
1519 if (isdotent(fname, flen)) 1521 if (isdotent(fname, flen))
@@ -1534,18 +1536,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1534 if (IS_ERR(dnew)) 1536 if (IS_ERR(dnew))
1535 goto out_nfserr; 1537 goto out_nfserr;
1536 1538
1537 if (unlikely(path[plen] != 0)) { 1539 host_err = vfs_symlink(dentry->d_inode, dnew, path);
1538 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1539 if (path_alloced == NULL)
1540 host_err = -ENOMEM;
1541 else {
1542 strncpy(path_alloced, path, plen);
1543 path_alloced[plen] = 0;
1544 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
1545 kfree(path_alloced);
1546 }
1547 } else
1548 host_err = vfs_symlink(dentry->d_inode, dnew, path);
1549 err = nfserrno(host_err); 1540 err = nfserrno(host_err);
1550 if (!err) 1541 if (!err)
1551 err = nfserrno(commit_metadata(fhp)); 1542 err = nfserrno(commit_metadata(fhp));
@@ -2093,8 +2084,7 @@ nfsd_racache_init(int cache_size)
2093 if (raparm_hash[0].pb_head) 2084 if (raparm_hash[0].pb_head)
2094 return 0; 2085 return 0;
2095 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); 2086 nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
2096 if (nperbucket < 2) 2087 nperbucket = max(2, nperbucket);
2097 nperbucket = 2;
2098 cache_size = nperbucket * RAPARM_HASH_SIZE; 2088 cache_size = nperbucket * RAPARM_HASH_SIZE;
2099 2089
2100 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); 2090 dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 91b6ae3f658b..c2ff3f14e5f6 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,9 +74,9 @@ struct raparms;
74__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, 74__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
75 struct file **, struct raparms **); 75 struct file **, struct raparms **);
76void nfsd_put_tmp_read_open(struct file *, struct raparms *); 76void nfsd_put_tmp_read_open(struct file *, struct raparms *);
77int nfsd_splice_read(struct svc_rqst *, 77__be32 nfsd_splice_read(struct svc_rqst *,
78 struct file *, loff_t, unsigned long *); 78 struct file *, loff_t, unsigned long *);
79int nfsd_readv(struct file *, loff_t, struct kvec *, int, 79__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
80 unsigned long *); 80 unsigned long *);
81__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, 81__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
82 loff_t, struct kvec *, int, unsigned long *); 82 loff_t, struct kvec *, int, unsigned long *);
@@ -85,8 +85,8 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
85__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, 85__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
86 char *, int *); 86 char *, int *);
87__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, 87__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
88 char *name, int len, char *path, int plen, 88 char *name, int len, char *path,
89 struct svc_fh *res, struct iattr *); 89 struct svc_fh *res);
90__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, 90__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
91 char *, int, struct svc_fh *); 91 char *, int, struct svc_fh *);
92__be32 nfsd_rename(struct svc_rqst *, 92__be32 nfsd_rename(struct svc_rqst *,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 18cbb6d9c8a9..465e7799742a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -55,6 +55,7 @@ struct nfsd4_compound_state {
55 struct svc_fh current_fh; 55 struct svc_fh current_fh;
56 struct svc_fh save_fh; 56 struct svc_fh save_fh;
57 struct nfs4_stateowner *replay_owner; 57 struct nfs4_stateowner *replay_owner;
58 struct nfs4_client *clp;
58 /* For sessions DRC */ 59 /* For sessions DRC */
59 struct nfsd4_session *session; 60 struct nfsd4_session *session;
60 struct nfsd4_slot *slot; 61 struct nfsd4_slot *slot;
@@ -107,8 +108,8 @@ struct nfsd4_create {
107 u32 cr_type; /* request */ 108 u32 cr_type; /* request */
108 union { /* request */ 109 union { /* request */
109 struct { 110 struct {
110 u32 namelen; 111 u32 datalen;
111 char *name; 112 char *data;
112 } link; /* NF4LNK */ 113 } link; /* NF4LNK */
113 struct { 114 struct {
114 u32 specdata1; 115 u32 specdata1;
@@ -121,8 +122,8 @@ struct nfsd4_create {
121 struct nfs4_acl *cr_acl; 122 struct nfs4_acl *cr_acl;
122 struct xdr_netobj cr_label; 123 struct xdr_netobj cr_label;
123}; 124};
124#define cr_linklen u.link.namelen 125#define cr_datalen u.link.datalen
125#define cr_linkname u.link.name 126#define cr_data u.link.data
126#define cr_specdata1 u.dev.specdata1 127#define cr_specdata1 u.dev.specdata1
127#define cr_specdata2 u.dev.specdata2 128#define cr_specdata2 u.dev.specdata2
128 129
@@ -478,6 +479,14 @@ struct nfsd4_op {
478 479
479bool nfsd4_cache_this_op(struct nfsd4_op *); 480bool nfsd4_cache_this_op(struct nfsd4_op *);
480 481
482/*
483 * Memory needed just for the duration of processing one compound:
484 */
485struct svcxdr_tmpbuf {
486 struct svcxdr_tmpbuf *next;
487 char buf[];
488};
489
481struct nfsd4_compoundargs { 490struct nfsd4_compoundargs {
482 /* scratch variables for XDR decode */ 491 /* scratch variables for XDR decode */
483 __be32 * p; 492 __be32 * p;
@@ -486,11 +495,7 @@ struct nfsd4_compoundargs {
486 int pagelen; 495 int pagelen;
487 __be32 tmp[8]; 496 __be32 tmp[8];
488 __be32 * tmpp; 497 __be32 * tmpp;
489 struct tmpbuf { 498 struct svcxdr_tmpbuf *to_free;
490 struct tmpbuf *next;
491 void (*release)(const void *);
492 void *buf;
493 } *to_free;
494 499
495 struct svc_rqst *rqstp; 500 struct svc_rqst *rqstp;
496 501
@@ -574,7 +579,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
574extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, 579extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
575 struct nfsd4_compound_state *, 580 struct nfsd4_compound_state *,
576 struct nfsd4_setclientid_confirm *setclientid_confirm); 581 struct nfsd4_setclientid_confirm *setclientid_confirm);
577extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
578extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, 582extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
579 struct nfsd4_compound_state *, struct nfsd4_exchange_id *); 583 struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
580extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); 584extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
@@ -585,6 +589,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *,
585extern __be32 nfsd4_sequence(struct svc_rqst *, 589extern __be32 nfsd4_sequence(struct svc_rqst *,
586 struct nfsd4_compound_state *, 590 struct nfsd4_compound_state *,
587 struct nfsd4_sequence *); 591 struct nfsd4_sequence *);
592extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
588extern __be32 nfsd4_destroy_session(struct svc_rqst *, 593extern __be32 nfsd4_destroy_session(struct svc_rqst *,
589 struct nfsd4_compound_state *, 594 struct nfsd4_compound_state *,
590 struct nfsd4_destroy_session *); 595 struct nfsd4_destroy_session *);
@@ -594,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
594 struct nfsd4_open *open, struct nfsd_net *nn); 599 struct nfsd4_open *open, struct nfsd_net *nn);
595extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, 600extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
596 struct svc_fh *current_fh, struct nfsd4_open *open); 601 struct svc_fh *current_fh, struct nfsd4_open *open);
597extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); 602extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
603extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
604 struct nfsd4_open *open, __be32 status);
598extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, 605extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
599 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); 606 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
600extern __be32 nfsd4_close(struct svc_rqst *rqstp, 607extern __be32 nfsd4_close(struct svc_rqst *rqstp,
@@ -625,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
625extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, 632extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
626 struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); 633 struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
627extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); 634extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
635
628#endif 636#endif
629 637
630/* 638/*
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
index 85c98737a146..fc603e0431bb 100644
--- a/fs/nilfs2/Makefile
+++ b/fs/nilfs2/Makefile
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o
2nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ 2nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
3 btnode.o bmap.o btree.o direct.o dat.o recovery.o \ 3 btnode.o bmap.o btree.o direct.o dat.o recovery.o \
4 the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ 4 the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
5 ifile.o alloc.o gcinode.o ioctl.o 5 ifile.o alloc.o gcinode.o ioctl.o sysfs.o
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..0696161bf59d 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -320,6 +320,14 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
320int nilfs_init_gcinode(struct inode *inode); 320int nilfs_init_gcinode(struct inode *inode);
321void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); 321void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs);
322 322
323/* sysfs.c */
324int __init nilfs_sysfs_init(void);
325void nilfs_sysfs_exit(void);
326int nilfs_sysfs_create_device_group(struct super_block *);
327void nilfs_sysfs_delete_device_group(struct the_nilfs *);
328int nilfs_sysfs_create_snapshot_group(struct nilfs_root *);
329void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *);
330
323/* 331/*
324 * Inodes and files operations 332 * Inodes and files operations
325 */ 333 */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8c532b2ca3ab..228f5bdf0772 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -942,7 +942,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
942 iput(inode); 942 iput(inode);
943 } 943 }
944 } else { 944 } else {
945 dentry = d_obtain_alias(inode); 945 dentry = d_obtain_root(inode);
946 if (IS_ERR(dentry)) { 946 if (IS_ERR(dentry)) {
947 ret = PTR_ERR(dentry); 947 ret = PTR_ERR(dentry);
948 goto failed_dentry; 948 goto failed_dentry;
@@ -1452,13 +1452,19 @@ static int __init init_nilfs_fs(void)
1452 if (err) 1452 if (err)
1453 goto fail; 1453 goto fail;
1454 1454
1455 err = register_filesystem(&nilfs_fs_type); 1455 err = nilfs_sysfs_init();
1456 if (err) 1456 if (err)
1457 goto free_cachep; 1457 goto free_cachep;
1458 1458
1459 err = register_filesystem(&nilfs_fs_type);
1460 if (err)
1461 goto deinit_sysfs_entry;
1462
1459 printk(KERN_INFO "NILFS version 2 loaded\n"); 1463 printk(KERN_INFO "NILFS version 2 loaded\n");
1460 return 0; 1464 return 0;
1461 1465
1466deinit_sysfs_entry:
1467 nilfs_sysfs_exit();
1462free_cachep: 1468free_cachep:
1463 nilfs_destroy_cachep(); 1469 nilfs_destroy_cachep();
1464fail: 1470fail:
@@ -1468,6 +1474,7 @@ fail:
1468static void __exit exit_nilfs_fs(void) 1474static void __exit exit_nilfs_fs(void)
1469{ 1475{
1470 nilfs_destroy_cachep(); 1476 nilfs_destroy_cachep();
1477 nilfs_sysfs_exit();
1471 unregister_filesystem(&nilfs_fs_type); 1478 unregister_filesystem(&nilfs_fs_type);
1472} 1479}
1473 1480
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
new file mode 100644
index 000000000000..bbb0dcc35905
--- /dev/null
+++ b/fs/nilfs2/sysfs.c
@@ -0,0 +1,1137 @@
1/*
2 * sysfs.c - sysfs support implementation.
3 *
4 * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
5 * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
18 */
19
20#include <linux/kobject.h>
21
22#include "nilfs.h"
23#include "mdt.h"
24#include "sufile.h"
25#include "cpfile.h"
26#include "sysfs.h"
27
28/* /sys/fs/<nilfs>/ */
29static struct kset *nilfs_kset;
30
31#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
32 struct tm res; \
33 int count = 0; \
34 time_to_tm(time_t_val, 0, &res); \
35 res.tm_year += 1900; \
36 res.tm_mon += 1; \
37 count = scnprintf(buf, PAGE_SIZE, \
38 "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
39 res.tm_year, res.tm_mon, res.tm_mday, \
40 res.tm_hour, res.tm_min, res.tm_sec);\
41 count; \
42})
43
44#define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
45static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
46 struct attribute *attr, char *buf) \
47{ \
48 struct the_nilfs *nilfs = container_of(kobj->parent, \
49 struct the_nilfs, \
50 ns_##parent_name##_kobj); \
51 struct nilfs_##name##_attr *a = container_of(attr, \
52 struct nilfs_##name##_attr, \
53 attr); \
54 return a->show ? a->show(a, nilfs, buf) : 0; \
55} \
56static ssize_t nilfs_##name##_attr_store(struct kobject *kobj, \
57 struct attribute *attr, \
58 const char *buf, size_t len) \
59{ \
60 struct the_nilfs *nilfs = container_of(kobj->parent, \
61 struct the_nilfs, \
62 ns_##parent_name##_kobj); \
63 struct nilfs_##name##_attr *a = container_of(attr, \
64 struct nilfs_##name##_attr, \
65 attr); \
66 return a->store ? a->store(a, nilfs, buf, len) : 0; \
67} \
68static const struct sysfs_ops nilfs_##name##_attr_ops = { \
69 .show = nilfs_##name##_attr_show, \
70 .store = nilfs_##name##_attr_store, \
71};
72
73#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
74static void nilfs_##name##_attr_release(struct kobject *kobj) \
75{ \
76 struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
77 struct the_nilfs *nilfs = container_of(kobj->parent, \
78 struct the_nilfs, \
79 ns_##parent_name##_kobj); \
80 subgroups = nilfs->ns_##parent_name##_subgroups; \
81 complete(&subgroups->sg_##name##_kobj_unregister); \
82} \
83static struct kobj_type nilfs_##name##_ktype = { \
84 .default_attrs = nilfs_##name##_attrs, \
85 .sysfs_ops = &nilfs_##name##_attr_ops, \
86 .release = nilfs_##name##_attr_release, \
87};
88
89#define NILFS_DEV_INT_GROUP_FNS(name, parent_name) \
90static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
91{ \
92 struct kobject *parent; \
93 struct kobject *kobj; \
94 struct completion *kobj_unregister; \
95 struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
96 int err; \
97 subgroups = nilfs->ns_##parent_name##_subgroups; \
98 kobj = &subgroups->sg_##name##_kobj; \
99 kobj_unregister = &subgroups->sg_##name##_kobj_unregister; \
100 parent = &nilfs->ns_##parent_name##_kobj; \
101 kobj->kset = nilfs_kset; \
102 init_completion(kobj_unregister); \
103 err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
104 #name); \
105 if (err) \
106 return err; \
107 return 0; \
108} \
109static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
110{ \
111 kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
112}
113
114/************************************************************************
115 * NILFS snapshot attrs *
116 ************************************************************************/
117
118static ssize_t
119nilfs_snapshot_inodes_count_show(struct nilfs_snapshot_attr *attr,
120 struct nilfs_root *root, char *buf)
121{
122 return snprintf(buf, PAGE_SIZE, "%llu\n",
123 (unsigned long long)atomic64_read(&root->inodes_count));
124}
125
126static ssize_t
127nilfs_snapshot_blocks_count_show(struct nilfs_snapshot_attr *attr,
128 struct nilfs_root *root, char *buf)
129{
130 return snprintf(buf, PAGE_SIZE, "%llu\n",
131 (unsigned long long)atomic64_read(&root->blocks_count));
132}
133
134static const char snapshot_readme_str[] =
135 "The group contains details about mounted snapshot.\n\n"
136 "(1) inodes_count\n\tshow number of inodes for snapshot.\n\n"
137 "(2) blocks_count\n\tshow number of blocks for snapshot.\n\n";
138
139static ssize_t
140nilfs_snapshot_README_show(struct nilfs_snapshot_attr *attr,
141 struct nilfs_root *root, char *buf)
142{
143 return snprintf(buf, PAGE_SIZE, snapshot_readme_str);
144}
145
146NILFS_SNAPSHOT_RO_ATTR(inodes_count);
147NILFS_SNAPSHOT_RO_ATTR(blocks_count);
148NILFS_SNAPSHOT_RO_ATTR(README);
149
150static struct attribute *nilfs_snapshot_attrs[] = {
151 NILFS_SNAPSHOT_ATTR_LIST(inodes_count),
152 NILFS_SNAPSHOT_ATTR_LIST(blocks_count),
153 NILFS_SNAPSHOT_ATTR_LIST(README),
154 NULL,
155};
156
157static ssize_t nilfs_snapshot_attr_show(struct kobject *kobj,
158 struct attribute *attr, char *buf)
159{
160 struct nilfs_root *root =
161 container_of(kobj, struct nilfs_root, snapshot_kobj);
162 struct nilfs_snapshot_attr *a =
163 container_of(attr, struct nilfs_snapshot_attr, attr);
164
165 return a->show ? a->show(a, root, buf) : 0;
166}
167
168static ssize_t nilfs_snapshot_attr_store(struct kobject *kobj,
169 struct attribute *attr,
170 const char *buf, size_t len)
171{
172 struct nilfs_root *root =
173 container_of(kobj, struct nilfs_root, snapshot_kobj);
174 struct nilfs_snapshot_attr *a =
175 container_of(attr, struct nilfs_snapshot_attr, attr);
176
177 return a->store ? a->store(a, root, buf, len) : 0;
178}
179
180static void nilfs_snapshot_attr_release(struct kobject *kobj)
181{
182 struct nilfs_root *root = container_of(kobj, struct nilfs_root,
183 snapshot_kobj);
184 complete(&root->snapshot_kobj_unregister);
185}
186
187static const struct sysfs_ops nilfs_snapshot_attr_ops = {
188 .show = nilfs_snapshot_attr_show,
189 .store = nilfs_snapshot_attr_store,
190};
191
192static struct kobj_type nilfs_snapshot_ktype = {
193 .default_attrs = nilfs_snapshot_attrs,
194 .sysfs_ops = &nilfs_snapshot_attr_ops,
195 .release = nilfs_snapshot_attr_release,
196};
197
198int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
199{
200 struct the_nilfs *nilfs;
201 struct kobject *parent;
202 int err;
203
204 nilfs = root->nilfs;
205 parent = &nilfs->ns_dev_subgroups->sg_mounted_snapshots_kobj;
206 root->snapshot_kobj.kset = nilfs_kset;
207 init_completion(&root->snapshot_kobj_unregister);
208
209 if (root->cno == NILFS_CPTREE_CURRENT_CNO) {
210 err = kobject_init_and_add(&root->snapshot_kobj,
211 &nilfs_snapshot_ktype,
212 &nilfs->ns_dev_kobj,
213 "current_checkpoint");
214 } else {
215 err = kobject_init_and_add(&root->snapshot_kobj,
216 &nilfs_snapshot_ktype,
217 parent,
218 "%llu", root->cno);
219 }
220
221 if (err)
222 return err;
223
224 return 0;
225}
226
227void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
228{
229 kobject_del(&root->snapshot_kobj);
230}
231
232/************************************************************************
233 * NILFS mounted snapshots attrs *
234 ************************************************************************/
235
236static const char mounted_snapshots_readme_str[] =
237 "The mounted_snapshots group contains group for\n"
238 "every mounted snapshot.\n";
239
240static ssize_t
241nilfs_mounted_snapshots_README_show(struct nilfs_mounted_snapshots_attr *attr,
242 struct the_nilfs *nilfs, char *buf)
243{
244 return snprintf(buf, PAGE_SIZE, mounted_snapshots_readme_str);
245}
246
247NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(README);
248
249static struct attribute *nilfs_mounted_snapshots_attrs[] = {
250 NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(README),
251 NULL,
252};
253
254NILFS_DEV_INT_GROUP_OPS(mounted_snapshots, dev);
255NILFS_DEV_INT_GROUP_TYPE(mounted_snapshots, dev);
256NILFS_DEV_INT_GROUP_FNS(mounted_snapshots, dev);
257
258/************************************************************************
259 * NILFS checkpoints attrs *
260 ************************************************************************/
261
262static ssize_t
263nilfs_checkpoints_checkpoints_number_show(struct nilfs_checkpoints_attr *attr,
264 struct the_nilfs *nilfs,
265 char *buf)
266{
267 __u64 ncheckpoints;
268 struct nilfs_cpstat cpstat;
269 int err;
270
271 down_read(&nilfs->ns_segctor_sem);
272 err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
273 up_read(&nilfs->ns_segctor_sem);
274 if (err < 0) {
275 printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
276 err);
277 return err;
278 }
279
280 ncheckpoints = cpstat.cs_ncps;
281
282 return snprintf(buf, PAGE_SIZE, "%llu\n", ncheckpoints);
283}
284
285static ssize_t
286nilfs_checkpoints_snapshots_number_show(struct nilfs_checkpoints_attr *attr,
287 struct the_nilfs *nilfs,
288 char *buf)
289{
290 __u64 nsnapshots;
291 struct nilfs_cpstat cpstat;
292 int err;
293
294 down_read(&nilfs->ns_segctor_sem);
295 err = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
296 up_read(&nilfs->ns_segctor_sem);
297 if (err < 0) {
298 printk(KERN_ERR "NILFS: unable to get checkpoint stat: err=%d\n",
299 err);
300 return err;
301 }
302
303 nsnapshots = cpstat.cs_nsss;
304
305 return snprintf(buf, PAGE_SIZE, "%llu\n", nsnapshots);
306}
307
308static ssize_t
309nilfs_checkpoints_last_seg_checkpoint_show(struct nilfs_checkpoints_attr *attr,
310 struct the_nilfs *nilfs,
311 char *buf)
312{
313 __u64 last_cno;
314
315 spin_lock(&nilfs->ns_last_segment_lock);
316 last_cno = nilfs->ns_last_cno;
317 spin_unlock(&nilfs->ns_last_segment_lock);
318
319 return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
320}
321
322static ssize_t
323nilfs_checkpoints_next_checkpoint_show(struct nilfs_checkpoints_attr *attr,
324 struct the_nilfs *nilfs,
325 char *buf)
326{
327 __u64 cno;
328
329 down_read(&nilfs->ns_sem);
330 cno = nilfs->ns_cno;
331 up_read(&nilfs->ns_sem);
332
333 return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
334}
335
/* Contents of /sys/fs/nilfs2/<device>/checkpoints/README. */
static const char checkpoints_readme_str[] =
	"The checkpoints group contains attributes that describe\n"
	"details about volume's checkpoints.\n\n"
	"(1) checkpoints_number\n\tshow number of checkpoints on volume.\n\n"
	"(2) snapshots_number\n\tshow number of snapshots on volume.\n\n"
	"(3) last_seg_checkpoint\n"
	"\tshow checkpoint number of the latest segment.\n\n"
	"(4) next_checkpoint\n\tshow next checkpoint number.\n\n";
344
345static ssize_t
346nilfs_checkpoints_README_show(struct nilfs_checkpoints_attr *attr,
347 struct the_nilfs *nilfs, char *buf)
348{
349 return snprintf(buf, PAGE_SIZE, checkpoints_readme_str);
350}
351
/* Read-only attribute objects of the checkpoints group. */
NILFS_CHECKPOINTS_RO_ATTR(checkpoints_number);
NILFS_CHECKPOINTS_RO_ATTR(snapshots_number);
NILFS_CHECKPOINTS_RO_ATTR(last_seg_checkpoint);
NILFS_CHECKPOINTS_RO_ATTR(next_checkpoint);
NILFS_CHECKPOINTS_RO_ATTR(README);

/* Attribute table backing /sys/fs/nilfs2/<device>/checkpoints. */
static struct attribute *nilfs_checkpoints_attrs[] = {
	NILFS_CHECKPOINTS_ATTR_LIST(checkpoints_number),
	NILFS_CHECKPOINTS_ATTR_LIST(snapshots_number),
	NILFS_CHECKPOINTS_ATTR_LIST(last_seg_checkpoint),
	NILFS_CHECKPOINTS_ATTR_LIST(next_checkpoint),
	NILFS_CHECKPOINTS_ATTR_LIST(README),
	NULL,
};

/* Generate sysfs_ops, kobj_type and create/delete helpers for the group. */
NILFS_DEV_INT_GROUP_OPS(checkpoints, dev);
NILFS_DEV_INT_GROUP_TYPE(checkpoints, dev);
NILFS_DEV_INT_GROUP_FNS(checkpoints, dev);
370
371/************************************************************************
372 * NILFS segments attrs *
373 ************************************************************************/
374
375static ssize_t
376nilfs_segments_segments_number_show(struct nilfs_segments_attr *attr,
377 struct the_nilfs *nilfs,
378 char *buf)
379{
380 return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_nsegments);
381}
382
383static ssize_t
384nilfs_segments_blocks_per_segment_show(struct nilfs_segments_attr *attr,
385 struct the_nilfs *nilfs,
386 char *buf)
387{
388 return snprintf(buf, PAGE_SIZE, "%lu\n", nilfs->ns_blocks_per_segment);
389}
390
391static ssize_t
392nilfs_segments_clean_segments_show(struct nilfs_segments_attr *attr,
393 struct the_nilfs *nilfs,
394 char *buf)
395{
396 unsigned long ncleansegs;
397
398 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
399 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
400 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
401
402 return snprintf(buf, PAGE_SIZE, "%lu\n", ncleansegs);
403}
404
405static ssize_t
406nilfs_segments_dirty_segments_show(struct nilfs_segments_attr *attr,
407 struct the_nilfs *nilfs,
408 char *buf)
409{
410 struct nilfs_sustat sustat;
411 int err;
412
413 down_read(&nilfs->ns_segctor_sem);
414 err = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
415 up_read(&nilfs->ns_segctor_sem);
416 if (err < 0) {
417 printk(KERN_ERR "NILFS: unable to get segment stat: err=%d\n",
418 err);
419 return err;
420 }
421
422 return snprintf(buf, PAGE_SIZE, "%llu\n", sustat.ss_ndirtysegs);
423}
424
/* Contents of /sys/fs/nilfs2/<device>/segments/README. */
static const char segments_readme_str[] =
	"The segments group contains attributes that describe\n"
	"details about volume's segments.\n\n"
	"(1) segments_number\n\tshow number of segments on volume.\n\n"
	"(2) blocks_per_segment\n\tshow number of blocks in segment.\n\n"
	"(3) clean_segments\n\tshow count of clean segments.\n\n"
	"(4) dirty_segments\n\tshow count of dirty segments.\n\n";
432
433static ssize_t
434nilfs_segments_README_show(struct nilfs_segments_attr *attr,
435 struct the_nilfs *nilfs,
436 char *buf)
437{
438 return snprintf(buf, PAGE_SIZE, segments_readme_str);
439}
440
/* Read-only attribute objects of the segments group. */
NILFS_SEGMENTS_RO_ATTR(segments_number);
NILFS_SEGMENTS_RO_ATTR(blocks_per_segment);
NILFS_SEGMENTS_RO_ATTR(clean_segments);
NILFS_SEGMENTS_RO_ATTR(dirty_segments);
NILFS_SEGMENTS_RO_ATTR(README);

/* Attribute table backing /sys/fs/nilfs2/<device>/segments. */
static struct attribute *nilfs_segments_attrs[] = {
	NILFS_SEGMENTS_ATTR_LIST(segments_number),
	NILFS_SEGMENTS_ATTR_LIST(blocks_per_segment),
	NILFS_SEGMENTS_ATTR_LIST(clean_segments),
	NILFS_SEGMENTS_ATTR_LIST(dirty_segments),
	NILFS_SEGMENTS_ATTR_LIST(README),
	NULL,
};

/* Generate sysfs_ops, kobj_type and create/delete helpers for the group. */
NILFS_DEV_INT_GROUP_OPS(segments, dev);
NILFS_DEV_INT_GROUP_TYPE(segments, dev);
NILFS_DEV_INT_GROUP_FNS(segments, dev);
459
460/************************************************************************
461 * NILFS segctor attrs *
462 ************************************************************************/
463
464static ssize_t
465nilfs_segctor_last_pseg_block_show(struct nilfs_segctor_attr *attr,
466 struct the_nilfs *nilfs,
467 char *buf)
468{
469 sector_t last_pseg;
470
471 spin_lock(&nilfs->ns_last_segment_lock);
472 last_pseg = nilfs->ns_last_pseg;
473 spin_unlock(&nilfs->ns_last_segment_lock);
474
475 return snprintf(buf, PAGE_SIZE, "%llu\n",
476 (unsigned long long)last_pseg);
477}
478
479static ssize_t
480nilfs_segctor_last_seg_sequence_show(struct nilfs_segctor_attr *attr,
481 struct the_nilfs *nilfs,
482 char *buf)
483{
484 u64 last_seq;
485
486 spin_lock(&nilfs->ns_last_segment_lock);
487 last_seq = nilfs->ns_last_seq;
488 spin_unlock(&nilfs->ns_last_segment_lock);
489
490 return snprintf(buf, PAGE_SIZE, "%llu\n", last_seq);
491}
492
493static ssize_t
494nilfs_segctor_last_seg_checkpoint_show(struct nilfs_segctor_attr *attr,
495 struct the_nilfs *nilfs,
496 char *buf)
497{
498 __u64 last_cno;
499
500 spin_lock(&nilfs->ns_last_segment_lock);
501 last_cno = nilfs->ns_last_cno;
502 spin_unlock(&nilfs->ns_last_segment_lock);
503
504 return snprintf(buf, PAGE_SIZE, "%llu\n", last_cno);
505}
506
507static ssize_t
508nilfs_segctor_current_seg_sequence_show(struct nilfs_segctor_attr *attr,
509 struct the_nilfs *nilfs,
510 char *buf)
511{
512 u64 seg_seq;
513
514 down_read(&nilfs->ns_sem);
515 seg_seq = nilfs->ns_seg_seq;
516 up_read(&nilfs->ns_sem);
517
518 return snprintf(buf, PAGE_SIZE, "%llu\n", seg_seq);
519}
520
521static ssize_t
522nilfs_segctor_current_last_full_seg_show(struct nilfs_segctor_attr *attr,
523 struct the_nilfs *nilfs,
524 char *buf)
525{
526 __u64 segnum;
527
528 down_read(&nilfs->ns_sem);
529 segnum = nilfs->ns_segnum;
530 up_read(&nilfs->ns_sem);
531
532 return snprintf(buf, PAGE_SIZE, "%llu\n", segnum);
533}
534
535static ssize_t
536nilfs_segctor_next_full_seg_show(struct nilfs_segctor_attr *attr,
537 struct the_nilfs *nilfs,
538 char *buf)
539{
540 __u64 nextnum;
541
542 down_read(&nilfs->ns_sem);
543 nextnum = nilfs->ns_nextnum;
544 up_read(&nilfs->ns_sem);
545
546 return snprintf(buf, PAGE_SIZE, "%llu\n", nextnum);
547}
548
549static ssize_t
550nilfs_segctor_next_pseg_offset_show(struct nilfs_segctor_attr *attr,
551 struct the_nilfs *nilfs,
552 char *buf)
553{
554 unsigned long pseg_offset;
555
556 down_read(&nilfs->ns_sem);
557 pseg_offset = nilfs->ns_pseg_offset;
558 up_read(&nilfs->ns_sem);
559
560 return snprintf(buf, PAGE_SIZE, "%lu\n", pseg_offset);
561}
562
563static ssize_t
564nilfs_segctor_next_checkpoint_show(struct nilfs_segctor_attr *attr,
565 struct the_nilfs *nilfs,
566 char *buf)
567{
568 __u64 cno;
569
570 down_read(&nilfs->ns_sem);
571 cno = nilfs->ns_cno;
572 up_read(&nilfs->ns_sem);
573
574 return snprintf(buf, PAGE_SIZE, "%llu\n", cno);
575}
576
577static ssize_t
578nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
579 struct the_nilfs *nilfs,
580 char *buf)
581{
582 time_t ctime;
583
584 down_read(&nilfs->ns_sem);
585 ctime = nilfs->ns_ctime;
586 up_read(&nilfs->ns_sem);
587
588 return NILFS_SHOW_TIME(ctime, buf);
589}
590
591static ssize_t
592nilfs_segctor_last_seg_write_time_secs_show(struct nilfs_segctor_attr *attr,
593 struct the_nilfs *nilfs,
594 char *buf)
595{
596 time_t ctime;
597
598 down_read(&nilfs->ns_sem);
599 ctime = nilfs->ns_ctime;
600 up_read(&nilfs->ns_sem);
601
602 return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)ctime);
603}
604
605static ssize_t
606nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
607 struct the_nilfs *nilfs,
608 char *buf)
609{
610 time_t nongc_ctime;
611
612 down_read(&nilfs->ns_sem);
613 nongc_ctime = nilfs->ns_nongc_ctime;
614 up_read(&nilfs->ns_sem);
615
616 return NILFS_SHOW_TIME(nongc_ctime, buf);
617}
618
619static ssize_t
620nilfs_segctor_last_nongc_write_time_secs_show(struct nilfs_segctor_attr *attr,
621 struct the_nilfs *nilfs,
622 char *buf)
623{
624 time_t nongc_ctime;
625
626 down_read(&nilfs->ns_sem);
627 nongc_ctime = nilfs->ns_nongc_ctime;
628 up_read(&nilfs->ns_sem);
629
630 return snprintf(buf, PAGE_SIZE, "%llu\n",
631 (unsigned long long)nongc_ctime);
632}
633
634static ssize_t
635nilfs_segctor_dirty_data_blocks_count_show(struct nilfs_segctor_attr *attr,
636 struct the_nilfs *nilfs,
637 char *buf)
638{
639 u32 ndirtyblks;
640
641 down_read(&nilfs->ns_sem);
642 ndirtyblks = atomic_read(&nilfs->ns_ndirtyblks);
643 up_read(&nilfs->ns_sem);
644
645 return snprintf(buf, PAGE_SIZE, "%u\n", ndirtyblks);
646}
647
/* Contents of /sys/fs/nilfs2/<device>/segctor/README. */
static const char segctor_readme_str[] =
	"The segctor group contains attributes that describe\n"
	"segctor thread activity details.\n\n"
	"(1) last_pseg_block\n"
	"\tshow start block number of the latest segment.\n\n"
	"(2) last_seg_sequence\n"
	"\tshow sequence value of the latest segment.\n\n"
	"(3) last_seg_checkpoint\n"
	"\tshow checkpoint number of the latest segment.\n\n"
	"(4) current_seg_sequence\n\tshow segment sequence counter.\n\n"
	"(5) current_last_full_seg\n"
	"\tshow index number of the latest full segment.\n\n"
	"(6) next_full_seg\n"
	"\tshow index number of the full segment index to be used next.\n\n"
	"(7) next_pseg_offset\n"
	"\tshow offset of next partial segment in the current full segment.\n\n"
	"(8) next_checkpoint\n\tshow next checkpoint number.\n\n"
	"(9) last_seg_write_time\n"
	"\tshow write time of the last segment in human-readable format.\n\n"
	"(10) last_seg_write_time_secs\n"
	"\tshow write time of the last segment in seconds.\n\n"
	"(11) last_nongc_write_time\n"
	"\tshow write time of the last segment not for cleaner operation "
	"in human-readable format.\n\n"
	"(12) last_nongc_write_time_secs\n"
	"\tshow write time of the last segment not for cleaner operation "
	"in seconds.\n\n"
	"(13) dirty_data_blocks_count\n"
	"\tshow number of dirty data blocks.\n\n";
677
678static ssize_t
679nilfs_segctor_README_show(struct nilfs_segctor_attr *attr,
680 struct the_nilfs *nilfs, char *buf)
681{
682 return snprintf(buf, PAGE_SIZE, segctor_readme_str);
683}
684
/* Read-only attribute objects of the segctor group. */
NILFS_SEGCTOR_RO_ATTR(last_pseg_block);
NILFS_SEGCTOR_RO_ATTR(last_seg_sequence);
NILFS_SEGCTOR_RO_ATTR(last_seg_checkpoint);
NILFS_SEGCTOR_RO_ATTR(current_seg_sequence);
NILFS_SEGCTOR_RO_ATTR(current_last_full_seg);
NILFS_SEGCTOR_RO_ATTR(next_full_seg);
NILFS_SEGCTOR_RO_ATTR(next_pseg_offset);
NILFS_SEGCTOR_RO_ATTR(next_checkpoint);
NILFS_SEGCTOR_RO_ATTR(last_seg_write_time);
NILFS_SEGCTOR_RO_ATTR(last_seg_write_time_secs);
NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time);
NILFS_SEGCTOR_RO_ATTR(last_nongc_write_time_secs);
NILFS_SEGCTOR_RO_ATTR(dirty_data_blocks_count);
NILFS_SEGCTOR_RO_ATTR(README);

/* Attribute table backing /sys/fs/nilfs2/<device>/segctor. */
static struct attribute *nilfs_segctor_attrs[] = {
	NILFS_SEGCTOR_ATTR_LIST(last_pseg_block),
	NILFS_SEGCTOR_ATTR_LIST(last_seg_sequence),
	NILFS_SEGCTOR_ATTR_LIST(last_seg_checkpoint),
	NILFS_SEGCTOR_ATTR_LIST(current_seg_sequence),
	NILFS_SEGCTOR_ATTR_LIST(current_last_full_seg),
	NILFS_SEGCTOR_ATTR_LIST(next_full_seg),
	NILFS_SEGCTOR_ATTR_LIST(next_pseg_offset),
	NILFS_SEGCTOR_ATTR_LIST(next_checkpoint),
	NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time),
	NILFS_SEGCTOR_ATTR_LIST(last_seg_write_time_secs),
	NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time),
	NILFS_SEGCTOR_ATTR_LIST(last_nongc_write_time_secs),
	NILFS_SEGCTOR_ATTR_LIST(dirty_data_blocks_count),
	NILFS_SEGCTOR_ATTR_LIST(README),
	NULL,
};

/* Generate sysfs_ops, kobj_type and create/delete helpers for the group. */
NILFS_DEV_INT_GROUP_OPS(segctor, dev);
NILFS_DEV_INT_GROUP_TYPE(segctor, dev);
NILFS_DEV_INT_GROUP_FNS(segctor, dev);
721
722/************************************************************************
723 * NILFS superblock attrs *
724 ************************************************************************/
725
726static ssize_t
727nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
728 struct the_nilfs *nilfs,
729 char *buf)
730{
731 time_t sbwtime;
732
733 down_read(&nilfs->ns_sem);
734 sbwtime = nilfs->ns_sbwtime;
735 up_read(&nilfs->ns_sem);
736
737 return NILFS_SHOW_TIME(sbwtime, buf);
738}
739
740static ssize_t
741nilfs_superblock_sb_write_time_secs_show(struct nilfs_superblock_attr *attr,
742 struct the_nilfs *nilfs,
743 char *buf)
744{
745 time_t sbwtime;
746
747 down_read(&nilfs->ns_sem);
748 sbwtime = nilfs->ns_sbwtime;
749 up_read(&nilfs->ns_sem);
750
751 return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)sbwtime);
752}
753
754static ssize_t
755nilfs_superblock_sb_write_count_show(struct nilfs_superblock_attr *attr,
756 struct the_nilfs *nilfs,
757 char *buf)
758{
759 unsigned sbwcount;
760
761 down_read(&nilfs->ns_sem);
762 sbwcount = nilfs->ns_sbwcount;
763 up_read(&nilfs->ns_sem);
764
765 return snprintf(buf, PAGE_SIZE, "%u\n", sbwcount);
766}
767
768static ssize_t
769nilfs_superblock_sb_update_frequency_show(struct nilfs_superblock_attr *attr,
770 struct the_nilfs *nilfs,
771 char *buf)
772{
773 unsigned sb_update_freq;
774
775 down_read(&nilfs->ns_sem);
776 sb_update_freq = nilfs->ns_sb_update_freq;
777 up_read(&nilfs->ns_sem);
778
779 return snprintf(buf, PAGE_SIZE, "%u\n", sb_update_freq);
780}
781
782static ssize_t
783nilfs_superblock_sb_update_frequency_store(struct nilfs_superblock_attr *attr,
784 struct the_nilfs *nilfs,
785 const char *buf, size_t count)
786{
787 unsigned val;
788 int err;
789
790 err = kstrtouint(skip_spaces(buf), 0, &val);
791 if (err) {
792 printk(KERN_ERR "NILFS: unable to convert string: err=%d\n",
793 err);
794 return err;
795 }
796
797 if (val < NILFS_SB_FREQ) {
798 val = NILFS_SB_FREQ;
799 printk(KERN_WARNING "NILFS: superblock update frequency cannot be lesser than 10 seconds\n");
800 }
801
802 down_write(&nilfs->ns_sem);
803 nilfs->ns_sb_update_freq = val;
804 up_write(&nilfs->ns_sem);
805
806 return count;
807}
808
/* Contents of /sys/fs/nilfs2/<device>/superblock/README. */
static const char sb_readme_str[] =
	"The superblock group contains attributes that describe\n"
	"superblock's details.\n\n"
	"(1) sb_write_time\n\tshow previous write time of super block "
	"in human-readable format.\n\n"
	"(2) sb_write_time_secs\n\tshow previous write time of super block "
	"in seconds.\n\n"
	"(3) sb_write_count\n\tshow write count of super block.\n\n"
	"(4) sb_update_frequency\n"
	"\tshow/set interval of periodical update of superblock (in seconds).\n\n"
	"\tYou can set preferable frequency of superblock update by command:\n\n"
	"\t'echo <val> > /sys/fs/<nilfs>/<dev>/superblock/sb_update_frequency'\n";
821
822static ssize_t
823nilfs_superblock_README_show(struct nilfs_superblock_attr *attr,
824 struct the_nilfs *nilfs, char *buf)
825{
826 return snprintf(buf, PAGE_SIZE, sb_readme_str);
827}
828
/* Attribute objects of the superblock group (sb_update_frequency is RW). */
NILFS_SUPERBLOCK_RO_ATTR(sb_write_time);
NILFS_SUPERBLOCK_RO_ATTR(sb_write_time_secs);
NILFS_SUPERBLOCK_RO_ATTR(sb_write_count);
NILFS_SUPERBLOCK_RW_ATTR(sb_update_frequency);
NILFS_SUPERBLOCK_RO_ATTR(README);

/* Attribute table backing /sys/fs/nilfs2/<device>/superblock. */
static struct attribute *nilfs_superblock_attrs[] = {
	NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time),
	NILFS_SUPERBLOCK_ATTR_LIST(sb_write_time_secs),
	NILFS_SUPERBLOCK_ATTR_LIST(sb_write_count),
	NILFS_SUPERBLOCK_ATTR_LIST(sb_update_frequency),
	NILFS_SUPERBLOCK_ATTR_LIST(README),
	NULL,
};

/* Generate sysfs_ops, kobj_type and create/delete helpers for the group. */
NILFS_DEV_INT_GROUP_OPS(superblock, dev);
NILFS_DEV_INT_GROUP_TYPE(superblock, dev);
NILFS_DEV_INT_GROUP_FNS(superblock, dev);
847
848/************************************************************************
849 * NILFS device attrs *
850 ************************************************************************/
851
852static
853ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
854 struct the_nilfs *nilfs,
855 char *buf)
856{
857 struct nilfs_super_block **sbp = nilfs->ns_sbp;
858 u32 major = le32_to_cpu(sbp[0]->s_rev_level);
859 u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
860
861 return snprintf(buf, PAGE_SIZE, "%d.%d\n", major, minor);
862}
863
864static
865ssize_t nilfs_dev_blocksize_show(struct nilfs_dev_attr *attr,
866 struct the_nilfs *nilfs,
867 char *buf)
868{
869 return snprintf(buf, PAGE_SIZE, "%u\n", nilfs->ns_blocksize);
870}
871
872static
873ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
874 struct the_nilfs *nilfs,
875 char *buf)
876{
877 struct nilfs_super_block **sbp = nilfs->ns_sbp;
878 u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
879
880 return snprintf(buf, PAGE_SIZE, "%llu\n", dev_size);
881}
882
883static
884ssize_t nilfs_dev_free_blocks_show(struct nilfs_dev_attr *attr,
885 struct the_nilfs *nilfs,
886 char *buf)
887{
888 sector_t free_blocks = 0;
889
890 nilfs_count_free_blocks(nilfs, &free_blocks);
891 return snprintf(buf, PAGE_SIZE, "%llu\n",
892 (unsigned long long)free_blocks);
893}
894
895static
896ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
897 struct the_nilfs *nilfs,
898 char *buf)
899{
900 struct nilfs_super_block **sbp = nilfs->ns_sbp;
901
902 return snprintf(buf, PAGE_SIZE, "%pUb\n", sbp[0]->s_uuid);
903}
904
905static
906ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
907 struct the_nilfs *nilfs,
908 char *buf)
909{
910 struct nilfs_super_block **sbp = nilfs->ns_sbp;
911
912 return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
913 sbp[0]->s_volume_name);
914}
915
/* Contents of /sys/fs/nilfs2/<device>/README. */
static const char dev_readme_str[] =
	"The <device> group contains attributes that describe file system\n"
	"partition's details.\n\n"
	"(1) revision\n\tshow NILFS file system revision.\n\n"
	"(2) blocksize\n\tshow volume block size in bytes.\n\n"
	"(3) device_size\n\tshow volume size in bytes.\n\n"
	"(4) free_blocks\n\tshow count of free blocks on volume.\n\n"
	"(5) uuid\n\tshow volume's UUID.\n\n"
	"(6) volume_name\n\tshow volume's name.\n\n";
925
926static ssize_t nilfs_dev_README_show(struct nilfs_dev_attr *attr,
927 struct the_nilfs *nilfs,
928 char *buf)
929{
930 return snprintf(buf, PAGE_SIZE, dev_readme_str);
931}
932
/* Read-only attribute objects of the per-device group. */
NILFS_DEV_RO_ATTR(revision);
NILFS_DEV_RO_ATTR(blocksize);
NILFS_DEV_RO_ATTR(device_size);
NILFS_DEV_RO_ATTR(free_blocks);
NILFS_DEV_RO_ATTR(uuid);
NILFS_DEV_RO_ATTR(volume_name);
NILFS_DEV_RO_ATTR(README);

/* Attribute table backing /sys/fs/nilfs2/<device>. */
static struct attribute *nilfs_dev_attrs[] = {
	NILFS_DEV_ATTR_LIST(revision),
	NILFS_DEV_ATTR_LIST(blocksize),
	NILFS_DEV_ATTR_LIST(device_size),
	NILFS_DEV_ATTR_LIST(free_blocks),
	NILFS_DEV_ATTR_LIST(uuid),
	NILFS_DEV_ATTR_LIST(volume_name),
	NILFS_DEV_ATTR_LIST(README),
	NULL,
};
951
952static ssize_t nilfs_dev_attr_show(struct kobject *kobj,
953 struct attribute *attr, char *buf)
954{
955 struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
956 ns_dev_kobj);
957 struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
958 attr);
959
960 return a->show ? a->show(a, nilfs, buf) : 0;
961}
962
963static ssize_t nilfs_dev_attr_store(struct kobject *kobj,
964 struct attribute *attr,
965 const char *buf, size_t len)
966{
967 struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
968 ns_dev_kobj);
969 struct nilfs_dev_attr *a = container_of(attr, struct nilfs_dev_attr,
970 attr);
971
972 return a->store ? a->store(a, nilfs, buf, len) : 0;
973}
974
/*
 * Release callback of the per-device kobject; signals waiters that the
 * kobject has been fully released.
 */
static void nilfs_dev_attr_release(struct kobject *kobj)
{
	struct the_nilfs *nilfs = container_of(kobj, struct the_nilfs,
						ns_dev_kobj);
	complete(&nilfs->ns_dev_kobj_unregister);
}

/* sysfs read/write dispatch table for the per-device kobject. */
static const struct sysfs_ops nilfs_dev_attr_ops = {
	.show = nilfs_dev_attr_show,
	.store = nilfs_dev_attr_store,
};

/* kobj_type of /sys/fs/nilfs2/<device>. */
static struct kobj_type nilfs_dev_ktype = {
	.default_attrs = nilfs_dev_attrs,
	.sysfs_ops = &nilfs_dev_attr_ops,
	.release = nilfs_dev_attr_release,
};
992
993int nilfs_sysfs_create_device_group(struct super_block *sb)
994{
995 struct the_nilfs *nilfs = sb->s_fs_info;
996 size_t devgrp_size = sizeof(struct nilfs_sysfs_dev_subgroups);
997 int err;
998
999 nilfs->ns_dev_subgroups = kzalloc(devgrp_size, GFP_KERNEL);
1000 if (unlikely(!nilfs->ns_dev_subgroups)) {
1001 err = -ENOMEM;
1002 printk(KERN_ERR "NILFS: unable to allocate memory for device group\n");
1003 goto failed_create_device_group;
1004 }
1005
1006 nilfs->ns_dev_kobj.kset = nilfs_kset;
1007 init_completion(&nilfs->ns_dev_kobj_unregister);
1008 err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
1009 "%s", sb->s_id);
1010 if (err)
1011 goto free_dev_subgroups;
1012
1013 err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
1014 if (err)
1015 goto cleanup_dev_kobject;
1016
1017 err = nilfs_sysfs_create_checkpoints_group(nilfs);
1018 if (err)
1019 goto delete_mounted_snapshots_group;
1020
1021 err = nilfs_sysfs_create_segments_group(nilfs);
1022 if (err)
1023 goto delete_checkpoints_group;
1024
1025 err = nilfs_sysfs_create_superblock_group(nilfs);
1026 if (err)
1027 goto delete_segments_group;
1028
1029 err = nilfs_sysfs_create_segctor_group(nilfs);
1030 if (err)
1031 goto delete_superblock_group;
1032
1033 return 0;
1034
1035delete_superblock_group:
1036 nilfs_sysfs_delete_superblock_group(nilfs);
1037
1038delete_segments_group:
1039 nilfs_sysfs_delete_segments_group(nilfs);
1040
1041delete_checkpoints_group:
1042 nilfs_sysfs_delete_checkpoints_group(nilfs);
1043
1044delete_mounted_snapshots_group:
1045 nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
1046
1047cleanup_dev_kobject:
1048 kobject_del(&nilfs->ns_dev_kobj);
1049
1050free_dev_subgroups:
1051 kfree(nilfs->ns_dev_subgroups);
1052
1053failed_create_device_group:
1054 return err;
1055}
1056
1057void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
1058{
1059 nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
1060 nilfs_sysfs_delete_checkpoints_group(nilfs);
1061 nilfs_sysfs_delete_segments_group(nilfs);
1062 nilfs_sysfs_delete_superblock_group(nilfs);
1063 nilfs_sysfs_delete_segctor_group(nilfs);
1064 kobject_del(&nilfs->ns_dev_kobj);
1065 kfree(nilfs->ns_dev_subgroups);
1066}
1067
1068/************************************************************************
1069 * NILFS feature attrs *
1070 ************************************************************************/
1071
1072static ssize_t nilfs_feature_revision_show(struct kobject *kobj,
1073 struct attribute *attr, char *buf)
1074{
1075 return snprintf(buf, PAGE_SIZE, "%d.%d\n",
1076 NILFS_CURRENT_REV, NILFS_MINOR_REV);
1077}
1078
/* Contents of /sys/fs/nilfs2/features/README. */
static const char features_readme_str[] =
	"The features group contains attributes that describe NILFS file\n"
	"system driver features.\n\n"
	"(1) revision\n\tshow current revision of NILFS file system driver.\n";
1083
1084static ssize_t nilfs_feature_README_show(struct kobject *kobj,
1085 struct attribute *attr,
1086 char *buf)
1087{
1088 return snprintf(buf, PAGE_SIZE, features_readme_str);
1089}
1090
/* Read-only attribute objects of the features group. */
NILFS_FEATURE_RO_ATTR(revision);
NILFS_FEATURE_RO_ATTR(README);

/* Attribute table backing /sys/fs/nilfs2/features. */
static struct attribute *nilfs_feature_attrs[] = {
	NILFS_FEATURE_ATTR_LIST(revision),
	NILFS_FEATURE_ATTR_LIST(README),
	NULL,
};

/* "features" group registered directly on the nilfs2 kset kobject. */
static const struct attribute_group nilfs_feature_attr_group = {
	.name = "features",
	.attrs = nilfs_feature_attrs,
};
1104
1105int __init nilfs_sysfs_init(void)
1106{
1107 int err;
1108
1109 nilfs_kset = kset_create_and_add(NILFS_ROOT_GROUP_NAME, NULL, fs_kobj);
1110 if (!nilfs_kset) {
1111 err = -ENOMEM;
1112 printk(KERN_ERR "NILFS: unable to create sysfs entry: err %d\n",
1113 err);
1114 goto failed_sysfs_init;
1115 }
1116
1117 err = sysfs_create_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
1118 if (unlikely(err)) {
1119 printk(KERN_ERR "NILFS: unable to create feature group: err %d\n",
1120 err);
1121 goto cleanup_sysfs_init;
1122 }
1123
1124 return 0;
1125
1126cleanup_sysfs_init:
1127 kset_unregister(nilfs_kset);
1128
1129failed_sysfs_init:
1130 return err;
1131}
1132
/*
 * nilfs_sysfs_exit - remove the "features" group and unregister the
 * /sys/fs/nilfs2 kset on module unload.
 */
void nilfs_sysfs_exit(void)
{
	sysfs_remove_group(&nilfs_kset->kobj, &nilfs_feature_attr_group);
	kset_unregister(nilfs_kset);
}
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
new file mode 100644
index 000000000000..677e3a1a8370
--- /dev/null
+++ b/fs/nilfs2/sysfs.h
@@ -0,0 +1,176 @@
1/*
2 * sysfs.h - sysfs support declarations.
3 *
4 * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
5 * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
18 */
19
20#ifndef _NILFS_SYSFS_H
21#define _NILFS_SYSFS_H
22
23#include <linux/sysfs.h>
24
25#define NILFS_ROOT_GROUP_NAME "nilfs2"
26
27/*
28 * struct nilfs_sysfs_dev_subgroups - device subgroup kernel objects
29 * @sg_superblock_kobj: /sys/fs/<nilfs>/<device>/superblock
30 * @sg_superblock_kobj_unregister: completion state
31 * @sg_segctor_kobj: /sys/fs/<nilfs>/<device>/segctor
32 * @sg_segctor_kobj_unregister: completion state
33 * @sg_mounted_snapshots_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots
34 * @sg_mounted_snapshots_kobj_unregister: completion state
35 * @sg_checkpoints_kobj: /sys/fs/<nilfs>/<device>/checkpoints
36 * @sg_checkpoints_kobj_unregister: completion state
37 * @sg_segments_kobj: /sys/fs/<nilfs>/<device>/segments
38 * @sg_segments_kobj_unregister: completion state
39 */
40struct nilfs_sysfs_dev_subgroups {
41 /* /sys/fs/<nilfs>/<device>/superblock */
42 struct kobject sg_superblock_kobj;
43 struct completion sg_superblock_kobj_unregister;
44
45 /* /sys/fs/<nilfs>/<device>/segctor */
46 struct kobject sg_segctor_kobj;
47 struct completion sg_segctor_kobj_unregister;
48
49 /* /sys/fs/<nilfs>/<device>/mounted_snapshots */
50 struct kobject sg_mounted_snapshots_kobj;
51 struct completion sg_mounted_snapshots_kobj_unregister;
52
53 /* /sys/fs/<nilfs>/<device>/checkpoints */
54 struct kobject sg_checkpoints_kobj;
55 struct completion sg_checkpoints_kobj_unregister;
56
57 /* /sys/fs/<nilfs>/<device>/segments */
58 struct kobject sg_segments_kobj;
59 struct completion sg_segments_kobj_unregister;
60};
61
62#define NILFS_COMMON_ATTR_STRUCT(name) \
63struct nilfs_##name##_attr { \
64 struct attribute attr; \
65 ssize_t (*show)(struct kobject *, struct attribute *, \
66 char *); \
67 ssize_t (*store)(struct kobject *, struct attribute *, \
68 const char *, size_t); \
69};
70
71NILFS_COMMON_ATTR_STRUCT(feature);
72
73#define NILFS_DEV_ATTR_STRUCT(name) \
74struct nilfs_##name##_attr { \
75 struct attribute attr; \
76 ssize_t (*show)(struct nilfs_##name##_attr *, struct the_nilfs *, \
77 char *); \
78 ssize_t (*store)(struct nilfs_##name##_attr *, struct the_nilfs *, \
79 const char *, size_t); \
80};
81
82NILFS_DEV_ATTR_STRUCT(dev);
83NILFS_DEV_ATTR_STRUCT(segments);
84NILFS_DEV_ATTR_STRUCT(mounted_snapshots);
85NILFS_DEV_ATTR_STRUCT(checkpoints);
86NILFS_DEV_ATTR_STRUCT(superblock);
87NILFS_DEV_ATTR_STRUCT(segctor);
88
89#define NILFS_CP_ATTR_STRUCT(name) \
90struct nilfs_##name##_attr { \
91 struct attribute attr; \
92 ssize_t (*show)(struct nilfs_##name##_attr *, struct nilfs_root *, \
93 char *); \
94 ssize_t (*store)(struct nilfs_##name##_attr *, struct nilfs_root *, \
95 const char *, size_t); \
96};
97
98NILFS_CP_ATTR_STRUCT(snapshot);
99
100#define NILFS_ATTR(type, name, mode, show, store) \
101 static struct nilfs_##type##_attr nilfs_##type##_attr_##name = \
102 __ATTR(name, mode, show, store)
103
104#define NILFS_INFO_ATTR(type, name) \
105 NILFS_ATTR(type, name, 0444, NULL, NULL)
106#define NILFS_RO_ATTR(type, name) \
107 NILFS_ATTR(type, name, 0444, nilfs_##type##_##name##_show, NULL)
108#define NILFS_RW_ATTR(type, name) \
109 NILFS_ATTR(type, name, 0644, \
110 nilfs_##type##_##name##_show, \
111 nilfs_##type##_##name##_store)
112
113#define NILFS_FEATURE_INFO_ATTR(name) \
114 NILFS_INFO_ATTR(feature, name)
115#define NILFS_FEATURE_RO_ATTR(name) \
116 NILFS_RO_ATTR(feature, name)
117#define NILFS_FEATURE_RW_ATTR(name) \
118 NILFS_RW_ATTR(feature, name)
119
120#define NILFS_DEV_INFO_ATTR(name) \
121 NILFS_INFO_ATTR(dev, name)
122#define NILFS_DEV_RO_ATTR(name) \
123 NILFS_RO_ATTR(dev, name)
124#define NILFS_DEV_RW_ATTR(name) \
125 NILFS_RW_ATTR(dev, name)
126
127#define NILFS_SEGMENTS_RO_ATTR(name) \
128 NILFS_RO_ATTR(segments, name)
129#define NILFS_SEGMENTS_RW_ATTR(name) \
130 NILFS_RW_ATTR(segs_info, name)
131
132#define NILFS_MOUNTED_SNAPSHOTS_RO_ATTR(name) \
133 NILFS_RO_ATTR(mounted_snapshots, name)
134
135#define NILFS_CHECKPOINTS_RO_ATTR(name) \
136 NILFS_RO_ATTR(checkpoints, name)
137#define NILFS_CHECKPOINTS_RW_ATTR(name) \
138 NILFS_RW_ATTR(checkpoints, name)
139
140#define NILFS_SNAPSHOT_INFO_ATTR(name) \
141 NILFS_INFO_ATTR(snapshot, name)
142#define NILFS_SNAPSHOT_RO_ATTR(name) \
143 NILFS_RO_ATTR(snapshot, name)
144#define NILFS_SNAPSHOT_RW_ATTR(name) \
145 NILFS_RW_ATTR(snapshot, name)
146
147#define NILFS_SUPERBLOCK_RO_ATTR(name) \
148 NILFS_RO_ATTR(superblock, name)
149#define NILFS_SUPERBLOCK_RW_ATTR(name) \
150 NILFS_RW_ATTR(superblock, name)
151
152#define NILFS_SEGCTOR_INFO_ATTR(name) \
153 NILFS_INFO_ATTR(segctor, name)
154#define NILFS_SEGCTOR_RO_ATTR(name) \
155 NILFS_RO_ATTR(segctor, name)
156#define NILFS_SEGCTOR_RW_ATTR(name) \
157 NILFS_RW_ATTR(segctor, name)
158
159#define NILFS_FEATURE_ATTR_LIST(name) \
160 (&nilfs_feature_attr_##name.attr)
161#define NILFS_DEV_ATTR_LIST(name) \
162 (&nilfs_dev_attr_##name.attr)
163#define NILFS_SEGMENTS_ATTR_LIST(name) \
164 (&nilfs_segments_attr_##name.attr)
165#define NILFS_MOUNTED_SNAPSHOTS_ATTR_LIST(name) \
166 (&nilfs_mounted_snapshots_attr_##name.attr)
167#define NILFS_CHECKPOINTS_ATTR_LIST(name) \
168 (&nilfs_checkpoints_attr_##name.attr)
169#define NILFS_SNAPSHOT_ATTR_LIST(name) \
170 (&nilfs_snapshot_attr_##name.attr)
171#define NILFS_SUPERBLOCK_ATTR_LIST(name) \
172 (&nilfs_superblock_attr_##name.attr)
173#define NILFS_SEGCTOR_ATTR_LIST(name) \
174 (&nilfs_segctor_attr_##name.attr)
175
176#endif /* _NILFS_SYSFS_H */
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8ba8229ba076..9da25fe9ea61 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -85,6 +85,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
85 nilfs->ns_cptree = RB_ROOT; 85 nilfs->ns_cptree = RB_ROOT;
86 spin_lock_init(&nilfs->ns_cptree_lock); 86 spin_lock_init(&nilfs->ns_cptree_lock);
87 init_rwsem(&nilfs->ns_segctor_sem); 87 init_rwsem(&nilfs->ns_segctor_sem);
88 nilfs->ns_sb_update_freq = NILFS_SB_FREQ;
88 89
89 return nilfs; 90 return nilfs;
90} 91}
@@ -97,6 +98,7 @@ void destroy_nilfs(struct the_nilfs *nilfs)
97{ 98{
98 might_sleep(); 99 might_sleep();
99 if (nilfs_init(nilfs)) { 100 if (nilfs_init(nilfs)) {
101 nilfs_sysfs_delete_device_group(nilfs);
100 brelse(nilfs->ns_sbh[0]); 102 brelse(nilfs->ns_sbh[0]);
101 brelse(nilfs->ns_sbh[1]); 103 brelse(nilfs->ns_sbh[1]);
102 } 104 }
@@ -640,6 +642,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
640 if (err) 642 if (err)
641 goto failed_sbh; 643 goto failed_sbh;
642 644
645 err = nilfs_sysfs_create_device_group(sb);
646 if (err)
647 goto failed_sbh;
648
643 set_nilfs_init(nilfs); 649 set_nilfs_init(nilfs);
644 err = 0; 650 err = 0;
645 out: 651 out:
@@ -740,12 +746,13 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
740{ 746{
741 struct rb_node **p, *parent; 747 struct rb_node **p, *parent;
742 struct nilfs_root *root, *new; 748 struct nilfs_root *root, *new;
749 int err;
743 750
744 root = nilfs_lookup_root(nilfs, cno); 751 root = nilfs_lookup_root(nilfs, cno);
745 if (root) 752 if (root)
746 return root; 753 return root;
747 754
748 new = kmalloc(sizeof(*root), GFP_KERNEL); 755 new = kzalloc(sizeof(*root), GFP_KERNEL);
749 if (!new) 756 if (!new)
750 return NULL; 757 return NULL;
751 758
@@ -782,6 +789,12 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
782 789
783 spin_unlock(&nilfs->ns_cptree_lock); 790 spin_unlock(&nilfs->ns_cptree_lock);
784 791
792 err = nilfs_sysfs_create_snapshot_group(new);
793 if (err) {
794 kfree(new);
795 new = NULL;
796 }
797
785 return new; 798 return new;
786} 799}
787 800
@@ -790,6 +803,8 @@ void nilfs_put_root(struct nilfs_root *root)
790 if (atomic_dec_and_test(&root->count)) { 803 if (atomic_dec_and_test(&root->count)) {
791 struct the_nilfs *nilfs = root->nilfs; 804 struct the_nilfs *nilfs = root->nilfs;
792 805
806 nilfs_sysfs_delete_snapshot_group(root);
807
793 spin_lock(&nilfs->ns_cptree_lock); 808 spin_lock(&nilfs->ns_cptree_lock);
794 rb_erase(&root->rb_node, &nilfs->ns_cptree); 809 rb_erase(&root->rb_node, &nilfs->ns_cptree);
795 spin_unlock(&nilfs->ns_cptree_lock); 810 spin_unlock(&nilfs->ns_cptree_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index de8cc53b4a5c..d01ead1bea9a 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -33,6 +33,7 @@
33#include <linux/slab.h> 33#include <linux/slab.h>
34 34
35struct nilfs_sc_info; 35struct nilfs_sc_info;
36struct nilfs_sysfs_dev_subgroups;
36 37
37/* the_nilfs struct */ 38/* the_nilfs struct */
38enum { 39enum {
@@ -54,6 +55,7 @@ enum {
54 * @ns_sbwcount: write count of super block 55 * @ns_sbwcount: write count of super block
55 * @ns_sbsize: size of valid data in super block 56 * @ns_sbsize: size of valid data in super block
56 * @ns_mount_state: file system state 57 * @ns_mount_state: file system state
58 * @ns_sb_update_freq: interval of periodical update of superblocks (in seconds)
57 * @ns_seg_seq: segment sequence counter 59 * @ns_seg_seq: segment sequence counter
58 * @ns_segnum: index number of the latest full segment. 60 * @ns_segnum: index number of the latest full segment.
59 * @ns_nextnum: index number of the full segment index to be used next 61 * @ns_nextnum: index number of the full segment index to be used next
@@ -95,6 +97,9 @@ enum {
95 * @ns_inode_size: size of on-disk inode 97 * @ns_inode_size: size of on-disk inode
96 * @ns_first_ino: first not-special inode number 98 * @ns_first_ino: first not-special inode number
97 * @ns_crc_seed: seed value of CRC32 calculation 99 * @ns_crc_seed: seed value of CRC32 calculation
100 * @ns_dev_kobj: /sys/fs/<nilfs>/<device>
101 * @ns_dev_kobj_unregister: completion state
102 * @ns_dev_subgroups: <device> subgroups pointer
98 */ 103 */
99struct the_nilfs { 104struct the_nilfs {
100 unsigned long ns_flags; 105 unsigned long ns_flags;
@@ -114,6 +119,7 @@ struct the_nilfs {
114 unsigned ns_sbwcount; 119 unsigned ns_sbwcount;
115 unsigned ns_sbsize; 120 unsigned ns_sbsize;
116 unsigned ns_mount_state; 121 unsigned ns_mount_state;
122 unsigned ns_sb_update_freq;
117 123
118 /* 124 /*
119 * Following fields are dedicated to a writable FS-instance. 125 * Following fields are dedicated to a writable FS-instance.
@@ -188,6 +194,11 @@ struct the_nilfs {
188 int ns_inode_size; 194 int ns_inode_size;
189 int ns_first_ino; 195 int ns_first_ino;
190 u32 ns_crc_seed; 196 u32 ns_crc_seed;
197
198 /* /sys/fs/<nilfs>/<device> */
199 struct kobject ns_dev_kobj;
200 struct completion ns_dev_kobj_unregister;
201 struct nilfs_sysfs_dev_subgroups *ns_dev_subgroups;
191}; 202};
192 203
193#define THE_NILFS_FNS(bit, name) \ 204#define THE_NILFS_FNS(bit, name) \
@@ -232,6 +243,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
232 * @ifile: inode file 243 * @ifile: inode file
233 * @inodes_count: number of inodes 244 * @inodes_count: number of inodes
234 * @blocks_count: number of blocks 245 * @blocks_count: number of blocks
246 * @snapshot_kobj: /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot>
247 * @snapshot_kobj_unregister: completion state for kernel object
235 */ 248 */
236struct nilfs_root { 249struct nilfs_root {
237 __u64 cno; 250 __u64 cno;
@@ -243,6 +256,10 @@ struct nilfs_root {
243 256
244 atomic64_t inodes_count; 257 atomic64_t inodes_count;
245 atomic64_t blocks_count; 258 atomic64_t blocks_count;
259
260 /* /sys/fs/<nilfs>/<device>/mounted_snapshots/<snapshot> */
261 struct kobject snapshot_kobj;
262 struct completion snapshot_kobj_unregister;
246}; 263};
247 264
248/* Special checkpoint number */ 265/* Special checkpoint number */
@@ -254,7 +271,8 @@ struct nilfs_root {
254static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) 271static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
255{ 272{
256 u64 t = get_seconds(); 273 u64 t = get_seconds();
257 return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ; 274 return t < nilfs->ns_sbwtime ||
275 t > nilfs->ns_sbwtime + nilfs->ns_sb_update_freq;
258} 276}
259 277
260static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) 278static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index ee9cb3795c2b..30d3addfad75 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -70,8 +70,15 @@ static int fanotify_get_response(struct fsnotify_group *group,
70 wait_event(group->fanotify_data.access_waitq, event->response || 70 wait_event(group->fanotify_data.access_waitq, event->response ||
71 atomic_read(&group->fanotify_data.bypass_perm)); 71 atomic_read(&group->fanotify_data.bypass_perm));
72 72
73 if (!event->response) /* bypass_perm set */ 73 if (!event->response) { /* bypass_perm set */
74 /*
75 * Event was canceled because group is being destroyed. Remove
76 * it from group's event list because we are responsible for
77 * freeing the permission event.
78 */
79 fsnotify_remove_event(group, &event->fae.fse);
74 return 0; 80 return 0;
81 }
75 82
76 /* userspace responded, convert to something usable */ 83 /* userspace responded, convert to something usable */
77 switch (event->response) { 84 switch (event->response) {
@@ -210,7 +217,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
210 return -ENOMEM; 217 return -ENOMEM;
211 218
212 fsn_event = &event->fse; 219 fsn_event = &event->fse;
213 ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); 220 ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
214 if (ret) { 221 if (ret) {
215 /* Permission events shouldn't be merged */ 222 /* Permission events shouldn't be merged */
216 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); 223 BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3fdc8a3e1134..b13992a41bd9 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -66,7 +66,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
66 66
67 /* held the notification_mutex the whole time, so this is the 67 /* held the notification_mutex the whole time, so this is the
68 * same event we peeked above */ 68 * same event we peeked above */
69 return fsnotify_remove_notify_event(group); 69 return fsnotify_remove_first_event(group);
70} 70}
71 71
72static int create_fd(struct fsnotify_group *group, 72static int create_fd(struct fsnotify_group *group,
@@ -359,6 +359,11 @@ static int fanotify_release(struct inode *ignored, struct file *file)
359#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 359#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
360 struct fanotify_perm_event_info *event, *next; 360 struct fanotify_perm_event_info *event, *next;
361 361
362 /*
363 * There may be still new events arriving in the notification queue
364 * but since userspace cannot use fanotify fd anymore, no event can
365 * enter or leave access_list by now.
366 */
362 spin_lock(&group->fanotify_data.access_lock); 367 spin_lock(&group->fanotify_data.access_lock);
363 368
364 atomic_inc(&group->fanotify_data.bypass_perm); 369 atomic_inc(&group->fanotify_data.bypass_perm);
@@ -373,6 +378,13 @@ static int fanotify_release(struct inode *ignored, struct file *file)
373 } 378 }
374 spin_unlock(&group->fanotify_data.access_lock); 379 spin_unlock(&group->fanotify_data.access_lock);
375 380
381 /*
382 * Since bypass_perm is set, newly queued events will not wait for
383 * access response. Wake up the already sleeping ones now.
384 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
385 * processes sleeping in fanotify_handle_event() waiting for access
386 * response and thus also for all permission events to be freed.
387 */
376 wake_up(&group->fanotify_data.access_waitq); 388 wake_up(&group->fanotify_data.access_waitq);
377#endif 389#endif
378 390
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..9ce062218de9 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -232,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
232 232
233 BUG_ON(last == NULL); 233 BUG_ON(last == NULL);
234 /* mark should be the last entry. last is the current last entry */ 234 /* mark should be the last entry. last is the current last entry */
235 hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list); 235 hlist_add_behind_rcu(&mark->i.i_list, &last->i.i_list);
236out: 236out:
237 fsnotify_recalc_inode_mask_locked(inode); 237 fsnotify_recalc_inode_mask_locked(inode);
238 spin_unlock(&inode->i_lock); 238 spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 43ab1e1a07a2..0f88bc0b4e6c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -108,7 +108,7 @@ int inotify_handle_event(struct fsnotify_group *group,
108 if (len) 108 if (len)
109 strcpy(event->name, file_name); 109 strcpy(event->name, file_name);
110 110
111 ret = fsnotify_add_notify_event(group, fsn_event, inotify_merge); 111 ret = fsnotify_add_event(group, fsn_event, inotify_merge);
112 if (ret) { 112 if (ret) {
113 /* Our event wasn't used in the end. Free it. */ 113 /* Our event wasn't used in the end. Free it. */
114 fsnotify_destroy_event(group, fsn_event); 114 fsnotify_destroy_event(group, fsn_event);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cc423a30a0c8..daf76652fe58 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -149,7 +149,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
149 if (fsnotify_notify_queue_is_empty(group)) 149 if (fsnotify_notify_queue_is_empty(group))
150 return NULL; 150 return NULL;
151 151
152 event = fsnotify_peek_notify_event(group); 152 event = fsnotify_peek_first_event(group);
153 153
154 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 154 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
155 155
@@ -159,7 +159,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
159 159
160 /* held the notification_mutex the whole time, so this is the 160 /* held the notification_mutex the whole time, so this is the
161 * same event we peeked above */ 161 * same event we peeked above */
162 fsnotify_remove_notify_event(group); 162 fsnotify_remove_first_event(group);
163 163
164 return event; 164 return event;
165} 165}
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 1e58402171a5..a95d8e037aeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -73,7 +73,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
73 /* Overflow events are per-group and we don't want to free them */ 73 /* Overflow events are per-group and we don't want to free them */
74 if (!event || event->mask == FS_Q_OVERFLOW) 74 if (!event || event->mask == FS_Q_OVERFLOW)
75 return; 75 return;
76 76 /* If the event is still queued, we have a problem... */
77 WARN_ON(!list_empty(&event->list));
77 group->ops->free_event(event); 78 group->ops->free_event(event);
78} 79}
79 80
@@ -83,10 +84,10 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
83 * added to the queue, 1 if the event was merged with some other queued event, 84 * added to the queue, 1 if the event was merged with some other queued event,
84 * 2 if the queue of events has overflown. 85 * 2 if the queue of events has overflown.
85 */ 86 */
86int fsnotify_add_notify_event(struct fsnotify_group *group, 87int fsnotify_add_event(struct fsnotify_group *group,
87 struct fsnotify_event *event, 88 struct fsnotify_event *event,
88 int (*merge)(struct list_head *, 89 int (*merge)(struct list_head *,
89 struct fsnotify_event *)) 90 struct fsnotify_event *))
90{ 91{
91 int ret = 0; 92 int ret = 0;
92 struct list_head *list = &group->notification_list; 93 struct list_head *list = &group->notification_list;
@@ -125,10 +126,25 @@ queue:
125} 126}
126 127
127/* 128/*
129 * Remove @event from group's notification queue. It is the responsibility of
130 * the caller to destroy the event.
131 */
132void fsnotify_remove_event(struct fsnotify_group *group,
133 struct fsnotify_event *event)
134{
135 mutex_lock(&group->notification_mutex);
136 if (!list_empty(&event->list)) {
137 list_del_init(&event->list);
138 group->q_len--;
139 }
140 mutex_unlock(&group->notification_mutex);
141}
142
143/*
128 * Remove and return the first event from the notification list. It is the 144 * Remove and return the first event from the notification list. It is the
129 * responsibility of the caller to destroy the obtained event 145 * responsibility of the caller to destroy the obtained event
130 */ 146 */
131struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) 147struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
132{ 148{
133 struct fsnotify_event *event; 149 struct fsnotify_event *event;
134 150
@@ -140,7 +156,7 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
140 struct fsnotify_event, list); 156 struct fsnotify_event, list);
141 /* 157 /*
142 * We need to init list head for the case of overflow event so that 158 * We need to init list head for the case of overflow event so that
143 * check in fsnotify_add_notify_events() works 159 * check in fsnotify_add_event() works
144 */ 160 */
145 list_del_init(&event->list); 161 list_del_init(&event->list);
146 group->q_len--; 162 group->q_len--;
@@ -149,9 +165,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
149} 165}
150 166
151/* 167/*
152 * This will not remove the event, that must be done with fsnotify_remove_notify_event() 168 * This will not remove the event, that must be done with
169 * fsnotify_remove_first_event()
153 */ 170 */
154struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) 171struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group)
155{ 172{
156 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 173 BUG_ON(!mutex_is_locked(&group->notification_mutex));
157 174
@@ -169,7 +186,7 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
169 186
170 mutex_lock(&group->notification_mutex); 187 mutex_lock(&group->notification_mutex);
171 while (!fsnotify_notify_queue_is_empty(group)) { 188 while (!fsnotify_notify_queue_is_empty(group)) {
172 event = fsnotify_remove_notify_event(group); 189 event = fsnotify_remove_first_event(group);
173 fsnotify_destroy_event(group, event); 190 fsnotify_destroy_event(group, event);
174 } 191 }
175 mutex_unlock(&group->notification_mutex); 192 mutex_unlock(&group->notification_mutex);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 68ca5a8704b5..ac851e8376b1 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -191,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
191 191
192 BUG_ON(last == NULL); 192 BUG_ON(last == NULL);
193 /* mark should be the last entry. last is the current last entry */ 193 /* mark should be the last entry. last is the current last entry */
194 hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list); 194 hlist_add_behind_rcu(&mark->m.m_list, &last->m.m_list);
195out: 195out:
196 fsnotify_recalc_vfsmount_mask_locked(mnt); 196 fsnotify_recalc_vfsmount_mask_locked(mnt);
197 spin_unlock(&mnt->mnt_root->d_lock); 197 spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5c9e2c81cb11..f5ec1ce7a532 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -74,8 +74,6 @@ static int ntfs_file_open(struct inode *vi, struct file *filp)
74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute 74 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
75 * @ni: ntfs inode of the attribute to extend 75 * @ni: ntfs inode of the attribute to extend
76 * @new_init_size: requested new initialized size in bytes 76 * @new_init_size: requested new initialized size in bytes
77 * @cached_page: store any allocated but unused page here
78 * @lru_pvec: lru-buffering pagevec of the caller
79 * 77 *
80 * Extend the initialized size of an attribute described by the ntfs inode @ni 78 * Extend the initialized size of an attribute described by the ntfs inode @ni
81 * to @new_init_size bytes. This involves zeroing any non-sparse space between 79 * to @new_init_size bytes. This involves zeroing any non-sparse space between
@@ -395,7 +393,6 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
395 * @nr_pages: number of page cache pages to obtain 393 * @nr_pages: number of page cache pages to obtain
396 * @pages: array of pages in which to return the obtained page cache pages 394 * @pages: array of pages in which to return the obtained page cache pages
397 * @cached_page: allocated but as yet unused page 395 * @cached_page: allocated but as yet unused page
398 * @lru_pvec: lru-buffering pagevec of caller
399 * 396 *
400 * Obtain @nr_pages locked page cache pages from the mapping @mapping and 397 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
401 * starting at index @index. 398 * starting at index @index.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9d8fcf2f3b94..a93bf9892256 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4961,6 +4961,15 @@ leftright:
4961 4961
4962 el = path_leaf_el(path); 4962 el = path_leaf_el(path);
4963 split_index = ocfs2_search_extent_list(el, cpos); 4963 split_index = ocfs2_search_extent_list(el, cpos);
4964 if (split_index == -1) {
4965 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
4966 "Owner %llu has an extent at cpos %u "
4967 "which can no longer be found.\n",
4968 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
4969 cpos);
4970 ret = -EROFS;
4971 goto out;
4972 }
4964 goto leftright; 4973 goto leftright;
4965 } 4974 }
4966out: 4975out:
@@ -5135,7 +5144,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
5135 el = path_leaf_el(left_path); 5144 el = path_leaf_el(left_path);
5136 5145
5137 index = ocfs2_search_extent_list(el, cpos); 5146 index = ocfs2_search_extent_list(el, cpos);
5138 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5147 if (index == -1) {
5139 ocfs2_error(sb, 5148 ocfs2_error(sb,
5140 "Owner %llu has an extent at cpos %u which can no " 5149 "Owner %llu has an extent at cpos %u which can no "
5141 "longer be found.\n", 5150 "longer be found.\n",
@@ -5491,7 +5500,7 @@ int ocfs2_remove_extent(handle_t *handle,
5491 5500
5492 el = path_leaf_el(path); 5501 el = path_leaf_el(path);
5493 index = ocfs2_search_extent_list(el, cpos); 5502 index = ocfs2_search_extent_list(el, cpos);
5494 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5503 if (index == -1) {
5495 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5504 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5496 "Owner %llu has an extent at cpos %u which can no " 5505 "Owner %llu has an extent at cpos %u which can no "
5497 "longer be found.\n", 5506 "longer be found.\n",
@@ -5557,7 +5566,7 @@ int ocfs2_remove_extent(handle_t *handle,
5557 5566
5558 el = path_leaf_el(path); 5567 el = path_leaf_el(path);
5559 index = ocfs2_search_extent_list(el, cpos); 5568 index = ocfs2_search_extent_list(el, cpos);
5560 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5569 if (index == -1) {
5561 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), 5570 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5562 "Owner %llu: split at cpos %u lost record.", 5571 "Owner %llu: split at cpos %u lost record.",
5563 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), 5572 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 39efc5057a36..3fcf205ee900 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1923,12 +1923,11 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
1923 goto bail; 1923 goto bail;
1924 } 1924 }
1925 1925
1926 if (total_backoff > 1926 if (total_backoff > DLM_JOIN_TIMEOUT_MSECS) {
1927 msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
1928 status = -ERESTARTSYS; 1927 status = -ERESTARTSYS;
1929 mlog(ML_NOTICE, "Timed out joining dlm domain " 1928 mlog(ML_NOTICE, "Timed out joining dlm domain "
1930 "%s after %u msecs\n", dlm->name, 1929 "%s after %u msecs\n", dlm->name,
1931 jiffies_to_msecs(total_backoff)); 1930 total_backoff);
1932 goto bail; 1931 goto bail;
1933 } 1932 }
1934 1933
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 82abf0cc9a12..3ec906ef5d9a 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2405,6 +2405,10 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2405 if (res->state & DLM_LOCK_RES_MIGRATING) 2405 if (res->state & DLM_LOCK_RES_MIGRATING)
2406 return 0; 2406 return 0;
2407 2407
 2408 /* delay migration when the lockres is in RECOVERING state */
2409 if (res->state & DLM_LOCK_RES_RECOVERING)
2410 return 0;
2411
2408 if (res->owner != dlm->node_num) 2412 if (res->owner != dlm->node_num)
2409 return 0; 2413 return 0;
2410 2414
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 599eb4c4c8be..6219aaadeb08 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,7 @@ static int __ocfs2_move_extent(handle_t *handle,
98 el = path_leaf_el(path); 98 el = path_leaf_el(path);
99 99
100 index = ocfs2_search_extent_list(el, cpos); 100 index = ocfs2_search_extent_list(el, cpos);
101 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 101 if (index == -1) {
102 ocfs2_error(inode->i_sb, 102 ocfs2_error(inode->i_sb,
103 "Inode %llu has an extent at cpos %u which can no " 103 "Inode %llu has an extent at cpos %u which can no "
104 "longer be found.\n", 104 "longer be found.\n",
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 636aab69ead5..d81f6e2a97f5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3109,7 +3109,7 @@ static int ocfs2_clear_ext_refcount(handle_t *handle,
3109 el = path_leaf_el(path); 3109 el = path_leaf_el(path);
3110 3110
3111 index = ocfs2_search_extent_list(el, cpos); 3111 index = ocfs2_search_extent_list(el, cpos);
3112 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 3112 if (index == -1) {
3113 ocfs2_error(sb, 3113 ocfs2_error(sb,
3114 "Inode %llu has an extent at cpos %u which can no " 3114 "Inode %llu has an extent at cpos %u which can no "
3115 "longer be found.\n", 3115 "longer be found.\n",
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 1424c151cccc..a88b2a4fcc85 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -382,7 +382,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
382 382
383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks); 383 trace_ocfs2_map_slot_buffers(bytes, si->si_blocks);
384 384
385 si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, 385 si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *),
386 GFP_KERNEL); 386 GFP_KERNEL);
387 if (!si->si_bh) { 387 if (!si->si_bh) {
388 status = -ENOMEM; 388 status = -ENOMEM;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index ec58c7659183..ba8819702c56 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -321,7 +321,7 @@ static int omfs_get_imap(struct super_block *sb)
321 goto out; 321 goto out;
322 322
323 sbi->s_imap_size = array_size; 323 sbi->s_imap_size = array_size;
324 sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL); 324 sbi->s_imap = kcalloc(array_size, sizeof(unsigned long *), GFP_KERNEL);
325 if (!sbi->s_imap) 325 if (!sbi->s_imap)
326 goto nomem; 326 goto nomem;
327 327
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 239493ec718e..7151ea428041 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -23,6 +23,7 @@ proc-y += version.o
23proc-y += softirqs.o 23proc-y += softirqs.o
24proc-y += namespaces.o 24proc-y += namespaces.o
25proc-y += self.o 25proc-y += self.o
26proc-y += thread_self.o
26proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 27proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
27proc-$(CONFIG_NET) += proc_net.o 28proc-$(CONFIG_NET) += proc_net.o
28proc-$(CONFIG_PROC_KCORE) += kcore.o 29proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 64db2bceac59..cd3653e4f35c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -297,15 +297,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
297 seq_puts(m, header); 297 seq_puts(m, header);
298 CAP_FOR_EACH_U32(__capi) { 298 CAP_FOR_EACH_U32(__capi) {
299 seq_printf(m, "%08x", 299 seq_printf(m, "%08x",
300 a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); 300 a->cap[CAP_LAST_U32 - __capi]);
301 } 301 }
302 seq_putc(m, '\n'); 302 seq_putc(m, '\n');
303} 303}
304 304
305/* Remove non-existent capabilities */
306#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
307 CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
308
309static inline void task_cap(struct seq_file *m, struct task_struct *p) 305static inline void task_cap(struct seq_file *m, struct task_struct *p)
310{ 306{
311 const struct cred *cred; 307 const struct cred *cred;
@@ -319,11 +315,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
319 cap_bset = cred->cap_bset; 315 cap_bset = cred->cap_bset;
320 rcu_read_unlock(); 316 rcu_read_unlock();
321 317
322 NORM_CAPS(cap_inheritable);
323 NORM_CAPS(cap_permitted);
324 NORM_CAPS(cap_effective);
325 NORM_CAPS(cap_bset);
326
327 render_cap_t(m, "CapInh:\t", &cap_inheritable); 318 render_cap_t(m, "CapInh:\t", &cap_inheritable);
328 render_cap_t(m, "CapPrm:\t", &cap_permitted); 319 render_cap_t(m, "CapPrm:\t", &cap_permitted);
329 render_cap_t(m, "CapEff:\t", &cap_effective); 320 render_cap_t(m, "CapEff:\t", &cap_effective);
@@ -473,13 +464,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
473 priority = task_prio(task); 464 priority = task_prio(task);
474 nice = task_nice(task); 465 nice = task_nice(task);
475 466
476 /* Temporary variable needed for gcc-2.96 */
477 /* convert timespec -> nsec*/
478 start_time =
479 (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
480 + task->real_start_time.tv_nsec;
481 /* convert nsec -> ticks */ 467 /* convert nsec -> ticks */
482 start_time = nsec_to_clock_t(start_time); 468 start_time = nsec_to_clock_t(task->real_start_time);
483 469
484 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); 470 seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
485 seq_put_decimal_ll(m, ' ', ppid); 471 seq_put_decimal_ll(m, ' ', ppid);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2d696b0c93bf..baf852b648ad 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -105,7 +105,7 @@
105 */ 105 */
106 106
107struct pid_entry { 107struct pid_entry {
108 char *name; 108 const char *name;
109 int len; 109 int len;
110 umode_t mode; 110 umode_t mode;
111 const struct inode_operations *iop; 111 const struct inode_operations *iop;
@@ -130,10 +130,6 @@ struct pid_entry {
130 { .proc_get_link = get_link } ) 130 { .proc_get_link = get_link } )
131#define REG(NAME, MODE, fops) \ 131#define REG(NAME, MODE, fops) \
132 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) 132 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
133#define INF(NAME, MODE, read) \
134 NOD(NAME, (S_IFREG|(MODE)), \
135 NULL, &proc_info_file_operations, \
136 { .proc_read = read } )
137#define ONE(NAME, MODE, show) \ 133#define ONE(NAME, MODE, show) \
138 NOD(NAME, (S_IFREG|(MODE)), \ 134 NOD(NAME, (S_IFREG|(MODE)), \
139 NULL, &proc_single_file_operations, \ 135 NULL, &proc_single_file_operations, \
@@ -200,27 +196,32 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
200 return result; 196 return result;
201} 197}
202 198
203static int proc_pid_cmdline(struct task_struct *task, char *buffer) 199static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns,
200 struct pid *pid, struct task_struct *task)
204{ 201{
205 return get_cmdline(task, buffer, PAGE_SIZE); 202 /*
203 * Rely on struct seq_operations::show() being called once
204 * per internal buffer allocation. See single_open(), traverse().
205 */
206 BUG_ON(m->size < PAGE_SIZE);
207 m->count += get_cmdline(task, m->buf, PAGE_SIZE);
208 return 0;
206} 209}
207 210
208static int proc_pid_auxv(struct task_struct *task, char *buffer) 211static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
212 struct pid *pid, struct task_struct *task)
209{ 213{
210 struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); 214 struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);
211 int res = PTR_ERR(mm);
212 if (mm && !IS_ERR(mm)) { 215 if (mm && !IS_ERR(mm)) {
213 unsigned int nwords = 0; 216 unsigned int nwords = 0;
214 do { 217 do {
215 nwords += 2; 218 nwords += 2;
216 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 219 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
217 res = nwords * sizeof(mm->saved_auxv[0]); 220 seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
218 if (res > PAGE_SIZE)
219 res = PAGE_SIZE;
220 memcpy(buffer, mm->saved_auxv, res);
221 mmput(mm); 221 mmput(mm);
222 } 222 return 0;
223 return res; 223 } else
224 return PTR_ERR(mm);
224} 225}
225 226
226 227
@@ -229,7 +230,8 @@ static int proc_pid_auxv(struct task_struct *task, char *buffer)
229 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 230 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
230 * Returns the resolved symbol. If that fails, simply return the address. 231 * Returns the resolved symbol. If that fails, simply return the address.
231 */ 232 */
232static int proc_pid_wchan(struct task_struct *task, char *buffer) 233static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
234 struct pid *pid, struct task_struct *task)
233{ 235{
234 unsigned long wchan; 236 unsigned long wchan;
235 char symname[KSYM_NAME_LEN]; 237 char symname[KSYM_NAME_LEN];
@@ -240,9 +242,9 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
240 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 242 if (!ptrace_may_access(task, PTRACE_MODE_READ))
241 return 0; 243 return 0;
242 else 244 else
243 return sprintf(buffer, "%lu", wchan); 245 return seq_printf(m, "%lu", wchan);
244 else 246 else
245 return sprintf(buffer, "%s", symname); 247 return seq_printf(m, "%s", symname);
246} 248}
247#endif /* CONFIG_KALLSYMS */ 249#endif /* CONFIG_KALLSYMS */
248 250
@@ -304,9 +306,10 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
304/* 306/*
305 * Provides /proc/PID/schedstat 307 * Provides /proc/PID/schedstat
306 */ 308 */
307static int proc_pid_schedstat(struct task_struct *task, char *buffer) 309static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
310 struct pid *pid, struct task_struct *task)
308{ 311{
309 return sprintf(buffer, "%llu %llu %lu\n", 312 return seq_printf(m, "%llu %llu %lu\n",
310 (unsigned long long)task->se.sum_exec_runtime, 313 (unsigned long long)task->se.sum_exec_runtime,
311 (unsigned long long)task->sched_info.run_delay, 314 (unsigned long long)task->sched_info.run_delay,
312 task->sched_info.pcount); 315 task->sched_info.pcount);
@@ -404,7 +407,8 @@ static const struct file_operations proc_cpuset_operations = {
404}; 407};
405#endif 408#endif
406 409
407static int proc_oom_score(struct task_struct *task, char *buffer) 410static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
411 struct pid *pid, struct task_struct *task)
408{ 412{
409 unsigned long totalpages = totalram_pages + total_swap_pages; 413 unsigned long totalpages = totalram_pages + total_swap_pages;
410 unsigned long points = 0; 414 unsigned long points = 0;
@@ -414,12 +418,12 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
414 points = oom_badness(task, NULL, NULL, totalpages) * 418 points = oom_badness(task, NULL, NULL, totalpages) *
415 1000 / totalpages; 419 1000 / totalpages;
416 read_unlock(&tasklist_lock); 420 read_unlock(&tasklist_lock);
417 return sprintf(buffer, "%lu\n", points); 421 return seq_printf(m, "%lu\n", points);
418} 422}
419 423
420struct limit_names { 424struct limit_names {
421 char *name; 425 const char *name;
422 char *unit; 426 const char *unit;
423}; 427};
424 428
425static const struct limit_names lnames[RLIM_NLIMITS] = { 429static const struct limit_names lnames[RLIM_NLIMITS] = {
@@ -442,12 +446,11 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
442}; 446};
443 447
444/* Display limits for a process */ 448/* Display limits for a process */
445static int proc_pid_limits(struct task_struct *task, char *buffer) 449static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
450 struct pid *pid, struct task_struct *task)
446{ 451{
447 unsigned int i; 452 unsigned int i;
448 int count = 0;
449 unsigned long flags; 453 unsigned long flags;
450 char *bufptr = buffer;
451 454
452 struct rlimit rlim[RLIM_NLIMITS]; 455 struct rlimit rlim[RLIM_NLIMITS];
453 456
@@ -459,35 +462,34 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
459 /* 462 /*
460 * print the file header 463 * print the file header
461 */ 464 */
462 count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", 465 seq_printf(m, "%-25s %-20s %-20s %-10s\n",
463 "Limit", "Soft Limit", "Hard Limit", "Units"); 466 "Limit", "Soft Limit", "Hard Limit", "Units");
464 467
465 for (i = 0; i < RLIM_NLIMITS; i++) { 468 for (i = 0; i < RLIM_NLIMITS; i++) {
466 if (rlim[i].rlim_cur == RLIM_INFINITY) 469 if (rlim[i].rlim_cur == RLIM_INFINITY)
467 count += sprintf(&bufptr[count], "%-25s %-20s ", 470 seq_printf(m, "%-25s %-20s ",
468 lnames[i].name, "unlimited"); 471 lnames[i].name, "unlimited");
469 else 472 else
470 count += sprintf(&bufptr[count], "%-25s %-20lu ", 473 seq_printf(m, "%-25s %-20lu ",
471 lnames[i].name, rlim[i].rlim_cur); 474 lnames[i].name, rlim[i].rlim_cur);
472 475
473 if (rlim[i].rlim_max == RLIM_INFINITY) 476 if (rlim[i].rlim_max == RLIM_INFINITY)
474 count += sprintf(&bufptr[count], "%-20s ", "unlimited"); 477 seq_printf(m, "%-20s ", "unlimited");
475 else 478 else
476 count += sprintf(&bufptr[count], "%-20lu ", 479 seq_printf(m, "%-20lu ", rlim[i].rlim_max);
477 rlim[i].rlim_max);
478 480
479 if (lnames[i].unit) 481 if (lnames[i].unit)
480 count += sprintf(&bufptr[count], "%-10s\n", 482 seq_printf(m, "%-10s\n", lnames[i].unit);
481 lnames[i].unit);
482 else 483 else
483 count += sprintf(&bufptr[count], "\n"); 484 seq_putc(m, '\n');
484 } 485 }
485 486
486 return count; 487 return 0;
487} 488}
488 489
489#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 490#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
490static int proc_pid_syscall(struct task_struct *task, char *buffer) 491static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
492 struct pid *pid, struct task_struct *task)
491{ 493{
492 long nr; 494 long nr;
493 unsigned long args[6], sp, pc; 495 unsigned long args[6], sp, pc;
@@ -496,11 +498,11 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
496 return res; 498 return res;
497 499
498 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 500 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
499 res = sprintf(buffer, "running\n"); 501 seq_puts(m, "running\n");
500 else if (nr < 0) 502 else if (nr < 0)
501 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 503 seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
502 else 504 else
503 res = sprintf(buffer, 505 seq_printf(m,
504 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 506 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
505 nr, 507 nr,
506 args[0], args[1], args[2], args[3], args[4], args[5], 508 args[0], args[1], args[2], args[3], args[4], args[5],
@@ -598,43 +600,6 @@ static const struct inode_operations proc_def_inode_operations = {
598 .setattr = proc_setattr, 600 .setattr = proc_setattr,
599}; 601};
600 602
601#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
602
603static ssize_t proc_info_read(struct file * file, char __user * buf,
604 size_t count, loff_t *ppos)
605{
606 struct inode * inode = file_inode(file);
607 unsigned long page;
608 ssize_t length;
609 struct task_struct *task = get_proc_task(inode);
610
611 length = -ESRCH;
612 if (!task)
613 goto out_no_task;
614
615 if (count > PROC_BLOCK_SIZE)
616 count = PROC_BLOCK_SIZE;
617
618 length = -ENOMEM;
619 if (!(page = __get_free_page(GFP_TEMPORARY)))
620 goto out;
621
622 length = PROC_I(inode)->op.proc_read(task, (char*)page);
623
624 if (length >= 0)
625 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
626 free_page(page);
627out:
628 put_task_struct(task);
629out_no_task:
630 return length;
631}
632
633static const struct file_operations proc_info_file_operations = {
634 .read = proc_info_read,
635 .llseek = generic_file_llseek,
636};
637
638static int proc_single_show(struct seq_file *m, void *v) 603static int proc_single_show(struct seq_file *m, void *v)
639{ 604{
640 struct inode *inode = m->private; 605 struct inode *inode = m->private;
@@ -2056,7 +2021,7 @@ static int show_timer(struct seq_file *m, void *v)
2056 struct k_itimer *timer; 2021 struct k_itimer *timer;
2057 struct timers_private *tp = m->private; 2022 struct timers_private *tp = m->private;
2058 int notify; 2023 int notify;
2059 static char *nstr[] = { 2024 static const char * const nstr[] = {
2060 [SIGEV_SIGNAL] = "signal", 2025 [SIGEV_SIGNAL] = "signal",
2061 [SIGEV_NONE] = "none", 2026 [SIGEV_NONE] = "none",
2062 [SIGEV_THREAD] = "thread", 2027 [SIGEV_THREAD] = "thread",
@@ -2392,7 +2357,7 @@ static const struct file_operations proc_coredump_filter_operations = {
2392#endif 2357#endif
2393 2358
2394#ifdef CONFIG_TASK_IO_ACCOUNTING 2359#ifdef CONFIG_TASK_IO_ACCOUNTING
2395static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2360static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
2396{ 2361{
2397 struct task_io_accounting acct = task->ioac; 2362 struct task_io_accounting acct = task->ioac;
2398 unsigned long flags; 2363 unsigned long flags;
@@ -2416,7 +2381,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2416 2381
2417 unlock_task_sighand(task, &flags); 2382 unlock_task_sighand(task, &flags);
2418 } 2383 }
2419 result = sprintf(buffer, 2384 result = seq_printf(m,
2420 "rchar: %llu\n" 2385 "rchar: %llu\n"
2421 "wchar: %llu\n" 2386 "wchar: %llu\n"
2422 "syscr: %llu\n" 2387 "syscr: %llu\n"
@@ -2436,20 +2401,22 @@ out_unlock:
2436 return result; 2401 return result;
2437} 2402}
2438 2403
2439static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2404static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
2405 struct pid *pid, struct task_struct *task)
2440{ 2406{
2441 return do_io_accounting(task, buffer, 0); 2407 return do_io_accounting(task, m, 0);
2442} 2408}
2443 2409
2444static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) 2410static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
2411 struct pid *pid, struct task_struct *task)
2445{ 2412{
2446 return do_io_accounting(task, buffer, 1); 2413 return do_io_accounting(task, m, 1);
2447} 2414}
2448#endif /* CONFIG_TASK_IO_ACCOUNTING */ 2415#endif /* CONFIG_TASK_IO_ACCOUNTING */
2449 2416
2450#ifdef CONFIG_USER_NS 2417#ifdef CONFIG_USER_NS
2451static int proc_id_map_open(struct inode *inode, struct file *file, 2418static int proc_id_map_open(struct inode *inode, struct file *file,
2452 struct seq_operations *seq_ops) 2419 const struct seq_operations *seq_ops)
2453{ 2420{
2454 struct user_namespace *ns = NULL; 2421 struct user_namespace *ns = NULL;
2455 struct task_struct *task; 2422 struct task_struct *task;
@@ -2557,10 +2524,10 @@ static const struct pid_entry tgid_base_stuff[] = {
2557 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2524 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2558#endif 2525#endif
2559 REG("environ", S_IRUSR, proc_environ_operations), 2526 REG("environ", S_IRUSR, proc_environ_operations),
2560 INF("auxv", S_IRUSR, proc_pid_auxv), 2527 ONE("auxv", S_IRUSR, proc_pid_auxv),
2561 ONE("status", S_IRUGO, proc_pid_status), 2528 ONE("status", S_IRUGO, proc_pid_status),
2562 ONE("personality", S_IRUSR, proc_pid_personality), 2529 ONE("personality", S_IRUSR, proc_pid_personality),
2563 INF("limits", S_IRUGO, proc_pid_limits), 2530 ONE("limits", S_IRUGO, proc_pid_limits),
2564#ifdef CONFIG_SCHED_DEBUG 2531#ifdef CONFIG_SCHED_DEBUG
2565 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2532 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2566#endif 2533#endif
@@ -2569,9 +2536,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2569#endif 2536#endif
2570 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2537 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2571#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2538#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2572 INF("syscall", S_IRUSR, proc_pid_syscall), 2539 ONE("syscall", S_IRUSR, proc_pid_syscall),
2573#endif 2540#endif
2574 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2541 ONE("cmdline", S_IRUGO, proc_pid_cmdline),
2575 ONE("stat", S_IRUGO, proc_tgid_stat), 2542 ONE("stat", S_IRUGO, proc_tgid_stat),
2576 ONE("statm", S_IRUGO, proc_pid_statm), 2543 ONE("statm", S_IRUGO, proc_pid_statm),
2577 REG("maps", S_IRUGO, proc_pid_maps_operations), 2544 REG("maps", S_IRUGO, proc_pid_maps_operations),
@@ -2594,13 +2561,13 @@ static const struct pid_entry tgid_base_stuff[] = {
2594 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2561 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2595#endif 2562#endif
2596#ifdef CONFIG_KALLSYMS 2563#ifdef CONFIG_KALLSYMS
2597 INF("wchan", S_IRUGO, proc_pid_wchan), 2564 ONE("wchan", S_IRUGO, proc_pid_wchan),
2598#endif 2565#endif
2599#ifdef CONFIG_STACKTRACE 2566#ifdef CONFIG_STACKTRACE
2600 ONE("stack", S_IRUSR, proc_pid_stack), 2567 ONE("stack", S_IRUSR, proc_pid_stack),
2601#endif 2568#endif
2602#ifdef CONFIG_SCHEDSTATS 2569#ifdef CONFIG_SCHEDSTATS
2603 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2570 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
2604#endif 2571#endif
2605#ifdef CONFIG_LATENCYTOP 2572#ifdef CONFIG_LATENCYTOP
2606 REG("latency", S_IRUGO, proc_lstats_operations), 2573 REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2611,7 +2578,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2611#ifdef CONFIG_CGROUPS 2578#ifdef CONFIG_CGROUPS
2612 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2579 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2613#endif 2580#endif
2614 INF("oom_score", S_IRUGO, proc_oom_score), 2581 ONE("oom_score", S_IRUGO, proc_oom_score),
2615 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2582 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2616 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2583 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2617#ifdef CONFIG_AUDITSYSCALL 2584#ifdef CONFIG_AUDITSYSCALL
@@ -2625,10 +2592,10 @@ static const struct pid_entry tgid_base_stuff[] = {
2625 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2592 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
2626#endif 2593#endif
2627#ifdef CONFIG_TASK_IO_ACCOUNTING 2594#ifdef CONFIG_TASK_IO_ACCOUNTING
2628 INF("io", S_IRUSR, proc_tgid_io_accounting), 2595 ONE("io", S_IRUSR, proc_tgid_io_accounting),
2629#endif 2596#endif
2630#ifdef CONFIG_HARDWALL 2597#ifdef CONFIG_HARDWALL
2631 INF("hardwall", S_IRUGO, proc_pid_hardwall), 2598 ONE("hardwall", S_IRUGO, proc_pid_hardwall),
2632#endif 2599#endif
2633#ifdef CONFIG_USER_NS 2600#ifdef CONFIG_USER_NS
2634 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2601 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -2780,12 +2747,12 @@ out:
2780 2747
2781struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 2748struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
2782{ 2749{
2783 int result = 0; 2750 int result = -ENOENT;
2784 struct task_struct *task; 2751 struct task_struct *task;
2785 unsigned tgid; 2752 unsigned tgid;
2786 struct pid_namespace *ns; 2753 struct pid_namespace *ns;
2787 2754
2788 tgid = name_to_int(dentry); 2755 tgid = name_to_int(&dentry->d_name);
2789 if (tgid == ~0U) 2756 if (tgid == ~0U)
2790 goto out; 2757 goto out;
2791 2758
@@ -2847,7 +2814,7 @@ retry:
2847 return iter; 2814 return iter;
2848} 2815}
2849 2816
2850#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) 2817#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
2851 2818
2852/* for the /proc/ directory itself, after non-process stuff has been done */ 2819/* for the /proc/ directory itself, after non-process stuff has been done */
2853int proc_pid_readdir(struct file *file, struct dir_context *ctx) 2820int proc_pid_readdir(struct file *file, struct dir_context *ctx)
@@ -2859,14 +2826,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
2859 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 2826 if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
2860 return 0; 2827 return 0;
2861 2828
2862 if (pos == TGID_OFFSET - 1) { 2829 if (pos == TGID_OFFSET - 2) {
2863 struct inode *inode = ns->proc_self->d_inode; 2830 struct inode *inode = ns->proc_self->d_inode;
2864 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) 2831 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
2865 return 0; 2832 return 0;
2866 iter.tgid = 0; 2833 ctx->pos = pos = pos + 1;
2867 } else {
2868 iter.tgid = pos - TGID_OFFSET;
2869 } 2834 }
2835 if (pos == TGID_OFFSET - 1) {
2836 struct inode *inode = ns->proc_thread_self->d_inode;
2837 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
2838 return 0;
2839 ctx->pos = pos = pos + 1;
2840 }
2841 iter.tgid = pos - TGID_OFFSET;
2870 iter.task = NULL; 2842 iter.task = NULL;
2871 for (iter = next_tgid(ns, iter); 2843 for (iter = next_tgid(ns, iter);
2872 iter.task; 2844 iter.task;
@@ -2895,19 +2867,22 @@ static const struct pid_entry tid_base_stuff[] = {
2895 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2867 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2896 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2868 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2897 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2869 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2870#ifdef CONFIG_NET
2871 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2872#endif
2898 REG("environ", S_IRUSR, proc_environ_operations), 2873 REG("environ", S_IRUSR, proc_environ_operations),
2899 INF("auxv", S_IRUSR, proc_pid_auxv), 2874 ONE("auxv", S_IRUSR, proc_pid_auxv),
2900 ONE("status", S_IRUGO, proc_pid_status), 2875 ONE("status", S_IRUGO, proc_pid_status),
2901 ONE("personality", S_IRUSR, proc_pid_personality), 2876 ONE("personality", S_IRUSR, proc_pid_personality),
2902 INF("limits", S_IRUGO, proc_pid_limits), 2877 ONE("limits", S_IRUGO, proc_pid_limits),
2903#ifdef CONFIG_SCHED_DEBUG 2878#ifdef CONFIG_SCHED_DEBUG
2904 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2879 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2905#endif 2880#endif
2906 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2881 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2907#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2882#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2908 INF("syscall", S_IRUSR, proc_pid_syscall), 2883 ONE("syscall", S_IRUSR, proc_pid_syscall),
2909#endif 2884#endif
2910 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2885 ONE("cmdline", S_IRUGO, proc_pid_cmdline),
2911 ONE("stat", S_IRUGO, proc_tid_stat), 2886 ONE("stat", S_IRUGO, proc_tid_stat),
2912 ONE("statm", S_IRUGO, proc_pid_statm), 2887 ONE("statm", S_IRUGO, proc_pid_statm),
2913 REG("maps", S_IRUGO, proc_tid_maps_operations), 2888 REG("maps", S_IRUGO, proc_tid_maps_operations),
@@ -2932,13 +2907,13 @@ static const struct pid_entry tid_base_stuff[] = {
2932 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2907 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
2933#endif 2908#endif
2934#ifdef CONFIG_KALLSYMS 2909#ifdef CONFIG_KALLSYMS
2935 INF("wchan", S_IRUGO, proc_pid_wchan), 2910 ONE("wchan", S_IRUGO, proc_pid_wchan),
2936#endif 2911#endif
2937#ifdef CONFIG_STACKTRACE 2912#ifdef CONFIG_STACKTRACE
2938 ONE("stack", S_IRUSR, proc_pid_stack), 2913 ONE("stack", S_IRUSR, proc_pid_stack),
2939#endif 2914#endif
2940#ifdef CONFIG_SCHEDSTATS 2915#ifdef CONFIG_SCHEDSTATS
2941 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2916 ONE("schedstat", S_IRUGO, proc_pid_schedstat),
2942#endif 2917#endif
2943#ifdef CONFIG_LATENCYTOP 2918#ifdef CONFIG_LATENCYTOP
2944 REG("latency", S_IRUGO, proc_lstats_operations), 2919 REG("latency", S_IRUGO, proc_lstats_operations),
@@ -2949,7 +2924,7 @@ static const struct pid_entry tid_base_stuff[] = {
2949#ifdef CONFIG_CGROUPS 2924#ifdef CONFIG_CGROUPS
2950 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2925 REG("cgroup", S_IRUGO, proc_cgroup_operations),
2951#endif 2926#endif
2952 INF("oom_score", S_IRUGO, proc_oom_score), 2927 ONE("oom_score", S_IRUGO, proc_oom_score),
2953 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 2928 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
2954 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2929 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
2955#ifdef CONFIG_AUDITSYSCALL 2930#ifdef CONFIG_AUDITSYSCALL
@@ -2960,10 +2935,10 @@ static const struct pid_entry tid_base_stuff[] = {
2960 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2935 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
2961#endif 2936#endif
2962#ifdef CONFIG_TASK_IO_ACCOUNTING 2937#ifdef CONFIG_TASK_IO_ACCOUNTING
2963 INF("io", S_IRUSR, proc_tid_io_accounting), 2938 ONE("io", S_IRUSR, proc_tid_io_accounting),
2964#endif 2939#endif
2965#ifdef CONFIG_HARDWALL 2940#ifdef CONFIG_HARDWALL
2966 INF("hardwall", S_IRUGO, proc_pid_hardwall), 2941 ONE("hardwall", S_IRUGO, proc_pid_hardwall),
2967#endif 2942#endif
2968#ifdef CONFIG_USER_NS 2943#ifdef CONFIG_USER_NS
2969 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2944 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
@@ -3033,7 +3008,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
3033 if (!leader) 3008 if (!leader)
3034 goto out_no_task; 3009 goto out_no_task;
3035 3010
3036 tid = name_to_int(dentry); 3011 tid = name_to_int(&dentry->d_name);
3037 if (tid == ~0U) 3012 if (tid == ~0U)
3038 goto out; 3013 goto out;
3039 3014
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 0788d093f5d8..955bb55fab8c 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -206,7 +206,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
206{ 206{
207 struct task_struct *task = get_proc_task(dir); 207 struct task_struct *task = get_proc_task(dir);
208 int result = -ENOENT; 208 int result = -ENOENT;
209 unsigned fd = name_to_int(dentry); 209 unsigned fd = name_to_int(&dentry->d_name);
210 210
211 if (!task) 211 if (!task)
212 goto out_no_task; 212 goto out_no_task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b7f268eb5f45..317b72641ebf 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -27,7 +27,7 @@
27 27
28#include "internal.h" 28#include "internal.h"
29 29
30DEFINE_SPINLOCK(proc_subdir_lock); 30static DEFINE_SPINLOCK(proc_subdir_lock);
31 31
32static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) 32static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
33{ 33{
@@ -330,28 +330,28 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
330 nlink_t nlink) 330 nlink_t nlink)
331{ 331{
332 struct proc_dir_entry *ent = NULL; 332 struct proc_dir_entry *ent = NULL;
333 const char *fn = name; 333 const char *fn;
334 unsigned int len; 334 struct qstr qstr;
335
336 /* make sure name is valid */
337 if (!name || !strlen(name))
338 goto out;
339 335
340 if (xlate_proc_name(name, parent, &fn) != 0) 336 if (xlate_proc_name(name, parent, &fn) != 0)
341 goto out; 337 goto out;
338 qstr.name = fn;
339 qstr.len = strlen(fn);
340 if (qstr.len == 0 || qstr.len >= 256) {
341 WARN(1, "name len %u\n", qstr.len);
342 return NULL;
343 }
344 if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
345 WARN(1, "create '/proc/%s' by hand\n", qstr.name);
346 return NULL;
347 }
342 348
343 /* At this point there must not be any '/' characters beyond *fn */ 349 ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
344 if (strchr(fn, '/'))
345 goto out;
346
347 len = strlen(fn);
348
349 ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
350 if (!ent) 350 if (!ent)
351 goto out; 351 goto out;
352 352
353 memcpy(ent->name, fn, len + 1); 353 memcpy(ent->name, fn, qstr.len + 1);
354 ent->namelen = len; 354 ent->namelen = qstr.len;
355 ent->mode = mode; 355 ent->mode = mode;
356 ent->nlink = nlink; 356 ent->nlink = nlink;
357 atomic_set(&ent->count, 1); 357 atomic_set(&ent->count, 1);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 0adbc02d60e3..333080d7a671 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
442int proc_fill_super(struct super_block *s) 442int proc_fill_super(struct super_block *s)
443{ 443{
444 struct inode *root_inode; 444 struct inode *root_inode;
445 int ret;
445 446
446 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 447 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
447 s->s_blocksize = 1024; 448 s->s_blocksize = 1024;
@@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s)
463 return -ENOMEM; 464 return -ENOMEM;
464 } 465 }
465 466
466 return proc_setup_self(s); 467 ret = proc_setup_self(s);
468 if (ret) {
469 return ret;
470 }
471 return proc_setup_thread_self(s);
467} 472}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 3ab6d14e71c5..7da13e49128a 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,7 +52,6 @@ struct proc_dir_entry {
52 52
53union proc_op { 53union proc_op {
54 int (*proc_get_link)(struct dentry *, struct path *); 54 int (*proc_get_link)(struct dentry *, struct path *);
55 int (*proc_read)(struct task_struct *task, char *page);
56 int (*proc_show)(struct seq_file *m, 55 int (*proc_show)(struct seq_file *m,
57 struct pid_namespace *ns, struct pid *pid, 56 struct pid_namespace *ns, struct pid *pid,
58 struct task_struct *task); 57 struct task_struct *task);
@@ -112,10 +111,10 @@ static inline int task_dumpable(struct task_struct *task)
112 return 0; 111 return 0;
113} 112}
114 113
115static inline unsigned name_to_int(struct dentry *dentry) 114static inline unsigned name_to_int(const struct qstr *qstr)
116{ 115{
117 const char *name = dentry->d_name.name; 116 const char *name = qstr->name;
118 int len = dentry->d_name.len; 117 int len = qstr->len;
119 unsigned n = 0; 118 unsigned n = 0;
120 119
121 if (len > 1 && *name == '0') 120 if (len > 1 && *name == '0')
@@ -178,8 +177,6 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i
178/* 177/*
179 * generic.c 178 * generic.c
180 */ 179 */
181extern spinlock_t proc_subdir_lock;
182
183extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); 180extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
184extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, 181extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
185 struct dentry *); 182 struct dentry *);
@@ -234,6 +231,12 @@ static inline int proc_net_init(void) { return 0; }
234extern int proc_setup_self(struct super_block *); 231extern int proc_setup_self(struct super_block *);
235 232
236/* 233/*
234 * proc_thread_self.c
235 */
236extern int proc_setup_thread_self(struct super_block *);
237extern void proc_thread_self_init(void);
238
239/*
237 * proc_sysctl.c 240 * proc_sysctl.c
238 */ 241 */
239#ifdef CONFIG_PROC_SYSCTL 242#ifdef CONFIG_PROC_SYSCTL
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 39e6ef32f0bd..6df8d0722c97 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -172,7 +172,7 @@ get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
172 172
173 start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK; 173 start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
174 end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1; 174 end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
175 end = ALIGN(end, PAGE_SIZE); 175 end = PAGE_ALIGN(end);
176 /* overlap check (because we have to align page */ 176 /* overlap check (because we have to align page */
177 list_for_each_entry(tmp, head, list) { 177 list_for_each_entry(tmp, head, list) {
178 if (tmp->type != KCORE_VMEMMAP) 178 if (tmp->type != KCORE_VMEMMAP)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7445af0b1aa3..aa1eee06420f 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -168,7 +168,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
168 K(global_page_state(NR_WRITEBACK)), 168 K(global_page_state(NR_WRITEBACK)),
169 K(global_page_state(NR_ANON_PAGES)), 169 K(global_page_state(NR_ANON_PAGES)),
170 K(global_page_state(NR_FILE_MAPPED)), 170 K(global_page_state(NR_FILE_MAPPED)),
171 K(global_page_state(NR_SHMEM)), 171 K(i.sharedram),
172 K(global_page_state(NR_SLAB_RECLAIMABLE) + 172 K(global_page_state(NR_SLAB_RECLAIMABLE) +
173 global_page_state(NR_SLAB_UNRECLAIMABLE)), 173 global_page_state(NR_SLAB_UNRECLAIMABLE)),
174 K(global_page_state(NR_SLAB_RECLAIMABLE)), 174 K(global_page_state(NR_SLAB_RECLAIMABLE)),
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4677bb7dc7c2..a63af3e0a612 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir)
113 rcu_read_lock(); 113 rcu_read_lock();
114 task = pid_task(proc_pid(dir), PIDTYPE_PID); 114 task = pid_task(proc_pid(dir), PIDTYPE_PID);
115 if (task != NULL) { 115 if (task != NULL) {
116 ns = task_nsproxy(task); 116 task_lock(task);
117 ns = task->nsproxy;
117 if (ns != NULL) 118 if (ns != NULL)
118 net = get_net(ns->net_ns); 119 net = get_net(ns->net_ns);
120 task_unlock(task);
119 } 121 }
120 rcu_read_unlock(); 122 rcu_read_unlock();
121 123
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 71290463a1d3..f92d5dd578a4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -632,7 +632,7 @@ out:
632 return ret; 632 return ret;
633} 633}
634 634
635static int scan(struct ctl_table_header *head, ctl_table *table, 635static int scan(struct ctl_table_header *head, struct ctl_table *table,
636 unsigned long *pos, struct file *file, 636 unsigned long *pos, struct file *file,
637 struct dir_context *ctx) 637 struct dir_context *ctx)
638{ 638{
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index cb761f010300..15f327bed8c6 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -18,7 +18,7 @@
18/* 18/*
19 * The /proc/tty directory inodes... 19 * The /proc/tty directory inodes...
20 */ 20 */
21static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver; 21static struct proc_dir_entry *proc_tty_driver;
22 22
23/* 23/*
24 * This is the handler for /proc/tty/drivers 24 * This is the handler for /proc/tty/drivers
@@ -176,7 +176,7 @@ void __init proc_tty_init(void)
176{ 176{
177 if (!proc_mkdir("tty", NULL)) 177 if (!proc_mkdir("tty", NULL))
178 return; 178 return;
179 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 179 proc_mkdir("tty/ldisc", NULL); /* Preserved: it's userspace visible */
180 /* 180 /*
181 * /proc/tty/driver/serial reveals the exact character counts for 181 * /proc/tty/driver/serial reveals the exact character counts for
182 * serial links which is just too easy to abuse for inferring 182 * serial links which is just too easy to abuse for inferring
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5dbadecb234d..094e44d4a6be 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb)
149 ns = (struct pid_namespace *)sb->s_fs_info; 149 ns = (struct pid_namespace *)sb->s_fs_info;
150 if (ns->proc_self) 150 if (ns->proc_self)
151 dput(ns->proc_self); 151 dput(ns->proc_self);
152 if (ns->proc_thread_self)
153 dput(ns->proc_thread_self);
152 kill_anon_super(sb); 154 kill_anon_super(sb);
153 put_pid_ns(ns); 155 put_pid_ns(ns);
154} 156}
@@ -170,6 +172,7 @@ void __init proc_root_init(void)
170 return; 172 return;
171 173
172 proc_self_init(); 174 proc_self_init();
175 proc_thread_self_init();
173 proc_symlink("mounts", NULL, "self/mounts"); 176 proc_symlink("mounts", NULL, "self/mounts");
174 177
175 proc_net_init(); 178 proc_net_init();
@@ -199,10 +202,10 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
199 202
200static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) 203static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
201{ 204{
202 if (!proc_lookup(dir, dentry, flags)) 205 if (!proc_pid_lookup(dir, dentry, flags))
203 return NULL; 206 return NULL;
204 207
205 return proc_pid_lookup(dir, dentry, flags); 208 return proc_lookup(dir, dentry, flags);
206} 209}
207 210
208static int proc_root_readdir(struct file *file, struct dir_context *ctx) 211static int proc_root_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index cfa63ee92c96..dfc791c42d64 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -925,15 +925,30 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
925 struct mm_walk *walk) 925 struct mm_walk *walk)
926{ 926{
927 struct pagemapread *pm = walk->private; 927 struct pagemapread *pm = walk->private;
928 unsigned long addr; 928 unsigned long addr = start;
929 int err = 0; 929 int err = 0;
930 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
931 930
932 for (addr = start; addr < end; addr += PAGE_SIZE) { 931 while (addr < end) {
933 err = add_to_pagemap(addr, &pme, pm); 932 struct vm_area_struct *vma = find_vma(walk->mm, addr);
934 if (err) 933 pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
935 break; 934 unsigned long vm_end;
935
936 if (!vma) {
937 vm_end = end;
938 } else {
939 vm_end = min(end, vma->vm_end);
940 if (vma->vm_flags & VM_SOFTDIRTY)
941 pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
942 }
943
944 for (; addr < vm_end; addr += PAGE_SIZE) {
945 err = add_to_pagemap(addr, &pme, pm);
946 if (err)
947 goto out;
948 }
936 } 949 }
950
951out:
937 return err; 952 return err;
938} 953}
939 954
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
new file mode 100644
index 000000000000..59075b509df3
--- /dev/null
+++ b/fs/proc/thread_self.c
@@ -0,0 +1,85 @@
1#include <linux/sched.h>
2#include <linux/namei.h>
3#include <linux/slab.h>
4#include <linux/pid_namespace.h>
5#include "internal.h"
6
7/*
8 * /proc/thread_self:
9 */
10static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
11 int buflen)
12{
13 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
14 pid_t tgid = task_tgid_nr_ns(current, ns);
15 pid_t pid = task_pid_nr_ns(current, ns);
16 char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF];
17 if (!pid)
18 return -ENOENT;
19 sprintf(tmp, "%d/task/%d", tgid, pid);
20 return readlink_copy(buffer, buflen, tmp);
21}
22
23static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
24{
25 struct pid_namespace *ns = dentry->d_sb->s_fs_info;
26 pid_t tgid = task_tgid_nr_ns(current, ns);
27 pid_t pid = task_pid_nr_ns(current, ns);
28 char *name = ERR_PTR(-ENOENT);
29 if (pid) {
30 name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
31 if (!name)
32 name = ERR_PTR(-ENOMEM);
33 else
34 sprintf(name, "%d/task/%d", tgid, pid);
35 }
36 nd_set_link(nd, name);
37 return NULL;
38}
39
40static const struct inode_operations proc_thread_self_inode_operations = {
41 .readlink = proc_thread_self_readlink,
42 .follow_link = proc_thread_self_follow_link,
43 .put_link = kfree_put_link,
44};
45
46static unsigned thread_self_inum;
47
48int proc_setup_thread_self(struct super_block *s)
49{
50 struct inode *root_inode = s->s_root->d_inode;
51 struct pid_namespace *ns = s->s_fs_info;
52 struct dentry *thread_self;
53
54 mutex_lock(&root_inode->i_mutex);
55 thread_self = d_alloc_name(s->s_root, "thread-self");
56 if (thread_self) {
57 struct inode *inode = new_inode_pseudo(s);
58 if (inode) {
59 inode->i_ino = thread_self_inum;
60 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
61 inode->i_mode = S_IFLNK | S_IRWXUGO;
62 inode->i_uid = GLOBAL_ROOT_UID;
63 inode->i_gid = GLOBAL_ROOT_GID;
64 inode->i_op = &proc_thread_self_inode_operations;
65 d_add(thread_self, inode);
66 } else {
67 dput(thread_self);
68 thread_self = ERR_PTR(-ENOMEM);
69 }
70 } else {
71 thread_self = ERR_PTR(-ENOMEM);
72 }
73 mutex_unlock(&root_inode->i_mutex);
74 if (IS_ERR(thread_self)) {
75 pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
76 return PTR_ERR(thread_self);
77 }
78 ns->proc_thread_self = thread_self;
79 return 0;
80}
81
82void __init proc_thread_self_init(void)
83{
84 proc_alloc_inum(&thread_self_inum);
85}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 382aa890e228..a90d6d354199 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -328,6 +328,82 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
328 * virtually contiguous user-space in ELF layout. 328 * virtually contiguous user-space in ELF layout.
329 */ 329 */
330#ifdef CONFIG_MMU 330#ifdef CONFIG_MMU
331/*
332 * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
333 * reported as not being ram with the zero page.
334 *
335 * @vma: vm_area_struct describing requested mapping
336 * @from: start remapping from
337 * @pfn: page frame number to start remapping to
338 * @size: remapping size
339 * @prot: protection bits
340 *
341 * Returns zero on success, -EAGAIN on failure.
342 */
343static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
344 unsigned long from, unsigned long pfn,
345 unsigned long size, pgprot_t prot)
346{
347 unsigned long map_size;
348 unsigned long pos_start, pos_end, pos;
349 unsigned long zeropage_pfn = my_zero_pfn(0);
350 size_t len = 0;
351
352 pos_start = pfn;
353 pos_end = pfn + (size >> PAGE_SHIFT);
354
355 for (pos = pos_start; pos < pos_end; ++pos) {
356 if (!pfn_is_ram(pos)) {
357 /*
358 * We hit a page which is not ram. Remap the continuous
359 * region between pos_start and pos-1 and replace
360 * the non-ram page at pos with the zero page.
361 */
362 if (pos > pos_start) {
363 /* Remap continuous region */
364 map_size = (pos - pos_start) << PAGE_SHIFT;
365 if (remap_oldmem_pfn_range(vma, from + len,
366 pos_start, map_size,
367 prot))
368 goto fail;
369 len += map_size;
370 }
371 /* Remap the zero page */
372 if (remap_oldmem_pfn_range(vma, from + len,
373 zeropage_pfn,
374 PAGE_SIZE, prot))
375 goto fail;
376 len += PAGE_SIZE;
377 pos_start = pos + 1;
378 }
379 }
380 if (pos > pos_start) {
381 /* Remap the rest */
382 map_size = (pos - pos_start) << PAGE_SHIFT;
383 if (remap_oldmem_pfn_range(vma, from + len, pos_start,
384 map_size, prot))
385 goto fail;
386 }
387 return 0;
388fail:
389 do_munmap(vma->vm_mm, from, len);
390 return -EAGAIN;
391}
392
393static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
394 unsigned long from, unsigned long pfn,
395 unsigned long size, pgprot_t prot)
396{
397 /*
398 * Check if oldmem_pfn_is_ram was registered to avoid
399 * looping over all pages without a reason.
400 */
401 if (oldmem_pfn_is_ram)
402 return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
403 else
404 return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
405}
406
331static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) 407static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
332{ 408{
333 size_t size = vma->vm_end - vma->vm_start; 409 size_t size = vma->vm_end - vma->vm_start;
@@ -387,9 +463,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
387 463
388 tsz = min_t(size_t, m->offset + m->size - start, size); 464 tsz = min_t(size_t, m->offset + m->size - start, size);
389 paddr = m->paddr + start - m->offset; 465 paddr = m->paddr + start - m->offset;
390 if (remap_oldmem_pfn_range(vma, vma->vm_start + len, 466 if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
391 paddr >> PAGE_SHIFT, tsz, 467 paddr >> PAGE_SHIFT, tsz,
392 vma->vm_page_prot)) 468 vma->vm_page_prot))
393 goto fail; 469 goto fail;
394 size -= tsz; 470 size -= tsz;
395 start += tsz; 471 start += tsz;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 1a81373947f3..73ca1740d839 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file,
232 if (!task) 232 if (!task)
233 goto err; 233 goto err;
234 234
235 rcu_read_lock(); 235 task_lock(task);
236 nsp = task_nsproxy(task); 236 nsp = task->nsproxy;
237 if (!nsp || !nsp->mnt_ns) { 237 if (!nsp || !nsp->mnt_ns) {
238 rcu_read_unlock(); 238 task_unlock(task);
239 put_task_struct(task); 239 put_task_struct(task);
240 goto err; 240 goto err;
241 } 241 }
242 ns = nsp->mnt_ns; 242 ns = nsp->mnt_ns;
243 get_mnt_ns(ns); 243 get_mnt_ns(ns);
244 rcu_read_unlock();
245 task_lock(task);
246 if (!task->fs) { 244 if (!task->fs) {
247 task_unlock(task); 245 task_unlock(task);
248 put_task_struct(task); 246 put_task_struct(task);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 34a1e5aa848c..9d7b9a83699e 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -394,7 +394,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
394 394
395 prot = pgprot_noncached(PAGE_KERNEL); 395 prot = pgprot_noncached(PAGE_KERNEL);
396 396
397 pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); 397 pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
398 if (!pages) { 398 if (!pages) {
399 pr_err("%s: Failed to allocate array for %u pages\n", 399 pr_err("%s: Failed to allocate array for %u pages\n",
400 __func__, page_count); 400 __func__, page_count);
diff --git a/fs/qnx6/Makefile b/fs/qnx6/Makefile
index 9dd06199afc9..5e6bae6fae50 100644
--- a/fs/qnx6/Makefile
+++ b/fs/qnx6/Makefile
@@ -5,3 +5,4 @@
5obj-$(CONFIG_QNX6FS_FS) += qnx6.o 5obj-$(CONFIG_QNX6FS_FS) += qnx6.o
6 6
7qnx6-objs := inode.o dir.o namei.o super_mmi.o 7qnx6-objs := inode.o dir.o namei.o super_mmi.o
8ccflags-$(CONFIG_QNX6FS_DEBUG) += -DDEBUG
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 15b7d92ed60d..8d64bb5366bf 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -77,21 +77,20 @@ static int qnx6_dir_longfilename(struct inode *inode,
77 if (de->de_size != 0xff) { 77 if (de->de_size != 0xff) {
78 /* error - long filename entries always have size 0xff 78 /* error - long filename entries always have size 0xff
79 in direntry */ 79 in direntry */
80 printk(KERN_ERR "qnx6: invalid direntry size (%i).\n", 80 pr_err("invalid direntry size (%i).\n", de->de_size);
81 de->de_size);
82 return 0; 81 return 0;
83 } 82 }
84 lf = qnx6_longname(s, de, &page); 83 lf = qnx6_longname(s, de, &page);
85 if (IS_ERR(lf)) { 84 if (IS_ERR(lf)) {
86 printk(KERN_ERR "qnx6:Error reading longname\n"); 85 pr_err("Error reading longname\n");
87 return 0; 86 return 0;
88 } 87 }
89 88
90 lf_size = fs16_to_cpu(sbi, lf->lf_size); 89 lf_size = fs16_to_cpu(sbi, lf->lf_size);
91 90
92 if (lf_size > QNX6_LONG_NAME_MAX) { 91 if (lf_size > QNX6_LONG_NAME_MAX) {
93 QNX6DEBUG((KERN_INFO "file %s\n", lf->lf_fname)); 92 pr_debug("file %s\n", lf->lf_fname);
94 printk(KERN_ERR "qnx6:Filename too long (%i)\n", lf_size); 93 pr_err("Filename too long (%i)\n", lf_size);
95 qnx6_put_page(page); 94 qnx6_put_page(page);
96 return 0; 95 return 0;
97 } 96 }
@@ -100,10 +99,10 @@ static int qnx6_dir_longfilename(struct inode *inode,
100 mmi 3g filesystem does not have that checksum */ 99 mmi 3g filesystem does not have that checksum */
101 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) != 100 if (!test_opt(s, MMI_FS) && fs32_to_cpu(sbi, de->de_checksum) !=
102 qnx6_lfile_checksum(lf->lf_fname, lf_size)) 101 qnx6_lfile_checksum(lf->lf_fname, lf_size))
103 printk(KERN_INFO "qnx6: long filename checksum error.\n"); 102 pr_info("long filename checksum error.\n");
104 103
105 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", 104 pr_debug("qnx6_readdir:%.*s inode:%u\n",
106 lf_size, lf->lf_fname, de_inode)); 105 lf_size, lf->lf_fname, de_inode);
107 if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) { 106 if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
108 qnx6_put_page(page); 107 qnx6_put_page(page);
109 return 0; 108 return 0;
@@ -136,7 +135,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
136 int i = start; 135 int i = start;
137 136
138 if (IS_ERR(page)) { 137 if (IS_ERR(page)) {
139 printk(KERN_ERR "qnx6_readdir: read failed\n"); 138 pr_err("%s(): read failed\n", __func__);
140 ctx->pos = (n + 1) << PAGE_CACHE_SHIFT; 139 ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
141 return PTR_ERR(page); 140 return PTR_ERR(page);
142 } 141 }
@@ -159,9 +158,9 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
159 break; 158 break;
160 } 159 }
161 } else { 160 } else {
162 QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" 161 pr_debug("%s():%.*s inode:%u\n",
163 " inode:%u\n", size, de->de_fname, 162 __func__, size, de->de_fname,
164 no_inode)); 163 no_inode);
165 if (!dir_emit(ctx, de->de_fname, size, 164 if (!dir_emit(ctx, de->de_fname, size,
166 no_inode, DT_UNKNOWN)) { 165 no_inode, DT_UNKNOWN)) {
167 done = true; 166 done = true;
@@ -259,8 +258,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
259 if (ino) 258 if (ino)
260 goto found; 259 goto found;
261 } else 260 } else
262 printk(KERN_ERR "qnx6: undefined " 261 pr_err("undefined filename size in inode.\n");
263 "filename size in inode.\n");
264 } 262 }
265 qnx6_put_page(page); 263 qnx6_put_page(page);
266 } 264 }
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 65cdaab3ed49..44e73923670d 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -73,8 +73,8 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
73{ 73{
74 unsigned phys; 74 unsigned phys;
75 75
76 QNX6DEBUG((KERN_INFO "qnx6: qnx6_get_block inode=[%ld] iblock=[%ld]\n", 76 pr_debug("qnx6_get_block inode=[%ld] iblock=[%ld]\n",
77 inode->i_ino, (unsigned long)iblock)); 77 inode->i_ino, (unsigned long)iblock);
78 78
79 phys = qnx6_block_map(inode, iblock); 79 phys = qnx6_block_map(inode, iblock);
80 if (phys) { 80 if (phys) {
@@ -87,7 +87,7 @@ static int qnx6_get_block(struct inode *inode, sector_t iblock,
87static int qnx6_check_blockptr(__fs32 ptr) 87static int qnx6_check_blockptr(__fs32 ptr)
88{ 88{
89 if (ptr == ~(__fs32)0) { 89 if (ptr == ~(__fs32)0) {
90 printk(KERN_ERR "qnx6: hit unused blockpointer.\n"); 90 pr_err("hit unused blockpointer.\n");
91 return 0; 91 return 0;
92 } 92 }
93 return 1; 93 return 1;
@@ -127,8 +127,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
127 levelptr = no >> bitdelta; 127 levelptr = no >> bitdelta;
128 128
129 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) { 129 if (levelptr > QNX6_NO_DIRECT_POINTERS - 1) {
130 printk(KERN_ERR "qnx6:Requested file block number (%u) too big.", 130 pr_err("Requested file block number (%u) too big.", no);
131 no);
132 return 0; 131 return 0;
133 } 132 }
134 133
@@ -137,8 +136,7 @@ static unsigned qnx6_block_map(struct inode *inode, unsigned no)
137 for (i = 0; i < depth; i++) { 136 for (i = 0; i < depth; i++) {
138 bh = sb_bread(s, block); 137 bh = sb_bread(s, block);
139 if (!bh) { 138 if (!bh) {
140 printk(KERN_ERR "qnx6:Error reading block (%u)\n", 139 pr_err("Error reading block (%u)\n", block);
141 block);
142 return 0; 140 return 0;
143 } 141 }
144 bitdelta -= ptrbits; 142 bitdelta -= ptrbits;
@@ -207,26 +205,16 @@ void qnx6_superblock_debug(struct qnx6_super_block *sb, struct super_block *s)
207{ 205{
208 struct qnx6_sb_info *sbi = QNX6_SB(s); 206 struct qnx6_sb_info *sbi = QNX6_SB(s);
209 207
210 QNX6DEBUG((KERN_INFO "magic: %08x\n", 208 pr_debug("magic: %08x\n", fs32_to_cpu(sbi, sb->sb_magic));
211 fs32_to_cpu(sbi, sb->sb_magic))); 209 pr_debug("checksum: %08x\n", fs32_to_cpu(sbi, sb->sb_checksum));
212 QNX6DEBUG((KERN_INFO "checksum: %08x\n", 210 pr_debug("serial: %llx\n", fs64_to_cpu(sbi, sb->sb_serial));
213 fs32_to_cpu(sbi, sb->sb_checksum))); 211 pr_debug("flags: %08x\n", fs32_to_cpu(sbi, sb->sb_flags));
214 QNX6DEBUG((KERN_INFO "serial: %llx\n", 212 pr_debug("blocksize: %08x\n", fs32_to_cpu(sbi, sb->sb_blocksize));
215 fs64_to_cpu(sbi, sb->sb_serial))); 213 pr_debug("num_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_num_inodes));
216 QNX6DEBUG((KERN_INFO "flags: %08x\n", 214 pr_debug("free_inodes: %08x\n", fs32_to_cpu(sbi, sb->sb_free_inodes));
217 fs32_to_cpu(sbi, sb->sb_flags))); 215 pr_debug("num_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_num_blocks));
218 QNX6DEBUG((KERN_INFO "blocksize: %08x\n", 216 pr_debug("free_blocks: %08x\n", fs32_to_cpu(sbi, sb->sb_free_blocks));
219 fs32_to_cpu(sbi, sb->sb_blocksize))); 217 pr_debug("inode_levels: %02x\n", sb->Inode.levels);
220 QNX6DEBUG((KERN_INFO "num_inodes: %08x\n",
221 fs32_to_cpu(sbi, sb->sb_num_inodes)));
222 QNX6DEBUG((KERN_INFO "free_inodes: %08x\n",
223 fs32_to_cpu(sbi, sb->sb_free_inodes)));
224 QNX6DEBUG((KERN_INFO "num_blocks: %08x\n",
225 fs32_to_cpu(sbi, sb->sb_num_blocks)));
226 QNX6DEBUG((KERN_INFO "free_blocks: %08x\n",
227 fs32_to_cpu(sbi, sb->sb_free_blocks)));
228 QNX6DEBUG((KERN_INFO "inode_levels: %02x\n",
229 sb->Inode.levels));
230} 218}
231#endif 219#endif
232 220
@@ -277,7 +265,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
277 start with the first superblock */ 265 start with the first superblock */
278 bh = sb_bread(s, offset); 266 bh = sb_bread(s, offset);
279 if (!bh) { 267 if (!bh) {
280 printk(KERN_ERR "qnx6: unable to read the first superblock\n"); 268 pr_err("unable to read the first superblock\n");
281 return NULL; 269 return NULL;
282 } 270 }
283 sb = (struct qnx6_super_block *)bh->b_data; 271 sb = (struct qnx6_super_block *)bh->b_data;
@@ -285,20 +273,16 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 sbi->s_bytesex = BYTESEX_BE; 273 sbi->s_bytesex = BYTESEX_BE;
286 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 274 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
287 /* we got a big endian fs */ 275 /* we got a big endian fs */
288 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 276 pr_debug("fs got different endianness.\n");
289 " endianness.\n"));
290 return bh; 277 return bh;
291 } else 278 } else
292 sbi->s_bytesex = BYTESEX_LE; 279 sbi->s_bytesex = BYTESEX_LE;
293 if (!silent) { 280 if (!silent) {
294 if (offset == 0) { 281 if (offset == 0) {
295 printk(KERN_ERR "qnx6: wrong signature (magic)" 282 pr_err("wrong signature (magic) in superblock #1.\n");
296 " in superblock #1.\n");
297 } else { 283 } else {
298 printk(KERN_INFO "qnx6: wrong signature (magic)" 284 pr_info("wrong signature (magic) at position (0x%lx) - will try alternative position (0x0000).\n",
299 " at position (0x%lx) - will try" 285 offset * s->s_blocksize);
300 " alternative position (0x0000).\n",
301 offset * s->s_blocksize);
302 } 286 }
303 } 287 }
304 brelse(bh); 288 brelse(bh);
@@ -329,13 +313,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
329 313
330 /* Superblock always is 512 Byte long */ 314 /* Superblock always is 512 Byte long */
331 if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) { 315 if (!sb_set_blocksize(s, QNX6_SUPERBLOCK_SIZE)) {
332 printk(KERN_ERR "qnx6: unable to set blocksize\n"); 316 pr_err("unable to set blocksize\n");
333 goto outnobh; 317 goto outnobh;
334 } 318 }
335 319
336 /* parse the mount-options */ 320 /* parse the mount-options */
337 if (!qnx6_parse_options((char *) data, s)) { 321 if (!qnx6_parse_options((char *) data, s)) {
338 printk(KERN_ERR "qnx6: invalid mount options.\n"); 322 pr_err("invalid mount options.\n");
339 goto outnobh; 323 goto outnobh;
340 } 324 }
341 if (test_opt(s, MMI_FS)) { 325 if (test_opt(s, MMI_FS)) {
@@ -355,7 +339,7 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
355 /* try again without bootblock offset */ 339 /* try again without bootblock offset */
356 bh1 = qnx6_check_first_superblock(s, 0, silent); 340 bh1 = qnx6_check_first_superblock(s, 0, silent);
357 if (!bh1) { 341 if (!bh1) {
358 printk(KERN_ERR "qnx6: unable to read the first superblock\n"); 342 pr_err("unable to read the first superblock\n");
359 goto outnobh; 343 goto outnobh;
360 } 344 }
361 /* seems that no bootblock at partition start */ 345 /* seems that no bootblock at partition start */
@@ -370,13 +354,13 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
370 /* checksum check - start at byte 8 and end at byte 512 */ 354 /* checksum check - start at byte 8 and end at byte 512 */
371 if (fs32_to_cpu(sbi, sb1->sb_checksum) != 355 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
372 crc32_be(0, (char *)(bh1->b_data + 8), 504)) { 356 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
373 printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); 357 pr_err("superblock #1 checksum error\n");
374 goto out; 358 goto out;
375 } 359 }
376 360
377 /* set new blocksize */ 361 /* set new blocksize */
378 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) { 362 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
379 printk(KERN_ERR "qnx6: unable to set blocksize\n"); 363 pr_err("unable to set blocksize\n");
380 goto out; 364 goto out;
381 } 365 }
382 /* blocksize invalidates bh - pull it back in */ 366 /* blocksize invalidates bh - pull it back in */
@@ -398,21 +382,20 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
398 /* next the second superblock */ 382 /* next the second superblock */
399 bh2 = sb_bread(s, offset); 383 bh2 = sb_bread(s, offset);
400 if (!bh2) { 384 if (!bh2) {
401 printk(KERN_ERR "qnx6: unable to read the second superblock\n"); 385 pr_err("unable to read the second superblock\n");
402 goto out; 386 goto out;
403 } 387 }
404 sb2 = (struct qnx6_super_block *)bh2->b_data; 388 sb2 = (struct qnx6_super_block *)bh2->b_data;
405 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) { 389 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
406 if (!silent) 390 if (!silent)
407 printk(KERN_ERR "qnx6: wrong signature (magic)" 391 pr_err("wrong signature (magic) in superblock #2.\n");
408 " in superblock #2.\n");
409 goto out; 392 goto out;
410 } 393 }
411 394
412 /* checksum check - start at byte 8 and end at byte 512 */ 395 /* checksum check - start at byte 8 and end at byte 512 */
413 if (fs32_to_cpu(sbi, sb2->sb_checksum) != 396 if (fs32_to_cpu(sbi, sb2->sb_checksum) !=
414 crc32_be(0, (char *)(bh2->b_data + 8), 504)) { 397 crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
415 printk(KERN_ERR "qnx6: superblock #2 checksum error\n"); 398 pr_err("superblock #2 checksum error\n");
416 goto out; 399 goto out;
417 } 400 }
418 401
@@ -422,25 +405,24 @@ static int qnx6_fill_super(struct super_block *s, void *data, int silent)
422 sbi->sb_buf = bh1; 405 sbi->sb_buf = bh1;
423 sbi->sb = (struct qnx6_super_block *)bh1->b_data; 406 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
424 brelse(bh2); 407 brelse(bh2);
425 printk(KERN_INFO "qnx6: superblock #1 active\n"); 408 pr_info("superblock #1 active\n");
426 } else { 409 } else {
427 /* superblock #2 active */ 410 /* superblock #2 active */
428 sbi->sb_buf = bh2; 411 sbi->sb_buf = bh2;
429 sbi->sb = (struct qnx6_super_block *)bh2->b_data; 412 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
430 brelse(bh1); 413 brelse(bh1);
431 printk(KERN_INFO "qnx6: superblock #2 active\n"); 414 pr_info("superblock #2 active\n");
432 } 415 }
433mmi_success: 416mmi_success:
434 /* sanity check - limit maximum indirect pointer levels */ 417 /* sanity check - limit maximum indirect pointer levels */
435 if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) { 418 if (sb1->Inode.levels > QNX6_PTR_MAX_LEVELS) {
436 printk(KERN_ERR "qnx6: too many inode levels (max %i, sb %i)\n", 419 pr_err("too many inode levels (max %i, sb %i)\n",
437 QNX6_PTR_MAX_LEVELS, sb1->Inode.levels); 420 QNX6_PTR_MAX_LEVELS, sb1->Inode.levels);
438 goto out; 421 goto out;
439 } 422 }
440 if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) { 423 if (sb1->Longfile.levels > QNX6_PTR_MAX_LEVELS) {
441 printk(KERN_ERR "qnx6: too many longfilename levels" 424 pr_err("too many longfilename levels (max %i, sb %i)\n",
442 " (max %i, sb %i)\n", 425 QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
443 QNX6_PTR_MAX_LEVELS, sb1->Longfile.levels);
444 goto out; 426 goto out;
445 } 427 }
446 s->s_op = &qnx6_sops; 428 s->s_op = &qnx6_sops;
@@ -460,7 +442,7 @@ mmi_success:
460 /* prefetch root inode */ 442 /* prefetch root inode */
461 root = qnx6_iget(s, QNX6_ROOT_INO); 443 root = qnx6_iget(s, QNX6_ROOT_INO);
462 if (IS_ERR(root)) { 444 if (IS_ERR(root)) {
463 printk(KERN_ERR "qnx6: get inode failed\n"); 445 pr_err("get inode failed\n");
464 ret = PTR_ERR(root); 446 ret = PTR_ERR(root);
465 goto out2; 447 goto out2;
466 } 448 }
@@ -474,7 +456,7 @@ mmi_success:
474 errmsg = qnx6_checkroot(s); 456 errmsg = qnx6_checkroot(s);
475 if (errmsg != NULL) { 457 if (errmsg != NULL) {
476 if (!silent) 458 if (!silent)
477 printk(KERN_ERR "qnx6: %s\n", errmsg); 459 pr_err("%s\n", errmsg);
478 goto out3; 460 goto out3;
479 } 461 }
480 return 0; 462 return 0;
@@ -555,8 +537,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
555 inode->i_mode = 0; 537 inode->i_mode = 0;
556 538
557 if (ino == 0) { 539 if (ino == 0) {
558 printk(KERN_ERR "qnx6: bad inode number on dev %s: %u is " 540 pr_err("bad inode number on dev %s: %u is out of range\n",
559 "out of range\n",
560 sb->s_id, ino); 541 sb->s_id, ino);
561 iget_failed(inode); 542 iget_failed(inode);
562 return ERR_PTR(-EIO); 543 return ERR_PTR(-EIO);
@@ -566,8 +547,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
566 mapping = sbi->inodes->i_mapping; 547 mapping = sbi->inodes->i_mapping;
567 page = read_mapping_page(mapping, n, NULL); 548 page = read_mapping_page(mapping, n, NULL);
568 if (IS_ERR(page)) { 549 if (IS_ERR(page)) {
569 printk(KERN_ERR "qnx6: major problem: unable to read inode from " 550 pr_err("major problem: unable to read inode from dev %s\n",
570 "dev %s\n", sb->s_id); 551 sb->s_id);
571 iget_failed(inode); 552 iget_failed(inode);
572 return ERR_CAST(page); 553 return ERR_CAST(page);
573 } 554 }
@@ -689,7 +670,7 @@ static int __init init_qnx6_fs(void)
689 return err; 670 return err;
690 } 671 }
691 672
692 printk(KERN_INFO "QNX6 filesystem 1.0.0 registered.\n"); 673 pr_info("QNX6 filesystem 1.0.0 registered.\n");
693 return 0; 674 return 0;
694} 675}
695 676
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 0561326a94f5..6c1a323137dd 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,12 +29,12 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
29 foundinode = qnx6_iget(dir->i_sb, ino); 29 foundinode = qnx6_iget(dir->i_sb, ino);
30 qnx6_put_page(page); 30 qnx6_put_page(page);
31 if (IS_ERR(foundinode)) { 31 if (IS_ERR(foundinode)) {
32 QNX6DEBUG((KERN_ERR "qnx6: lookup->iget -> " 32 pr_debug("lookup->iget -> error %ld\n",
33 " error %ld\n", PTR_ERR(foundinode))); 33 PTR_ERR(foundinode));
34 return ERR_CAST(foundinode); 34 return ERR_CAST(foundinode);
35 } 35 }
36 } else { 36 } else {
37 QNX6DEBUG((KERN_INFO "qnx6_lookup: not found %s\n", name)); 37 pr_debug("%s(): not found %s\n", __func__, name);
38 return NULL; 38 return NULL;
39 } 39 }
40 d_add(dentry, foundinode); 40 d_add(dentry, foundinode);
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index b00fcc960d37..d3fb2b698800 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -10,6 +10,12 @@
10 * 10 *
11 */ 11 */
12 12
13#ifdef pr_fmt
14#undef pr_fmt
15#endif
16
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
13#include <linux/fs.h> 19#include <linux/fs.h>
14#include <linux/pagemap.h> 20#include <linux/pagemap.h>
15 21
@@ -19,12 +25,6 @@ typedef __u64 __bitwise __fs64;
19 25
20#include <linux/qnx6_fs.h> 26#include <linux/qnx6_fs.h>
21 27
22#ifdef CONFIG_QNX6FS_DEBUG
23#define QNX6DEBUG(X) printk X
24#else
25#define QNX6DEBUG(X) (void) 0
26#endif
27
28struct qnx6_sb_info { 28struct qnx6_sb_info {
29 struct buffer_head *sb_buf; /* superblock buffer */ 29 struct buffer_head *sb_buf; /* superblock buffer */
30 struct qnx6_super_block *sb; /* our superblock */ 30 struct qnx6_super_block *sb; /* our superblock */
diff --git a/fs/qnx6/super_mmi.c b/fs/qnx6/super_mmi.c
index 29c32cba62d6..62aaf3e3126a 100644
--- a/fs/qnx6/super_mmi.c
+++ b/fs/qnx6/super_mmi.c
@@ -44,15 +44,14 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
44 start with the first superblock */ 44 start with the first superblock */
45 bh1 = sb_bread(s, 0); 45 bh1 = sb_bread(s, 0);
46 if (!bh1) { 46 if (!bh1) {
47 printk(KERN_ERR "qnx6: Unable to read first mmi superblock\n"); 47 pr_err("Unable to read first mmi superblock\n");
48 return NULL; 48 return NULL;
49 } 49 }
50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data; 50 sb1 = (struct qnx6_mmi_super_block *)bh1->b_data;
51 sbi = QNX6_SB(s); 51 sbi = QNX6_SB(s);
52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) { 52 if (fs32_to_cpu(sbi, sb1->sb_magic) != QNX6_SUPER_MAGIC) {
53 if (!silent) { 53 if (!silent) {
54 printk(KERN_ERR "qnx6: wrong signature (magic) in" 54 pr_err("wrong signature (magic) in superblock #1.\n");
55 " superblock #1.\n");
56 goto out; 55 goto out;
57 } 56 }
58 } 57 }
@@ -60,7 +59,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
60 /* checksum check - start at byte 8 and end at byte 512 */ 59 /* checksum check - start at byte 8 and end at byte 512 */
61 if (fs32_to_cpu(sbi, sb1->sb_checksum) != 60 if (fs32_to_cpu(sbi, sb1->sb_checksum) !=
62 crc32_be(0, (char *)(bh1->b_data + 8), 504)) { 61 crc32_be(0, (char *)(bh1->b_data + 8), 504)) {
63 printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); 62 pr_err("superblock #1 checksum error\n");
64 goto out; 63 goto out;
65 } 64 }
66 65
@@ -70,7 +69,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
70 69
71 /* set new blocksize */ 70 /* set new blocksize */
72 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) { 71 if (!sb_set_blocksize(s, fs32_to_cpu(sbi, sb1->sb_blocksize))) {
73 printk(KERN_ERR "qnx6: unable to set blocksize\n"); 72 pr_err("unable to set blocksize\n");
74 goto out; 73 goto out;
75 } 74 }
76 /* blocksize invalidates bh - pull it back in */ 75 /* blocksize invalidates bh - pull it back in */
@@ -83,27 +82,26 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
83 /* read second superblock */ 82 /* read second superblock */
84 bh2 = sb_bread(s, offset); 83 bh2 = sb_bread(s, offset);
85 if (!bh2) { 84 if (!bh2) {
86 printk(KERN_ERR "qnx6: unable to read the second superblock\n"); 85 pr_err("unable to read the second superblock\n");
87 goto out; 86 goto out;
88 } 87 }
89 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data; 88 sb2 = (struct qnx6_mmi_super_block *)bh2->b_data;
90 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) { 89 if (fs32_to_cpu(sbi, sb2->sb_magic) != QNX6_SUPER_MAGIC) {
91 if (!silent) 90 if (!silent)
92 printk(KERN_ERR "qnx6: wrong signature (magic) in" 91 pr_err("wrong signature (magic) in superblock #2.\n");
93 " superblock #2.\n");
94 goto out; 92 goto out;
95 } 93 }
96 94
97 /* checksum check - start at byte 8 and end at byte 512 */ 95 /* checksum check - start at byte 8 and end at byte 512 */
98 if (fs32_to_cpu(sbi, sb2->sb_checksum) 96 if (fs32_to_cpu(sbi, sb2->sb_checksum)
99 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) { 97 != crc32_be(0, (char *)(bh2->b_data + 8), 504)) {
100 printk(KERN_ERR "qnx6: superblock #1 checksum error\n"); 98 pr_err("superblock #1 checksum error\n");
101 goto out; 99 goto out;
102 } 100 }
103 101
104 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL); 102 qsb = kmalloc(sizeof(*qsb), GFP_KERNEL);
105 if (!qsb) { 103 if (!qsb) {
106 printk(KERN_ERR "qnx6: unable to allocate memory.\n"); 104 pr_err("unable to allocate memory.\n");
107 goto out; 105 goto out;
108 } 106 }
109 107
@@ -119,7 +117,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
119 sbi->sb_buf = bh1; 117 sbi->sb_buf = bh1;
120 sbi->sb = (struct qnx6_super_block *)bh1->b_data; 118 sbi->sb = (struct qnx6_super_block *)bh1->b_data;
121 brelse(bh2); 119 brelse(bh2);
122 printk(KERN_INFO "qnx6: superblock #1 active\n"); 120 pr_info("superblock #1 active\n");
123 } else { 121 } else {
124 /* superblock #2 active */ 122 /* superblock #2 active */
125 qnx6_mmi_copy_sb(qsb, sb2); 123 qnx6_mmi_copy_sb(qsb, sb2);
@@ -131,7 +129,7 @@ struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s, int silent)
131 sbi->sb_buf = bh2; 129 sbi->sb_buf = bh2;
132 sbi->sb = (struct qnx6_super_block *)bh2->b_data; 130 sbi->sb = (struct qnx6_super_block *)bh2->b_data;
133 brelse(bh1); 131 brelse(bh1);
134 printk(KERN_INFO "qnx6: superblock #2 active\n"); 132 pr_info("superblock #2 active\n");
135 } 133 }
136 kfree(qsb); 134 kfree(qsb);
137 135
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 7f30bdc57d13..f2d0eee9d1f1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -96,13 +96,16 @@
96 * Note that some things (eg. sb pointer, type, id) doesn't change during 96 * Note that some things (eg. sb pointer, type, id) doesn't change during
97 * the life of the dquot structure and so needn't to be protected by a lock 97 * the life of the dquot structure and so needn't to be protected by a lock
98 * 98 *
99 * Any operation working on dquots via inode pointers must hold dqptr_sem. If 99 * Operation accessing dquots via inode pointers are protected by dquot_srcu.
100 * operation is just reading pointers from inode (or not using them at all) the 100 * Operation of reading pointer needs srcu_read_lock(&dquot_srcu), and
101 * read lock is enough. If pointers are altered function must hold write lock. 101 * synchronize_srcu(&dquot_srcu) is called after clearing pointers from
102 * inode and before dropping dquot references to avoid use of dquots after
103 * they are freed. dq_data_lock is used to serialize the pointer setting and
104 * clearing operations.
102 * Special care needs to be taken about S_NOQUOTA inode flag (marking that 105 * Special care needs to be taken about S_NOQUOTA inode flag (marking that
103 * inode is a quota file). Functions adding pointers from inode to dquots have 106 * inode is a quota file). Functions adding pointers from inode to dquots have
104 * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they 107 * to check this flag under dq_data_lock and then (if S_NOQUOTA is not set) they
105 * have to do all pointer modifications before dropping dqptr_sem. This makes 108 * have to do all pointer modifications before dropping dq_data_lock. This makes
106 * sure they cannot race with quotaon which first sets S_NOQUOTA flag and 109 * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
107 * then drops all pointers to dquots from an inode. 110 * then drops all pointers to dquots from an inode.
108 * 111 *
@@ -116,21 +119,15 @@
116 * spinlock to internal buffers before writing. 119 * spinlock to internal buffers before writing.
117 * 120 *
118 * Lock ordering (including related VFS locks) is the following: 121 * Lock ordering (including related VFS locks) is the following:
119 * dqonoff_mutex > i_mutex > journal_lock > dqptr_sem > dquot->dq_lock > 122 * dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
120 * dqio_mutex
121 * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc. 123 * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
122 * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
123 * dqptr_sem. But filesystem has to count with the fact that functions such as
124 * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
125 * from inside a transaction to keep filesystem consistency after a crash. Also
126 * filesystems usually want to do some IO on dquot from ->mark_dirty which is
127 * called with dqptr_sem held.
128 */ 124 */
129 125
130static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock); 126static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
131static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); 127static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); 128__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
133EXPORT_SYMBOL(dq_data_lock); 129EXPORT_SYMBOL(dq_data_lock);
130DEFINE_STATIC_SRCU(dquot_srcu);
134 131
135void __quota_error(struct super_block *sb, const char *func, 132void __quota_error(struct super_block *sb, const char *func,
136 const char *fmt, ...) 133 const char *fmt, ...)
@@ -733,7 +730,6 @@ static struct shrinker dqcache_shrinker = {
733 730
734/* 731/*
735 * Put reference to dquot 732 * Put reference to dquot
736 * NOTE: If you change this function please check whether dqput_blocks() works right...
737 */ 733 */
738void dqput(struct dquot *dquot) 734void dqput(struct dquot *dquot)
739{ 735{
@@ -963,46 +959,33 @@ static void add_dquot_ref(struct super_block *sb, int type)
963} 959}
964 960
965/* 961/*
966 * Return 0 if dqput() won't block.
967 * (note that 1 doesn't necessarily mean blocking)
968 */
969static inline int dqput_blocks(struct dquot *dquot)
970{
971 if (atomic_read(&dquot->dq_count) <= 1)
972 return 1;
973 return 0;
974}
975
976/*
977 * Remove references to dquots from inode and add dquot to list for freeing 962 * Remove references to dquots from inode and add dquot to list for freeing
978 * if we have the last reference to dquot 963 * if we have the last reference to dquot
979 * We can't race with anybody because we hold dqptr_sem for writing...
980 */ 964 */
981static int remove_inode_dquot_ref(struct inode *inode, int type, 965static void remove_inode_dquot_ref(struct inode *inode, int type,
982 struct list_head *tofree_head) 966 struct list_head *tofree_head)
983{ 967{
984 struct dquot *dquot = inode->i_dquot[type]; 968 struct dquot *dquot = inode->i_dquot[type];
985 969
986 inode->i_dquot[type] = NULL; 970 inode->i_dquot[type] = NULL;
987 if (dquot) { 971 if (!dquot)
988 if (dqput_blocks(dquot)) { 972 return;
989#ifdef CONFIG_QUOTA_DEBUG 973
990 if (atomic_read(&dquot->dq_count) != 1) 974 if (list_empty(&dquot->dq_free)) {
991 quota_error(inode->i_sb, "Adding dquot with " 975 /*
992 "dq_count %d to dispose list", 976 * The inode still has reference to dquot so it can't be in the
993 atomic_read(&dquot->dq_count)); 977 * free list
994#endif 978 */
995 spin_lock(&dq_list_lock); 979 spin_lock(&dq_list_lock);
996 /* As dquot must have currently users it can't be on 980 list_add(&dquot->dq_free, tofree_head);
997 * the free list... */ 981 spin_unlock(&dq_list_lock);
998 list_add(&dquot->dq_free, tofree_head); 982 } else {
999 spin_unlock(&dq_list_lock); 983 /*
1000 return 1; 984 * Dquot is already in a list to put so we won't drop the last
1001 } 985 * reference here.
1002 else 986 */
1003 dqput(dquot); /* We have guaranteed we won't block */ 987 dqput(dquot);
1004 } 988 }
1005 return 0;
1006} 989}
1007 990
1008/* 991/*
@@ -1037,13 +1020,15 @@ static void remove_dquot_ref(struct super_block *sb, int type,
1037 * We have to scan also I_NEW inodes because they can already 1020 * We have to scan also I_NEW inodes because they can already
1038 * have quota pointer initialized. Luckily, we need to touch 1021 * have quota pointer initialized. Luckily, we need to touch
1039 * only quota pointers and these have separate locking 1022 * only quota pointers and these have separate locking
1040 * (dqptr_sem). 1023 * (dq_data_lock).
1041 */ 1024 */
1025 spin_lock(&dq_data_lock);
1042 if (!IS_NOQUOTA(inode)) { 1026 if (!IS_NOQUOTA(inode)) {
1043 if (unlikely(inode_get_rsv_space(inode) > 0)) 1027 if (unlikely(inode_get_rsv_space(inode) > 0))
1044 reserved = 1; 1028 reserved = 1;
1045 remove_inode_dquot_ref(inode, type, tofree_head); 1029 remove_inode_dquot_ref(inode, type, tofree_head);
1046 } 1030 }
1031 spin_unlock(&dq_data_lock);
1047 } 1032 }
1048 spin_unlock(&inode_sb_list_lock); 1033 spin_unlock(&inode_sb_list_lock);
1049#ifdef CONFIG_QUOTA_DEBUG 1034#ifdef CONFIG_QUOTA_DEBUG
@@ -1061,9 +1046,8 @@ static void drop_dquot_ref(struct super_block *sb, int type)
1061 LIST_HEAD(tofree_head); 1046 LIST_HEAD(tofree_head);
1062 1047
1063 if (sb->dq_op) { 1048 if (sb->dq_op) {
1064 down_write(&sb_dqopt(sb)->dqptr_sem);
1065 remove_dquot_ref(sb, type, &tofree_head); 1049 remove_dquot_ref(sb, type, &tofree_head);
1066 up_write(&sb_dqopt(sb)->dqptr_sem); 1050 synchronize_srcu(&dquot_srcu);
1067 put_dquot_list(&tofree_head); 1051 put_dquot_list(&tofree_head);
1068 } 1052 }
1069} 1053}
@@ -1394,21 +1378,16 @@ static int dquot_active(const struct inode *inode)
1394/* 1378/*
1395 * Initialize quota pointers in inode 1379 * Initialize quota pointers in inode
1396 * 1380 *
1397 * We do things in a bit complicated way but by that we avoid calling
1398 * dqget() and thus filesystem callbacks under dqptr_sem.
1399 *
1400 * It is better to call this function outside of any transaction as it 1381 * It is better to call this function outside of any transaction as it
1401 * might need a lot of space in journal for dquot structure allocation. 1382 * might need a lot of space in journal for dquot structure allocation.
1402 */ 1383 */
1403static void __dquot_initialize(struct inode *inode, int type) 1384static void __dquot_initialize(struct inode *inode, int type)
1404{ 1385{
1405 int cnt; 1386 int cnt, init_needed = 0;
1406 struct dquot *got[MAXQUOTAS]; 1387 struct dquot *got[MAXQUOTAS];
1407 struct super_block *sb = inode->i_sb; 1388 struct super_block *sb = inode->i_sb;
1408 qsize_t rsv; 1389 qsize_t rsv;
1409 1390
1410 /* First test before acquiring mutex - solves deadlocks when we
1411 * re-enter the quota code and are already holding the mutex */
1412 if (!dquot_active(inode)) 1391 if (!dquot_active(inode))
1413 return; 1392 return;
1414 1393
@@ -1418,6 +1397,15 @@ static void __dquot_initialize(struct inode *inode, int type)
1418 got[cnt] = NULL; 1397 got[cnt] = NULL;
1419 if (type != -1 && cnt != type) 1398 if (type != -1 && cnt != type)
1420 continue; 1399 continue;
1400 /*
1401 * The i_dquot should have been initialized in most cases,
1402 * we check it without locking here to avoid unnecessary
1403 * dqget()/dqput() calls.
1404 */
1405 if (inode->i_dquot[cnt])
1406 continue;
1407 init_needed = 1;
1408
1421 switch (cnt) { 1409 switch (cnt) {
1422 case USRQUOTA: 1410 case USRQUOTA:
1423 qid = make_kqid_uid(inode->i_uid); 1411 qid = make_kqid_uid(inode->i_uid);
@@ -1429,7 +1417,11 @@ static void __dquot_initialize(struct inode *inode, int type)
1429 got[cnt] = dqget(sb, qid); 1417 got[cnt] = dqget(sb, qid);
1430 } 1418 }
1431 1419
1432 down_write(&sb_dqopt(sb)->dqptr_sem); 1420 /* All required i_dquot has been initialized */
1421 if (!init_needed)
1422 return;
1423
1424 spin_lock(&dq_data_lock);
1433 if (IS_NOQUOTA(inode)) 1425 if (IS_NOQUOTA(inode))
1434 goto out_err; 1426 goto out_err;
1435 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1427 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1449,15 +1441,12 @@ static void __dquot_initialize(struct inode *inode, int type)
1449 * did a write before quota was turned on 1441 * did a write before quota was turned on
1450 */ 1442 */
1451 rsv = inode_get_rsv_space(inode); 1443 rsv = inode_get_rsv_space(inode);
1452 if (unlikely(rsv)) { 1444 if (unlikely(rsv))
1453 spin_lock(&dq_data_lock);
1454 dquot_resv_space(inode->i_dquot[cnt], rsv); 1445 dquot_resv_space(inode->i_dquot[cnt], rsv);
1455 spin_unlock(&dq_data_lock);
1456 }
1457 } 1446 }
1458 } 1447 }
1459out_err: 1448out_err:
1460 up_write(&sb_dqopt(sb)->dqptr_sem); 1449 spin_unlock(&dq_data_lock);
1461 /* Drop unused references */ 1450 /* Drop unused references */
1462 dqput_all(got); 1451 dqput_all(got);
1463} 1452}
@@ -1469,19 +1458,24 @@ void dquot_initialize(struct inode *inode)
1469EXPORT_SYMBOL(dquot_initialize); 1458EXPORT_SYMBOL(dquot_initialize);
1470 1459
1471/* 1460/*
1472 * Release all quotas referenced by inode 1461 * Release all quotas referenced by inode.
1462 *
1463 * This function only be called on inode free or converting
1464 * a file to quota file, no other users for the i_dquot in
1465 * both cases, so we needn't call synchronize_srcu() after
1466 * clearing i_dquot.
1473 */ 1467 */
1474static void __dquot_drop(struct inode *inode) 1468static void __dquot_drop(struct inode *inode)
1475{ 1469{
1476 int cnt; 1470 int cnt;
1477 struct dquot *put[MAXQUOTAS]; 1471 struct dquot *put[MAXQUOTAS];
1478 1472
1479 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1473 spin_lock(&dq_data_lock);
1480 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1474 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1481 put[cnt] = inode->i_dquot[cnt]; 1475 put[cnt] = inode->i_dquot[cnt];
1482 inode->i_dquot[cnt] = NULL; 1476 inode->i_dquot[cnt] = NULL;
1483 } 1477 }
1484 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1478 spin_unlock(&dq_data_lock);
1485 dqput_all(put); 1479 dqput_all(put);
1486} 1480}
1487 1481
@@ -1599,15 +1593,11 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
1599 */ 1593 */
1600int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) 1594int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1601{ 1595{
1602 int cnt, ret = 0; 1596 int cnt, ret = 0, index;
1603 struct dquot_warn warn[MAXQUOTAS]; 1597 struct dquot_warn warn[MAXQUOTAS];
1604 struct dquot **dquots = inode->i_dquot; 1598 struct dquot **dquots = inode->i_dquot;
1605 int reserve = flags & DQUOT_SPACE_RESERVE; 1599 int reserve = flags & DQUOT_SPACE_RESERVE;
1606 1600
1607 /*
1608 * First test before acquiring mutex - solves deadlocks when we
1609 * re-enter the quota code and are already holding the mutex
1610 */
1611 if (!dquot_active(inode)) { 1601 if (!dquot_active(inode)) {
1612 inode_incr_space(inode, number, reserve); 1602 inode_incr_space(inode, number, reserve);
1613 goto out; 1603 goto out;
@@ -1616,7 +1606,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1616 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1606 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1617 warn[cnt].w_type = QUOTA_NL_NOWARN; 1607 warn[cnt].w_type = QUOTA_NL_NOWARN;
1618 1608
1619 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1609 index = srcu_read_lock(&dquot_srcu);
1620 spin_lock(&dq_data_lock); 1610 spin_lock(&dq_data_lock);
1621 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1611 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1622 if (!dquots[cnt]) 1612 if (!dquots[cnt])
@@ -1643,7 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1643 goto out_flush_warn; 1633 goto out_flush_warn;
1644 mark_all_dquot_dirty(dquots); 1634 mark_all_dquot_dirty(dquots);
1645out_flush_warn: 1635out_flush_warn:
1646 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1636 srcu_read_unlock(&dquot_srcu, index);
1647 flush_warnings(warn); 1637 flush_warnings(warn);
1648out: 1638out:
1649 return ret; 1639 return ret;
@@ -1655,17 +1645,16 @@ EXPORT_SYMBOL(__dquot_alloc_space);
1655 */ 1645 */
1656int dquot_alloc_inode(const struct inode *inode) 1646int dquot_alloc_inode(const struct inode *inode)
1657{ 1647{
1658 int cnt, ret = 0; 1648 int cnt, ret = 0, index;
1659 struct dquot_warn warn[MAXQUOTAS]; 1649 struct dquot_warn warn[MAXQUOTAS];
1660 struct dquot * const *dquots = inode->i_dquot; 1650 struct dquot * const *dquots = inode->i_dquot;
1661 1651
1662 /* First test before acquiring mutex - solves deadlocks when we
1663 * re-enter the quota code and are already holding the mutex */
1664 if (!dquot_active(inode)) 1652 if (!dquot_active(inode))
1665 return 0; 1653 return 0;
1666 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1654 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1667 warn[cnt].w_type = QUOTA_NL_NOWARN; 1655 warn[cnt].w_type = QUOTA_NL_NOWARN;
1668 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1656
1657 index = srcu_read_lock(&dquot_srcu);
1669 spin_lock(&dq_data_lock); 1658 spin_lock(&dq_data_lock);
1670 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1659 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1671 if (!dquots[cnt]) 1660 if (!dquots[cnt])
@@ -1685,7 +1674,7 @@ warn_put_all:
1685 spin_unlock(&dq_data_lock); 1674 spin_unlock(&dq_data_lock);
1686 if (ret == 0) 1675 if (ret == 0)
1687 mark_all_dquot_dirty(dquots); 1676 mark_all_dquot_dirty(dquots);
1688 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1677 srcu_read_unlock(&dquot_srcu, index);
1689 flush_warnings(warn); 1678 flush_warnings(warn);
1690 return ret; 1679 return ret;
1691} 1680}
@@ -1696,14 +1685,14 @@ EXPORT_SYMBOL(dquot_alloc_inode);
1696 */ 1685 */
1697int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) 1686int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1698{ 1687{
1699 int cnt; 1688 int cnt, index;
1700 1689
1701 if (!dquot_active(inode)) { 1690 if (!dquot_active(inode)) {
1702 inode_claim_rsv_space(inode, number); 1691 inode_claim_rsv_space(inode, number);
1703 return 0; 1692 return 0;
1704 } 1693 }
1705 1694
1706 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1695 index = srcu_read_lock(&dquot_srcu);
1707 spin_lock(&dq_data_lock); 1696 spin_lock(&dq_data_lock);
1708 /* Claim reserved quotas to allocated quotas */ 1697 /* Claim reserved quotas to allocated quotas */
1709 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1698 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1715,7 +1704,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1715 inode_claim_rsv_space(inode, number); 1704 inode_claim_rsv_space(inode, number);
1716 spin_unlock(&dq_data_lock); 1705 spin_unlock(&dq_data_lock);
1717 mark_all_dquot_dirty(inode->i_dquot); 1706 mark_all_dquot_dirty(inode->i_dquot);
1718 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1707 srcu_read_unlock(&dquot_srcu, index);
1719 return 0; 1708 return 0;
1720} 1709}
1721EXPORT_SYMBOL(dquot_claim_space_nodirty); 1710EXPORT_SYMBOL(dquot_claim_space_nodirty);
@@ -1725,14 +1714,14 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
1725 */ 1714 */
1726void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) 1715void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1727{ 1716{
1728 int cnt; 1717 int cnt, index;
1729 1718
1730 if (!dquot_active(inode)) { 1719 if (!dquot_active(inode)) {
1731 inode_reclaim_rsv_space(inode, number); 1720 inode_reclaim_rsv_space(inode, number);
1732 return; 1721 return;
1733 } 1722 }
1734 1723
1735 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1724 index = srcu_read_lock(&dquot_srcu);
1736 spin_lock(&dq_data_lock); 1725 spin_lock(&dq_data_lock);
1737 /* Claim reserved quotas to allocated quotas */ 1726 /* Claim reserved quotas to allocated quotas */
1738 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1727 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1744,7 +1733,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1744 inode_reclaim_rsv_space(inode, number); 1733 inode_reclaim_rsv_space(inode, number);
1745 spin_unlock(&dq_data_lock); 1734 spin_unlock(&dq_data_lock);
1746 mark_all_dquot_dirty(inode->i_dquot); 1735 mark_all_dquot_dirty(inode->i_dquot);
1747 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1736 srcu_read_unlock(&dquot_srcu, index);
1748 return; 1737 return;
1749} 1738}
1750EXPORT_SYMBOL(dquot_reclaim_space_nodirty); 1739EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
@@ -1757,16 +1746,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1757 unsigned int cnt; 1746 unsigned int cnt;
1758 struct dquot_warn warn[MAXQUOTAS]; 1747 struct dquot_warn warn[MAXQUOTAS];
1759 struct dquot **dquots = inode->i_dquot; 1748 struct dquot **dquots = inode->i_dquot;
1760 int reserve = flags & DQUOT_SPACE_RESERVE; 1749 int reserve = flags & DQUOT_SPACE_RESERVE, index;
1761 1750
1762 /* First test before acquiring mutex - solves deadlocks when we
1763 * re-enter the quota code and are already holding the mutex */
1764 if (!dquot_active(inode)) { 1751 if (!dquot_active(inode)) {
1765 inode_decr_space(inode, number, reserve); 1752 inode_decr_space(inode, number, reserve);
1766 return; 1753 return;
1767 } 1754 }
1768 1755
1769 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1756 index = srcu_read_lock(&dquot_srcu);
1770 spin_lock(&dq_data_lock); 1757 spin_lock(&dq_data_lock);
1771 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1758 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1772 int wtype; 1759 int wtype;
@@ -1789,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1789 goto out_unlock; 1776 goto out_unlock;
1790 mark_all_dquot_dirty(dquots); 1777 mark_all_dquot_dirty(dquots);
1791out_unlock: 1778out_unlock:
1792 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1779 srcu_read_unlock(&dquot_srcu, index);
1793 flush_warnings(warn); 1780 flush_warnings(warn);
1794} 1781}
1795EXPORT_SYMBOL(__dquot_free_space); 1782EXPORT_SYMBOL(__dquot_free_space);
@@ -1802,13 +1789,12 @@ void dquot_free_inode(const struct inode *inode)
1802 unsigned int cnt; 1789 unsigned int cnt;
1803 struct dquot_warn warn[MAXQUOTAS]; 1790 struct dquot_warn warn[MAXQUOTAS];
1804 struct dquot * const *dquots = inode->i_dquot; 1791 struct dquot * const *dquots = inode->i_dquot;
1792 int index;
1805 1793
1806 /* First test before acquiring mutex - solves deadlocks when we
1807 * re-enter the quota code and are already holding the mutex */
1808 if (!dquot_active(inode)) 1794 if (!dquot_active(inode))
1809 return; 1795 return;
1810 1796
1811 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1797 index = srcu_read_lock(&dquot_srcu);
1812 spin_lock(&dq_data_lock); 1798 spin_lock(&dq_data_lock);
1813 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1799 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1814 int wtype; 1800 int wtype;
@@ -1823,7 +1809,7 @@ void dquot_free_inode(const struct inode *inode)
1823 } 1809 }
1824 spin_unlock(&dq_data_lock); 1810 spin_unlock(&dq_data_lock);
1825 mark_all_dquot_dirty(dquots); 1811 mark_all_dquot_dirty(dquots);
1826 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1812 srcu_read_unlock(&dquot_srcu, index);
1827 flush_warnings(warn); 1813 flush_warnings(warn);
1828} 1814}
1829EXPORT_SYMBOL(dquot_free_inode); 1815EXPORT_SYMBOL(dquot_free_inode);
@@ -1837,6 +1823,8 @@ EXPORT_SYMBOL(dquot_free_inode);
1837 * This operation can block, but only after everything is updated 1823 * This operation can block, but only after everything is updated
1838 * A transaction must be started when entering this function. 1824 * A transaction must be started when entering this function.
1839 * 1825 *
1826 * We are holding reference on transfer_from & transfer_to, no need to
1827 * protect them by srcu_read_lock().
1840 */ 1828 */
1841int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) 1829int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1842{ 1830{
@@ -1849,8 +1837,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1849 struct dquot_warn warn_from_inodes[MAXQUOTAS]; 1837 struct dquot_warn warn_from_inodes[MAXQUOTAS];
1850 struct dquot_warn warn_from_space[MAXQUOTAS]; 1838 struct dquot_warn warn_from_space[MAXQUOTAS];
1851 1839
1852 /* First test before acquiring mutex - solves deadlocks when we
1853 * re-enter the quota code and are already holding the mutex */
1854 if (IS_NOQUOTA(inode)) 1840 if (IS_NOQUOTA(inode))
1855 return 0; 1841 return 0;
1856 /* Initialize the arrays */ 1842 /* Initialize the arrays */
@@ -1859,12 +1845,12 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1859 warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN; 1845 warn_from_inodes[cnt].w_type = QUOTA_NL_NOWARN;
1860 warn_from_space[cnt].w_type = QUOTA_NL_NOWARN; 1846 warn_from_space[cnt].w_type = QUOTA_NL_NOWARN;
1861 } 1847 }
1862 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1848
1849 spin_lock(&dq_data_lock);
1863 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ 1850 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
1864 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1851 spin_unlock(&dq_data_lock);
1865 return 0; 1852 return 0;
1866 } 1853 }
1867 spin_lock(&dq_data_lock);
1868 cur_space = inode_get_bytes(inode); 1854 cur_space = inode_get_bytes(inode);
1869 rsv_space = inode_get_rsv_space(inode); 1855 rsv_space = inode_get_rsv_space(inode);
1870 space = cur_space + rsv_space; 1856 space = cur_space + rsv_space;
@@ -1918,7 +1904,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1918 inode->i_dquot[cnt] = transfer_to[cnt]; 1904 inode->i_dquot[cnt] = transfer_to[cnt];
1919 } 1905 }
1920 spin_unlock(&dq_data_lock); 1906 spin_unlock(&dq_data_lock);
1921 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1922 1907
1923 mark_all_dquot_dirty(transfer_from); 1908 mark_all_dquot_dirty(transfer_from);
1924 mark_all_dquot_dirty(transfer_to); 1909 mark_all_dquot_dirty(transfer_to);
@@ -1932,7 +1917,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1932 return 0; 1917 return 0;
1933over_quota: 1918over_quota:
1934 spin_unlock(&dq_data_lock); 1919 spin_unlock(&dq_data_lock);
1935 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1936 flush_warnings(warn_to); 1920 flush_warnings(warn_to);
1937 return ret; 1921 return ret;
1938} 1922}
diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c
index 2f97b0e2c501..ebc5e6285800 100644
--- a/fs/quota/kqid.c
+++ b/fs/quota/kqid.c
@@ -55,7 +55,7 @@ EXPORT_SYMBOL(qid_lt);
55/** 55/**
56 * from_kqid - Create a qid from a kqid user-namespace pair. 56 * from_kqid - Create a qid from a kqid user-namespace pair.
57 * @targ: The user namespace we want a qid in. 57 * @targ: The user namespace we want a qid in.
58 * @kuid: The kernel internal quota identifier to start with. 58 * @kqid: The kernel internal quota identifier to start with.
59 * 59 *
60 * Map @kqid into the user-namespace specified by @targ and 60 * Map @kqid into the user-namespace specified by @targ and
61 * return the resulting qid. 61 * return the resulting qid.
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index 72d29177998e..bb2869f5dfd8 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -32,8 +32,7 @@ static struct genl_family quota_genl_family = {
32 32
33/** 33/**
34 * quota_send_warning - Send warning to userspace about exceeded quota 34 * quota_send_warning - Send warning to userspace about exceeded quota
35 * @type: The quota type: USRQQUOTA, GRPQUOTA,... 35 * @qid: The kernel internal quota identifier.
36 * @id: The user or group id of the quota that was exceeded
37 * @dev: The device on which the fs is mounted (sb->s_dev) 36 * @dev: The device on which the fs is mounted (sb->s_dev)
38 * @warntype: The type of the warning: QUOTA_NL_... 37 * @warntype: The type of the warning: QUOTA_NL_...
39 * 38 *
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index ff3f0b3cfdb3..75621649dbd7 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -79,13 +79,13 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
79{ 79{
80 __u32 fmt; 80 __u32 fmt;
81 81
82 down_read(&sb_dqopt(sb)->dqptr_sem); 82 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
83 if (!sb_has_quota_active(sb, type)) { 83 if (!sb_has_quota_active(sb, type)) {
84 up_read(&sb_dqopt(sb)->dqptr_sem); 84 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
85 return -ESRCH; 85 return -ESRCH;
86 } 86 }
87 fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; 87 fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
88 up_read(&sb_dqopt(sb)->dqptr_sem); 88 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
89 if (copy_to_user(addr, &fmt, sizeof(fmt))) 89 if (copy_to_user(addr, &fmt, sizeof(fmt)))
90 return -EFAULT; 90 return -EFAULT;
91 return 0; 91 return 0;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index dda012ad4208..bbafbde3471a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -222,7 +222,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
222 222
223 /* gang-find the pages */ 223 /* gang-find the pages */
224 ret = -ENOMEM; 224 ret = -ENOMEM;
225 pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); 225 pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
226 if (!pages) 226 if (!pages)
227 goto out_free; 227 goto out_free;
228 228
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d9f5a60dd59b..0a7dc941aaf4 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -9,7 +9,7 @@
9#include <linux/stat.h> 9#include <linux/stat.h>
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <asm/uaccess.h> 12#include <linux/uaccess.h>
13 13
14extern const struct reiserfs_key MIN_KEY; 14extern const struct reiserfs_key MIN_KEY;
15 15
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 54fdf196bfb2..9c02d96d3a42 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -10,7 +10,7 @@
10 * and using buffers obtained after all above. 10 * and using buffers obtained after all above.
11 */ 11 */
12 12
13#include <asm/uaccess.h> 13#include <linux/uaccess.h>
14#include <linux/time.h> 14#include <linux/time.h>
15#include "reiserfs.h" 15#include "reiserfs.h"
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
@@ -286,12 +286,14 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
286 return 0; 286 return 0;
287} 287}
288 288
289static void balance_leaf_insert_left(struct tree_balance *tb, 289static unsigned int balance_leaf_insert_left(struct tree_balance *tb,
290 struct item_head *ih, const char *body) 290 struct item_head *const ih,
291 const char * const body)
291{ 292{
292 int ret; 293 int ret;
293 struct buffer_info bi; 294 struct buffer_info bi;
294 int n = B_NR_ITEMS(tb->L[0]); 295 int n = B_NR_ITEMS(tb->L[0]);
296 unsigned body_shift_bytes = 0;
295 297
296 if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { 298 if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
297 /* part of new item falls into L[0] */ 299 /* part of new item falls into L[0] */
@@ -329,7 +331,7 @@ static void balance_leaf_insert_left(struct tree_balance *tb,
329 331
330 put_ih_item_len(ih, new_item_len); 332 put_ih_item_len(ih, new_item_len);
331 if (tb->lbytes > tb->zeroes_num) { 333 if (tb->lbytes > tb->zeroes_num) {
332 body += (tb->lbytes - tb->zeroes_num); 334 body_shift_bytes = tb->lbytes - tb->zeroes_num;
333 tb->zeroes_num = 0; 335 tb->zeroes_num = 0;
334 } else 336 } else
335 tb->zeroes_num -= tb->lbytes; 337 tb->zeroes_num -= tb->lbytes;
@@ -349,11 +351,12 @@ static void balance_leaf_insert_left(struct tree_balance *tb,
349 tb->insert_size[0] = 0; 351 tb->insert_size[0] = 0;
350 tb->zeroes_num = 0; 352 tb->zeroes_num = 0;
351 } 353 }
354 return body_shift_bytes;
352} 355}
353 356
354static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb, 357static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb,
355 struct item_head *ih, 358 struct item_head * const ih,
356 const char *body) 359 const char * const body)
357{ 360{
358 int n = B_NR_ITEMS(tb->L[0]); 361 int n = B_NR_ITEMS(tb->L[0]);
359 struct buffer_info bi; 362 struct buffer_info bi;
@@ -413,17 +416,18 @@ static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb,
413 tb->pos_in_item -= tb->lbytes; 416 tb->pos_in_item -= tb->lbytes;
414} 417}
415 418
416static void balance_leaf_paste_left_shift(struct tree_balance *tb, 419static unsigned int balance_leaf_paste_left_shift(struct tree_balance *tb,
417 struct item_head *ih, 420 struct item_head * const ih,
418 const char *body) 421 const char * const body)
419{ 422{
420 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 423 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
421 int n = B_NR_ITEMS(tb->L[0]); 424 int n = B_NR_ITEMS(tb->L[0]);
422 struct buffer_info bi; 425 struct buffer_info bi;
426 int body_shift_bytes = 0;
423 427
424 if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) { 428 if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
425 balance_leaf_paste_left_shift_dirent(tb, ih, body); 429 balance_leaf_paste_left_shift_dirent(tb, ih, body);
426 return; 430 return 0;
427 } 431 }
428 432
429 RFALSE(tb->lbytes <= 0, 433 RFALSE(tb->lbytes <= 0,
@@ -497,7 +501,7 @@ static void balance_leaf_paste_left_shift(struct tree_balance *tb,
497 * insert_size[0] 501 * insert_size[0]
498 */ 502 */
499 if (l_n > tb->zeroes_num) { 503 if (l_n > tb->zeroes_num) {
500 body += (l_n - tb->zeroes_num); 504 body_shift_bytes = l_n - tb->zeroes_num;
501 tb->zeroes_num = 0; 505 tb->zeroes_num = 0;
502 } else 506 } else
503 tb->zeroes_num -= l_n; 507 tb->zeroes_num -= l_n;
@@ -526,13 +530,14 @@ static void balance_leaf_paste_left_shift(struct tree_balance *tb,
526 */ 530 */
527 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 531 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
528 } 532 }
533 return body_shift_bytes;
529} 534}
530 535
531 536
532/* appended item will be in L[0] in whole */ 537/* appended item will be in L[0] in whole */
533static void balance_leaf_paste_left_whole(struct tree_balance *tb, 538static void balance_leaf_paste_left_whole(struct tree_balance *tb,
534 struct item_head *ih, 539 struct item_head * const ih,
535 const char *body) 540 const char * const body)
536{ 541{
537 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 542 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
538 int n = B_NR_ITEMS(tb->L[0]); 543 int n = B_NR_ITEMS(tb->L[0]);
@@ -584,39 +589,44 @@ static void balance_leaf_paste_left_whole(struct tree_balance *tb,
584 tb->zeroes_num = 0; 589 tb->zeroes_num = 0;
585} 590}
586 591
587static void balance_leaf_paste_left(struct tree_balance *tb, 592static unsigned int balance_leaf_paste_left(struct tree_balance *tb,
588 struct item_head *ih, const char *body) 593 struct item_head * const ih,
594 const char * const body)
589{ 595{
590 /* we must shift the part of the appended item */ 596 /* we must shift the part of the appended item */
591 if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) 597 if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1)
592 balance_leaf_paste_left_shift(tb, ih, body); 598 return balance_leaf_paste_left_shift(tb, ih, body);
593 else 599 else
594 balance_leaf_paste_left_whole(tb, ih, body); 600 balance_leaf_paste_left_whole(tb, ih, body);
601 return 0;
595} 602}
596 603
597/* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 604/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
598static void balance_leaf_left(struct tree_balance *tb, struct item_head *ih, 605static unsigned int balance_leaf_left(struct tree_balance *tb,
599 const char *body, int flag) 606 struct item_head * const ih,
607 const char * const body, int flag)
600{ 608{
601 if (tb->lnum[0] <= 0) 609 if (tb->lnum[0] <= 0)
602 return; 610 return 0;
603 611
604 /* new item or it part falls to L[0], shift it too */ 612 /* new item or it part falls to L[0], shift it too */
605 if (tb->item_pos < tb->lnum[0]) { 613 if (tb->item_pos < tb->lnum[0]) {
606 BUG_ON(flag != M_INSERT && flag != M_PASTE); 614 BUG_ON(flag != M_INSERT && flag != M_PASTE);
607 615
608 if (flag == M_INSERT) 616 if (flag == M_INSERT)
609 balance_leaf_insert_left(tb, ih, body); 617 return balance_leaf_insert_left(tb, ih, body);
610 else /* M_PASTE */ 618 else /* M_PASTE */
611 balance_leaf_paste_left(tb, ih, body); 619 return balance_leaf_paste_left(tb, ih, body);
612 } else 620 } else
613 /* new item doesn't fall into L[0] */ 621 /* new item doesn't fall into L[0] */
614 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 622 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
623 return 0;
615} 624}
616 625
617 626
618static void balance_leaf_insert_right(struct tree_balance *tb, 627static void balance_leaf_insert_right(struct tree_balance *tb,
619 struct item_head *ih, const char *body) 628 struct item_head * const ih,
629 const char * const body)
620{ 630{
621 631
622 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 632 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
@@ -704,7 +714,8 @@ static void balance_leaf_insert_right(struct tree_balance *tb,
704 714
705 715
706static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb, 716static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb,
707 struct item_head *ih, const char *body) 717 struct item_head * const ih,
718 const char * const body)
708{ 719{
709 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 720 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
710 struct buffer_info bi; 721 struct buffer_info bi;
@@ -754,7 +765,8 @@ static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb,
754} 765}
755 766
756static void balance_leaf_paste_right_shift(struct tree_balance *tb, 767static void balance_leaf_paste_right_shift(struct tree_balance *tb,
757 struct item_head *ih, const char *body) 768 struct item_head * const ih,
769 const char * const body)
758{ 770{
759 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 771 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
760 int n_shift, n_rem, r_zeroes_number, version; 772 int n_shift, n_rem, r_zeroes_number, version;
@@ -831,7 +843,8 @@ static void balance_leaf_paste_right_shift(struct tree_balance *tb,
831} 843}
832 844
833static void balance_leaf_paste_right_whole(struct tree_balance *tb, 845static void balance_leaf_paste_right_whole(struct tree_balance *tb,
834 struct item_head *ih, const char *body) 846 struct item_head * const ih,
847 const char * const body)
835{ 848{
836 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 849 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
837 int n = B_NR_ITEMS(tbS0); 850 int n = B_NR_ITEMS(tbS0);
@@ -874,7 +887,8 @@ static void balance_leaf_paste_right_whole(struct tree_balance *tb,
874} 887}
875 888
876static void balance_leaf_paste_right(struct tree_balance *tb, 889static void balance_leaf_paste_right(struct tree_balance *tb,
877 struct item_head *ih, const char *body) 890 struct item_head * const ih,
891 const char * const body)
878{ 892{
879 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 893 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
880 int n = B_NR_ITEMS(tbS0); 894 int n = B_NR_ITEMS(tbS0);
@@ -896,8 +910,9 @@ static void balance_leaf_paste_right(struct tree_balance *tb,
896} 910}
897 911
898/* shift rnum[0] items from S[0] to the right neighbor R[0] */ 912/* shift rnum[0] items from S[0] to the right neighbor R[0] */
899static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih, 913static void balance_leaf_right(struct tree_balance *tb,
900 const char *body, int flag) 914 struct item_head * const ih,
915 const char * const body, int flag)
901{ 916{
902 if (tb->rnum[0] <= 0) 917 if (tb->rnum[0] <= 0)
903 return; 918 return;
@@ -911,8 +926,8 @@ static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih,
911} 926}
912 927
913static void balance_leaf_new_nodes_insert(struct tree_balance *tb, 928static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
914 struct item_head *ih, 929 struct item_head * const ih,
915 const char *body, 930 const char * const body,
916 struct item_head *insert_key, 931 struct item_head *insert_key,
917 struct buffer_head **insert_ptr, 932 struct buffer_head **insert_ptr,
918 int i) 933 int i)
@@ -1003,8 +1018,8 @@ static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
1003 1018
1004/* we append to directory item */ 1019/* we append to directory item */
1005static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb, 1020static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb,
1006 struct item_head *ih, 1021 struct item_head * const ih,
1007 const char *body, 1022 const char * const body,
1008 struct item_head *insert_key, 1023 struct item_head *insert_key,
1009 struct buffer_head **insert_ptr, 1024 struct buffer_head **insert_ptr,
1010 int i) 1025 int i)
@@ -1058,8 +1073,8 @@ static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb,
1058} 1073}
1059 1074
1060static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb, 1075static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb,
1061 struct item_head *ih, 1076 struct item_head * const ih,
1062 const char *body, 1077 const char * const body,
1063 struct item_head *insert_key, 1078 struct item_head *insert_key,
1064 struct buffer_head **insert_ptr, 1079 struct buffer_head **insert_ptr,
1065 int i) 1080 int i)
@@ -1131,8 +1146,8 @@ static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb,
1131} 1146}
1132 1147
1133static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb, 1148static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb,
1134 struct item_head *ih, 1149 struct item_head * const ih,
1135 const char *body, 1150 const char * const body,
1136 struct item_head *insert_key, 1151 struct item_head *insert_key,
1137 struct buffer_head **insert_ptr, 1152 struct buffer_head **insert_ptr,
1138 int i) 1153 int i)
@@ -1184,8 +1199,8 @@ static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb,
1184 1199
1185} 1200}
1186static void balance_leaf_new_nodes_paste(struct tree_balance *tb, 1201static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
1187 struct item_head *ih, 1202 struct item_head * const ih,
1188 const char *body, 1203 const char * const body,
1189 struct item_head *insert_key, 1204 struct item_head *insert_key,
1190 struct buffer_head **insert_ptr, 1205 struct buffer_head **insert_ptr,
1191 int i) 1206 int i)
@@ -1214,8 +1229,8 @@ static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
1214 1229
1215/* Fill new nodes that appear in place of S[0] */ 1230/* Fill new nodes that appear in place of S[0] */
1216static void balance_leaf_new_nodes(struct tree_balance *tb, 1231static void balance_leaf_new_nodes(struct tree_balance *tb,
1217 struct item_head *ih, 1232 struct item_head * const ih,
1218 const char *body, 1233 const char * const body,
1219 struct item_head *insert_key, 1234 struct item_head *insert_key,
1220 struct buffer_head **insert_ptr, 1235 struct buffer_head **insert_ptr,
1221 int flag) 1236 int flag)
@@ -1254,8 +1269,8 @@ static void balance_leaf_new_nodes(struct tree_balance *tb,
1254} 1269}
1255 1270
1256static void balance_leaf_finish_node_insert(struct tree_balance *tb, 1271static void balance_leaf_finish_node_insert(struct tree_balance *tb,
1257 struct item_head *ih, 1272 struct item_head * const ih,
1258 const char *body) 1273 const char * const body)
1259{ 1274{
1260 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 1275 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1261 struct buffer_info bi; 1276 struct buffer_info bi;
@@ -1271,8 +1286,8 @@ static void balance_leaf_finish_node_insert(struct tree_balance *tb,
1271} 1286}
1272 1287
1273static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb, 1288static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb,
1274 struct item_head *ih, 1289 struct item_head * const ih,
1275 const char *body) 1290 const char * const body)
1276{ 1291{
1277 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 1292 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1278 struct item_head *pasted = item_head(tbS0, tb->item_pos); 1293 struct item_head *pasted = item_head(tbS0, tb->item_pos);
@@ -1305,8 +1320,8 @@ static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb,
1305} 1320}
1306 1321
1307static void balance_leaf_finish_node_paste(struct tree_balance *tb, 1322static void balance_leaf_finish_node_paste(struct tree_balance *tb,
1308 struct item_head *ih, 1323 struct item_head * const ih,
1309 const char *body) 1324 const char * const body)
1310{ 1325{
1311 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 1326 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1312 struct buffer_info bi; 1327 struct buffer_info bi;
@@ -1349,8 +1364,8 @@ static void balance_leaf_finish_node_paste(struct tree_balance *tb,
1349 * of the affected item which remains in S 1364 * of the affected item which remains in S
1350 */ 1365 */
1351static void balance_leaf_finish_node(struct tree_balance *tb, 1366static void balance_leaf_finish_node(struct tree_balance *tb,
1352 struct item_head *ih, 1367 struct item_head * const ih,
1353 const char *body, int flag) 1368 const char * const body, int flag)
1354{ 1369{
1355 /* if we must insert or append into buffer S[0] */ 1370 /* if we must insert or append into buffer S[0] */
1356 if (0 <= tb->item_pos && tb->item_pos < tb->s0num) { 1371 if (0 <= tb->item_pos && tb->item_pos < tb->s0num) {
@@ -1402,7 +1417,7 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih,
1402 && is_indirect_le_ih(item_head(tbS0, tb->item_pos))) 1417 && is_indirect_le_ih(item_head(tbS0, tb->item_pos)))
1403 tb->pos_in_item *= UNFM_P_SIZE; 1418 tb->pos_in_item *= UNFM_P_SIZE;
1404 1419
1405 balance_leaf_left(tb, ih, body, flag); 1420 body += balance_leaf_left(tb, ih, body, flag);
1406 1421
1407 /* tb->lnum[0] > 0 */ 1422 /* tb->lnum[0] > 0 */
1408 /* Calculate new item position */ 1423 /* Calculate new item position */
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index db9e80ba53a0..751dd3f4346b 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -6,7 +6,7 @@
6#include "reiserfs.h" 6#include "reiserfs.h"
7#include "acl.h" 7#include "acl.h"
8#include "xattr.h" 8#include "xattr.h"
9#include <asm/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/pagemap.h> 10#include <linux/pagemap.h>
11#include <linux/swap.h> 11#include <linux/swap.h>
12#include <linux/writeback.h> 12#include <linux/writeback.h>
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index 73231b1ebdbe..b751eea32e20 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -2,7 +2,7 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <asm/uaccess.h> 5#include <linux/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 63b2b0ec49e6..a7eec9888f10 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -11,7 +11,7 @@
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/highmem.h> 12#include <linux/highmem.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <asm/uaccess.h> 14#include <linux/uaccess.h>
15#include <asm/unaligned.h> 15#include <asm/unaligned.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
17#include <linux/mpage.h> 17#include <linux/mpage.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 501ed6811a2b..6ec8a30a0911 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -7,7 +7,7 @@
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
9#include <linux/time.h> 9#include <linux/time.h>
10#include <asm/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/compat.h> 12#include <linux/compat.h>
13 13
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index cfaee912ee09..aca73dd73906 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -54,7 +54,7 @@ static void sd_print_item(struct item_head *ih, char *item)
54 } else { 54 } else {
55 struct stat_data *sd = (struct stat_data *)item; 55 struct stat_data *sd = (struct stat_data *)item;
56 56
57 printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), 57 printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
58 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), 58 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
59 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); 59 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
60 } 60 }
@@ -408,7 +408,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
408 namebuf[namelen + 2] = 0; 408 namebuf[namelen + 2] = 0;
409 } 409 }
410 410
411 printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", 411 printk("%d: %-15s%-15d%-15d%-15lld%-15lld(%s)\n",
412 i, namebuf, 412 i, namebuf,
413 deh_dir_id(deh), deh_objectid(deh), 413 deh_dir_id(deh), deh_objectid(deh),
414 GET_HASH_VALUE(deh_offset(deh)), 414 GET_HASH_VALUE(deh_offset(deh)),
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e8870de4627e..a88b1b3e7db3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1947,8 +1947,6 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1947 } 1947 }
1948 } 1948 }
1949 1949
1950 /* wait for all commits to finish */
1951 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
1952 1950
1953 /* 1951 /*
1954 * We must release the write lock here because 1952 * We must release the write lock here because
@@ -1956,8 +1954,14 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1956 */ 1954 */
1957 reiserfs_write_unlock(sb); 1955 reiserfs_write_unlock(sb);
1958 1956
1957 /*
1958 * Cancel flushing of old commits. Note that neither of these works
1959 * will be requeued because superblock is being shutdown and doesn't
1960 * have MS_ACTIVE set.
1961 */
1959 cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); 1962 cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
1960 flush_workqueue(REISERFS_SB(sb)->commit_wq); 1963 /* wait for all commits to finish */
1964 cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work);
1961 1965
1962 free_journal_ram(sb); 1966 free_journal_ram(sb);
1963 1967
@@ -4292,9 +4296,15 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
4292 if (flush) { 4296 if (flush) {
4293 flush_commit_list(sb, jl, 1); 4297 flush_commit_list(sb, jl, 1);
4294 flush_journal_list(sb, jl, 1); 4298 flush_journal_list(sb, jl, 1);
4295 } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 4299 } else if (!(jl->j_state & LIST_COMMIT_PENDING)) {
4296 queue_delayed_work(REISERFS_SB(sb)->commit_wq, 4300 /*
4297 &journal->j_work, HZ / 10); 4301 * Avoid queueing work when sb is being shut down. Transaction
4302 * will be flushed on journal shutdown.
4303 */
4304 if (sb->s_flags & MS_ACTIVE)
4305 queue_delayed_work(REISERFS_SB(sb)->commit_wq,
4306 &journal->j_work, HZ / 10);
4307 }
4298 4308
4299 /* 4309 /*
4300 * if the next transaction has any chance of wrapping, flush 4310 * if the next transaction has any chance of wrapping, flush
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index d6744c8b24e1..249594a821e0 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -2,7 +2,7 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5#include <asm/uaccess.h> 5#include <linux/uaccess.h>
6#include <linux/string.h> 6#include <linux/string.h>
7#include <linux/time.h> 7#include <linux/time.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
@@ -899,8 +899,9 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
899 899
900/* insert item into the leaf node in position before */ 900/* insert item into the leaf node in position before */
901void leaf_insert_into_buf(struct buffer_info *bi, int before, 901void leaf_insert_into_buf(struct buffer_info *bi, int before,
902 struct item_head *inserted_item_ih, 902 struct item_head * const inserted_item_ih,
903 const char *inserted_item_body, int zeros_number) 903 const char * const inserted_item_body,
904 int zeros_number)
904{ 905{
905 struct buffer_head *bh = bi->bi_bh; 906 struct buffer_head *bh = bi->bi_bh;
906 int nr, free_space; 907 int nr, free_space;
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c9b47e91baf8..ae1dc841db3a 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -17,7 +17,7 @@ static char off_buf[80];
17static char *reiserfs_cpu_offset(struct cpu_key *key) 17static char *reiserfs_cpu_offset(struct cpu_key *key)
18{ 18{
19 if (cpu_key_k_type(key) == TYPE_DIRENTRY) 19 if (cpu_key_k_type(key) == TYPE_DIRENTRY)
20 sprintf(off_buf, "%Lu(%Lu)", 20 sprintf(off_buf, "%llu(%llu)",
21 (unsigned long long) 21 (unsigned long long)
22 GET_HASH_VALUE(cpu_key_k_offset(key)), 22 GET_HASH_VALUE(cpu_key_k_offset(key)),
23 (unsigned long long) 23 (unsigned long long)
@@ -34,7 +34,7 @@ static char *le_offset(struct reiserfs_key *key)
34 34
35 version = le_key_version(key); 35 version = le_key_version(key);
36 if (le_key_k_type(version, key) == TYPE_DIRENTRY) 36 if (le_key_k_type(version, key) == TYPE_DIRENTRY)
37 sprintf(off_buf, "%Lu(%Lu)", 37 sprintf(off_buf, "%llu(%llu)",
38 (unsigned long long) 38 (unsigned long long)
39 GET_HASH_VALUE(le_key_k_offset(version, key)), 39 GET_HASH_VALUE(le_key_k_offset(version, key)),
40 (unsigned long long) 40 (unsigned long long)
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 02b0b7d0f7d5..621b9f381fe1 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -11,7 +11,7 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/seq_file.h> 13#include <linux/seq_file.h>
14#include <asm/uaccess.h> 14#include <linux/uaccess.h>
15#include "reiserfs.h" 15#include "reiserfs.h"
16#include <linux/init.h> 16#include <linux/init.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index bf53888c7f59..735c2c2b4536 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -3216,11 +3216,12 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes);
3216void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, 3216void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first,
3217 int del_num, int del_bytes); 3217 int del_num, int del_bytes);
3218void leaf_insert_into_buf(struct buffer_info *bi, int before, 3218void leaf_insert_into_buf(struct buffer_info *bi, int before,
3219 struct item_head *inserted_item_ih, 3219 struct item_head * const inserted_item_ih,
3220 const char *inserted_item_body, int zeros_number); 3220 const char * const inserted_item_body,
3221void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
3222 int pos_in_item, int paste_size, const char *body,
3223 int zeros_number); 3221 int zeros_number);
3222void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num,
3223 int pos_in_item, int paste_size,
3224 const char * const body, int zeros_number);
3224void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, 3225void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
3225 int pos_in_item, int cut_size); 3226 int pos_in_item, int cut_size);
3226void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, 3227void leaf_paste_entries(struct buffer_info *bi, int item_num, int before,
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index dd44468edc2b..24cbe013240f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2006,7 +2006,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
2006 &s_search_path) == POSITION_FOUND); 2006 &s_search_path) == POSITION_FOUND);
2007 2007
2008 RFALSE(file_size > ROUND_UP(new_file_size), 2008 RFALSE(file_size > ROUND_UP(new_file_size),
2009 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", 2009 "PAP-5680: truncate did not finish: new_file_size %lld, current %lld, oid %d",
2010 new_file_size, file_size, s_item_key.on_disk_key.k_objectid); 2010 new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
2011 2011
2012update_and_out: 2012update_and_out:
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index a392cef6acc6..d46e88a33b02 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -15,7 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/time.h> 17#include <linux/time.h>
18#include <asm/uaccess.h> 18#include <linux/uaccess.h>
19#include "reiserfs.h" 19#include "reiserfs.h"
20#include "acl.h" 20#include "acl.h"
21#include "xattr.h" 21#include "xattr.h"
@@ -100,7 +100,11 @@ void reiserfs_schedule_old_flush(struct super_block *s)
100 struct reiserfs_sb_info *sbi = REISERFS_SB(s); 100 struct reiserfs_sb_info *sbi = REISERFS_SB(s);
101 unsigned long delay; 101 unsigned long delay;
102 102
103 if (s->s_flags & MS_RDONLY) 103 /*
104 * Avoid scheduling flush when sb is being shut down. It can race
105 * with journal shutdown and free still queued delayed work.
106 */
107 if (s->s_flags & MS_RDONLY || !(s->s_flags & MS_ACTIVE))
104 return; 108 return;
105 109
106 spin_lock(&sbi->old_work_lock); 110 spin_lock(&sbi->old_work_lock);
@@ -331,7 +335,7 @@ static int finish_unfinished(struct super_block *s)
331 * not completed truncate found. New size was 335 * not completed truncate found. New size was
332 * committed together with "save" link 336 * committed together with "save" link
333 */ 337 */
334 reiserfs_info(s, "Truncating %k to %Ld ..", 338 reiserfs_info(s, "Truncating %k to %lld ..",
335 INODE_PKEY(inode), inode->i_size); 339 INODE_PKEY(inode), inode->i_size);
336 340
337 /* don't update modification time */ 341 /* don't update modification time */
@@ -1577,7 +1581,7 @@ static int read_super_block(struct super_block *s, int offset)
1577 rs = (struct reiserfs_super_block *)bh->b_data; 1581 rs = (struct reiserfs_super_block *)bh->b_data;
1578 if (sb_blocksize(rs) != s->s_blocksize) { 1582 if (sb_blocksize(rs) != s->s_blocksize) {
1579 reiserfs_warning(s, "sh-2011", "can't find a reiserfs " 1583 reiserfs_warning(s, "sh-2011", "can't find a reiserfs "
1580 "filesystem on (dev %s, block %Lu, size %lu)", 1584 "filesystem on (dev %s, block %llu, size %lu)",
1581 s->s_id, 1585 s->s_id,
1582 (unsigned long long)bh->b_blocknr, 1586 (unsigned long long)bh->b_blocknr,
1583 s->s_blocksize); 1587 s->s_blocksize);
@@ -2441,8 +2445,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2441 struct buffer_head tmp_bh, *bh; 2445 struct buffer_head tmp_bh, *bh;
2442 2446
2443 if (!current->journal_info) { 2447 if (!current->journal_info) {
2444 printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)" 2448 printk(KERN_WARNING "reiserfs: Quota write (off=%llu, len=%llu) cancelled because transaction is not started.\n",
2445 " cancelled because transaction is not started.\n",
2446 (unsigned long long)off, (unsigned long long)len); 2449 (unsigned long long)off, (unsigned long long)len);
2447 return -EIO; 2450 return -EIO;
2448 } 2451 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ca416d099e7d..7c36898af402 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -45,7 +45,7 @@
45#include <linux/xattr.h> 45#include <linux/xattr.h>
46#include "xattr.h" 46#include "xattr.h"
47#include "acl.h" 47#include "acl.h"
48#include <asm/uaccess.h> 48#include <linux/uaccess.h>
49#include <net/checksum.h> 49#include <net/checksum.h>
50#include <linux/stat.h> 50#include <linux/stat.h>
51#include <linux/quotaops.h> 51#include <linux/quotaops.h>
@@ -84,6 +84,7 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
84static int xattr_unlink(struct inode *dir, struct dentry *dentry) 84static int xattr_unlink(struct inode *dir, struct dentry *dentry)
85{ 85{
86 int error; 86 int error;
87
87 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 88 BUG_ON(!mutex_is_locked(&dir->i_mutex));
88 89
89 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 90 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -98,6 +99,7 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
98static int xattr_rmdir(struct inode *dir, struct dentry *dentry) 99static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
99{ 100{
100 int error; 101 int error;
102
101 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 103 BUG_ON(!mutex_is_locked(&dir->i_mutex));
102 104
103 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 105 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
@@ -117,6 +119,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
117{ 119{
118 struct dentry *privroot = REISERFS_SB(sb)->priv_root; 120 struct dentry *privroot = REISERFS_SB(sb)->priv_root;
119 struct dentry *xaroot; 121 struct dentry *xaroot;
122
120 if (!privroot->d_inode) 123 if (!privroot->d_inode)
121 return ERR_PTR(-ENODATA); 124 return ERR_PTR(-ENODATA);
122 125
@@ -127,6 +130,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
127 xaroot = ERR_PTR(-ENODATA); 130 xaroot = ERR_PTR(-ENODATA);
128 else if (!xaroot->d_inode) { 131 else if (!xaroot->d_inode) {
129 int err = -ENODATA; 132 int err = -ENODATA;
133
130 if (xattr_may_create(flags)) 134 if (xattr_may_create(flags))
131 err = xattr_mkdir(privroot->d_inode, xaroot, 0700); 135 err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
132 if (err) { 136 if (err) {
@@ -157,6 +161,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
157 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); 161 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
158 if (!IS_ERR(xadir) && !xadir->d_inode) { 162 if (!IS_ERR(xadir) && !xadir->d_inode) {
159 int err = -ENODATA; 163 int err = -ENODATA;
164
160 if (xattr_may_create(flags)) 165 if (xattr_may_create(flags))
161 err = xattr_mkdir(xaroot->d_inode, xadir, 0700); 166 err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
162 if (err) { 167 if (err) {
@@ -188,6 +193,7 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
188{ 193{
189 struct reiserfs_dentry_buf *dbuf = buf; 194 struct reiserfs_dentry_buf *dbuf = buf;
190 struct dentry *dentry; 195 struct dentry *dentry;
196
191 WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); 197 WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
192 198
193 if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) 199 if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
@@ -218,6 +224,7 @@ static void
218cleanup_dentry_buf(struct reiserfs_dentry_buf *buf) 224cleanup_dentry_buf(struct reiserfs_dentry_buf *buf)
219{ 225{
220 int i; 226 int i;
227
221 for (i = 0; i < buf->count; i++) 228 for (i = 0; i < buf->count; i++)
222 if (buf->dentries[i]) 229 if (buf->dentries[i])
223 dput(buf->dentries[i]); 230 dput(buf->dentries[i]);
@@ -283,11 +290,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
283 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 290 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
284 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 291 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
285 struct reiserfs_transaction_handle th; 292 struct reiserfs_transaction_handle th;
293
286 reiserfs_write_lock(inode->i_sb); 294 reiserfs_write_lock(inode->i_sb);
287 err = journal_begin(&th, inode->i_sb, blocks); 295 err = journal_begin(&th, inode->i_sb, blocks);
288 reiserfs_write_unlock(inode->i_sb); 296 reiserfs_write_unlock(inode->i_sb);
289 if (!err) { 297 if (!err) {
290 int jerror; 298 int jerror;
299
291 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 300 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
292 I_MUTEX_XATTR); 301 I_MUTEX_XATTR);
293 err = action(dir, data); 302 err = action(dir, data);
@@ -340,6 +349,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
340int reiserfs_delete_xattrs(struct inode *inode) 349int reiserfs_delete_xattrs(struct inode *inode)
341{ 350{
342 int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL); 351 int err = reiserfs_for_each_xattr(inode, delete_one_xattr, NULL);
352
343 if (err) 353 if (err)
344 reiserfs_warning(inode->i_sb, "jdm-20004", 354 reiserfs_warning(inode->i_sb, "jdm-20004",
345 "Couldn't delete all xattrs (%d)\n", err); 355 "Couldn't delete all xattrs (%d)\n", err);
@@ -350,6 +360,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
350int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) 360int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
351{ 361{
352 int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs); 362 int err = reiserfs_for_each_xattr(inode, chown_one_xattr, attrs);
363
353 if (err) 364 if (err)
354 reiserfs_warning(inode->i_sb, "jdm-20007", 365 reiserfs_warning(inode->i_sb, "jdm-20007",
355 "Couldn't chown all xattrs (%d)\n", err); 366 "Couldn't chown all xattrs (%d)\n", err);
@@ -439,6 +450,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
439static void update_ctime(struct inode *inode) 450static void update_ctime(struct inode *inode)
440{ 451{
441 struct timespec now = current_fs_time(inode->i_sb); 452 struct timespec now = current_fs_time(inode->i_sb);
453
442 if (inode_unhashed(inode) || !inode->i_nlink || 454 if (inode_unhashed(inode) || !inode->i_nlink ||
443 timespec_equal(&inode->i_ctime, &now)) 455 timespec_equal(&inode->i_ctime, &now))
444 return; 456 return;
@@ -514,6 +526,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
514 size_t chunk; 526 size_t chunk;
515 size_t skip = 0; 527 size_t skip = 0;
516 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); 528 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
529
517 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) 530 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
518 chunk = PAGE_CACHE_SIZE; 531 chunk = PAGE_CACHE_SIZE;
519 else 532 else
@@ -530,6 +543,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
530 543
531 if (file_pos == 0) { 544 if (file_pos == 0) {
532 struct reiserfs_xattr_header *rxh; 545 struct reiserfs_xattr_header *rxh;
546
533 skip = file_pos = sizeof(struct reiserfs_xattr_header); 547 skip = file_pos = sizeof(struct reiserfs_xattr_header);
534 if (chunk + skip > PAGE_CACHE_SIZE) 548 if (chunk + skip > PAGE_CACHE_SIZE)
535 chunk = PAGE_CACHE_SIZE - skip; 549 chunk = PAGE_CACHE_SIZE - skip;
@@ -659,6 +673,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
659 size_t chunk; 673 size_t chunk;
660 char *data; 674 char *data;
661 size_t skip = 0; 675 size_t skip = 0;
676
662 if (isize - file_pos > PAGE_CACHE_SIZE) 677 if (isize - file_pos > PAGE_CACHE_SIZE)
663 chunk = PAGE_CACHE_SIZE; 678 chunk = PAGE_CACHE_SIZE;
664 else 679 else
@@ -792,6 +807,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
792int reiserfs_removexattr(struct dentry *dentry, const char *name) 807int reiserfs_removexattr(struct dentry *dentry, const char *name)
793{ 808{
794 const struct xattr_handler *handler; 809 const struct xattr_handler *handler;
810
795 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); 811 handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
796 812
797 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) 813 if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
@@ -813,9 +829,11 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
813{ 829{
814 struct listxattr_buf *b = (struct listxattr_buf *)buf; 830 struct listxattr_buf *b = (struct listxattr_buf *)buf;
815 size_t size; 831 size_t size;
832
816 if (name[0] != '.' || 833 if (name[0] != '.' ||
817 (namelen != 1 && (name[1] != '.' || namelen != 2))) { 834 (namelen != 1 && (name[1] != '.' || namelen != 2))) {
818 const struct xattr_handler *handler; 835 const struct xattr_handler *handler;
836
819 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr, 837 handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
820 name); 838 name);
821 if (!handler) /* Unsupported xattr name */ 839 if (!handler) /* Unsupported xattr name */
@@ -885,6 +903,7 @@ static int create_privroot(struct dentry *dentry)
885{ 903{
886 int err; 904 int err;
887 struct inode *inode = dentry->d_parent->d_inode; 905 struct inode *inode = dentry->d_parent->d_inode;
906
888 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); 907 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
889 908
890 err = xattr_mkdir(inode, dentry, 0700); 909 err = xattr_mkdir(inode, dentry, 0700);
@@ -1015,6 +1034,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1015 mutex_lock(&privroot->d_inode->i_mutex); 1034 mutex_lock(&privroot->d_inode->i_mutex);
1016 if (!REISERFS_SB(s)->xattr_root) { 1035 if (!REISERFS_SB(s)->xattr_root) {
1017 struct dentry *dentry; 1036 struct dentry *dentry;
1037
1018 dentry = lookup_one_len(XAROOT_NAME, privroot, 1038 dentry = lookup_one_len(XAROOT_NAME, privroot,
1019 strlen(XAROOT_NAME)); 1039 strlen(XAROOT_NAME));
1020 if (!IS_ERR(dentry)) 1040 if (!IS_ERR(dentry))
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44503e293790..4b34b9dc03dd 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,7 @@
9#include <linux/posix_acl_xattr.h> 9#include <linux/posix_acl_xattr.h>
10#include "xattr.h" 10#include "xattr.h"
11#include "acl.h" 11#include "acl.h"
12#include <asm/uaccess.h> 12#include <linux/uaccess.h>
13 13
14static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th, 14static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
15 struct inode *inode, int type, 15 struct inode *inode, int type,
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 800a3cef6f62..e7f8939a4cb5 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,7 +6,7 @@
6#include <linux/slab.h> 6#include <linux/slab.h>
7#include "xattr.h" 7#include "xattr.h"
8#include <linux/security.h> 8#include <linux/security.h>
9#include <asm/uaccess.h> 9#include <linux/uaccess.h>
10 10
11static int 11static int
12security_get(struct dentry *dentry, const char *name, void *buffer, size_t size, 12security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a0035719f66b..5eeb0c48ba46 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -5,7 +5,7 @@
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include "xattr.h" 7#include "xattr.h"
8#include <asm/uaccess.h> 8#include <linux/uaccess.h>
9 9
10static int 10static int
11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size, 11trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 8667491ae7c3..e50eab046471 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -4,7 +4,7 @@
4#include <linux/pagemap.h> 4#include <linux/pagemap.h>
5#include <linux/xattr.h> 5#include <linux/xattr.h>
6#include "xattr.h" 6#include "xattr.h"
7#include <asm/uaccess.h> 7#include <linux/uaccess.h>
8 8
9static int 9static int
10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size, 10user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ef90e8bca95a..e98dd88197d5 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -56,6 +56,8 @@
56 * 2 of the Licence, or (at your option) any later version. 56 * 2 of the Licence, or (at your option) any later version.
57 */ 57 */
58 58
59#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
60
59#include <linux/module.h> 61#include <linux/module.h>
60#include <linux/string.h> 62#include <linux/string.h>
61#include <linux/fs.h> 63#include <linux/fs.h>
@@ -380,7 +382,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
380eio: 382eio:
381 ret = -EIO; 383 ret = -EIO;
382error: 384error:
383 printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos); 385 pr_err("read error for inode 0x%lx\n", pos);
384 return ERR_PTR(ret); 386 return ERR_PTR(ret);
385} 387}
386 388
@@ -390,6 +392,7 @@ error:
390static struct inode *romfs_alloc_inode(struct super_block *sb) 392static struct inode *romfs_alloc_inode(struct super_block *sb)
391{ 393{
392 struct romfs_inode_info *inode; 394 struct romfs_inode_info *inode;
395
393 inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); 396 inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL);
394 return inode ? &inode->vfs_inode : NULL; 397 return inode ? &inode->vfs_inode : NULL;
395} 398}
@@ -400,6 +403,7 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
400static void romfs_i_callback(struct rcu_head *head) 403static void romfs_i_callback(struct rcu_head *head)
401{ 404{
402 struct inode *inode = container_of(head, struct inode, i_rcu); 405 struct inode *inode = container_of(head, struct inode, i_rcu);
406
403 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
404} 408}
405 409
@@ -507,15 +511,13 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
507 if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 || 511 if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 ||
508 img_size < ROMFH_SIZE) { 512 img_size < ROMFH_SIZE) {
509 if (!silent) 513 if (!silent)
510 printk(KERN_WARNING "VFS:" 514 pr_warn("VFS: Can't find a romfs filesystem on dev %s.\n",
511 " Can't find a romfs filesystem on dev %s.\n",
512 sb->s_id); 515 sb->s_id);
513 goto error_rsb_inval; 516 goto error_rsb_inval;
514 } 517 }
515 518
516 if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) { 519 if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) {
517 printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n", 520 pr_err("bad initial checksum on dev %s.\n", sb->s_id);
518 sb->s_id);
519 goto error_rsb_inval; 521 goto error_rsb_inval;
520 } 522 }
521 523
@@ -523,8 +525,8 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
523 525
524 len = strnlen(rsb->name, ROMFS_MAXFN); 526 len = strnlen(rsb->name, ROMFS_MAXFN);
525 if (!silent) 527 if (!silent)
526 printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n", 528 pr_notice("Mounting image '%*.*s' through %s\n",
527 (unsigned) len, (unsigned) len, rsb->name, storage); 529 (unsigned) len, (unsigned) len, rsb->name, storage);
528 530
529 kfree(rsb); 531 kfree(rsb);
530 rsb = NULL; 532 rsb = NULL;
@@ -614,7 +616,7 @@ static int __init init_romfs_fs(void)
614{ 616{
615 int ret; 617 int ret;
616 618
617 printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n"); 619 pr_info("ROMFS MTD (C) 2007 Red Hat, Inc.\n");
618 620
619 romfs_inode_cachep = 621 romfs_inode_cachep =
620 kmem_cache_create("romfs_i", 622 kmem_cache_create("romfs_i",
@@ -623,13 +625,12 @@ static int __init init_romfs_fs(void)
623 romfs_i_init_once); 625 romfs_i_init_once);
624 626
625 if (!romfs_inode_cachep) { 627 if (!romfs_inode_cachep) {
626 printk(KERN_ERR 628 pr_err("Failed to initialise inode cache\n");
627 "ROMFS error: Failed to initialise inode cache\n");
628 return -ENOMEM; 629 return -ENOMEM;
629 } 630 }
630 ret = register_filesystem(&romfs_fs_type); 631 ret = register_filesystem(&romfs_fs_type);
631 if (ret) { 632 if (ret) {
632 printk(KERN_ERR "ROMFS error: Failed to register filesystem\n"); 633 pr_err("Failed to register filesystem\n");
633 goto error_register; 634 goto error_register;
634 } 635 }
635 return 0; 636 return 0;
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 62a0de6632e1..43e7a7eddac0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
44 44
45 pages = end_index - start_index + 1; 45 pages = end_index - start_index + 1;
46 46
47 page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); 47 page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL);
48 if (page == NULL) 48 if (page == NULL)
49 return res; 49 return res;
50 50
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 031c8d67fd51..5056babe00df 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -27,6 +27,8 @@
27 * the filesystem. 27 * the filesystem.
28 */ 28 */
29 29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
30#include <linux/fs.h> 32#include <linux/fs.h>
31#include <linux/vfs.h> 33#include <linux/vfs.h>
32#include <linux/slab.h> 34#include <linux/slab.h>
@@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void)
448 return err; 450 return err;
449 } 451 }
450 452
451 printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " 453 pr_info("version 4.0 (2009/01/31) Phillip Lougher\n");
452 "Phillip Lougher\n");
453 454
454 return 0; 455 return 0;
455} 456}
diff --git a/fs/super.c b/fs/super.c
index d20d5b11dedf..b9a214d2fe98 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -22,7 +22,6 @@
22 22
23#include <linux/export.h> 23#include <linux/export.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/acct.h>
26#include <linux/blkdev.h> 25#include <linux/blkdev.h>
27#include <linux/mount.h> 26#include <linux/mount.h>
28#include <linux/security.h> 27#include <linux/security.h>
@@ -218,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
218 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); 217 lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
219 mutex_init(&s->s_dquot.dqio_mutex); 218 mutex_init(&s->s_dquot.dqio_mutex);
220 mutex_init(&s->s_dquot.dqonoff_mutex); 219 mutex_init(&s->s_dquot.dqonoff_mutex);
221 init_rwsem(&s->s_dquot.dqptr_sem);
222 s->s_maxbytes = MAX_NON_LFS; 220 s->s_maxbytes = MAX_NON_LFS;
223 s->s_op = &default_op; 221 s->s_op = &default_op;
224 s->s_time_gran = 1000000000; 222 s->s_time_gran = 1000000000;
@@ -702,12 +700,22 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
702 return -EACCES; 700 return -EACCES;
703#endif 701#endif
704 702
705 if (flags & MS_RDONLY)
706 acct_auto_close(sb);
707 shrink_dcache_sb(sb);
708
709 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); 703 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
710 704
705 if (remount_ro) {
706 if (sb->s_pins.first) {
707 up_write(&sb->s_umount);
708 sb_pin_kill(sb);
709 down_write(&sb->s_umount);
710 if (!sb->s_root)
711 return 0;
712 if (sb->s_writers.frozen != SB_UNFROZEN)
713 return -EBUSY;
714 remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
715 }
716 }
717 shrink_dcache_sb(sb);
718
711 /* If we are remounting RDONLY and current sb is read/write, 719 /* If we are remounting RDONLY and current sb is read/write,
712 make sure there are no rw files opened */ 720 make sure there are no rw files opened */
713 if (remount_ro) { 721 if (remount_ro) {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 0013142c0475..80c350216ea8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -35,8 +35,9 @@ struct timerfd_ctx {
35 ktime_t moffs; 35 ktime_t moffs;
36 wait_queue_head_t wqh; 36 wait_queue_head_t wqh;
37 u64 ticks; 37 u64 ticks;
38 int expired;
39 int clockid; 38 int clockid;
39 short unsigned expired;
40 short unsigned settime_flags; /* to show in fdinfo */
40 struct rcu_head rcu; 41 struct rcu_head rcu;
41 struct list_head clist; 42 struct list_head clist;
42 bool might_cancel; 43 bool might_cancel;
@@ -92,7 +93,7 @@ static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
92 */ 93 */
93void timerfd_clock_was_set(void) 94void timerfd_clock_was_set(void)
94{ 95{
95 ktime_t moffs = ktime_get_monotonic_offset(); 96 ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
96 struct timerfd_ctx *ctx; 97 struct timerfd_ctx *ctx;
97 unsigned long flags; 98 unsigned long flags;
98 99
@@ -125,7 +126,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
125{ 126{
126 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) 127 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
127 return false; 128 return false;
128 ctx->moffs = ktime_get_monotonic_offset(); 129 ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
129 return true; 130 return true;
130} 131}
131 132
@@ -196,6 +197,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
196 if (timerfd_canceled(ctx)) 197 if (timerfd_canceled(ctx))
197 return -ECANCELED; 198 return -ECANCELED;
198 } 199 }
200
201 ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
199 return 0; 202 return 0;
200} 203}
201 204
@@ -284,11 +287,77 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
284 return res; 287 return res;
285} 288}
286 289
290#ifdef CONFIG_PROC_FS
291static int timerfd_show(struct seq_file *m, struct file *file)
292{
293 struct timerfd_ctx *ctx = file->private_data;
294 struct itimerspec t;
295
296 spin_lock_irq(&ctx->wqh.lock);
297 t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
298 t.it_interval = ktime_to_timespec(ctx->tintv);
299 spin_unlock_irq(&ctx->wqh.lock);
300
301 return seq_printf(m,
302 "clockid: %d\n"
303 "ticks: %llu\n"
304 "settime flags: 0%o\n"
305 "it_value: (%llu, %llu)\n"
306 "it_interval: (%llu, %llu)\n",
307 ctx->clockid, (unsigned long long)ctx->ticks,
308 ctx->settime_flags,
309 (unsigned long long)t.it_value.tv_sec,
310 (unsigned long long)t.it_value.tv_nsec,
311 (unsigned long long)t.it_interval.tv_sec,
312 (unsigned long long)t.it_interval.tv_nsec);
313}
314#else
315#define timerfd_show NULL
316#endif
317
318#ifdef CONFIG_CHECKPOINT_RESTORE
319static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
320{
321 struct timerfd_ctx *ctx = file->private_data;
322 int ret = 0;
323
324 switch (cmd) {
325 case TFD_IOC_SET_TICKS: {
326 u64 ticks;
327
328 if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
329 return -EFAULT;
330 if (!ticks)
331 return -EINVAL;
332
333 spin_lock_irq(&ctx->wqh.lock);
334 if (!timerfd_canceled(ctx)) {
335 ctx->ticks = ticks;
336 if (ticks)
337 wake_up_locked(&ctx->wqh);
338 } else
339 ret = -ECANCELED;
340 spin_unlock_irq(&ctx->wqh.lock);
341 break;
342 }
343 default:
344 ret = -ENOTTY;
345 break;
346 }
347
348 return ret;
349}
350#else
351#define timerfd_ioctl NULL
352#endif
353
287static const struct file_operations timerfd_fops = { 354static const struct file_operations timerfd_fops = {
288 .release = timerfd_release, 355 .release = timerfd_release,
289 .poll = timerfd_poll, 356 .poll = timerfd_poll,
290 .read = timerfd_read, 357 .read = timerfd_read,
291 .llseek = noop_llseek, 358 .llseek = noop_llseek,
359 .show_fdinfo = timerfd_show,
360 .unlocked_ioctl = timerfd_ioctl,
292}; 361};
293 362
294static int timerfd_fget(int fd, struct fd *p) 363static int timerfd_fget(int fd, struct fd *p)
@@ -336,7 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
336 else 405 else
337 hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); 406 hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
338 407
339 ctx->moffs = ktime_get_monotonic_offset(); 408 ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 });
340 409
341 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 410 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
342 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 411 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index ff8229340cd5..aa13ad053b14 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -174,7 +174,6 @@ static int do_commit(struct ubifs_info *c)
174 if (err) 174 if (err)
175 goto out; 175 goto out;
176 176
177 mutex_lock(&c->mst_mutex);
178 c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); 177 c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
179 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); 178 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
180 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); 179 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
@@ -204,7 +203,6 @@ static int do_commit(struct ubifs_info *c)
204 else 203 else
205 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); 204 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
206 err = ubifs_write_master(c); 205 err = ubifs_write_master(c);
207 mutex_unlock(&c->mst_mutex);
208 if (err) 206 if (err)
209 goto out; 207 goto out;
210 208
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 2290d5866725..fb08b0c514b6 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -431,7 +431,7 @@ void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
431 431
432/** 432/**
433 * wbuf_timer_callback - write-buffer timer callback function. 433 * wbuf_timer_callback - write-buffer timer callback function.
434 * @data: timer data (write-buffer descriptor) 434 * @timer: timer data (write-buffer descriptor)
435 * 435 *
436 * This function is called when the write-buffer timer expires. 436 * This function is called when the write-buffer timer expires.
437 */ 437 */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index a902c5919e42..a47ddfc9be6b 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -240,6 +240,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
240 240
241 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { 241 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
242 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); 242 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
243 ubifs_assert(c->lhead_lnum != c->ltail_lnum);
243 c->lhead_offs = 0; 244 c->lhead_offs = 0;
244 } 245 }
245 246
@@ -404,15 +405,14 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
404 /* Switch to the next log LEB */ 405 /* Switch to the next log LEB */
405 if (c->lhead_offs) { 406 if (c->lhead_offs) {
406 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); 407 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
408 ubifs_assert(c->lhead_lnum != c->ltail_lnum);
407 c->lhead_offs = 0; 409 c->lhead_offs = 0;
408 } 410 }
409 411
410 if (c->lhead_offs == 0) { 412 /* Must ensure next LEB has been unmapped */
411 /* Must ensure next LEB has been unmapped */ 413 err = ubifs_leb_unmap(c, c->lhead_lnum);
412 err = ubifs_leb_unmap(c, c->lhead_lnum); 414 if (err)
413 if (err) 415 goto out;
414 goto out;
415 }
416 416
417 len = ALIGN(len, c->min_io_size); 417 len = ALIGN(len, c->min_io_size);
418 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len); 418 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index d46b19ec1815..421bd0a80424 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1464,7 +1464,6 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
1464 return ERR_CAST(nnode); 1464 return ERR_CAST(nnode);
1465 } 1465 }
1466 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); 1466 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1467 shft -= UBIFS_LPT_FANOUT_SHIFT;
1468 pnode = ubifs_get_pnode(c, nnode, iip); 1467 pnode = ubifs_get_pnode(c, nnode, iip);
1469 if (IS_ERR(pnode)) 1468 if (IS_ERR(pnode))
1470 return ERR_CAST(pnode); 1469 return ERR_CAST(pnode);
@@ -1604,7 +1603,6 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
1604 return ERR_CAST(nnode); 1603 return ERR_CAST(nnode);
1605 } 1604 }
1606 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); 1605 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1607 shft -= UBIFS_LPT_FANOUT_SHIFT;
1608 pnode = ubifs_get_pnode(c, nnode, iip); 1606 pnode = ubifs_get_pnode(c, nnode, iip);
1609 if (IS_ERR(pnode)) 1607 if (IS_ERR(pnode))
1610 return ERR_CAST(pnode); 1608 return ERR_CAST(pnode);
@@ -1964,7 +1962,6 @@ again:
1964 } 1962 }
1965 } 1963 }
1966 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); 1964 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1967 shft -= UBIFS_LPT_FANOUT_SHIFT;
1968 pnode = scan_get_pnode(c, path + h, nnode, iip); 1965 pnode = scan_get_pnode(c, path + h, nnode, iip);
1969 if (IS_ERR(pnode)) { 1966 if (IS_ERR(pnode)) {
1970 err = PTR_ERR(pnode); 1967 err = PTR_ERR(pnode);
@@ -2198,6 +2195,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2198 lprops->dirty); 2195 lprops->dirty);
2199 return -EINVAL; 2196 return -EINVAL;
2200 } 2197 }
2198 break;
2201 case LPROPS_FREEABLE: 2199 case LPROPS_FREEABLE:
2202 case LPROPS_FRDI_IDX: 2200 case LPROPS_FRDI_IDX:
2203 if (lprops->free + lprops->dirty != c->leb_size) { 2201 if (lprops->free + lprops->dirty != c->leb_size) {
@@ -2206,6 +2204,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2206 lprops->dirty); 2204 lprops->dirty);
2207 return -EINVAL; 2205 return -EINVAL;
2208 } 2206 }
2207 break;
2209 } 2208 }
2210 } 2209 }
2211 return 0; 2210 return 0;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 45d4e96a6bac..d9c02928e992 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -304,7 +304,6 @@ static int layout_cnodes(struct ubifs_info *c)
304 ubifs_assert(lnum >= c->lpt_first && 304 ubifs_assert(lnum >= c->lpt_first &&
305 lnum <= c->lpt_last); 305 lnum <= c->lpt_last);
306 } 306 }
307 done_ltab = 1;
308 c->ltab_lnum = lnum; 307 c->ltab_lnum = lnum;
309 c->ltab_offs = offs; 308 c->ltab_offs = offs;
310 offs += c->ltab_sz; 309 offs += c->ltab_sz;
@@ -514,7 +513,6 @@ static int write_cnodes(struct ubifs_info *c)
514 if (err) 513 if (err)
515 return err; 514 return err;
516 } 515 }
517 done_ltab = 1;
518 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); 516 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
519 offs += c->ltab_sz; 517 offs += c->ltab_sz;
520 dbg_chk_lpt_sz(c, 1, c->ltab_sz); 518 dbg_chk_lpt_sz(c, 1, c->ltab_sz);
@@ -1941,6 +1939,11 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1941 pr_err("LEB %d:%d, nnode, ", 1939 pr_err("LEB %d:%d, nnode, ",
1942 lnum, offs); 1940 lnum, offs);
1943 err = ubifs_unpack_nnode(c, p, &nnode); 1941 err = ubifs_unpack_nnode(c, p, &nnode);
1942 if (err) {
1943 pr_err("failed to unpack_node, error %d\n",
1944 err);
1945 break;
1946 }
1944 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 1947 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1945 pr_cont("%d:%d", nnode.nbranch[i].lnum, 1948 pr_cont("%d:%d", nnode.nbranch[i].lnum,
1946 nnode.nbranch[i].offs); 1949 nnode.nbranch[i].offs);
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index ab83ace9910a..1a4bb9e8b3b8 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c)
352 * ubifs_write_master - write master node. 352 * ubifs_write_master - write master node.
353 * @c: UBIFS file-system description object 353 * @c: UBIFS file-system description object
354 * 354 *
355 * This function writes the master node. The caller has to take the 355 * This function writes the master node. Returns zero in case of success and a
356 * @c->mst_mutex lock before calling this function. Returns zero in case of 356 * negative error code in case of failure. The master node is written twice to
357 * success and a negative error code in case of failure. The master node is 357 * enable recovery.
358 * written twice to enable recovery.
359 */ 358 */
360int ubifs_write_master(struct ubifs_info *c) 359int ubifs_write_master(struct ubifs_info *c)
361{ 360{
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index f1c3e5a1b315..4409f486ecef 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -346,7 +346,6 @@ static int write_orph_nodes(struct ubifs_info *c, int atomic)
346 int lnum; 346 int lnum;
347 347
348 /* Unmap any unused LEBs after consolidation */ 348 /* Unmap any unused LEBs after consolidation */
349 lnum = c->ohead_lnum + 1;
350 for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) { 349 for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) {
351 err = ubifs_leb_unmap(c, lnum); 350 err = ubifs_leb_unmap(c, lnum);
352 if (err) 351 if (err)
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c14adb2f420c..c640938f62f0 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -596,7 +596,6 @@ static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
596 * drop_last_node - drop the last node. 596 * drop_last_node - drop the last node.
597 * @sleb: scanned LEB information 597 * @sleb: scanned LEB information
598 * @offs: offset of dropped nodes is returned here 598 * @offs: offset of dropped nodes is returned here
599 * @grouped: non-zero if whole group of nodes have to be dropped
600 * 599 *
601 * This is a helper function for 'ubifs_recover_leb()' which drops the last 600 * This is a helper function for 'ubifs_recover_leb()' which drops the last
602 * node of the scanned LEB. 601 * node of the scanned LEB.
@@ -629,8 +628,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
629 * 628 *
630 * This function does a scan of a LEB, but caters for errors that might have 629 * This function does a scan of a LEB, but caters for errors that might have
631 * been caused by the unclean unmount from which we are attempting to recover. 630 * been caused by the unclean unmount from which we are attempting to recover.
632 * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is 631 * Returns the scanned information on success and a negative error code on
633 * found, and a negative error code in case of failure. 632 * failure.
634 */ 633 */
635struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 634struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
636 int offs, void *sbuf, int jhead) 635 int offs, void *sbuf, int jhead)
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 4c37607a958e..79c6dbbc0e04 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -332,6 +332,8 @@ static int create_default_filesystem(struct ubifs_info *c)
332 cs->ch.node_type = UBIFS_CS_NODE; 332 cs->ch.node_type = UBIFS_CS_NODE;
333 err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0); 333 err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0);
334 kfree(cs); 334 kfree(cs);
335 if (err)
336 return err;
335 337
336 ubifs_msg("default file-system created"); 338 ubifs_msg("default file-system created");
337 return 0; 339 return 0;
@@ -447,7 +449,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
447 goto failed; 449 goto failed;
448 } 450 }
449 451
450 if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) { 452 if (c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
451 err = 13; 453 err = 13;
452 goto failed; 454 goto failed;
453 } 455 }
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 58aa05df2bb6..89adbc4d08ac 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -131,7 +131,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
131 * @offs: offset to start at (usually zero) 131 * @offs: offset to start at (usually zero)
132 * @sbuf: scan buffer (must be c->leb_size) 132 * @sbuf: scan buffer (must be c->leb_size)
133 * 133 *
134 * This function returns %0 on success and a negative error code on failure. 134 * This function returns the scanned information on success and a negative error
135 * code on failure.
135 */ 136 */
136struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, 137struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
137 int offs, void *sbuf) 138 int offs, void *sbuf)
@@ -157,9 +158,10 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
157 return ERR_PTR(err); 158 return ERR_PTR(err);
158 } 159 }
159 160
160 if (err == -EBADMSG) 161 /*
161 sleb->ecc = 1; 162 * Note, we ignore integrity errors (EBASMSG) because all the nodes are
162 163 * protected by CRC checksums.
164 */
163 return sleb; 165 return sleb;
164} 166}
165 167
@@ -169,8 +171,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
169 * @sleb: scanning information 171 * @sleb: scanning information
170 * @lnum: logical eraseblock number 172 * @lnum: logical eraseblock number
171 * @offs: offset to start at (usually zero) 173 * @offs: offset to start at (usually zero)
172 *
173 * This function returns %0 on success and a negative error code on failure.
174 */ 174 */
175void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, 175void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
176 int lnum, int offs) 176 int lnum, int offs)
@@ -257,7 +257,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
257 * @quiet: print no messages 257 * @quiet: print no messages
258 * 258 *
259 * This function scans LEB number @lnum and returns complete information about 259 * This function scans LEB number @lnum and returns complete information about
260 * its contents. Returns the scaned information in case of success and, 260 * its contents. Returns the scanned information in case of success and,
261 * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case 261 * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case
262 * of failure. 262 * of failure.
263 * 263 *
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 3904c8574ef9..106bf20629ce 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -75,7 +75,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
75 return 1; 75 return 1;
76 } 76 }
77 77
78 if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { 78 if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
79 ubifs_err("unknown compression type %d", ui->compr_type); 79 ubifs_err("unknown compression type %d", ui->compr_type);
80 return 2; 80 return 2;
81 } 81 }
@@ -424,19 +424,19 @@ static int ubifs_show_options(struct seq_file *s, struct dentry *root)
424 struct ubifs_info *c = root->d_sb->s_fs_info; 424 struct ubifs_info *c = root->d_sb->s_fs_info;
425 425
426 if (c->mount_opts.unmount_mode == 2) 426 if (c->mount_opts.unmount_mode == 2)
427 seq_printf(s, ",fast_unmount"); 427 seq_puts(s, ",fast_unmount");
428 else if (c->mount_opts.unmount_mode == 1) 428 else if (c->mount_opts.unmount_mode == 1)
429 seq_printf(s, ",norm_unmount"); 429 seq_puts(s, ",norm_unmount");
430 430
431 if (c->mount_opts.bulk_read == 2) 431 if (c->mount_opts.bulk_read == 2)
432 seq_printf(s, ",bulk_read"); 432 seq_puts(s, ",bulk_read");
433 else if (c->mount_opts.bulk_read == 1) 433 else if (c->mount_opts.bulk_read == 1)
434 seq_printf(s, ",no_bulk_read"); 434 seq_puts(s, ",no_bulk_read");
435 435
436 if (c->mount_opts.chk_data_crc == 2) 436 if (c->mount_opts.chk_data_crc == 2)
437 seq_printf(s, ",chk_data_crc"); 437 seq_puts(s, ",chk_data_crc");
438 else if (c->mount_opts.chk_data_crc == 1) 438 else if (c->mount_opts.chk_data_crc == 1)
439 seq_printf(s, ",no_chk_data_crc"); 439 seq_puts(s, ",no_chk_data_crc");
440 440
441 if (c->mount_opts.override_compr) { 441 if (c->mount_opts.override_compr) {
442 seq_printf(s, ",compr=%s", 442 seq_printf(s, ",compr=%s",
@@ -796,8 +796,8 @@ static int alloc_wbufs(struct ubifs_info *c)
796{ 796{
797 int i, err; 797 int i, err;
798 798
799 c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead), 799 c->jheads = kcalloc(c->jhead_cnt, sizeof(struct ubifs_jhead),
800 GFP_KERNEL); 800 GFP_KERNEL);
801 if (!c->jheads) 801 if (!c->jheads)
802 return -ENOMEM; 802 return -ENOMEM;
803 803
@@ -1963,7 +1963,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
1963 mutex_init(&c->lp_mutex); 1963 mutex_init(&c->lp_mutex);
1964 mutex_init(&c->tnc_mutex); 1964 mutex_init(&c->tnc_mutex);
1965 mutex_init(&c->log_mutex); 1965 mutex_init(&c->log_mutex);
1966 mutex_init(&c->mst_mutex);
1967 mutex_init(&c->umount_mutex); 1966 mutex_init(&c->umount_mutex);
1968 mutex_init(&c->bu_mutex); 1967 mutex_init(&c->bu_mutex);
1969 mutex_init(&c->write_reserve_mutex); 1968 mutex_init(&c->write_reserve_mutex);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 8a40cf9c02d7..6793db0754f6 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -3294,7 +3294,6 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3294 goto out_unlock; 3294 goto out_unlock;
3295 3295
3296 if (err) { 3296 if (err) {
3297 err = -EINVAL;
3298 key = &from_key; 3297 key = &from_key;
3299 goto out_dump; 3298 goto out_dump;
3300 } 3299 }
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 3600994f8411..7a205e046776 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -389,7 +389,6 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
389 ubifs_dump_lprops(c); 389 ubifs_dump_lprops(c);
390 } 390 }
391 /* Try to commit anyway */ 391 /* Try to commit anyway */
392 err = 0;
393 break; 392 break;
394 } 393 }
395 p++; 394 p++;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index c1f71fe17cc0..c4fe900c67ab 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -314,7 +314,6 @@ struct ubifs_scan_node {
314 * @nodes_cnt: number of nodes scanned 314 * @nodes_cnt: number of nodes scanned
315 * @nodes: list of struct ubifs_scan_node 315 * @nodes: list of struct ubifs_scan_node
316 * @endpt: end point (and therefore the start of empty space) 316 * @endpt: end point (and therefore the start of empty space)
317 * @ecc: read returned -EBADMSG
318 * @buf: buffer containing entire LEB scanned 317 * @buf: buffer containing entire LEB scanned
319 */ 318 */
320struct ubifs_scan_leb { 319struct ubifs_scan_leb {
@@ -322,7 +321,6 @@ struct ubifs_scan_leb {
322 int nodes_cnt; 321 int nodes_cnt;
323 struct list_head nodes; 322 struct list_head nodes;
324 int endpt; 323 int endpt;
325 int ecc;
326 void *buf; 324 void *buf;
327}; 325};
328 326
@@ -1051,7 +1049,6 @@ struct ubifs_debug_info;
1051 * 1049 *
1052 * @mst_node: master node 1050 * @mst_node: master node
1053 * @mst_offs: offset of valid master node 1051 * @mst_offs: offset of valid master node
1054 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
1055 * 1052 *
1056 * @max_bu_buf_len: maximum bulk-read buffer length 1053 * @max_bu_buf_len: maximum bulk-read buffer length
1057 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu 1054 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
@@ -1292,7 +1289,6 @@ struct ubifs_info {
1292 1289
1293 struct ubifs_mst_node *mst_node; 1290 struct ubifs_mst_node *mst_node;
1294 int mst_offs; 1291 int mst_offs;
1295 struct mutex mst_mutex;
1296 1292
1297 int max_bu_buf_len; 1293 int max_bu_buf_len;
1298 struct mutex bu_mutex; 1294 struct mutex bu_mutex;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d80738fdf424..86c6743ec1fe 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -27,7 +27,7 @@
27 27
28#include "udfdecl.h" 28#include "udfdecl.h"
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <asm/uaccess.h> 30#include <linux/uaccess.h>
31#include <linux/kernel.h> 31#include <linux/kernel.h>
32#include <linux/string.h> /* memset */ 32#include <linux/string.h> /* memset */
33#include <linux/capability.h> 33#include <linux/capability.h>
@@ -100,24 +100,6 @@ static int udf_adinicb_write_begin(struct file *file,
100 return 0; 100 return 0;
101} 101}
102 102
103static int udf_adinicb_write_end(struct file *file,
104 struct address_space *mapping,
105 loff_t pos, unsigned len, unsigned copied,
106 struct page *page, void *fsdata)
107{
108 struct inode *inode = mapping->host;
109 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
110 char *kaddr;
111 struct udf_inode_info *iinfo = UDF_I(inode);
112
113 kaddr = kmap_atomic(page);
114 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset,
115 kaddr + offset, copied);
116 kunmap_atomic(kaddr);
117
118 return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
119}
120
121static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, 103static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
122 struct iov_iter *iter, 104 struct iov_iter *iter,
123 loff_t offset) 105 loff_t offset)
@@ -130,7 +112,7 @@ const struct address_space_operations udf_adinicb_aops = {
130 .readpage = udf_adinicb_readpage, 112 .readpage = udf_adinicb_readpage,
131 .writepage = udf_adinicb_writepage, 113 .writepage = udf_adinicb_writepage,
132 .write_begin = udf_adinicb_write_begin, 114 .write_begin = udf_adinicb_write_begin,
133 .write_end = udf_adinicb_write_end, 115 .write_end = simple_write_end,
134 .direct_IO = udf_adinicb_direct_IO, 116 .direct_IO = udf_adinicb_direct_IO,
135}; 117};
136 118
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 6583fe9b0645..6ad5a453af97 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -21,7 +21,7 @@
21 21
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/cdrom.h> 23#include <linux/cdrom.h>
24#include <asm/uaccess.h> 24#include <linux/uaccess.h>
25 25
26#include "udf_sb.h" 26#include "udf_sb.h"
27 27
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3286db047a40..813da94d447b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -63,7 +63,7 @@
63#include "udf_i.h" 63#include "udf_i.h"
64 64
65#include <linux/init.h> 65#include <linux/init.h>
66#include <asm/uaccess.h> 66#include <linux/uaccess.h>
67 67
68#define VDS_POS_PRIMARY_VOL_DESC 0 68#define VDS_POS_PRIMARY_VOL_DESC 0
69#define VDS_POS_UNALLOC_SPACE_DESC 1 69#define VDS_POS_UNALLOC_SPACE_DESC 1
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index d7c6dbe4194b..6fb7945c1e6e 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -20,7 +20,7 @@
20 */ 20 */
21 21
22#include "udfdecl.h" 22#include "udfdecl.h"
23#include <asm/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/errno.h> 24#include <linux/errno.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/time.h> 26#include <linux/time.h>
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 44b815e57f94..afd470e588ff 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -412,7 +412,6 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
412 int extIndex = 0, newExtIndex = 0, hasExt = 0; 412 int extIndex = 0, newExtIndex = 0, hasExt = 0;
413 unsigned short valueCRC; 413 unsigned short valueCRC;
414 uint8_t curr; 414 uint8_t curr;
415 const uint8_t hexChar[] = "0123456789ABCDEF";
416 415
417 if (udfName[0] == '.' && 416 if (udfName[0] == '.' &&
418 (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) { 417 (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
@@ -477,10 +476,10 @@ static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
477 newIndex = 250; 476 newIndex = 250;
478 newName[newIndex++] = CRC_MARK; 477 newName[newIndex++] = CRC_MARK;
479 valueCRC = crc_itu_t(0, fidName, fidNameLen); 478 valueCRC = crc_itu_t(0, fidName, fidNameLen);
480 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; 479 newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8);
481 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; 480 newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8);
482 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; 481 newName[newIndex++] = hex_asc_upper_hi(valueCRC);
483 newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; 482 newName[newIndex++] = hex_asc_upper_lo(valueCRC);
484 483
485 if (hasExt) { 484 if (hasExt) {
486 newName[newIndex++] = EXT_MARK; 485 newName[newIndex++] = EXT_MARK;
diff --git a/fs/ufs/Makefile b/fs/ufs/Makefile
index dd39980437fc..4d0e02b022b3 100644
--- a/fs/ufs/Makefile
+++ b/fs/ufs/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_UFS_FS) += ufs.o
6 6
7ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \ 7ufs-objs := balloc.o cylinder.o dir.o file.o ialloc.o inode.o \
8 namei.o super.o symlink.o truncate.o util.o 8 namei.o super.o symlink.o truncate.o util.o
9ccflags-$(CONFIG_UFS_DEBUG) += -DDEBUG
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 61e8a9b021dd..7c580c97990e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -158,16 +158,16 @@ out:
158 158
159/** 159/**
160 * ufs_inode_getfrag() - allocate new fragment(s) 160 * ufs_inode_getfrag() - allocate new fragment(s)
161 * @inode - pointer to inode 161 * @inode: pointer to inode
162 * @fragment - number of `fragment' which hold pointer 162 * @fragment: number of `fragment' which hold pointer
163 * to new allocated fragment(s) 163 * to new allocated fragment(s)
164 * @new_fragment - number of new allocated fragment(s) 164 * @new_fragment: number of new allocated fragment(s)
165 * @required - how many fragment(s) we require 165 * @required: how many fragment(s) we require
166 * @err - we set it if something wrong 166 * @err: we set it if something wrong
167 * @phys - pointer to where we save physical number of new allocated fragments, 167 * @phys: pointer to where we save physical number of new allocated fragments,
168 * NULL if we allocate not data(indirect blocks for example). 168 * NULL if we allocate not data(indirect blocks for example).
169 * @new - we set it if we allocate new block 169 * @new: we set it if we allocate new block
170 * @locked_page - for ufs_new_fragments() 170 * @locked_page: for ufs_new_fragments()
171 */ 171 */
172static struct buffer_head * 172static struct buffer_head *
173ufs_inode_getfrag(struct inode *inode, u64 fragment, 173ufs_inode_getfrag(struct inode *inode, u64 fragment,
@@ -315,16 +315,16 @@ repeat2:
315 315
316/** 316/**
317 * ufs_inode_getblock() - allocate new block 317 * ufs_inode_getblock() - allocate new block
318 * @inode - pointer to inode 318 * @inode: pointer to inode
319 * @bh - pointer to block which hold "pointer" to new allocated block 319 * @bh: pointer to block which hold "pointer" to new allocated block
320 * @fragment - number of `fragment' which hold pointer 320 * @fragment: number of `fragment' which hold pointer
321 * to new allocated block 321 * to new allocated block
322 * @new_fragment - number of new allocated fragment 322 * @new_fragment: number of new allocated fragment
323 * (block will hold this fragment and also uspi->s_fpb-1) 323 * (block will hold this fragment and also uspi->s_fpb-1)
324 * @err - see ufs_inode_getfrag() 324 * @err: see ufs_inode_getfrag()
325 * @phys - see ufs_inode_getfrag() 325 * @phys: see ufs_inode_getfrag()
326 * @new - see ufs_inode_getfrag() 326 * @new: see ufs_inode_getfrag()
327 * @locked_page - see ufs_inode_getfrag() 327 * @locked_page: see ufs_inode_getfrag()
328 */ 328 */
329static struct buffer_head * 329static struct buffer_head *
330ufs_inode_getblock(struct inode *inode, struct buffer_head *bh, 330ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index b879f1ba3439..da73801301d5 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -65,7 +65,6 @@
65 * Evgeniy Dushistov <dushistov@mail.ru>, 2007 65 * Evgeniy Dushistov <dushistov@mail.ru>, 2007
66 */ 66 */
67 67
68
69#include <linux/exportfs.h> 68#include <linux/exportfs.h>
70#include <linux/module.h> 69#include <linux/module.h>
71#include <linux/bitops.h> 70#include <linux/bitops.h>
@@ -172,73 +171,73 @@ static void ufs_print_super_stuff(struct super_block *sb,
172{ 171{
173 u32 magic = fs32_to_cpu(sb, usb3->fs_magic); 172 u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
174 173
175 printk("ufs_print_super_stuff\n"); 174 pr_debug("ufs_print_super_stuff\n");
176 printk(" magic: 0x%x\n", magic); 175 pr_debug(" magic: 0x%x\n", magic);
177 if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) { 176 if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
178 printk(" fs_size: %llu\n", (unsigned long long) 177 pr_debug(" fs_size: %llu\n", (unsigned long long)
179 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size)); 178 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
180 printk(" fs_dsize: %llu\n", (unsigned long long) 179 pr_debug(" fs_dsize: %llu\n", (unsigned long long)
181 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize)); 180 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_dsize));
182 printk(" bsize: %u\n", 181 pr_debug(" bsize: %u\n",
183 fs32_to_cpu(sb, usb1->fs_bsize)); 182 fs32_to_cpu(sb, usb1->fs_bsize));
184 printk(" fsize: %u\n", 183 pr_debug(" fsize: %u\n",
185 fs32_to_cpu(sb, usb1->fs_fsize)); 184 fs32_to_cpu(sb, usb1->fs_fsize));
186 printk(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname); 185 pr_debug(" fs_volname: %s\n", usb2->fs_un.fs_u2.fs_volname);
187 printk(" fs_sblockloc: %llu\n", (unsigned long long) 186 pr_debug(" fs_sblockloc: %llu\n", (unsigned long long)
188 fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc)); 187 fs64_to_cpu(sb, usb2->fs_un.fs_u2.fs_sblockloc));
189 printk(" cs_ndir(No of dirs): %llu\n", (unsigned long long) 188 pr_debug(" cs_ndir(No of dirs): %llu\n", (unsigned long long)
190 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir)); 189 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_ndir));
191 printk(" cs_nbfree(No of free blocks): %llu\n", 190 pr_debug(" cs_nbfree(No of free blocks): %llu\n",
192 (unsigned long long) 191 (unsigned long long)
193 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree)); 192 fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
194 printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n", 193 pr_info(" cs_nifree(Num of free inodes): %llu\n",
195 (unsigned long long) 194 (unsigned long long)
196 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree)); 195 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
197 printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n", 196 pr_info(" cs_nffree(Num of free frags): %llu\n",
198 (unsigned long long) 197 (unsigned long long)
199 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree)); 198 fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
200 printk(KERN_INFO" fs_maxsymlinklen: %u\n", 199 pr_info(" fs_maxsymlinklen: %u\n",
201 fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen)); 200 fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen));
202 } else { 201 } else {
203 printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno)); 202 pr_debug(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
204 printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno)); 203 pr_debug(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
205 printk(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno)); 204 pr_debug(" iblkno: %u\n", fs32_to_cpu(sb, usb1->fs_iblkno));
206 printk(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno)); 205 pr_debug(" dblkno: %u\n", fs32_to_cpu(sb, usb1->fs_dblkno));
207 printk(" cgoffset: %u\n", 206 pr_debug(" cgoffset: %u\n",
208 fs32_to_cpu(sb, usb1->fs_cgoffset)); 207 fs32_to_cpu(sb, usb1->fs_cgoffset));
209 printk(" ~cgmask: 0x%x\n", 208 pr_debug(" ~cgmask: 0x%x\n",
210 ~fs32_to_cpu(sb, usb1->fs_cgmask)); 209 ~fs32_to_cpu(sb, usb1->fs_cgmask));
211 printk(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size)); 210 pr_debug(" size: %u\n", fs32_to_cpu(sb, usb1->fs_size));
212 printk(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize)); 211 pr_debug(" dsize: %u\n", fs32_to_cpu(sb, usb1->fs_dsize));
213 printk(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg)); 212 pr_debug(" ncg: %u\n", fs32_to_cpu(sb, usb1->fs_ncg));
214 printk(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize)); 213 pr_debug(" bsize: %u\n", fs32_to_cpu(sb, usb1->fs_bsize));
215 printk(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize)); 214 pr_debug(" fsize: %u\n", fs32_to_cpu(sb, usb1->fs_fsize));
216 printk(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag)); 215 pr_debug(" frag: %u\n", fs32_to_cpu(sb, usb1->fs_frag));
217 printk(" fragshift: %u\n", 216 pr_debug(" fragshift: %u\n",
218 fs32_to_cpu(sb, usb1->fs_fragshift)); 217 fs32_to_cpu(sb, usb1->fs_fragshift));
219 printk(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask)); 218 pr_debug(" ~fmask: %u\n", ~fs32_to_cpu(sb, usb1->fs_fmask));
220 printk(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift)); 219 pr_debug(" fshift: %u\n", fs32_to_cpu(sb, usb1->fs_fshift));
221 printk(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize)); 220 pr_debug(" sbsize: %u\n", fs32_to_cpu(sb, usb1->fs_sbsize));
222 printk(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc)); 221 pr_debug(" spc: %u\n", fs32_to_cpu(sb, usb1->fs_spc));
223 printk(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg)); 222 pr_debug(" cpg: %u\n", fs32_to_cpu(sb, usb1->fs_cpg));
224 printk(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg)); 223 pr_debug(" ipg: %u\n", fs32_to_cpu(sb, usb1->fs_ipg));
225 printk(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg)); 224 pr_debug(" fpg: %u\n", fs32_to_cpu(sb, usb1->fs_fpg));
226 printk(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr)); 225 pr_debug(" csaddr: %u\n", fs32_to_cpu(sb, usb1->fs_csaddr));
227 printk(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize)); 226 pr_debug(" cssize: %u\n", fs32_to_cpu(sb, usb1->fs_cssize));
228 printk(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize)); 227 pr_debug(" cgsize: %u\n", fs32_to_cpu(sb, usb1->fs_cgsize));
229 printk(" fstodb: %u\n", 228 pr_debug(" fstodb: %u\n",
230 fs32_to_cpu(sb, usb1->fs_fsbtodb)); 229 fs32_to_cpu(sb, usb1->fs_fsbtodb));
231 printk(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos)); 230 pr_debug(" nrpos: %u\n", fs32_to_cpu(sb, usb3->fs_nrpos));
232 printk(" ndir %u\n", 231 pr_debug(" ndir %u\n",
233 fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir)); 232 fs32_to_cpu(sb, usb1->fs_cstotal.cs_ndir));
234 printk(" nifree %u\n", 233 pr_debug(" nifree %u\n",
235 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree)); 234 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nifree));
236 printk(" nbfree %u\n", 235 pr_debug(" nbfree %u\n",
237 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree)); 236 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nbfree));
238 printk(" nffree %u\n", 237 pr_debug(" nffree %u\n",
239 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree)); 238 fs32_to_cpu(sb, usb1->fs_cstotal.cs_nffree));
240 } 239 }
241 printk("\n"); 240 pr_debug("\n");
242} 241}
243 242
244/* 243/*
@@ -247,38 +246,38 @@ static void ufs_print_super_stuff(struct super_block *sb,
247static void ufs_print_cylinder_stuff(struct super_block *sb, 246static void ufs_print_cylinder_stuff(struct super_block *sb,
248 struct ufs_cylinder_group *cg) 247 struct ufs_cylinder_group *cg)
249{ 248{
250 printk("\nufs_print_cylinder_stuff\n"); 249 pr_debug("\nufs_print_cylinder_stuff\n");
251 printk("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group)); 250 pr_debug("size of ucg: %zu\n", sizeof(struct ufs_cylinder_group));
252 printk(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic)); 251 pr_debug(" magic: %x\n", fs32_to_cpu(sb, cg->cg_magic));
253 printk(" time: %u\n", fs32_to_cpu(sb, cg->cg_time)); 252 pr_debug(" time: %u\n", fs32_to_cpu(sb, cg->cg_time));
254 printk(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx)); 253 pr_debug(" cgx: %u\n", fs32_to_cpu(sb, cg->cg_cgx));
255 printk(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl)); 254 pr_debug(" ncyl: %u\n", fs16_to_cpu(sb, cg->cg_ncyl));
256 printk(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk)); 255 pr_debug(" niblk: %u\n", fs16_to_cpu(sb, cg->cg_niblk));
257 printk(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk)); 256 pr_debug(" ndblk: %u\n", fs32_to_cpu(sb, cg->cg_ndblk));
258 printk(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir)); 257 pr_debug(" cs_ndir: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_ndir));
259 printk(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree)); 258 pr_debug(" cs_nbfree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nbfree));
260 printk(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree)); 259 pr_debug(" cs_nifree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nifree));
261 printk(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree)); 260 pr_debug(" cs_nffree: %u\n", fs32_to_cpu(sb, cg->cg_cs.cs_nffree));
262 printk(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor)); 261 pr_debug(" rotor: %u\n", fs32_to_cpu(sb, cg->cg_rotor));
263 printk(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor)); 262 pr_debug(" frotor: %u\n", fs32_to_cpu(sb, cg->cg_frotor));
264 printk(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor)); 263 pr_debug(" irotor: %u\n", fs32_to_cpu(sb, cg->cg_irotor));
265 printk(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n", 264 pr_debug(" frsum: %u, %u, %u, %u, %u, %u, %u, %u\n",
266 fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]), 265 fs32_to_cpu(sb, cg->cg_frsum[0]), fs32_to_cpu(sb, cg->cg_frsum[1]),
267 fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]), 266 fs32_to_cpu(sb, cg->cg_frsum[2]), fs32_to_cpu(sb, cg->cg_frsum[3]),
268 fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]), 267 fs32_to_cpu(sb, cg->cg_frsum[4]), fs32_to_cpu(sb, cg->cg_frsum[5]),
269 fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7])); 268 fs32_to_cpu(sb, cg->cg_frsum[6]), fs32_to_cpu(sb, cg->cg_frsum[7]));
270 printk(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff)); 269 pr_debug(" btotoff: %u\n", fs32_to_cpu(sb, cg->cg_btotoff));
271 printk(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff)); 270 pr_debug(" boff: %u\n", fs32_to_cpu(sb, cg->cg_boff));
272 printk(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff)); 271 pr_debug(" iuseoff: %u\n", fs32_to_cpu(sb, cg->cg_iusedoff));
273 printk(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff)); 272 pr_debug(" freeoff: %u\n", fs32_to_cpu(sb, cg->cg_freeoff));
274 printk(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff)); 273 pr_debug(" nextfreeoff: %u\n", fs32_to_cpu(sb, cg->cg_nextfreeoff));
275 printk(" clustersumoff %u\n", 274 pr_debug(" clustersumoff %u\n",
276 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff)); 275 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clustersumoff));
277 printk(" clusteroff %u\n", 276 pr_debug(" clusteroff %u\n",
278 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff)); 277 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_clusteroff));
279 printk(" nclusterblks %u\n", 278 pr_debug(" nclusterblks %u\n",
280 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks)); 279 fs32_to_cpu(sb, cg->cg_u.cg_44.cg_nclusterblks));
281 printk("\n"); 280 pr_debug("\n");
282} 281}
283#else 282#else
284# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/ 283# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
@@ -287,13 +286,12 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
287 286
288static const struct super_operations ufs_super_ops; 287static const struct super_operations ufs_super_ops;
289 288
290static char error_buf[1024];
291
292void ufs_error (struct super_block * sb, const char * function, 289void ufs_error (struct super_block * sb, const char * function,
293 const char * fmt, ...) 290 const char * fmt, ...)
294{ 291{
295 struct ufs_sb_private_info * uspi; 292 struct ufs_sb_private_info * uspi;
296 struct ufs_super_block_first * usb1; 293 struct ufs_super_block_first * usb1;
294 struct va_format vaf;
297 va_list args; 295 va_list args;
298 296
299 uspi = UFS_SB(sb)->s_uspi; 297 uspi = UFS_SB(sb)->s_uspi;
@@ -305,20 +303,21 @@ void ufs_error (struct super_block * sb, const char * function,
305 ufs_mark_sb_dirty(sb); 303 ufs_mark_sb_dirty(sb);
306 sb->s_flags |= MS_RDONLY; 304 sb->s_flags |= MS_RDONLY;
307 } 305 }
308 va_start (args, fmt); 306 va_start(args, fmt);
309 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 307 vaf.fmt = fmt;
310 va_end (args); 308 vaf.va = &args;
311 switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) { 309 switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
312 case UFS_MOUNT_ONERROR_PANIC: 310 case UFS_MOUNT_ONERROR_PANIC:
313 panic ("UFS-fs panic (device %s): %s: %s\n", 311 panic("panic (device %s): %s: %pV\n",
314 sb->s_id, function, error_buf); 312 sb->s_id, function, &vaf);
315 313
316 case UFS_MOUNT_ONERROR_LOCK: 314 case UFS_MOUNT_ONERROR_LOCK:
317 case UFS_MOUNT_ONERROR_UMOUNT: 315 case UFS_MOUNT_ONERROR_UMOUNT:
318 case UFS_MOUNT_ONERROR_REPAIR: 316 case UFS_MOUNT_ONERROR_REPAIR:
319 printk (KERN_CRIT "UFS-fs error (device %s): %s: %s\n", 317 pr_crit("error (device %s): %s: %pV\n",
320 sb->s_id, function, error_buf); 318 sb->s_id, function, &vaf);
321 } 319 }
320 va_end(args);
322} 321}
323 322
324void ufs_panic (struct super_block * sb, const char * function, 323void ufs_panic (struct super_block * sb, const char * function,
@@ -326,6 +325,7 @@ void ufs_panic (struct super_block * sb, const char * function,
326{ 325{
327 struct ufs_sb_private_info * uspi; 326 struct ufs_sb_private_info * uspi;
328 struct ufs_super_block_first * usb1; 327 struct ufs_super_block_first * usb1;
328 struct va_format vaf;
329 va_list args; 329 va_list args;
330 330
331 uspi = UFS_SB(sb)->s_uspi; 331 uspi = UFS_SB(sb)->s_uspi;
@@ -336,24 +336,27 @@ void ufs_panic (struct super_block * sb, const char * function,
336 ubh_mark_buffer_dirty(USPI_UBH(uspi)); 336 ubh_mark_buffer_dirty(USPI_UBH(uspi));
337 ufs_mark_sb_dirty(sb); 337 ufs_mark_sb_dirty(sb);
338 } 338 }
339 va_start (args, fmt); 339 va_start(args, fmt);
340 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 340 vaf.fmt = fmt;
341 va_end (args); 341 vaf.va = &args;
342 sb->s_flags |= MS_RDONLY; 342 sb->s_flags |= MS_RDONLY;
343 printk (KERN_CRIT "UFS-fs panic (device %s): %s: %s\n", 343 pr_crit("panic (device %s): %s: %pV\n",
344 sb->s_id, function, error_buf); 344 sb->s_id, function, &vaf);
345 va_end(args);
345} 346}
346 347
347void ufs_warning (struct super_block * sb, const char * function, 348void ufs_warning (struct super_block * sb, const char * function,
348 const char * fmt, ...) 349 const char * fmt, ...)
349{ 350{
351 struct va_format vaf;
350 va_list args; 352 va_list args;
351 353
352 va_start (args, fmt); 354 va_start(args, fmt);
353 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 355 vaf.fmt = fmt;
354 va_end (args); 356 vaf.va = &args;
355 printk (KERN_WARNING "UFS-fs warning (device %s): %s: %s\n", 357 pr_warn("(device %s): %s: %pV\n",
356 sb->s_id, function, error_buf); 358 sb->s_id, function, &vaf);
359 va_end(args);
357} 360}
358 361
359enum { 362enum {
@@ -464,14 +467,12 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
464 ufs_set_opt (*mount_options, ONERROR_UMOUNT); 467 ufs_set_opt (*mount_options, ONERROR_UMOUNT);
465 break; 468 break;
466 case Opt_onerror_repair: 469 case Opt_onerror_repair:
467 printk("UFS-fs: Unable to do repair on error, " 470 pr_err("Unable to do repair on error, will lock lock instead\n");
468 "will lock lock instead\n");
469 ufs_clear_opt (*mount_options, ONERROR); 471 ufs_clear_opt (*mount_options, ONERROR);
470 ufs_set_opt (*mount_options, ONERROR_REPAIR); 472 ufs_set_opt (*mount_options, ONERROR_REPAIR);
471 break; 473 break;
472 default: 474 default:
473 printk("UFS-fs: Invalid option: \"%s\" " 475 pr_err("Invalid option: \"%s\" or missing value\n", p);
474 "or missing value\n", p);
475 return 0; 476 return 0;
476 } 477 }
477 } 478 }
@@ -788,8 +789,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
788 789
789#ifndef CONFIG_UFS_FS_WRITE 790#ifndef CONFIG_UFS_FS_WRITE
790 if (!(sb->s_flags & MS_RDONLY)) { 791 if (!(sb->s_flags & MS_RDONLY)) {
791 printk("ufs was compiled with read-only support, " 792 pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
792 "can't be mounted as read-write\n");
793 return -EROFS; 793 return -EROFS;
794 } 794 }
795#endif 795#endif
@@ -812,12 +812,12 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
812 sbi->s_mount_opt = 0; 812 sbi->s_mount_opt = 0;
813 ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK); 813 ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
814 if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) { 814 if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
815 printk("wrong mount options\n"); 815 pr_err("wrong mount options\n");
816 goto failed; 816 goto failed;
817 } 817 }
818 if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) { 818 if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
819 if (!silent) 819 if (!silent)
820 printk("You didn't specify the type of your ufs filesystem\n\n" 820 pr_err("You didn't specify the type of your ufs filesystem\n\n"
821 "mount -t ufs -o ufstype=" 821 "mount -t ufs -o ufstype="
822 "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n" 822 "sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
823 ">>>WARNING<<< Wrong ufstype may corrupt your filesystem, " 823 ">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
@@ -868,7 +868,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
868 break; 868 break;
869 869
870 case UFS_MOUNT_UFSTYPE_SUNOS: 870 case UFS_MOUNT_UFSTYPE_SUNOS:
871 UFSD(("ufstype=sunos\n")) 871 UFSD("ufstype=sunos\n");
872 uspi->s_fsize = block_size = 1024; 872 uspi->s_fsize = block_size = 1024;
873 uspi->s_fmask = ~(1024 - 1); 873 uspi->s_fmask = ~(1024 - 1);
874 uspi->s_fshift = 10; 874 uspi->s_fshift = 10;
@@ -900,7 +900,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
900 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 900 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
901 if (!(sb->s_flags & MS_RDONLY)) { 901 if (!(sb->s_flags & MS_RDONLY)) {
902 if (!silent) 902 if (!silent)
903 printk(KERN_INFO "ufstype=old is supported read-only\n"); 903 pr_info("ufstype=old is supported read-only\n");
904 sb->s_flags |= MS_RDONLY; 904 sb->s_flags |= MS_RDONLY;
905 } 905 }
906 break; 906 break;
@@ -916,7 +916,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
916 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 916 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
917 if (!(sb->s_flags & MS_RDONLY)) { 917 if (!(sb->s_flags & MS_RDONLY)) {
918 if (!silent) 918 if (!silent)
919 printk(KERN_INFO "ufstype=nextstep is supported read-only\n"); 919 pr_info("ufstype=nextstep is supported read-only\n");
920 sb->s_flags |= MS_RDONLY; 920 sb->s_flags |= MS_RDONLY;
921 } 921 }
922 break; 922 break;
@@ -932,7 +932,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
932 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 932 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
933 if (!(sb->s_flags & MS_RDONLY)) { 933 if (!(sb->s_flags & MS_RDONLY)) {
934 if (!silent) 934 if (!silent)
935 printk(KERN_INFO "ufstype=nextstep-cd is supported read-only\n"); 935 pr_info("ufstype=nextstep-cd is supported read-only\n");
936 sb->s_flags |= MS_RDONLY; 936 sb->s_flags |= MS_RDONLY;
937 } 937 }
938 break; 938 break;
@@ -948,7 +948,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
948 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD; 948 flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
949 if (!(sb->s_flags & MS_RDONLY)) { 949 if (!(sb->s_flags & MS_RDONLY)) {
950 if (!silent) 950 if (!silent)
951 printk(KERN_INFO "ufstype=openstep is supported read-only\n"); 951 pr_info("ufstype=openstep is supported read-only\n");
952 sb->s_flags |= MS_RDONLY; 952 sb->s_flags |= MS_RDONLY;
953 } 953 }
954 break; 954 break;
@@ -963,19 +963,19 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
963 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD; 963 flags |= UFS_DE_OLD | UFS_UID_OLD | UFS_ST_OLD | UFS_CG_OLD;
964 if (!(sb->s_flags & MS_RDONLY)) { 964 if (!(sb->s_flags & MS_RDONLY)) {
965 if (!silent) 965 if (!silent)
966 printk(KERN_INFO "ufstype=hp is supported read-only\n"); 966 pr_info("ufstype=hp is supported read-only\n");
967 sb->s_flags |= MS_RDONLY; 967 sb->s_flags |= MS_RDONLY;
968 } 968 }
969 break; 969 break;
970 default: 970 default:
971 if (!silent) 971 if (!silent)
972 printk("unknown ufstype\n"); 972 pr_err("unknown ufstype\n");
973 goto failed; 973 goto failed;
974 } 974 }
975 975
976again: 976again:
977 if (!sb_set_blocksize(sb, block_size)) { 977 if (!sb_set_blocksize(sb, block_size)) {
978 printk(KERN_ERR "UFS: failed to set blocksize\n"); 978 pr_err("failed to set blocksize\n");
979 goto failed; 979 goto failed;
980 } 980 }
981 981
@@ -1034,7 +1034,7 @@ again:
1034 goto again; 1034 goto again;
1035 } 1035 }
1036 if (!silent) 1036 if (!silent)
1037 printk("ufs_read_super: bad magic number\n"); 1037 pr_err("%s(): bad magic number\n", __func__);
1038 goto failed; 1038 goto failed;
1039 1039
1040magic_found: 1040magic_found:
@@ -1048,33 +1048,33 @@ magic_found:
1048 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift); 1048 uspi->s_fshift = fs32_to_cpu(sb, usb1->fs_fshift);
1049 1049
1050 if (!is_power_of_2(uspi->s_fsize)) { 1050 if (!is_power_of_2(uspi->s_fsize)) {
1051 printk(KERN_ERR "ufs_read_super: fragment size %u is not a power of 2\n", 1051 pr_err("%s(): fragment size %u is not a power of 2\n",
1052 uspi->s_fsize); 1052 __func__, uspi->s_fsize);
1053 goto failed; 1053 goto failed;
1054 } 1054 }
1055 if (uspi->s_fsize < 512) { 1055 if (uspi->s_fsize < 512) {
1056 printk(KERN_ERR "ufs_read_super: fragment size %u is too small\n", 1056 pr_err("%s(): fragment size %u is too small\n",
1057 uspi->s_fsize); 1057 __func__, uspi->s_fsize);
1058 goto failed; 1058 goto failed;
1059 } 1059 }
1060 if (uspi->s_fsize > 4096) { 1060 if (uspi->s_fsize > 4096) {
1061 printk(KERN_ERR "ufs_read_super: fragment size %u is too large\n", 1061 pr_err("%s(): fragment size %u is too large\n",
1062 uspi->s_fsize); 1062 __func__, uspi->s_fsize);
1063 goto failed; 1063 goto failed;
1064 } 1064 }
1065 if (!is_power_of_2(uspi->s_bsize)) { 1065 if (!is_power_of_2(uspi->s_bsize)) {
1066 printk(KERN_ERR "ufs_read_super: block size %u is not a power of 2\n", 1066 pr_err("%s(): block size %u is not a power of 2\n",
1067 uspi->s_bsize); 1067 __func__, uspi->s_bsize);
1068 goto failed; 1068 goto failed;
1069 } 1069 }
1070 if (uspi->s_bsize < 4096) { 1070 if (uspi->s_bsize < 4096) {
1071 printk(KERN_ERR "ufs_read_super: block size %u is too small\n", 1071 pr_err("%s(): block size %u is too small\n",
1072 uspi->s_bsize); 1072 __func__, uspi->s_bsize);
1073 goto failed; 1073 goto failed;
1074 } 1074 }
1075 if (uspi->s_bsize / uspi->s_fsize > 8) { 1075 if (uspi->s_bsize / uspi->s_fsize > 8) {
1076 printk(KERN_ERR "ufs_read_super: too many fragments per block (%u)\n", 1076 pr_err("%s(): too many fragments per block (%u)\n",
1077 uspi->s_bsize / uspi->s_fsize); 1077 __func__, uspi->s_bsize / uspi->s_fsize);
1078 goto failed; 1078 goto failed;
1079 } 1079 }
1080 if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) { 1080 if (uspi->s_fsize != block_size || uspi->s_sbsize != super_block_size) {
@@ -1113,20 +1113,21 @@ magic_found:
1113 UFSD("fs is DEC OSF/1\n"); 1113 UFSD("fs is DEC OSF/1\n");
1114 break; 1114 break;
1115 case UFS_FSACTIVE: 1115 case UFS_FSACTIVE:
1116 printk("ufs_read_super: fs is active\n"); 1116 pr_err("%s(): fs is active\n", __func__);
1117 sb->s_flags |= MS_RDONLY; 1117 sb->s_flags |= MS_RDONLY;
1118 break; 1118 break;
1119 case UFS_FSBAD: 1119 case UFS_FSBAD:
1120 printk("ufs_read_super: fs is bad\n"); 1120 pr_err("%s(): fs is bad\n", __func__);
1121 sb->s_flags |= MS_RDONLY; 1121 sb->s_flags |= MS_RDONLY;
1122 break; 1122 break;
1123 default: 1123 default:
1124 printk("ufs_read_super: can't grok fs_clean 0x%x\n", usb1->fs_clean); 1124 pr_err("%s(): can't grok fs_clean 0x%x\n",
1125 __func__, usb1->fs_clean);
1125 sb->s_flags |= MS_RDONLY; 1126 sb->s_flags |= MS_RDONLY;
1126 break; 1127 break;
1127 } 1128 }
1128 } else { 1129 } else {
1129 printk("ufs_read_super: fs needs fsck\n"); 1130 pr_err("%s(): fs needs fsck\n", __func__);
1130 sb->s_flags |= MS_RDONLY; 1131 sb->s_flags |= MS_RDONLY;
1131 } 1132 }
1132 1133
@@ -1299,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1299 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { 1300 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
1300 new_mount_opt |= ufstype; 1301 new_mount_opt |= ufstype;
1301 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { 1302 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
1302 printk("ufstype can't be changed during remount\n"); 1303 pr_err("ufstype can't be changed during remount\n");
1303 unlock_ufs(sb); 1304 unlock_ufs(sb);
1304 return -EINVAL; 1305 return -EINVAL;
1305 } 1306 }
@@ -1328,8 +1329,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1328 * fs was mounted as ro, remounting rw 1329 * fs was mounted as ro, remounting rw
1329 */ 1330 */
1330#ifndef CONFIG_UFS_FS_WRITE 1331#ifndef CONFIG_UFS_FS_WRITE
1331 printk("ufs was compiled with read-only support, " 1332 pr_err("ufs was compiled with read-only support, can't be mounted as read-write\n");
1332 "can't be mounted as read-write\n");
1333 unlock_ufs(sb); 1333 unlock_ufs(sb);
1334 return -EINVAL; 1334 return -EINVAL;
1335#else 1335#else
@@ -1338,12 +1338,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1338 ufstype != UFS_MOUNT_UFSTYPE_44BSD && 1338 ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
1339 ufstype != UFS_MOUNT_UFSTYPE_SUNx86 && 1339 ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
1340 ufstype != UFS_MOUNT_UFSTYPE_UFS2) { 1340 ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
1341 printk("this ufstype is read-only supported\n"); 1341 pr_err("this ufstype is read-only supported\n");
1342 unlock_ufs(sb); 1342 unlock_ufs(sb);
1343 return -EINVAL; 1343 return -EINVAL;
1344 } 1344 }
1345 if (!ufs_read_cylinder_structures(sb)) { 1345 if (!ufs_read_cylinder_structures(sb)) {
1346 printk("failed during remounting\n"); 1346 pr_err("failed during remounting\n");
1347 unlock_ufs(sb); 1347 unlock_ufs(sb);
1348 return -EPERM; 1348 return -EPERM;
1349 } 1349 }
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 343e6fc571e5..2a07396d5f9e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -1,6 +1,12 @@
1#ifndef _UFS_UFS_H 1#ifndef _UFS_UFS_H
2#define _UFS_UFS_H 1 2#define _UFS_UFS_H 1
3 3
4#ifdef pr_fmt
5#undef pr_fmt
6#endif
7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
4#define UFS_MAX_GROUP_LOADED 8 10#define UFS_MAX_GROUP_LOADED 8
5#define UFS_CGNO_EMPTY ((unsigned)-1) 11#define UFS_CGNO_EMPTY ((unsigned)-1)
6 12
@@ -71,9 +77,9 @@ struct ufs_inode_info {
71 */ 77 */
72#ifdef CONFIG_UFS_DEBUG 78#ifdef CONFIG_UFS_DEBUG
73# define UFSD(f, a...) { \ 79# define UFSD(f, a...) { \
74 printk ("UFSD (%s, %d): %s:", \ 80 pr_debug("UFSD (%s, %d): %s:", \
75 __FILE__, __LINE__, __func__); \ 81 __FILE__, __LINE__, __func__); \
76 printk (f, ## a); \ 82 pr_debug(f, ## a); \
77 } 83 }
78#else 84#else
79# define UFSD(f, a...) /**/ 85# define UFSD(f, a...) /**/
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 399e8cec6e60..5d47b4df61ea 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,6 +1,7 @@
1config XFS_FS 1config XFS_FS
2 tristate "XFS filesystem support" 2 tristate "XFS filesystem support"
3 depends on BLOCK 3 depends on BLOCK
4 depends on (64BIT || LBDAF)
4 select EXPORTFS 5 select EXPORTFS
5 select LIBCRC32C 6 select LIBCRC32C
6 help 7 help
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c21f43506661..d61799949580 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -17,6 +17,7 @@
17# 17#
18 18
19ccflags-y += -I$(src) # needed for trace events 19ccflags-y += -I$(src) # needed for trace events
20ccflags-y += -I$(src)/libxfs
20 21
21ccflags-$(CONFIG_XFS_DEBUG) += -g 22ccflags-$(CONFIG_XFS_DEBUG) += -g
22 23
@@ -25,6 +26,39 @@ obj-$(CONFIG_XFS_FS) += xfs.o
25# this one should be compiled first, as the tracing macros can easily blow up 26# this one should be compiled first, as the tracing macros can easily blow up
26xfs-y += xfs_trace.o 27xfs-y += xfs_trace.o
27 28
29# build the libxfs code first
30xfs-y += $(addprefix libxfs/, \
31 xfs_alloc.o \
32 xfs_alloc_btree.o \
33 xfs_attr.o \
34 xfs_attr_leaf.o \
35 xfs_attr_remote.o \
36 xfs_bmap.o \
37 xfs_bmap_btree.o \
38 xfs_btree.o \
39 xfs_da_btree.o \
40 xfs_da_format.o \
41 xfs_dir2.o \
42 xfs_dir2_block.o \
43 xfs_dir2_data.o \
44 xfs_dir2_leaf.o \
45 xfs_dir2_node.o \
46 xfs_dir2_sf.o \
47 xfs_dquot_buf.o \
48 xfs_ialloc.o \
49 xfs_ialloc_btree.o \
50 xfs_inode_fork.o \
51 xfs_inode_buf.o \
52 xfs_log_rlimit.o \
53 xfs_sb.o \
54 xfs_symlink_remote.o \
55 xfs_trans_resv.o \
56 )
57# xfs_rtbitmap is shared with libxfs
58xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
59 xfs_rtbitmap.o \
60 )
61
28# highlevel code 62# highlevel code
29xfs-y += xfs_aops.o \ 63xfs-y += xfs_aops.o \
30 xfs_attr_inactive.o \ 64 xfs_attr_inactive.o \
@@ -45,53 +79,27 @@ xfs-y += xfs_aops.o \
45 xfs_ioctl.o \ 79 xfs_ioctl.o \
46 xfs_iomap.o \ 80 xfs_iomap.o \
47 xfs_iops.o \ 81 xfs_iops.o \
82 xfs_inode.o \
48 xfs_itable.o \ 83 xfs_itable.o \
49 xfs_message.o \ 84 xfs_message.o \
50 xfs_mount.o \ 85 xfs_mount.o \
51 xfs_mru_cache.o \ 86 xfs_mru_cache.o \
52 xfs_super.o \ 87 xfs_super.o \
53 xfs_symlink.o \ 88 xfs_symlink.o \
89 xfs_sysfs.o \
54 xfs_trans.o \ 90 xfs_trans.o \
55 xfs_xattr.o \ 91 xfs_xattr.o \
56 kmem.o \ 92 kmem.o \
57 uuid.o 93 uuid.o
58 94
59# code shared with libxfs
60xfs-y += xfs_alloc.o \
61 xfs_alloc_btree.o \
62 xfs_attr.o \
63 xfs_attr_leaf.o \
64 xfs_attr_remote.o \
65 xfs_bmap.o \
66 xfs_bmap_btree.o \
67 xfs_btree.o \
68 xfs_da_btree.o \
69 xfs_da_format.o \
70 xfs_dir2.o \
71 xfs_dir2_block.o \
72 xfs_dir2_data.o \
73 xfs_dir2_leaf.o \
74 xfs_dir2_node.o \
75 xfs_dir2_sf.o \
76 xfs_dquot_buf.o \
77 xfs_ialloc.o \
78 xfs_ialloc_btree.o \
79 xfs_icreate_item.o \
80 xfs_inode.o \
81 xfs_inode_fork.o \
82 xfs_inode_buf.o \
83 xfs_log_recover.o \
84 xfs_log_rlimit.o \
85 xfs_sb.o \
86 xfs_symlink_remote.o \
87 xfs_trans_resv.o
88
89# low-level transaction/log code 95# low-level transaction/log code
90xfs-y += xfs_log.o \ 96xfs-y += xfs_log.o \
91 xfs_log_cil.o \ 97 xfs_log_cil.o \
92 xfs_buf_item.o \ 98 xfs_buf_item.o \
93 xfs_extfree_item.o \ 99 xfs_extfree_item.o \
100 xfs_icreate_item.o \
94 xfs_inode_item.o \ 101 xfs_inode_item.o \
102 xfs_log_recover.o \
95 xfs_trans_ail.o \ 103 xfs_trans_ail.o \
96 xfs_trans_buf.o \ 104 xfs_trans_buf.o \
97 xfs_trans_extfree.o \ 105 xfs_trans_extfree.o \
@@ -107,8 +115,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
107 xfs_quotaops.o 115 xfs_quotaops.o
108 116
109# xfs_rtbitmap is shared with libxfs 117# xfs_rtbitmap is shared with libxfs
110xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \ 118xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
111 xfs_rtbitmap.o
112 119
113xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 120xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
114xfs-$(CONFIG_PROC_FS) += xfs_stats.o 121xfs-$(CONFIG_PROC_FS) += xfs_stats.o
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 6e247a99f5db..6e247a99f5db 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index d43813267a80..4bffffe038a1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -483,9 +483,9 @@ xfs_agfl_read_verify(
483 return; 483 return;
484 484
485 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 485 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
486 xfs_buf_ioerror(bp, EFSBADCRC); 486 xfs_buf_ioerror(bp, -EFSBADCRC);
487 else if (!xfs_agfl_verify(bp)) 487 else if (!xfs_agfl_verify(bp))
488 xfs_buf_ioerror(bp, EFSCORRUPTED); 488 xfs_buf_ioerror(bp, -EFSCORRUPTED);
489 489
490 if (bp->b_error) 490 if (bp->b_error)
491 xfs_verifier_error(bp); 491 xfs_verifier_error(bp);
@@ -503,7 +503,7 @@ xfs_agfl_write_verify(
503 return; 503 return;
504 504
505 if (!xfs_agfl_verify(bp)) { 505 if (!xfs_agfl_verify(bp)) {
506 xfs_buf_ioerror(bp, EFSCORRUPTED); 506 xfs_buf_ioerror(bp, -EFSCORRUPTED);
507 xfs_verifier_error(bp); 507 xfs_verifier_error(bp);
508 return; 508 return;
509 } 509 }
@@ -559,7 +559,7 @@ xfs_alloc_update_counters(
559 xfs_trans_agblocks_delta(tp, len); 559 xfs_trans_agblocks_delta(tp, len);
560 if (unlikely(be32_to_cpu(agf->agf_freeblks) > 560 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
561 be32_to_cpu(agf->agf_length))) 561 be32_to_cpu(agf->agf_length)))
562 return EFSCORRUPTED; 562 return -EFSCORRUPTED;
563 563
564 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 564 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
565 return 0; 565 return 0;
@@ -2234,11 +2234,11 @@ xfs_agf_read_verify(
2234 2234
2235 if (xfs_sb_version_hascrc(&mp->m_sb) && 2235 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2236 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2236 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2237 xfs_buf_ioerror(bp, EFSBADCRC); 2237 xfs_buf_ioerror(bp, -EFSBADCRC);
2238 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, 2238 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2239 XFS_ERRTAG_ALLOC_READ_AGF, 2239 XFS_ERRTAG_ALLOC_READ_AGF,
2240 XFS_RANDOM_ALLOC_READ_AGF)) 2240 XFS_RANDOM_ALLOC_READ_AGF))
2241 xfs_buf_ioerror(bp, EFSCORRUPTED); 2241 xfs_buf_ioerror(bp, -EFSCORRUPTED);
2242 2242
2243 if (bp->b_error) 2243 if (bp->b_error)
2244 xfs_verifier_error(bp); 2244 xfs_verifier_error(bp);
@@ -2252,7 +2252,7 @@ xfs_agf_write_verify(
2252 struct xfs_buf_log_item *bip = bp->b_fspriv; 2252 struct xfs_buf_log_item *bip = bp->b_fspriv;
2253 2253
2254 if (!xfs_agf_verify(mp, bp)) { 2254 if (!xfs_agf_verify(mp, bp)) {
2255 xfs_buf_ioerror(bp, EFSCORRUPTED); 2255 xfs_buf_ioerror(bp, -EFSCORRUPTED);
2256 xfs_verifier_error(bp); 2256 xfs_verifier_error(bp);
2257 return; 2257 return;
2258 } 2258 }
@@ -2601,11 +2601,11 @@ xfs_free_extent(
2601 */ 2601 */
2602 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2602 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2603 if (args.agno >= args.mp->m_sb.sb_agcount) 2603 if (args.agno >= args.mp->m_sb.sb_agcount)
2604 return EFSCORRUPTED; 2604 return -EFSCORRUPTED;
2605 2605
2606 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2606 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2607 if (args.agbno >= args.mp->m_sb.sb_agblocks) 2607 if (args.agbno >= args.mp->m_sb.sb_agblocks)
2608 return EFSCORRUPTED; 2608 return -EFSCORRUPTED;
2609 2609
2610 args.pag = xfs_perag_get(args.mp, args.agno); 2610 args.pag = xfs_perag_get(args.mp, args.agno);
2611 ASSERT(args.pag); 2611 ASSERT(args.pag);
@@ -2617,7 +2617,7 @@ xfs_free_extent(
2617 /* validate the extent size is legal now we have the agf locked */ 2617 /* validate the extent size is legal now we have the agf locked */
2618 if (args.agbno + len > 2618 if (args.agbno + len >
2619 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { 2619 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2620 error = EFSCORRUPTED; 2620 error = -EFSCORRUPTED;
2621 goto error0; 2621 goto error0;
2622 } 2622 }
2623 2623
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index feacb061bab7..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 8358f1ded94d..e0e83e24d3ef 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -355,9 +355,9 @@ xfs_allocbt_read_verify(
355 struct xfs_buf *bp) 355 struct xfs_buf *bp)
356{ 356{
357 if (!xfs_btree_sblock_verify_crc(bp)) 357 if (!xfs_btree_sblock_verify_crc(bp))
358 xfs_buf_ioerror(bp, EFSBADCRC); 358 xfs_buf_ioerror(bp, -EFSBADCRC);
359 else if (!xfs_allocbt_verify(bp)) 359 else if (!xfs_allocbt_verify(bp))
360 xfs_buf_ioerror(bp, EFSCORRUPTED); 360 xfs_buf_ioerror(bp, -EFSCORRUPTED);
361 361
362 if (bp->b_error) { 362 if (bp->b_error) {
363 trace_xfs_btree_corrupt(bp, _RET_IP_); 363 trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -371,7 +371,7 @@ xfs_allocbt_write_verify(
371{ 371{
372 if (!xfs_allocbt_verify(bp)) { 372 if (!xfs_allocbt_verify(bp)) {
373 trace_xfs_btree_corrupt(bp, _RET_IP_); 373 trace_xfs_btree_corrupt(bp, _RET_IP_);
374 xfs_buf_ioerror(bp, EFSCORRUPTED); 374 xfs_buf_ioerror(bp, -EFSCORRUPTED);
375 xfs_verifier_error(bp); 375 xfs_verifier_error(bp);
376 return; 376 return;
377 } 377 }
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 45e189e7e81c..45e189e7e81c 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index bfe36fc2cdc2..353fb425faef 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -85,7 +85,7 @@ xfs_attr_args_init(
85{ 85{
86 86
87 if (!name) 87 if (!name)
88 return EINVAL; 88 return -EINVAL;
89 89
90 memset(args, 0, sizeof(*args)); 90 memset(args, 0, sizeof(*args));
91 args->geo = dp->i_mount->m_attr_geo; 91 args->geo = dp->i_mount->m_attr_geo;
@@ -95,7 +95,7 @@ xfs_attr_args_init(
95 args->name = name; 95 args->name = name;
96 args->namelen = strlen((const char *)name); 96 args->namelen = strlen((const char *)name);
97 if (args->namelen >= MAXNAMELEN) 97 if (args->namelen >= MAXNAMELEN)
98 return EFAULT; /* match IRIX behaviour */ 98 return -EFAULT; /* match IRIX behaviour */
99 99
100 args->hashval = xfs_da_hashname(args->name, args->namelen); 100 args->hashval = xfs_da_hashname(args->name, args->namelen);
101 return 0; 101 return 0;
@@ -131,10 +131,10 @@ xfs_attr_get(
131 XFS_STATS_INC(xs_attr_get); 131 XFS_STATS_INC(xs_attr_get);
132 132
133 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 133 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
134 return EIO; 134 return -EIO;
135 135
136 if (!xfs_inode_hasattr(ip)) 136 if (!xfs_inode_hasattr(ip))
137 return ENOATTR; 137 return -ENOATTR;
138 138
139 error = xfs_attr_args_init(&args, ip, name, flags); 139 error = xfs_attr_args_init(&args, ip, name, flags);
140 if (error) 140 if (error)
@@ -145,7 +145,7 @@ xfs_attr_get(
145 145
146 lock_mode = xfs_ilock_attr_map_shared(ip); 146 lock_mode = xfs_ilock_attr_map_shared(ip);
147 if (!xfs_inode_hasattr(ip)) 147 if (!xfs_inode_hasattr(ip))
148 error = ENOATTR; 148 error = -ENOATTR;
149 else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) 149 else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
150 error = xfs_attr_shortform_getvalue(&args); 150 error = xfs_attr_shortform_getvalue(&args);
151 else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) 151 else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
@@ -155,7 +155,7 @@ xfs_attr_get(
155 xfs_iunlock(ip, lock_mode); 155 xfs_iunlock(ip, lock_mode);
156 156
157 *valuelenp = args.valuelen; 157 *valuelenp = args.valuelen;
158 return error == EEXIST ? 0 : error; 158 return error == -EEXIST ? 0 : error;
159} 159}
160 160
161/* 161/*
@@ -213,7 +213,7 @@ xfs_attr_set(
213 XFS_STATS_INC(xs_attr_set); 213 XFS_STATS_INC(xs_attr_set);
214 214
215 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 215 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
216 return EIO; 216 return -EIO;
217 217
218 error = xfs_attr_args_init(&args, dp, name, flags); 218 error = xfs_attr_args_init(&args, dp, name, flags);
219 if (error) 219 if (error)
@@ -304,7 +304,7 @@ xfs_attr_set(
304 * the inode. 304 * the inode.
305 */ 305 */
306 error = xfs_attr_shortform_addname(&args); 306 error = xfs_attr_shortform_addname(&args);
307 if (error != ENOSPC) { 307 if (error != -ENOSPC) {
308 /* 308 /*
309 * Commit the shortform mods, and we're done. 309 * Commit the shortform mods, and we're done.
310 * NOTE: this is also the error path (EEXIST, etc). 310 * NOTE: this is also the error path (EEXIST, etc).
@@ -419,10 +419,10 @@ xfs_attr_remove(
419 XFS_STATS_INC(xs_attr_remove); 419 XFS_STATS_INC(xs_attr_remove);
420 420
421 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 421 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
422 return EIO; 422 return -EIO;
423 423
424 if (!xfs_inode_hasattr(dp)) 424 if (!xfs_inode_hasattr(dp))
425 return ENOATTR; 425 return -ENOATTR;
426 426
427 error = xfs_attr_args_init(&args, dp, name, flags); 427 error = xfs_attr_args_init(&args, dp, name, flags);
428 if (error) 428 if (error)
@@ -477,7 +477,7 @@ xfs_attr_remove(
477 xfs_trans_ijoin(args.trans, dp, 0); 477 xfs_trans_ijoin(args.trans, dp, 0);
478 478
479 if (!xfs_inode_hasattr(dp)) { 479 if (!xfs_inode_hasattr(dp)) {
480 error = XFS_ERROR(ENOATTR); 480 error = -ENOATTR;
481 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { 481 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
482 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); 482 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
483 error = xfs_attr_shortform_remove(&args); 483 error = xfs_attr_shortform_remove(&args);
@@ -534,28 +534,28 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
534 trace_xfs_attr_sf_addname(args); 534 trace_xfs_attr_sf_addname(args);
535 535
536 retval = xfs_attr_shortform_lookup(args); 536 retval = xfs_attr_shortform_lookup(args);
537 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { 537 if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
538 return(retval); 538 return retval;
539 } else if (retval == EEXIST) { 539 } else if (retval == -EEXIST) {
540 if (args->flags & ATTR_CREATE) 540 if (args->flags & ATTR_CREATE)
541 return(retval); 541 return retval;
542 retval = xfs_attr_shortform_remove(args); 542 retval = xfs_attr_shortform_remove(args);
543 ASSERT(retval == 0); 543 ASSERT(retval == 0);
544 } 544 }
545 545
546 if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || 546 if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
547 args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX) 547 args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
548 return(XFS_ERROR(ENOSPC)); 548 return -ENOSPC;
549 549
550 newsize = XFS_ATTR_SF_TOTSIZE(args->dp); 550 newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
551 newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen); 551 newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
552 552
553 forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize); 553 forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
554 if (!forkoff) 554 if (!forkoff)
555 return(XFS_ERROR(ENOSPC)); 555 return -ENOSPC;
556 556
557 xfs_attr_shortform_add(args, forkoff); 557 xfs_attr_shortform_add(args, forkoff);
558 return(0); 558 return 0;
559} 559}
560 560
561 561
@@ -592,10 +592,10 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
592 * the given flags produce an error or call for an atomic rename. 592 * the given flags produce an error or call for an atomic rename.
593 */ 593 */
594 retval = xfs_attr3_leaf_lookup_int(bp, args); 594 retval = xfs_attr3_leaf_lookup_int(bp, args);
595 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { 595 if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
596 xfs_trans_brelse(args->trans, bp); 596 xfs_trans_brelse(args->trans, bp);
597 return retval; 597 return retval;
598 } else if (retval == EEXIST) { 598 } else if (retval == -EEXIST) {
599 if (args->flags & ATTR_CREATE) { /* pure create op */ 599 if (args->flags & ATTR_CREATE) { /* pure create op */
600 xfs_trans_brelse(args->trans, bp); 600 xfs_trans_brelse(args->trans, bp);
601 return retval; 601 return retval;
@@ -626,7 +626,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
626 * if required. 626 * if required.
627 */ 627 */
628 retval = xfs_attr3_leaf_add(bp, args); 628 retval = xfs_attr3_leaf_add(bp, args);
629 if (retval == ENOSPC) { 629 if (retval == -ENOSPC) {
630 /* 630 /*
631 * Promote the attribute list to the Btree format, then 631 * Promote the attribute list to the Btree format, then
632 * Commit that transaction so that the node_addname() call 632 * Commit that transaction so that the node_addname() call
@@ -642,7 +642,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
642 ASSERT(committed); 642 ASSERT(committed);
643 args->trans = NULL; 643 args->trans = NULL;
644 xfs_bmap_cancel(args->flist); 644 xfs_bmap_cancel(args->flist);
645 return(error); 645 return error;
646 } 646 }
647 647
648 /* 648 /*
@@ -658,13 +658,13 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
658 */ 658 */
659 error = xfs_trans_roll(&args->trans, dp); 659 error = xfs_trans_roll(&args->trans, dp);
660 if (error) 660 if (error)
661 return (error); 661 return error;
662 662
663 /* 663 /*
664 * Fob the whole rest of the problem off on the Btree code. 664 * Fob the whole rest of the problem off on the Btree code.
665 */ 665 */
666 error = xfs_attr_node_addname(args); 666 error = xfs_attr_node_addname(args);
667 return(error); 667 return error;
668 } 668 }
669 669
670 /* 670 /*
@@ -673,7 +673,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
673 */ 673 */
674 error = xfs_trans_roll(&args->trans, dp); 674 error = xfs_trans_roll(&args->trans, dp);
675 if (error) 675 if (error)
676 return (error); 676 return error;
677 677
678 /* 678 /*
679 * If there was an out-of-line value, allocate the blocks we 679 * If there was an out-of-line value, allocate the blocks we
@@ -684,7 +684,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
684 if (args->rmtblkno > 0) { 684 if (args->rmtblkno > 0) {
685 error = xfs_attr_rmtval_set(args); 685 error = xfs_attr_rmtval_set(args);
686 if (error) 686 if (error)
687 return(error); 687 return error;
688 } 688 }
689 689
690 /* 690 /*
@@ -700,7 +700,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
700 */ 700 */
701 error = xfs_attr3_leaf_flipflags(args); 701 error = xfs_attr3_leaf_flipflags(args);
702 if (error) 702 if (error)
703 return(error); 703 return error;
704 704
705 /* 705 /*
706 * Dismantle the "old" attribute/value pair by removing 706 * Dismantle the "old" attribute/value pair by removing
@@ -714,7 +714,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
714 if (args->rmtblkno) { 714 if (args->rmtblkno) {
715 error = xfs_attr_rmtval_remove(args); 715 error = xfs_attr_rmtval_remove(args);
716 if (error) 716 if (error)
717 return(error); 717 return error;
718 } 718 }
719 719
720 /* 720 /*
@@ -744,7 +744,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
744 ASSERT(committed); 744 ASSERT(committed);
745 args->trans = NULL; 745 args->trans = NULL;
746 xfs_bmap_cancel(args->flist); 746 xfs_bmap_cancel(args->flist);
747 return(error); 747 return error;
748 } 748 }
749 749
750 /* 750 /*
@@ -795,7 +795,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
795 return error; 795 return error;
796 796
797 error = xfs_attr3_leaf_lookup_int(bp, args); 797 error = xfs_attr3_leaf_lookup_int(bp, args);
798 if (error == ENOATTR) { 798 if (error == -ENOATTR) {
799 xfs_trans_brelse(args->trans, bp); 799 xfs_trans_brelse(args->trans, bp);
800 return error; 800 return error;
801 } 801 }
@@ -850,7 +850,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
850 return error; 850 return error;
851 851
852 error = xfs_attr3_leaf_lookup_int(bp, args); 852 error = xfs_attr3_leaf_lookup_int(bp, args);
853 if (error != EEXIST) { 853 if (error != -EEXIST) {
854 xfs_trans_brelse(args->trans, bp); 854 xfs_trans_brelse(args->trans, bp);
855 return error; 855 return error;
856 } 856 }
@@ -906,9 +906,9 @@ restart:
906 goto out; 906 goto out;
907 blk = &state->path.blk[ state->path.active-1 ]; 907 blk = &state->path.blk[ state->path.active-1 ];
908 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 908 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
909 if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) { 909 if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
910 goto out; 910 goto out;
911 } else if (retval == EEXIST) { 911 } else if (retval == -EEXIST) {
912 if (args->flags & ATTR_CREATE) 912 if (args->flags & ATTR_CREATE)
913 goto out; 913 goto out;
914 914
@@ -933,7 +933,7 @@ restart:
933 } 933 }
934 934
935 retval = xfs_attr3_leaf_add(blk->bp, state->args); 935 retval = xfs_attr3_leaf_add(blk->bp, state->args);
936 if (retval == ENOSPC) { 936 if (retval == -ENOSPC) {
937 if (state->path.active == 1) { 937 if (state->path.active == 1) {
938 /* 938 /*
939 * Its really a single leaf node, but it had 939 * Its really a single leaf node, but it had
@@ -1031,7 +1031,7 @@ restart:
1031 if (args->rmtblkno > 0) { 1031 if (args->rmtblkno > 0) {
1032 error = xfs_attr_rmtval_set(args); 1032 error = xfs_attr_rmtval_set(args);
1033 if (error) 1033 if (error)
1034 return(error); 1034 return error;
1035 } 1035 }
1036 1036
1037 /* 1037 /*
@@ -1061,7 +1061,7 @@ restart:
1061 if (args->rmtblkno) { 1061 if (args->rmtblkno) {
1062 error = xfs_attr_rmtval_remove(args); 1062 error = xfs_attr_rmtval_remove(args);
1063 if (error) 1063 if (error)
1064 return(error); 1064 return error;
1065 } 1065 }
1066 1066
1067 /* 1067 /*
@@ -1134,8 +1134,8 @@ out:
1134 if (state) 1134 if (state)
1135 xfs_da_state_free(state); 1135 xfs_da_state_free(state);
1136 if (error) 1136 if (error)
1137 return(error); 1137 return error;
1138 return(retval); 1138 return retval;
1139} 1139}
1140 1140
1141/* 1141/*
@@ -1168,7 +1168,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1168 * Search to see if name exists, and get back a pointer to it. 1168 * Search to see if name exists, and get back a pointer to it.
1169 */ 1169 */
1170 error = xfs_da3_node_lookup_int(state, &retval); 1170 error = xfs_da3_node_lookup_int(state, &retval);
1171 if (error || (retval != EEXIST)) { 1171 if (error || (retval != -EEXIST)) {
1172 if (error == 0) 1172 if (error == 0)
1173 error = retval; 1173 error = retval;
1174 goto out; 1174 goto out;
@@ -1297,7 +1297,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1297 1297
1298out: 1298out:
1299 xfs_da_state_free(state); 1299 xfs_da_state_free(state);
1300 return(error); 1300 return error;
1301} 1301}
1302 1302
1303/* 1303/*
@@ -1345,7 +1345,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
1345 } 1345 }
1346 } 1346 }
1347 1347
1348 return(0); 1348 return 0;
1349} 1349}
1350 1350
1351/* 1351/*
@@ -1376,7 +1376,7 @@ xfs_attr_refillstate(xfs_da_state_t *state)
1376 blk->blkno, blk->disk_blkno, 1376 blk->blkno, blk->disk_blkno,
1377 &blk->bp, XFS_ATTR_FORK); 1377 &blk->bp, XFS_ATTR_FORK);
1378 if (error) 1378 if (error)
1379 return(error); 1379 return error;
1380 } else { 1380 } else {
1381 blk->bp = NULL; 1381 blk->bp = NULL;
1382 } 1382 }
@@ -1395,13 +1395,13 @@ xfs_attr_refillstate(xfs_da_state_t *state)
1395 blk->blkno, blk->disk_blkno, 1395 blk->blkno, blk->disk_blkno,
1396 &blk->bp, XFS_ATTR_FORK); 1396 &blk->bp, XFS_ATTR_FORK);
1397 if (error) 1397 if (error)
1398 return(error); 1398 return error;
1399 } else { 1399 } else {
1400 blk->bp = NULL; 1400 blk->bp = NULL;
1401 } 1401 }
1402 } 1402 }
1403 1403
1404 return(0); 1404 return 0;
1405} 1405}
1406 1406
1407/* 1407/*
@@ -1431,7 +1431,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
1431 error = xfs_da3_node_lookup_int(state, &retval); 1431 error = xfs_da3_node_lookup_int(state, &retval);
1432 if (error) { 1432 if (error) {
1433 retval = error; 1433 retval = error;
1434 } else if (retval == EEXIST) { 1434 } else if (retval == -EEXIST) {
1435 blk = &state->path.blk[ state->path.active-1 ]; 1435 blk = &state->path.blk[ state->path.active-1 ];
1436 ASSERT(blk->bp != NULL); 1436 ASSERT(blk->bp != NULL);
1437 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); 1437 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1455,5 +1455,5 @@ xfs_attr_node_get(xfs_da_args_t *args)
1455 } 1455 }
1456 1456
1457 xfs_da_state_free(state); 1457 xfs_da_state_free(state);
1458 return(retval); 1458 return retval;
1459} 1459}
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 28712d29e43c..b1f73dbbf3d8 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -214,7 +214,7 @@ xfs_attr3_leaf_write_verify(
214 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 214 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
215 215
216 if (!xfs_attr3_leaf_verify(bp)) { 216 if (!xfs_attr3_leaf_verify(bp)) {
217 xfs_buf_ioerror(bp, EFSCORRUPTED); 217 xfs_buf_ioerror(bp, -EFSCORRUPTED);
218 xfs_verifier_error(bp); 218 xfs_verifier_error(bp);
219 return; 219 return;
220 } 220 }
@@ -242,9 +242,9 @@ xfs_attr3_leaf_read_verify(
242 242
243 if (xfs_sb_version_hascrc(&mp->m_sb) && 243 if (xfs_sb_version_hascrc(&mp->m_sb) &&
244 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) 244 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
245 xfs_buf_ioerror(bp, EFSBADCRC); 245 xfs_buf_ioerror(bp, -EFSBADCRC);
246 else if (!xfs_attr3_leaf_verify(bp)) 246 else if (!xfs_attr3_leaf_verify(bp))
247 xfs_buf_ioerror(bp, EFSCORRUPTED); 247 xfs_buf_ioerror(bp, -EFSCORRUPTED);
248 248
249 if (bp->b_error) 249 if (bp->b_error)
250 xfs_verifier_error(bp); 250 xfs_verifier_error(bp);
@@ -547,7 +547,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
547 break; 547 break;
548 } 548 }
549 if (i == end) 549 if (i == end)
550 return(XFS_ERROR(ENOATTR)); 550 return -ENOATTR;
551 551
552 /* 552 /*
553 * Fix up the attribute fork data, covering the hole 553 * Fix up the attribute fork data, covering the hole
@@ -582,7 +582,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
582 582
583 xfs_sbversion_add_attr2(mp, args->trans); 583 xfs_sbversion_add_attr2(mp, args->trans);
584 584
585 return(0); 585 return 0;
586} 586}
587 587
588/* 588/*
@@ -611,9 +611,9 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
611 continue; 611 continue;
612 if (!xfs_attr_namesp_match(args->flags, sfe->flags)) 612 if (!xfs_attr_namesp_match(args->flags, sfe->flags))
613 continue; 613 continue;
614 return(XFS_ERROR(EEXIST)); 614 return -EEXIST;
615 } 615 }
616 return(XFS_ERROR(ENOATTR)); 616 return -ENOATTR;
617} 617}
618 618
619/* 619/*
@@ -640,18 +640,18 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
640 continue; 640 continue;
641 if (args->flags & ATTR_KERNOVAL) { 641 if (args->flags & ATTR_KERNOVAL) {
642 args->valuelen = sfe->valuelen; 642 args->valuelen = sfe->valuelen;
643 return(XFS_ERROR(EEXIST)); 643 return -EEXIST;
644 } 644 }
645 if (args->valuelen < sfe->valuelen) { 645 if (args->valuelen < sfe->valuelen) {
646 args->valuelen = sfe->valuelen; 646 args->valuelen = sfe->valuelen;
647 return(XFS_ERROR(ERANGE)); 647 return -ERANGE;
648 } 648 }
649 args->valuelen = sfe->valuelen; 649 args->valuelen = sfe->valuelen;
650 memcpy(args->value, &sfe->nameval[args->namelen], 650 memcpy(args->value, &sfe->nameval[args->namelen],
651 args->valuelen); 651 args->valuelen);
652 return(XFS_ERROR(EEXIST)); 652 return -EEXIST;
653 } 653 }
654 return(XFS_ERROR(ENOATTR)); 654 return -ENOATTR;
655} 655}
656 656
657/* 657/*
@@ -691,7 +691,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
691 * If we hit an IO error middle of the transaction inside 691 * If we hit an IO error middle of the transaction inside
692 * grow_inode(), we may have inconsistent data. Bail out. 692 * grow_inode(), we may have inconsistent data. Bail out.
693 */ 693 */
694 if (error == EIO) 694 if (error == -EIO)
695 goto out; 695 goto out;
696 xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */ 696 xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
697 memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */ 697 memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
@@ -730,9 +730,9 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
730 sfe->namelen); 730 sfe->namelen);
731 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags); 731 nargs.flags = XFS_ATTR_NSP_ONDISK_TO_ARGS(sfe->flags);
732 error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ 732 error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
733 ASSERT(error == ENOATTR); 733 ASSERT(error == -ENOATTR);
734 error = xfs_attr3_leaf_add(bp, &nargs); 734 error = xfs_attr3_leaf_add(bp, &nargs);
735 ASSERT(error != ENOSPC); 735 ASSERT(error != -ENOSPC);
736 if (error) 736 if (error)
737 goto out; 737 goto out;
738 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 738 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
@@ -741,7 +741,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
741 741
742out: 742out:
743 kmem_free(tmpbuffer); 743 kmem_free(tmpbuffer);
744 return(error); 744 return error;
745} 745}
746 746
747/* 747/*
@@ -769,12 +769,12 @@ xfs_attr_shortform_allfit(
769 if (entry->flags & XFS_ATTR_INCOMPLETE) 769 if (entry->flags & XFS_ATTR_INCOMPLETE)
770 continue; /* don't copy partial entries */ 770 continue; /* don't copy partial entries */
771 if (!(entry->flags & XFS_ATTR_LOCAL)) 771 if (!(entry->flags & XFS_ATTR_LOCAL))
772 return(0); 772 return 0;
773 name_loc = xfs_attr3_leaf_name_local(leaf, i); 773 name_loc = xfs_attr3_leaf_name_local(leaf, i);
774 if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) 774 if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX)
775 return(0); 775 return 0;
776 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) 776 if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX)
777 return(0); 777 return 0;
778 bytes += sizeof(struct xfs_attr_sf_entry) - 1 778 bytes += sizeof(struct xfs_attr_sf_entry) - 1
779 + name_loc->namelen 779 + name_loc->namelen
780 + be16_to_cpu(name_loc->valuelen); 780 + be16_to_cpu(name_loc->valuelen);
@@ -809,7 +809,7 @@ xfs_attr3_leaf_to_shortform(
809 809
810 tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); 810 tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
811 if (!tmpbuffer) 811 if (!tmpbuffer)
812 return ENOMEM; 812 return -ENOMEM;
813 813
814 memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); 814 memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
815 815
@@ -1017,10 +1017,10 @@ xfs_attr3_leaf_split(
1017 ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC); 1017 ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
1018 error = xfs_da_grow_inode(state->args, &blkno); 1018 error = xfs_da_grow_inode(state->args, &blkno);
1019 if (error) 1019 if (error)
1020 return(error); 1020 return error;
1021 error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp); 1021 error = xfs_attr3_leaf_create(state->args, blkno, &newblk->bp);
1022 if (error) 1022 if (error)
1023 return(error); 1023 return error;
1024 newblk->blkno = blkno; 1024 newblk->blkno = blkno;
1025 newblk->magic = XFS_ATTR_LEAF_MAGIC; 1025 newblk->magic = XFS_ATTR_LEAF_MAGIC;
1026 1026
@@ -1031,7 +1031,7 @@ xfs_attr3_leaf_split(
1031 xfs_attr3_leaf_rebalance(state, oldblk, newblk); 1031 xfs_attr3_leaf_rebalance(state, oldblk, newblk);
1032 error = xfs_da3_blk_link(state, oldblk, newblk); 1032 error = xfs_da3_blk_link(state, oldblk, newblk);
1033 if (error) 1033 if (error)
1034 return(error); 1034 return error;
1035 1035
1036 /* 1036 /*
1037 * Save info on "old" attribute for "atomic rename" ops, leaf_add() 1037 * Save info on "old" attribute for "atomic rename" ops, leaf_add()
@@ -1053,7 +1053,7 @@ xfs_attr3_leaf_split(
1053 */ 1053 */
1054 oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); 1054 oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
1055 newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); 1055 newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
1056 return(error); 1056 return error;
1057} 1057}
1058 1058
1059/* 1059/*
@@ -1108,7 +1108,7 @@ xfs_attr3_leaf_add(
1108 * no good and we should just give up. 1108 * no good and we should just give up.
1109 */ 1109 */
1110 if (!ichdr.holes && sum < entsize) 1110 if (!ichdr.holes && sum < entsize)
1111 return XFS_ERROR(ENOSPC); 1111 return -ENOSPC;
1112 1112
1113 /* 1113 /*
1114 * Compact the entries to coalesce free space. 1114 * Compact the entries to coalesce free space.
@@ -1121,7 +1121,7 @@ xfs_attr3_leaf_add(
1121 * free region, in freemap[0]. If it is not big enough, give up. 1121 * free region, in freemap[0]. If it is not big enough, give up.
1122 */ 1122 */
1123 if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { 1123 if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
1124 tmp = ENOSPC; 1124 tmp = -ENOSPC;
1125 goto out_log_hdr; 1125 goto out_log_hdr;
1126 } 1126 }
1127 1127
@@ -1692,7 +1692,7 @@ xfs_attr3_leaf_toosmall(
1692 ichdr.usedbytes; 1692 ichdr.usedbytes;
1693 if (bytes > (state->args->geo->blksize >> 1)) { 1693 if (bytes > (state->args->geo->blksize >> 1)) {
1694 *action = 0; /* blk over 50%, don't try to join */ 1694 *action = 0; /* blk over 50%, don't try to join */
1695 return(0); 1695 return 0;
1696 } 1696 }
1697 1697
1698 /* 1698 /*
@@ -1711,7 +1711,7 @@ xfs_attr3_leaf_toosmall(
1711 error = xfs_da3_path_shift(state, &state->altpath, forward, 1711 error = xfs_da3_path_shift(state, &state->altpath, forward,
1712 0, &retval); 1712 0, &retval);
1713 if (error) 1713 if (error)
1714 return(error); 1714 return error;
1715 if (retval) { 1715 if (retval) {
1716 *action = 0; 1716 *action = 0;
1717 } else { 1717 } else {
@@ -1740,7 +1740,7 @@ xfs_attr3_leaf_toosmall(
1740 error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, 1740 error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
1741 blkno, -1, &bp); 1741 blkno, -1, &bp);
1742 if (error) 1742 if (error)
1743 return(error); 1743 return error;
1744 1744
1745 xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); 1745 xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
1746 1746
@@ -1757,7 +1757,7 @@ xfs_attr3_leaf_toosmall(
1757 } 1757 }
1758 if (i >= 2) { 1758 if (i >= 2) {
1759 *action = 0; 1759 *action = 0;
1760 return(0); 1760 return 0;
1761 } 1761 }
1762 1762
1763 /* 1763 /*
@@ -1773,13 +1773,13 @@ xfs_attr3_leaf_toosmall(
1773 0, &retval); 1773 0, &retval);
1774 } 1774 }
1775 if (error) 1775 if (error)
1776 return(error); 1776 return error;
1777 if (retval) { 1777 if (retval) {
1778 *action = 0; 1778 *action = 0;
1779 } else { 1779 } else {
1780 *action = 1; 1780 *action = 1;
1781 } 1781 }
1782 return(0); 1782 return 0;
1783} 1783}
1784 1784
1785/* 1785/*
@@ -2123,7 +2123,7 @@ xfs_attr3_leaf_lookup_int(
2123 } 2123 }
2124 if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) { 2124 if (probe == ichdr.count || be32_to_cpu(entry->hashval) != hashval) {
2125 args->index = probe; 2125 args->index = probe;
2126 return XFS_ERROR(ENOATTR); 2126 return -ENOATTR;
2127 } 2127 }
2128 2128
2129 /* 2129 /*
@@ -2152,7 +2152,7 @@ xfs_attr3_leaf_lookup_int(
2152 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2152 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2153 continue; 2153 continue;
2154 args->index = probe; 2154 args->index = probe;
2155 return XFS_ERROR(EEXIST); 2155 return -EEXIST;
2156 } else { 2156 } else {
2157 name_rmt = xfs_attr3_leaf_name_remote(leaf, probe); 2157 name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
2158 if (name_rmt->namelen != args->namelen) 2158 if (name_rmt->namelen != args->namelen)
@@ -2168,11 +2168,11 @@ xfs_attr3_leaf_lookup_int(
2168 args->rmtblkcnt = xfs_attr3_rmt_blocks( 2168 args->rmtblkcnt = xfs_attr3_rmt_blocks(
2169 args->dp->i_mount, 2169 args->dp->i_mount,
2170 args->rmtvaluelen); 2170 args->rmtvaluelen);
2171 return XFS_ERROR(EEXIST); 2171 return -EEXIST;
2172 } 2172 }
2173 } 2173 }
2174 args->index = probe; 2174 args->index = probe;
2175 return XFS_ERROR(ENOATTR); 2175 return -ENOATTR;
2176} 2176}
2177 2177
2178/* 2178/*
@@ -2208,7 +2208,7 @@ xfs_attr3_leaf_getvalue(
2208 } 2208 }
2209 if (args->valuelen < valuelen) { 2209 if (args->valuelen < valuelen) {
2210 args->valuelen = valuelen; 2210 args->valuelen = valuelen;
2211 return XFS_ERROR(ERANGE); 2211 return -ERANGE;
2212 } 2212 }
2213 args->valuelen = valuelen; 2213 args->valuelen = valuelen;
2214 memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); 2214 memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
@@ -2226,7 +2226,7 @@ xfs_attr3_leaf_getvalue(
2226 } 2226 }
2227 if (args->valuelen < args->rmtvaluelen) { 2227 if (args->valuelen < args->rmtvaluelen) {
2228 args->valuelen = args->rmtvaluelen; 2228 args->valuelen = args->rmtvaluelen;
2229 return XFS_ERROR(ERANGE); 2229 return -ERANGE;
2230 } 2230 }
2231 args->valuelen = args->rmtvaluelen; 2231 args->valuelen = args->rmtvaluelen;
2232 } 2232 }
@@ -2481,7 +2481,7 @@ xfs_attr3_leaf_clearflag(
2481 */ 2481 */
2482 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 2482 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
2483 if (error) 2483 if (error)
2484 return(error); 2484 return error;
2485 2485
2486 leaf = bp->b_addr; 2486 leaf = bp->b_addr;
2487 entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; 2487 entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
@@ -2548,7 +2548,7 @@ xfs_attr3_leaf_setflag(
2548 */ 2548 */
2549 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); 2549 error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
2550 if (error) 2550 if (error)
2551 return(error); 2551 return error;
2552 2552
2553 leaf = bp->b_addr; 2553 leaf = bp->b_addr;
2554#ifdef DEBUG 2554#ifdef DEBUG
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index e2929da7c3ba..e2929da7c3ba 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index b5adfecbb8ee..7510ab8058a4 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -138,11 +138,11 @@ xfs_attr3_rmt_read_verify(
138 138
139 while (len > 0) { 139 while (len > 0) {
140 if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 140 if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
141 xfs_buf_ioerror(bp, EFSBADCRC); 141 xfs_buf_ioerror(bp, -EFSBADCRC);
142 break; 142 break;
143 } 143 }
144 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 144 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
145 xfs_buf_ioerror(bp, EFSCORRUPTED); 145 xfs_buf_ioerror(bp, -EFSCORRUPTED);
146 break; 146 break;
147 } 147 }
148 len -= blksize; 148 len -= blksize;
@@ -178,7 +178,7 @@ xfs_attr3_rmt_write_verify(
178 178
179 while (len > 0) { 179 while (len > 0) {
180 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 180 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
181 xfs_buf_ioerror(bp, EFSCORRUPTED); 181 xfs_buf_ioerror(bp, -EFSCORRUPTED);
182 xfs_verifier_error(bp); 182 xfs_verifier_error(bp);
183 return; 183 return;
184 } 184 }
@@ -257,7 +257,7 @@ xfs_attr_rmtval_copyout(
257 xfs_alert(mp, 257 xfs_alert(mp,
258"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 258"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
259 bno, *offset, byte_cnt, ino); 259 bno, *offset, byte_cnt, ino);
260 return EFSCORRUPTED; 260 return -EFSCORRUPTED;
261 } 261 }
262 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 262 hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
263 } 263 }
@@ -452,7 +452,7 @@ xfs_attr_rmtval_set(
452 ASSERT(committed); 452 ASSERT(committed);
453 args->trans = NULL; 453 args->trans = NULL;
454 xfs_bmap_cancel(args->flist); 454 xfs_bmap_cancel(args->flist);
455 return(error); 455 return error;
456 } 456 }
457 457
458 /* 458 /*
@@ -473,7 +473,7 @@ xfs_attr_rmtval_set(
473 */ 473 */
474 error = xfs_trans_roll(&args->trans, dp); 474 error = xfs_trans_roll(&args->trans, dp);
475 if (error) 475 if (error)
476 return (error); 476 return error;
477 } 477 }
478 478
479 /* 479 /*
@@ -498,7 +498,7 @@ xfs_attr_rmtval_set(
498 blkcnt, &map, &nmap, 498 blkcnt, &map, &nmap,
499 XFS_BMAPI_ATTRFORK); 499 XFS_BMAPI_ATTRFORK);
500 if (error) 500 if (error)
501 return(error); 501 return error;
502 ASSERT(nmap == 1); 502 ASSERT(nmap == 1);
503 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 503 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
504 (map.br_startblock != HOLESTARTBLOCK)); 504 (map.br_startblock != HOLESTARTBLOCK));
@@ -508,7 +508,7 @@ xfs_attr_rmtval_set(
508 508
509 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); 509 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
510 if (!bp) 510 if (!bp)
511 return ENOMEM; 511 return -ENOMEM;
512 bp->b_ops = &xfs_attr3_rmt_buf_ops; 512 bp->b_ops = &xfs_attr3_rmt_buf_ops;
513 513
514 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 514 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
@@ -563,7 +563,7 @@ xfs_attr_rmtval_remove(
563 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 563 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
564 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 564 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
565 if (error) 565 if (error)
566 return(error); 566 return error;
567 ASSERT(nmap == 1); 567 ASSERT(nmap == 1);
568 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 568 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
569 (map.br_startblock != HOLESTARTBLOCK)); 569 (map.br_startblock != HOLESTARTBLOCK));
@@ -622,7 +622,7 @@ xfs_attr_rmtval_remove(
622 */ 622 */
623 error = xfs_trans_roll(&args->trans, args->dp); 623 error = xfs_trans_roll(&args->trans, args->dp);
624 if (error) 624 if (error)
625 return (error); 625 return error;
626 } 626 }
627 return(0); 627 return 0;
628} 628}
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 5a9acfa156d7..5a9acfa156d7 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 919756e3ba53..919756e3ba53 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/libxfs/xfs_bit.h
index e1649c0d3e02..e1649c0d3e02 100644
--- a/fs/xfs/xfs_bit.h
+++ b/fs/xfs/libxfs/xfs_bit.h
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 75c3fe5f3d9d..de2d26d32844 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -392,7 +392,7 @@ xfs_bmap_check_leaf_extents(
392 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 392 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
393 bno = be64_to_cpu(*pp); 393 bno = be64_to_cpu(*pp);
394 394
395 ASSERT(bno != NULLDFSBNO); 395 ASSERT(bno != NULLFSBLOCK);
396 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 396 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
397 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 397 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
398 398
@@ -1033,7 +1033,7 @@ xfs_bmap_add_attrfork_btree(
1033 goto error0; 1033 goto error0;
1034 if (stat == 0) { 1034 if (stat == 0) {
1035 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1035 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1036 return XFS_ERROR(ENOSPC); 1036 return -ENOSPC;
1037 } 1037 }
1038 *firstblock = cur->bc_private.b.firstblock; 1038 *firstblock = cur->bc_private.b.firstblock;
1039 cur->bc_private.b.allocated = 0; 1039 cur->bc_private.b.allocated = 0;
@@ -1115,7 +1115,7 @@ xfs_bmap_add_attrfork_local(
1115 1115
1116 /* should only be called for types that support local format data */ 1116 /* should only be called for types that support local format data */
1117 ASSERT(0); 1117 ASSERT(0);
1118 return EFSCORRUPTED; 1118 return -EFSCORRUPTED;
1119} 1119}
1120 1120
1121/* 1121/*
@@ -1192,7 +1192,7 @@ xfs_bmap_add_attrfork(
1192 break; 1192 break;
1193 default: 1193 default:
1194 ASSERT(0); 1194 ASSERT(0);
1195 error = XFS_ERROR(EINVAL); 1195 error = -EINVAL;
1196 goto trans_cancel; 1196 goto trans_cancel;
1197 } 1197 }
1198 1198
@@ -1299,7 +1299,7 @@ xfs_bmap_read_extents(
1299 ASSERT(level > 0); 1299 ASSERT(level > 0);
1300 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 1300 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
1301 bno = be64_to_cpu(*pp); 1301 bno = be64_to_cpu(*pp);
1302 ASSERT(bno != NULLDFSBNO); 1302 ASSERT(bno != NULLFSBLOCK);
1303 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 1303 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
1304 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 1304 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
1305 /* 1305 /*
@@ -1399,7 +1399,7 @@ xfs_bmap_read_extents(
1399 return 0; 1399 return 0;
1400error0: 1400error0:
1401 xfs_trans_brelse(tp, bp); 1401 xfs_trans_brelse(tp, bp);
1402 return XFS_ERROR(EFSCORRUPTED); 1402 return -EFSCORRUPTED;
1403} 1403}
1404 1404
1405 1405
@@ -1429,11 +1429,7 @@ xfs_bmap_search_multi_extents(
1429 gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL; 1429 gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1430 gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL; 1430 gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1431 gotp->br_state = XFS_EXT_INVALID; 1431 gotp->br_state = XFS_EXT_INVALID;
1432#if XFS_BIG_BLKNOS
1433 gotp->br_startblock = 0xffffa5a5a5a5a5a5LL; 1432 gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1434#else
1435 gotp->br_startblock = 0xffffa5a5;
1436#endif
1437 prevp->br_startoff = NULLFILEOFF; 1433 prevp->br_startoff = NULLFILEOFF;
1438 1434
1439 ep = xfs_iext_bno_to_ext(ifp, bno, &lastx); 1435 ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
@@ -1576,7 +1572,7 @@ xfs_bmap_last_before(
1576 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && 1572 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1577 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 1573 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
1578 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) 1574 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
1579 return XFS_ERROR(EIO); 1575 return -EIO;
1580 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 1576 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1581 *last_block = 0; 1577 *last_block = 0;
1582 return 0; 1578 return 0;
@@ -1690,7 +1686,7 @@ xfs_bmap_last_offset(
1690 1686
1691 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && 1687 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1692 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 1688 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1693 return XFS_ERROR(EIO); 1689 return -EIO;
1694 1690
1695 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); 1691 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1696 if (error || is_empty) 1692 if (error || is_empty)
@@ -3323,7 +3319,7 @@ xfs_bmap_extsize_align(
3323 if (orig_off < align_off || 3319 if (orig_off < align_off ||
3324 orig_end > align_off + align_alen || 3320 orig_end > align_off + align_alen ||
3325 align_alen - temp < orig_alen) 3321 align_alen - temp < orig_alen)
3326 return XFS_ERROR(EINVAL); 3322 return -EINVAL;
3327 /* 3323 /*
3328 * Try to fix it by moving the start up. 3324 * Try to fix it by moving the start up.
3329 */ 3325 */
@@ -3348,7 +3344,7 @@ xfs_bmap_extsize_align(
3348 * Result doesn't cover the request, fail it. 3344 * Result doesn't cover the request, fail it.
3349 */ 3345 */
3350 if (orig_off < align_off || orig_end > align_off + align_alen) 3346 if (orig_off < align_off || orig_end > align_off + align_alen)
3351 return XFS_ERROR(EINVAL); 3347 return -EINVAL;
3352 } else { 3348 } else {
3353 ASSERT(orig_off >= align_off); 3349 ASSERT(orig_off >= align_off);
3354 ASSERT(orig_end <= align_off + align_alen); 3350 ASSERT(orig_end <= align_off + align_alen);
@@ -4051,11 +4047,11 @@ xfs_bmapi_read(
4051 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4047 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4052 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { 4048 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4053 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); 4049 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
4054 return XFS_ERROR(EFSCORRUPTED); 4050 return -EFSCORRUPTED;
4055 } 4051 }
4056 4052
4057 if (XFS_FORCED_SHUTDOWN(mp)) 4053 if (XFS_FORCED_SHUTDOWN(mp))
4058 return XFS_ERROR(EIO); 4054 return -EIO;
4059 4055
4060 XFS_STATS_INC(xs_blk_mapr); 4056 XFS_STATS_INC(xs_blk_mapr);
4061 4057
@@ -4246,11 +4242,11 @@ xfs_bmapi_delay(
4246 XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE), 4242 XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4247 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { 4243 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4248 XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp); 4244 XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
4249 return XFS_ERROR(EFSCORRUPTED); 4245 return -EFSCORRUPTED;
4250 } 4246 }
4251 4247
4252 if (XFS_FORCED_SHUTDOWN(mp)) 4248 if (XFS_FORCED_SHUTDOWN(mp))
4253 return XFS_ERROR(EIO); 4249 return -EIO;
4254 4250
4255 XFS_STATS_INC(xs_blk_mapw); 4251 XFS_STATS_INC(xs_blk_mapw);
4256 4252
@@ -4469,7 +4465,7 @@ xfs_bmapi_convert_unwritten(
4469 * so generate another request. 4465 * so generate another request.
4470 */ 4466 */
4471 if (mval->br_blockcount < len) 4467 if (mval->br_blockcount < len)
4472 return EAGAIN; 4468 return -EAGAIN;
4473 return 0; 4469 return 0;
4474} 4470}
4475 4471
@@ -4540,11 +4536,11 @@ xfs_bmapi_write(
4540 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4536 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
4541 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { 4537 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4542 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); 4538 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
4543 return XFS_ERROR(EFSCORRUPTED); 4539 return -EFSCORRUPTED;
4544 } 4540 }
4545 4541
4546 if (XFS_FORCED_SHUTDOWN(mp)) 4542 if (XFS_FORCED_SHUTDOWN(mp))
4547 return XFS_ERROR(EIO); 4543 return -EIO;
4548 4544
4549 ifp = XFS_IFORK_PTR(ip, whichfork); 4545 ifp = XFS_IFORK_PTR(ip, whichfork);
4550 4546
@@ -4620,7 +4616,7 @@ xfs_bmapi_write(
4620 4616
4621 /* Execute unwritten extent conversion if necessary */ 4617 /* Execute unwritten extent conversion if necessary */
4622 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); 4618 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4623 if (error == EAGAIN) 4619 if (error == -EAGAIN)
4624 continue; 4620 continue;
4625 if (error) 4621 if (error)
4626 goto error0; 4622 goto error0;
@@ -4922,7 +4918,7 @@ xfs_bmap_del_extent(
4922 goto done; 4918 goto done;
4923 cur->bc_rec.b = new; 4919 cur->bc_rec.b = new;
4924 error = xfs_btree_insert(cur, &i); 4920 error = xfs_btree_insert(cur, &i);
4925 if (error && error != ENOSPC) 4921 if (error && error != -ENOSPC)
4926 goto done; 4922 goto done;
4927 /* 4923 /*
4928 * If get no-space back from btree insert, 4924 * If get no-space back from btree insert,
@@ -4930,7 +4926,7 @@ xfs_bmap_del_extent(
4930 * block reservation. 4926 * block reservation.
4931 * Fix up our state and return the error. 4927 * Fix up our state and return the error.
4932 */ 4928 */
4933 if (error == ENOSPC) { 4929 if (error == -ENOSPC) {
4934 /* 4930 /*
4935 * Reset the cursor, don't trust 4931 * Reset the cursor, don't trust
4936 * it after any insert operation. 4932 * it after any insert operation.
@@ -4958,7 +4954,7 @@ xfs_bmap_del_extent(
4958 xfs_bmbt_set_blockcount(ep, 4954 xfs_bmbt_set_blockcount(ep,
4959 got.br_blockcount); 4955 got.br_blockcount);
4960 flags = 0; 4956 flags = 0;
4961 error = XFS_ERROR(ENOSPC); 4957 error = -ENOSPC;
4962 goto done; 4958 goto done;
4963 } 4959 }
4964 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 4960 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
@@ -5076,11 +5072,11 @@ xfs_bunmapi(
5076 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 5072 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5077 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, 5073 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5078 ip->i_mount); 5074 ip->i_mount);
5079 return XFS_ERROR(EFSCORRUPTED); 5075 return -EFSCORRUPTED;
5080 } 5076 }
5081 mp = ip->i_mount; 5077 mp = ip->i_mount;
5082 if (XFS_FORCED_SHUTDOWN(mp)) 5078 if (XFS_FORCED_SHUTDOWN(mp))
5083 return XFS_ERROR(EIO); 5079 return -EIO;
5084 5080
5085 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5081 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5086 ASSERT(len > 0); 5082 ASSERT(len > 0);
@@ -5325,7 +5321,7 @@ xfs_bunmapi(
5325 del.br_startoff > got.br_startoff && 5321 del.br_startoff > got.br_startoff &&
5326 del.br_startoff + del.br_blockcount < 5322 del.br_startoff + del.br_blockcount <
5327 got.br_startoff + got.br_blockcount) { 5323 got.br_startoff + got.br_blockcount) {
5328 error = XFS_ERROR(ENOSPC); 5324 error = -ENOSPC;
5329 goto error0; 5325 goto error0;
5330 } 5326 }
5331 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, 5327 error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
@@ -5449,11 +5445,11 @@ xfs_bmap_shift_extents(
5449 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { 5445 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5450 XFS_ERROR_REPORT("xfs_bmap_shift_extents", 5446 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5451 XFS_ERRLEVEL_LOW, mp); 5447 XFS_ERRLEVEL_LOW, mp);
5452 return XFS_ERROR(EFSCORRUPTED); 5448 return -EFSCORRUPTED;
5453 } 5449 }
5454 5450
5455 if (XFS_FORCED_SHUTDOWN(mp)) 5451 if (XFS_FORCED_SHUTDOWN(mp))
5456 return XFS_ERROR(EIO); 5452 return -EIO;
5457 5453
5458 ASSERT(current_ext != NULL); 5454 ASSERT(current_ext != NULL);
5459 5455
@@ -5516,14 +5512,14 @@ xfs_bmap_shift_extents(
5516 *current_ext - 1), &left); 5512 *current_ext - 1), &left);
5517 5513
5518 if (startoff < left.br_startoff + left.br_blockcount) 5514 if (startoff < left.br_startoff + left.br_blockcount)
5519 error = XFS_ERROR(EINVAL); 5515 error = -EINVAL;
5520 } else if (offset_shift_fsb > got.br_startoff) { 5516 } else if (offset_shift_fsb > got.br_startoff) {
5521 /* 5517 /*
5522 * When first extent is shifted, offset_shift_fsb 5518 * When first extent is shifted, offset_shift_fsb
5523 * should be less than the stating offset of 5519 * should be less than the stating offset of
5524 * the first extent. 5520 * the first extent.
5525 */ 5521 */
5526 error = XFS_ERROR(EINVAL); 5522 error = -EINVAL;
5527 } 5523 }
5528 5524
5529 if (error) 5525 if (error)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b879ca56a64c..b879ca56a64c 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 948836c4fd90..fba753308f31 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -111,23 +111,8 @@ __xfs_bmbt_get_all(
111 ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); 111 ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN));
112 s->br_startoff = ((xfs_fileoff_t)l0 & 112 s->br_startoff = ((xfs_fileoff_t)l0 &
113 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; 113 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
114#if XFS_BIG_BLKNOS
115 s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | 114 s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) |
116 (((xfs_fsblock_t)l1) >> 21); 115 (((xfs_fsblock_t)l1) >> 21);
117#else
118#ifdef DEBUG
119 {
120 xfs_dfsbno_t b;
121
122 b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) |
123 (((xfs_dfsbno_t)l1) >> 21);
124 ASSERT((b >> 32) == 0 || isnulldstartblock(b));
125 s->br_startblock = (xfs_fsblock_t)b;
126 }
127#else /* !DEBUG */
128 s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21);
129#endif /* DEBUG */
130#endif /* XFS_BIG_BLKNOS */
131 s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); 116 s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21));
132 /* This is xfs_extent_state() in-line */ 117 /* This is xfs_extent_state() in-line */
133 if (ext_flag) { 118 if (ext_flag) {
@@ -163,21 +148,8 @@ xfs_fsblock_t
163xfs_bmbt_get_startblock( 148xfs_bmbt_get_startblock(
164 xfs_bmbt_rec_host_t *r) 149 xfs_bmbt_rec_host_t *r)
165{ 150{
166#if XFS_BIG_BLKNOS
167 return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | 151 return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) |
168 (((xfs_fsblock_t)r->l1) >> 21); 152 (((xfs_fsblock_t)r->l1) >> 21);
169#else
170#ifdef DEBUG
171 xfs_dfsbno_t b;
172
173 b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) |
174 (((xfs_dfsbno_t)r->l1) >> 21);
175 ASSERT((b >> 32) == 0 || isnulldstartblock(b));
176 return (xfs_fsblock_t)b;
177#else /* !DEBUG */
178 return (xfs_fsblock_t)(((xfs_dfsbno_t)r->l1) >> 21);
179#endif /* DEBUG */
180#endif /* XFS_BIG_BLKNOS */
181} 153}
182 154
183/* 155/*
@@ -241,7 +213,6 @@ xfs_bmbt_set_allf(
241 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); 213 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
242 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); 214 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
243 215
244#if XFS_BIG_BLKNOS
245 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); 216 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
246 217
247 r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | 218 r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
@@ -250,23 +221,6 @@ xfs_bmbt_set_allf(
250 r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | 221 r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
251 ((xfs_bmbt_rec_base_t)blockcount & 222 ((xfs_bmbt_rec_base_t)blockcount &
252 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); 223 (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
253#else /* !XFS_BIG_BLKNOS */
254 if (isnullstartblock(startblock)) {
255 r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
256 ((xfs_bmbt_rec_base_t)startoff << 9) |
257 (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
258 r->l1 = xfs_mask64hi(11) |
259 ((xfs_bmbt_rec_base_t)startblock << 21) |
260 ((xfs_bmbt_rec_base_t)blockcount &
261 (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
262 } else {
263 r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) |
264 ((xfs_bmbt_rec_base_t)startoff << 9);
265 r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) |
266 ((xfs_bmbt_rec_base_t)blockcount &
267 (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
268 }
269#endif /* XFS_BIG_BLKNOS */
270} 224}
271 225
272/* 226/*
@@ -298,8 +252,6 @@ xfs_bmbt_disk_set_allf(
298 ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); 252 ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN);
299 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); 253 ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0);
300 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); 254 ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0);
301
302#if XFS_BIG_BLKNOS
303 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); 255 ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0);
304 256
305 r->l0 = cpu_to_be64( 257 r->l0 = cpu_to_be64(
@@ -310,26 +262,6 @@ xfs_bmbt_disk_set_allf(
310 ((xfs_bmbt_rec_base_t)startblock << 21) | 262 ((xfs_bmbt_rec_base_t)startblock << 21) |
311 ((xfs_bmbt_rec_base_t)blockcount & 263 ((xfs_bmbt_rec_base_t)blockcount &
312 (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); 264 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
313#else /* !XFS_BIG_BLKNOS */
314 if (isnullstartblock(startblock)) {
315 r->l0 = cpu_to_be64(
316 ((xfs_bmbt_rec_base_t)extent_flag << 63) |
317 ((xfs_bmbt_rec_base_t)startoff << 9) |
318 (xfs_bmbt_rec_base_t)xfs_mask64lo(9));
319 r->l1 = cpu_to_be64(xfs_mask64hi(11) |
320 ((xfs_bmbt_rec_base_t)startblock << 21) |
321 ((xfs_bmbt_rec_base_t)blockcount &
322 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
323 } else {
324 r->l0 = cpu_to_be64(
325 ((xfs_bmbt_rec_base_t)extent_flag << 63) |
326 ((xfs_bmbt_rec_base_t)startoff << 9));
327 r->l1 = cpu_to_be64(
328 ((xfs_bmbt_rec_base_t)startblock << 21) |
329 ((xfs_bmbt_rec_base_t)blockcount &
330 (xfs_bmbt_rec_base_t)xfs_mask64lo(21)));
331 }
332#endif /* XFS_BIG_BLKNOS */
333} 265}
334 266
335/* 267/*
@@ -365,24 +297,11 @@ xfs_bmbt_set_startblock(
365 xfs_bmbt_rec_host_t *r, 297 xfs_bmbt_rec_host_t *r,
366 xfs_fsblock_t v) 298 xfs_fsblock_t v)
367{ 299{
368#if XFS_BIG_BLKNOS
369 ASSERT((v & xfs_mask64hi(12)) == 0); 300 ASSERT((v & xfs_mask64hi(12)) == 0);
370 r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | 301 r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) |
371 (xfs_bmbt_rec_base_t)(v >> 43); 302 (xfs_bmbt_rec_base_t)(v >> 43);
372 r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | 303 r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) |
373 (xfs_bmbt_rec_base_t)(v << 21); 304 (xfs_bmbt_rec_base_t)(v << 21);
374#else /* !XFS_BIG_BLKNOS */
375 if (isnullstartblock(v)) {
376 r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9);
377 r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) |
378 ((xfs_bmbt_rec_base_t)v << 21) |
379 (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
380 } else {
381 r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9);
382 r->l1 = ((xfs_bmbt_rec_base_t)v << 21) |
383 (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21));
384 }
385#endif /* XFS_BIG_BLKNOS */
386} 305}
387 306
388/* 307/*
@@ -438,8 +357,8 @@ xfs_bmbt_to_bmdr(
438 cpu_to_be64(XFS_BUF_DADDR_NULL)); 357 cpu_to_be64(XFS_BUF_DADDR_NULL));
439 } else 358 } else
440 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC)); 359 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
441 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)); 360 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
442 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)); 361 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
443 ASSERT(rblock->bb_level != 0); 362 ASSERT(rblock->bb_level != 0);
444 dblock->bb_level = rblock->bb_level; 363 dblock->bb_level = rblock->bb_level;
445 dblock->bb_numrecs = rblock->bb_numrecs; 364 dblock->bb_numrecs = rblock->bb_numrecs;
@@ -554,7 +473,7 @@ xfs_bmbt_alloc_block(
554 args.minlen = args.maxlen = args.prod = 1; 473 args.minlen = args.maxlen = args.prod = 1;
555 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 474 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
556 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { 475 if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
557 error = XFS_ERROR(ENOSPC); 476 error = -ENOSPC;
558 goto error0; 477 goto error0;
559 } 478 }
560 error = xfs_alloc_vextent(&args); 479 error = xfs_alloc_vextent(&args);
@@ -763,11 +682,11 @@ xfs_bmbt_verify(
763 682
764 /* sibling pointer verification */ 683 /* sibling pointer verification */
765 if (!block->bb_u.l.bb_leftsib || 684 if (!block->bb_u.l.bb_leftsib ||
766 (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) && 685 (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
767 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))) 686 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
768 return false; 687 return false;
769 if (!block->bb_u.l.bb_rightsib || 688 if (!block->bb_u.l.bb_rightsib ||
770 (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) && 689 (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
771 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))) 690 !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
772 return false; 691 return false;
773 692
@@ -779,9 +698,9 @@ xfs_bmbt_read_verify(
779 struct xfs_buf *bp) 698 struct xfs_buf *bp)
780{ 699{
781 if (!xfs_btree_lblock_verify_crc(bp)) 700 if (!xfs_btree_lblock_verify_crc(bp))
782 xfs_buf_ioerror(bp, EFSBADCRC); 701 xfs_buf_ioerror(bp, -EFSBADCRC);
783 else if (!xfs_bmbt_verify(bp)) 702 else if (!xfs_bmbt_verify(bp))
784 xfs_buf_ioerror(bp, EFSCORRUPTED); 703 xfs_buf_ioerror(bp, -EFSCORRUPTED);
785 704
786 if (bp->b_error) { 705 if (bp->b_error) {
787 trace_xfs_btree_corrupt(bp, _RET_IP_); 706 trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -795,7 +714,7 @@ xfs_bmbt_write_verify(
795{ 714{
796 if (!xfs_bmbt_verify(bp)) { 715 if (!xfs_bmbt_verify(bp)) {
797 trace_xfs_btree_corrupt(bp, _RET_IP_); 716 trace_xfs_btree_corrupt(bp, _RET_IP_);
798 xfs_buf_ioerror(bp, EFSCORRUPTED); 717 xfs_buf_ioerror(bp, -EFSCORRUPTED);
799 xfs_verifier_error(bp); 718 xfs_verifier_error(bp);
800 return; 719 return;
801 } 720 }
@@ -959,7 +878,7 @@ xfs_bmbt_change_owner(
959 878
960 cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); 879 cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
961 if (!cur) 880 if (!cur)
962 return ENOMEM; 881 return -ENOMEM;
963 882
964 error = xfs_btree_change_owner(cur, new_owner, buffer_list); 883 error = xfs_btree_change_owner(cur, new_owner, buffer_list);
965 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 884 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 819a8a4dee95..819a8a4dee95 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index cf893bc1e373..8fe6a93ff473 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -78,11 +78,11 @@ xfs_btree_check_lblock(
78 be16_to_cpu(block->bb_numrecs) <= 78 be16_to_cpu(block->bb_numrecs) <=
79 cur->bc_ops->get_maxrecs(cur, level) && 79 cur->bc_ops->get_maxrecs(cur, level) &&
80 block->bb_u.l.bb_leftsib && 80 block->bb_u.l.bb_leftsib &&
81 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || 81 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
82 XFS_FSB_SANITY_CHECK(mp, 82 XFS_FSB_SANITY_CHECK(mp,
83 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 83 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
84 block->bb_u.l.bb_rightsib && 84 block->bb_u.l.bb_rightsib &&
85 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || 85 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
86 XFS_FSB_SANITY_CHECK(mp, 86 XFS_FSB_SANITY_CHECK(mp,
87 be64_to_cpu(block->bb_u.l.bb_rightsib))); 87 be64_to_cpu(block->bb_u.l.bb_rightsib)));
88 88
@@ -92,7 +92,7 @@ xfs_btree_check_lblock(
92 if (bp) 92 if (bp)
93 trace_xfs_btree_corrupt(bp, _RET_IP_); 93 trace_xfs_btree_corrupt(bp, _RET_IP_);
94 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 94 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
95 return XFS_ERROR(EFSCORRUPTED); 95 return -EFSCORRUPTED;
96 } 96 }
97 return 0; 97 return 0;
98} 98}
@@ -140,7 +140,7 @@ xfs_btree_check_sblock(
140 if (bp) 140 if (bp)
141 trace_xfs_btree_corrupt(bp, _RET_IP_); 141 trace_xfs_btree_corrupt(bp, _RET_IP_);
142 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 142 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
143 return XFS_ERROR(EFSCORRUPTED); 143 return -EFSCORRUPTED;
144 } 144 }
145 return 0; 145 return 0;
146} 146}
@@ -167,12 +167,12 @@ xfs_btree_check_block(
167int /* error (0 or EFSCORRUPTED) */ 167int /* error (0 or EFSCORRUPTED) */
168xfs_btree_check_lptr( 168xfs_btree_check_lptr(
169 struct xfs_btree_cur *cur, /* btree cursor */ 169 struct xfs_btree_cur *cur, /* btree cursor */
170 xfs_dfsbno_t bno, /* btree block disk address */ 170 xfs_fsblock_t bno, /* btree block disk address */
171 int level) /* btree block level */ 171 int level) /* btree block level */
172{ 172{
173 XFS_WANT_CORRUPTED_RETURN( 173 XFS_WANT_CORRUPTED_RETURN(
174 level > 0 && 174 level > 0 &&
175 bno != NULLDFSBNO && 175 bno != NULLFSBLOCK &&
176 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); 176 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
177 return 0; 177 return 0;
178} 178}
@@ -595,7 +595,7 @@ xfs_btree_islastblock(
595 block = xfs_btree_get_block(cur, level, &bp); 595 block = xfs_btree_get_block(cur, level, &bp);
596 xfs_btree_check_block(cur, block, level, bp); 596 xfs_btree_check_block(cur, block, level, bp);
597 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 597 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
598 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); 598 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
599 else 599 else
600 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); 600 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
601} 601}
@@ -771,16 +771,16 @@ xfs_btree_readahead_lblock(
771 struct xfs_btree_block *block) 771 struct xfs_btree_block *block)
772{ 772{
773 int rval = 0; 773 int rval = 0;
774 xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 774 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
775 xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 775 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
776 776
777 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { 777 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
778 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 778 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
779 cur->bc_ops->buf_ops); 779 cur->bc_ops->buf_ops);
780 rval++; 780 rval++;
781 } 781 }
782 782
783 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { 783 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
784 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 784 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
785 cur->bc_ops->buf_ops); 785 cur->bc_ops->buf_ops);
786 rval++; 786 rval++;
@@ -852,7 +852,7 @@ xfs_btree_ptr_to_daddr(
852 union xfs_btree_ptr *ptr) 852 union xfs_btree_ptr *ptr)
853{ 853{
854 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 854 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
855 ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); 855 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
856 856
857 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); 857 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
858 } else { 858 } else {
@@ -900,9 +900,9 @@ xfs_btree_setbuf(
900 900
901 b = XFS_BUF_TO_BLOCK(bp); 901 b = XFS_BUF_TO_BLOCK(bp);
902 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 902 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
903 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) 903 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
904 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 904 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
905 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) 905 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
906 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 906 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
907 } else { 907 } else {
908 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) 908 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
@@ -918,7 +918,7 @@ xfs_btree_ptr_is_null(
918 union xfs_btree_ptr *ptr) 918 union xfs_btree_ptr *ptr)
919{ 919{
920 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 920 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
921 return ptr->l == cpu_to_be64(NULLDFSBNO); 921 return ptr->l == cpu_to_be64(NULLFSBLOCK);
922 else 922 else
923 return ptr->s == cpu_to_be32(NULLAGBLOCK); 923 return ptr->s == cpu_to_be32(NULLAGBLOCK);
924} 924}
@@ -929,7 +929,7 @@ xfs_btree_set_ptr_null(
929 union xfs_btree_ptr *ptr) 929 union xfs_btree_ptr *ptr)
930{ 930{
931 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 931 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
932 ptr->l = cpu_to_be64(NULLDFSBNO); 932 ptr->l = cpu_to_be64(NULLFSBLOCK);
933 else 933 else
934 ptr->s = cpu_to_be32(NULLAGBLOCK); 934 ptr->s = cpu_to_be32(NULLAGBLOCK);
935} 935}
@@ -997,8 +997,8 @@ xfs_btree_init_block_int(
997 buf->bb_numrecs = cpu_to_be16(numrecs); 997 buf->bb_numrecs = cpu_to_be16(numrecs);
998 998
999 if (flags & XFS_BTREE_LONG_PTRS) { 999 if (flags & XFS_BTREE_LONG_PTRS) {
1000 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); 1000 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1001 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); 1001 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1002 if (flags & XFS_BTREE_CRC_BLOCKS) { 1002 if (flags & XFS_BTREE_CRC_BLOCKS) {
1003 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); 1003 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1004 buf->bb_u.l.bb_owner = cpu_to_be64(owner); 1004 buf->bb_u.l.bb_owner = cpu_to_be64(owner);
@@ -1140,7 +1140,7 @@ xfs_btree_get_buf_block(
1140 mp->m_bsize, flags); 1140 mp->m_bsize, flags);
1141 1141
1142 if (!*bpp) 1142 if (!*bpp)
1143 return ENOMEM; 1143 return -ENOMEM;
1144 1144
1145 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1145 (*bpp)->b_ops = cur->bc_ops->buf_ops;
1146 *block = XFS_BUF_TO_BLOCK(*bpp); 1146 *block = XFS_BUF_TO_BLOCK(*bpp);
@@ -1498,7 +1498,7 @@ xfs_btree_increment(
1498 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1498 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1499 goto out0; 1499 goto out0;
1500 ASSERT(0); 1500 ASSERT(0);
1501 error = EFSCORRUPTED; 1501 error = -EFSCORRUPTED;
1502 goto error0; 1502 goto error0;
1503 } 1503 }
1504 ASSERT(lev < cur->bc_nlevels); 1504 ASSERT(lev < cur->bc_nlevels);
@@ -1597,7 +1597,7 @@ xfs_btree_decrement(
1597 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1597 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1598 goto out0; 1598 goto out0;
1599 ASSERT(0); 1599 ASSERT(0);
1600 error = EFSCORRUPTED; 1600 error = -EFSCORRUPTED;
1601 goto error0; 1601 goto error0;
1602 } 1602 }
1603 ASSERT(lev < cur->bc_nlevels); 1603 ASSERT(lev < cur->bc_nlevels);
@@ -4018,7 +4018,7 @@ xfs_btree_block_change_owner(
4018 /* now read rh sibling block for next iteration */ 4018 /* now read rh sibling block for next iteration */
4019 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4019 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4020 if (xfs_btree_ptr_is_null(cur, &rptr)) 4020 if (xfs_btree_ptr_is_null(cur, &rptr))
4021 return ENOENT; 4021 return -ENOENT;
4022 4022
4023 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4023 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4024} 4024}
@@ -4061,7 +4061,7 @@ xfs_btree_change_owner(
4061 buffer_list); 4061 buffer_list);
4062 } while (!error); 4062 } while (!error);
4063 4063
4064 if (error != ENOENT) 4064 if (error != -ENOENT)
4065 return error; 4065 return error;
4066 } 4066 }
4067 4067
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index a04b69422f67..8f18bab73ea5 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -258,7 +258,7 @@ xfs_btree_check_block(
258int /* error (0 or EFSCORRUPTED) */ 258int /* error (0 or EFSCORRUPTED) */
259xfs_btree_check_lptr( 259xfs_btree_check_lptr(
260 struct xfs_btree_cur *cur, /* btree cursor */ 260 struct xfs_btree_cur *cur, /* btree cursor */
261 xfs_dfsbno_t ptr, /* btree block disk address */ 261 xfs_fsblock_t ptr, /* btree block disk address */
262 int level); /* btree block level */ 262 int level); /* btree block level */
263 263
264/* 264/*
diff --git a/fs/xfs/xfs_cksum.h b/fs/xfs/libxfs/xfs_cksum.h
index fad1676ad8cd..fad1676ad8cd 100644
--- a/fs/xfs/xfs_cksum.h
+++ b/fs/xfs/libxfs/xfs_cksum.h
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index a514ab616650..2c42ae28d027 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -185,7 +185,7 @@ xfs_da3_node_write_verify(
185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
186 186
187 if (!xfs_da3_node_verify(bp)) { 187 if (!xfs_da3_node_verify(bp)) {
188 xfs_buf_ioerror(bp, EFSCORRUPTED); 188 xfs_buf_ioerror(bp, -EFSCORRUPTED);
189 xfs_verifier_error(bp); 189 xfs_verifier_error(bp);
190 return; 190 return;
191 } 191 }
@@ -214,13 +214,13 @@ xfs_da3_node_read_verify(
214 switch (be16_to_cpu(info->magic)) { 214 switch (be16_to_cpu(info->magic)) {
215 case XFS_DA3_NODE_MAGIC: 215 case XFS_DA3_NODE_MAGIC:
216 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { 216 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
217 xfs_buf_ioerror(bp, EFSBADCRC); 217 xfs_buf_ioerror(bp, -EFSBADCRC);
218 break; 218 break;
219 } 219 }
220 /* fall through */ 220 /* fall through */
221 case XFS_DA_NODE_MAGIC: 221 case XFS_DA_NODE_MAGIC:
222 if (!xfs_da3_node_verify(bp)) { 222 if (!xfs_da3_node_verify(bp)) {
223 xfs_buf_ioerror(bp, EFSCORRUPTED); 223 xfs_buf_ioerror(bp, -EFSCORRUPTED);
224 break; 224 break;
225 } 225 }
226 return; 226 return;
@@ -315,7 +315,7 @@ xfs_da3_node_create(
315 315
316 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); 316 error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
317 if (error) 317 if (error)
318 return(error); 318 return error;
319 bp->b_ops = &xfs_da3_node_buf_ops; 319 bp->b_ops = &xfs_da3_node_buf_ops;
320 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); 320 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
321 node = bp->b_addr; 321 node = bp->b_addr;
@@ -337,7 +337,7 @@ xfs_da3_node_create(
337 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); 337 XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
338 338
339 *bpp = bp; 339 *bpp = bp;
340 return(0); 340 return 0;
341} 341}
342 342
343/* 343/*
@@ -385,8 +385,8 @@ xfs_da3_split(
385 switch (oldblk->magic) { 385 switch (oldblk->magic) {
386 case XFS_ATTR_LEAF_MAGIC: 386 case XFS_ATTR_LEAF_MAGIC:
387 error = xfs_attr3_leaf_split(state, oldblk, newblk); 387 error = xfs_attr3_leaf_split(state, oldblk, newblk);
388 if ((error != 0) && (error != ENOSPC)) { 388 if ((error != 0) && (error != -ENOSPC)) {
389 return(error); /* GROT: attr is inconsistent */ 389 return error; /* GROT: attr is inconsistent */
390 } 390 }
391 if (!error) { 391 if (!error) {
392 addblk = newblk; 392 addblk = newblk;
@@ -408,7 +408,7 @@ xfs_da3_split(
408 &state->extrablk); 408 &state->extrablk);
409 } 409 }
410 if (error) 410 if (error)
411 return(error); /* GROT: attr inconsistent */ 411 return error; /* GROT: attr inconsistent */
412 addblk = newblk; 412 addblk = newblk;
413 break; 413 break;
414 case XFS_DIR2_LEAFN_MAGIC: 414 case XFS_DIR2_LEAFN_MAGIC:
@@ -422,7 +422,7 @@ xfs_da3_split(
422 max - i, &action); 422 max - i, &action);
423 addblk->bp = NULL; 423 addblk->bp = NULL;
424 if (error) 424 if (error)
425 return(error); /* GROT: dir is inconsistent */ 425 return error; /* GROT: dir is inconsistent */
426 /* 426 /*
427 * Record the newly split block for the next time thru? 427 * Record the newly split block for the next time thru?
428 */ 428 */
@@ -439,7 +439,7 @@ xfs_da3_split(
439 xfs_da3_fixhashpath(state, &state->path); 439 xfs_da3_fixhashpath(state, &state->path);
440 } 440 }
441 if (!addblk) 441 if (!addblk)
442 return(0); 442 return 0;
443 443
444 /* 444 /*
445 * Split the root node. 445 * Split the root node.
@@ -449,7 +449,7 @@ xfs_da3_split(
449 error = xfs_da3_root_split(state, oldblk, addblk); 449 error = xfs_da3_root_split(state, oldblk, addblk);
450 if (error) { 450 if (error) {
451 addblk->bp = NULL; 451 addblk->bp = NULL;
452 return(error); /* GROT: dir is inconsistent */ 452 return error; /* GROT: dir is inconsistent */
453 } 453 }
454 454
455 /* 455 /*
@@ -492,7 +492,7 @@ xfs_da3_split(
492 sizeof(node->hdr.info))); 492 sizeof(node->hdr.info)));
493 } 493 }
494 addblk->bp = NULL; 494 addblk->bp = NULL;
495 return(0); 495 return 0;
496} 496}
497 497
498/* 498/*
@@ -670,18 +670,18 @@ xfs_da3_node_split(
670 */ 670 */
671 error = xfs_da_grow_inode(state->args, &blkno); 671 error = xfs_da_grow_inode(state->args, &blkno);
672 if (error) 672 if (error)
673 return(error); /* GROT: dir is inconsistent */ 673 return error; /* GROT: dir is inconsistent */
674 674
675 error = xfs_da3_node_create(state->args, blkno, treelevel, 675 error = xfs_da3_node_create(state->args, blkno, treelevel,
676 &newblk->bp, state->args->whichfork); 676 &newblk->bp, state->args->whichfork);
677 if (error) 677 if (error)
678 return(error); /* GROT: dir is inconsistent */ 678 return error; /* GROT: dir is inconsistent */
679 newblk->blkno = blkno; 679 newblk->blkno = blkno;
680 newblk->magic = XFS_DA_NODE_MAGIC; 680 newblk->magic = XFS_DA_NODE_MAGIC;
681 xfs_da3_node_rebalance(state, oldblk, newblk); 681 xfs_da3_node_rebalance(state, oldblk, newblk);
682 error = xfs_da3_blk_link(state, oldblk, newblk); 682 error = xfs_da3_blk_link(state, oldblk, newblk);
683 if (error) 683 if (error)
684 return(error); 684 return error;
685 *result = 1; 685 *result = 1;
686 } else { 686 } else {
687 *result = 0; 687 *result = 0;
@@ -721,7 +721,7 @@ xfs_da3_node_split(
721 } 721 }
722 } 722 }
723 723
724 return(0); 724 return 0;
725} 725}
726 726
727/* 727/*
@@ -963,9 +963,9 @@ xfs_da3_join(
963 case XFS_ATTR_LEAF_MAGIC: 963 case XFS_ATTR_LEAF_MAGIC:
964 error = xfs_attr3_leaf_toosmall(state, &action); 964 error = xfs_attr3_leaf_toosmall(state, &action);
965 if (error) 965 if (error)
966 return(error); 966 return error;
967 if (action == 0) 967 if (action == 0)
968 return(0); 968 return 0;
969 xfs_attr3_leaf_unbalance(state, drop_blk, save_blk); 969 xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
970 break; 970 break;
971 case XFS_DIR2_LEAFN_MAGIC: 971 case XFS_DIR2_LEAFN_MAGIC:
@@ -985,7 +985,7 @@ xfs_da3_join(
985 xfs_da3_fixhashpath(state, &state->path); 985 xfs_da3_fixhashpath(state, &state->path);
986 error = xfs_da3_node_toosmall(state, &action); 986 error = xfs_da3_node_toosmall(state, &action);
987 if (error) 987 if (error)
988 return(error); 988 return error;
989 if (action == 0) 989 if (action == 0)
990 return 0; 990 return 0;
991 xfs_da3_node_unbalance(state, drop_blk, save_blk); 991 xfs_da3_node_unbalance(state, drop_blk, save_blk);
@@ -995,12 +995,12 @@ xfs_da3_join(
995 error = xfs_da3_blk_unlink(state, drop_blk, save_blk); 995 error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
996 xfs_da_state_kill_altpath(state); 996 xfs_da_state_kill_altpath(state);
997 if (error) 997 if (error)
998 return(error); 998 return error;
999 error = xfs_da_shrink_inode(state->args, drop_blk->blkno, 999 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
1000 drop_blk->bp); 1000 drop_blk->bp);
1001 drop_blk->bp = NULL; 1001 drop_blk->bp = NULL;
1002 if (error) 1002 if (error)
1003 return(error); 1003 return error;
1004 } 1004 }
1005 /* 1005 /*
1006 * We joined all the way to the top. If it turns out that 1006 * We joined all the way to the top. If it turns out that
@@ -1010,7 +1010,7 @@ xfs_da3_join(
1010 xfs_da3_node_remove(state, drop_blk); 1010 xfs_da3_node_remove(state, drop_blk);
1011 xfs_da3_fixhashpath(state, &state->path); 1011 xfs_da3_fixhashpath(state, &state->path);
1012 error = xfs_da3_root_join(state, &state->path.blk[0]); 1012 error = xfs_da3_root_join(state, &state->path.blk[0]);
1013 return(error); 1013 return error;
1014} 1014}
1015 1015
1016#ifdef DEBUG 1016#ifdef DEBUG
@@ -1099,7 +1099,7 @@ xfs_da3_root_join(
1099 xfs_trans_log_buf(args->trans, root_blk->bp, 0, 1099 xfs_trans_log_buf(args->trans, root_blk->bp, 0,
1100 args->geo->blksize - 1); 1100 args->geo->blksize - 1);
1101 error = xfs_da_shrink_inode(args, child, bp); 1101 error = xfs_da_shrink_inode(args, child, bp);
1102 return(error); 1102 return error;
1103} 1103}
1104 1104
1105/* 1105/*
@@ -1142,7 +1142,7 @@ xfs_da3_node_toosmall(
1142 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1142 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1143 if (nodehdr.count > (state->args->geo->node_ents >> 1)) { 1143 if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
1144 *action = 0; /* blk over 50%, don't try to join */ 1144 *action = 0; /* blk over 50%, don't try to join */
1145 return(0); /* blk over 50%, don't try to join */ 1145 return 0; /* blk over 50%, don't try to join */
1146 } 1146 }
1147 1147
1148 /* 1148 /*
@@ -1161,13 +1161,13 @@ xfs_da3_node_toosmall(
1161 error = xfs_da3_path_shift(state, &state->altpath, forward, 1161 error = xfs_da3_path_shift(state, &state->altpath, forward,
1162 0, &retval); 1162 0, &retval);
1163 if (error) 1163 if (error)
1164 return(error); 1164 return error;
1165 if (retval) { 1165 if (retval) {
1166 *action = 0; 1166 *action = 0;
1167 } else { 1167 } else {
1168 *action = 2; 1168 *action = 2;
1169 } 1169 }
1170 return(0); 1170 return 0;
1171 } 1171 }
1172 1172
1173 /* 1173 /*
@@ -1194,7 +1194,7 @@ xfs_da3_node_toosmall(
1194 error = xfs_da3_node_read(state->args->trans, dp, 1194 error = xfs_da3_node_read(state->args->trans, dp,
1195 blkno, -1, &bp, state->args->whichfork); 1195 blkno, -1, &bp, state->args->whichfork);
1196 if (error) 1196 if (error)
1197 return(error); 1197 return error;
1198 1198
1199 node = bp->b_addr; 1199 node = bp->b_addr;
1200 dp->d_ops->node_hdr_from_disk(&thdr, node); 1200 dp->d_ops->node_hdr_from_disk(&thdr, node);
@@ -1486,7 +1486,7 @@ xfs_da3_node_lookup_int(
1486 if (error) { 1486 if (error) {
1487 blk->blkno = 0; 1487 blk->blkno = 0;
1488 state->path.active--; 1488 state->path.active--;
1489 return(error); 1489 return error;
1490 } 1490 }
1491 curr = blk->bp->b_addr; 1491 curr = blk->bp->b_addr;
1492 blk->magic = be16_to_cpu(curr->magic); 1492 blk->magic = be16_to_cpu(curr->magic);
@@ -1579,25 +1579,25 @@ xfs_da3_node_lookup_int(
1579 args->blkno = blk->blkno; 1579 args->blkno = blk->blkno;
1580 } else { 1580 } else {
1581 ASSERT(0); 1581 ASSERT(0);
1582 return XFS_ERROR(EFSCORRUPTED); 1582 return -EFSCORRUPTED;
1583 } 1583 }
1584 if (((retval == ENOENT) || (retval == ENOATTR)) && 1584 if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
1585 (blk->hashval == args->hashval)) { 1585 (blk->hashval == args->hashval)) {
1586 error = xfs_da3_path_shift(state, &state->path, 1, 1, 1586 error = xfs_da3_path_shift(state, &state->path, 1, 1,
1587 &retval); 1587 &retval);
1588 if (error) 1588 if (error)
1589 return(error); 1589 return error;
1590 if (retval == 0) { 1590 if (retval == 0) {
1591 continue; 1591 continue;
1592 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) { 1592 } else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1593 /* path_shift() gives ENOENT */ 1593 /* path_shift() gives ENOENT */
1594 retval = XFS_ERROR(ENOATTR); 1594 retval = -ENOATTR;
1595 } 1595 }
1596 } 1596 }
1597 break; 1597 break;
1598 } 1598 }
1599 *result = retval; 1599 *result = retval;
1600 return(0); 1600 return 0;
1601} 1601}
1602 1602
1603/*======================================================================== 1603/*========================================================================
@@ -1692,7 +1692,7 @@ xfs_da3_blk_link(
1692 be32_to_cpu(old_info->back), 1692 be32_to_cpu(old_info->back),
1693 -1, &bp, args->whichfork); 1693 -1, &bp, args->whichfork);
1694 if (error) 1694 if (error)
1695 return(error); 1695 return error;
1696 ASSERT(bp != NULL); 1696 ASSERT(bp != NULL);
1697 tmp_info = bp->b_addr; 1697 tmp_info = bp->b_addr;
1698 ASSERT(tmp_info->magic == old_info->magic); 1698 ASSERT(tmp_info->magic == old_info->magic);
@@ -1713,7 +1713,7 @@ xfs_da3_blk_link(
1713 be32_to_cpu(old_info->forw), 1713 be32_to_cpu(old_info->forw),
1714 -1, &bp, args->whichfork); 1714 -1, &bp, args->whichfork);
1715 if (error) 1715 if (error)
1716 return(error); 1716 return error;
1717 ASSERT(bp != NULL); 1717 ASSERT(bp != NULL);
1718 tmp_info = bp->b_addr; 1718 tmp_info = bp->b_addr;
1719 ASSERT(tmp_info->magic == old_info->magic); 1719 ASSERT(tmp_info->magic == old_info->magic);
@@ -1726,7 +1726,7 @@ xfs_da3_blk_link(
1726 1726
1727 xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); 1727 xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1728 xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); 1728 xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1729 return(0); 1729 return 0;
1730} 1730}
1731 1731
1732/* 1732/*
@@ -1772,7 +1772,7 @@ xfs_da3_blk_unlink(
1772 be32_to_cpu(drop_info->back), 1772 be32_to_cpu(drop_info->back),
1773 -1, &bp, args->whichfork); 1773 -1, &bp, args->whichfork);
1774 if (error) 1774 if (error)
1775 return(error); 1775 return error;
1776 ASSERT(bp != NULL); 1776 ASSERT(bp != NULL);
1777 tmp_info = bp->b_addr; 1777 tmp_info = bp->b_addr;
1778 ASSERT(tmp_info->magic == save_info->magic); 1778 ASSERT(tmp_info->magic == save_info->magic);
@@ -1789,7 +1789,7 @@ xfs_da3_blk_unlink(
1789 be32_to_cpu(drop_info->forw), 1789 be32_to_cpu(drop_info->forw),
1790 -1, &bp, args->whichfork); 1790 -1, &bp, args->whichfork);
1791 if (error) 1791 if (error)
1792 return(error); 1792 return error;
1793 ASSERT(bp != NULL); 1793 ASSERT(bp != NULL);
1794 tmp_info = bp->b_addr; 1794 tmp_info = bp->b_addr;
1795 ASSERT(tmp_info->magic == save_info->magic); 1795 ASSERT(tmp_info->magic == save_info->magic);
@@ -1801,7 +1801,7 @@ xfs_da3_blk_unlink(
1801 } 1801 }
1802 1802
1803 xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); 1803 xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1804 return(0); 1804 return 0;
1805} 1805}
1806 1806
1807/* 1807/*
@@ -1859,9 +1859,9 @@ xfs_da3_path_shift(
1859 } 1859 }
1860 } 1860 }
1861 if (level < 0) { 1861 if (level < 0) {
1862 *result = XFS_ERROR(ENOENT); /* we're out of our tree */ 1862 *result = -ENOENT; /* we're out of our tree */
1863 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 1863 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1864 return(0); 1864 return 0;
1865 } 1865 }
1866 1866
1867 /* 1867 /*
@@ -1883,7 +1883,7 @@ xfs_da3_path_shift(
1883 error = xfs_da3_node_read(args->trans, dp, blkno, -1, 1883 error = xfs_da3_node_read(args->trans, dp, blkno, -1,
1884 &blk->bp, args->whichfork); 1884 &blk->bp, args->whichfork);
1885 if (error) 1885 if (error)
1886 return(error); 1886 return error;
1887 info = blk->bp->b_addr; 1887 info = blk->bp->b_addr;
1888 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 1888 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1889 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || 1889 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
@@ -2004,7 +2004,7 @@ xfs_da_grow_inode_int(
2004 struct xfs_trans *tp = args->trans; 2004 struct xfs_trans *tp = args->trans;
2005 struct xfs_inode *dp = args->dp; 2005 struct xfs_inode *dp = args->dp;
2006 int w = args->whichfork; 2006 int w = args->whichfork;
2007 xfs_drfsbno_t nblks = dp->i_d.di_nblocks; 2007 xfs_rfsblock_t nblks = dp->i_d.di_nblocks;
2008 struct xfs_bmbt_irec map, *mapp; 2008 struct xfs_bmbt_irec map, *mapp;
2009 int nmap, error, got, i, mapi; 2009 int nmap, error, got, i, mapi;
2010 2010
@@ -2068,7 +2068,7 @@ xfs_da_grow_inode_int(
2068 if (got != count || mapp[0].br_startoff != *bno || 2068 if (got != count || mapp[0].br_startoff != *bno ||
2069 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 2069 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
2070 *bno + count) { 2070 *bno + count) {
2071 error = XFS_ERROR(ENOSPC); 2071 error = -ENOSPC;
2072 goto out_free_map; 2072 goto out_free_map;
2073 } 2073 }
2074 2074
@@ -2158,7 +2158,7 @@ xfs_da3_swap_lastblock(
2158 if (unlikely(lastoff == 0)) { 2158 if (unlikely(lastoff == 0)) {
2159 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, 2159 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
2160 mp); 2160 mp);
2161 return XFS_ERROR(EFSCORRUPTED); 2161 return -EFSCORRUPTED;
2162 } 2162 }
2163 /* 2163 /*
2164 * Read the last block in the btree space. 2164 * Read the last block in the btree space.
@@ -2209,7 +2209,7 @@ xfs_da3_swap_lastblock(
2209 sib_info->magic != dead_info->magic)) { 2209 sib_info->magic != dead_info->magic)) {
2210 XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", 2210 XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
2211 XFS_ERRLEVEL_LOW, mp); 2211 XFS_ERRLEVEL_LOW, mp);
2212 error = XFS_ERROR(EFSCORRUPTED); 2212 error = -EFSCORRUPTED;
2213 goto done; 2213 goto done;
2214 } 2214 }
2215 sib_info->forw = cpu_to_be32(dead_blkno); 2215 sib_info->forw = cpu_to_be32(dead_blkno);
@@ -2231,7 +2231,7 @@ xfs_da3_swap_lastblock(
2231 sib_info->magic != dead_info->magic)) { 2231 sib_info->magic != dead_info->magic)) {
2232 XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", 2232 XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
2233 XFS_ERRLEVEL_LOW, mp); 2233 XFS_ERRLEVEL_LOW, mp);
2234 error = XFS_ERROR(EFSCORRUPTED); 2234 error = -EFSCORRUPTED;
2235 goto done; 2235 goto done;
2236 } 2236 }
2237 sib_info->back = cpu_to_be32(dead_blkno); 2237 sib_info->back = cpu_to_be32(dead_blkno);
@@ -2254,7 +2254,7 @@ xfs_da3_swap_lastblock(
2254 if (level >= 0 && level != par_hdr.level + 1) { 2254 if (level >= 0 && level != par_hdr.level + 1) {
2255 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 2255 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
2256 XFS_ERRLEVEL_LOW, mp); 2256 XFS_ERRLEVEL_LOW, mp);
2257 error = XFS_ERROR(EFSCORRUPTED); 2257 error = -EFSCORRUPTED;
2258 goto done; 2258 goto done;
2259 } 2259 }
2260 level = par_hdr.level; 2260 level = par_hdr.level;
@@ -2267,7 +2267,7 @@ xfs_da3_swap_lastblock(
2267 if (entno == par_hdr.count) { 2267 if (entno == par_hdr.count) {
2268 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", 2268 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
2269 XFS_ERRLEVEL_LOW, mp); 2269 XFS_ERRLEVEL_LOW, mp);
2270 error = XFS_ERROR(EFSCORRUPTED); 2270 error = -EFSCORRUPTED;
2271 goto done; 2271 goto done;
2272 } 2272 }
2273 par_blkno = be32_to_cpu(btree[entno].before); 2273 par_blkno = be32_to_cpu(btree[entno].before);
@@ -2294,7 +2294,7 @@ xfs_da3_swap_lastblock(
2294 if (unlikely(par_blkno == 0)) { 2294 if (unlikely(par_blkno == 0)) {
2295 XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", 2295 XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
2296 XFS_ERRLEVEL_LOW, mp); 2296 XFS_ERRLEVEL_LOW, mp);
2297 error = XFS_ERROR(EFSCORRUPTED); 2297 error = -EFSCORRUPTED;
2298 goto done; 2298 goto done;
2299 } 2299 }
2300 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); 2300 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
@@ -2305,7 +2305,7 @@ xfs_da3_swap_lastblock(
2305 if (par_hdr.level != level) { 2305 if (par_hdr.level != level) {
2306 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 2306 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
2307 XFS_ERRLEVEL_LOW, mp); 2307 XFS_ERRLEVEL_LOW, mp);
2308 error = XFS_ERROR(EFSCORRUPTED); 2308 error = -EFSCORRUPTED;
2309 goto done; 2309 goto done;
2310 } 2310 }
2311 btree = dp->d_ops->node_tree_p(par_node); 2311 btree = dp->d_ops->node_tree_p(par_node);
@@ -2359,7 +2359,7 @@ xfs_da_shrink_inode(
2359 error = xfs_bunmapi(tp, dp, dead_blkno, count, 2359 error = xfs_bunmapi(tp, dp, dead_blkno, count,
2360 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2360 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
2361 0, args->firstblock, args->flist, &done); 2361 0, args->firstblock, args->flist, &done);
2362 if (error == ENOSPC) { 2362 if (error == -ENOSPC) {
2363 if (w != XFS_DATA_FORK) 2363 if (w != XFS_DATA_FORK)
2364 break; 2364 break;
2365 error = xfs_da3_swap_lastblock(args, &dead_blkno, 2365 error = xfs_da3_swap_lastblock(args, &dead_blkno,
@@ -2427,7 +2427,7 @@ xfs_buf_map_from_irec(
2427 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), 2427 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
2428 KM_SLEEP | KM_NOFS); 2428 KM_SLEEP | KM_NOFS);
2429 if (!map) 2429 if (!map)
2430 return ENOMEM; 2430 return -ENOMEM;
2431 *mapp = map; 2431 *mapp = map;
2432 } 2432 }
2433 2433
@@ -2500,8 +2500,8 @@ xfs_dabuf_map(
2500 } 2500 }
2501 2501
2502 if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { 2502 if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
2503 error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED); 2503 error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
2504 if (unlikely(error == EFSCORRUPTED)) { 2504 if (unlikely(error == -EFSCORRUPTED)) {
2505 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 2505 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2506 int i; 2506 int i;
2507 xfs_alert(mp, "%s: bno %lld dir: inode %lld", 2507 xfs_alert(mp, "%s: bno %lld dir: inode %lld",
@@ -2561,7 +2561,7 @@ xfs_da_get_buf(
2561 2561
2562 bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, 2562 bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
2563 mapp, nmap, 0); 2563 mapp, nmap, 0);
2564 error = bp ? bp->b_error : XFS_ERROR(EIO); 2564 error = bp ? bp->b_error : -EIO;
2565 if (error) { 2565 if (error) {
2566 xfs_trans_brelse(trans, bp); 2566 xfs_trans_brelse(trans, bp);
2567 goto out_free; 2567 goto out_free;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 6e153e399a77..6e153e399a77 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index c9aee52a37e2..c9aee52a37e2 100644
--- a/fs/xfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 0a49b0286372..0a49b0286372 100644
--- a/fs/xfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/libxfs/xfs_dinode.h
index 623bbe8fd921..623bbe8fd921 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/libxfs/xfs_dinode.h
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 79670cda48ae..6cef22152fd6 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -108,7 +108,7 @@ xfs_da_mount(
108 if (!mp->m_dir_geo || !mp->m_attr_geo) { 108 if (!mp->m_dir_geo || !mp->m_attr_geo) {
109 kmem_free(mp->m_dir_geo); 109 kmem_free(mp->m_dir_geo);
110 kmem_free(mp->m_attr_geo); 110 kmem_free(mp->m_attr_geo);
111 return ENOMEM; 111 return -ENOMEM;
112 } 112 }
113 113
114 /* set up directory geometry */ 114 /* set up directory geometry */
@@ -202,7 +202,7 @@ xfs_dir_ino_validate(
202 xfs_warn(mp, "Invalid inode number 0x%Lx", 202 xfs_warn(mp, "Invalid inode number 0x%Lx",
203 (unsigned long long) ino); 203 (unsigned long long) ino);
204 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); 204 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
205 return XFS_ERROR(EFSCORRUPTED); 205 return -EFSCORRUPTED;
206 } 206 }
207 return 0; 207 return 0;
208} 208}
@@ -226,7 +226,7 @@ xfs_dir_init(
226 226
227 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 227 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
228 if (!args) 228 if (!args)
229 return ENOMEM; 229 return -ENOMEM;
230 230
231 args->geo = dp->i_mount->m_dir_geo; 231 args->geo = dp->i_mount->m_dir_geo;
232 args->dp = dp; 232 args->dp = dp;
@@ -261,7 +261,7 @@ xfs_dir_createname(
261 261
262 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 262 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
263 if (!args) 263 if (!args)
264 return ENOMEM; 264 return -ENOMEM;
265 265
266 args->geo = dp->i_mount->m_dir_geo; 266 args->geo = dp->i_mount->m_dir_geo;
267 args->name = name->name; 267 args->name = name->name;
@@ -314,18 +314,18 @@ xfs_dir_cilookup_result(
314 int len) 314 int len)
315{ 315{
316 if (args->cmpresult == XFS_CMP_DIFFERENT) 316 if (args->cmpresult == XFS_CMP_DIFFERENT)
317 return ENOENT; 317 return -ENOENT;
318 if (args->cmpresult != XFS_CMP_CASE || 318 if (args->cmpresult != XFS_CMP_CASE ||
319 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 319 !(args->op_flags & XFS_DA_OP_CILOOKUP))
320 return EEXIST; 320 return -EEXIST;
321 321
322 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); 322 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
323 if (!args->value) 323 if (!args->value)
324 return ENOMEM; 324 return -ENOMEM;
325 325
326 memcpy(args->value, name, len); 326 memcpy(args->value, name, len);
327 args->valuelen = len; 327 args->valuelen = len;
328 return EEXIST; 328 return -EEXIST;
329} 329}
330 330
331/* 331/*
@@ -392,7 +392,7 @@ xfs_dir_lookup(
392 rval = xfs_dir2_node_lookup(args); 392 rval = xfs_dir2_node_lookup(args);
393 393
394out_check_rval: 394out_check_rval:
395 if (rval == EEXIST) 395 if (rval == -EEXIST)
396 rval = 0; 396 rval = 0;
397 if (!rval) { 397 if (!rval) {
398 *inum = args->inumber; 398 *inum = args->inumber;
@@ -428,7 +428,7 @@ xfs_dir_removename(
428 428
429 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 429 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
430 if (!args) 430 if (!args)
431 return ENOMEM; 431 return -ENOMEM;
432 432
433 args->geo = dp->i_mount->m_dir_geo; 433 args->geo = dp->i_mount->m_dir_geo;
434 args->name = name->name; 434 args->name = name->name;
@@ -493,7 +493,7 @@ xfs_dir_replace(
493 493
494 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 494 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
495 if (!args) 495 if (!args)
496 return ENOMEM; 496 return -ENOMEM;
497 497
498 args->geo = dp->i_mount->m_dir_geo; 498 args->geo = dp->i_mount->m_dir_geo;
499 args->name = name->name; 499 args->name = name->name;
@@ -555,7 +555,7 @@ xfs_dir_canenter(
555 555
556 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 556 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
557 if (!args) 557 if (!args)
558 return ENOMEM; 558 return -ENOMEM;
559 559
560 args->geo = dp->i_mount->m_dir_geo; 560 args->geo = dp->i_mount->m_dir_geo;
561 args->name = name->name; 561 args->name = name->name;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index c8e86b0b5e99..c8e86b0b5e99 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index c7cd3154026a..9628ceccfa02 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -91,9 +91,9 @@ xfs_dir3_block_read_verify(
91 91
92 if (xfs_sb_version_hascrc(&mp->m_sb) && 92 if (xfs_sb_version_hascrc(&mp->m_sb) &&
93 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 93 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
94 xfs_buf_ioerror(bp, EFSBADCRC); 94 xfs_buf_ioerror(bp, -EFSBADCRC);
95 else if (!xfs_dir3_block_verify(bp)) 95 else if (!xfs_dir3_block_verify(bp))
96 xfs_buf_ioerror(bp, EFSCORRUPTED); 96 xfs_buf_ioerror(bp, -EFSCORRUPTED);
97 97
98 if (bp->b_error) 98 if (bp->b_error)
99 xfs_verifier_error(bp); 99 xfs_verifier_error(bp);
@@ -108,7 +108,7 @@ xfs_dir3_block_write_verify(
108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
109 109
110 if (!xfs_dir3_block_verify(bp)) { 110 if (!xfs_dir3_block_verify(bp)) {
111 xfs_buf_ioerror(bp, EFSCORRUPTED); 111 xfs_buf_ioerror(bp, -EFSCORRUPTED);
112 xfs_verifier_error(bp); 112 xfs_verifier_error(bp);
113 return; 113 return;
114 } 114 }
@@ -392,7 +392,7 @@ xfs_dir2_block_addname(
392 if (args->op_flags & XFS_DA_OP_JUSTCHECK) { 392 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
393 xfs_trans_brelse(tp, bp); 393 xfs_trans_brelse(tp, bp);
394 if (!dup) 394 if (!dup)
395 return XFS_ERROR(ENOSPC); 395 return -ENOSPC;
396 return 0; 396 return 0;
397 } 397 }
398 398
@@ -402,7 +402,7 @@ xfs_dir2_block_addname(
402 if (!dup) { 402 if (!dup) {
403 /* Don't have a space reservation: return no-space. */ 403 /* Don't have a space reservation: return no-space. */
404 if (args->total == 0) 404 if (args->total == 0)
405 return XFS_ERROR(ENOSPC); 405 return -ENOSPC;
406 /* 406 /*
407 * Convert to the next larger format. 407 * Convert to the next larger format.
408 * Then add the new entry in that format. 408 * Then add the new entry in that format.
@@ -647,7 +647,7 @@ xfs_dir2_block_lookup(
647 args->filetype = dp->d_ops->data_get_ftype(dep); 647 args->filetype = dp->d_ops->data_get_ftype(dep);
648 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); 648 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
649 xfs_trans_brelse(args->trans, bp); 649 xfs_trans_brelse(args->trans, bp);
650 return XFS_ERROR(error); 650 return error;
651} 651}
652 652
653/* 653/*
@@ -703,7 +703,7 @@ xfs_dir2_block_lookup_int(
703 if (low > high) { 703 if (low > high) {
704 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 704 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
705 xfs_trans_brelse(tp, bp); 705 xfs_trans_brelse(tp, bp);
706 return XFS_ERROR(ENOENT); 706 return -ENOENT;
707 } 707 }
708 } 708 }
709 /* 709 /*
@@ -751,7 +751,7 @@ xfs_dir2_block_lookup_int(
751 * No match, release the buffer and return ENOENT. 751 * No match, release the buffer and return ENOENT.
752 */ 752 */
753 xfs_trans_brelse(tp, bp); 753 xfs_trans_brelse(tp, bp);
754 return XFS_ERROR(ENOENT); 754 return -ENOENT;
755} 755}
756 756
757/* 757/*
@@ -1091,7 +1091,7 @@ xfs_dir2_sf_to_block(
1091 */ 1091 */
1092 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { 1092 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
1093 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1093 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1094 return XFS_ERROR(EIO); 1094 return -EIO;
1095 } 1095 }
1096 1096
1097 oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; 1097 oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 8c2f6422648e..fdd803fecb8e 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -100,7 +100,7 @@ __xfs_dir3_data_check(
100 break; 100 break;
101 default: 101 default:
102 XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp); 102 XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
103 return EFSCORRUPTED; 103 return -EFSCORRUPTED;
104 } 104 }
105 105
106 /* 106 /*
@@ -256,7 +256,7 @@ xfs_dir3_data_reada_verify(
256 xfs_dir3_data_verify(bp); 256 xfs_dir3_data_verify(bp);
257 return; 257 return;
258 default: 258 default:
259 xfs_buf_ioerror(bp, EFSCORRUPTED); 259 xfs_buf_ioerror(bp, -EFSCORRUPTED);
260 xfs_verifier_error(bp); 260 xfs_verifier_error(bp);
261 break; 261 break;
262 } 262 }
@@ -270,9 +270,9 @@ xfs_dir3_data_read_verify(
270 270
271 if (xfs_sb_version_hascrc(&mp->m_sb) && 271 if (xfs_sb_version_hascrc(&mp->m_sb) &&
272 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) 272 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
273 xfs_buf_ioerror(bp, EFSBADCRC); 273 xfs_buf_ioerror(bp, -EFSBADCRC);
274 else if (!xfs_dir3_data_verify(bp)) 274 else if (!xfs_dir3_data_verify(bp))
275 xfs_buf_ioerror(bp, EFSCORRUPTED); 275 xfs_buf_ioerror(bp, -EFSCORRUPTED);
276 276
277 if (bp->b_error) 277 if (bp->b_error)
278 xfs_verifier_error(bp); 278 xfs_verifier_error(bp);
@@ -287,7 +287,7 @@ xfs_dir3_data_write_verify(
287 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 287 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
288 288
289 if (!xfs_dir3_data_verify(bp)) { 289 if (!xfs_dir3_data_verify(bp)) {
290 xfs_buf_ioerror(bp, EFSCORRUPTED); 290 xfs_buf_ioerror(bp, -EFSCORRUPTED);
291 xfs_verifier_error(bp); 291 xfs_verifier_error(bp);
292 return; 292 return;
293 } 293 }
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index fb0aad4440c1..a19174eb3cb2 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -183,9 +183,9 @@ __read_verify(
183 183
184 if (xfs_sb_version_hascrc(&mp->m_sb) && 184 if (xfs_sb_version_hascrc(&mp->m_sb) &&
185 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) 185 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
186 xfs_buf_ioerror(bp, EFSBADCRC); 186 xfs_buf_ioerror(bp, -EFSBADCRC);
187 else if (!xfs_dir3_leaf_verify(bp, magic)) 187 else if (!xfs_dir3_leaf_verify(bp, magic))
188 xfs_buf_ioerror(bp, EFSCORRUPTED); 188 xfs_buf_ioerror(bp, -EFSCORRUPTED);
189 189
190 if (bp->b_error) 190 if (bp->b_error)
191 xfs_verifier_error(bp); 191 xfs_verifier_error(bp);
@@ -201,7 +201,7 @@ __write_verify(
201 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 201 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
202 202
203 if (!xfs_dir3_leaf_verify(bp, magic)) { 203 if (!xfs_dir3_leaf_verify(bp, magic)) {
204 xfs_buf_ioerror(bp, EFSCORRUPTED); 204 xfs_buf_ioerror(bp, -EFSCORRUPTED);
205 xfs_verifier_error(bp); 205 xfs_verifier_error(bp);
206 return; 206 return;
207 } 207 }
@@ -731,7 +731,7 @@ xfs_dir2_leaf_addname(
731 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || 731 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
732 args->total == 0) { 732 args->total == 0) {
733 xfs_trans_brelse(tp, lbp); 733 xfs_trans_brelse(tp, lbp);
734 return XFS_ERROR(ENOSPC); 734 return -ENOSPC;
735 } 735 }
736 /* 736 /*
737 * Convert to node form. 737 * Convert to node form.
@@ -755,7 +755,7 @@ xfs_dir2_leaf_addname(
755 */ 755 */
756 if (args->op_flags & XFS_DA_OP_JUSTCHECK) { 756 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
757 xfs_trans_brelse(tp, lbp); 757 xfs_trans_brelse(tp, lbp);
758 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; 758 return use_block == -1 ? -ENOSPC : 0;
759 } 759 }
760 /* 760 /*
761 * If no allocations are allowed, return now before we've 761 * If no allocations are allowed, return now before we've
@@ -763,7 +763,7 @@ xfs_dir2_leaf_addname(
763 */ 763 */
764 if (args->total == 0 && use_block == -1) { 764 if (args->total == 0 && use_block == -1) {
765 xfs_trans_brelse(tp, lbp); 765 xfs_trans_brelse(tp, lbp);
766 return XFS_ERROR(ENOSPC); 766 return -ENOSPC;
767 } 767 }
768 /* 768 /*
769 * Need to compact the leaf entries, removing stale ones. 769 * Need to compact the leaf entries, removing stale ones.
@@ -1198,7 +1198,7 @@ xfs_dir2_leaf_lookup(
1198 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); 1198 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1199 xfs_trans_brelse(tp, dbp); 1199 xfs_trans_brelse(tp, dbp);
1200 xfs_trans_brelse(tp, lbp); 1200 xfs_trans_brelse(tp, lbp);
1201 return XFS_ERROR(error); 1201 return error;
1202} 1202}
1203 1203
1204/* 1204/*
@@ -1327,13 +1327,13 @@ xfs_dir2_leaf_lookup_int(
1327 return 0; 1327 return 0;
1328 } 1328 }
1329 /* 1329 /*
1330 * No match found, return ENOENT. 1330 * No match found, return -ENOENT.
1331 */ 1331 */
1332 ASSERT(cidb == -1); 1332 ASSERT(cidb == -1);
1333 if (dbp) 1333 if (dbp)
1334 xfs_trans_brelse(tp, dbp); 1334 xfs_trans_brelse(tp, dbp);
1335 xfs_trans_brelse(tp, lbp); 1335 xfs_trans_brelse(tp, lbp);
1336 return XFS_ERROR(ENOENT); 1336 return -ENOENT;
1337} 1337}
1338 1338
1339/* 1339/*
@@ -1440,7 +1440,7 @@ xfs_dir2_leaf_removename(
1440 * Just go on, returning success, leaving the 1440 * Just go on, returning success, leaving the
1441 * empty block in place. 1441 * empty block in place.
1442 */ 1442 */
1443 if (error == ENOSPC && args->total == 0) 1443 if (error == -ENOSPC && args->total == 0)
1444 error = 0; 1444 error = 0;
1445 xfs_dir3_leaf_check(dp, lbp); 1445 xfs_dir3_leaf_check(dp, lbp);
1446 return error; 1446 return error;
@@ -1641,7 +1641,7 @@ xfs_dir2_leaf_trim_data(
1641 * Get rid of the data block. 1641 * Get rid of the data block.
1642 */ 1642 */
1643 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { 1643 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1644 ASSERT(error != ENOSPC); 1644 ASSERT(error != -ENOSPC);
1645 xfs_trans_brelse(tp, dbp); 1645 xfs_trans_brelse(tp, dbp);
1646 return error; 1646 return error;
1647 } 1647 }
@@ -1815,7 +1815,7 @@ xfs_dir2_node_to_leaf(
1815 * punching out the middle of an extent, and this is an 1815 * punching out the middle of an extent, and this is an
1816 * isolated block. 1816 * isolated block.
1817 */ 1817 */
1818 ASSERT(error != ENOSPC); 1818 ASSERT(error != -ENOSPC);
1819 return error; 1819 return error;
1820 } 1820 }
1821 fbp = NULL; 1821 fbp = NULL;
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index da43d304fca2..2ae6ac2c11ae 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -117,9 +117,9 @@ xfs_dir3_free_read_verify(
117 117
118 if (xfs_sb_version_hascrc(&mp->m_sb) && 118 if (xfs_sb_version_hascrc(&mp->m_sb) &&
119 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) 119 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
120 xfs_buf_ioerror(bp, EFSBADCRC); 120 xfs_buf_ioerror(bp, -EFSBADCRC);
121 else if (!xfs_dir3_free_verify(bp)) 121 else if (!xfs_dir3_free_verify(bp))
122 xfs_buf_ioerror(bp, EFSCORRUPTED); 122 xfs_buf_ioerror(bp, -EFSCORRUPTED);
123 123
124 if (bp->b_error) 124 if (bp->b_error)
125 xfs_verifier_error(bp); 125 xfs_verifier_error(bp);
@@ -134,7 +134,7 @@ xfs_dir3_free_write_verify(
134 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 134 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
135 135
136 if (!xfs_dir3_free_verify(bp)) { 136 if (!xfs_dir3_free_verify(bp)) {
137 xfs_buf_ioerror(bp, EFSCORRUPTED); 137 xfs_buf_ioerror(bp, -EFSCORRUPTED);
138 xfs_verifier_error(bp); 138 xfs_verifier_error(bp);
139 return; 139 return;
140 } 140 }
@@ -406,7 +406,7 @@ xfs_dir2_leafn_add(
406 * into other peoples memory 406 * into other peoples memory
407 */ 407 */
408 if (index < 0) 408 if (index < 0)
409 return XFS_ERROR(EFSCORRUPTED); 409 return -EFSCORRUPTED;
410 410
411 /* 411 /*
412 * If there are already the maximum number of leaf entries in 412 * If there are already the maximum number of leaf entries in
@@ -417,7 +417,7 @@ xfs_dir2_leafn_add(
417 417
418 if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) { 418 if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
419 if (!leafhdr.stale) 419 if (!leafhdr.stale)
420 return XFS_ERROR(ENOSPC); 420 return -ENOSPC;
421 compact = leafhdr.stale > 1; 421 compact = leafhdr.stale > 1;
422 } else 422 } else
423 compact = 0; 423 compact = 0;
@@ -629,7 +629,7 @@ xfs_dir2_leafn_lookup_for_addname(
629 XFS_ERRLEVEL_LOW, mp); 629 XFS_ERRLEVEL_LOW, mp);
630 if (curfdb != newfdb) 630 if (curfdb != newfdb)
631 xfs_trans_brelse(tp, curbp); 631 xfs_trans_brelse(tp, curbp);
632 return XFS_ERROR(EFSCORRUPTED); 632 return -EFSCORRUPTED;
633 } 633 }
634 curfdb = newfdb; 634 curfdb = newfdb;
635 if (be16_to_cpu(bests[fi]) >= length) 635 if (be16_to_cpu(bests[fi]) >= length)
@@ -660,7 +660,7 @@ out:
660 * Return the index, that will be the insertion point. 660 * Return the index, that will be the insertion point.
661 */ 661 */
662 *indexp = index; 662 *indexp = index;
663 return XFS_ERROR(ENOENT); 663 return -ENOENT;
664} 664}
665 665
666/* 666/*
@@ -789,7 +789,7 @@ xfs_dir2_leafn_lookup_for_entry(
789 curbp->b_ops = &xfs_dir3_data_buf_ops; 789 curbp->b_ops = &xfs_dir3_data_buf_ops;
790 xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF); 790 xfs_trans_buf_set_type(tp, curbp, XFS_BLFT_DIR_DATA_BUF);
791 if (cmp == XFS_CMP_EXACT) 791 if (cmp == XFS_CMP_EXACT)
792 return XFS_ERROR(EEXIST); 792 return -EEXIST;
793 } 793 }
794 } 794 }
795 ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT)); 795 ASSERT(index == leafhdr.count || (args->op_flags & XFS_DA_OP_OKNOENT));
@@ -812,7 +812,7 @@ xfs_dir2_leafn_lookup_for_entry(
812 state->extravalid = 0; 812 state->extravalid = 0;
813 } 813 }
814 *indexp = index; 814 *indexp = index;
815 return XFS_ERROR(ENOENT); 815 return -ENOENT;
816} 816}
817 817
818/* 818/*
@@ -1133,7 +1133,7 @@ xfs_dir3_data_block_free(
1133 if (error == 0) { 1133 if (error == 0) {
1134 fbp = NULL; 1134 fbp = NULL;
1135 logfree = 0; 1135 logfree = 0;
1136 } else if (error != ENOSPC || args->total != 0) 1136 } else if (error != -ENOSPC || args->total != 0)
1137 return error; 1137 return error;
1138 /* 1138 /*
1139 * It's possible to get ENOSPC if there is no 1139 * It's possible to get ENOSPC if there is no
@@ -1287,7 +1287,7 @@ xfs_dir2_leafn_remove(
1287 * In this case just drop the buffer and some one else 1287 * In this case just drop the buffer and some one else
1288 * will eventually get rid of the empty block. 1288 * will eventually get rid of the empty block.
1289 */ 1289 */
1290 else if (!(error == ENOSPC && args->total == 0)) 1290 else if (!(error == -ENOSPC && args->total == 0))
1291 return error; 1291 return error;
1292 } 1292 }
1293 /* 1293 /*
@@ -1599,7 +1599,7 @@ xfs_dir2_node_addname(
1599 error = xfs_da3_node_lookup_int(state, &rval); 1599 error = xfs_da3_node_lookup_int(state, &rval);
1600 if (error) 1600 if (error)
1601 rval = error; 1601 rval = error;
1602 if (rval != ENOENT) { 1602 if (rval != -ENOENT) {
1603 goto done; 1603 goto done;
1604 } 1604 }
1605 /* 1605 /*
@@ -1628,7 +1628,7 @@ xfs_dir2_node_addname(
1628 * It didn't work, we need to split the leaf block. 1628 * It didn't work, we need to split the leaf block.
1629 */ 1629 */
1630 if (args->total == 0) { 1630 if (args->total == 0) {
1631 ASSERT(rval == ENOSPC); 1631 ASSERT(rval == -ENOSPC);
1632 goto done; 1632 goto done;
1633 } 1633 }
1634 /* 1634 /*
@@ -1815,7 +1815,7 @@ xfs_dir2_node_addname_int(
1815 * Not allowed to allocate, return failure. 1815 * Not allowed to allocate, return failure.
1816 */ 1816 */
1817 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) 1817 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
1818 return XFS_ERROR(ENOSPC); 1818 return -ENOSPC;
1819 1819
1820 /* 1820 /*
1821 * Allocate and initialize the new data block. 1821 * Allocate and initialize the new data block.
@@ -1876,7 +1876,7 @@ xfs_dir2_node_addname_int(
1876 } 1876 }
1877 XFS_ERROR_REPORT("xfs_dir2_node_addname_int", 1877 XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
1878 XFS_ERRLEVEL_LOW, mp); 1878 XFS_ERRLEVEL_LOW, mp);
1879 return XFS_ERROR(EFSCORRUPTED); 1879 return -EFSCORRUPTED;
1880 } 1880 }
1881 1881
1882 /* 1882 /*
@@ -2042,8 +2042,8 @@ xfs_dir2_node_lookup(
2042 error = xfs_da3_node_lookup_int(state, &rval); 2042 error = xfs_da3_node_lookup_int(state, &rval);
2043 if (error) 2043 if (error)
2044 rval = error; 2044 rval = error;
2045 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { 2045 else if (rval == -ENOENT && args->cmpresult == XFS_CMP_CASE) {
2046 /* If a CI match, dup the actual name and return EEXIST */ 2046 /* If a CI match, dup the actual name and return -EEXIST */
2047 xfs_dir2_data_entry_t *dep; 2047 xfs_dir2_data_entry_t *dep;
2048 2048
2049 dep = (xfs_dir2_data_entry_t *) 2049 dep = (xfs_dir2_data_entry_t *)
@@ -2096,7 +2096,7 @@ xfs_dir2_node_removename(
2096 goto out_free; 2096 goto out_free;
2097 2097
2098 /* Didn't find it, upper layer screwed up. */ 2098 /* Didn't find it, upper layer screwed up. */
2099 if (rval != EEXIST) { 2099 if (rval != -EEXIST) {
2100 error = rval; 2100 error = rval;
2101 goto out_free; 2101 goto out_free;
2102 } 2102 }
@@ -2169,7 +2169,7 @@ xfs_dir2_node_replace(
2169 * It should be found, since the vnodeops layer has looked it up 2169 * It should be found, since the vnodeops layer has looked it up
2170 * and locked it. But paranoia is good. 2170 * and locked it. But paranoia is good.
2171 */ 2171 */
2172 if (rval == EEXIST) { 2172 if (rval == -EEXIST) {
2173 struct xfs_dir2_leaf_entry *ents; 2173 struct xfs_dir2_leaf_entry *ents;
2174 /* 2174 /*
2175 * Find the leaf entry. 2175 * Find the leaf entry.
@@ -2272,7 +2272,7 @@ xfs_dir2_node_trim_free(
2272 * space reservation, when breaking up an extent into two 2272 * space reservation, when breaking up an extent into two
2273 * pieces. This is the last block of an extent. 2273 * pieces. This is the last block of an extent.
2274 */ 2274 */
2275 ASSERT(error != ENOSPC); 2275 ASSERT(error != -ENOSPC);
2276 xfs_trans_brelse(tp, bp); 2276 xfs_trans_brelse(tp, bp);
2277 return error; 2277 return error;
2278 } 2278 }
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 27ce0794d196..27ce0794d196 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 53c3be619db5..5079e051ef08 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -51,10 +51,9 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
51#else 51#else
52#define xfs_dir2_sf_check(args) 52#define xfs_dir2_sf_check(args)
53#endif /* DEBUG */ 53#endif /* DEBUG */
54#if XFS_BIG_INUMS 54
55static void xfs_dir2_sf_toino4(xfs_da_args_t *args); 55static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
56static void xfs_dir2_sf_toino8(xfs_da_args_t *args); 56static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
57#endif /* XFS_BIG_INUMS */
58 57
59/* 58/*
60 * Given a block directory (dp/block), calculate its size as a shortform (sf) 59 * Given a block directory (dp/block), calculate its size as a shortform (sf)
@@ -117,10 +116,10 @@ xfs_dir2_block_sfsize(
117 isdotdot = 116 isdotdot =
118 dep->namelen == 2 && 117 dep->namelen == 2 &&
119 dep->name[0] == '.' && dep->name[1] == '.'; 118 dep->name[0] == '.' && dep->name[1] == '.';
120#if XFS_BIG_INUMS 119
121 if (!isdot) 120 if (!isdot)
122 i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM; 121 i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
123#endif 122
124 /* take into account the file type field */ 123 /* take into account the file type field */
125 if (!isdot && !isdotdot) { 124 if (!isdot && !isdotdot) {
126 count++; 125 count++;
@@ -251,7 +250,7 @@ xfs_dir2_block_to_sf(
251 logflags = XFS_ILOG_CORE; 250 logflags = XFS_ILOG_CORE;
252 error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); 251 error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp);
253 if (error) { 252 if (error) {
254 ASSERT(error != ENOSPC); 253 ASSERT(error != -ENOSPC);
255 goto out; 254 goto out;
256 } 255 }
257 256
@@ -299,7 +298,7 @@ xfs_dir2_sf_addname(
299 298
300 trace_xfs_dir2_sf_addname(args); 299 trace_xfs_dir2_sf_addname(args);
301 300
302 ASSERT(xfs_dir2_sf_lookup(args) == ENOENT); 301 ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT);
303 dp = args->dp; 302 dp = args->dp;
304 ASSERT(dp->i_df.if_flags & XFS_IFINLINE); 303 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
305 /* 304 /*
@@ -307,7 +306,7 @@ xfs_dir2_sf_addname(
307 */ 306 */
308 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { 307 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
309 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); 308 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
310 return XFS_ERROR(EIO); 309 return -EIO;
311 } 310 }
312 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 311 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
313 ASSERT(dp->i_df.if_u1.if_data != NULL); 312 ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -318,7 +317,7 @@ xfs_dir2_sf_addname(
318 */ 317 */
319 incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen); 318 incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
320 objchange = 0; 319 objchange = 0;
321#if XFS_BIG_INUMS 320
322 /* 321 /*
323 * Do we have to change to 8 byte inodes? 322 * Do we have to change to 8 byte inodes?
324 */ 323 */
@@ -332,7 +331,7 @@ xfs_dir2_sf_addname(
332 (uint)sizeof(xfs_dir2_ino4_t)); 331 (uint)sizeof(xfs_dir2_ino4_t));
333 objchange = 1; 332 objchange = 1;
334 } 333 }
335#endif 334
336 new_isize = (int)dp->i_d.di_size + incr_isize; 335 new_isize = (int)dp->i_d.di_size + incr_isize;
337 /* 336 /*
338 * Won't fit as shortform any more (due to size), 337 * Won't fit as shortform any more (due to size),
@@ -345,7 +344,7 @@ xfs_dir2_sf_addname(
345 * Just checking or no space reservation, it doesn't fit. 344 * Just checking or no space reservation, it doesn't fit.
346 */ 345 */
347 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) 346 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
348 return XFS_ERROR(ENOSPC); 347 return -ENOSPC;
349 /* 348 /*
350 * Convert to block form then add the name. 349 * Convert to block form then add the name.
351 */ 350 */
@@ -370,10 +369,8 @@ xfs_dir2_sf_addname(
370 */ 369 */
371 else { 370 else {
372 ASSERT(pick == 2); 371 ASSERT(pick == 2);
373#if XFS_BIG_INUMS
374 if (objchange) 372 if (objchange)
375 xfs_dir2_sf_toino8(args); 373 xfs_dir2_sf_toino8(args);
376#endif
377 xfs_dir2_sf_addname_hard(args, objchange, new_isize); 374 xfs_dir2_sf_addname_hard(args, objchange, new_isize);
378 } 375 }
379 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 376 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -425,10 +422,8 @@ xfs_dir2_sf_addname_easy(
425 * Update the header and inode. 422 * Update the header and inode.
426 */ 423 */
427 sfp->count++; 424 sfp->count++;
428#if XFS_BIG_INUMS
429 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) 425 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
430 sfp->i8count++; 426 sfp->i8count++;
431#endif
432 dp->i_d.di_size = new_isize; 427 dp->i_d.di_size = new_isize;
433 xfs_dir2_sf_check(args); 428 xfs_dir2_sf_check(args);
434} 429}
@@ -516,10 +511,8 @@ xfs_dir2_sf_addname_hard(
516 dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); 511 dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
517 dp->d_ops->sf_put_ftype(sfep, args->filetype); 512 dp->d_ops->sf_put_ftype(sfep, args->filetype);
518 sfp->count++; 513 sfp->count++;
519#if XFS_BIG_INUMS
520 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) 514 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
521 sfp->i8count++; 515 sfp->i8count++;
522#endif
523 /* 516 /*
524 * If there's more left to copy, do that. 517 * If there's more left to copy, do that.
525 */ 518 */
@@ -593,13 +586,8 @@ xfs_dir2_sf_addname_pick(
593 /* 586 /*
594 * If changing the inode number size, do it the hard way. 587 * If changing the inode number size, do it the hard way.
595 */ 588 */
596#if XFS_BIG_INUMS 589 if (objchange)
597 if (objchange) {
598 return 2; 590 return 2;
599 }
600#else
601 ASSERT(objchange == 0);
602#endif
603 /* 591 /*
604 * If it won't fit at the end then do it the hard way (use the hole). 592 * If it won't fit at the end then do it the hard way (use the hole).
605 */ 593 */
@@ -650,7 +638,6 @@ xfs_dir2_sf_check(
650 ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); 638 ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
651 } 639 }
652 ASSERT(i8count == sfp->i8count); 640 ASSERT(i8count == sfp->i8count);
653 ASSERT(XFS_BIG_INUMS || i8count == 0);
654 ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); 641 ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
655 ASSERT(offset + 642 ASSERT(offset +
656 (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 643 (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
@@ -738,7 +725,7 @@ xfs_dir2_sf_lookup(
738 */ 725 */
739 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { 726 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
740 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); 727 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
741 return XFS_ERROR(EIO); 728 return -EIO;
742 } 729 }
743 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 730 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
744 ASSERT(dp->i_df.if_u1.if_data != NULL); 731 ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -751,7 +738,7 @@ xfs_dir2_sf_lookup(
751 args->inumber = dp->i_ino; 738 args->inumber = dp->i_ino;
752 args->cmpresult = XFS_CMP_EXACT; 739 args->cmpresult = XFS_CMP_EXACT;
753 args->filetype = XFS_DIR3_FT_DIR; 740 args->filetype = XFS_DIR3_FT_DIR;
754 return XFS_ERROR(EEXIST); 741 return -EEXIST;
755 } 742 }
756 /* 743 /*
757 * Special case for .. 744 * Special case for ..
@@ -761,7 +748,7 @@ xfs_dir2_sf_lookup(
761 args->inumber = dp->d_ops->sf_get_parent_ino(sfp); 748 args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
762 args->cmpresult = XFS_CMP_EXACT; 749 args->cmpresult = XFS_CMP_EXACT;
763 args->filetype = XFS_DIR3_FT_DIR; 750 args->filetype = XFS_DIR3_FT_DIR;
764 return XFS_ERROR(EEXIST); 751 return -EEXIST;
765 } 752 }
766 /* 753 /*
767 * Loop over all the entries trying to match ours. 754 * Loop over all the entries trying to match ours.
@@ -781,20 +768,20 @@ xfs_dir2_sf_lookup(
781 args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); 768 args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
782 args->filetype = dp->d_ops->sf_get_ftype(sfep); 769 args->filetype = dp->d_ops->sf_get_ftype(sfep);
783 if (cmp == XFS_CMP_EXACT) 770 if (cmp == XFS_CMP_EXACT)
784 return XFS_ERROR(EEXIST); 771 return -EEXIST;
785 ci_sfep = sfep; 772 ci_sfep = sfep;
786 } 773 }
787 } 774 }
788 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 775 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
789 /* 776 /*
790 * Here, we can only be doing a lookup (not a rename or replace). 777 * Here, we can only be doing a lookup (not a rename or replace).
791 * If a case-insensitive match was not found, return ENOENT. 778 * If a case-insensitive match was not found, return -ENOENT.
792 */ 779 */
793 if (!ci_sfep) 780 if (!ci_sfep)
794 return XFS_ERROR(ENOENT); 781 return -ENOENT;
795 /* otherwise process the CI match as required by the caller */ 782 /* otherwise process the CI match as required by the caller */
796 error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); 783 error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
797 return XFS_ERROR(error); 784 return error;
798} 785}
799 786
800/* 787/*
@@ -824,7 +811,7 @@ xfs_dir2_sf_removename(
824 */ 811 */
825 if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) { 812 if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
826 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); 813 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
827 return XFS_ERROR(EIO); 814 return -EIO;
828 } 815 }
829 ASSERT(dp->i_df.if_bytes == oldsize); 816 ASSERT(dp->i_df.if_bytes == oldsize);
830 ASSERT(dp->i_df.if_u1.if_data != NULL); 817 ASSERT(dp->i_df.if_u1.if_data != NULL);
@@ -847,7 +834,7 @@ xfs_dir2_sf_removename(
847 * Didn't find it. 834 * Didn't find it.
848 */ 835 */
849 if (i == sfp->count) 836 if (i == sfp->count)
850 return XFS_ERROR(ENOENT); 837 return -ENOENT;
851 /* 838 /*
852 * Calculate sizes. 839 * Calculate sizes.
853 */ 840 */
@@ -870,7 +857,6 @@ xfs_dir2_sf_removename(
870 */ 857 */
871 xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); 858 xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
872 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 859 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
873#if XFS_BIG_INUMS
874 /* 860 /*
875 * Are we changing inode number size? 861 * Are we changing inode number size?
876 */ 862 */
@@ -880,7 +866,6 @@ xfs_dir2_sf_removename(
880 else 866 else
881 sfp->i8count--; 867 sfp->i8count--;
882 } 868 }
883#endif
884 xfs_dir2_sf_check(args); 869 xfs_dir2_sf_check(args);
885 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 870 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
886 return 0; 871 return 0;
@@ -895,12 +880,8 @@ xfs_dir2_sf_replace(
895{ 880{
896 xfs_inode_t *dp; /* incore directory inode */ 881 xfs_inode_t *dp; /* incore directory inode */
897 int i; /* entry index */ 882 int i; /* entry index */
898#if XFS_BIG_INUMS || defined(DEBUG)
899 xfs_ino_t ino=0; /* entry old inode number */ 883 xfs_ino_t ino=0; /* entry old inode number */
900#endif
901#if XFS_BIG_INUMS
902 int i8elevated; /* sf_toino8 set i8count=1 */ 884 int i8elevated; /* sf_toino8 set i8count=1 */
903#endif
904 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 885 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
905 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ 886 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
906 887
@@ -914,13 +895,13 @@ xfs_dir2_sf_replace(
914 */ 895 */
915 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { 896 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
916 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); 897 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
917 return XFS_ERROR(EIO); 898 return -EIO;
918 } 899 }
919 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 900 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
920 ASSERT(dp->i_df.if_u1.if_data != NULL); 901 ASSERT(dp->i_df.if_u1.if_data != NULL);
921 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 902 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
922 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); 903 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
923#if XFS_BIG_INUMS 904
924 /* 905 /*
925 * New inode number is large, and need to convert to 8-byte inodes. 906 * New inode number is large, and need to convert to 8-byte inodes.
926 */ 907 */
@@ -951,17 +932,15 @@ xfs_dir2_sf_replace(
951 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; 932 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
952 } else 933 } else
953 i8elevated = 0; 934 i8elevated = 0;
954#endif 935
955 ASSERT(args->namelen != 1 || args->name[0] != '.'); 936 ASSERT(args->namelen != 1 || args->name[0] != '.');
956 /* 937 /*
957 * Replace ..'s entry. 938 * Replace ..'s entry.
958 */ 939 */
959 if (args->namelen == 2 && 940 if (args->namelen == 2 &&
960 args->name[0] == '.' && args->name[1] == '.') { 941 args->name[0] == '.' && args->name[1] == '.') {
961#if XFS_BIG_INUMS || defined(DEBUG)
962 ino = dp->d_ops->sf_get_parent_ino(sfp); 942 ino = dp->d_ops->sf_get_parent_ino(sfp);
963 ASSERT(args->inumber != ino); 943 ASSERT(args->inumber != ino);
964#endif
965 dp->d_ops->sf_put_parent_ino(sfp, args->inumber); 944 dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
966 } 945 }
967 /* 946 /*
@@ -972,10 +951,8 @@ xfs_dir2_sf_replace(
972 i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { 951 i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
973 if (xfs_da_compname(args, sfep->name, sfep->namelen) == 952 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
974 XFS_CMP_EXACT) { 953 XFS_CMP_EXACT) {
975#if XFS_BIG_INUMS || defined(DEBUG)
976 ino = dp->d_ops->sf_get_ino(sfp, sfep); 954 ino = dp->d_ops->sf_get_ino(sfp, sfep);
977 ASSERT(args->inumber != ino); 955 ASSERT(args->inumber != ino);
978#endif
979 dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); 956 dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
980 dp->d_ops->sf_put_ftype(sfep, args->filetype); 957 dp->d_ops->sf_put_ftype(sfep, args->filetype);
981 break; 958 break;
@@ -986,14 +963,11 @@ xfs_dir2_sf_replace(
986 */ 963 */
987 if (i == sfp->count) { 964 if (i == sfp->count) {
988 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 965 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
989#if XFS_BIG_INUMS
990 if (i8elevated) 966 if (i8elevated)
991 xfs_dir2_sf_toino4(args); 967 xfs_dir2_sf_toino4(args);
992#endif 968 return -ENOENT;
993 return XFS_ERROR(ENOENT);
994 } 969 }
995 } 970 }
996#if XFS_BIG_INUMS
997 /* 971 /*
998 * See if the old number was large, the new number is small. 972 * See if the old number was large, the new number is small.
999 */ 973 */
@@ -1020,13 +994,11 @@ xfs_dir2_sf_replace(
1020 if (!i8elevated) 994 if (!i8elevated)
1021 sfp->i8count++; 995 sfp->i8count++;
1022 } 996 }
1023#endif
1024 xfs_dir2_sf_check(args); 997 xfs_dir2_sf_check(args);
1025 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA); 998 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
1026 return 0; 999 return 0;
1027} 1000}
1028 1001
1029#if XFS_BIG_INUMS
1030/* 1002/*
1031 * Convert from 8-byte inode numbers to 4-byte inode numbers. 1003 * Convert from 8-byte inode numbers to 4-byte inode numbers.
1032 * The last 8-byte inode number is gone, but the count is still 1. 1004 * The last 8-byte inode number is gone, but the count is still 1.
@@ -1181,4 +1153,3 @@ xfs_dir2_sf_toino8(
1181 dp->i_d.di_size = newsize; 1153 dp->i_d.di_size = newsize;
1182 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1154 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1183} 1155}
1184#endif /* XFS_BIG_INUMS */
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index c2ac0c611ad8..bb969337efc8 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -257,9 +257,9 @@ xfs_dquot_buf_read_verify(
257 struct xfs_mount *mp = bp->b_target->bt_mount; 257 struct xfs_mount *mp = bp->b_target->bt_mount;
258 258
259 if (!xfs_dquot_buf_verify_crc(mp, bp)) 259 if (!xfs_dquot_buf_verify_crc(mp, bp))
260 xfs_buf_ioerror(bp, EFSBADCRC); 260 xfs_buf_ioerror(bp, -EFSBADCRC);
261 else if (!xfs_dquot_buf_verify(mp, bp)) 261 else if (!xfs_dquot_buf_verify(mp, bp))
262 xfs_buf_ioerror(bp, EFSCORRUPTED); 262 xfs_buf_ioerror(bp, -EFSCORRUPTED);
263 263
264 if (bp->b_error) 264 if (bp->b_error)
265 xfs_verifier_error(bp); 265 xfs_verifier_error(bp);
@@ -277,7 +277,7 @@ xfs_dquot_buf_write_verify(
277 struct xfs_mount *mp = bp->b_target->bt_mount; 277 struct xfs_mount *mp = bp->b_target->bt_mount;
278 278
279 if (!xfs_dquot_buf_verify(mp, bp)) { 279 if (!xfs_dquot_buf_verify(mp, bp)) {
280 xfs_buf_ioerror(bp, EFSCORRUPTED); 280 xfs_buf_ioerror(bp, -EFSCORRUPTED);
281 xfs_verifier_error(bp); 281 xfs_verifier_error(bp);
282 return; 282 return;
283 } 283 }
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 34d85aca3058..7e42bba9a420 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -68,11 +68,7 @@ struct xfs_ifork;
68#define XFS_RTLOBIT(w) xfs_lowbit32(w) 68#define XFS_RTLOBIT(w) xfs_lowbit32(w)
69#define XFS_RTHIBIT(w) xfs_highbit32(w) 69#define XFS_RTHIBIT(w) xfs_highbit32(w)
70 70
71#if XFS_BIG_BLKNOS
72#define XFS_RTBLOCKLOG(b) xfs_highbit64(b) 71#define XFS_RTBLOCKLOG(b) xfs_highbit64(b)
73#else
74#define XFS_RTBLOCKLOG(b) xfs_highbit32(b)
75#endif
76 72
77/* 73/*
78 * Dquot and dquot block format definitions 74 * Dquot and dquot block format definitions
@@ -304,23 +300,15 @@ typedef struct xfs_bmbt_rec_host {
304 * Values and macros for delayed-allocation startblock fields. 300 * Values and macros for delayed-allocation startblock fields.
305 */ 301 */
306#define STARTBLOCKVALBITS 17 302#define STARTBLOCKVALBITS 17
307#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) 303#define STARTBLOCKMASKBITS (15 + 20)
308#define DSTARTBLOCKMASKBITS (15 + 20)
309#define STARTBLOCKMASK \ 304#define STARTBLOCKMASK \
310 (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) 305 (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
311#define DSTARTBLOCKMASK \
312 (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS)
313 306
314static inline int isnullstartblock(xfs_fsblock_t x) 307static inline int isnullstartblock(xfs_fsblock_t x)
315{ 308{
316 return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; 309 return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK;
317} 310}
318 311
319static inline int isnulldstartblock(xfs_dfsbno_t x)
320{
321 return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK;
322}
323
324static inline xfs_fsblock_t nullstartblock(int k) 312static inline xfs_fsblock_t nullstartblock(int k)
325{ 313{
326 ASSERT(k < (1 << STARTBLOCKVALBITS)); 314 ASSERT(k < (1 << STARTBLOCKVALBITS));
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 5960e5593fe0..b62771f1f4b5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -292,7 +292,7 @@ xfs_ialloc_inode_init(
292 mp->m_bsize * blks_per_cluster, 292 mp->m_bsize * blks_per_cluster,
293 XBF_UNMAPPED); 293 XBF_UNMAPPED);
294 if (!fbuf) 294 if (!fbuf)
295 return ENOMEM; 295 return -ENOMEM;
296 296
297 /* Initialize the inode buffers and log them appropriately. */ 297 /* Initialize the inode buffers and log them appropriately. */
298 fbuf->b_ops = &xfs_inode_buf_ops; 298 fbuf->b_ops = &xfs_inode_buf_ops;
@@ -380,7 +380,7 @@ xfs_ialloc_ag_alloc(
380 newlen = args.mp->m_ialloc_inos; 380 newlen = args.mp->m_ialloc_inos;
381 if (args.mp->m_maxicount && 381 if (args.mp->m_maxicount &&
382 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 382 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
383 return XFS_ERROR(ENOSPC); 383 return -ENOSPC;
384 args.minlen = args.maxlen = args.mp->m_ialloc_blks; 384 args.minlen = args.maxlen = args.mp->m_ialloc_blks;
385 /* 385 /*
386 * First try to allocate inodes contiguous with the last-allocated 386 * First try to allocate inodes contiguous with the last-allocated
@@ -1385,7 +1385,7 @@ xfs_dialloc(
1385 if (error) { 1385 if (error) {
1386 xfs_trans_brelse(tp, agbp); 1386 xfs_trans_brelse(tp, agbp);
1387 1387
1388 if (error != ENOSPC) 1388 if (error != -ENOSPC)
1389 goto out_error; 1389 goto out_error;
1390 1390
1391 xfs_perag_put(pag); 1391 xfs_perag_put(pag);
@@ -1416,7 +1416,7 @@ nextag:
1416 agno = 0; 1416 agno = 0;
1417 if (agno == start_agno) { 1417 if (agno == start_agno) {
1418 *inop = NULLFSINO; 1418 *inop = NULLFSINO;
1419 return noroom ? ENOSPC : 0; 1419 return noroom ? -ENOSPC : 0;
1420 } 1420 }
1421 } 1421 }
1422 1422
@@ -1425,7 +1425,7 @@ out_alloc:
1425 return xfs_dialloc_ag(tp, agbp, parent, inop); 1425 return xfs_dialloc_ag(tp, agbp, parent, inop);
1426out_error: 1426out_error:
1427 xfs_perag_put(pag); 1427 xfs_perag_put(pag);
1428 return XFS_ERROR(error); 1428 return error;
1429} 1429}
1430 1430
1431STATIC int 1431STATIC int
@@ -1682,7 +1682,7 @@ xfs_difree(
1682 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", 1682 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1683 __func__, agno, mp->m_sb.sb_agcount); 1683 __func__, agno, mp->m_sb.sb_agcount);
1684 ASSERT(0); 1684 ASSERT(0);
1685 return XFS_ERROR(EINVAL); 1685 return -EINVAL;
1686 } 1686 }
1687 agino = XFS_INO_TO_AGINO(mp, inode); 1687 agino = XFS_INO_TO_AGINO(mp, inode);
1688 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1688 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
@@ -1690,14 +1690,14 @@ xfs_difree(
1690 __func__, (unsigned long long)inode, 1690 __func__, (unsigned long long)inode,
1691 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); 1691 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1692 ASSERT(0); 1692 ASSERT(0);
1693 return XFS_ERROR(EINVAL); 1693 return -EINVAL;
1694 } 1694 }
1695 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1695 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1696 if (agbno >= mp->m_sb.sb_agblocks) { 1696 if (agbno >= mp->m_sb.sb_agblocks) {
1697 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", 1697 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1698 __func__, agbno, mp->m_sb.sb_agblocks); 1698 __func__, agbno, mp->m_sb.sb_agblocks);
1699 ASSERT(0); 1699 ASSERT(0);
1700 return XFS_ERROR(EINVAL); 1700 return -EINVAL;
1701 } 1701 }
1702 /* 1702 /*
1703 * Get the allocation group header. 1703 * Get the allocation group header.
@@ -1769,7 +1769,7 @@ xfs_imap_lookup(
1769 if (i) 1769 if (i)
1770 error = xfs_inobt_get_rec(cur, &rec, &i); 1770 error = xfs_inobt_get_rec(cur, &rec, &i);
1771 if (!error && i == 0) 1771 if (!error && i == 0)
1772 error = EINVAL; 1772 error = -EINVAL;
1773 } 1773 }
1774 1774
1775 xfs_trans_brelse(tp, agbp); 1775 xfs_trans_brelse(tp, agbp);
@@ -1780,12 +1780,12 @@ xfs_imap_lookup(
1780 /* check that the returned record contains the required inode */ 1780 /* check that the returned record contains the required inode */
1781 if (rec.ir_startino > agino || 1781 if (rec.ir_startino > agino ||
1782 rec.ir_startino + mp->m_ialloc_inos <= agino) 1782 rec.ir_startino + mp->m_ialloc_inos <= agino)
1783 return EINVAL; 1783 return -EINVAL;
1784 1784
1785 /* for untrusted inodes check it is allocated first */ 1785 /* for untrusted inodes check it is allocated first */
1786 if ((flags & XFS_IGET_UNTRUSTED) && 1786 if ((flags & XFS_IGET_UNTRUSTED) &&
1787 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino))) 1787 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1788 return EINVAL; 1788 return -EINVAL;
1789 1789
1790 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino); 1790 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1791 *offset_agbno = agbno - *chunk_agbno; 1791 *offset_agbno = agbno - *chunk_agbno;
@@ -1829,7 +1829,7 @@ xfs_imap(
1829 * as they can be invalid without implying corruption. 1829 * as they can be invalid without implying corruption.
1830 */ 1830 */
1831 if (flags & XFS_IGET_UNTRUSTED) 1831 if (flags & XFS_IGET_UNTRUSTED)
1832 return XFS_ERROR(EINVAL); 1832 return -EINVAL;
1833 if (agno >= mp->m_sb.sb_agcount) { 1833 if (agno >= mp->m_sb.sb_agcount) {
1834 xfs_alert(mp, 1834 xfs_alert(mp,
1835 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", 1835 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
@@ -1849,7 +1849,7 @@ xfs_imap(
1849 } 1849 }
1850 xfs_stack_trace(); 1850 xfs_stack_trace();
1851#endif /* DEBUG */ 1851#endif /* DEBUG */
1852 return XFS_ERROR(EINVAL); 1852 return -EINVAL;
1853 } 1853 }
1854 1854
1855 blks_per_cluster = xfs_icluster_size_fsb(mp); 1855 blks_per_cluster = xfs_icluster_size_fsb(mp);
@@ -1922,7 +1922,7 @@ out_map:
1922 __func__, (unsigned long long) imap->im_blkno, 1922 __func__, (unsigned long long) imap->im_blkno,
1923 (unsigned long long) imap->im_len, 1923 (unsigned long long) imap->im_len,
1924 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1924 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1925 return XFS_ERROR(EINVAL); 1925 return -EINVAL;
1926 } 1926 }
1927 return 0; 1927 return 0;
1928} 1928}
@@ -2072,11 +2072,11 @@ xfs_agi_read_verify(
2072 2072
2073 if (xfs_sb_version_hascrc(&mp->m_sb) && 2073 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2074 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) 2074 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
2075 xfs_buf_ioerror(bp, EFSBADCRC); 2075 xfs_buf_ioerror(bp, -EFSBADCRC);
2076 else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, 2076 else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
2077 XFS_ERRTAG_IALLOC_READ_AGI, 2077 XFS_ERRTAG_IALLOC_READ_AGI,
2078 XFS_RANDOM_IALLOC_READ_AGI)) 2078 XFS_RANDOM_IALLOC_READ_AGI))
2079 xfs_buf_ioerror(bp, EFSCORRUPTED); 2079 xfs_buf_ioerror(bp, -EFSCORRUPTED);
2080 2080
2081 if (bp->b_error) 2081 if (bp->b_error)
2082 xfs_verifier_error(bp); 2082 xfs_verifier_error(bp);
@@ -2090,7 +2090,7 @@ xfs_agi_write_verify(
2090 struct xfs_buf_log_item *bip = bp->b_fspriv; 2090 struct xfs_buf_log_item *bip = bp->b_fspriv;
2091 2091
2092 if (!xfs_agi_verify(bp)) { 2092 if (!xfs_agi_verify(bp)) {
2093 xfs_buf_ioerror(bp, EFSCORRUPTED); 2093 xfs_buf_ioerror(bp, -EFSCORRUPTED);
2094 xfs_verifier_error(bp); 2094 xfs_verifier_error(bp);
2095 return; 2095 return;
2096 } 2096 }
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 95ad1c002d60..95ad1c002d60 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 726f83a681a5..c9b06f30fe86 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -272,9 +272,9 @@ xfs_inobt_read_verify(
272 struct xfs_buf *bp) 272 struct xfs_buf *bp)
273{ 273{
274 if (!xfs_btree_sblock_verify_crc(bp)) 274 if (!xfs_btree_sblock_verify_crc(bp))
275 xfs_buf_ioerror(bp, EFSBADCRC); 275 xfs_buf_ioerror(bp, -EFSBADCRC);
276 else if (!xfs_inobt_verify(bp)) 276 else if (!xfs_inobt_verify(bp))
277 xfs_buf_ioerror(bp, EFSCORRUPTED); 277 xfs_buf_ioerror(bp, -EFSCORRUPTED);
278 278
279 if (bp->b_error) { 279 if (bp->b_error) {
280 trace_xfs_btree_corrupt(bp, _RET_IP_); 280 trace_xfs_btree_corrupt(bp, _RET_IP_);
@@ -288,7 +288,7 @@ xfs_inobt_write_verify(
288{ 288{
289 if (!xfs_inobt_verify(bp)) { 289 if (!xfs_inobt_verify(bp)) {
290 trace_xfs_btree_corrupt(bp, _RET_IP_); 290 trace_xfs_btree_corrupt(bp, _RET_IP_);
291 xfs_buf_ioerror(bp, EFSCORRUPTED); 291 xfs_buf_ioerror(bp, -EFSCORRUPTED);
292 xfs_verifier_error(bp); 292 xfs_verifier_error(bp);
293 return; 293 return;
294 } 294 }
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index d7ebea72c2d0..d7ebea72c2d0 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index cb35ae41d4a1..f18fd2da49f7 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -101,7 +101,7 @@ xfs_inode_buf_verify(
101 return; 101 return;
102 } 102 }
103 103
104 xfs_buf_ioerror(bp, EFSCORRUPTED); 104 xfs_buf_ioerror(bp, -EFSCORRUPTED);
105 xfs_verifier_error(bp); 105 xfs_verifier_error(bp);
106#ifdef DEBUG 106#ifdef DEBUG
107 xfs_alert(mp, 107 xfs_alert(mp,
@@ -174,14 +174,14 @@ xfs_imap_to_bp(
174 (int)imap->im_len, buf_flags, &bp, 174 (int)imap->im_len, buf_flags, &bp,
175 &xfs_inode_buf_ops); 175 &xfs_inode_buf_ops);
176 if (error) { 176 if (error) {
177 if (error == EAGAIN) { 177 if (error == -EAGAIN) {
178 ASSERT(buf_flags & XBF_TRYLOCK); 178 ASSERT(buf_flags & XBF_TRYLOCK);
179 return error; 179 return error;
180 } 180 }
181 181
182 if (error == EFSCORRUPTED && 182 if (error == -EFSCORRUPTED &&
183 (iget_flags & XFS_IGET_UNTRUSTED)) 183 (iget_flags & XFS_IGET_UNTRUSTED))
184 return XFS_ERROR(EINVAL); 184 return -EINVAL;
185 185
186 xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.", 186 xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
187 __func__, error); 187 __func__, error);
@@ -390,7 +390,7 @@ xfs_iread(
390 __func__, ip->i_ino); 390 __func__, ip->i_ino);
391 391
392 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); 392 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
393 error = XFS_ERROR(EFSCORRUPTED); 393 error = -EFSCORRUPTED;
394 goto out_brelse; 394 goto out_brelse;
395 } 395 }
396 396
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 9308c47f2a52..9308c47f2a52 100644
--- a/fs/xfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index b031e8d0d928..6a00f7fed69d 100644
--- a/fs/xfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -102,7 +102,7 @@ xfs_iformat_fork(
102 be64_to_cpu(dip->di_nblocks)); 102 be64_to_cpu(dip->di_nblocks));
103 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, 103 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
104 ip->i_mount, dip); 104 ip->i_mount, dip);
105 return XFS_ERROR(EFSCORRUPTED); 105 return -EFSCORRUPTED;
106 } 106 }
107 107
108 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 108 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
@@ -111,7 +111,7 @@ xfs_iformat_fork(
111 dip->di_forkoff); 111 dip->di_forkoff);
112 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 112 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
113 ip->i_mount, dip); 113 ip->i_mount, dip);
114 return XFS_ERROR(EFSCORRUPTED); 114 return -EFSCORRUPTED;
115 } 115 }
116 116
117 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 117 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
@@ -121,7 +121,7 @@ xfs_iformat_fork(
121 ip->i_ino); 121 ip->i_ino);
122 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 122 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
123 XFS_ERRLEVEL_LOW, ip->i_mount, dip); 123 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
124 return XFS_ERROR(EFSCORRUPTED); 124 return -EFSCORRUPTED;
125 } 125 }
126 126
127 switch (ip->i_d.di_mode & S_IFMT) { 127 switch (ip->i_d.di_mode & S_IFMT) {
@@ -132,7 +132,7 @@ xfs_iformat_fork(
132 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { 132 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
133 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, 133 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
134 ip->i_mount, dip); 134 ip->i_mount, dip);
135 return XFS_ERROR(EFSCORRUPTED); 135 return -EFSCORRUPTED;
136 } 136 }
137 ip->i_d.di_size = 0; 137 ip->i_d.di_size = 0;
138 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); 138 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
@@ -153,7 +153,7 @@ xfs_iformat_fork(
153 XFS_CORRUPTION_ERROR("xfs_iformat(4)", 153 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
154 XFS_ERRLEVEL_LOW, 154 XFS_ERRLEVEL_LOW,
155 ip->i_mount, dip); 155 ip->i_mount, dip);
156 return XFS_ERROR(EFSCORRUPTED); 156 return -EFSCORRUPTED;
157 } 157 }
158 158
159 di_size = be64_to_cpu(dip->di_size); 159 di_size = be64_to_cpu(dip->di_size);
@@ -166,7 +166,7 @@ xfs_iformat_fork(
166 XFS_CORRUPTION_ERROR("xfs_iformat(5)", 166 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
167 XFS_ERRLEVEL_LOW, 167 XFS_ERRLEVEL_LOW,
168 ip->i_mount, dip); 168 ip->i_mount, dip);
169 return XFS_ERROR(EFSCORRUPTED); 169 return -EFSCORRUPTED;
170 } 170 }
171 171
172 size = (int)di_size; 172 size = (int)di_size;
@@ -181,13 +181,13 @@ xfs_iformat_fork(
181 default: 181 default:
182 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW, 182 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
183 ip->i_mount); 183 ip->i_mount);
184 return XFS_ERROR(EFSCORRUPTED); 184 return -EFSCORRUPTED;
185 } 185 }
186 break; 186 break;
187 187
188 default: 188 default:
189 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount); 189 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
190 return XFS_ERROR(EFSCORRUPTED); 190 return -EFSCORRUPTED;
191 } 191 }
192 if (error) { 192 if (error) {
193 return error; 193 return error;
@@ -211,7 +211,7 @@ xfs_iformat_fork(
211 XFS_CORRUPTION_ERROR("xfs_iformat(8)", 211 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
212 XFS_ERRLEVEL_LOW, 212 XFS_ERRLEVEL_LOW,
213 ip->i_mount, dip); 213 ip->i_mount, dip);
214 return XFS_ERROR(EFSCORRUPTED); 214 return -EFSCORRUPTED;
215 } 215 }
216 216
217 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size); 217 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
@@ -223,7 +223,7 @@ xfs_iformat_fork(
223 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); 223 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
224 break; 224 break;
225 default: 225 default:
226 error = XFS_ERROR(EFSCORRUPTED); 226 error = -EFSCORRUPTED;
227 break; 227 break;
228 } 228 }
229 if (error) { 229 if (error) {
@@ -266,7 +266,7 @@ xfs_iformat_local(
266 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 266 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
267 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 267 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
268 ip->i_mount, dip); 268 ip->i_mount, dip);
269 return XFS_ERROR(EFSCORRUPTED); 269 return -EFSCORRUPTED;
270 } 270 }
271 ifp = XFS_IFORK_PTR(ip, whichfork); 271 ifp = XFS_IFORK_PTR(ip, whichfork);
272 real_size = 0; 272 real_size = 0;
@@ -322,7 +322,7 @@ xfs_iformat_extents(
322 (unsigned long long) ip->i_ino, nex); 322 (unsigned long long) ip->i_ino, nex);
323 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 323 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
324 ip->i_mount, dip); 324 ip->i_mount, dip);
325 return XFS_ERROR(EFSCORRUPTED); 325 return -EFSCORRUPTED;
326 } 326 }
327 327
328 ifp->if_real_bytes = 0; 328 ifp->if_real_bytes = 0;
@@ -350,7 +350,7 @@ xfs_iformat_extents(
350 XFS_ERROR_REPORT("xfs_iformat_extents(2)", 350 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
351 XFS_ERRLEVEL_LOW, 351 XFS_ERRLEVEL_LOW,
352 ip->i_mount); 352 ip->i_mount);
353 return XFS_ERROR(EFSCORRUPTED); 353 return -EFSCORRUPTED;
354 } 354 }
355 } 355 }
356 ifp->if_flags |= XFS_IFEXTENTS; 356 ifp->if_flags |= XFS_IFEXTENTS;
@@ -399,7 +399,7 @@ xfs_iformat_btree(
399 (unsigned long long) ip->i_ino); 399 (unsigned long long) ip->i_ino);
400 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 400 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
401 mp, dip); 401 mp, dip);
402 return XFS_ERROR(EFSCORRUPTED); 402 return -EFSCORRUPTED;
403 } 403 }
404 404
405 ifp->if_broot_bytes = size; 405 ifp->if_broot_bytes = size;
@@ -436,7 +436,7 @@ xfs_iread_extents(
436 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 436 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
437 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, 437 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
438 ip->i_mount); 438 ip->i_mount);
439 return XFS_ERROR(EFSCORRUPTED); 439 return -EFSCORRUPTED;
440 } 440 }
441 nextents = XFS_IFORK_NEXTENTS(ip, whichfork); 441 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
442 ifp = XFS_IFORK_PTR(ip, whichfork); 442 ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -528,7 +528,7 @@ xfs_iroot_realloc(
528 ifp->if_broot_bytes = (int)new_size; 528 ifp->if_broot_bytes = (int)new_size;
529 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= 529 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
530 XFS_IFORK_SIZE(ip, whichfork)); 530 XFS_IFORK_SIZE(ip, whichfork));
531 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); 531 memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
532 return; 532 return;
533 } 533 }
534 534
@@ -575,7 +575,7 @@ xfs_iroot_realloc(
575 ifp->if_broot_bytes); 575 ifp->if_broot_bytes);
576 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, 576 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
577 (int)new_size); 577 (int)new_size);
578 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 578 memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
579 } 579 }
580 kmem_free(ifp->if_broot); 580 kmem_free(ifp->if_broot);
581 ifp->if_broot = new_broot; 581 ifp->if_broot = new_broot;
@@ -1692,7 +1692,7 @@ xfs_iext_idx_to_irec(
1692 } 1692 }
1693 *idxp = page_idx; 1693 *idxp = page_idx;
1694 *erp_idxp = erp_idx; 1694 *erp_idxp = erp_idx;
1695 return(erp); 1695 return erp;
1696} 1696}
1697 1697
1698/* 1698/*
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7d3b1ed6dcbe..7d3b1ed6dcbe 100644
--- a/fs/xfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/libxfs/xfs_inum.h
index 90efdaf1706f..4ff2278e147a 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/libxfs/xfs_inum.h
@@ -54,11 +54,7 @@ struct xfs_mount;
54#define XFS_OFFBNO_TO_AGINO(mp,b,o) \ 54#define XFS_OFFBNO_TO_AGINO(mp,b,o) \
55 ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o))) 55 ((xfs_agino_t)(((b) << XFS_INO_OFFSET_BITS(mp)) | (o)))
56 56
57#if XFS_BIG_INUMS
58#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL)) 57#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 56) - 1ULL))
59#else
60#define XFS_MAXINUMBER ((xfs_ino_t)((1ULL << 32) - 1ULL))
61#endif
62#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL)) 58#define XFS_MAXINUMBER_32 ((xfs_ino_t)((1ULL << 32) - 1ULL))
63 59
64#endif /* __XFS_INUM_H__ */ 60#endif /* __XFS_INUM_H__ */
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index f0969c77bdbe..aff12f2d4428 100644
--- a/fs/xfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -380,7 +380,7 @@ typedef struct xfs_icdinode {
380 xfs_ictimestamp_t di_mtime; /* time last modified */ 380 xfs_ictimestamp_t di_mtime; /* time last modified */
381 xfs_ictimestamp_t di_ctime; /* time created/inode modified */ 381 xfs_ictimestamp_t di_ctime; /* time created/inode modified */
382 xfs_fsize_t di_size; /* number of bytes in file */ 382 xfs_fsize_t di_size; /* number of bytes in file */
383 xfs_drfsbno_t di_nblocks; /* # of direct & btree blocks used */ 383 xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
384 xfs_extlen_t di_extsize; /* basic/minimum extent size for file */ 384 xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
385 xfs_extnum_t di_nextents; /* number of extents in data fork */ 385 xfs_extnum_t di_nextents; /* number of extents in data fork */
386 xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/ 386 xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
@@ -516,7 +516,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)
516 * EFI/EFD log format definitions 516 * EFI/EFD log format definitions
517 */ 517 */
518typedef struct xfs_extent { 518typedef struct xfs_extent {
519 xfs_dfsbno_t ext_start; 519 xfs_fsblock_t ext_start;
520 xfs_extlen_t ext_len; 520 xfs_extlen_t ext_len;
521} xfs_extent_t; 521} xfs_extent_t;
522 522
diff --git a/fs/xfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb379d..1c55ccbb379d 100644
--- a/fs/xfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index ee7e0e80246b..ee7e0e80246b 100644
--- a/fs/xfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 137e20937077..1b0a08379759 100644
--- a/fs/xfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -98,8 +98,6 @@ typedef __uint16_t xfs_qwarncnt_t;
98#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \ 98#define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
99 XFS_GQUOTA_ACTIVE | \ 99 XFS_GQUOTA_ACTIVE | \
100 XFS_PQUOTA_ACTIVE)) 100 XFS_PQUOTA_ACTIVE))
101#define XFS_IS_OQUOTA_ON(mp) ((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
102 XFS_PQUOTA_ACTIVE))
103#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE) 101#define XFS_IS_UQUOTA_ON(mp) ((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
104#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE) 102#define XFS_IS_GQUOTA_ON(mp) ((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
105#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE) 103#define XFS_IS_PQUOTA_ON(mp) ((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index f4dd697cac08..f4dd697cac08 100644
--- a/fs/xfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 7703fa6770ff..ad525a5623a4 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -186,13 +186,13 @@ xfs_mount_validate_sb(
186 */ 186 */
187 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 187 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
188 xfs_warn(mp, "bad magic number"); 188 xfs_warn(mp, "bad magic number");
189 return XFS_ERROR(EWRONGFS); 189 return -EWRONGFS;
190 } 190 }
191 191
192 192
193 if (!xfs_sb_good_version(sbp)) { 193 if (!xfs_sb_good_version(sbp)) {
194 xfs_warn(mp, "bad version"); 194 xfs_warn(mp, "bad version");
195 return XFS_ERROR(EWRONGFS); 195 return -EWRONGFS;
196 } 196 }
197 197
198 /* 198 /*
@@ -220,7 +220,7 @@ xfs_mount_validate_sb(
220 xfs_warn(mp, 220 xfs_warn(mp,
221"Attempted to mount read-only compatible filesystem read-write.\n" 221"Attempted to mount read-only compatible filesystem read-write.\n"
222"Filesystem can only be safely mounted read only."); 222"Filesystem can only be safely mounted read only.");
223 return XFS_ERROR(EINVAL); 223 return -EINVAL;
224 } 224 }
225 } 225 }
226 if (xfs_sb_has_incompat_feature(sbp, 226 if (xfs_sb_has_incompat_feature(sbp,
@@ -230,7 +230,7 @@ xfs_mount_validate_sb(
230"Filesystem can not be safely mounted by this kernel.", 230"Filesystem can not be safely mounted by this kernel.",
231 (sbp->sb_features_incompat & 231 (sbp->sb_features_incompat &
232 XFS_SB_FEAT_INCOMPAT_UNKNOWN)); 232 XFS_SB_FEAT_INCOMPAT_UNKNOWN));
233 return XFS_ERROR(EINVAL); 233 return -EINVAL;
234 } 234 }
235 } 235 }
236 236
@@ -238,13 +238,13 @@ xfs_mount_validate_sb(
238 if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { 238 if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
239 xfs_notice(mp, 239 xfs_notice(mp,
240 "Version 5 of Super block has XFS_OQUOTA bits."); 240 "Version 5 of Super block has XFS_OQUOTA bits.");
241 return XFS_ERROR(EFSCORRUPTED); 241 return -EFSCORRUPTED;
242 } 242 }
243 } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | 243 } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
244 XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { 244 XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
245 xfs_notice(mp, 245 xfs_notice(mp,
246"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); 246"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");
247 return XFS_ERROR(EFSCORRUPTED); 247 return -EFSCORRUPTED;
248 } 248 }
249 249
250 if (unlikely( 250 if (unlikely(
@@ -252,7 +252,7 @@ xfs_mount_validate_sb(
252 xfs_warn(mp, 252 xfs_warn(mp,
253 "filesystem is marked as having an external log; " 253 "filesystem is marked as having an external log; "
254 "specify logdev on the mount command line."); 254 "specify logdev on the mount command line.");
255 return XFS_ERROR(EINVAL); 255 return -EINVAL;
256 } 256 }
257 257
258 if (unlikely( 258 if (unlikely(
@@ -260,7 +260,7 @@ xfs_mount_validate_sb(
260 xfs_warn(mp, 260 xfs_warn(mp,
261 "filesystem is marked as having an internal log; " 261 "filesystem is marked as having an internal log; "
262 "do not specify logdev on the mount command line."); 262 "do not specify logdev on the mount command line.");
263 return XFS_ERROR(EINVAL); 263 return -EINVAL;
264 } 264 }
265 265
266 /* 266 /*
@@ -294,7 +294,7 @@ xfs_mount_validate_sb(
294 sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || 294 sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) ||
295 sbp->sb_shared_vn != 0)) { 295 sbp->sb_shared_vn != 0)) {
296 xfs_notice(mp, "SB sanity check failed"); 296 xfs_notice(mp, "SB sanity check failed");
297 return XFS_ERROR(EFSCORRUPTED); 297 return -EFSCORRUPTED;
298 } 298 }
299 299
300 /* 300 /*
@@ -305,7 +305,7 @@ xfs_mount_validate_sb(
305 "File system with blocksize %d bytes. " 305 "File system with blocksize %d bytes. "
306 "Only pagesize (%ld) or less will currently work.", 306 "Only pagesize (%ld) or less will currently work.",
307 sbp->sb_blocksize, PAGE_SIZE); 307 sbp->sb_blocksize, PAGE_SIZE);
308 return XFS_ERROR(ENOSYS); 308 return -ENOSYS;
309 } 309 }
310 310
311 /* 311 /*
@@ -320,19 +320,19 @@ xfs_mount_validate_sb(
320 default: 320 default:
321 xfs_warn(mp, "inode size of %d bytes not supported", 321 xfs_warn(mp, "inode size of %d bytes not supported",
322 sbp->sb_inodesize); 322 sbp->sb_inodesize);
323 return XFS_ERROR(ENOSYS); 323 return -ENOSYS;
324 } 324 }
325 325
326 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 326 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
327 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 327 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
328 xfs_warn(mp, 328 xfs_warn(mp,
329 "file system too large to be mounted on this system."); 329 "file system too large to be mounted on this system.");
330 return XFS_ERROR(EFBIG); 330 return -EFBIG;
331 } 331 }
332 332
333 if (check_inprogress && sbp->sb_inprogress) { 333 if (check_inprogress && sbp->sb_inprogress) {
334 xfs_warn(mp, "Offline file system operation in progress!"); 334 xfs_warn(mp, "Offline file system operation in progress!");
335 return XFS_ERROR(EFSCORRUPTED); 335 return -EFSCORRUPTED;
336 } 336 }
337 return 0; 337 return 0;
338} 338}
@@ -386,10 +386,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp)
386 } 386 }
387} 387}
388 388
389void 389static void
390xfs_sb_from_disk( 390__xfs_sb_from_disk(
391 struct xfs_sb *to, 391 struct xfs_sb *to,
392 xfs_dsb_t *from) 392 xfs_dsb_t *from,
393 bool convert_xquota)
393{ 394{
394 to->sb_magicnum = be32_to_cpu(from->sb_magicnum); 395 to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
395 to->sb_blocksize = be32_to_cpu(from->sb_blocksize); 396 to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
@@ -445,6 +446,17 @@ xfs_sb_from_disk(
445 to->sb_pad = 0; 446 to->sb_pad = 0;
446 to->sb_pquotino = be64_to_cpu(from->sb_pquotino); 447 to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
447 to->sb_lsn = be64_to_cpu(from->sb_lsn); 448 to->sb_lsn = be64_to_cpu(from->sb_lsn);
449 /* Convert on-disk flags to in-memory flags? */
450 if (convert_xquota)
451 xfs_sb_quota_from_disk(to);
452}
453
454void
455xfs_sb_from_disk(
456 struct xfs_sb *to,
457 xfs_dsb_t *from)
458{
459 __xfs_sb_from_disk(to, from, true);
448} 460}
449 461
450static inline void 462static inline void
@@ -577,7 +589,11 @@ xfs_sb_verify(
577 struct xfs_mount *mp = bp->b_target->bt_mount; 589 struct xfs_mount *mp = bp->b_target->bt_mount;
578 struct xfs_sb sb; 590 struct xfs_sb sb;
579 591
580 xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); 592 /*
593 * Use call variant which doesn't convert quota flags from disk
594 * format, because xfs_mount_validate_sb checks the on-disk flags.
595 */
596 __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false);
581 597
582 /* 598 /*
583 * Only check the in progress field for the primary superblock as 599 * Only check the in progress field for the primary superblock as
@@ -620,7 +636,7 @@ xfs_sb_read_verify(
620 /* Only fail bad secondaries on a known V5 filesystem */ 636 /* Only fail bad secondaries on a known V5 filesystem */
621 if (bp->b_bn == XFS_SB_DADDR || 637 if (bp->b_bn == XFS_SB_DADDR ||
622 xfs_sb_version_hascrc(&mp->m_sb)) { 638 xfs_sb_version_hascrc(&mp->m_sb)) {
623 error = EFSBADCRC; 639 error = -EFSBADCRC;
624 goto out_error; 640 goto out_error;
625 } 641 }
626 } 642 }
@@ -630,7 +646,7 @@ xfs_sb_read_verify(
630out_error: 646out_error:
631 if (error) { 647 if (error) {
632 xfs_buf_ioerror(bp, error); 648 xfs_buf_ioerror(bp, error);
633 if (error == EFSCORRUPTED || error == EFSBADCRC) 649 if (error == -EFSCORRUPTED || error == -EFSBADCRC)
634 xfs_verifier_error(bp); 650 xfs_verifier_error(bp);
635 } 651 }
636} 652}
@@ -653,7 +669,7 @@ xfs_sb_quiet_read_verify(
653 return; 669 return;
654 } 670 }
655 /* quietly fail */ 671 /* quietly fail */
656 xfs_buf_ioerror(bp, EWRONGFS); 672 xfs_buf_ioerror(bp, -EWRONGFS);
657} 673}
658 674
659static void 675static void
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index c43c2d609a24..2e739708afd3 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -87,11 +87,11 @@ struct xfs_trans;
87typedef struct xfs_sb { 87typedef struct xfs_sb {
88 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */ 88 __uint32_t sb_magicnum; /* magic number == XFS_SB_MAGIC */
89 __uint32_t sb_blocksize; /* logical block size, bytes */ 89 __uint32_t sb_blocksize; /* logical block size, bytes */
90 xfs_drfsbno_t sb_dblocks; /* number of data blocks */ 90 xfs_rfsblock_t sb_dblocks; /* number of data blocks */
91 xfs_drfsbno_t sb_rblocks; /* number of realtime blocks */ 91 xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
92 xfs_drtbno_t sb_rextents; /* number of realtime extents */ 92 xfs_rtblock_t sb_rextents; /* number of realtime extents */
93 uuid_t sb_uuid; /* file system unique id */ 93 uuid_t sb_uuid; /* file system unique id */
94 xfs_dfsbno_t sb_logstart; /* starting block of log if internal */ 94 xfs_fsblock_t sb_logstart; /* starting block of log if internal */
95 xfs_ino_t sb_rootino; /* root inode number */ 95 xfs_ino_t sb_rootino; /* root inode number */
96 xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */ 96 xfs_ino_t sb_rbmino; /* bitmap inode for realtime extents */
97 xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */ 97 xfs_ino_t sb_rsumino; /* summary inode for rt bitmap */
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 82404da2ca67..82404da2ca67 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 23c2f2577c8d..5782f037eab4 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -133,9 +133,9 @@ xfs_symlink_read_verify(
133 return; 133 return;
134 134
135 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) 135 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
136 xfs_buf_ioerror(bp, EFSBADCRC); 136 xfs_buf_ioerror(bp, -EFSBADCRC);
137 else if (!xfs_symlink_verify(bp)) 137 else if (!xfs_symlink_verify(bp))
138 xfs_buf_ioerror(bp, EFSCORRUPTED); 138 xfs_buf_ioerror(bp, -EFSCORRUPTED);
139 139
140 if (bp->b_error) 140 if (bp->b_error)
141 xfs_verifier_error(bp); 141 xfs_verifier_error(bp);
@@ -153,7 +153,7 @@ xfs_symlink_write_verify(
153 return; 153 return;
154 154
155 if (!xfs_symlink_verify(bp)) { 155 if (!xfs_symlink_verify(bp)) {
156 xfs_buf_ioerror(bp, EFSCORRUPTED); 156 xfs_buf_ioerror(bp, -EFSCORRUPTED);
157 xfs_verifier_error(bp); 157 xfs_verifier_error(bp);
158 return; 158 return;
159 } 159 }
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index f2bda7c76b8a..f2bda7c76b8a 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 1097d14cd583..1097d14cd583 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index bf9c4579334d..bf9c4579334d 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6888ad886ff6..a65fa5dde6e9 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type)
152 if (!xfs_acl) 152 if (!xfs_acl)
153 return ERR_PTR(-ENOMEM); 153 return ERR_PTR(-ENOMEM);
154 154
155 error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, 155 error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
156 &len, ATTR_ROOT); 156 &len, ATTR_ROOT);
157 if (error) { 157 if (error) {
158 /* 158 /*
@@ -210,7 +210,7 @@ __xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
210 len -= sizeof(struct xfs_acl_entry) * 210 len -= sizeof(struct xfs_acl_entry) *
211 (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count); 211 (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
212 212
213 error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, 213 error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
214 len, ATTR_ROOT); 214 len, ATTR_ROOT);
215 215
216 kmem_free(xfs_acl); 216 kmem_free(xfs_acl);
@@ -218,7 +218,7 @@ __xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
218 /* 218 /*
219 * A NULL ACL argument means we want to remove the ACL. 219 * A NULL ACL argument means we want to remove the ACL.
220 */ 220 */
221 error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); 221 error = xfs_attr_remove(ip, ea_name, ATTR_ROOT);
222 222
223 /* 223 /*
224 * If the attribute didn't exist to start with that's fine. 224 * If the attribute didn't exist to start with that's fine.
@@ -244,7 +244,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
244 iattr.ia_mode = mode; 244 iattr.ia_mode = mode;
245 iattr.ia_ctime = current_fs_time(inode->i_sb); 245 iattr.ia_ctime = current_fs_time(inode->i_sb);
246 246
247 error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 247 error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
248 } 248 }
249 249
250 return error; 250 return error;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index faaf716e2080..11e9b4caa54f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -240,7 +240,7 @@ xfs_end_io(
240 240
241done: 241done:
242 if (error) 242 if (error)
243 ioend->io_error = -error; 243 ioend->io_error = error;
244 xfs_destroy_ioend(ioend); 244 xfs_destroy_ioend(ioend);
245} 245}
246 246
@@ -308,14 +308,14 @@ xfs_map_blocks(
308 int nimaps = 1; 308 int nimaps = 1;
309 309
310 if (XFS_FORCED_SHUTDOWN(mp)) 310 if (XFS_FORCED_SHUTDOWN(mp))
311 return -XFS_ERROR(EIO); 311 return -EIO;
312 312
313 if (type == XFS_IO_UNWRITTEN) 313 if (type == XFS_IO_UNWRITTEN)
314 bmapi_flags |= XFS_BMAPI_IGSTATE; 314 bmapi_flags |= XFS_BMAPI_IGSTATE;
315 315
316 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 316 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
317 if (nonblocking) 317 if (nonblocking)
318 return -XFS_ERROR(EAGAIN); 318 return -EAGAIN;
319 xfs_ilock(ip, XFS_ILOCK_SHARED); 319 xfs_ilock(ip, XFS_ILOCK_SHARED);
320 } 320 }
321 321
@@ -332,14 +332,14 @@ xfs_map_blocks(
332 xfs_iunlock(ip, XFS_ILOCK_SHARED); 332 xfs_iunlock(ip, XFS_ILOCK_SHARED);
333 333
334 if (error) 334 if (error)
335 return -XFS_ERROR(error); 335 return error;
336 336
337 if (type == XFS_IO_DELALLOC && 337 if (type == XFS_IO_DELALLOC &&
338 (!nimaps || isnullstartblock(imap->br_startblock))) { 338 (!nimaps || isnullstartblock(imap->br_startblock))) {
339 error = xfs_iomap_write_allocate(ip, offset, imap); 339 error = xfs_iomap_write_allocate(ip, offset, imap);
340 if (!error) 340 if (!error)
341 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); 341 trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
342 return -XFS_ERROR(error); 342 return error;
343 } 343 }
344 344
345#ifdef DEBUG 345#ifdef DEBUG
@@ -502,7 +502,7 @@ xfs_submit_ioend(
502 * time. 502 * time.
503 */ 503 */
504 if (fail) { 504 if (fail) {
505 ioend->io_error = -fail; 505 ioend->io_error = fail;
506 xfs_finish_ioend(ioend); 506 xfs_finish_ioend(ioend);
507 continue; 507 continue;
508 } 508 }
@@ -1253,7 +1253,7 @@ __xfs_get_blocks(
1253 int new = 0; 1253 int new = 0;
1254 1254
1255 if (XFS_FORCED_SHUTDOWN(mp)) 1255 if (XFS_FORCED_SHUTDOWN(mp))
1256 return -XFS_ERROR(EIO); 1256 return -EIO;
1257 1257
1258 offset = (xfs_off_t)iblock << inode->i_blkbits; 1258 offset = (xfs_off_t)iblock << inode->i_blkbits;
1259 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1259 ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
@@ -1302,7 +1302,7 @@ __xfs_get_blocks(
1302 error = xfs_iomap_write_direct(ip, offset, size, 1302 error = xfs_iomap_write_direct(ip, offset, size,
1303 &imap, nimaps); 1303 &imap, nimaps);
1304 if (error) 1304 if (error)
1305 return -error; 1305 return error;
1306 new = 1; 1306 new = 1;
1307 } else { 1307 } else {
1308 /* 1308 /*
@@ -1415,7 +1415,7 @@ __xfs_get_blocks(
1415 1415
1416out_unlock: 1416out_unlock:
1417 xfs_iunlock(ip, lockmode); 1417 xfs_iunlock(ip, lockmode);
1418 return -error; 1418 return error;
1419} 1419}
1420 1420
1421int 1421int
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 09480c57f069..aa2a8b1838a2 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -76,7 +76,7 @@ xfs_attr3_leaf_freextent(
76 error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, 76 error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
77 &map, &nmap, XFS_BMAPI_ATTRFORK); 77 &map, &nmap, XFS_BMAPI_ATTRFORK);
78 if (error) { 78 if (error) {
79 return(error); 79 return error;
80 } 80 }
81 ASSERT(nmap == 1); 81 ASSERT(nmap == 1);
82 ASSERT(map.br_startblock != DELAYSTARTBLOCK); 82 ASSERT(map.br_startblock != DELAYSTARTBLOCK);
@@ -95,21 +95,21 @@ xfs_attr3_leaf_freextent(
95 dp->i_mount->m_ddev_targp, 95 dp->i_mount->m_ddev_targp,
96 dblkno, dblkcnt, 0); 96 dblkno, dblkcnt, 0);
97 if (!bp) 97 if (!bp)
98 return ENOMEM; 98 return -ENOMEM;
99 xfs_trans_binval(*trans, bp); 99 xfs_trans_binval(*trans, bp);
100 /* 100 /*
101 * Roll to next transaction. 101 * Roll to next transaction.
102 */ 102 */
103 error = xfs_trans_roll(trans, dp); 103 error = xfs_trans_roll(trans, dp);
104 if (error) 104 if (error)
105 return (error); 105 return error;
106 } 106 }
107 107
108 tblkno += map.br_blockcount; 108 tblkno += map.br_blockcount;
109 tblkcnt -= map.br_blockcount; 109 tblkcnt -= map.br_blockcount;
110 } 110 }
111 111
112 return(0); 112 return 0;
113} 113}
114 114
115/* 115/*
@@ -227,7 +227,7 @@ xfs_attr3_node_inactive(
227 */ 227 */
228 if (level > XFS_DA_NODE_MAXDEPTH) { 228 if (level > XFS_DA_NODE_MAXDEPTH) {
229 xfs_trans_brelse(*trans, bp); /* no locks for later trans */ 229 xfs_trans_brelse(*trans, bp); /* no locks for later trans */
230 return XFS_ERROR(EIO); 230 return -EIO;
231 } 231 }
232 232
233 node = bp->b_addr; 233 node = bp->b_addr;
@@ -256,7 +256,7 @@ xfs_attr3_node_inactive(
256 error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp, 256 error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
257 XFS_ATTR_FORK); 257 XFS_ATTR_FORK);
258 if (error) 258 if (error)
259 return(error); 259 return error;
260 if (child_bp) { 260 if (child_bp) {
261 /* save for re-read later */ 261 /* save for re-read later */
262 child_blkno = XFS_BUF_ADDR(child_bp); 262 child_blkno = XFS_BUF_ADDR(child_bp);
@@ -277,7 +277,7 @@ xfs_attr3_node_inactive(
277 child_bp); 277 child_bp);
278 break; 278 break;
279 default: 279 default:
280 error = XFS_ERROR(EIO); 280 error = -EIO;
281 xfs_trans_brelse(*trans, child_bp); 281 xfs_trans_brelse(*trans, child_bp);
282 break; 282 break;
283 } 283 }
@@ -360,7 +360,7 @@ xfs_attr3_root_inactive(
360 error = xfs_attr3_leaf_inactive(trans, dp, bp); 360 error = xfs_attr3_leaf_inactive(trans, dp, bp);
361 break; 361 break;
362 default: 362 default:
363 error = XFS_ERROR(EIO); 363 error = -EIO;
364 xfs_trans_brelse(*trans, bp); 364 xfs_trans_brelse(*trans, bp);
365 break; 365 break;
366 } 366 }
@@ -414,7 +414,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
414 error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); 414 error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
415 if (error) { 415 if (error) {
416 xfs_trans_cancel(trans, 0); 416 xfs_trans_cancel(trans, 0);
417 return(error); 417 return error;
418 } 418 }
419 xfs_ilock(dp, XFS_ILOCK_EXCL); 419 xfs_ilock(dp, XFS_ILOCK_EXCL);
420 420
@@ -443,10 +443,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
443 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); 443 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
444 xfs_iunlock(dp, XFS_ILOCK_EXCL); 444 xfs_iunlock(dp, XFS_ILOCK_EXCL);
445 445
446 return(error); 446 return error;
447 447
448out: 448out:
449 xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 449 xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
450 xfs_iunlock(dp, XFS_ILOCK_EXCL); 450 xfs_iunlock(dp, XFS_ILOCK_EXCL);
451 return(error); 451 return error;
452} 452}
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 90e2eeb21207..62db83ab6cbc 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -50,11 +50,11 @@ xfs_attr_shortform_compare(const void *a, const void *b)
50 sa = (xfs_attr_sf_sort_t *)a; 50 sa = (xfs_attr_sf_sort_t *)a;
51 sb = (xfs_attr_sf_sort_t *)b; 51 sb = (xfs_attr_sf_sort_t *)b;
52 if (sa->hash < sb->hash) { 52 if (sa->hash < sb->hash) {
53 return(-1); 53 return -1;
54 } else if (sa->hash > sb->hash) { 54 } else if (sa->hash > sb->hash) {
55 return(1); 55 return 1;
56 } else { 56 } else {
57 return(sa->entno - sb->entno); 57 return sa->entno - sb->entno;
58 } 58 }
59} 59}
60 60
@@ -86,7 +86,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
86 sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; 86 sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
87 ASSERT(sf != NULL); 87 ASSERT(sf != NULL);
88 if (!sf->hdr.count) 88 if (!sf->hdr.count)
89 return(0); 89 return 0;
90 cursor = context->cursor; 90 cursor = context->cursor;
91 ASSERT(cursor != NULL); 91 ASSERT(cursor != NULL);
92 92
@@ -124,7 +124,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
124 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 124 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
125 } 125 }
126 trace_xfs_attr_list_sf_all(context); 126 trace_xfs_attr_list_sf_all(context);
127 return(0); 127 return 0;
128 } 128 }
129 129
130 /* do no more for a search callback */ 130 /* do no more for a search callback */
@@ -150,7 +150,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
150 XFS_ERRLEVEL_LOW, 150 XFS_ERRLEVEL_LOW,
151 context->dp->i_mount, sfe); 151 context->dp->i_mount, sfe);
152 kmem_free(sbuf); 152 kmem_free(sbuf);
153 return XFS_ERROR(EFSCORRUPTED); 153 return -EFSCORRUPTED;
154 } 154 }
155 155
156 sbp->entno = i; 156 sbp->entno = i;
@@ -188,7 +188,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
188 } 188 }
189 if (i == nsbuf) { 189 if (i == nsbuf) {
190 kmem_free(sbuf); 190 kmem_free(sbuf);
191 return(0); 191 return 0;
192 } 192 }
193 193
194 /* 194 /*
@@ -213,7 +213,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
213 } 213 }
214 214
215 kmem_free(sbuf); 215 kmem_free(sbuf);
216 return(0); 216 return 0;
217} 217}
218 218
219STATIC int 219STATIC int
@@ -243,8 +243,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
243 if (cursor->blkno > 0) { 243 if (cursor->blkno > 0) {
244 error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, 244 error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
245 &bp, XFS_ATTR_FORK); 245 &bp, XFS_ATTR_FORK);
246 if ((error != 0) && (error != EFSCORRUPTED)) 246 if ((error != 0) && (error != -EFSCORRUPTED))
247 return(error); 247 return error;
248 if (bp) { 248 if (bp) {
249 struct xfs_attr_leaf_entry *entries; 249 struct xfs_attr_leaf_entry *entries;
250 250
@@ -295,7 +295,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
295 cursor->blkno, -1, &bp, 295 cursor->blkno, -1, &bp,
296 XFS_ATTR_FORK); 296 XFS_ATTR_FORK);
297 if (error) 297 if (error)
298 return(error); 298 return error;
299 node = bp->b_addr; 299 node = bp->b_addr;
300 magic = be16_to_cpu(node->hdr.info.magic); 300 magic = be16_to_cpu(node->hdr.info.magic);
301 if (magic == XFS_ATTR_LEAF_MAGIC || 301 if (magic == XFS_ATTR_LEAF_MAGIC ||
@@ -308,7 +308,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
308 context->dp->i_mount, 308 context->dp->i_mount,
309 node); 309 node);
310 xfs_trans_brelse(NULL, bp); 310 xfs_trans_brelse(NULL, bp);
311 return XFS_ERROR(EFSCORRUPTED); 311 return -EFSCORRUPTED;
312 } 312 }
313 313
314 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 314 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
@@ -496,11 +496,11 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
496 context->cursor->blkno = 0; 496 context->cursor->blkno = 0;
497 error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp); 497 error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
498 if (error) 498 if (error)
499 return XFS_ERROR(error); 499 return error;
500 500
501 error = xfs_attr3_leaf_list_int(bp, context); 501 error = xfs_attr3_leaf_list_int(bp, context);
502 xfs_trans_brelse(NULL, bp); 502 xfs_trans_brelse(NULL, bp);
503 return XFS_ERROR(error); 503 return error;
504} 504}
505 505
506int 506int
@@ -514,7 +514,7 @@ xfs_attr_list_int(
514 XFS_STATS_INC(xs_attr_list); 514 XFS_STATS_INC(xs_attr_list);
515 515
516 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 516 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
517 return EIO; 517 return -EIO;
518 518
519 /* 519 /*
520 * Decide on what work routines to call based on the inode size. 520 * Decide on what work routines to call based on the inode size.
@@ -616,16 +616,16 @@ xfs_attr_list(
616 * Validate the cursor. 616 * Validate the cursor.
617 */ 617 */
618 if (cursor->pad1 || cursor->pad2) 618 if (cursor->pad1 || cursor->pad2)
619 return(XFS_ERROR(EINVAL)); 619 return -EINVAL;
620 if ((cursor->initted == 0) && 620 if ((cursor->initted == 0) &&
621 (cursor->hashval || cursor->blkno || cursor->offset)) 621 (cursor->hashval || cursor->blkno || cursor->offset))
622 return XFS_ERROR(EINVAL); 622 return -EINVAL;
623 623
624 /* 624 /*
625 * Check for a properly aligned buffer. 625 * Check for a properly aligned buffer.
626 */ 626 */
627 if (((long)buffer) & (sizeof(int)-1)) 627 if (((long)buffer) & (sizeof(int)-1))
628 return XFS_ERROR(EFAULT); 628 return -EFAULT;
629 if (flags & ATTR_KERNOVAL) 629 if (flags & ATTR_KERNOVAL)
630 bufsize = 0; 630 bufsize = 0;
631 631
@@ -648,6 +648,6 @@ xfs_attr_list(
648 alist->al_offset[0] = context.bufsize; 648 alist->al_offset[0] = context.bufsize;
649 649
650 error = xfs_attr_list_int(&context); 650 error = xfs_attr_list_int(&context);
651 ASSERT(error >= 0); 651 ASSERT(error <= 0);
652 return error; 652 return error;
653} 653}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 64731ef3324d..2f1e30d39a35 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -133,7 +133,7 @@ xfs_bmap_finish(
133 mp = ntp->t_mountp; 133 mp = ntp->t_mountp;
134 if (!XFS_FORCED_SHUTDOWN(mp)) 134 if (!XFS_FORCED_SHUTDOWN(mp))
135 xfs_force_shutdown(mp, 135 xfs_force_shutdown(mp,
136 (error == EFSCORRUPTED) ? 136 (error == -EFSCORRUPTED) ?
137 SHUTDOWN_CORRUPT_INCORE : 137 SHUTDOWN_CORRUPT_INCORE :
138 SHUTDOWN_META_IO_ERROR); 138 SHUTDOWN_META_IO_ERROR);
139 return error; 139 return error;
@@ -365,7 +365,7 @@ xfs_bmap_count_tree(
365 xfs_trans_brelse(tp, bp); 365 xfs_trans_brelse(tp, bp);
366 XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", 366 XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
367 XFS_ERRLEVEL_LOW, mp); 367 XFS_ERRLEVEL_LOW, mp);
368 return XFS_ERROR(EFSCORRUPTED); 368 return -EFSCORRUPTED;
369 } 369 }
370 xfs_trans_brelse(tp, bp); 370 xfs_trans_brelse(tp, bp);
371 } else { 371 } else {
@@ -425,14 +425,14 @@ xfs_bmap_count_blocks(
425 ASSERT(level > 0); 425 ASSERT(level > 0);
426 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 426 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
427 bno = be64_to_cpu(*pp); 427 bno = be64_to_cpu(*pp);
428 ASSERT(bno != NULLDFSBNO); 428 ASSERT(bno != NULLFSBLOCK);
429 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 429 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
430 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 430 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
431 431
432 if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { 432 if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
433 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, 433 XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
434 mp); 434 mp);
435 return XFS_ERROR(EFSCORRUPTED); 435 return -EFSCORRUPTED;
436 } 436 }
437 437
438 return 0; 438 return 0;
@@ -524,13 +524,13 @@ xfs_getbmap(
524 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 524 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
525 ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && 525 ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
526 ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 526 ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
527 return XFS_ERROR(EINVAL); 527 return -EINVAL;
528 } else if (unlikely( 528 } else if (unlikely(
529 ip->i_d.di_aformat != 0 && 529 ip->i_d.di_aformat != 0 &&
530 ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { 530 ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
531 XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, 531 XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
532 ip->i_mount); 532 ip->i_mount);
533 return XFS_ERROR(EFSCORRUPTED); 533 return -EFSCORRUPTED;
534 } 534 }
535 535
536 prealloced = 0; 536 prealloced = 0;
@@ -539,7 +539,7 @@ xfs_getbmap(
539 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 539 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
540 ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 540 ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
541 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 541 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
542 return XFS_ERROR(EINVAL); 542 return -EINVAL;
543 543
544 if (xfs_get_extsz_hint(ip) || 544 if (xfs_get_extsz_hint(ip) ||
545 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 545 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
@@ -559,26 +559,26 @@ xfs_getbmap(
559 bmv->bmv_entries = 0; 559 bmv->bmv_entries = 0;
560 return 0; 560 return 0;
561 } else if (bmv->bmv_length < 0) { 561 } else if (bmv->bmv_length < 0) {
562 return XFS_ERROR(EINVAL); 562 return -EINVAL;
563 } 563 }
564 564
565 nex = bmv->bmv_count - 1; 565 nex = bmv->bmv_count - 1;
566 if (nex <= 0) 566 if (nex <= 0)
567 return XFS_ERROR(EINVAL); 567 return -EINVAL;
568 bmvend = bmv->bmv_offset + bmv->bmv_length; 568 bmvend = bmv->bmv_offset + bmv->bmv_length;
569 569
570 570
571 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 571 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
572 return XFS_ERROR(ENOMEM); 572 return -ENOMEM;
573 out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); 573 out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
574 if (!out) 574 if (!out)
575 return XFS_ERROR(ENOMEM); 575 return -ENOMEM;
576 576
577 xfs_ilock(ip, XFS_IOLOCK_SHARED); 577 xfs_ilock(ip, XFS_IOLOCK_SHARED);
578 if (whichfork == XFS_DATA_FORK) { 578 if (whichfork == XFS_DATA_FORK) {
579 if (!(iflags & BMV_IF_DELALLOC) && 579 if (!(iflags & BMV_IF_DELALLOC) &&
580 (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { 580 (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
581 error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 581 error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
582 if (error) 582 if (error)
583 goto out_unlock_iolock; 583 goto out_unlock_iolock;
584 584
@@ -611,7 +611,7 @@ xfs_getbmap(
611 /* 611 /*
612 * Allocate enough space to handle "subnex" maps at a time. 612 * Allocate enough space to handle "subnex" maps at a time.
613 */ 613 */
614 error = ENOMEM; 614 error = -ENOMEM;
615 subnex = 16; 615 subnex = 16;
616 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); 616 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
617 if (!map) 617 if (!map)
@@ -809,7 +809,7 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
809 * have speculative prealloc/delalloc blocks to remove. 809 * have speculative prealloc/delalloc blocks to remove.
810 */ 810 */
811 if (VFS_I(ip)->i_size == 0 && 811 if (VFS_I(ip)->i_size == 0 &&
812 VN_CACHED(VFS_I(ip)) == 0 && 812 VFS_I(ip)->i_mapping->nrpages == 0 &&
813 ip->i_delayed_blks == 0) 813 ip->i_delayed_blks == 0)
814 return false; 814 return false;
815 815
@@ -882,7 +882,7 @@ xfs_free_eofblocks(
882 if (need_iolock) { 882 if (need_iolock) {
883 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 883 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
884 xfs_trans_cancel(tp, 0); 884 xfs_trans_cancel(tp, 0);
885 return EAGAIN; 885 return -EAGAIN;
886 } 886 }
887 } 887 }
888 888
@@ -955,14 +955,14 @@ xfs_alloc_file_space(
955 trace_xfs_alloc_file_space(ip); 955 trace_xfs_alloc_file_space(ip);
956 956
957 if (XFS_FORCED_SHUTDOWN(mp)) 957 if (XFS_FORCED_SHUTDOWN(mp))
958 return XFS_ERROR(EIO); 958 return -EIO;
959 959
960 error = xfs_qm_dqattach(ip, 0); 960 error = xfs_qm_dqattach(ip, 0);
961 if (error) 961 if (error)
962 return error; 962 return error;
963 963
964 if (len <= 0) 964 if (len <= 0)
965 return XFS_ERROR(EINVAL); 965 return -EINVAL;
966 966
967 rt = XFS_IS_REALTIME_INODE(ip); 967 rt = XFS_IS_REALTIME_INODE(ip);
968 extsz = xfs_get_extsz_hint(ip); 968 extsz = xfs_get_extsz_hint(ip);
@@ -1028,7 +1028,7 @@ xfs_alloc_file_space(
1028 /* 1028 /*
1029 * Free the transaction structure. 1029 * Free the transaction structure.
1030 */ 1030 */
1031 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1031 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1032 xfs_trans_cancel(tp, 0); 1032 xfs_trans_cancel(tp, 0);
1033 break; 1033 break;
1034 } 1034 }
@@ -1065,7 +1065,7 @@ xfs_alloc_file_space(
1065 allocated_fsb = imapp->br_blockcount; 1065 allocated_fsb = imapp->br_blockcount;
1066 1066
1067 if (nimaps == 0) { 1067 if (nimaps == 0) {
1068 error = XFS_ERROR(ENOSPC); 1068 error = -ENOSPC;
1069 break; 1069 break;
1070 } 1070 }
1071 1071
@@ -1126,7 +1126,7 @@ xfs_zero_remaining_bytes(
1126 mp->m_rtdev_targp : mp->m_ddev_targp, 1126 mp->m_rtdev_targp : mp->m_ddev_targp,
1127 BTOBB(mp->m_sb.sb_blocksize), 0); 1127 BTOBB(mp->m_sb.sb_blocksize), 0);
1128 if (!bp) 1128 if (!bp)
1129 return XFS_ERROR(ENOMEM); 1129 return -ENOMEM;
1130 1130
1131 xfs_buf_unlock(bp); 1131 xfs_buf_unlock(bp);
1132 1132
@@ -1158,7 +1158,7 @@ xfs_zero_remaining_bytes(
1158 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 1158 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
1159 1159
1160 if (XFS_FORCED_SHUTDOWN(mp)) { 1160 if (XFS_FORCED_SHUTDOWN(mp)) {
1161 error = XFS_ERROR(EIO); 1161 error = -EIO;
1162 break; 1162 break;
1163 } 1163 }
1164 xfs_buf_iorequest(bp); 1164 xfs_buf_iorequest(bp);
@@ -1176,7 +1176,7 @@ xfs_zero_remaining_bytes(
1176 XFS_BUF_WRITE(bp); 1176 XFS_BUF_WRITE(bp);
1177 1177
1178 if (XFS_FORCED_SHUTDOWN(mp)) { 1178 if (XFS_FORCED_SHUTDOWN(mp)) {
1179 error = XFS_ERROR(EIO); 1179 error = -EIO;
1180 break; 1180 break;
1181 } 1181 }
1182 xfs_buf_iorequest(bp); 1182 xfs_buf_iorequest(bp);
@@ -1234,7 +1234,7 @@ xfs_free_file_space(
1234 1234
1235 rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1235 rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1236 ioffset = offset & ~(rounding - 1); 1236 ioffset = offset & ~(rounding - 1);
1237 error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1237 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1238 ioffset, -1); 1238 ioffset, -1);
1239 if (error) 1239 if (error)
1240 goto out; 1240 goto out;
@@ -1315,7 +1315,7 @@ xfs_free_file_space(
1315 /* 1315 /*
1316 * Free the transaction structure. 1316 * Free the transaction structure.
1317 */ 1317 */
1318 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1318 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1319 xfs_trans_cancel(tp, 0); 1319 xfs_trans_cancel(tp, 0);
1320 break; 1320 break;
1321 } 1321 }
@@ -1557,14 +1557,14 @@ xfs_swap_extents_check_format(
1557 /* Should never get a local format */ 1557 /* Should never get a local format */
1558 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 1558 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
1559 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) 1559 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
1560 return EINVAL; 1560 return -EINVAL;
1561 1561
1562 /* 1562 /*
1563 * if the target inode has less extents that then temporary inode then 1563 * if the target inode has less extents that then temporary inode then
1564 * why did userspace call us? 1564 * why did userspace call us?
1565 */ 1565 */
1566 if (ip->i_d.di_nextents < tip->i_d.di_nextents) 1566 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
1567 return EINVAL; 1567 return -EINVAL;
1568 1568
1569 /* 1569 /*
1570 * if the target inode is in extent form and the temp inode is in btree 1570 * if the target inode is in extent form and the temp inode is in btree
@@ -1573,19 +1573,19 @@ xfs_swap_extents_check_format(
1573 */ 1573 */
1574 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1574 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1575 tip->i_d.di_format == XFS_DINODE_FMT_BTREE) 1575 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
1576 return EINVAL; 1576 return -EINVAL;
1577 1577
1578 /* Check temp in extent form to max in target */ 1578 /* Check temp in extent form to max in target */
1579 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1579 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1580 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > 1580 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
1581 XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1581 XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
1582 return EINVAL; 1582 return -EINVAL;
1583 1583
1584 /* Check target in extent form to max in temp */ 1584 /* Check target in extent form to max in temp */
1585 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1585 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1586 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > 1586 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
1587 XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1587 XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
1588 return EINVAL; 1588 return -EINVAL;
1589 1589
1590 /* 1590 /*
1591 * If we are in a btree format, check that the temp root block will fit 1591 * If we are in a btree format, check that the temp root block will fit
@@ -1599,26 +1599,50 @@ xfs_swap_extents_check_format(
1599 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 1599 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1600 if (XFS_IFORK_BOFF(ip) && 1600 if (XFS_IFORK_BOFF(ip) &&
1601 XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) 1601 XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
1602 return EINVAL; 1602 return -EINVAL;
1603 if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= 1603 if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
1604 XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1604 XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
1605 return EINVAL; 1605 return -EINVAL;
1606 } 1606 }
1607 1607
1608 /* Reciprocal target->temp btree format checks */ 1608 /* Reciprocal target->temp btree format checks */
1609 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 1609 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1610 if (XFS_IFORK_BOFF(tip) && 1610 if (XFS_IFORK_BOFF(tip) &&
1611 XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) 1611 XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
1612 return EINVAL; 1612 return -EINVAL;
1613 if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= 1613 if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
1614 XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1614 XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
1615 return EINVAL; 1615 return -EINVAL;
1616 } 1616 }
1617 1617
1618 return 0; 1618 return 0;
1619} 1619}
1620 1620
1621int 1621int
1622xfs_swap_extent_flush(
1623 struct xfs_inode *ip)
1624{
1625 int error;
1626
1627 error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
1628 if (error)
1629 return error;
1630 truncate_pagecache_range(VFS_I(ip), 0, -1);
1631
1632 /* Verify O_DIRECT for ftmp */
1633 if (VFS_I(ip)->i_mapping->nrpages)
1634 return -EINVAL;
1635
1636 /*
1637 * Don't try to swap extents on mmap()d files because we can't lock
1638 * out races against page faults safely.
1639 */
1640 if (mapping_mapped(VFS_I(ip)->i_mapping))
1641 return -EBUSY;
1642 return 0;
1643}
1644
1645int
1622xfs_swap_extents( 1646xfs_swap_extents(
1623 xfs_inode_t *ip, /* target inode */ 1647 xfs_inode_t *ip, /* target inode */
1624 xfs_inode_t *tip, /* tmp inode */ 1648 xfs_inode_t *tip, /* tmp inode */
@@ -1633,51 +1657,57 @@ xfs_swap_extents(
1633 int aforkblks = 0; 1657 int aforkblks = 0;
1634 int taforkblks = 0; 1658 int taforkblks = 0;
1635 __uint64_t tmp; 1659 __uint64_t tmp;
1660 int lock_flags;
1636 1661
1637 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 1662 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
1638 if (!tempifp) { 1663 if (!tempifp) {
1639 error = XFS_ERROR(ENOMEM); 1664 error = -ENOMEM;
1640 goto out; 1665 goto out;
1641 } 1666 }
1642 1667
1643 /* 1668 /*
1644 * we have to do two separate lock calls here to keep lockdep 1669 * Lock up the inodes against other IO and truncate to begin with.
1645 * happy. If we try to get all the locks in one call, lock will 1670 * Then we can ensure the inodes are flushed and have no page cache
1646 * report false positives when we drop the ILOCK and regain them 1671 * safely. Once we have done this we can take the ilocks and do the rest
1647 * below. 1672 * of the checks.
1648 */ 1673 */
1674 lock_flags = XFS_IOLOCK_EXCL;
1649 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1675 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1650 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1651 1676
1652 /* Verify that both files have the same format */ 1677 /* Verify that both files have the same format */
1653 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 1678 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
1654 error = XFS_ERROR(EINVAL); 1679 error = -EINVAL;
1655 goto out_unlock; 1680 goto out_unlock;
1656 } 1681 }
1657 1682
1658 /* Verify both files are either real-time or non-realtime */ 1683 /* Verify both files are either real-time or non-realtime */
1659 if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { 1684 if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
1660 error = XFS_ERROR(EINVAL); 1685 error = -EINVAL;
1661 goto out_unlock; 1686 goto out_unlock;
1662 } 1687 }
1663 1688
1664 error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); 1689 error = xfs_swap_extent_flush(ip);
1690 if (error)
1691 goto out_unlock;
1692 error = xfs_swap_extent_flush(tip);
1665 if (error) 1693 if (error)
1666 goto out_unlock; 1694 goto out_unlock;
1667 truncate_pagecache_range(VFS_I(tip), 0, -1);
1668 1695
1669 /* Verify O_DIRECT for ftmp */ 1696 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
1670 if (VN_CACHED(VFS_I(tip)) != 0) { 1697 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
1671 error = XFS_ERROR(EINVAL); 1698 if (error) {
1699 xfs_trans_cancel(tp, 0);
1672 goto out_unlock; 1700 goto out_unlock;
1673 } 1701 }
1702 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1703 lock_flags |= XFS_ILOCK_EXCL;
1674 1704
1675 /* Verify all data are being swapped */ 1705 /* Verify all data are being swapped */
1676 if (sxp->sx_offset != 0 || 1706 if (sxp->sx_offset != 0 ||
1677 sxp->sx_length != ip->i_d.di_size || 1707 sxp->sx_length != ip->i_d.di_size ||
1678 sxp->sx_length != tip->i_d.di_size) { 1708 sxp->sx_length != tip->i_d.di_size) {
1679 error = XFS_ERROR(EFAULT); 1709 error = -EFAULT;
1680 goto out_unlock; 1710 goto out_trans_cancel;
1681 } 1711 }
1682 1712
1683 trace_xfs_swap_extent_before(ip, 0); 1713 trace_xfs_swap_extent_before(ip, 0);
@@ -1689,7 +1719,7 @@ xfs_swap_extents(
1689 xfs_notice(mp, 1719 xfs_notice(mp,
1690 "%s: inode 0x%llx format is incompatible for exchanging.", 1720 "%s: inode 0x%llx format is incompatible for exchanging.",
1691 __func__, ip->i_ino); 1721 __func__, ip->i_ino);
1692 goto out_unlock; 1722 goto out_trans_cancel;
1693 } 1723 }
1694 1724
1695 /* 1725 /*
@@ -1703,43 +1733,9 @@ xfs_swap_extents(
1703 (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || 1733 (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
1704 (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || 1734 (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
1705 (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { 1735 (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
1706 error = XFS_ERROR(EBUSY); 1736 error = -EBUSY;
1707 goto out_unlock; 1737 goto out_trans_cancel;
1708 }
1709
1710 /* We need to fail if the file is memory mapped. Once we have tossed
1711 * all existing pages, the page fault will have no option
1712 * but to go to the filesystem for pages. By making the page fault call
1713 * vop_read (or write in the case of autogrow) they block on the iolock
1714 * until we have switched the extents.
1715 */
1716 if (VN_MAPPED(VFS_I(ip))) {
1717 error = XFS_ERROR(EBUSY);
1718 goto out_unlock;
1719 }
1720
1721 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1722 xfs_iunlock(tip, XFS_ILOCK_EXCL);
1723
1724 /*
1725 * There is a race condition here since we gave up the
1726 * ilock. However, the data fork will not change since
1727 * we have the iolock (locked for truncation too) so we
1728 * are safe. We don't really care if non-io related
1729 * fields change.
1730 */
1731 truncate_pagecache_range(VFS_I(ip), 0, -1);
1732
1733 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
1734 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
1735 if (error) {
1736 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1737 xfs_iunlock(tip, XFS_IOLOCK_EXCL);
1738 xfs_trans_cancel(tp, 0);
1739 goto out;
1740 } 1738 }
1741 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1742
1743 /* 1739 /*
1744 * Count the number of extended attribute blocks 1740 * Count the number of extended attribute blocks
1745 */ 1741 */
@@ -1757,8 +1753,8 @@ xfs_swap_extents(
1757 goto out_trans_cancel; 1753 goto out_trans_cancel;
1758 } 1754 }
1759 1755
1760 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1756 xfs_trans_ijoin(tp, ip, lock_flags);
1761 xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1757 xfs_trans_ijoin(tp, tip, lock_flags);
1762 1758
1763 /* 1759 /*
1764 * Before we've swapped the forks, lets set the owners of the forks 1760 * Before we've swapped the forks, lets set the owners of the forks
@@ -1887,8 +1883,8 @@ out:
1887 return error; 1883 return error;
1888 1884
1889out_unlock: 1885out_unlock:
1890 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1886 xfs_iunlock(ip, lock_flags);
1891 xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1887 xfs_iunlock(tip, lock_flags);
1892 goto out; 1888 goto out;
1893 1889
1894out_trans_cancel: 1890out_trans_cancel:
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7a34a1ae6552..cd7b8ca9b064 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -130,7 +130,7 @@ xfs_buf_get_maps(
130 bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map), 130 bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
131 KM_NOFS); 131 KM_NOFS);
132 if (!bp->b_maps) 132 if (!bp->b_maps)
133 return ENOMEM; 133 return -ENOMEM;
134 return 0; 134 return 0;
135} 135}
136 136
@@ -344,7 +344,7 @@ retry:
344 if (unlikely(page == NULL)) { 344 if (unlikely(page == NULL)) {
345 if (flags & XBF_READ_AHEAD) { 345 if (flags & XBF_READ_AHEAD) {
346 bp->b_page_count = i; 346 bp->b_page_count = i;
347 error = ENOMEM; 347 error = -ENOMEM;
348 goto out_free_pages; 348 goto out_free_pages;
349 } 349 }
350 350
@@ -465,7 +465,7 @@ _xfs_buf_find(
465 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); 465 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
466 if (blkno >= eofs) { 466 if (blkno >= eofs) {
467 /* 467 /*
468 * XXX (dgc): we should really be returning EFSCORRUPTED here, 468 * XXX (dgc): we should really be returning -EFSCORRUPTED here,
469 * but none of the higher level infrastructure supports 469 * but none of the higher level infrastructure supports
470 * returning a specific error on buffer lookup failures. 470 * returning a specific error on buffer lookup failures.
471 */ 471 */
@@ -1052,8 +1052,8 @@ xfs_buf_ioerror(
1052 xfs_buf_t *bp, 1052 xfs_buf_t *bp,
1053 int error) 1053 int error)
1054{ 1054{
1055 ASSERT(error >= 0 && error <= 0xffff); 1055 ASSERT(error <= 0 && error >= -1000);
1056 bp->b_error = (unsigned short)error; 1056 bp->b_error = error;
1057 trace_xfs_buf_ioerror(bp, error, _RET_IP_); 1057 trace_xfs_buf_ioerror(bp, error, _RET_IP_);
1058} 1058}
1059 1059
@@ -1064,7 +1064,7 @@ xfs_buf_ioerror_alert(
1064{ 1064{
1065 xfs_alert(bp->b_target->bt_mount, 1065 xfs_alert(bp->b_target->bt_mount,
1066"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d", 1066"metadata I/O error: block 0x%llx (\"%s\") error %d numblks %d",
1067 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); 1067 (__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
1068} 1068}
1069 1069
1070/* 1070/*
@@ -1083,7 +1083,7 @@ xfs_bioerror(
1083 /* 1083 /*
1084 * No need to wait until the buffer is unpinned, we aren't flushing it. 1084 * No need to wait until the buffer is unpinned, we aren't flushing it.
1085 */ 1085 */
1086 xfs_buf_ioerror(bp, EIO); 1086 xfs_buf_ioerror(bp, -EIO);
1087 1087
1088 /* 1088 /*
1089 * We're calling xfs_buf_ioend, so delete XBF_DONE flag. 1089 * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
@@ -1094,7 +1094,7 @@ xfs_bioerror(
1094 1094
1095 xfs_buf_ioend(bp, 0); 1095 xfs_buf_ioend(bp, 0);
1096 1096
1097 return EIO; 1097 return -EIO;
1098} 1098}
1099 1099
1100/* 1100/*
@@ -1127,13 +1127,13 @@ xfs_bioerror_relse(
1127 * There's no reason to mark error for 1127 * There's no reason to mark error for
1128 * ASYNC buffers. 1128 * ASYNC buffers.
1129 */ 1129 */
1130 xfs_buf_ioerror(bp, EIO); 1130 xfs_buf_ioerror(bp, -EIO);
1131 complete(&bp->b_iowait); 1131 complete(&bp->b_iowait);
1132 } else { 1132 } else {
1133 xfs_buf_relse(bp); 1133 xfs_buf_relse(bp);
1134 } 1134 }
1135 1135
1136 return EIO; 1136 return -EIO;
1137} 1137}
1138 1138
1139STATIC int 1139STATIC int
@@ -1199,7 +1199,7 @@ xfs_buf_bio_end_io(
1199 * buffers that require multiple bios to complete. 1199 * buffers that require multiple bios to complete.
1200 */ 1200 */
1201 if (!bp->b_error) 1201 if (!bp->b_error)
1202 xfs_buf_ioerror(bp, -error); 1202 xfs_buf_ioerror(bp, error);
1203 1203
1204 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1204 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1205 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1205 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
@@ -1286,7 +1286,7 @@ next_chunk:
1286 * because the caller (xfs_buf_iorequest) holds a count itself. 1286 * because the caller (xfs_buf_iorequest) holds a count itself.
1287 */ 1287 */
1288 atomic_dec(&bp->b_io_remaining); 1288 atomic_dec(&bp->b_io_remaining);
1289 xfs_buf_ioerror(bp, EIO); 1289 xfs_buf_ioerror(bp, -EIO);
1290 bio_put(bio); 1290 bio_put(bio);
1291 } 1291 }
1292 1292
@@ -1330,6 +1330,20 @@ _xfs_buf_ioapply(
1330 SHUTDOWN_CORRUPT_INCORE); 1330 SHUTDOWN_CORRUPT_INCORE);
1331 return; 1331 return;
1332 } 1332 }
1333 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
1334 struct xfs_mount *mp = bp->b_target->bt_mount;
1335
1336 /*
1337 * non-crc filesystems don't attach verifiers during
1338 * log recovery, so don't warn for such filesystems.
1339 */
1340 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1341 xfs_warn(mp,
1342 "%s: no ops on block 0x%llx/0x%x",
1343 __func__, bp->b_bn, bp->b_length);
1344 xfs_hex_dump(bp->b_addr, 64);
1345 dump_stack();
1346 }
1333 } 1347 }
1334 } else if (bp->b_flags & XBF_READ_AHEAD) { 1348 } else if (bp->b_flags & XBF_READ_AHEAD) {
1335 rw = READA; 1349 rw = READA;
@@ -1628,7 +1642,7 @@ xfs_setsize_buftarg(
1628 xfs_warn(btp->bt_mount, 1642 xfs_warn(btp->bt_mount,
1629 "Cannot set_blocksize to %u on device %s", 1643 "Cannot set_blocksize to %u on device %s",
1630 sectorsize, name); 1644 sectorsize, name);
1631 return EINVAL; 1645 return -EINVAL;
1632 } 1646 }
1633 1647
1634 /* Set up device logical sector size mask */ 1648 /* Set up device logical sector size mask */
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 3a7a5523d3dc..c753183900b3 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -178,7 +178,7 @@ typedef struct xfs_buf {
178 atomic_t b_io_remaining; /* #outstanding I/O requests */ 178 atomic_t b_io_remaining; /* #outstanding I/O requests */
179 unsigned int b_page_count; /* size of page array */ 179 unsigned int b_page_count; /* size of page array */
180 unsigned int b_offset; /* page offset in first page */ 180 unsigned int b_offset; /* page offset in first page */
181 unsigned short b_error; /* error code on I/O */ 181 int b_error; /* error code on I/O */
182 const struct xfs_buf_ops *b_ops; 182 const struct xfs_buf_ops *b_ops;
183 183
184#ifdef XFS_BUF_LOCK_TRACKING 184#ifdef XFS_BUF_LOCK_TRACKING
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 4654338b03fc..76007deed31f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -488,7 +488,7 @@ xfs_buf_item_unpin(
488 xfs_buf_lock(bp); 488 xfs_buf_lock(bp);
489 xfs_buf_hold(bp); 489 xfs_buf_hold(bp);
490 bp->b_flags |= XBF_ASYNC; 490 bp->b_flags |= XBF_ASYNC;
491 xfs_buf_ioerror(bp, EIO); 491 xfs_buf_ioerror(bp, -EIO);
492 XFS_BUF_UNDONE(bp); 492 XFS_BUF_UNDONE(bp);
493 xfs_buf_stale(bp); 493 xfs_buf_stale(bp);
494 xfs_buf_ioend(bp, 0); 494 xfs_buf_ioend(bp, 0);
@@ -725,7 +725,7 @@ xfs_buf_item_get_format(
725 bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), 725 bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
726 KM_SLEEP); 726 KM_SLEEP);
727 if (!bip->bli_formats) 727 if (!bip->bli_formats)
728 return ENOMEM; 728 return -ENOMEM;
729 return 0; 729 return 0;
730} 730}
731 731
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 48e99afb9cb0..f1b69edcdf31 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -95,7 +95,7 @@ xfs_dir2_sf_getdents(
95 */ 95 */
96 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { 96 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
97 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); 97 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
98 return XFS_ERROR(EIO); 98 return -EIO;
99 } 99 }
100 100
101 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 101 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
@@ -677,7 +677,7 @@ xfs_readdir(
677 trace_xfs_readdir(dp); 677 trace_xfs_readdir(dp);
678 678
679 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 679 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
680 return XFS_ERROR(EIO); 680 return -EIO;
681 681
682 ASSERT(S_ISDIR(dp->i_d.di_mode)); 682 ASSERT(S_ISDIR(dp->i_d.di_mode));
683 XFS_STATS_INC(xs_dir_getdents); 683 XFS_STATS_INC(xs_dir_getdents);
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 4f11ef011139..13d08a1b390e 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -124,7 +124,7 @@ xfs_trim_extents(
124 } 124 }
125 125
126 trace_xfs_discard_extent(mp, agno, fbno, flen); 126 trace_xfs_discard_extent(mp, agno, fbno, flen);
127 error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0); 127 error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
128 if (error) 128 if (error)
129 goto out_del_cursor; 129 goto out_del_cursor;
130 *blocks_trimmed += flen; 130 *blocks_trimmed += flen;
@@ -166,11 +166,11 @@ xfs_ioc_trim(
166 int error, last_error = 0; 166 int error, last_error = 0;
167 167
168 if (!capable(CAP_SYS_ADMIN)) 168 if (!capable(CAP_SYS_ADMIN))
169 return -XFS_ERROR(EPERM); 169 return -EPERM;
170 if (!blk_queue_discard(q)) 170 if (!blk_queue_discard(q))
171 return -XFS_ERROR(EOPNOTSUPP); 171 return -EOPNOTSUPP;
172 if (copy_from_user(&range, urange, sizeof(range))) 172 if (copy_from_user(&range, urange, sizeof(range)))
173 return -XFS_ERROR(EFAULT); 173 return -EFAULT;
174 174
175 /* 175 /*
176 * Truncating down the len isn't actually quite correct, but using 176 * Truncating down the len isn't actually quite correct, but using
@@ -182,7 +182,7 @@ xfs_ioc_trim(
182 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || 182 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
183 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || 183 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) ||
184 range.len < mp->m_sb.sb_blocksize) 184 range.len < mp->m_sb.sb_blocksize)
185 return -XFS_ERROR(EINVAL); 185 return -EINVAL;
186 186
187 start = BTOBB(range.start); 187 start = BTOBB(range.start);
188 end = start + BTOBBT(range.len) - 1; 188 end = start + BTOBBT(range.len) - 1;
@@ -195,7 +195,7 @@ xfs_ioc_trim(
195 end_agno = xfs_daddr_to_agno(mp, end); 195 end_agno = xfs_daddr_to_agno(mp, end);
196 196
197 for (agno = start_agno; agno <= end_agno; agno++) { 197 for (agno = start_agno; agno <= end_agno; agno++) {
198 error = -xfs_trim_extents(mp, agno, start, end, minlen, 198 error = xfs_trim_extents(mp, agno, start, end, minlen,
199 &blocks_trimmed); 199 &blocks_trimmed);
200 if (error) 200 if (error)
201 last_error = error; 201 last_error = error;
@@ -206,7 +206,7 @@ xfs_ioc_trim(
206 206
207 range.len = XFS_FSB_TO_B(mp, blocks_trimmed); 207 range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
208 if (copy_to_user(urange, &range, sizeof(range))) 208 if (copy_to_user(urange, &range, sizeof(range)))
209 return -XFS_ERROR(EFAULT); 209 return -EFAULT;
210 return 0; 210 return 0;
211} 211}
212 212
@@ -222,11 +222,11 @@ xfs_discard_extents(
222 trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, 222 trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
223 busyp->length); 223 busyp->length);
224 224
225 error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, 225 error = blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
226 XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), 226 XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
227 XFS_FSB_TO_BB(mp, busyp->length), 227 XFS_FSB_TO_BB(mp, busyp->length),
228 GFP_NOFS, 0); 228 GFP_NOFS, 0);
229 if (error && error != EOPNOTSUPP) { 229 if (error && error != -EOPNOTSUPP) {
230 xfs_info(mp, 230 xfs_info(mp,
231 "discard failed for extent [0x%llu,%u], error %d", 231 "discard failed for extent [0x%llu,%u], error %d",
232 (unsigned long long)busyp->bno, 232 (unsigned long long)busyp->bno,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 3ee0cd43edc0..63c2de49f61d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -327,7 +327,7 @@ xfs_qm_dqalloc(
327 */ 327 */
328 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { 328 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
329 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 329 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
330 return (ESRCH); 330 return -ESRCH;
331 } 331 }
332 332
333 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); 333 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
@@ -354,7 +354,7 @@ xfs_qm_dqalloc(
354 mp->m_quotainfo->qi_dqchunklen, 354 mp->m_quotainfo->qi_dqchunklen,
355 0); 355 0);
356 if (!bp) { 356 if (!bp) {
357 error = ENOMEM; 357 error = -ENOMEM;
358 goto error1; 358 goto error1;
359 } 359 }
360 bp->b_ops = &xfs_dquot_buf_ops; 360 bp->b_ops = &xfs_dquot_buf_ops;
@@ -400,7 +400,7 @@ xfs_qm_dqalloc(
400 error0: 400 error0:
401 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 401 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
402 402
403 return (error); 403 return error;
404} 404}
405 405
406STATIC int 406STATIC int
@@ -426,7 +426,7 @@ xfs_qm_dqrepair(
426 426
427 if (error) { 427 if (error) {
428 ASSERT(*bpp == NULL); 428 ASSERT(*bpp == NULL);
429 return XFS_ERROR(error); 429 return error;
430 } 430 }
431 (*bpp)->b_ops = &xfs_dquot_buf_ops; 431 (*bpp)->b_ops = &xfs_dquot_buf_ops;
432 432
@@ -442,7 +442,7 @@ xfs_qm_dqrepair(
442 if (error) { 442 if (error) {
443 /* repair failed, we're screwed */ 443 /* repair failed, we're screwed */
444 xfs_trans_brelse(tp, *bpp); 444 xfs_trans_brelse(tp, *bpp);
445 return XFS_ERROR(EIO); 445 return -EIO;
446 } 446 }
447 } 447 }
448 448
@@ -480,7 +480,7 @@ xfs_qm_dqtobp(
480 * didn't have the quota inode lock. 480 * didn't have the quota inode lock.
481 */ 481 */
482 xfs_iunlock(quotip, lock_mode); 482 xfs_iunlock(quotip, lock_mode);
483 return ESRCH; 483 return -ESRCH;
484 } 484 }
485 485
486 /* 486 /*
@@ -508,7 +508,7 @@ xfs_qm_dqtobp(
508 * We don't allocate unless we're asked to 508 * We don't allocate unless we're asked to
509 */ 509 */
510 if (!(flags & XFS_QMOPT_DQALLOC)) 510 if (!(flags & XFS_QMOPT_DQALLOC))
511 return ENOENT; 511 return -ENOENT;
512 512
513 ASSERT(tp); 513 ASSERT(tp);
514 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, 514 error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
@@ -530,7 +530,7 @@ xfs_qm_dqtobp(
530 mp->m_quotainfo->qi_dqchunklen, 530 mp->m_quotainfo->qi_dqchunklen,
531 0, &bp, &xfs_dquot_buf_ops); 531 0, &bp, &xfs_dquot_buf_ops);
532 532
533 if (error == EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) { 533 if (error == -EFSCORRUPTED && (flags & XFS_QMOPT_DQREPAIR)) {
534 xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff * 534 xfs_dqid_t firstid = (xfs_dqid_t)map.br_startoff *
535 mp->m_quotainfo->qi_dqperchunk; 535 mp->m_quotainfo->qi_dqperchunk;
536 ASSERT(bp == NULL); 536 ASSERT(bp == NULL);
@@ -539,7 +539,7 @@ xfs_qm_dqtobp(
539 539
540 if (error) { 540 if (error) {
541 ASSERT(bp == NULL); 541 ASSERT(bp == NULL);
542 return XFS_ERROR(error); 542 return error;
543 } 543 }
544 } 544 }
545 545
@@ -547,7 +547,7 @@ xfs_qm_dqtobp(
547 *O_bpp = bp; 547 *O_bpp = bp;
548 *O_ddpp = bp->b_addr + dqp->q_bufoffset; 548 *O_ddpp = bp->b_addr + dqp->q_bufoffset;
549 549
550 return (0); 550 return 0;
551} 551}
552 552
553 553
@@ -715,7 +715,7 @@ xfs_qm_dqget(
715 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || 715 if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
716 (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || 716 (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
717 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { 717 (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
718 return (ESRCH); 718 return -ESRCH;
719 } 719 }
720 720
721#ifdef DEBUG 721#ifdef DEBUG
@@ -723,7 +723,7 @@ xfs_qm_dqget(
723 if ((xfs_dqerror_target == mp->m_ddev_targp) && 723 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
724 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { 724 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
725 xfs_debug(mp, "Returning error in dqget"); 725 xfs_debug(mp, "Returning error in dqget");
726 return (EIO); 726 return -EIO;
727 } 727 }
728 } 728 }
729 729
@@ -796,14 +796,14 @@ restart:
796 } else { 796 } else {
797 /* inode stays locked on return */ 797 /* inode stays locked on return */
798 xfs_qm_dqdestroy(dqp); 798 xfs_qm_dqdestroy(dqp);
799 return XFS_ERROR(ESRCH); 799 return -ESRCH;
800 } 800 }
801 } 801 }
802 802
803 mutex_lock(&qi->qi_tree_lock); 803 mutex_lock(&qi->qi_tree_lock);
804 error = -radix_tree_insert(tree, id, dqp); 804 error = radix_tree_insert(tree, id, dqp);
805 if (unlikely(error)) { 805 if (unlikely(error)) {
806 WARN_ON(error != EEXIST); 806 WARN_ON(error != -EEXIST);
807 807
808 /* 808 /*
809 * Duplicate found. Just throw away the new dquot and start 809 * Duplicate found. Just throw away the new dquot and start
@@ -829,7 +829,7 @@ restart:
829 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 829 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
830 trace_xfs_dqget_miss(dqp); 830 trace_xfs_dqget_miss(dqp);
831 *O_dqpp = dqp; 831 *O_dqpp = dqp;
832 return (0); 832 return 0;
833} 833}
834 834
835/* 835/*
@@ -966,7 +966,7 @@ xfs_qm_dqflush(
966 SHUTDOWN_CORRUPT_INCORE); 966 SHUTDOWN_CORRUPT_INCORE);
967 else 967 else
968 spin_unlock(&mp->m_ail->xa_lock); 968 spin_unlock(&mp->m_ail->xa_lock);
969 error = XFS_ERROR(EIO); 969 error = -EIO;
970 goto out_unlock; 970 goto out_unlock;
971 } 971 }
972 972
@@ -974,7 +974,8 @@ xfs_qm_dqflush(
974 * Get the buffer containing the on-disk dquot 974 * Get the buffer containing the on-disk dquot
975 */ 975 */
976 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, 976 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
977 mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); 977 mp->m_quotainfo->qi_dqchunklen, 0, &bp,
978 &xfs_dquot_buf_ops);
978 if (error) 979 if (error)
979 goto out_unlock; 980 goto out_unlock;
980 981
@@ -992,7 +993,7 @@ xfs_qm_dqflush(
992 xfs_buf_relse(bp); 993 xfs_buf_relse(bp);
993 xfs_dqfunlock(dqp); 994 xfs_dqfunlock(dqp);
994 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 995 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
995 return XFS_ERROR(EIO); 996 return -EIO;
996 } 997 }
997 998
998 /* This is the only portion of data that needs to persist */ 999 /* This is the only portion of data that needs to persist */
@@ -1045,7 +1046,7 @@ xfs_qm_dqflush(
1045 1046
1046out_unlock: 1047out_unlock:
1047 xfs_dqfunlock(dqp); 1048 xfs_dqfunlock(dqp);
1048 return XFS_ERROR(EIO); 1049 return -EIO;
1049} 1050}
1050 1051
1051/* 1052/*
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f704837..c24c67e22a2a 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
139 } 139 }
140} 140}
141 141
142/*
143 * Check whether a dquot is under low free space conditions. We assume the quota
144 * is enabled and enforced.
145 */
146static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
147{
148 int64_t freesp;
149
150 freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
151 if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
152 return true;
153
154 return false;
155}
156
142#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 157#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
143#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 158#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
144#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 159#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index edac5b057d28..b92fd7bc49e3 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -27,29 +27,6 @@
27 27
28#ifdef DEBUG 28#ifdef DEBUG
29 29
30int xfs_etrap[XFS_ERROR_NTRAP] = {
31 0,
32};
33
34int
35xfs_error_trap(int e)
36{
37 int i;
38
39 if (!e)
40 return 0;
41 for (i = 0; i < XFS_ERROR_NTRAP; i++) {
42 if (xfs_etrap[i] == 0)
43 break;
44 if (e != xfs_etrap[i])
45 continue;
46 xfs_notice(NULL, "%s: error %d", __func__, e);
47 BUG();
48 break;
49 }
50 return e;
51}
52
53int xfs_etest[XFS_NUM_INJECT_ERROR]; 30int xfs_etest[XFS_NUM_INJECT_ERROR];
54int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 31int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
55char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; 32char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
@@ -190,7 +167,7 @@ xfs_verifier_error(
190 struct xfs_mount *mp = bp->b_target->bt_mount; 167 struct xfs_mount *mp = bp->b_target->bt_mount;
191 168
192 xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", 169 xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
193 bp->b_error == EFSBADCRC ? "CRC error" : "corruption", 170 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
194 __return_address, bp->b_bn); 171 __return_address, bp->b_bn);
195 172
196 xfs_alert(mp, "Unmount and run xfs_repair"); 173 xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c1c57d4a4b5d..279a76e52791 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -18,15 +18,6 @@
18#ifndef __XFS_ERROR_H__ 18#ifndef __XFS_ERROR_H__
19#define __XFS_ERROR_H__ 19#define __XFS_ERROR_H__
20 20
21#ifdef DEBUG
22#define XFS_ERROR_NTRAP 10
23extern int xfs_etrap[XFS_ERROR_NTRAP];
24extern int xfs_error_trap(int);
25#define XFS_ERROR(e) xfs_error_trap(e)
26#else
27#define XFS_ERROR(e) (e)
28#endif
29
30struct xfs_mount; 21struct xfs_mount;
31 22
32extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, 23extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
@@ -56,7 +47,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
56 if (unlikely(!fs_is_ok)) { \ 47 if (unlikely(!fs_is_ok)) { \
57 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ 48 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
58 XFS_ERRLEVEL_LOW, NULL); \ 49 XFS_ERRLEVEL_LOW, NULL); \
59 error = XFS_ERROR(EFSCORRUPTED); \ 50 error = -EFSCORRUPTED; \
60 goto l; \ 51 goto l; \
61 } \ 52 } \
62 } 53 }
@@ -68,7 +59,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
68 if (unlikely(!fs_is_ok)) { \ 59 if (unlikely(!fs_is_ok)) { \
69 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ 60 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
70 XFS_ERRLEVEL_LOW, NULL); \ 61 XFS_ERRLEVEL_LOW, NULL); \
71 return XFS_ERROR(EFSCORRUPTED); \ 62 return -EFSCORRUPTED; \
72 } \ 63 } \
73 } 64 }
74 65
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 753e467aa1a5..5a6bd5d8779a 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -147,9 +147,9 @@ xfs_nfs_get_inode(
147 * We don't use ESTALE directly down the chain to not 147 * We don't use ESTALE directly down the chain to not
148 * confuse applications using bulkstat that expect EINVAL. 148 * confuse applications using bulkstat that expect EINVAL.
149 */ 149 */
150 if (error == EINVAL || error == ENOENT) 150 if (error == -EINVAL || error == -ENOENT)
151 error = ESTALE; 151 error = -ESTALE;
152 return ERR_PTR(-error); 152 return ERR_PTR(error);
153 } 153 }
154 154
155 if (ip->i_d.di_gen != generation) { 155 if (ip->i_d.di_gen != generation) {
@@ -217,7 +217,7 @@ xfs_fs_get_parent(
217 217
218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); 218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
219 if (unlikely(error)) 219 if (unlikely(error))
220 return ERR_PTR(-error); 220 return ERR_PTR(error);
221 221
222 return d_obtain_alias(VFS_I(cip)); 222 return d_obtain_alias(VFS_I(cip));
223} 223}
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
237 237
238 if (!lsn) 238 if (!lsn)
239 return 0; 239 return 0;
240 return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); 240 return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
241} 241}
242 242
243const struct export_operations xfs_export_operations = { 243const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index fb7a4c1ce1c5..c4327419dc5c 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -298,7 +298,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
298 } 298 }
299 return 0; 299 return 0;
300 } 300 }
301 return EFSCORRUPTED; 301 return -EFSCORRUPTED;
302} 302}
303 303
304/* 304/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f66779d7a46..076b1708d134 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
38#include "xfs_trace.h" 38#include "xfs_trace.h"
39#include "xfs_log.h" 39#include "xfs_log.h"
40#include "xfs_dinode.h" 40#include "xfs_dinode.h"
41#include "xfs_icache.h"
41 42
42#include <linux/aio.h> 43#include <linux/aio.h>
43#include <linux/dcache.h> 44#include <linux/dcache.h>
@@ -155,7 +156,7 @@ xfs_dir_fsync(
155 156
156 if (!lsn) 157 if (!lsn)
157 return 0; 158 return 0;
158 return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); 159 return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
159} 160}
160 161
161STATIC int 162STATIC int
@@ -179,7 +180,7 @@ xfs_file_fsync(
179 return error; 180 return error;
180 181
181 if (XFS_FORCED_SHUTDOWN(mp)) 182 if (XFS_FORCED_SHUTDOWN(mp))
182 return -XFS_ERROR(EIO); 183 return -EIO;
183 184
184 xfs_iflags_clear(ip, XFS_ITRUNCATED); 185 xfs_iflags_clear(ip, XFS_ITRUNCATED);
185 186
@@ -225,7 +226,7 @@ xfs_file_fsync(
225 !log_flushed) 226 !log_flushed)
226 xfs_blkdev_issue_flush(mp->m_ddev_targp); 227 xfs_blkdev_issue_flush(mp->m_ddev_targp);
227 228
228 return -error; 229 return error;
229} 230}
230 231
231STATIC ssize_t 232STATIC ssize_t
@@ -246,11 +247,11 @@ xfs_file_read_iter(
246 XFS_STATS_INC(xs_read_calls); 247 XFS_STATS_INC(xs_read_calls);
247 248
248 if (unlikely(file->f_flags & O_DIRECT)) 249 if (unlikely(file->f_flags & O_DIRECT))
249 ioflags |= IO_ISDIRECT; 250 ioflags |= XFS_IO_ISDIRECT;
250 if (file->f_mode & FMODE_NOCMTIME) 251 if (file->f_mode & FMODE_NOCMTIME)
251 ioflags |= IO_INVIS; 252 ioflags |= XFS_IO_INVIS;
252 253
253 if (unlikely(ioflags & IO_ISDIRECT)) { 254 if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
254 xfs_buftarg_t *target = 255 xfs_buftarg_t *target =
255 XFS_IS_REALTIME_INODE(ip) ? 256 XFS_IS_REALTIME_INODE(ip) ?
256 mp->m_rtdev_targp : mp->m_ddev_targp; 257 mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -258,7 +259,7 @@ xfs_file_read_iter(
258 if ((pos | size) & target->bt_logical_sectormask) { 259 if ((pos | size) & target->bt_logical_sectormask) {
259 if (pos == i_size_read(inode)) 260 if (pos == i_size_read(inode))
260 return 0; 261 return 0;
261 return -XFS_ERROR(EINVAL); 262 return -EINVAL;
262 } 263 }
263 } 264 }
264 265
@@ -283,7 +284,7 @@ xfs_file_read_iter(
283 * proceeed concurrently without serialisation. 284 * proceeed concurrently without serialisation.
284 */ 285 */
285 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); 286 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
286 if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { 287 if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
287 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 288 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
288 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); 289 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
289 290
@@ -325,7 +326,7 @@ xfs_file_splice_read(
325 XFS_STATS_INC(xs_read_calls); 326 XFS_STATS_INC(xs_read_calls);
326 327
327 if (infilp->f_mode & FMODE_NOCMTIME) 328 if (infilp->f_mode & FMODE_NOCMTIME)
328 ioflags |= IO_INVIS; 329 ioflags |= XFS_IO_INVIS;
329 330
330 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 331 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
331 return -EIO; 332 return -EIO;
@@ -524,7 +525,7 @@ restart:
524 xfs_rw_ilock(ip, *iolock); 525 xfs_rw_ilock(ip, *iolock);
525 goto restart; 526 goto restart;
526 } 527 }
527 error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); 528 error = xfs_zero_eof(ip, *pos, i_size_read(inode));
528 if (error) 529 if (error)
529 return error; 530 return error;
530 } 531 }
@@ -594,7 +595,7 @@ xfs_file_dio_aio_write(
594 595
595 /* DIO must be aligned to device logical sector size */ 596 /* DIO must be aligned to device logical sector size */
596 if ((pos | count) & target->bt_logical_sectormask) 597 if ((pos | count) & target->bt_logical_sectormask)
597 return -XFS_ERROR(EINVAL); 598 return -EINVAL;
598 599
599 /* "unaligned" here means not aligned to a filesystem block */ 600 /* "unaligned" here means not aligned to a filesystem block */
600 if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) 601 if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
@@ -689,14 +690,28 @@ write_retry:
689 ret = generic_perform_write(file, from, pos); 690 ret = generic_perform_write(file, from, pos);
690 if (likely(ret >= 0)) 691 if (likely(ret >= 0))
691 iocb->ki_pos = pos + ret; 692 iocb->ki_pos = pos + ret;
693
692 /* 694 /*
693 * If we just got an ENOSPC, try to write back all dirty inodes to 695 * If we hit a space limit, try to free up some lingering preallocated
694 * convert delalloc space to free up some of the excess reserved 696 * space before returning an error. In the case of ENOSPC, first try to
695 * metadata space. 697 * write back all dirty inodes to free up some of the excess reserved
698 * metadata space. This reduces the chances that the eofblocks scan
699 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
700 * also behaves as a filter to prevent too many eofblocks scans from
701 * running at the same time.
696 */ 702 */
697 if (ret == -ENOSPC && !enospc) { 703 if (ret == -EDQUOT && !enospc) {
704 enospc = xfs_inode_free_quota_eofblocks(ip);
705 if (enospc)
706 goto write_retry;
707 } else if (ret == -ENOSPC && !enospc) {
708 struct xfs_eofblocks eofb = {0};
709
698 enospc = 1; 710 enospc = 1;
699 xfs_flush_inodes(ip->i_mount); 711 xfs_flush_inodes(ip->i_mount);
712 eofb.eof_scan_owner = ip->i_ino; /* for locking */
713 eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
714 xfs_icache_free_eofblocks(ip->i_mount, &eofb);
700 goto write_retry; 715 goto write_retry;
701 } 716 }
702 717
@@ -772,7 +787,7 @@ xfs_file_fallocate(
772 unsigned blksize_mask = (1 << inode->i_blkbits) - 1; 787 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
773 788
774 if (offset & blksize_mask || len & blksize_mask) { 789 if (offset & blksize_mask || len & blksize_mask) {
775 error = EINVAL; 790 error = -EINVAL;
776 goto out_unlock; 791 goto out_unlock;
777 } 792 }
778 793
@@ -781,7 +796,7 @@ xfs_file_fallocate(
781 * in which case it is effectively a truncate operation 796 * in which case it is effectively a truncate operation
782 */ 797 */
783 if (offset + len >= i_size_read(inode)) { 798 if (offset + len >= i_size_read(inode)) {
784 error = EINVAL; 799 error = -EINVAL;
785 goto out_unlock; 800 goto out_unlock;
786 } 801 }
787 802
@@ -794,7 +809,7 @@ xfs_file_fallocate(
794 if (!(mode & FALLOC_FL_KEEP_SIZE) && 809 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
795 offset + len > i_size_read(inode)) { 810 offset + len > i_size_read(inode)) {
796 new_size = offset + len; 811 new_size = offset + len;
797 error = -inode_newsize_ok(inode, new_size); 812 error = inode_newsize_ok(inode, new_size);
798 if (error) 813 if (error)
799 goto out_unlock; 814 goto out_unlock;
800 } 815 }
@@ -844,7 +859,7 @@ xfs_file_fallocate(
844 859
845out_unlock: 860out_unlock:
846 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 861 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
847 return -error; 862 return error;
848} 863}
849 864
850 865
@@ -889,7 +904,7 @@ xfs_file_release(
889 struct inode *inode, 904 struct inode *inode,
890 struct file *filp) 905 struct file *filp)
891{ 906{
892 return -xfs_release(XFS_I(inode)); 907 return xfs_release(XFS_I(inode));
893} 908}
894 909
895STATIC int 910STATIC int
@@ -918,7 +933,7 @@ xfs_file_readdir(
918 933
919 error = xfs_readdir(ip, ctx, bufsize); 934 error = xfs_readdir(ip, ctx, bufsize);
920 if (error) 935 if (error)
921 return -error; 936 return error;
922 return 0; 937 return 0;
923} 938}
924 939
@@ -1184,7 +1199,7 @@ xfs_seek_data(
1184 1199
1185 isize = i_size_read(inode); 1200 isize = i_size_read(inode);
1186 if (start >= isize) { 1201 if (start >= isize) {
1187 error = ENXIO; 1202 error = -ENXIO;
1188 goto out_unlock; 1203 goto out_unlock;
1189 } 1204 }
1190 1205
@@ -1206,7 +1221,7 @@ xfs_seek_data(
1206 1221
1207 /* No extents at given offset, must be beyond EOF */ 1222 /* No extents at given offset, must be beyond EOF */
1208 if (nmap == 0) { 1223 if (nmap == 0) {
1209 error = ENXIO; 1224 error = -ENXIO;
1210 goto out_unlock; 1225 goto out_unlock;
1211 } 1226 }
1212 1227
@@ -1237,7 +1252,7 @@ xfs_seek_data(
1237 * we are reading after EOF if nothing in map[1]. 1252 * we are reading after EOF if nothing in map[1].
1238 */ 1253 */
1239 if (nmap == 1) { 1254 if (nmap == 1) {
1240 error = ENXIO; 1255 error = -ENXIO;
1241 goto out_unlock; 1256 goto out_unlock;
1242 } 1257 }
1243 1258
@@ -1250,7 +1265,7 @@ xfs_seek_data(
1250 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; 1265 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1251 start = XFS_FSB_TO_B(mp, fsbno); 1266 start = XFS_FSB_TO_B(mp, fsbno);
1252 if (start >= isize) { 1267 if (start >= isize) {
1253 error = ENXIO; 1268 error = -ENXIO;
1254 goto out_unlock; 1269 goto out_unlock;
1255 } 1270 }
1256 } 1271 }
@@ -1262,7 +1277,7 @@ out_unlock:
1262 xfs_iunlock(ip, lock); 1277 xfs_iunlock(ip, lock);
1263 1278
1264 if (error) 1279 if (error)
1265 return -error; 1280 return error;
1266 return offset; 1281 return offset;
1267} 1282}
1268 1283
@@ -1282,13 +1297,13 @@ xfs_seek_hole(
1282 int error; 1297 int error;
1283 1298
1284 if (XFS_FORCED_SHUTDOWN(mp)) 1299 if (XFS_FORCED_SHUTDOWN(mp))
1285 return -XFS_ERROR(EIO); 1300 return -EIO;
1286 1301
1287 lock = xfs_ilock_data_map_shared(ip); 1302 lock = xfs_ilock_data_map_shared(ip);
1288 1303
1289 isize = i_size_read(inode); 1304 isize = i_size_read(inode);
1290 if (start >= isize) { 1305 if (start >= isize) {
1291 error = ENXIO; 1306 error = -ENXIO;
1292 goto out_unlock; 1307 goto out_unlock;
1293 } 1308 }
1294 1309
@@ -1307,7 +1322,7 @@ xfs_seek_hole(
1307 1322
1308 /* No extents at given offset, must be beyond EOF */ 1323 /* No extents at given offset, must be beyond EOF */
1309 if (nmap == 0) { 1324 if (nmap == 0) {
1310 error = ENXIO; 1325 error = -ENXIO;
1311 goto out_unlock; 1326 goto out_unlock;
1312 } 1327 }
1313 1328
@@ -1370,7 +1385,7 @@ out_unlock:
1370 xfs_iunlock(ip, lock); 1385 xfs_iunlock(ip, lock);
1371 1386
1372 if (error) 1387 if (error)
1373 return -error; 1388 return error;
1374 return offset; 1389 return offset;
1375} 1390}
1376 1391
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 8ec81bed7992..e92730c1d3ca 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -258,7 +258,7 @@ next_ag:
258 if (*agp == NULLAGNUMBER) 258 if (*agp == NULLAGNUMBER)
259 return 0; 259 return 0;
260 260
261 err = ENOMEM; 261 err = -ENOMEM;
262 item = kmem_alloc(sizeof(*item), KM_MAYFAIL); 262 item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
263 if (!item) 263 if (!item)
264 goto out_put_ag; 264 goto out_put_ag;
@@ -268,7 +268,7 @@ next_ag:
268 268
269 err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); 269 err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
270 if (err) { 270 if (err) {
271 if (err == EEXIST) 271 if (err == -EEXIST)
272 err = 0; 272 err = 0;
273 goto out_free_item; 273 goto out_free_item;
274 } 274 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index d34703dbcb42..18dc721ca19f 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -255,8 +255,8 @@ typedef struct xfs_fsop_resblks {
255 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) 255 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
256 256
257/* Used for sanity checks on superblock */ 257/* Used for sanity checks on superblock */
258#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks) 258#define XFS_MAX_DBLOCKS(s) ((xfs_rfsblock_t)(s)->sb_agcount * (s)->sb_agblocks)
259#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \ 259#define XFS_MIN_DBLOCKS(s) ((xfs_rfsblock_t)((s)->sb_agcount - 1) * \
260 (s)->sb_agblocks + XFS_MIN_AG_BLOCKS) 260 (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
261 261
262/* 262/*
@@ -375,6 +375,9 @@ struct xfs_fs_eofblocks {
375#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ 375#define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */
376#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ 376#define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */
377#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ 377#define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */
378#define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm;
379 * kernel only, not included in
380 * valid mask */
378#define XFS_EOF_FLAGS_VALID \ 381#define XFS_EOF_FLAGS_VALID \
379 (XFS_EOF_FLAGS_SYNC | \ 382 (XFS_EOF_FLAGS_SYNC | \
380 XFS_EOF_FLAGS_UID | \ 383 XFS_EOF_FLAGS_UID | \
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d2295561570a..f91de1ef05e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -168,7 +168,7 @@ xfs_growfs_data_private(
168 nb = in->newblocks; 168 nb = in->newblocks;
169 pct = in->imaxpct; 169 pct = in->imaxpct;
170 if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100) 170 if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
171 return XFS_ERROR(EINVAL); 171 return -EINVAL;
172 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) 172 if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
173 return error; 173 return error;
174 dpct = pct - mp->m_sb.sb_imax_pct; 174 dpct = pct - mp->m_sb.sb_imax_pct;
@@ -176,7 +176,7 @@ xfs_growfs_data_private(
176 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 176 XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
177 XFS_FSS_TO_BB(mp, 1), 0, NULL); 177 XFS_FSS_TO_BB(mp, 1), 0, NULL);
178 if (!bp) 178 if (!bp)
179 return EIO; 179 return -EIO;
180 if (bp->b_error) { 180 if (bp->b_error) {
181 error = bp->b_error; 181 error = bp->b_error;
182 xfs_buf_relse(bp); 182 xfs_buf_relse(bp);
@@ -191,7 +191,7 @@ xfs_growfs_data_private(
191 nagcount--; 191 nagcount--;
192 nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; 192 nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
193 if (nb < mp->m_sb.sb_dblocks) 193 if (nb < mp->m_sb.sb_dblocks)
194 return XFS_ERROR(EINVAL); 194 return -EINVAL;
195 } 195 }
196 new = nb - mp->m_sb.sb_dblocks; 196 new = nb - mp->m_sb.sb_dblocks;
197 oagcount = mp->m_sb.sb_agcount; 197 oagcount = mp->m_sb.sb_agcount;
@@ -229,7 +229,7 @@ xfs_growfs_data_private(
229 XFS_FSS_TO_BB(mp, 1), 0, 229 XFS_FSS_TO_BB(mp, 1), 0,
230 &xfs_agf_buf_ops); 230 &xfs_agf_buf_ops);
231 if (!bp) { 231 if (!bp) {
232 error = ENOMEM; 232 error = -ENOMEM;
233 goto error0; 233 goto error0;
234 } 234 }
235 235
@@ -270,7 +270,7 @@ xfs_growfs_data_private(
270 XFS_FSS_TO_BB(mp, 1), 0, 270 XFS_FSS_TO_BB(mp, 1), 0,
271 &xfs_agfl_buf_ops); 271 &xfs_agfl_buf_ops);
272 if (!bp) { 272 if (!bp) {
273 error = ENOMEM; 273 error = -ENOMEM;
274 goto error0; 274 goto error0;
275 } 275 }
276 276
@@ -298,7 +298,7 @@ xfs_growfs_data_private(
298 XFS_FSS_TO_BB(mp, 1), 0, 298 XFS_FSS_TO_BB(mp, 1), 0,
299 &xfs_agi_buf_ops); 299 &xfs_agi_buf_ops);
300 if (!bp) { 300 if (!bp) {
301 error = ENOMEM; 301 error = -ENOMEM;
302 goto error0; 302 goto error0;
303 } 303 }
304 304
@@ -336,7 +336,7 @@ xfs_growfs_data_private(
336 &xfs_allocbt_buf_ops); 336 &xfs_allocbt_buf_ops);
337 337
338 if (!bp) { 338 if (!bp) {
339 error = ENOMEM; 339 error = -ENOMEM;
340 goto error0; 340 goto error0;
341 } 341 }
342 342
@@ -365,7 +365,7 @@ xfs_growfs_data_private(
365 BTOBB(mp->m_sb.sb_blocksize), 0, 365 BTOBB(mp->m_sb.sb_blocksize), 0,
366 &xfs_allocbt_buf_ops); 366 &xfs_allocbt_buf_ops);
367 if (!bp) { 367 if (!bp) {
368 error = ENOMEM; 368 error = -ENOMEM;
369 goto error0; 369 goto error0;
370 } 370 }
371 371
@@ -395,7 +395,7 @@ xfs_growfs_data_private(
395 BTOBB(mp->m_sb.sb_blocksize), 0, 395 BTOBB(mp->m_sb.sb_blocksize), 0,
396 &xfs_inobt_buf_ops); 396 &xfs_inobt_buf_ops);
397 if (!bp) { 397 if (!bp) {
398 error = ENOMEM; 398 error = -ENOMEM;
399 goto error0; 399 goto error0;
400 } 400 }
401 401
@@ -420,7 +420,7 @@ xfs_growfs_data_private(
420 BTOBB(mp->m_sb.sb_blocksize), 0, 420 BTOBB(mp->m_sb.sb_blocksize), 0,
421 &xfs_inobt_buf_ops); 421 &xfs_inobt_buf_ops);
422 if (!bp) { 422 if (!bp) {
423 error = ENOMEM; 423 error = -ENOMEM;
424 goto error0; 424 goto error0;
425 } 425 }
426 426
@@ -531,7 +531,7 @@ xfs_growfs_data_private(
531 bp->b_ops = &xfs_sb_buf_ops; 531 bp->b_ops = &xfs_sb_buf_ops;
532 xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); 532 xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
533 } else 533 } else
534 error = ENOMEM; 534 error = -ENOMEM;
535 } 535 }
536 536
537 /* 537 /*
@@ -576,17 +576,17 @@ xfs_growfs_log_private(
576 576
577 nb = in->newblocks; 577 nb = in->newblocks;
578 if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES)) 578 if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
579 return XFS_ERROR(EINVAL); 579 return -EINVAL;
580 if (nb == mp->m_sb.sb_logblocks && 580 if (nb == mp->m_sb.sb_logblocks &&
581 in->isint == (mp->m_sb.sb_logstart != 0)) 581 in->isint == (mp->m_sb.sb_logstart != 0))
582 return XFS_ERROR(EINVAL); 582 return -EINVAL;
583 /* 583 /*
584 * Moving the log is hard, need new interfaces to sync 584 * Moving the log is hard, need new interfaces to sync
585 * the log first, hold off all activity while moving it. 585 * the log first, hold off all activity while moving it.
586 * Can have shorter or longer log in the same space, 586 * Can have shorter or longer log in the same space,
587 * or transform internal to external log or vice versa. 587 * or transform internal to external log or vice versa.
588 */ 588 */
589 return XFS_ERROR(ENOSYS); 589 return -ENOSYS;
590} 590}
591 591
592/* 592/*
@@ -604,9 +604,9 @@ xfs_growfs_data(
604 int error; 604 int error;
605 605
606 if (!capable(CAP_SYS_ADMIN)) 606 if (!capable(CAP_SYS_ADMIN))
607 return XFS_ERROR(EPERM); 607 return -EPERM;
608 if (!mutex_trylock(&mp->m_growlock)) 608 if (!mutex_trylock(&mp->m_growlock))
609 return XFS_ERROR(EWOULDBLOCK); 609 return -EWOULDBLOCK;
610 error = xfs_growfs_data_private(mp, in); 610 error = xfs_growfs_data_private(mp, in);
611 mutex_unlock(&mp->m_growlock); 611 mutex_unlock(&mp->m_growlock);
612 return error; 612 return error;
@@ -620,9 +620,9 @@ xfs_growfs_log(
620 int error; 620 int error;
621 621
622 if (!capable(CAP_SYS_ADMIN)) 622 if (!capable(CAP_SYS_ADMIN))
623 return XFS_ERROR(EPERM); 623 return -EPERM;
624 if (!mutex_trylock(&mp->m_growlock)) 624 if (!mutex_trylock(&mp->m_growlock))
625 return XFS_ERROR(EWOULDBLOCK); 625 return -EWOULDBLOCK;
626 error = xfs_growfs_log_private(mp, in); 626 error = xfs_growfs_log_private(mp, in);
627 mutex_unlock(&mp->m_growlock); 627 mutex_unlock(&mp->m_growlock);
628 return error; 628 return error;
@@ -674,7 +674,7 @@ xfs_reserve_blocks(
674 /* If inval is null, report current values and return */ 674 /* If inval is null, report current values and return */
675 if (inval == (__uint64_t *)NULL) { 675 if (inval == (__uint64_t *)NULL) {
676 if (!outval) 676 if (!outval)
677 return EINVAL; 677 return -EINVAL;
678 outval->resblks = mp->m_resblks; 678 outval->resblks = mp->m_resblks;
679 outval->resblks_avail = mp->m_resblks_avail; 679 outval->resblks_avail = mp->m_resblks_avail;
680 return 0; 680 return 0;
@@ -757,7 +757,7 @@ out:
757 int error; 757 int error;
758 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 758 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
759 fdblks_delta, 0); 759 fdblks_delta, 0);
760 if (error == ENOSPC) 760 if (error == -ENOSPC)
761 goto retry; 761 goto retry;
762 } 762 }
763 return 0; 763 return 0;
@@ -818,7 +818,7 @@ xfs_fs_goingdown(
818 SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); 818 SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
819 break; 819 break;
820 default: 820 default:
821 return XFS_ERROR(EINVAL); 821 return -EINVAL;
822 } 822 }
823 823
824 return 0; 824 return 0;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index c48df5f25b9f..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
33#include "xfs_trace.h" 33#include "xfs_trace.h"
34#include "xfs_icache.h" 34#include "xfs_icache.h"
35#include "xfs_bmap_util.h" 35#include "xfs_bmap_util.h"
36#include "xfs_quota.h"
37#include "xfs_dquot_item.h"
38#include "xfs_dquot.h"
36 39
37#include <linux/kthread.h> 40#include <linux/kthread.h>
38#include <linux/freezer.h> 41#include <linux/freezer.h>
@@ -158,7 +161,7 @@ xfs_iget_cache_hit(
158 if (ip->i_ino != ino) { 161 if (ip->i_ino != ino) {
159 trace_xfs_iget_skip(ip); 162 trace_xfs_iget_skip(ip);
160 XFS_STATS_INC(xs_ig_frecycle); 163 XFS_STATS_INC(xs_ig_frecycle);
161 error = EAGAIN; 164 error = -EAGAIN;
162 goto out_error; 165 goto out_error;
163 } 166 }
164 167
@@ -176,7 +179,7 @@ xfs_iget_cache_hit(
176 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 179 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
177 trace_xfs_iget_skip(ip); 180 trace_xfs_iget_skip(ip);
178 XFS_STATS_INC(xs_ig_frecycle); 181 XFS_STATS_INC(xs_ig_frecycle);
179 error = EAGAIN; 182 error = -EAGAIN;
180 goto out_error; 183 goto out_error;
181 } 184 }
182 185
@@ -184,7 +187,7 @@ xfs_iget_cache_hit(
184 * If lookup is racing with unlink return an error immediately. 187 * If lookup is racing with unlink return an error immediately.
185 */ 188 */
186 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 189 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
187 error = ENOENT; 190 error = -ENOENT;
188 goto out_error; 191 goto out_error;
189 } 192 }
190 193
@@ -206,7 +209,7 @@ xfs_iget_cache_hit(
206 spin_unlock(&ip->i_flags_lock); 209 spin_unlock(&ip->i_flags_lock);
207 rcu_read_unlock(); 210 rcu_read_unlock();
208 211
209 error = -inode_init_always(mp->m_super, inode); 212 error = inode_init_always(mp->m_super, inode);
210 if (error) { 213 if (error) {
211 /* 214 /*
212 * Re-initializing the inode failed, and we are in deep 215 * Re-initializing the inode failed, and we are in deep
@@ -243,7 +246,7 @@ xfs_iget_cache_hit(
243 /* If the VFS inode is being torn down, pause and try again. */ 246 /* If the VFS inode is being torn down, pause and try again. */
244 if (!igrab(inode)) { 247 if (!igrab(inode)) {
245 trace_xfs_iget_skip(ip); 248 trace_xfs_iget_skip(ip);
246 error = EAGAIN; 249 error = -EAGAIN;
247 goto out_error; 250 goto out_error;
248 } 251 }
249 252
@@ -285,7 +288,7 @@ xfs_iget_cache_miss(
285 288
286 ip = xfs_inode_alloc(mp, ino); 289 ip = xfs_inode_alloc(mp, ino);
287 if (!ip) 290 if (!ip)
288 return ENOMEM; 291 return -ENOMEM;
289 292
290 error = xfs_iread(mp, tp, ip, flags); 293 error = xfs_iread(mp, tp, ip, flags);
291 if (error) 294 if (error)
@@ -294,7 +297,7 @@ xfs_iget_cache_miss(
294 trace_xfs_iget_miss(ip); 297 trace_xfs_iget_miss(ip);
295 298
296 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 299 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
297 error = ENOENT; 300 error = -ENOENT;
298 goto out_destroy; 301 goto out_destroy;
299 } 302 }
300 303
@@ -305,7 +308,7 @@ xfs_iget_cache_miss(
305 * recurse into the file system. 308 * recurse into the file system.
306 */ 309 */
307 if (radix_tree_preload(GFP_NOFS)) { 310 if (radix_tree_preload(GFP_NOFS)) {
308 error = EAGAIN; 311 error = -EAGAIN;
309 goto out_destroy; 312 goto out_destroy;
310 } 313 }
311 314
@@ -341,7 +344,7 @@ xfs_iget_cache_miss(
341 if (unlikely(error)) { 344 if (unlikely(error)) {
342 WARN_ON(error != -EEXIST); 345 WARN_ON(error != -EEXIST);
343 XFS_STATS_INC(xs_ig_dup); 346 XFS_STATS_INC(xs_ig_dup);
344 error = EAGAIN; 347 error = -EAGAIN;
345 goto out_preload_end; 348 goto out_preload_end;
346 } 349 }
347 spin_unlock(&pag->pag_ici_lock); 350 spin_unlock(&pag->pag_ici_lock);
@@ -408,7 +411,7 @@ xfs_iget(
408 411
409 /* reject inode numbers outside existing AGs */ 412 /* reject inode numbers outside existing AGs */
410 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 413 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
411 return EINVAL; 414 return -EINVAL;
412 415
413 /* get the perag structure and ensure that it's inode capable */ 416 /* get the perag structure and ensure that it's inode capable */
414 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 417 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -445,7 +448,7 @@ again:
445 return 0; 448 return 0;
446 449
447out_error_or_again: 450out_error_or_again:
448 if (error == EAGAIN) { 451 if (error == -EAGAIN) {
449 delay(1); 452 delay(1);
450 goto again; 453 goto again;
451 } 454 }
@@ -489,18 +492,18 @@ xfs_inode_ag_walk_grab(
489 492
490 /* nothing to sync during shutdown */ 493 /* nothing to sync during shutdown */
491 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 494 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
492 return EFSCORRUPTED; 495 return -EFSCORRUPTED;
493 496
494 /* If we can't grab the inode, it must on it's way to reclaim. */ 497 /* If we can't grab the inode, it must on it's way to reclaim. */
495 if (!igrab(inode)) 498 if (!igrab(inode))
496 return ENOENT; 499 return -ENOENT;
497 500
498 /* inode is valid */ 501 /* inode is valid */
499 return 0; 502 return 0;
500 503
501out_unlock_noent: 504out_unlock_noent:
502 spin_unlock(&ip->i_flags_lock); 505 spin_unlock(&ip->i_flags_lock);
503 return ENOENT; 506 return -ENOENT;
504} 507}
505 508
506STATIC int 509STATIC int
@@ -583,16 +586,16 @@ restart:
583 continue; 586 continue;
584 error = execute(batch[i], flags, args); 587 error = execute(batch[i], flags, args);
585 IRELE(batch[i]); 588 IRELE(batch[i]);
586 if (error == EAGAIN) { 589 if (error == -EAGAIN) {
587 skipped++; 590 skipped++;
588 continue; 591 continue;
589 } 592 }
590 if (error && last_error != EFSCORRUPTED) 593 if (error && last_error != -EFSCORRUPTED)
591 last_error = error; 594 last_error = error;
592 } 595 }
593 596
594 /* bail out if the filesystem is corrupted. */ 597 /* bail out if the filesystem is corrupted. */
595 if (error == EFSCORRUPTED) 598 if (error == -EFSCORRUPTED)
596 break; 599 break;
597 600
598 cond_resched(); 601 cond_resched();
@@ -652,11 +655,11 @@ xfs_inode_ag_iterator(
652 xfs_perag_put(pag); 655 xfs_perag_put(pag);
653 if (error) { 656 if (error) {
654 last_error = error; 657 last_error = error;
655 if (error == EFSCORRUPTED) 658 if (error == -EFSCORRUPTED)
656 break; 659 break;
657 } 660 }
658 } 661 }
659 return XFS_ERROR(last_error); 662 return last_error;
660} 663}
661 664
662int 665int
@@ -680,11 +683,11 @@ xfs_inode_ag_iterator_tag(
680 xfs_perag_put(pag); 683 xfs_perag_put(pag);
681 if (error) { 684 if (error) {
682 last_error = error; 685 last_error = error;
683 if (error == EFSCORRUPTED) 686 if (error == -EFSCORRUPTED)
684 break; 687 break;
685 } 688 }
686 } 689 }
687 return XFS_ERROR(last_error); 690 return last_error;
688} 691}
689 692
690/* 693/*
@@ -944,7 +947,7 @@ restart:
944 * see the stale flag set on the inode. 947 * see the stale flag set on the inode.
945 */ 948 */
946 error = xfs_iflush(ip, &bp); 949 error = xfs_iflush(ip, &bp);
947 if (error == EAGAIN) { 950 if (error == -EAGAIN) {
948 xfs_iunlock(ip, XFS_ILOCK_EXCL); 951 xfs_iunlock(ip, XFS_ILOCK_EXCL);
949 /* backoff longer than in xfs_ifree_cluster */ 952 /* backoff longer than in xfs_ifree_cluster */
950 delay(2); 953 delay(2);
@@ -997,7 +1000,7 @@ out:
997 xfs_iflags_clear(ip, XFS_IRECLAIM); 1000 xfs_iflags_clear(ip, XFS_IRECLAIM);
998 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1001 xfs_iunlock(ip, XFS_ILOCK_EXCL);
999 /* 1002 /*
1000 * We could return EAGAIN here to make reclaim rescan the inode tree in 1003 * We could return -EAGAIN here to make reclaim rescan the inode tree in
1001 * a short while. However, this just burns CPU time scanning the tree 1004 * a short while. However, this just burns CPU time scanning the tree
1002 * waiting for IO to complete and the reclaim work never goes back to 1005 * waiting for IO to complete and the reclaim work never goes back to
1003 * the idle state. Instead, return 0 to let the next scheduled 1006 * the idle state. Instead, return 0 to let the next scheduled
@@ -1100,7 +1103,7 @@ restart:
1100 if (!batch[i]) 1103 if (!batch[i])
1101 continue; 1104 continue;
1102 error = xfs_reclaim_inode(batch[i], pag, flags); 1105 error = xfs_reclaim_inode(batch[i], pag, flags);
1103 if (error && last_error != EFSCORRUPTED) 1106 if (error && last_error != -EFSCORRUPTED)
1104 last_error = error; 1107 last_error = error;
1105 } 1108 }
1106 1109
@@ -1129,7 +1132,7 @@ restart:
1129 trylock = 0; 1132 trylock = 0;
1130 goto restart; 1133 goto restart;
1131 } 1134 }
1132 return XFS_ERROR(last_error); 1135 return last_error;
1133} 1136}
1134 1137
1135int 1138int
@@ -1203,6 +1206,30 @@ xfs_inode_match_id(
1203 return 1; 1206 return 1;
1204} 1207}
1205 1208
1209/*
1210 * A union-based inode filtering algorithm. Process the inode if any of the
1211 * criteria match. This is for global/internal scans only.
1212 */
1213STATIC int
1214xfs_inode_match_id_union(
1215 struct xfs_inode *ip,
1216 struct xfs_eofblocks *eofb)
1217{
1218 if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
1219 uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
1220 return 1;
1221
1222 if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
1223 gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
1224 return 1;
1225
1226 if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
1227 xfs_get_projid(ip) == eofb->eof_prid)
1228 return 1;
1229
1230 return 0;
1231}
1232
1206STATIC int 1233STATIC int
1207xfs_inode_free_eofblocks( 1234xfs_inode_free_eofblocks(
1208 struct xfs_inode *ip, 1235 struct xfs_inode *ip,
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks(
1211{ 1238{
1212 int ret; 1239 int ret;
1213 struct xfs_eofblocks *eofb = args; 1240 struct xfs_eofblocks *eofb = args;
1241 bool need_iolock = true;
1242 int match;
1243
1244 ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
1214 1245
1215 if (!xfs_can_free_eofblocks(ip, false)) { 1246 if (!xfs_can_free_eofblocks(ip, false)) {
1216 /* inode could be preallocated or append-only */ 1247 /* inode could be preallocated or append-only */
@@ -1228,19 +1259,31 @@ xfs_inode_free_eofblocks(
1228 return 0; 1259 return 0;
1229 1260
1230 if (eofb) { 1261 if (eofb) {
1231 if (!xfs_inode_match_id(ip, eofb)) 1262 if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
1263 match = xfs_inode_match_id_union(ip, eofb);
1264 else
1265 match = xfs_inode_match_id(ip, eofb);
1266 if (!match)
1232 return 0; 1267 return 0;
1233 1268
1234 /* skip the inode if the file size is too small */ 1269 /* skip the inode if the file size is too small */
1235 if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && 1270 if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
1236 XFS_ISIZE(ip) < eofb->eof_min_file_size) 1271 XFS_ISIZE(ip) < eofb->eof_min_file_size)
1237 return 0; 1272 return 0;
1273
1274 /*
1275 * A scan owner implies we already hold the iolock. Skip it in
1276 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
1277 * the possibility of EAGAIN being returned.
1278 */
1279 if (eofb->eof_scan_owner == ip->i_ino)
1280 need_iolock = false;
1238 } 1281 }
1239 1282
1240 ret = xfs_free_eofblocks(ip->i_mount, ip, true); 1283 ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
1241 1284
1242 /* don't revisit the inode if we're not waiting */ 1285 /* don't revisit the inode if we're not waiting */
1243 if (ret == EAGAIN && !(flags & SYNC_WAIT)) 1286 if (ret == -EAGAIN && !(flags & SYNC_WAIT))
1244 ret = 0; 1287 ret = 0;
1245 1288
1246 return ret; 1289 return ret;
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks(
1260 eofb, XFS_ICI_EOFBLOCKS_TAG); 1303 eofb, XFS_ICI_EOFBLOCKS_TAG);
1261} 1304}
1262 1305
1306/*
1307 * Run eofblocks scans on the quotas applicable to the inode. For inodes with
1308 * multiple quotas, we don't know exactly which quota caused an allocation
1309 * failure. We make a best effort by including each quota under low free space
1310 * conditions (less than 1% free space) in the scan.
1311 */
1312int
1313xfs_inode_free_quota_eofblocks(
1314 struct xfs_inode *ip)
1315{
1316 int scan = 0;
1317 struct xfs_eofblocks eofb = {0};
1318 struct xfs_dquot *dq;
1319
1320 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1321
1322 /*
1323 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
1324 * can repeatedly trylock on the inode we're currently processing. We
1325 * run a sync scan to increase effectiveness and use the union filter to
1326 * cover all applicable quotas in a single scan.
1327 */
1328 eofb.eof_scan_owner = ip->i_ino;
1329 eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
1330
1331 if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
1332 dq = xfs_inode_dquot(ip, XFS_DQ_USER);
1333 if (dq && xfs_dquot_lowsp(dq)) {
1334 eofb.eof_uid = VFS_I(ip)->i_uid;
1335 eofb.eof_flags |= XFS_EOF_FLAGS_UID;
1336 scan = 1;
1337 }
1338 }
1339
1340 if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
1341 dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
1342 if (dq && xfs_dquot_lowsp(dq)) {
1343 eofb.eof_gid = VFS_I(ip)->i_gid;
1344 eofb.eof_flags |= XFS_EOF_FLAGS_GID;
1345 scan = 1;
1346 }
1347 }
1348
1349 if (scan)
1350 xfs_icache_free_eofblocks(ip->i_mount, &eofb);
1351
1352 return scan;
1353}
1354
1263void 1355void
1264xfs_inode_set_eofblocks_tag( 1356xfs_inode_set_eofblocks_tag(
1265 xfs_inode_t *ip) 1357 xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 9cf017b899be..46748b86b12f 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,6 +27,7 @@ struct xfs_eofblocks {
27 kgid_t eof_gid; 27 kgid_t eof_gid;
28 prid_t eof_prid; 28 prid_t eof_prid;
29 __u64 eof_min_file_size; 29 __u64 eof_min_file_size;
30 xfs_ino_t eof_scan_owner;
30}; 31};
31 32
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 33#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
57void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); 58void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
58void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); 59void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
59int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); 60int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
61int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
60void xfs_eofblocks_worker(struct work_struct *); 62void xfs_eofblocks_worker(struct work_struct *);
61 63
62int xfs_inode_ag_iterator(struct xfs_mount *mp, 64int xfs_inode_ag_iterator(struct xfs_mount *mp,
@@ -72,31 +74,32 @@ xfs_fs_eofblocks_from_user(
72 struct xfs_eofblocks *dst) 74 struct xfs_eofblocks *dst)
73{ 75{
74 if (src->eof_version != XFS_EOFBLOCKS_VERSION) 76 if (src->eof_version != XFS_EOFBLOCKS_VERSION)
75 return EINVAL; 77 return -EINVAL;
76 78
77 if (src->eof_flags & ~XFS_EOF_FLAGS_VALID) 79 if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
78 return EINVAL; 80 return -EINVAL;
79 81
80 if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) || 82 if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
81 memchr_inv(src->pad64, 0, sizeof(src->pad64))) 83 memchr_inv(src->pad64, 0, sizeof(src->pad64)))
82 return EINVAL; 84 return -EINVAL;
83 85
84 dst->eof_flags = src->eof_flags; 86 dst->eof_flags = src->eof_flags;
85 dst->eof_prid = src->eof_prid; 87 dst->eof_prid = src->eof_prid;
86 dst->eof_min_file_size = src->eof_min_file_size; 88 dst->eof_min_file_size = src->eof_min_file_size;
89 dst->eof_scan_owner = NULLFSINO;
87 90
88 dst->eof_uid = INVALID_UID; 91 dst->eof_uid = INVALID_UID;
89 if (src->eof_flags & XFS_EOF_FLAGS_UID) { 92 if (src->eof_flags & XFS_EOF_FLAGS_UID) {
90 dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid); 93 dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
91 if (!uid_valid(dst->eof_uid)) 94 if (!uid_valid(dst->eof_uid))
92 return EINVAL; 95 return -EINVAL;
93 } 96 }
94 97
95 dst->eof_gid = INVALID_GID; 98 dst->eof_gid = INVALID_GID;
96 if (src->eof_flags & XFS_EOF_FLAGS_GID) { 99 if (src->eof_flags & XFS_EOF_FLAGS_GID) {
97 dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid); 100 dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
98 if (!gid_valid(dst->eof_gid)) 101 if (!gid_valid(dst->eof_gid))
99 return EINVAL; 102 return -EINVAL;
100 } 103 }
101 return 0; 104 return 0;
102} 105}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a6115fe1ac94..fea3c92fb3f0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -583,7 +583,7 @@ xfs_lookup(
583 trace_xfs_lookup(dp, name); 583 trace_xfs_lookup(dp, name);
584 584
585 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 585 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
586 return XFS_ERROR(EIO); 586 return -EIO;
587 587
588 lock_mode = xfs_ilock_data_map_shared(dp); 588 lock_mode = xfs_ilock_data_map_shared(dp);
589 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 589 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
@@ -893,7 +893,7 @@ xfs_dir_ialloc(
893 } 893 }
894 if (!ialloc_context && !ip) { 894 if (!ialloc_context && !ip) {
895 *ipp = NULL; 895 *ipp = NULL;
896 return XFS_ERROR(ENOSPC); 896 return -ENOSPC;
897 } 897 }
898 898
899 /* 899 /*
@@ -1088,7 +1088,7 @@ xfs_create(
1088 trace_xfs_create(dp, name); 1088 trace_xfs_create(dp, name);
1089 1089
1090 if (XFS_FORCED_SHUTDOWN(mp)) 1090 if (XFS_FORCED_SHUTDOWN(mp))
1091 return XFS_ERROR(EIO); 1091 return -EIO;
1092 1092
1093 prid = xfs_get_initial_prid(dp); 1093 prid = xfs_get_initial_prid(dp);
1094 1094
@@ -1125,12 +1125,12 @@ xfs_create(
1125 */ 1125 */
1126 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; 1126 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
1127 error = xfs_trans_reserve(tp, &tres, resblks, 0); 1127 error = xfs_trans_reserve(tp, &tres, resblks, 0);
1128 if (error == ENOSPC) { 1128 if (error == -ENOSPC) {
1129 /* flush outstanding delalloc blocks and retry */ 1129 /* flush outstanding delalloc blocks and retry */
1130 xfs_flush_inodes(mp); 1130 xfs_flush_inodes(mp);
1131 error = xfs_trans_reserve(tp, &tres, resblks, 0); 1131 error = xfs_trans_reserve(tp, &tres, resblks, 0);
1132 } 1132 }
1133 if (error == ENOSPC) { 1133 if (error == -ENOSPC) {
1134 /* No space at all so try a "no-allocation" reservation */ 1134 /* No space at all so try a "no-allocation" reservation */
1135 resblks = 0; 1135 resblks = 0;
1136 error = xfs_trans_reserve(tp, &tres, 0, 0); 1136 error = xfs_trans_reserve(tp, &tres, 0, 0);
@@ -1165,7 +1165,7 @@ xfs_create(
1165 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 1165 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
1166 prid, resblks > 0, &ip, &committed); 1166 prid, resblks > 0, &ip, &committed);
1167 if (error) { 1167 if (error) {
1168 if (error == ENOSPC) 1168 if (error == -ENOSPC)
1169 goto out_trans_cancel; 1169 goto out_trans_cancel;
1170 goto out_trans_abort; 1170 goto out_trans_abort;
1171 } 1171 }
@@ -1184,7 +1184,7 @@ xfs_create(
1184 &first_block, &free_list, resblks ? 1184 &first_block, &free_list, resblks ?
1185 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1185 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1186 if (error) { 1186 if (error) {
1187 ASSERT(error != ENOSPC); 1187 ASSERT(error != -ENOSPC);
1188 goto out_trans_abort; 1188 goto out_trans_abort;
1189 } 1189 }
1190 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1190 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1274,7 +1274,7 @@ xfs_create_tmpfile(
1274 uint resblks; 1274 uint resblks;
1275 1275
1276 if (XFS_FORCED_SHUTDOWN(mp)) 1276 if (XFS_FORCED_SHUTDOWN(mp))
1277 return XFS_ERROR(EIO); 1277 return -EIO;
1278 1278
1279 prid = xfs_get_initial_prid(dp); 1279 prid = xfs_get_initial_prid(dp);
1280 1280
@@ -1293,7 +1293,7 @@ xfs_create_tmpfile(
1293 1293
1294 tres = &M_RES(mp)->tr_create_tmpfile; 1294 tres = &M_RES(mp)->tr_create_tmpfile;
1295 error = xfs_trans_reserve(tp, tres, resblks, 0); 1295 error = xfs_trans_reserve(tp, tres, resblks, 0);
1296 if (error == ENOSPC) { 1296 if (error == -ENOSPC) {
1297 /* No space at all so try a "no-allocation" reservation */ 1297 /* No space at all so try a "no-allocation" reservation */
1298 resblks = 0; 1298 resblks = 0;
1299 error = xfs_trans_reserve(tp, tres, 0, 0); 1299 error = xfs_trans_reserve(tp, tres, 0, 0);
@@ -1311,7 +1311,7 @@ xfs_create_tmpfile(
1311 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, 1311 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
1312 prid, resblks > 0, &ip, NULL); 1312 prid, resblks > 0, &ip, NULL);
1313 if (error) { 1313 if (error) {
1314 if (error == ENOSPC) 1314 if (error == -ENOSPC)
1315 goto out_trans_cancel; 1315 goto out_trans_cancel;
1316 goto out_trans_abort; 1316 goto out_trans_abort;
1317 } 1317 }
@@ -1382,7 +1382,7 @@ xfs_link(
1382 ASSERT(!S_ISDIR(sip->i_d.di_mode)); 1382 ASSERT(!S_ISDIR(sip->i_d.di_mode));
1383 1383
1384 if (XFS_FORCED_SHUTDOWN(mp)) 1384 if (XFS_FORCED_SHUTDOWN(mp))
1385 return XFS_ERROR(EIO); 1385 return -EIO;
1386 1386
1387 error = xfs_qm_dqattach(sip, 0); 1387 error = xfs_qm_dqattach(sip, 0);
1388 if (error) 1388 if (error)
@@ -1396,7 +1396,7 @@ xfs_link(
1396 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1396 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1397 resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 1397 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
1398 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); 1398 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
1399 if (error == ENOSPC) { 1399 if (error == -ENOSPC) {
1400 resblks = 0; 1400 resblks = 0;
1401 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); 1401 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
1402 } 1402 }
@@ -1417,7 +1417,7 @@ xfs_link(
1417 */ 1417 */
1418 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1418 if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1419 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { 1419 (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
1420 error = XFS_ERROR(EXDEV); 1420 error = -EXDEV;
1421 goto error_return; 1421 goto error_return;
1422 } 1422 }
1423 1423
@@ -1635,8 +1635,8 @@ xfs_release(
1635 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); 1635 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1636 if (truncated) { 1636 if (truncated) {
1637 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE); 1637 xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
1638 if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) { 1638 if (ip->i_delayed_blks > 0) {
1639 error = -filemap_flush(VFS_I(ip)->i_mapping); 1639 error = filemap_flush(VFS_I(ip)->i_mapping);
1640 if (error) 1640 if (error)
1641 return error; 1641 return error;
1642 } 1642 }
@@ -1673,7 +1673,7 @@ xfs_release(
1673 return 0; 1673 return 0;
1674 1674
1675 error = xfs_free_eofblocks(mp, ip, true); 1675 error = xfs_free_eofblocks(mp, ip, true);
1676 if (error && error != EAGAIN) 1676 if (error && error != -EAGAIN)
1677 return error; 1677 return error;
1678 1678
1679 /* delalloc blocks after truncation means it really is dirty */ 1679 /* delalloc blocks after truncation means it really is dirty */
@@ -1772,7 +1772,7 @@ xfs_inactive_ifree(
1772 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 1772 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
1773 XFS_IFREE_SPACE_RES(mp), 0); 1773 XFS_IFREE_SPACE_RES(mp), 0);
1774 if (error) { 1774 if (error) {
1775 if (error == ENOSPC) { 1775 if (error == -ENOSPC) {
1776 xfs_warn_ratelimited(mp, 1776 xfs_warn_ratelimited(mp,
1777 "Failed to remove inode(s) from unlinked list. " 1777 "Failed to remove inode(s) from unlinked list. "
1778 "Please free space, unmount and run xfs_repair."); 1778 "Please free space, unmount and run xfs_repair.");
@@ -2219,7 +2219,7 @@ xfs_ifree_cluster(
2219 XBF_UNMAPPED); 2219 XBF_UNMAPPED);
2220 2220
2221 if (!bp) 2221 if (!bp)
2222 return ENOMEM; 2222 return -ENOMEM;
2223 2223
2224 /* 2224 /*
2225 * This buffer may not have been correctly initialised as we 2225 * This buffer may not have been correctly initialised as we
@@ -2491,7 +2491,7 @@ xfs_remove(
2491 trace_xfs_remove(dp, name); 2491 trace_xfs_remove(dp, name);
2492 2492
2493 if (XFS_FORCED_SHUTDOWN(mp)) 2493 if (XFS_FORCED_SHUTDOWN(mp))
2494 return XFS_ERROR(EIO); 2494 return -EIO;
2495 2495
2496 error = xfs_qm_dqattach(dp, 0); 2496 error = xfs_qm_dqattach(dp, 0);
2497 if (error) 2497 if (error)
@@ -2521,12 +2521,12 @@ xfs_remove(
2521 */ 2521 */
2522 resblks = XFS_REMOVE_SPACE_RES(mp); 2522 resblks = XFS_REMOVE_SPACE_RES(mp);
2523 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); 2523 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
2524 if (error == ENOSPC) { 2524 if (error == -ENOSPC) {
2525 resblks = 0; 2525 resblks = 0;
2526 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); 2526 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
2527 } 2527 }
2528 if (error) { 2528 if (error) {
2529 ASSERT(error != ENOSPC); 2529 ASSERT(error != -ENOSPC);
2530 cancel_flags = 0; 2530 cancel_flags = 0;
2531 goto out_trans_cancel; 2531 goto out_trans_cancel;
2532 } 2532 }
@@ -2543,11 +2543,11 @@ xfs_remove(
2543 if (is_dir) { 2543 if (is_dir) {
2544 ASSERT(ip->i_d.di_nlink >= 2); 2544 ASSERT(ip->i_d.di_nlink >= 2);
2545 if (ip->i_d.di_nlink != 2) { 2545 if (ip->i_d.di_nlink != 2) {
2546 error = XFS_ERROR(ENOTEMPTY); 2546 error = -ENOTEMPTY;
2547 goto out_trans_cancel; 2547 goto out_trans_cancel;
2548 } 2548 }
2549 if (!xfs_dir_isempty(ip)) { 2549 if (!xfs_dir_isempty(ip)) {
2550 error = XFS_ERROR(ENOTEMPTY); 2550 error = -ENOTEMPTY;
2551 goto out_trans_cancel; 2551 goto out_trans_cancel;
2552 } 2552 }
2553 2553
@@ -2582,7 +2582,7 @@ xfs_remove(
2582 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2582 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2583 &first_block, &free_list, resblks); 2583 &first_block, &free_list, resblks);
2584 if (error) { 2584 if (error) {
2585 ASSERT(error != ENOENT); 2585 ASSERT(error != -ENOENT);
2586 goto out_bmap_cancel; 2586 goto out_bmap_cancel;
2587 } 2587 }
2588 2588
@@ -2702,7 +2702,7 @@ xfs_rename(
2702 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 2702 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2703 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 2703 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
2704 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2704 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
2705 if (error == ENOSPC) { 2705 if (error == -ENOSPC) {
2706 spaceres = 0; 2706 spaceres = 0;
2707 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2707 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
2708 } 2708 }
@@ -2747,7 +2747,7 @@ xfs_rename(
2747 */ 2747 */
2748 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2748 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2749 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2749 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
2750 error = XFS_ERROR(EXDEV); 2750 error = -EXDEV;
2751 goto error_return; 2751 goto error_return;
2752 } 2752 }
2753 2753
@@ -2770,7 +2770,7 @@ xfs_rename(
2770 error = xfs_dir_createname(tp, target_dp, target_name, 2770 error = xfs_dir_createname(tp, target_dp, target_name,
2771 src_ip->i_ino, &first_block, 2771 src_ip->i_ino, &first_block,
2772 &free_list, spaceres); 2772 &free_list, spaceres);
2773 if (error == ENOSPC) 2773 if (error == -ENOSPC)
2774 goto error_return; 2774 goto error_return;
2775 if (error) 2775 if (error)
2776 goto abort_return; 2776 goto abort_return;
@@ -2795,7 +2795,7 @@ xfs_rename(
2795 */ 2795 */
2796 if (!(xfs_dir_isempty(target_ip)) || 2796 if (!(xfs_dir_isempty(target_ip)) ||
2797 (target_ip->i_d.di_nlink > 2)) { 2797 (target_ip->i_d.di_nlink > 2)) {
2798 error = XFS_ERROR(EEXIST); 2798 error = -EEXIST;
2799 goto error_return; 2799 goto error_return;
2800 } 2800 }
2801 } 2801 }
@@ -2847,7 +2847,7 @@ xfs_rename(
2847 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, 2847 error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
2848 target_dp->i_ino, 2848 target_dp->i_ino,
2849 &first_block, &free_list, spaceres); 2849 &first_block, &free_list, spaceres);
2850 ASSERT(error != EEXIST); 2850 ASSERT(error != -EEXIST);
2851 if (error) 2851 if (error)
2852 goto abort_return; 2852 goto abort_return;
2853 } 2853 }
@@ -3055,7 +3055,7 @@ cluster_corrupt_out:
3055 if (bp->b_iodone) { 3055 if (bp->b_iodone) {
3056 XFS_BUF_UNDONE(bp); 3056 XFS_BUF_UNDONE(bp);
3057 xfs_buf_stale(bp); 3057 xfs_buf_stale(bp);
3058 xfs_buf_ioerror(bp, EIO); 3058 xfs_buf_ioerror(bp, -EIO);
3059 xfs_buf_ioend(bp, 0); 3059 xfs_buf_ioend(bp, 0);
3060 } else { 3060 } else {
3061 xfs_buf_stale(bp); 3061 xfs_buf_stale(bp);
@@ -3069,7 +3069,7 @@ cluster_corrupt_out:
3069 xfs_iflush_abort(iq, false); 3069 xfs_iflush_abort(iq, false);
3070 kmem_free(ilist); 3070 kmem_free(ilist);
3071 xfs_perag_put(pag); 3071 xfs_perag_put(pag);
3072 return XFS_ERROR(EFSCORRUPTED); 3072 return -EFSCORRUPTED;
3073} 3073}
3074 3074
3075/* 3075/*
@@ -3124,7 +3124,7 @@ xfs_iflush(
3124 * as we wait for an empty AIL as part of the unmount process. 3124 * as we wait for an empty AIL as part of the unmount process.
3125 */ 3125 */
3126 if (XFS_FORCED_SHUTDOWN(mp)) { 3126 if (XFS_FORCED_SHUTDOWN(mp)) {
3127 error = XFS_ERROR(EIO); 3127 error = -EIO;
3128 goto abort_out; 3128 goto abort_out;
3129 } 3129 }
3130 3130
@@ -3167,7 +3167,7 @@ corrupt_out:
3167 xfs_buf_relse(bp); 3167 xfs_buf_relse(bp);
3168 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3168 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3169cluster_corrupt_out: 3169cluster_corrupt_out:
3170 error = XFS_ERROR(EFSCORRUPTED); 3170 error = -EFSCORRUPTED;
3171abort_out: 3171abort_out:
3172 /* 3172 /*
3173 * Unlocks the flush lock 3173 * Unlocks the flush lock
@@ -3331,5 +3331,5 @@ xfs_iflush_int(
3331 return 0; 3331 return 0;
3332 3332
3333corrupt_out: 3333corrupt_out:
3334 return XFS_ERROR(EFSCORRUPTED); 3334 return -EFSCORRUPTED;
3335} 3335}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f72bffa67266..c10e3fadd9af 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -398,4 +398,14 @@ do { \
398 398
399extern struct kmem_zone *xfs_inode_zone; 399extern struct kmem_zone *xfs_inode_zone;
400 400
401/*
402 * Flags for read/write calls
403 */
404#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */
405#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */
406
407#define XFS_IO_FLAGS \
408 { XFS_IO_ISDIRECT, "DIRECT" }, \
409 { XFS_IO_INVIS, "INVIS"}
410
401#endif /* __XFS_INODE_H__ */ 411#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index a640137b3573..de5a7be36e60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -788,5 +788,5 @@ xfs_inode_item_format_convert(
788 in_f->ilf_boffset = in_f64->ilf_boffset; 788 in_f->ilf_boffset = in_f64->ilf_boffset;
789 return 0; 789 return 0;
790 } 790 }
791 return EFSCORRUPTED; 791 return -EFSCORRUPTED;
792} 792}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8bc1bbce7451..3799695b9249 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -207,7 +207,7 @@ xfs_open_by_handle(
207 struct path path; 207 struct path path;
208 208
209 if (!capable(CAP_SYS_ADMIN)) 209 if (!capable(CAP_SYS_ADMIN))
210 return -XFS_ERROR(EPERM); 210 return -EPERM;
211 211
212 dentry = xfs_handlereq_to_dentry(parfilp, hreq); 212 dentry = xfs_handlereq_to_dentry(parfilp, hreq);
213 if (IS_ERR(dentry)) 213 if (IS_ERR(dentry))
@@ -216,7 +216,7 @@ xfs_open_by_handle(
216 216
217 /* Restrict xfs_open_by_handle to directories & regular files. */ 217 /* Restrict xfs_open_by_handle to directories & regular files. */
218 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { 218 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
219 error = -XFS_ERROR(EPERM); 219 error = -EPERM;
220 goto out_dput; 220 goto out_dput;
221 } 221 }
222 222
@@ -228,18 +228,18 @@ xfs_open_by_handle(
228 fmode = OPEN_FMODE(permflag); 228 fmode = OPEN_FMODE(permflag);
229 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && 229 if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
230 (fmode & FMODE_WRITE) && IS_APPEND(inode)) { 230 (fmode & FMODE_WRITE) && IS_APPEND(inode)) {
231 error = -XFS_ERROR(EPERM); 231 error = -EPERM;
232 goto out_dput; 232 goto out_dput;
233 } 233 }
234 234
235 if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) { 235 if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
236 error = -XFS_ERROR(EACCES); 236 error = -EACCES;
237 goto out_dput; 237 goto out_dput;
238 } 238 }
239 239
240 /* Can't write directories. */ 240 /* Can't write directories. */
241 if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) { 241 if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
242 error = -XFS_ERROR(EISDIR); 242 error = -EISDIR;
243 goto out_dput; 243 goto out_dput;
244 } 244 }
245 245
@@ -282,7 +282,7 @@ xfs_readlink_by_handle(
282 int error; 282 int error;
283 283
284 if (!capable(CAP_SYS_ADMIN)) 284 if (!capable(CAP_SYS_ADMIN))
285 return -XFS_ERROR(EPERM); 285 return -EPERM;
286 286
287 dentry = xfs_handlereq_to_dentry(parfilp, hreq); 287 dentry = xfs_handlereq_to_dentry(parfilp, hreq);
288 if (IS_ERR(dentry)) 288 if (IS_ERR(dentry))
@@ -290,22 +290,22 @@ xfs_readlink_by_handle(
290 290
291 /* Restrict this handle operation to symlinks only. */ 291 /* Restrict this handle operation to symlinks only. */
292 if (!S_ISLNK(dentry->d_inode->i_mode)) { 292 if (!S_ISLNK(dentry->d_inode->i_mode)) {
293 error = -XFS_ERROR(EINVAL); 293 error = -EINVAL;
294 goto out_dput; 294 goto out_dput;
295 } 295 }
296 296
297 if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { 297 if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
298 error = -XFS_ERROR(EFAULT); 298 error = -EFAULT;
299 goto out_dput; 299 goto out_dput;
300 } 300 }
301 301
302 link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); 302 link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
303 if (!link) { 303 if (!link) {
304 error = -XFS_ERROR(ENOMEM); 304 error = -ENOMEM;
305 goto out_dput; 305 goto out_dput;
306 } 306 }
307 307
308 error = -xfs_readlink(XFS_I(dentry->d_inode), link); 308 error = xfs_readlink(XFS_I(dentry->d_inode), link);
309 if (error) 309 if (error)
310 goto out_kfree; 310 goto out_kfree;
311 error = readlink_copy(hreq->ohandle, olen, link); 311 error = readlink_copy(hreq->ohandle, olen, link);
@@ -330,10 +330,10 @@ xfs_set_dmattrs(
330 int error; 330 int error;
331 331
332 if (!capable(CAP_SYS_ADMIN)) 332 if (!capable(CAP_SYS_ADMIN))
333 return XFS_ERROR(EPERM); 333 return -EPERM;
334 334
335 if (XFS_FORCED_SHUTDOWN(mp)) 335 if (XFS_FORCED_SHUTDOWN(mp))
336 return XFS_ERROR(EIO); 336 return -EIO;
337 337
338 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 338 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
339 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 339 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
@@ -364,9 +364,9 @@ xfs_fssetdm_by_handle(
364 struct dentry *dentry; 364 struct dentry *dentry;
365 365
366 if (!capable(CAP_MKNOD)) 366 if (!capable(CAP_MKNOD))
367 return -XFS_ERROR(EPERM); 367 return -EPERM;
368 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) 368 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
369 return -XFS_ERROR(EFAULT); 369 return -EFAULT;
370 370
371 error = mnt_want_write_file(parfilp); 371 error = mnt_want_write_file(parfilp);
372 if (error) 372 if (error)
@@ -379,16 +379,16 @@ xfs_fssetdm_by_handle(
379 } 379 }
380 380
381 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { 381 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
382 error = -XFS_ERROR(EPERM); 382 error = -EPERM;
383 goto out; 383 goto out;
384 } 384 }
385 385
386 if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { 386 if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
387 error = -XFS_ERROR(EFAULT); 387 error = -EFAULT;
388 goto out; 388 goto out;
389 } 389 }
390 390
391 error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, 391 error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
392 fsd.fsd_dmstate); 392 fsd.fsd_dmstate);
393 393
394 out: 394 out:
@@ -409,18 +409,18 @@ xfs_attrlist_by_handle(
409 char *kbuf; 409 char *kbuf;
410 410
411 if (!capable(CAP_SYS_ADMIN)) 411 if (!capable(CAP_SYS_ADMIN))
412 return -XFS_ERROR(EPERM); 412 return -EPERM;
413 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) 413 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
414 return -XFS_ERROR(EFAULT); 414 return -EFAULT;
415 if (al_hreq.buflen < sizeof(struct attrlist) || 415 if (al_hreq.buflen < sizeof(struct attrlist) ||
416 al_hreq.buflen > XATTR_LIST_MAX) 416 al_hreq.buflen > XATTR_LIST_MAX)
417 return -XFS_ERROR(EINVAL); 417 return -EINVAL;
418 418
419 /* 419 /*
420 * Reject flags, only allow namespaces. 420 * Reject flags, only allow namespaces.
421 */ 421 */
422 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) 422 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
423 return -XFS_ERROR(EINVAL); 423 return -EINVAL;
424 424
425 dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); 425 dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
426 if (IS_ERR(dentry)) 426 if (IS_ERR(dentry))
@@ -431,7 +431,7 @@ xfs_attrlist_by_handle(
431 goto out_dput; 431 goto out_dput;
432 432
433 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; 433 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
434 error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, 434 error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
435 al_hreq.flags, cursor); 435 al_hreq.flags, cursor);
436 if (error) 436 if (error)
437 goto out_kfree; 437 goto out_kfree;
@@ -455,20 +455,20 @@ xfs_attrmulti_attr_get(
455 __uint32_t flags) 455 __uint32_t flags)
456{ 456{
457 unsigned char *kbuf; 457 unsigned char *kbuf;
458 int error = EFAULT; 458 int error = -EFAULT;
459 459
460 if (*len > XATTR_SIZE_MAX) 460 if (*len > XATTR_SIZE_MAX)
461 return EINVAL; 461 return -EINVAL;
462 kbuf = kmem_zalloc_large(*len, KM_SLEEP); 462 kbuf = kmem_zalloc_large(*len, KM_SLEEP);
463 if (!kbuf) 463 if (!kbuf)
464 return ENOMEM; 464 return -ENOMEM;
465 465
466 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); 466 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
467 if (error) 467 if (error)
468 goto out_kfree; 468 goto out_kfree;
469 469
470 if (copy_to_user(ubuf, kbuf, *len)) 470 if (copy_to_user(ubuf, kbuf, *len))
471 error = EFAULT; 471 error = -EFAULT;
472 472
473out_kfree: 473out_kfree:
474 kmem_free(kbuf); 474 kmem_free(kbuf);
@@ -484,20 +484,17 @@ xfs_attrmulti_attr_set(
484 __uint32_t flags) 484 __uint32_t flags)
485{ 485{
486 unsigned char *kbuf; 486 unsigned char *kbuf;
487 int error = EFAULT;
488 487
489 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 488 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
490 return EPERM; 489 return -EPERM;
491 if (len > XATTR_SIZE_MAX) 490 if (len > XATTR_SIZE_MAX)
492 return EINVAL; 491 return -EINVAL;
493 492
494 kbuf = memdup_user(ubuf, len); 493 kbuf = memdup_user(ubuf, len);
495 if (IS_ERR(kbuf)) 494 if (IS_ERR(kbuf))
496 return PTR_ERR(kbuf); 495 return PTR_ERR(kbuf);
497 496
498 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 497 return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
499
500 return error;
501} 498}
502 499
503int 500int
@@ -507,7 +504,7 @@ xfs_attrmulti_attr_remove(
507 __uint32_t flags) 504 __uint32_t flags)
508{ 505{
509 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 506 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
510 return EPERM; 507 return -EPERM;
511 return xfs_attr_remove(XFS_I(inode), name, flags); 508 return xfs_attr_remove(XFS_I(inode), name, flags);
512} 509}
513 510
@@ -524,9 +521,9 @@ xfs_attrmulti_by_handle(
524 unsigned char *attr_name; 521 unsigned char *attr_name;
525 522
526 if (!capable(CAP_SYS_ADMIN)) 523 if (!capable(CAP_SYS_ADMIN))
527 return -XFS_ERROR(EPERM); 524 return -EPERM;
528 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) 525 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
529 return -XFS_ERROR(EFAULT); 526 return -EFAULT;
530 527
531 /* overflow check */ 528 /* overflow check */
532 if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) 529 if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
@@ -536,18 +533,18 @@ xfs_attrmulti_by_handle(
536 if (IS_ERR(dentry)) 533 if (IS_ERR(dentry))
537 return PTR_ERR(dentry); 534 return PTR_ERR(dentry);
538 535
539 error = E2BIG; 536 error = -E2BIG;
540 size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); 537 size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
541 if (!size || size > 16 * PAGE_SIZE) 538 if (!size || size > 16 * PAGE_SIZE)
542 goto out_dput; 539 goto out_dput;
543 540
544 ops = memdup_user(am_hreq.ops, size); 541 ops = memdup_user(am_hreq.ops, size);
545 if (IS_ERR(ops)) { 542 if (IS_ERR(ops)) {
546 error = -PTR_ERR(ops); 543 error = PTR_ERR(ops);
547 goto out_dput; 544 goto out_dput;
548 } 545 }
549 546
550 error = ENOMEM; 547 error = -ENOMEM;
551 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 548 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
552 if (!attr_name) 549 if (!attr_name)
553 goto out_kfree_ops; 550 goto out_kfree_ops;
@@ -557,7 +554,7 @@ xfs_attrmulti_by_handle(
557 ops[i].am_error = strncpy_from_user((char *)attr_name, 554 ops[i].am_error = strncpy_from_user((char *)attr_name,
558 ops[i].am_attrname, MAXNAMELEN); 555 ops[i].am_attrname, MAXNAMELEN);
559 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 556 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
560 error = ERANGE; 557 error = -ERANGE;
561 if (ops[i].am_error < 0) 558 if (ops[i].am_error < 0)
562 break; 559 break;
563 560
@@ -588,19 +585,19 @@ xfs_attrmulti_by_handle(
588 mnt_drop_write_file(parfilp); 585 mnt_drop_write_file(parfilp);
589 break; 586 break;
590 default: 587 default:
591 ops[i].am_error = EINVAL; 588 ops[i].am_error = -EINVAL;
592 } 589 }
593 } 590 }
594 591
595 if (copy_to_user(am_hreq.ops, ops, size)) 592 if (copy_to_user(am_hreq.ops, ops, size))
596 error = XFS_ERROR(EFAULT); 593 error = -EFAULT;
597 594
598 kfree(attr_name); 595 kfree(attr_name);
599 out_kfree_ops: 596 out_kfree_ops:
600 kfree(ops); 597 kfree(ops);
601 out_dput: 598 out_dput:
602 dput(dentry); 599 dput(dentry);
603 return -error; 600 return error;
604} 601}
605 602
606int 603int
@@ -625,16 +622,16 @@ xfs_ioc_space(
625 */ 622 */
626 if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && 623 if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
627 !capable(CAP_SYS_ADMIN)) 624 !capable(CAP_SYS_ADMIN))
628 return -XFS_ERROR(EPERM); 625 return -EPERM;
629 626
630 if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) 627 if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
631 return -XFS_ERROR(EPERM); 628 return -EPERM;
632 629
633 if (!(filp->f_mode & FMODE_WRITE)) 630 if (!(filp->f_mode & FMODE_WRITE))
634 return -XFS_ERROR(EBADF); 631 return -EBADF;
635 632
636 if (!S_ISREG(inode->i_mode)) 633 if (!S_ISREG(inode->i_mode))
637 return -XFS_ERROR(EINVAL); 634 return -EINVAL;
638 635
639 error = mnt_want_write_file(filp); 636 error = mnt_want_write_file(filp);
640 if (error) 637 if (error)
@@ -652,7 +649,7 @@ xfs_ioc_space(
652 bf->l_start += XFS_ISIZE(ip); 649 bf->l_start += XFS_ISIZE(ip);
653 break; 650 break;
654 default: 651 default:
655 error = XFS_ERROR(EINVAL); 652 error = -EINVAL;
656 goto out_unlock; 653 goto out_unlock;
657 } 654 }
658 655
@@ -669,7 +666,7 @@ xfs_ioc_space(
669 case XFS_IOC_UNRESVSP: 666 case XFS_IOC_UNRESVSP:
670 case XFS_IOC_UNRESVSP64: 667 case XFS_IOC_UNRESVSP64:
671 if (bf->l_len <= 0) { 668 if (bf->l_len <= 0) {
672 error = XFS_ERROR(EINVAL); 669 error = -EINVAL;
673 goto out_unlock; 670 goto out_unlock;
674 } 671 }
675 break; 672 break;
@@ -682,7 +679,7 @@ xfs_ioc_space(
682 bf->l_start > mp->m_super->s_maxbytes || 679 bf->l_start > mp->m_super->s_maxbytes ||
683 bf->l_start + bf->l_len < 0 || 680 bf->l_start + bf->l_len < 0 ||
684 bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { 681 bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) {
685 error = XFS_ERROR(EINVAL); 682 error = -EINVAL;
686 goto out_unlock; 683 goto out_unlock;
687 } 684 }
688 685
@@ -723,7 +720,7 @@ xfs_ioc_space(
723 break; 720 break;
724 default: 721 default:
725 ASSERT(0); 722 ASSERT(0);
726 error = XFS_ERROR(EINVAL); 723 error = -EINVAL;
727 } 724 }
728 725
729 if (error) 726 if (error)
@@ -739,7 +736,7 @@ xfs_ioc_space(
739 xfs_ilock(ip, XFS_ILOCK_EXCL); 736 xfs_ilock(ip, XFS_ILOCK_EXCL);
740 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 737 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
741 738
742 if (!(ioflags & IO_INVIS)) { 739 if (!(ioflags & XFS_IO_INVIS)) {
743 ip->i_d.di_mode &= ~S_ISUID; 740 ip->i_d.di_mode &= ~S_ISUID;
744 if (ip->i_d.di_mode & S_IXGRP) 741 if (ip->i_d.di_mode & S_IXGRP)
745 ip->i_d.di_mode &= ~S_ISGID; 742 ip->i_d.di_mode &= ~S_ISGID;
@@ -759,7 +756,7 @@ xfs_ioc_space(
759out_unlock: 756out_unlock:
760 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 757 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
761 mnt_drop_write_file(filp); 758 mnt_drop_write_file(filp);
762 return -error; 759 return error;
763} 760}
764 761
765STATIC int 762STATIC int
@@ -781,41 +778,41 @@ xfs_ioc_bulkstat(
781 return -EPERM; 778 return -EPERM;
782 779
783 if (XFS_FORCED_SHUTDOWN(mp)) 780 if (XFS_FORCED_SHUTDOWN(mp))
784 return -XFS_ERROR(EIO); 781 return -EIO;
785 782
786 if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) 783 if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
787 return -XFS_ERROR(EFAULT); 784 return -EFAULT;
788 785
789 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 786 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
790 return -XFS_ERROR(EFAULT); 787 return -EFAULT;
791 788
792 if ((count = bulkreq.icount) <= 0) 789 if ((count = bulkreq.icount) <= 0)
793 return -XFS_ERROR(EINVAL); 790 return -EINVAL;
794 791
795 if (bulkreq.ubuffer == NULL) 792 if (bulkreq.ubuffer == NULL)
796 return -XFS_ERROR(EINVAL); 793 return -EINVAL;
797 794
798 if (cmd == XFS_IOC_FSINUMBERS) 795 if (cmd == XFS_IOC_FSINUMBERS)
799 error = xfs_inumbers(mp, &inlast, &count, 796 error = xfs_inumbers(mp, &inlast, &count,
800 bulkreq.ubuffer, xfs_inumbers_fmt); 797 bulkreq.ubuffer, xfs_inumbers_fmt);
801 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) 798 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
802 error = xfs_bulkstat_single(mp, &inlast, 799 error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
803 bulkreq.ubuffer, &done); 800 sizeof(xfs_bstat_t), NULL, &done);
804 else /* XFS_IOC_FSBULKSTAT */ 801 else /* XFS_IOC_FSBULKSTAT */
805 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, 802 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
806 sizeof(xfs_bstat_t), bulkreq.ubuffer, 803 sizeof(xfs_bstat_t), bulkreq.ubuffer,
807 &done); 804 &done);
808 805
809 if (error) 806 if (error)
810 return -error; 807 return error;
811 808
812 if (bulkreq.ocount != NULL) { 809 if (bulkreq.ocount != NULL) {
813 if (copy_to_user(bulkreq.lastip, &inlast, 810 if (copy_to_user(bulkreq.lastip, &inlast,
814 sizeof(xfs_ino_t))) 811 sizeof(xfs_ino_t)))
815 return -XFS_ERROR(EFAULT); 812 return -EFAULT;
816 813
817 if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 814 if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
818 return -XFS_ERROR(EFAULT); 815 return -EFAULT;
819 } 816 }
820 817
821 return 0; 818 return 0;
@@ -831,7 +828,7 @@ xfs_ioc_fsgeometry_v1(
831 828
832 error = xfs_fs_geometry(mp, &fsgeo, 3); 829 error = xfs_fs_geometry(mp, &fsgeo, 3);
833 if (error) 830 if (error)
834 return -error; 831 return error;
835 832
836 /* 833 /*
837 * Caller should have passed an argument of type 834 * Caller should have passed an argument of type
@@ -839,7 +836,7 @@ xfs_ioc_fsgeometry_v1(
839 * xfs_fsop_geom_t that xfs_fs_geometry() fills in. 836 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
840 */ 837 */
841 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) 838 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
842 return -XFS_ERROR(EFAULT); 839 return -EFAULT;
843 return 0; 840 return 0;
844} 841}
845 842
@@ -853,10 +850,10 @@ xfs_ioc_fsgeometry(
853 850
854 error = xfs_fs_geometry(mp, &fsgeo, 4); 851 error = xfs_fs_geometry(mp, &fsgeo, 4);
855 if (error) 852 if (error)
856 return -error; 853 return error;
857 854
858 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 855 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
859 return -XFS_ERROR(EFAULT); 856 return -EFAULT;
860 return 0; 857 return 0;
861} 858}
862 859
@@ -1041,16 +1038,16 @@ xfs_ioctl_setattr(
1041 trace_xfs_ioctl_setattr(ip); 1038 trace_xfs_ioctl_setattr(ip);
1042 1039
1043 if (mp->m_flags & XFS_MOUNT_RDONLY) 1040 if (mp->m_flags & XFS_MOUNT_RDONLY)
1044 return XFS_ERROR(EROFS); 1041 return -EROFS;
1045 if (XFS_FORCED_SHUTDOWN(mp)) 1042 if (XFS_FORCED_SHUTDOWN(mp))
1046 return XFS_ERROR(EIO); 1043 return -EIO;
1047 1044
1048 /* 1045 /*
1049 * Disallow 32bit project ids when projid32bit feature is not enabled. 1046 * Disallow 32bit project ids when projid32bit feature is not enabled.
1050 */ 1047 */
1051 if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && 1048 if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
1052 !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) 1049 !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
1053 return XFS_ERROR(EINVAL); 1050 return -EINVAL;
1054 1051
1055 /* 1052 /*
1056 * If disk quotas is on, we make sure that the dquots do exist on disk, 1053 * If disk quotas is on, we make sure that the dquots do exist on disk,
@@ -1088,7 +1085,7 @@ xfs_ioctl_setattr(
1088 * CAP_FSETID capability is applicable. 1085 * CAP_FSETID capability is applicable.
1089 */ 1086 */
1090 if (!inode_owner_or_capable(VFS_I(ip))) { 1087 if (!inode_owner_or_capable(VFS_I(ip))) {
1091 code = XFS_ERROR(EPERM); 1088 code = -EPERM;
1092 goto error_return; 1089 goto error_return;
1093 } 1090 }
1094 1091
@@ -1099,7 +1096,7 @@ xfs_ioctl_setattr(
1099 */ 1096 */
1100 if (mask & FSX_PROJID) { 1097 if (mask & FSX_PROJID) {
1101 if (current_user_ns() != &init_user_ns) { 1098 if (current_user_ns() != &init_user_ns) {
1102 code = XFS_ERROR(EINVAL); 1099 code = -EINVAL;
1103 goto error_return; 1100 goto error_return;
1104 } 1101 }
1105 1102
@@ -1122,7 +1119,7 @@ xfs_ioctl_setattr(
1122 if (ip->i_d.di_nextents && 1119 if (ip->i_d.di_nextents &&
1123 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != 1120 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
1124 fa->fsx_extsize)) { 1121 fa->fsx_extsize)) {
1125 code = XFS_ERROR(EINVAL); /* EFBIG? */ 1122 code = -EINVAL; /* EFBIG? */
1126 goto error_return; 1123 goto error_return;
1127 } 1124 }
1128 1125
@@ -1141,7 +1138,7 @@ xfs_ioctl_setattr(
1141 1138
1142 extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); 1139 extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
1143 if (extsize_fsb > MAXEXTLEN) { 1140 if (extsize_fsb > MAXEXTLEN) {
1144 code = XFS_ERROR(EINVAL); 1141 code = -EINVAL;
1145 goto error_return; 1142 goto error_return;
1146 } 1143 }
1147 1144
@@ -1153,13 +1150,13 @@ xfs_ioctl_setattr(
1153 } else { 1150 } else {
1154 size = mp->m_sb.sb_blocksize; 1151 size = mp->m_sb.sb_blocksize;
1155 if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { 1152 if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
1156 code = XFS_ERROR(EINVAL); 1153 code = -EINVAL;
1157 goto error_return; 1154 goto error_return;
1158 } 1155 }
1159 } 1156 }
1160 1157
1161 if (fa->fsx_extsize % size) { 1158 if (fa->fsx_extsize % size) {
1162 code = XFS_ERROR(EINVAL); 1159 code = -EINVAL;
1163 goto error_return; 1160 goto error_return;
1164 } 1161 }
1165 } 1162 }
@@ -1173,7 +1170,7 @@ xfs_ioctl_setattr(
1173 if ((ip->i_d.di_nextents || ip->i_delayed_blks) && 1170 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
1174 (XFS_IS_REALTIME_INODE(ip)) != 1171 (XFS_IS_REALTIME_INODE(ip)) !=
1175 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { 1172 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1176 code = XFS_ERROR(EINVAL); /* EFBIG? */ 1173 code = -EINVAL; /* EFBIG? */
1177 goto error_return; 1174 goto error_return;
1178 } 1175 }
1179 1176
@@ -1184,7 +1181,7 @@ xfs_ioctl_setattr(
1184 if ((mp->m_sb.sb_rblocks == 0) || 1181 if ((mp->m_sb.sb_rblocks == 0) ||
1185 (mp->m_sb.sb_rextsize == 0) || 1182 (mp->m_sb.sb_rextsize == 0) ||
1186 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { 1183 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
1187 code = XFS_ERROR(EINVAL); 1184 code = -EINVAL;
1188 goto error_return; 1185 goto error_return;
1189 } 1186 }
1190 } 1187 }
@@ -1198,7 +1195,7 @@ xfs_ioctl_setattr(
1198 (fa->fsx_xflags & 1195 (fa->fsx_xflags &
1199 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && 1196 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
1200 !capable(CAP_LINUX_IMMUTABLE)) { 1197 !capable(CAP_LINUX_IMMUTABLE)) {
1201 code = XFS_ERROR(EPERM); 1198 code = -EPERM;
1202 goto error_return; 1199 goto error_return;
1203 } 1200 }
1204 } 1201 }
@@ -1301,7 +1298,7 @@ xfs_ioc_fssetxattr(
1301 return error; 1298 return error;
1302 error = xfs_ioctl_setattr(ip, &fa, mask); 1299 error = xfs_ioctl_setattr(ip, &fa, mask);
1303 mnt_drop_write_file(filp); 1300 mnt_drop_write_file(filp);
1304 return -error; 1301 return error;
1305} 1302}
1306 1303
1307STATIC int 1304STATIC int
@@ -1346,7 +1343,7 @@ xfs_ioc_setxflags(
1346 return error; 1343 return error;
1347 error = xfs_ioctl_setattr(ip, &fa, mask); 1344 error = xfs_ioctl_setattr(ip, &fa, mask);
1348 mnt_drop_write_file(filp); 1345 mnt_drop_write_file(filp);
1349 return -error; 1346 return error;
1350} 1347}
1351 1348
1352STATIC int 1349STATIC int
@@ -1356,7 +1353,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
1356 1353
1357 /* copy only getbmap portion (not getbmapx) */ 1354 /* copy only getbmap portion (not getbmapx) */
1358 if (copy_to_user(base, bmv, sizeof(struct getbmap))) 1355 if (copy_to_user(base, bmv, sizeof(struct getbmap)))
1359 return XFS_ERROR(EFAULT); 1356 return -EFAULT;
1360 1357
1361 *ap += sizeof(struct getbmap); 1358 *ap += sizeof(struct getbmap);
1362 return 0; 1359 return 0;
@@ -1373,23 +1370,23 @@ xfs_ioc_getbmap(
1373 int error; 1370 int error;
1374 1371
1375 if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) 1372 if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
1376 return -XFS_ERROR(EFAULT); 1373 return -EFAULT;
1377 1374
1378 if (bmx.bmv_count < 2) 1375 if (bmx.bmv_count < 2)
1379 return -XFS_ERROR(EINVAL); 1376 return -EINVAL;
1380 1377
1381 bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); 1378 bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
1382 if (ioflags & IO_INVIS) 1379 if (ioflags & XFS_IO_INVIS)
1383 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; 1380 bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
1384 1381
1385 error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, 1382 error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
1386 (struct getbmap *)arg+1); 1383 (struct getbmap *)arg+1);
1387 if (error) 1384 if (error)
1388 return -error; 1385 return error;
1389 1386
1390 /* copy back header - only size of getbmap */ 1387 /* copy back header - only size of getbmap */
1391 if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) 1388 if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
1392 return -XFS_ERROR(EFAULT); 1389 return -EFAULT;
1393 return 0; 1390 return 0;
1394} 1391}
1395 1392
@@ -1399,7 +1396,7 @@ xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
1399 struct getbmapx __user *base = *ap; 1396 struct getbmapx __user *base = *ap;
1400 1397
1401 if (copy_to_user(base, bmv, sizeof(struct getbmapx))) 1398 if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
1402 return XFS_ERROR(EFAULT); 1399 return -EFAULT;
1403 1400
1404 *ap += sizeof(struct getbmapx); 1401 *ap += sizeof(struct getbmapx);
1405 return 0; 1402 return 0;
@@ -1414,22 +1411,22 @@ xfs_ioc_getbmapx(
1414 int error; 1411 int error;
1415 1412
1416 if (copy_from_user(&bmx, arg, sizeof(bmx))) 1413 if (copy_from_user(&bmx, arg, sizeof(bmx)))
1417 return -XFS_ERROR(EFAULT); 1414 return -EFAULT;
1418 1415
1419 if (bmx.bmv_count < 2) 1416 if (bmx.bmv_count < 2)
1420 return -XFS_ERROR(EINVAL); 1417 return -EINVAL;
1421 1418
1422 if (bmx.bmv_iflags & (~BMV_IF_VALID)) 1419 if (bmx.bmv_iflags & (~BMV_IF_VALID))
1423 return -XFS_ERROR(EINVAL); 1420 return -EINVAL;
1424 1421
1425 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, 1422 error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
1426 (struct getbmapx *)arg+1); 1423 (struct getbmapx *)arg+1);
1427 if (error) 1424 if (error)
1428 return -error; 1425 return error;
1429 1426
1430 /* copy back header */ 1427 /* copy back header */
1431 if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) 1428 if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
1432 return -XFS_ERROR(EFAULT); 1429 return -EFAULT;
1433 1430
1434 return 0; 1431 return 0;
1435} 1432}
@@ -1445,33 +1442,33 @@ xfs_ioc_swapext(
1445 /* Pull information for the target fd */ 1442 /* Pull information for the target fd */
1446 f = fdget((int)sxp->sx_fdtarget); 1443 f = fdget((int)sxp->sx_fdtarget);
1447 if (!f.file) { 1444 if (!f.file) {
1448 error = XFS_ERROR(EINVAL); 1445 error = -EINVAL;
1449 goto out; 1446 goto out;
1450 } 1447 }
1451 1448
1452 if (!(f.file->f_mode & FMODE_WRITE) || 1449 if (!(f.file->f_mode & FMODE_WRITE) ||
1453 !(f.file->f_mode & FMODE_READ) || 1450 !(f.file->f_mode & FMODE_READ) ||
1454 (f.file->f_flags & O_APPEND)) { 1451 (f.file->f_flags & O_APPEND)) {
1455 error = XFS_ERROR(EBADF); 1452 error = -EBADF;
1456 goto out_put_file; 1453 goto out_put_file;
1457 } 1454 }
1458 1455
1459 tmp = fdget((int)sxp->sx_fdtmp); 1456 tmp = fdget((int)sxp->sx_fdtmp);
1460 if (!tmp.file) { 1457 if (!tmp.file) {
1461 error = XFS_ERROR(EINVAL); 1458 error = -EINVAL;
1462 goto out_put_file; 1459 goto out_put_file;
1463 } 1460 }
1464 1461
1465 if (!(tmp.file->f_mode & FMODE_WRITE) || 1462 if (!(tmp.file->f_mode & FMODE_WRITE) ||
1466 !(tmp.file->f_mode & FMODE_READ) || 1463 !(tmp.file->f_mode & FMODE_READ) ||
1467 (tmp.file->f_flags & O_APPEND)) { 1464 (tmp.file->f_flags & O_APPEND)) {
1468 error = XFS_ERROR(EBADF); 1465 error = -EBADF;
1469 goto out_put_tmp_file; 1466 goto out_put_tmp_file;
1470 } 1467 }
1471 1468
1472 if (IS_SWAPFILE(file_inode(f.file)) || 1469 if (IS_SWAPFILE(file_inode(f.file)) ||
1473 IS_SWAPFILE(file_inode(tmp.file))) { 1470 IS_SWAPFILE(file_inode(tmp.file))) {
1474 error = XFS_ERROR(EINVAL); 1471 error = -EINVAL;
1475 goto out_put_tmp_file; 1472 goto out_put_tmp_file;
1476 } 1473 }
1477 1474
@@ -1479,17 +1476,17 @@ xfs_ioc_swapext(
1479 tip = XFS_I(file_inode(tmp.file)); 1476 tip = XFS_I(file_inode(tmp.file));
1480 1477
1481 if (ip->i_mount != tip->i_mount) { 1478 if (ip->i_mount != tip->i_mount) {
1482 error = XFS_ERROR(EINVAL); 1479 error = -EINVAL;
1483 goto out_put_tmp_file; 1480 goto out_put_tmp_file;
1484 } 1481 }
1485 1482
1486 if (ip->i_ino == tip->i_ino) { 1483 if (ip->i_ino == tip->i_ino) {
1487 error = XFS_ERROR(EINVAL); 1484 error = -EINVAL;
1488 goto out_put_tmp_file; 1485 goto out_put_tmp_file;
1489 } 1486 }
1490 1487
1491 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1488 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1492 error = XFS_ERROR(EIO); 1489 error = -EIO;
1493 goto out_put_tmp_file; 1490 goto out_put_tmp_file;
1494 } 1491 }
1495 1492
@@ -1523,7 +1520,7 @@ xfs_file_ioctl(
1523 int error; 1520 int error;
1524 1521
1525 if (filp->f_mode & FMODE_NOCMTIME) 1522 if (filp->f_mode & FMODE_NOCMTIME)
1526 ioflags |= IO_INVIS; 1523 ioflags |= XFS_IO_INVIS;
1527 1524
1528 trace_xfs_file_ioctl(ip); 1525 trace_xfs_file_ioctl(ip);
1529 1526
@@ -1542,7 +1539,7 @@ xfs_file_ioctl(
1542 xfs_flock64_t bf; 1539 xfs_flock64_t bf;
1543 1540
1544 if (copy_from_user(&bf, arg, sizeof(bf))) 1541 if (copy_from_user(&bf, arg, sizeof(bf)))
1545 return -XFS_ERROR(EFAULT); 1542 return -EFAULT;
1546 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); 1543 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
1547 } 1544 }
1548 case XFS_IOC_DIOINFO: { 1545 case XFS_IOC_DIOINFO: {
@@ -1555,7 +1552,7 @@ xfs_file_ioctl(
1555 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); 1552 da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
1556 1553
1557 if (copy_to_user(arg, &da, sizeof(da))) 1554 if (copy_to_user(arg, &da, sizeof(da)))
1558 return -XFS_ERROR(EFAULT); 1555 return -EFAULT;
1559 return 0; 1556 return 0;
1560 } 1557 }
1561 1558
@@ -1588,7 +1585,7 @@ xfs_file_ioctl(
1588 struct fsdmidata dmi; 1585 struct fsdmidata dmi;
1589 1586
1590 if (copy_from_user(&dmi, arg, sizeof(dmi))) 1587 if (copy_from_user(&dmi, arg, sizeof(dmi)))
1591 return -XFS_ERROR(EFAULT); 1588 return -EFAULT;
1592 1589
1593 error = mnt_want_write_file(filp); 1590 error = mnt_want_write_file(filp);
1594 if (error) 1591 if (error)
@@ -1597,7 +1594,7 @@ xfs_file_ioctl(
1597 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, 1594 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
1598 dmi.fsd_dmstate); 1595 dmi.fsd_dmstate);
1599 mnt_drop_write_file(filp); 1596 mnt_drop_write_file(filp);
1600 return -error; 1597 return error;
1601 } 1598 }
1602 1599
1603 case XFS_IOC_GETBMAP: 1600 case XFS_IOC_GETBMAP:
@@ -1613,14 +1610,14 @@ xfs_file_ioctl(
1613 xfs_fsop_handlereq_t hreq; 1610 xfs_fsop_handlereq_t hreq;
1614 1611
1615 if (copy_from_user(&hreq, arg, sizeof(hreq))) 1612 if (copy_from_user(&hreq, arg, sizeof(hreq)))
1616 return -XFS_ERROR(EFAULT); 1613 return -EFAULT;
1617 return xfs_find_handle(cmd, &hreq); 1614 return xfs_find_handle(cmd, &hreq);
1618 } 1615 }
1619 case XFS_IOC_OPEN_BY_HANDLE: { 1616 case XFS_IOC_OPEN_BY_HANDLE: {
1620 xfs_fsop_handlereq_t hreq; 1617 xfs_fsop_handlereq_t hreq;
1621 1618
1622 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) 1619 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
1623 return -XFS_ERROR(EFAULT); 1620 return -EFAULT;
1624 return xfs_open_by_handle(filp, &hreq); 1621 return xfs_open_by_handle(filp, &hreq);
1625 } 1622 }
1626 case XFS_IOC_FSSETDM_BY_HANDLE: 1623 case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -1630,7 +1627,7 @@ xfs_file_ioctl(
1630 xfs_fsop_handlereq_t hreq; 1627 xfs_fsop_handlereq_t hreq;
1631 1628
1632 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) 1629 if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
1633 return -XFS_ERROR(EFAULT); 1630 return -EFAULT;
1634 return xfs_readlink_by_handle(filp, &hreq); 1631 return xfs_readlink_by_handle(filp, &hreq);
1635 } 1632 }
1636 case XFS_IOC_ATTRLIST_BY_HANDLE: 1633 case XFS_IOC_ATTRLIST_BY_HANDLE:
@@ -1643,13 +1640,13 @@ xfs_file_ioctl(
1643 struct xfs_swapext sxp; 1640 struct xfs_swapext sxp;
1644 1641
1645 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) 1642 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
1646 return -XFS_ERROR(EFAULT); 1643 return -EFAULT;
1647 error = mnt_want_write_file(filp); 1644 error = mnt_want_write_file(filp);
1648 if (error) 1645 if (error)
1649 return error; 1646 return error;
1650 error = xfs_ioc_swapext(&sxp); 1647 error = xfs_ioc_swapext(&sxp);
1651 mnt_drop_write_file(filp); 1648 mnt_drop_write_file(filp);
1652 return -error; 1649 return error;
1653 } 1650 }
1654 1651
1655 case XFS_IOC_FSCOUNTS: { 1652 case XFS_IOC_FSCOUNTS: {
@@ -1657,10 +1654,10 @@ xfs_file_ioctl(
1657 1654
1658 error = xfs_fs_counts(mp, &out); 1655 error = xfs_fs_counts(mp, &out);
1659 if (error) 1656 if (error)
1660 return -error; 1657 return error;
1661 1658
1662 if (copy_to_user(arg, &out, sizeof(out))) 1659 if (copy_to_user(arg, &out, sizeof(out)))
1663 return -XFS_ERROR(EFAULT); 1660 return -EFAULT;
1664 return 0; 1661 return 0;
1665 } 1662 }
1666 1663
@@ -1672,10 +1669,10 @@ xfs_file_ioctl(
1672 return -EPERM; 1669 return -EPERM;
1673 1670
1674 if (mp->m_flags & XFS_MOUNT_RDONLY) 1671 if (mp->m_flags & XFS_MOUNT_RDONLY)
1675 return -XFS_ERROR(EROFS); 1672 return -EROFS;
1676 1673
1677 if (copy_from_user(&inout, arg, sizeof(inout))) 1674 if (copy_from_user(&inout, arg, sizeof(inout)))
1678 return -XFS_ERROR(EFAULT); 1675 return -EFAULT;
1679 1676
1680 error = mnt_want_write_file(filp); 1677 error = mnt_want_write_file(filp);
1681 if (error) 1678 if (error)
@@ -1686,10 +1683,10 @@ xfs_file_ioctl(
1686 error = xfs_reserve_blocks(mp, &in, &inout); 1683 error = xfs_reserve_blocks(mp, &in, &inout);
1687 mnt_drop_write_file(filp); 1684 mnt_drop_write_file(filp);
1688 if (error) 1685 if (error)
1689 return -error; 1686 return error;
1690 1687
1691 if (copy_to_user(arg, &inout, sizeof(inout))) 1688 if (copy_to_user(arg, &inout, sizeof(inout)))
1692 return -XFS_ERROR(EFAULT); 1689 return -EFAULT;
1693 return 0; 1690 return 0;
1694 } 1691 }
1695 1692
@@ -1701,10 +1698,10 @@ xfs_file_ioctl(
1701 1698
1702 error = xfs_reserve_blocks(mp, NULL, &out); 1699 error = xfs_reserve_blocks(mp, NULL, &out);
1703 if (error) 1700 if (error)
1704 return -error; 1701 return error;
1705 1702
1706 if (copy_to_user(arg, &out, sizeof(out))) 1703 if (copy_to_user(arg, &out, sizeof(out)))
1707 return -XFS_ERROR(EFAULT); 1704 return -EFAULT;
1708 1705
1709 return 0; 1706 return 0;
1710 } 1707 }
@@ -1713,42 +1710,42 @@ xfs_file_ioctl(
1713 xfs_growfs_data_t in; 1710 xfs_growfs_data_t in;
1714 1711
1715 if (copy_from_user(&in, arg, sizeof(in))) 1712 if (copy_from_user(&in, arg, sizeof(in)))
1716 return -XFS_ERROR(EFAULT); 1713 return -EFAULT;
1717 1714
1718 error = mnt_want_write_file(filp); 1715 error = mnt_want_write_file(filp);
1719 if (error) 1716 if (error)
1720 return error; 1717 return error;
1721 error = xfs_growfs_data(mp, &in); 1718 error = xfs_growfs_data(mp, &in);
1722 mnt_drop_write_file(filp); 1719 mnt_drop_write_file(filp);
1723 return -error; 1720 return error;
1724 } 1721 }
1725 1722
1726 case XFS_IOC_FSGROWFSLOG: { 1723 case XFS_IOC_FSGROWFSLOG: {
1727 xfs_growfs_log_t in; 1724 xfs_growfs_log_t in;
1728 1725
1729 if (copy_from_user(&in, arg, sizeof(in))) 1726 if (copy_from_user(&in, arg, sizeof(in)))
1730 return -XFS_ERROR(EFAULT); 1727 return -EFAULT;
1731 1728
1732 error = mnt_want_write_file(filp); 1729 error = mnt_want_write_file(filp);
1733 if (error) 1730 if (error)
1734 return error; 1731 return error;
1735 error = xfs_growfs_log(mp, &in); 1732 error = xfs_growfs_log(mp, &in);
1736 mnt_drop_write_file(filp); 1733 mnt_drop_write_file(filp);
1737 return -error; 1734 return error;
1738 } 1735 }
1739 1736
1740 case XFS_IOC_FSGROWFSRT: { 1737 case XFS_IOC_FSGROWFSRT: {
1741 xfs_growfs_rt_t in; 1738 xfs_growfs_rt_t in;
1742 1739
1743 if (copy_from_user(&in, arg, sizeof(in))) 1740 if (copy_from_user(&in, arg, sizeof(in)))
1744 return -XFS_ERROR(EFAULT); 1741 return -EFAULT;
1745 1742
1746 error = mnt_want_write_file(filp); 1743 error = mnt_want_write_file(filp);
1747 if (error) 1744 if (error)
1748 return error; 1745 return error;
1749 error = xfs_growfs_rt(mp, &in); 1746 error = xfs_growfs_rt(mp, &in);
1750 mnt_drop_write_file(filp); 1747 mnt_drop_write_file(filp);
1751 return -error; 1748 return error;
1752 } 1749 }
1753 1750
1754 case XFS_IOC_GOINGDOWN: { 1751 case XFS_IOC_GOINGDOWN: {
@@ -1758,10 +1755,9 @@ xfs_file_ioctl(
1758 return -EPERM; 1755 return -EPERM;
1759 1756
1760 if (get_user(in, (__uint32_t __user *)arg)) 1757 if (get_user(in, (__uint32_t __user *)arg))
1761 return -XFS_ERROR(EFAULT); 1758 return -EFAULT;
1762 1759
1763 error = xfs_fs_goingdown(mp, in); 1760 return xfs_fs_goingdown(mp, in);
1764 return -error;
1765 } 1761 }
1766 1762
1767 case XFS_IOC_ERROR_INJECTION: { 1763 case XFS_IOC_ERROR_INJECTION: {
@@ -1771,18 +1767,16 @@ xfs_file_ioctl(
1771 return -EPERM; 1767 return -EPERM;
1772 1768
1773 if (copy_from_user(&in, arg, sizeof(in))) 1769 if (copy_from_user(&in, arg, sizeof(in)))
1774 return -XFS_ERROR(EFAULT); 1770 return -EFAULT;
1775 1771
1776 error = xfs_errortag_add(in.errtag, mp); 1772 return xfs_errortag_add(in.errtag, mp);
1777 return -error;
1778 } 1773 }
1779 1774
1780 case XFS_IOC_ERROR_CLEARALL: 1775 case XFS_IOC_ERROR_CLEARALL:
1781 if (!capable(CAP_SYS_ADMIN)) 1776 if (!capable(CAP_SYS_ADMIN))
1782 return -EPERM; 1777 return -EPERM;
1783 1778
1784 error = xfs_errortag_clearall(mp, 1); 1779 return xfs_errortag_clearall(mp, 1);
1785 return -error;
1786 1780
1787 case XFS_IOC_FREE_EOFBLOCKS: { 1781 case XFS_IOC_FREE_EOFBLOCKS: {
1788 struct xfs_fs_eofblocks eofb; 1782 struct xfs_fs_eofblocks eofb;
@@ -1792,16 +1786,16 @@ xfs_file_ioctl(
1792 return -EPERM; 1786 return -EPERM;
1793 1787
1794 if (mp->m_flags & XFS_MOUNT_RDONLY) 1788 if (mp->m_flags & XFS_MOUNT_RDONLY)
1795 return -XFS_ERROR(EROFS); 1789 return -EROFS;
1796 1790
1797 if (copy_from_user(&eofb, arg, sizeof(eofb))) 1791 if (copy_from_user(&eofb, arg, sizeof(eofb)))
1798 return -XFS_ERROR(EFAULT); 1792 return -EFAULT;
1799 1793
1800 error = xfs_fs_eofblocks_from_user(&eofb, &keofb); 1794 error = xfs_fs_eofblocks_from_user(&eofb, &keofb);
1801 if (error) 1795 if (error)
1802 return -error; 1796 return error;
1803 1797
1804 return -xfs_icache_free_eofblocks(mp, &keofb); 1798 return xfs_icache_free_eofblocks(mp, &keofb);
1805 } 1799 }
1806 1800
1807 default: 1801 default:
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 944d5baa710a..a554646ff141 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -28,7 +28,6 @@
28#include "xfs_sb.h" 28#include "xfs_sb.h"
29#include "xfs_ag.h" 29#include "xfs_ag.h"
30#include "xfs_mount.h" 30#include "xfs_mount.h"
31#include "xfs_vnode.h"
32#include "xfs_inode.h" 31#include "xfs_inode.h"
33#include "xfs_itable.h" 32#include "xfs_itable.h"
34#include "xfs_error.h" 33#include "xfs_error.h"
@@ -56,7 +55,7 @@ xfs_compat_flock64_copyin(
56 get_user(bf->l_sysid, &arg32->l_sysid) || 55 get_user(bf->l_sysid, &arg32->l_sysid) ||
57 get_user(bf->l_pid, &arg32->l_pid) || 56 get_user(bf->l_pid, &arg32->l_pid) ||
58 copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) 57 copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
59 return -XFS_ERROR(EFAULT); 58 return -EFAULT;
60 return 0; 59 return 0;
61} 60}
62 61
@@ -70,10 +69,10 @@ xfs_compat_ioc_fsgeometry_v1(
70 69
71 error = xfs_fs_geometry(mp, &fsgeo, 3); 70 error = xfs_fs_geometry(mp, &fsgeo, 3);
72 if (error) 71 if (error)
73 return -error; 72 return error;
74 /* The 32-bit variant simply has some padding at the end */ 73 /* The 32-bit variant simply has some padding at the end */
75 if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) 74 if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
76 return -XFS_ERROR(EFAULT); 75 return -EFAULT;
77 return 0; 76 return 0;
78} 77}
79 78
@@ -84,7 +83,7 @@ xfs_compat_growfs_data_copyin(
84{ 83{
85 if (get_user(in->newblocks, &arg32->newblocks) || 84 if (get_user(in->newblocks, &arg32->newblocks) ||
86 get_user(in->imaxpct, &arg32->imaxpct)) 85 get_user(in->imaxpct, &arg32->imaxpct))
87 return -XFS_ERROR(EFAULT); 86 return -EFAULT;
88 return 0; 87 return 0;
89} 88}
90 89
@@ -95,14 +94,14 @@ xfs_compat_growfs_rt_copyin(
95{ 94{
96 if (get_user(in->newblocks, &arg32->newblocks) || 95 if (get_user(in->newblocks, &arg32->newblocks) ||
97 get_user(in->extsize, &arg32->extsize)) 96 get_user(in->extsize, &arg32->extsize))
98 return -XFS_ERROR(EFAULT); 97 return -EFAULT;
99 return 0; 98 return 0;
100} 99}
101 100
102STATIC int 101STATIC int
103xfs_inumbers_fmt_compat( 102xfs_inumbers_fmt_compat(
104 void __user *ubuffer, 103 void __user *ubuffer,
105 const xfs_inogrp_t *buffer, 104 const struct xfs_inogrp *buffer,
106 long count, 105 long count,
107 long *written) 106 long *written)
108{ 107{
@@ -113,7 +112,7 @@ xfs_inumbers_fmt_compat(
113 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || 112 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
114 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || 113 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
115 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) 114 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
116 return -XFS_ERROR(EFAULT); 115 return -EFAULT;
117 } 116 }
118 *written = count * sizeof(*p32); 117 *written = count * sizeof(*p32);
119 return 0; 118 return 0;
@@ -132,7 +131,7 @@ xfs_ioctl32_bstime_copyin(
132 131
133 if (get_user(sec32, &bstime32->tv_sec) || 132 if (get_user(sec32, &bstime32->tv_sec) ||
134 get_user(bstime->tv_nsec, &bstime32->tv_nsec)) 133 get_user(bstime->tv_nsec, &bstime32->tv_nsec))
135 return -XFS_ERROR(EFAULT); 134 return -EFAULT;
136 bstime->tv_sec = sec32; 135 bstime->tv_sec = sec32;
137 return 0; 136 return 0;
138} 137}
@@ -164,7 +163,7 @@ xfs_ioctl32_bstat_copyin(
164 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || 163 get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
165 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || 164 get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
166 get_user(bstat->bs_aextents, &bstat32->bs_aextents)) 165 get_user(bstat->bs_aextents, &bstat32->bs_aextents))
167 return -XFS_ERROR(EFAULT); 166 return -EFAULT;
168 return 0; 167 return 0;
169} 168}
170 169
@@ -180,7 +179,7 @@ xfs_bstime_store_compat(
180 sec32 = p->tv_sec; 179 sec32 = p->tv_sec;
181 if (put_user(sec32, &p32->tv_sec) || 180 if (put_user(sec32, &p32->tv_sec) ||
182 put_user(p->tv_nsec, &p32->tv_nsec)) 181 put_user(p->tv_nsec, &p32->tv_nsec))
183 return -XFS_ERROR(EFAULT); 182 return -EFAULT;
184 return 0; 183 return 0;
185} 184}
186 185
@@ -195,7 +194,7 @@ xfs_bulkstat_one_fmt_compat(
195 compat_xfs_bstat_t __user *p32 = ubuffer; 194 compat_xfs_bstat_t __user *p32 = ubuffer;
196 195
197 if (ubsize < sizeof(*p32)) 196 if (ubsize < sizeof(*p32))
198 return XFS_ERROR(ENOMEM); 197 return -ENOMEM;
199 198
200 if (put_user(buffer->bs_ino, &p32->bs_ino) || 199 if (put_user(buffer->bs_ino, &p32->bs_ino) ||
201 put_user(buffer->bs_mode, &p32->bs_mode) || 200 put_user(buffer->bs_mode, &p32->bs_mode) ||
@@ -218,7 +217,7 @@ xfs_bulkstat_one_fmt_compat(
218 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || 217 put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
219 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 218 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
220 put_user(buffer->bs_aextents, &p32->bs_aextents)) 219 put_user(buffer->bs_aextents, &p32->bs_aextents))
221 return XFS_ERROR(EFAULT); 220 return -EFAULT;
222 if (ubused) 221 if (ubused)
223 *ubused = sizeof(*p32); 222 *ubused = sizeof(*p32);
224 return 0; 223 return 0;
@@ -256,30 +255,30 @@ xfs_compat_ioc_bulkstat(
256 /* should be called again (unused here, but used in dmapi) */ 255 /* should be called again (unused here, but used in dmapi) */
257 256
258 if (!capable(CAP_SYS_ADMIN)) 257 if (!capable(CAP_SYS_ADMIN))
259 return -XFS_ERROR(EPERM); 258 return -EPERM;
260 259
261 if (XFS_FORCED_SHUTDOWN(mp)) 260 if (XFS_FORCED_SHUTDOWN(mp))
262 return -XFS_ERROR(EIO); 261 return -EIO;
263 262
264 if (get_user(addr, &p32->lastip)) 263 if (get_user(addr, &p32->lastip))
265 return -XFS_ERROR(EFAULT); 264 return -EFAULT;
266 bulkreq.lastip = compat_ptr(addr); 265 bulkreq.lastip = compat_ptr(addr);
267 if (get_user(bulkreq.icount, &p32->icount) || 266 if (get_user(bulkreq.icount, &p32->icount) ||
268 get_user(addr, &p32->ubuffer)) 267 get_user(addr, &p32->ubuffer))
269 return -XFS_ERROR(EFAULT); 268 return -EFAULT;
270 bulkreq.ubuffer = compat_ptr(addr); 269 bulkreq.ubuffer = compat_ptr(addr);
271 if (get_user(addr, &p32->ocount)) 270 if (get_user(addr, &p32->ocount))
272 return -XFS_ERROR(EFAULT); 271 return -EFAULT;
273 bulkreq.ocount = compat_ptr(addr); 272 bulkreq.ocount = compat_ptr(addr);
274 273
275 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 274 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
276 return -XFS_ERROR(EFAULT); 275 return -EFAULT;
277 276
278 if ((count = bulkreq.icount) <= 0) 277 if ((count = bulkreq.icount) <= 0)
279 return -XFS_ERROR(EINVAL); 278 return -EINVAL;
280 279
281 if (bulkreq.ubuffer == NULL) 280 if (bulkreq.ubuffer == NULL)
282 return -XFS_ERROR(EINVAL); 281 return -EINVAL;
283 282
284 if (cmd == XFS_IOC_FSINUMBERS_32) { 283 if (cmd == XFS_IOC_FSINUMBERS_32) {
285 error = xfs_inumbers(mp, &inlast, &count, 284 error = xfs_inumbers(mp, &inlast, &count,
@@ -294,17 +293,17 @@ xfs_compat_ioc_bulkstat(
294 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), 293 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
295 bulkreq.ubuffer, &done); 294 bulkreq.ubuffer, &done);
296 } else 295 } else
297 error = XFS_ERROR(EINVAL); 296 error = -EINVAL;
298 if (error) 297 if (error)
299 return -error; 298 return error;
300 299
301 if (bulkreq.ocount != NULL) { 300 if (bulkreq.ocount != NULL) {
302 if (copy_to_user(bulkreq.lastip, &inlast, 301 if (copy_to_user(bulkreq.lastip, &inlast,
303 sizeof(xfs_ino_t))) 302 sizeof(xfs_ino_t)))
304 return -XFS_ERROR(EFAULT); 303 return -EFAULT;
305 304
306 if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 305 if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
307 return -XFS_ERROR(EFAULT); 306 return -EFAULT;
308 } 307 }
309 308
310 return 0; 309 return 0;
@@ -318,7 +317,7 @@ xfs_compat_handlereq_copyin(
318 compat_xfs_fsop_handlereq_t hreq32; 317 compat_xfs_fsop_handlereq_t hreq32;
319 318
320 if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) 319 if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
321 return -XFS_ERROR(EFAULT); 320 return -EFAULT;
322 321
323 hreq->fd = hreq32.fd; 322 hreq->fd = hreq32.fd;
324 hreq->path = compat_ptr(hreq32.path); 323 hreq->path = compat_ptr(hreq32.path);
@@ -352,19 +351,19 @@ xfs_compat_attrlist_by_handle(
352 char *kbuf; 351 char *kbuf;
353 352
354 if (!capable(CAP_SYS_ADMIN)) 353 if (!capable(CAP_SYS_ADMIN))
355 return -XFS_ERROR(EPERM); 354 return -EPERM;
356 if (copy_from_user(&al_hreq, arg, 355 if (copy_from_user(&al_hreq, arg,
357 sizeof(compat_xfs_fsop_attrlist_handlereq_t))) 356 sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
358 return -XFS_ERROR(EFAULT); 357 return -EFAULT;
359 if (al_hreq.buflen < sizeof(struct attrlist) || 358 if (al_hreq.buflen < sizeof(struct attrlist) ||
360 al_hreq.buflen > XATTR_LIST_MAX) 359 al_hreq.buflen > XATTR_LIST_MAX)
361 return -XFS_ERROR(EINVAL); 360 return -EINVAL;
362 361
363 /* 362 /*
364 * Reject flags, only allow namespaces. 363 * Reject flags, only allow namespaces.
365 */ 364 */
366 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) 365 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
367 return -XFS_ERROR(EINVAL); 366 return -EINVAL;
368 367
369 dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); 368 dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
370 if (IS_ERR(dentry)) 369 if (IS_ERR(dentry))
@@ -376,7 +375,7 @@ xfs_compat_attrlist_by_handle(
376 goto out_dput; 375 goto out_dput;
377 376
378 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; 377 cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
379 error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, 378 error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
380 al_hreq.flags, cursor); 379 al_hreq.flags, cursor);
381 if (error) 380 if (error)
382 goto out_kfree; 381 goto out_kfree;
@@ -404,10 +403,10 @@ xfs_compat_attrmulti_by_handle(
404 unsigned char *attr_name; 403 unsigned char *attr_name;
405 404
406 if (!capable(CAP_SYS_ADMIN)) 405 if (!capable(CAP_SYS_ADMIN))
407 return -XFS_ERROR(EPERM); 406 return -EPERM;
408 if (copy_from_user(&am_hreq, arg, 407 if (copy_from_user(&am_hreq, arg,
409 sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) 408 sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
410 return -XFS_ERROR(EFAULT); 409 return -EFAULT;
411 410
412 /* overflow check */ 411 /* overflow check */
413 if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) 412 if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
@@ -417,7 +416,7 @@ xfs_compat_attrmulti_by_handle(
417 if (IS_ERR(dentry)) 416 if (IS_ERR(dentry))
418 return PTR_ERR(dentry); 417 return PTR_ERR(dentry);
419 418
420 error = E2BIG; 419 error = -E2BIG;
421 size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); 420 size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
422 if (!size || size > 16 * PAGE_SIZE) 421 if (!size || size > 16 * PAGE_SIZE)
423 goto out_dput; 422 goto out_dput;
@@ -428,7 +427,7 @@ xfs_compat_attrmulti_by_handle(
428 goto out_dput; 427 goto out_dput;
429 } 428 }
430 429
431 error = ENOMEM; 430 error = -ENOMEM;
432 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 431 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
433 if (!attr_name) 432 if (!attr_name)
434 goto out_kfree_ops; 433 goto out_kfree_ops;
@@ -439,7 +438,7 @@ xfs_compat_attrmulti_by_handle(
439 compat_ptr(ops[i].am_attrname), 438 compat_ptr(ops[i].am_attrname),
440 MAXNAMELEN); 439 MAXNAMELEN);
441 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) 440 if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
442 error = ERANGE; 441 error = -ERANGE;
443 if (ops[i].am_error < 0) 442 if (ops[i].am_error < 0)
444 break; 443 break;
445 444
@@ -470,19 +469,19 @@ xfs_compat_attrmulti_by_handle(
470 mnt_drop_write_file(parfilp); 469 mnt_drop_write_file(parfilp);
471 break; 470 break;
472 default: 471 default:
473 ops[i].am_error = EINVAL; 472 ops[i].am_error = -EINVAL;
474 } 473 }
475 } 474 }
476 475
477 if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) 476 if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
478 error = XFS_ERROR(EFAULT); 477 error = -EFAULT;
479 478
480 kfree(attr_name); 479 kfree(attr_name);
481 out_kfree_ops: 480 out_kfree_ops:
482 kfree(ops); 481 kfree(ops);
483 out_dput: 482 out_dput:
484 dput(dentry); 483 dput(dentry);
485 return -error; 484 return error;
486} 485}
487 486
488STATIC int 487STATIC int
@@ -496,26 +495,26 @@ xfs_compat_fssetdm_by_handle(
496 struct dentry *dentry; 495 struct dentry *dentry;
497 496
498 if (!capable(CAP_MKNOD)) 497 if (!capable(CAP_MKNOD))
499 return -XFS_ERROR(EPERM); 498 return -EPERM;
500 if (copy_from_user(&dmhreq, arg, 499 if (copy_from_user(&dmhreq, arg,
501 sizeof(compat_xfs_fsop_setdm_handlereq_t))) 500 sizeof(compat_xfs_fsop_setdm_handlereq_t)))
502 return -XFS_ERROR(EFAULT); 501 return -EFAULT;
503 502
504 dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); 503 dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
505 if (IS_ERR(dentry)) 504 if (IS_ERR(dentry))
506 return PTR_ERR(dentry); 505 return PTR_ERR(dentry);
507 506
508 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { 507 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
509 error = -XFS_ERROR(EPERM); 508 error = -EPERM;
510 goto out; 509 goto out;
511 } 510 }
512 511
513 if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { 512 if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
514 error = -XFS_ERROR(EFAULT); 513 error = -EFAULT;
515 goto out; 514 goto out;
516 } 515 }
517 516
518 error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, 517 error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
519 fsd.fsd_dmstate); 518 fsd.fsd_dmstate);
520 519
521out: 520out:
@@ -537,7 +536,7 @@ xfs_file_compat_ioctl(
537 int error; 536 int error;
538 537
539 if (filp->f_mode & FMODE_NOCMTIME) 538 if (filp->f_mode & FMODE_NOCMTIME)
540 ioflags |= IO_INVIS; 539 ioflags |= XFS_IO_INVIS;
541 540
542 trace_xfs_file_compat_ioctl(ip); 541 trace_xfs_file_compat_ioctl(ip);
543 542
@@ -588,7 +587,7 @@ xfs_file_compat_ioctl(
588 struct xfs_flock64 bf; 587 struct xfs_flock64 bf;
589 588
590 if (xfs_compat_flock64_copyin(&bf, arg)) 589 if (xfs_compat_flock64_copyin(&bf, arg))
591 return -XFS_ERROR(EFAULT); 590 return -EFAULT;
592 cmd = _NATIVE_IOC(cmd, struct xfs_flock64); 591 cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
593 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); 592 return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
594 } 593 }
@@ -598,25 +597,25 @@ xfs_file_compat_ioctl(
598 struct xfs_growfs_data in; 597 struct xfs_growfs_data in;
599 598
600 if (xfs_compat_growfs_data_copyin(&in, arg)) 599 if (xfs_compat_growfs_data_copyin(&in, arg))
601 return -XFS_ERROR(EFAULT); 600 return -EFAULT;
602 error = mnt_want_write_file(filp); 601 error = mnt_want_write_file(filp);
603 if (error) 602 if (error)
604 return error; 603 return error;
605 error = xfs_growfs_data(mp, &in); 604 error = xfs_growfs_data(mp, &in);
606 mnt_drop_write_file(filp); 605 mnt_drop_write_file(filp);
607 return -error; 606 return error;
608 } 607 }
609 case XFS_IOC_FSGROWFSRT_32: { 608 case XFS_IOC_FSGROWFSRT_32: {
610 struct xfs_growfs_rt in; 609 struct xfs_growfs_rt in;
611 610
612 if (xfs_compat_growfs_rt_copyin(&in, arg)) 611 if (xfs_compat_growfs_rt_copyin(&in, arg))
613 return -XFS_ERROR(EFAULT); 612 return -EFAULT;
614 error = mnt_want_write_file(filp); 613 error = mnt_want_write_file(filp);
615 if (error) 614 if (error)
616 return error; 615 return error;
617 error = xfs_growfs_rt(mp, &in); 616 error = xfs_growfs_rt(mp, &in);
618 mnt_drop_write_file(filp); 617 mnt_drop_write_file(filp);
619 return -error; 618 return error;
620 } 619 }
621#endif 620#endif
622 /* long changes size, but xfs only copiese out 32 bits */ 621 /* long changes size, but xfs only copiese out 32 bits */
@@ -633,13 +632,13 @@ xfs_file_compat_ioctl(
633 if (copy_from_user(&sxp, sxu, 632 if (copy_from_user(&sxp, sxu,
634 offsetof(struct xfs_swapext, sx_stat)) || 633 offsetof(struct xfs_swapext, sx_stat)) ||
635 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) 634 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
636 return -XFS_ERROR(EFAULT); 635 return -EFAULT;
637 error = mnt_want_write_file(filp); 636 error = mnt_want_write_file(filp);
638 if (error) 637 if (error)
639 return error; 638 return error;
640 error = xfs_ioc_swapext(&sxp); 639 error = xfs_ioc_swapext(&sxp);
641 mnt_drop_write_file(filp); 640 mnt_drop_write_file(filp);
642 return -error; 641 return error;
643 } 642 }
644 case XFS_IOC_FSBULKSTAT_32: 643 case XFS_IOC_FSBULKSTAT_32:
645 case XFS_IOC_FSBULKSTAT_SINGLE_32: 644 case XFS_IOC_FSBULKSTAT_SINGLE_32:
@@ -651,7 +650,7 @@ xfs_file_compat_ioctl(
651 struct xfs_fsop_handlereq hreq; 650 struct xfs_fsop_handlereq hreq;
652 651
653 if (xfs_compat_handlereq_copyin(&hreq, arg)) 652 if (xfs_compat_handlereq_copyin(&hreq, arg))
654 return -XFS_ERROR(EFAULT); 653 return -EFAULT;
655 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); 654 cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
656 return xfs_find_handle(cmd, &hreq); 655 return xfs_find_handle(cmd, &hreq);
657 } 656 }
@@ -659,14 +658,14 @@ xfs_file_compat_ioctl(
659 struct xfs_fsop_handlereq hreq; 658 struct xfs_fsop_handlereq hreq;
660 659
661 if (xfs_compat_handlereq_copyin(&hreq, arg)) 660 if (xfs_compat_handlereq_copyin(&hreq, arg))
662 return -XFS_ERROR(EFAULT); 661 return -EFAULT;
663 return xfs_open_by_handle(filp, &hreq); 662 return xfs_open_by_handle(filp, &hreq);
664 } 663 }
665 case XFS_IOC_READLINK_BY_HANDLE_32: { 664 case XFS_IOC_READLINK_BY_HANDLE_32: {
666 struct xfs_fsop_handlereq hreq; 665 struct xfs_fsop_handlereq hreq;
667 666
668 if (xfs_compat_handlereq_copyin(&hreq, arg)) 667 if (xfs_compat_handlereq_copyin(&hreq, arg))
669 return -XFS_ERROR(EFAULT); 668 return -EFAULT;
670 return xfs_readlink_by_handle(filp, &hreq); 669 return xfs_readlink_by_handle(filp, &hreq);
671 } 670 }
672 case XFS_IOC_ATTRLIST_BY_HANDLE_32: 671 case XFS_IOC_ATTRLIST_BY_HANDLE_32:
@@ -676,6 +675,6 @@ xfs_file_compat_ioctl(
676 case XFS_IOC_FSSETDM_BY_HANDLE_32: 675 case XFS_IOC_FSSETDM_BY_HANDLE_32:
677 return xfs_compat_fssetdm_by_handle(filp, arg); 676 return xfs_compat_fssetdm_by_handle(filp, arg);
678 default: 677 default:
679 return -XFS_ERROR(ENOIOCTLCMD); 678 return -ENOIOCTLCMD;
680 } 679 }
681} 680}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6d3ec2b6ee29..e9c47b6f5e5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -110,7 +110,7 @@ xfs_alert_fsblock_zero(
110 (unsigned long long)imap->br_startoff, 110 (unsigned long long)imap->br_startoff,
111 (unsigned long long)imap->br_blockcount, 111 (unsigned long long)imap->br_blockcount,
112 imap->br_state); 112 imap->br_state);
113 return EFSCORRUPTED; 113 return -EFSCORRUPTED;
114} 114}
115 115
116int 116int
@@ -138,7 +138,7 @@ xfs_iomap_write_direct(
138 138
139 error = xfs_qm_dqattach(ip, 0); 139 error = xfs_qm_dqattach(ip, 0);
140 if (error) 140 if (error)
141 return XFS_ERROR(error); 141 return error;
142 142
143 rt = XFS_IS_REALTIME_INODE(ip); 143 rt = XFS_IS_REALTIME_INODE(ip);
144 extsz = xfs_get_extsz_hint(ip); 144 extsz = xfs_get_extsz_hint(ip);
@@ -148,7 +148,7 @@ xfs_iomap_write_direct(
148 if ((offset + count) > XFS_ISIZE(ip)) { 148 if ((offset + count) > XFS_ISIZE(ip)) {
149 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); 149 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
150 if (error) 150 if (error)
151 return XFS_ERROR(error); 151 return error;
152 } else { 152 } else {
153 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) 153 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
154 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 154 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -188,7 +188,7 @@ xfs_iomap_write_direct(
188 */ 188 */
189 if (error) { 189 if (error) {
190 xfs_trans_cancel(tp, 0); 190 xfs_trans_cancel(tp, 0);
191 return XFS_ERROR(error); 191 return error;
192 } 192 }
193 193
194 xfs_ilock(ip, XFS_ILOCK_EXCL); 194 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -225,7 +225,7 @@ xfs_iomap_write_direct(
225 * Copy any maps to caller's array and return any error. 225 * Copy any maps to caller's array and return any error.
226 */ 226 */
227 if (nimaps == 0) { 227 if (nimaps == 0) {
228 error = XFS_ERROR(ENOSPC); 228 error = -ENOSPC;
229 goto out_unlock; 229 goto out_unlock;
230 } 230 }
231 231
@@ -397,7 +397,8 @@ xfs_quota_calc_throttle(
397 struct xfs_inode *ip, 397 struct xfs_inode *ip,
398 int type, 398 int type,
399 xfs_fsblock_t *qblocks, 399 xfs_fsblock_t *qblocks,
400 int *qshift) 400 int *qshift,
401 int64_t *qfreesp)
401{ 402{
402 int64_t freesp; 403 int64_t freesp;
403 int shift = 0; 404 int shift = 0;
@@ -406,6 +407,7 @@ xfs_quota_calc_throttle(
406 /* over hi wmark, squash the prealloc completely */ 407 /* over hi wmark, squash the prealloc completely */
407 if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { 408 if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
408 *qblocks = 0; 409 *qblocks = 0;
410 *qfreesp = 0;
409 return; 411 return;
410 } 412 }
411 413
@@ -418,6 +420,9 @@ xfs_quota_calc_throttle(
418 shift += 2; 420 shift += 2;
419 } 421 }
420 422
423 if (freesp < *qfreesp)
424 *qfreesp = freesp;
425
421 /* only overwrite the throttle values if we are more aggressive */ 426 /* only overwrite the throttle values if we are more aggressive */
422 if ((freesp >> shift) < (*qblocks >> *qshift)) { 427 if ((freesp >> shift) < (*qblocks >> *qshift)) {
423 *qblocks = freesp; 428 *qblocks = freesp;
@@ -476,15 +481,18 @@ xfs_iomap_prealloc_size(
476 } 481 }
477 482
478 /* 483 /*
479 * Check each quota to cap the prealloc size and provide a shift 484 * Check each quota to cap the prealloc size, provide a shift value to
480 * value to throttle with. 485 * throttle with and adjust amount of available space.
481 */ 486 */
482 if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) 487 if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
483 xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); 488 xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
489 &freesp);
484 if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) 490 if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
485 xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); 491 xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
492 &freesp);
486 if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) 493 if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
487 xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); 494 xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
495 &freesp);
488 496
489 /* 497 /*
490 * The final prealloc size is set to the minimum of free space available 498 * The final prealloc size is set to the minimum of free space available
@@ -552,7 +560,7 @@ xfs_iomap_write_delay(
552 */ 560 */
553 error = xfs_qm_dqattach_locked(ip, 0); 561 error = xfs_qm_dqattach_locked(ip, 0);
554 if (error) 562 if (error)
555 return XFS_ERROR(error); 563 return error;
556 564
557 extsz = xfs_get_extsz_hint(ip); 565 extsz = xfs_get_extsz_hint(ip);
558 offset_fsb = XFS_B_TO_FSBT(mp, offset); 566 offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -596,11 +604,11 @@ retry:
596 imap, &nimaps, XFS_BMAPI_ENTIRE); 604 imap, &nimaps, XFS_BMAPI_ENTIRE);
597 switch (error) { 605 switch (error) {
598 case 0: 606 case 0:
599 case ENOSPC: 607 case -ENOSPC:
600 case EDQUOT: 608 case -EDQUOT:
601 break; 609 break;
602 default: 610 default:
603 return XFS_ERROR(error); 611 return error;
604 } 612 }
605 613
606 /* 614 /*
@@ -614,7 +622,7 @@ retry:
614 error = 0; 622 error = 0;
615 goto retry; 623 goto retry;
616 } 624 }
617 return XFS_ERROR(error ? error : ENOSPC); 625 return error ? error : -ENOSPC;
618 } 626 }
619 627
620 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) 628 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
@@ -663,7 +671,7 @@ xfs_iomap_write_allocate(
663 */ 671 */
664 error = xfs_qm_dqattach(ip, 0); 672 error = xfs_qm_dqattach(ip, 0);
665 if (error) 673 if (error)
666 return XFS_ERROR(error); 674 return error;
667 675
668 offset_fsb = XFS_B_TO_FSBT(mp, offset); 676 offset_fsb = XFS_B_TO_FSBT(mp, offset);
669 count_fsb = imap->br_blockcount; 677 count_fsb = imap->br_blockcount;
@@ -690,7 +698,7 @@ xfs_iomap_write_allocate(
690 nres, 0); 698 nres, 0);
691 if (error) { 699 if (error) {
692 xfs_trans_cancel(tp, 0); 700 xfs_trans_cancel(tp, 0);
693 return XFS_ERROR(error); 701 return error;
694 } 702 }
695 xfs_ilock(ip, XFS_ILOCK_EXCL); 703 xfs_ilock(ip, XFS_ILOCK_EXCL);
696 xfs_trans_ijoin(tp, ip, 0); 704 xfs_trans_ijoin(tp, ip, 0);
@@ -739,7 +747,7 @@ xfs_iomap_write_allocate(
739 if ((map_start_fsb + count_fsb) > last_block) { 747 if ((map_start_fsb + count_fsb) > last_block) {
740 count_fsb = last_block - map_start_fsb; 748 count_fsb = last_block - map_start_fsb;
741 if (count_fsb == 0) { 749 if (count_fsb == 0) {
742 error = EAGAIN; 750 error = -EAGAIN;
743 goto trans_cancel; 751 goto trans_cancel;
744 } 752 }
745 } 753 }
@@ -793,7 +801,7 @@ trans_cancel:
793 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 801 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
794error0: 802error0:
795 xfs_iunlock(ip, XFS_ILOCK_EXCL); 803 xfs_iunlock(ip, XFS_ILOCK_EXCL);
796 return XFS_ERROR(error); 804 return error;
797} 805}
798 806
799int 807int
@@ -853,7 +861,7 @@ xfs_iomap_write_unwritten(
853 resblks, 0); 861 resblks, 0);
854 if (error) { 862 if (error) {
855 xfs_trans_cancel(tp, 0); 863 xfs_trans_cancel(tp, 0);
856 return XFS_ERROR(error); 864 return error;
857 } 865 }
858 866
859 xfs_ilock(ip, XFS_ILOCK_EXCL); 867 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -892,7 +900,7 @@ xfs_iomap_write_unwritten(
892 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 900 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
893 xfs_iunlock(ip, XFS_ILOCK_EXCL); 901 xfs_iunlock(ip, XFS_ILOCK_EXCL);
894 if (error) 902 if (error)
895 return XFS_ERROR(error); 903 return error;
896 904
897 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) 905 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
898 return xfs_alert_fsblock_zero(ip, &imap); 906 return xfs_alert_fsblock_zero(ip, &imap);
@@ -915,5 +923,5 @@ error_on_bmapi_transaction:
915 xfs_bmap_cancel(&free_list); 923 xfs_bmap_cancel(&free_list);
916 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); 924 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
917 xfs_iunlock(ip, XFS_ILOCK_EXCL); 925 xfs_iunlock(ip, XFS_ILOCK_EXCL);
918 return XFS_ERROR(error); 926 return error;
919} 927}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 205613a06068..72129493e9d3 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -72,7 +72,7 @@ xfs_initxattrs(
72 int error = 0; 72 int error = 0;
73 73
74 for (xattr = xattr_array; xattr->name != NULL; xattr++) { 74 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
75 error = -xfs_attr_set(ip, xattr->name, xattr->value, 75 error = xfs_attr_set(ip, xattr->name, xattr->value,
76 xattr->value_len, ATTR_SECURE); 76 xattr->value_len, ATTR_SECURE);
77 if (error < 0) 77 if (error < 0)
78 break; 78 break;
@@ -93,7 +93,7 @@ xfs_init_security(
93 struct inode *dir, 93 struct inode *dir,
94 const struct qstr *qstr) 94 const struct qstr *qstr)
95{ 95{
96 return -security_inode_init_security(inode, dir, qstr, 96 return security_inode_init_security(inode, dir, qstr,
97 &xfs_initxattrs, NULL); 97 &xfs_initxattrs, NULL);
98} 98}
99 99
@@ -173,12 +173,12 @@ xfs_generic_create(
173 173
174#ifdef CONFIG_XFS_POSIX_ACL 174#ifdef CONFIG_XFS_POSIX_ACL
175 if (default_acl) { 175 if (default_acl) {
176 error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 176 error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
177 if (error) 177 if (error)
178 goto out_cleanup_inode; 178 goto out_cleanup_inode;
179 } 179 }
180 if (acl) { 180 if (acl) {
181 error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); 181 error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
182 if (error) 182 if (error)
183 goto out_cleanup_inode; 183 goto out_cleanup_inode;
184 } 184 }
@@ -194,7 +194,7 @@ xfs_generic_create(
194 posix_acl_release(default_acl); 194 posix_acl_release(default_acl);
195 if (acl) 195 if (acl)
196 posix_acl_release(acl); 196 posix_acl_release(acl);
197 return -error; 197 return error;
198 198
199 out_cleanup_inode: 199 out_cleanup_inode:
200 if (!tmpfile) 200 if (!tmpfile)
@@ -248,8 +248,8 @@ xfs_vn_lookup(
248 xfs_dentry_to_name(&name, dentry, 0); 248 xfs_dentry_to_name(&name, dentry, 0);
249 error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); 249 error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
250 if (unlikely(error)) { 250 if (unlikely(error)) {
251 if (unlikely(error != ENOENT)) 251 if (unlikely(error != -ENOENT))
252 return ERR_PTR(-error); 252 return ERR_PTR(error);
253 d_add(dentry, NULL); 253 d_add(dentry, NULL);
254 return NULL; 254 return NULL;
255 } 255 }
@@ -275,8 +275,8 @@ xfs_vn_ci_lookup(
275 xfs_dentry_to_name(&xname, dentry, 0); 275 xfs_dentry_to_name(&xname, dentry, 0);
276 error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); 276 error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
277 if (unlikely(error)) { 277 if (unlikely(error)) {
278 if (unlikely(error != ENOENT)) 278 if (unlikely(error != -ENOENT))
279 return ERR_PTR(-error); 279 return ERR_PTR(error);
280 /* 280 /*
281 * call d_add(dentry, NULL) here when d_drop_negative_children 281 * call d_add(dentry, NULL) here when d_drop_negative_children
282 * is called in xfs_vn_mknod (ie. allow negative dentries 282 * is called in xfs_vn_mknod (ie. allow negative dentries
@@ -311,7 +311,7 @@ xfs_vn_link(
311 311
312 error = xfs_link(XFS_I(dir), XFS_I(inode), &name); 312 error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
313 if (unlikely(error)) 313 if (unlikely(error))
314 return -error; 314 return error;
315 315
316 ihold(inode); 316 ihold(inode);
317 d_instantiate(dentry, inode); 317 d_instantiate(dentry, inode);
@@ -328,7 +328,7 @@ xfs_vn_unlink(
328 328
329 xfs_dentry_to_name(&name, dentry, 0); 329 xfs_dentry_to_name(&name, dentry, 0);
330 330
331 error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); 331 error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
332 if (error) 332 if (error)
333 return error; 333 return error;
334 334
@@ -375,7 +375,7 @@ xfs_vn_symlink(
375 xfs_cleanup_inode(dir, inode, dentry); 375 xfs_cleanup_inode(dir, inode, dentry);
376 iput(inode); 376 iput(inode);
377 out: 377 out:
378 return -error; 378 return error;
379} 379}
380 380
381STATIC int 381STATIC int
@@ -392,8 +392,8 @@ xfs_vn_rename(
392 xfs_dentry_to_name(&oname, odentry, 0); 392 xfs_dentry_to_name(&oname, odentry, 0);
393 xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode); 393 xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
394 394
395 return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 395 return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
396 XFS_I(ndir), &nname, new_inode ? 396 XFS_I(ndir), &nname, new_inode ?
397 XFS_I(new_inode) : NULL); 397 XFS_I(new_inode) : NULL);
398} 398}
399 399
@@ -414,7 +414,7 @@ xfs_vn_follow_link(
414 if (!link) 414 if (!link)
415 goto out_err; 415 goto out_err;
416 416
417 error = -xfs_readlink(XFS_I(dentry->d_inode), link); 417 error = xfs_readlink(XFS_I(dentry->d_inode), link);
418 if (unlikely(error)) 418 if (unlikely(error))
419 goto out_kfree; 419 goto out_kfree;
420 420
@@ -441,7 +441,7 @@ xfs_vn_getattr(
441 trace_xfs_getattr(ip); 441 trace_xfs_getattr(ip);
442 442
443 if (XFS_FORCED_SHUTDOWN(mp)) 443 if (XFS_FORCED_SHUTDOWN(mp))
444 return -XFS_ERROR(EIO); 444 return -EIO;
445 445
446 stat->size = XFS_ISIZE(ip); 446 stat->size = XFS_ISIZE(ip);
447 stat->dev = inode->i_sb->s_dev; 447 stat->dev = inode->i_sb->s_dev;
@@ -546,14 +546,14 @@ xfs_setattr_nonsize(
546 /* If acls are being inherited, we already have this checked */ 546 /* If acls are being inherited, we already have this checked */
547 if (!(flags & XFS_ATTR_NOACL)) { 547 if (!(flags & XFS_ATTR_NOACL)) {
548 if (mp->m_flags & XFS_MOUNT_RDONLY) 548 if (mp->m_flags & XFS_MOUNT_RDONLY)
549 return XFS_ERROR(EROFS); 549 return -EROFS;
550 550
551 if (XFS_FORCED_SHUTDOWN(mp)) 551 if (XFS_FORCED_SHUTDOWN(mp))
552 return XFS_ERROR(EIO); 552 return -EIO;
553 553
554 error = -inode_change_ok(inode, iattr); 554 error = inode_change_ok(inode, iattr);
555 if (error) 555 if (error)
556 return XFS_ERROR(error); 556 return error;
557 } 557 }
558 558
559 ASSERT((mask & ATTR_SIZE) == 0); 559 ASSERT((mask & ATTR_SIZE) == 0);
@@ -703,7 +703,7 @@ xfs_setattr_nonsize(
703 xfs_qm_dqrele(gdqp); 703 xfs_qm_dqrele(gdqp);
704 704
705 if (error) 705 if (error)
706 return XFS_ERROR(error); 706 return error;
707 707
708 /* 708 /*
709 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode 709 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
@@ -713,9 +713,9 @@ xfs_setattr_nonsize(
713 * Posix ACL code seems to care about this issue either. 713 * Posix ACL code seems to care about this issue either.
714 */ 714 */
715 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { 715 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
716 error = -posix_acl_chmod(inode, inode->i_mode); 716 error = posix_acl_chmod(inode, inode->i_mode);
717 if (error) 717 if (error)
718 return XFS_ERROR(error); 718 return error;
719 } 719 }
720 720
721 return 0; 721 return 0;
@@ -748,14 +748,14 @@ xfs_setattr_size(
748 trace_xfs_setattr(ip); 748 trace_xfs_setattr(ip);
749 749
750 if (mp->m_flags & XFS_MOUNT_RDONLY) 750 if (mp->m_flags & XFS_MOUNT_RDONLY)
751 return XFS_ERROR(EROFS); 751 return -EROFS;
752 752
753 if (XFS_FORCED_SHUTDOWN(mp)) 753 if (XFS_FORCED_SHUTDOWN(mp))
754 return XFS_ERROR(EIO); 754 return -EIO;
755 755
756 error = -inode_change_ok(inode, iattr); 756 error = inode_change_ok(inode, iattr);
757 if (error) 757 if (error)
758 return XFS_ERROR(error); 758 return error;
759 759
760 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 760 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
761 ASSERT(S_ISREG(ip->i_d.di_mode)); 761 ASSERT(S_ISREG(ip->i_d.di_mode));
@@ -818,7 +818,7 @@ xfs_setattr_size(
818 * care about here. 818 * care about here.
819 */ 819 */
820 if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { 820 if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
821 error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 821 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
822 ip->i_d.di_size, newsize); 822 ip->i_d.di_size, newsize);
823 if (error) 823 if (error)
824 return error; 824 return error;
@@ -844,7 +844,7 @@ xfs_setattr_size(
844 * much we can do about this, except to hope that the caller sees ENOMEM 844 * much we can do about this, except to hope that the caller sees ENOMEM
845 * and retries the truncate operation. 845 * and retries the truncate operation.
846 */ 846 */
847 error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 847 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
848 if (error) 848 if (error)
849 return error; 849 return error;
850 truncate_setsize(inode, newsize); 850 truncate_setsize(inode, newsize);
@@ -950,7 +950,7 @@ xfs_vn_setattr(
950 error = xfs_setattr_nonsize(ip, iattr, 0); 950 error = xfs_setattr_nonsize(ip, iattr, 0);
951 } 951 }
952 952
953 return -error; 953 return error;
954} 954}
955 955
956STATIC int 956STATIC int
@@ -970,7 +970,7 @@ xfs_vn_update_time(
970 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); 970 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
971 if (error) { 971 if (error) {
972 xfs_trans_cancel(tp, 0); 972 xfs_trans_cancel(tp, 0);
973 return -error; 973 return error;
974 } 974 }
975 975
976 xfs_ilock(ip, XFS_ILOCK_EXCL); 976 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -991,7 +991,7 @@ xfs_vn_update_time(
991 } 991 }
992 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 992 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
993 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); 993 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
994 return -xfs_trans_commit(tp, 0); 994 return xfs_trans_commit(tp, 0);
995} 995}
996 996
997#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 997#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -1036,7 +1036,7 @@ xfs_fiemap_format(
1036 *full = 1; /* user array now full */ 1036 *full = 1; /* user array now full */
1037 } 1037 }
1038 1038
1039 return -error; 1039 return error;
1040} 1040}
1041 1041
1042STATIC int 1042STATIC int
@@ -1055,12 +1055,12 @@ xfs_vn_fiemap(
1055 return error; 1055 return error;
1056 1056
1057 /* Set up bmap header for xfs internal routine */ 1057 /* Set up bmap header for xfs internal routine */
1058 bm.bmv_offset = BTOBB(start); 1058 bm.bmv_offset = BTOBBT(start);
1059 /* Special case for whole file */ 1059 /* Special case for whole file */
1060 if (length == FIEMAP_MAX_OFFSET) 1060 if (length == FIEMAP_MAX_OFFSET)
1061 bm.bmv_length = -1LL; 1061 bm.bmv_length = -1LL;
1062 else 1062 else
1063 bm.bmv_length = BTOBB(length); 1063 bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
1064 1064
1065 /* We add one because in getbmap world count includes the header */ 1065 /* We add one because in getbmap world count includes the header */
1066 bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : 1066 bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
@@ -1075,7 +1075,7 @@ xfs_vn_fiemap(
1075 1075
1076 error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); 1076 error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
1077 if (error) 1077 if (error)
1078 return -error; 1078 return error;
1079 1079
1080 return 0; 1080 return 0;
1081} 1081}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index cb64f222d607..f71be9c68017 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -67,19 +67,17 @@ xfs_bulkstat_one_int(
67 *stat = BULKSTAT_RV_NOTHING; 67 *stat = BULKSTAT_RV_NOTHING;
68 68
69 if (!buffer || xfs_internal_inum(mp, ino)) 69 if (!buffer || xfs_internal_inum(mp, ino))
70 return XFS_ERROR(EINVAL); 70 return -EINVAL;
71 71
72 buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); 72 buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
73 if (!buf) 73 if (!buf)
74 return XFS_ERROR(ENOMEM); 74 return -ENOMEM;
75 75
76 error = xfs_iget(mp, NULL, ino, 76 error = xfs_iget(mp, NULL, ino,
77 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), 77 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
78 XFS_ILOCK_SHARED, &ip); 78 XFS_ILOCK_SHARED, &ip);
79 if (error) { 79 if (error)
80 *stat = BULKSTAT_RV_NOTHING;
81 goto out_free; 80 goto out_free;
82 }
83 81
84 ASSERT(ip != NULL); 82 ASSERT(ip != NULL);
85 ASSERT(ip->i_imap.im_blkno != 0); 83 ASSERT(ip->i_imap.im_blkno != 0);
@@ -136,7 +134,6 @@ xfs_bulkstat_one_int(
136 IRELE(ip); 134 IRELE(ip);
137 135
138 error = formatter(buffer, ubsize, ubused, buf); 136 error = formatter(buffer, ubsize, ubused, buf);
139
140 if (!error) 137 if (!error)
141 *stat = BULKSTAT_RV_DIDONE; 138 *stat = BULKSTAT_RV_DIDONE;
142 139
@@ -154,9 +151,9 @@ xfs_bulkstat_one_fmt(
154 const xfs_bstat_t *buffer) 151 const xfs_bstat_t *buffer)
155{ 152{
156 if (ubsize < sizeof(*buffer)) 153 if (ubsize < sizeof(*buffer))
157 return XFS_ERROR(ENOMEM); 154 return -ENOMEM;
158 if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) 155 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
159 return XFS_ERROR(EFAULT); 156 return -EFAULT;
160 if (ubused) 157 if (ubused)
161 *ubused = sizeof(*buffer); 158 *ubused = sizeof(*buffer);
162 return 0; 159 return 0;
@@ -175,9 +172,170 @@ xfs_bulkstat_one(
175 xfs_bulkstat_one_fmt, ubused, stat); 172 xfs_bulkstat_one_fmt, ubused, stat);
176} 173}
177 174
175/*
176 * Loop over all clusters in a chunk for a given incore inode allocation btree
177 * record. Do a readahead if there are any allocated inodes in that cluster.
178 */
179STATIC void
180xfs_bulkstat_ichunk_ra(
181 struct xfs_mount *mp,
182 xfs_agnumber_t agno,
183 struct xfs_inobt_rec_incore *irec)
184{
185 xfs_agblock_t agbno;
186 struct blk_plug plug;
187 int blks_per_cluster;
188 int inodes_per_cluster;
189 int i; /* inode chunk index */
190
191 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
192 blks_per_cluster = xfs_icluster_size_fsb(mp);
193 inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
194
195 blk_start_plug(&plug);
196 for (i = 0; i < XFS_INODES_PER_CHUNK;
197 i += inodes_per_cluster, agbno += blks_per_cluster) {
198 if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) {
199 xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster,
200 &xfs_inode_buf_ops);
201 }
202 }
203 blk_finish_plug(&plug);
204}
205
206/*
207 * Lookup the inode chunk that the given inode lives in and then get the record
208 * if we found the chunk. If the inode was not the last in the chunk and there
209 * are some left allocated, update the data for the pointed-to record as well as
210 * return the count of grabbed inodes.
211 */
212STATIC int
213xfs_bulkstat_grab_ichunk(
214 struct xfs_btree_cur *cur, /* btree cursor */
215 xfs_agino_t agino, /* starting inode of chunk */
216 int *icount,/* return # of inodes grabbed */
217 struct xfs_inobt_rec_incore *irec) /* btree record */
218{
219 int idx; /* index into inode chunk */
220 int stat;
221 int error = 0;
222
223 /* Lookup the inode chunk that this inode lives in */
224 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
225 if (error)
226 return error;
227 if (!stat) {
228 *icount = 0;
229 return error;
230 }
231
232 /* Get the record, should always work */
233 error = xfs_inobt_get_rec(cur, irec, &stat);
234 if (error)
235 return error;
236 XFS_WANT_CORRUPTED_RETURN(stat == 1);
237
238 /* Check if the record contains the inode in request */
239 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
240 return -EINVAL;
241
242 idx = agino - irec->ir_startino + 1;
243 if (idx < XFS_INODES_PER_CHUNK &&
244 (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
245 int i;
246
247 /* We got a right chunk with some left inodes allocated at it.
248 * Grab the chunk record. Mark all the uninteresting inodes
249 * free -- because they're before our start point.
250 */
251 for (i = 0; i < idx; i++) {
252 if (XFS_INOBT_MASK(i) & ~irec->ir_free)
253 irec->ir_freecount++;
254 }
255
256 irec->ir_free |= xfs_inobt_maskn(0, idx);
257 *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount;
258 }
259
260 return 0;
261}
262
178#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 263#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
179 264
180/* 265/*
266 * Process inodes in chunk with a pointer to a formatter function
267 * that will iget the inode and fill in the appropriate structure.
268 */
269int
270xfs_bulkstat_ag_ichunk(
271 struct xfs_mount *mp,
272 xfs_agnumber_t agno,
273 struct xfs_inobt_rec_incore *irbp,
274 bulkstat_one_pf formatter,
275 size_t statstruct_size,
276 struct xfs_bulkstat_agichunk *acp)
277{
278 xfs_ino_t lastino = acp->ac_lastino;
279 char __user **ubufp = acp->ac_ubuffer;
280 int ubleft = acp->ac_ubleft;
281 int ubelem = acp->ac_ubelem;
282 int chunkidx, clustidx;
283 int error = 0;
284 xfs_agino_t agino;
285
286 for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
287 XFS_BULKSTAT_UBLEFT(ubleft) &&
288 irbp->ir_freecount < XFS_INODES_PER_CHUNK;
289 chunkidx++, clustidx++, agino++) {
290 int fmterror; /* bulkstat formatter result */
291 int ubused;
292 xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
293
294 ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
295
296 /* Skip if this inode is free */
297 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
298 lastino = ino;
299 continue;
300 }
301
302 /*
303 * Count used inodes as free so we can tell when the
304 * chunk is used up.
305 */
306 irbp->ir_freecount++;
307
308 /* Get the inode and fill in a single buffer */
309 ubused = statstruct_size;
310 error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
311 if (fmterror == BULKSTAT_RV_NOTHING) {
312 if (error && error != -ENOENT && error != -EINVAL) {
313 ubleft = 0;
314 break;
315 }
316 lastino = ino;
317 continue;
318 }
319 if (fmterror == BULKSTAT_RV_GIVEUP) {
320 ubleft = 0;
321 ASSERT(error);
322 break;
323 }
324 if (*ubufp)
325 *ubufp += ubused;
326 ubleft -= ubused;
327 ubelem++;
328 lastino = ino;
329 }
330
331 acp->ac_lastino = lastino;
332 acp->ac_ubleft = ubleft;
333 acp->ac_ubelem = ubelem;
334
335 return error;
336}
337
338/*
181 * Return stat information in bulk (by-inode) for the filesystem. 339 * Return stat information in bulk (by-inode) for the filesystem.
182 */ 340 */
183int /* error status */ 341int /* error status */
@@ -190,13 +348,10 @@ xfs_bulkstat(
190 char __user *ubuffer, /* buffer with inode stats */ 348 char __user *ubuffer, /* buffer with inode stats */
191 int *done) /* 1 if there are more stats to get */ 349 int *done) /* 1 if there are more stats to get */
192{ 350{
193 xfs_agblock_t agbno=0;/* allocation group block number */
194 xfs_buf_t *agbp; /* agi header buffer */ 351 xfs_buf_t *agbp; /* agi header buffer */
195 xfs_agi_t *agi; /* agi header data */ 352 xfs_agi_t *agi; /* agi header data */
196 xfs_agino_t agino; /* inode # in allocation group */ 353 xfs_agino_t agino; /* inode # in allocation group */
197 xfs_agnumber_t agno; /* allocation group number */ 354 xfs_agnumber_t agno; /* allocation group number */
198 int chunkidx; /* current index into inode chunk */
199 int clustidx; /* current index into inode cluster */
200 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 355 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
201 int end_of_ag; /* set if we've seen the ag end */ 356 int end_of_ag; /* set if we've seen the ag end */
202 int error; /* error code */ 357 int error; /* error code */
@@ -209,8 +364,6 @@ xfs_bulkstat(
209 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ 364 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
210 xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ 365 xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
211 xfs_ino_t lastino; /* last inode number returned */ 366 xfs_ino_t lastino; /* last inode number returned */
212 int blks_per_cluster; /* # of blocks per cluster */
213 int inodes_per_cluster;/* # of inodes per cluster */
214 int nirbuf; /* size of irbuf */ 367 int nirbuf; /* size of irbuf */
215 int rval; /* return value error code */ 368 int rval; /* return value error code */
216 int tmp; /* result value from btree calls */ 369 int tmp; /* result value from btree calls */
@@ -218,7 +371,6 @@ xfs_bulkstat(
218 int ubleft; /* bytes left in user's buffer */ 371 int ubleft; /* bytes left in user's buffer */
219 char __user *ubufp; /* pointer into user's buffer */ 372 char __user *ubufp; /* pointer into user's buffer */
220 int ubelem; /* spaces used in user's buffer */ 373 int ubelem; /* spaces used in user's buffer */
221 int ubused; /* bytes used by formatter */
222 374
223 /* 375 /*
224 * Get the last inode value, see if there's nothing to do. 376 * Get the last inode value, see if there's nothing to do.
@@ -233,20 +385,16 @@ xfs_bulkstat(
233 *ubcountp = 0; 385 *ubcountp = 0;
234 return 0; 386 return 0;
235 } 387 }
236 if (!ubcountp || *ubcountp <= 0) { 388
237 return EINVAL;
238 }
239 ubcount = *ubcountp; /* statstruct's */ 389 ubcount = *ubcountp; /* statstruct's */
240 ubleft = ubcount * statstruct_size; /* bytes */ 390 ubleft = ubcount * statstruct_size; /* bytes */
241 *ubcountp = ubelem = 0; 391 *ubcountp = ubelem = 0;
242 *done = 0; 392 *done = 0;
243 fmterror = 0; 393 fmterror = 0;
244 ubufp = ubuffer; 394 ubufp = ubuffer;
245 blks_per_cluster = xfs_icluster_size_fsb(mp);
246 inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
247 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); 395 irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
248 if (!irbuf) 396 if (!irbuf)
249 return ENOMEM; 397 return -ENOMEM;
250 398
251 nirbuf = irbsize / sizeof(*irbuf); 399 nirbuf = irbsize / sizeof(*irbuf);
252 400
@@ -258,14 +406,8 @@ xfs_bulkstat(
258 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { 406 while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
259 cond_resched(); 407 cond_resched();
260 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 408 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
261 if (error) { 409 if (error)
262 /* 410 break;
263 * Skip this allocation group and go to the next one.
264 */
265 agno++;
266 agino = 0;
267 continue;
268 }
269 agi = XFS_BUF_TO_AGI(agbp); 411 agi = XFS_BUF_TO_AGI(agbp);
270 /* 412 /*
271 * Allocate and initialize a btree cursor for ialloc btree. 413 * Allocate and initialize a btree cursor for ialloc btree.
@@ -275,96 +417,39 @@ xfs_bulkstat(
275 irbp = irbuf; 417 irbp = irbuf;
276 irbufend = irbuf + nirbuf; 418 irbufend = irbuf + nirbuf;
277 end_of_ag = 0; 419 end_of_ag = 0;
278 /* 420 icount = 0;
279 * If we're returning in the middle of an allocation group,
280 * we need to get the remainder of the chunk we're in.
281 */
282 if (agino > 0) { 421 if (agino > 0) {
283 xfs_inobt_rec_incore_t r;
284
285 /* 422 /*
286 * Lookup the inode chunk that this inode lives in. 423 * In the middle of an allocation group, we need to get
424 * the remainder of the chunk we're in.
287 */ 425 */
288 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, 426 struct xfs_inobt_rec_incore r;
289 &tmp); 427
290 if (!error && /* no I/O error */ 428 error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
291 tmp && /* lookup succeeded */ 429 if (error)
292 /* got the record, should always work */ 430 break;
293 !(error = xfs_inobt_get_rec(cur, &r, &i)) && 431 if (icount) {
294 i == 1 &&
295 /* this is the right chunk */
296 agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
297 /* lastino was not last in chunk */
298 (chunkidx = agino - r.ir_startino + 1) <
299 XFS_INODES_PER_CHUNK &&
300 /* there are some left allocated */
301 xfs_inobt_maskn(chunkidx,
302 XFS_INODES_PER_CHUNK - chunkidx) &
303 ~r.ir_free) {
304 /*
305 * Grab the chunk record. Mark all the
306 * uninteresting inodes (because they're
307 * before our start point) free.
308 */
309 for (i = 0; i < chunkidx; i++) {
310 if (XFS_INOBT_MASK(i) & ~r.ir_free)
311 r.ir_freecount++;
312 }
313 r.ir_free |= xfs_inobt_maskn(0, chunkidx);
314 irbp->ir_startino = r.ir_startino; 432 irbp->ir_startino = r.ir_startino;
315 irbp->ir_freecount = r.ir_freecount; 433 irbp->ir_freecount = r.ir_freecount;
316 irbp->ir_free = r.ir_free; 434 irbp->ir_free = r.ir_free;
317 irbp++; 435 irbp++;
318 agino = r.ir_startino + XFS_INODES_PER_CHUNK; 436 agino = r.ir_startino + XFS_INODES_PER_CHUNK;
319 icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
320 } else {
321 /*
322 * If any of those tests failed, bump the
323 * inode number (just in case).
324 */
325 agino++;
326 icount = 0;
327 } 437 }
328 /* 438 /* Increment to the next record */
329 * In any case, increment to the next record. 439 error = xfs_btree_increment(cur, 0, &tmp);
330 */
331 if (!error)
332 error = xfs_btree_increment(cur, 0, &tmp);
333 } else { 440 } else {
334 /* 441 /* Start of ag. Lookup the first inode chunk */
335 * Start of ag. Lookup the first inode chunk.
336 */
337 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); 442 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
338 icount = 0;
339 } 443 }
444 if (error)
445 break;
446
340 /* 447 /*
341 * Loop through inode btree records in this ag, 448 * Loop through inode btree records in this ag,
342 * until we run out of inodes or space in the buffer. 449 * until we run out of inodes or space in the buffer.
343 */ 450 */
344 while (irbp < irbufend && icount < ubcount) { 451 while (irbp < irbufend && icount < ubcount) {
345 xfs_inobt_rec_incore_t r; 452 struct xfs_inobt_rec_incore r;
346
347 /*
348 * Loop as long as we're unable to read the
349 * inode btree.
350 */
351 while (error) {
352 agino += XFS_INODES_PER_CHUNK;
353 if (XFS_AGINO_TO_AGBNO(mp, agino) >=
354 be32_to_cpu(agi->agi_length))
355 break;
356 error = xfs_inobt_lookup(cur, agino,
357 XFS_LOOKUP_GE, &tmp);
358 cond_resched();
359 }
360 /*
361 * If ran off the end of the ag either with an error,
362 * or the normal way, set end and stop collecting.
363 */
364 if (error) {
365 end_of_ag = 1;
366 break;
367 }
368 453
369 error = xfs_inobt_get_rec(cur, &r, &i); 454 error = xfs_inobt_get_rec(cur, &r, &i);
370 if (error || i == 0) { 455 if (error || i == 0) {
@@ -377,25 +462,7 @@ xfs_bulkstat(
377 * Also start read-ahead now for this chunk. 462 * Also start read-ahead now for this chunk.
378 */ 463 */
379 if (r.ir_freecount < XFS_INODES_PER_CHUNK) { 464 if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
380 struct blk_plug plug; 465 xfs_bulkstat_ichunk_ra(mp, agno, &r);
381 /*
382 * Loop over all clusters in the next chunk.
383 * Do a readahead if there are any allocated
384 * inodes in that cluster.
385 */
386 blk_start_plug(&plug);
387 agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);
388 for (chunkidx = 0;
389 chunkidx < XFS_INODES_PER_CHUNK;
390 chunkidx += inodes_per_cluster,
391 agbno += blks_per_cluster) {
392 if (xfs_inobt_maskn(chunkidx,
393 inodes_per_cluster) & ~r.ir_free)
394 xfs_btree_reada_bufs(mp, agno,
395 agbno, blks_per_cluster,
396 &xfs_inode_buf_ops);
397 }
398 blk_finish_plug(&plug);
399 irbp->ir_startino = r.ir_startino; 466 irbp->ir_startino = r.ir_startino;
400 irbp->ir_freecount = r.ir_freecount; 467 irbp->ir_freecount = r.ir_freecount;
401 irbp->ir_free = r.ir_free; 468 irbp->ir_free = r.ir_free;
@@ -422,57 +489,20 @@ xfs_bulkstat(
422 irbufend = irbp; 489 irbufend = irbp;
423 for (irbp = irbuf; 490 for (irbp = irbuf;
424 irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { 491 irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
425 /* 492 struct xfs_bulkstat_agichunk ac;
426 * Now process this chunk of inodes. 493
427 */ 494 ac.ac_lastino = lastino;
428 for (agino = irbp->ir_startino, chunkidx = clustidx = 0; 495 ac.ac_ubuffer = &ubuffer;
429 XFS_BULKSTAT_UBLEFT(ubleft) && 496 ac.ac_ubleft = ubleft;
430 irbp->ir_freecount < XFS_INODES_PER_CHUNK; 497 ac.ac_ubelem = ubelem;
431 chunkidx++, clustidx++, agino++) { 498 error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
432 ASSERT(chunkidx < XFS_INODES_PER_CHUNK); 499 formatter, statstruct_size, &ac);
433 500 if (error)
434 ino = XFS_AGINO_TO_INO(mp, agno, agino); 501 rval = error;
435 /* 502
436 * Skip if this inode is free. 503 lastino = ac.ac_lastino;
437 */ 504 ubleft = ac.ac_ubleft;
438 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { 505 ubelem = ac.ac_ubelem;
439 lastino = ino;
440 continue;
441 }
442 /*
443 * Count used inodes as free so we can tell
444 * when the chunk is used up.
445 */
446 irbp->ir_freecount++;
447
448 /*
449 * Get the inode and fill in a single buffer.
450 */
451 ubused = statstruct_size;
452 error = formatter(mp, ino, ubufp, ubleft,
453 &ubused, &fmterror);
454 if (fmterror == BULKSTAT_RV_NOTHING) {
455 if (error && error != ENOENT &&
456 error != EINVAL) {
457 ubleft = 0;
458 rval = error;
459 break;
460 }
461 lastino = ino;
462 continue;
463 }
464 if (fmterror == BULKSTAT_RV_GIVEUP) {
465 ubleft = 0;
466 ASSERT(error);
467 rval = error;
468 break;
469 }
470 if (ubufp)
471 ubufp += ubused;
472 ubleft -= ubused;
473 ubelem++;
474 lastino = ino;
475 }
476 506
477 cond_resched(); 507 cond_resched();
478 } 508 }
@@ -512,58 +542,10 @@ xfs_bulkstat(
512 return rval; 542 return rval;
513} 543}
514 544
515/*
516 * Return stat information in bulk (by-inode) for the filesystem.
517 * Special case for non-sequential one inode bulkstat.
518 */
519int /* error status */
520xfs_bulkstat_single(
521 xfs_mount_t *mp, /* mount point for filesystem */
522 xfs_ino_t *lastinop, /* inode to return */
523 char __user *buffer, /* buffer with inode stats */
524 int *done) /* 1 if there are more stats to get */
525{
526 int count; /* count value for bulkstat call */
527 int error; /* return value */
528 xfs_ino_t ino; /* filesystem inode number */
529 int res; /* result from bs1 */
530
531 /*
532 * note that requesting valid inode numbers which are not allocated
533 * to inodes will most likely cause xfs_imap_to_bp to generate warning
534 * messages about bad magic numbers. This is ok. The fact that
535 * the inode isn't actually an inode is handled by the
536 * error check below. Done this way to make the usual case faster
537 * at the expense of the error case.
538 */
539
540 ino = *lastinop;
541 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
542 NULL, &res);
543 if (error) {
544 /*
545 * Special case way failed, do it the "long" way
546 * to see if that works.
547 */
548 (*lastinop)--;
549 count = 1;
550 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
551 sizeof(xfs_bstat_t), buffer, done))
552 return error;
553 if (count == 0 || (xfs_ino_t)*lastinop != ino)
554 return error == EFSCORRUPTED ?
555 XFS_ERROR(EINVAL) : error;
556 else
557 return 0;
558 }
559 *done = 0;
560 return 0;
561}
562
563int 545int
564xfs_inumbers_fmt( 546xfs_inumbers_fmt(
565 void __user *ubuffer, /* buffer to write to */ 547 void __user *ubuffer, /* buffer to write to */
566 const xfs_inogrp_t *buffer, /* buffer to read from */ 548 const struct xfs_inogrp *buffer, /* buffer to read from */
567 long count, /* # of elements to read */ 549 long count, /* # of elements to read */
568 long *written) /* # of bytes written */ 550 long *written) /* # of bytes written */
569{ 551{
@@ -578,127 +560,104 @@ xfs_inumbers_fmt(
578 */ 560 */
579int /* error status */ 561int /* error status */
580xfs_inumbers( 562xfs_inumbers(
581 xfs_mount_t *mp, /* mount point for filesystem */ 563 struct xfs_mount *mp,/* mount point for filesystem */
582 xfs_ino_t *lastino, /* last inode returned */ 564 xfs_ino_t *lastino,/* last inode returned */
583 int *count, /* size of buffer/count returned */ 565 int *count,/* size of buffer/count returned */
584 void __user *ubuffer,/* buffer with inode descriptions */ 566 void __user *ubuffer,/* buffer with inode descriptions */
585 inumbers_fmt_pf formatter) 567 inumbers_fmt_pf formatter)
586{ 568{
587 xfs_buf_t *agbp; 569 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino);
588 xfs_agino_t agino; 570 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino);
589 xfs_agnumber_t agno; 571 struct xfs_btree_cur *cur = NULL;
590 int bcount; 572 struct xfs_buf *agbp = NULL;
591 xfs_inogrp_t *buffer; 573 struct xfs_inogrp *buffer;
592 int bufidx; 574 int bcount;
593 xfs_btree_cur_t *cur; 575 int left = *count;
594 int error; 576 int bufidx = 0;
595 xfs_inobt_rec_incore_t r; 577 int error = 0;
596 int i; 578
597 xfs_ino_t ino;
598 int left;
599 int tmp;
600
601 ino = (xfs_ino_t)*lastino;
602 agno = XFS_INO_TO_AGNO(mp, ino);
603 agino = XFS_INO_TO_AGINO(mp, ino);
604 left = *count;
605 *count = 0; 579 *count = 0;
580 if (agno >= mp->m_sb.sb_agcount ||
581 *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
582 return error;
583
606 bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); 584 bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
607 buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); 585 buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
608 error = bufidx = 0; 586 do {
609 cur = NULL; 587 struct xfs_inobt_rec_incore r;
610 agbp = NULL; 588 int stat;
611 while (left > 0 && agno < mp->m_sb.sb_agcount) { 589
612 if (agbp == NULL) { 590 if (!agbp) {
613 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); 591 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
614 if (error) { 592 if (error)
615 /* 593 break;
616 * If we can't read the AGI of this ag, 594
617 * then just skip to the next one.
618 */
619 ASSERT(cur == NULL);
620 agbp = NULL;
621 agno++;
622 agino = 0;
623 continue;
624 }
625 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, 595 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
626 XFS_BTNUM_INO); 596 XFS_BTNUM_INO);
627 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, 597 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
628 &tmp); 598 &stat);
629 if (error) { 599 if (error)
630 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 600 break;
631 cur = NULL; 601 if (!stat)
632 xfs_buf_relse(agbp); 602 goto next_ag;
633 agbp = NULL;
634 /*
635 * Move up the last inode in the current
636 * chunk. The lookup_ge will always get
637 * us the first inode in the next chunk.
638 */
639 agino += XFS_INODES_PER_CHUNK - 1;
640 continue;
641 }
642 }
643 error = xfs_inobt_get_rec(cur, &r, &i);
644 if (error || i == 0) {
645 xfs_buf_relse(agbp);
646 agbp = NULL;
647 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
648 cur = NULL;
649 agno++;
650 agino = 0;
651 continue;
652 } 603 }
604
605 error = xfs_inobt_get_rec(cur, &r, &stat);
606 if (error)
607 break;
608 if (!stat)
609 goto next_ag;
610
653 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; 611 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
654 buffer[bufidx].xi_startino = 612 buffer[bufidx].xi_startino =
655 XFS_AGINO_TO_INO(mp, agno, r.ir_startino); 613 XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
656 buffer[bufidx].xi_alloccount = 614 buffer[bufidx].xi_alloccount =
657 XFS_INODES_PER_CHUNK - r.ir_freecount; 615 XFS_INODES_PER_CHUNK - r.ir_freecount;
658 buffer[bufidx].xi_allocmask = ~r.ir_free; 616 buffer[bufidx].xi_allocmask = ~r.ir_free;
659 bufidx++; 617 if (++bufidx == bcount) {
660 left--; 618 long written;
661 if (bufidx == bcount) { 619
662 long written; 620 error = formatter(ubuffer, buffer, bufidx, &written);
663 if (formatter(ubuffer, buffer, bufidx, &written)) { 621 if (error)
664 error = XFS_ERROR(EFAULT);
665 break; 622 break;
666 }
667 ubuffer += written; 623 ubuffer += written;
668 *count += bufidx; 624 *count += bufidx;
669 bufidx = 0; 625 bufidx = 0;
670 } 626 }
671 if (left) { 627 if (!--left)
672 error = xfs_btree_increment(cur, 0, &tmp); 628 break;
673 if (error) { 629
674 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 630 error = xfs_btree_increment(cur, 0, &stat);
675 cur = NULL; 631 if (error)
676 xfs_buf_relse(agbp); 632 break;
677 agbp = NULL; 633 if (stat)
678 /* 634 continue;
679 * The agino value has already been bumped. 635
680 * Just try to skip up to it. 636next_ag:
681 */ 637 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
682 agino += XFS_INODES_PER_CHUNK; 638 cur = NULL;
683 continue; 639 xfs_buf_relse(agbp);
684 } 640 agbp = NULL;
685 } 641 agino = 0;
686 } 642 } while (++agno < mp->m_sb.sb_agcount);
643
687 if (!error) { 644 if (!error) {
688 if (bufidx) { 645 if (bufidx) {
689 long written; 646 long written;
690 if (formatter(ubuffer, buffer, bufidx, &written)) 647
691 error = XFS_ERROR(EFAULT); 648 error = formatter(ubuffer, buffer, bufidx, &written);
692 else 649 if (!error)
693 *count += bufidx; 650 *count += bufidx;
694 } 651 }
695 *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 652 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
696 } 653 }
654
697 kmem_free(buffer); 655 kmem_free(buffer);
698 if (cur) 656 if (cur)
699 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : 657 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
700 XFS_BTREE_NOERROR)); 658 XFS_BTREE_NOERROR));
701 if (agbp) 659 if (agbp)
702 xfs_buf_relse(agbp); 660 xfs_buf_relse(agbp);
661
703 return error; 662 return error;
704} 663}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 97295d91d170..aaed08022eb9 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,6 +30,22 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
30 int *ubused, 30 int *ubused,
31 int *stat); 31 int *stat);
32 32
33struct xfs_bulkstat_agichunk {
34 xfs_ino_t ac_lastino; /* last inode returned */
35 char __user **ac_ubuffer;/* pointer into user's buffer */
36 int ac_ubleft; /* bytes left in user's buffer */
37 int ac_ubelem; /* spaces used in user's buffer */
38};
39
40int
41xfs_bulkstat_ag_ichunk(
42 struct xfs_mount *mp,
43 xfs_agnumber_t agno,
44 struct xfs_inobt_rec_incore *irbp,
45 bulkstat_one_pf formatter,
46 size_t statstruct_size,
47 struct xfs_bulkstat_agichunk *acp);
48
33/* 49/*
34 * Values for stat return value. 50 * Values for stat return value.
35 */ 51 */
@@ -50,13 +66,6 @@ xfs_bulkstat(
50 char __user *ubuffer,/* buffer with inode stats */ 66 char __user *ubuffer,/* buffer with inode stats */
51 int *done); /* 1 if there are more stats to get */ 67 int *done); /* 1 if there are more stats to get */
52 68
53int
54xfs_bulkstat_single(
55 xfs_mount_t *mp,
56 xfs_ino_t *lastinop,
57 char __user *buffer,
58 int *done);
59
60typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ 69typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
61 void __user *ubuffer, /* buffer to write to */ 70 void __user *ubuffer, /* buffer to write to */
62 int ubsize, /* remaining user buffer sz */ 71 int ubsize, /* remaining user buffer sz */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 825249d2dfc1..d10dc8f397c9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -21,18 +21,6 @@
21#include <linux/types.h> 21#include <linux/types.h>
22 22
23/* 23/*
24 * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
25 * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
26 */
27#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
28# define XFS_BIG_BLKNOS 1
29# define XFS_BIG_INUMS 1
30#else
31# define XFS_BIG_BLKNOS 0
32# define XFS_BIG_INUMS 0
33#endif
34
35/*
36 * Kernel specific type declarations for XFS 24 * Kernel specific type declarations for XFS
37 */ 25 */
38typedef signed char __int8_t; 26typedef signed char __int8_t;
@@ -113,7 +101,7 @@ typedef __uint64_t __psunsigned_t;
113#include <asm/byteorder.h> 101#include <asm/byteorder.h>
114#include <asm/unaligned.h> 102#include <asm/unaligned.h>
115 103
116#include "xfs_vnode.h" 104#include "xfs_fs.h"
117#include "xfs_stats.h" 105#include "xfs_stats.h"
118#include "xfs_sysctl.h" 106#include "xfs_sysctl.h"
119#include "xfs_iops.h" 107#include "xfs_iops.h"
@@ -191,6 +179,17 @@ typedef __uint64_t __psunsigned_t;
191#define MAX(a,b) (max(a,b)) 179#define MAX(a,b) (max(a,b))
192#define howmany(x, y) (((x)+((y)-1))/(y)) 180#define howmany(x, y) (((x)+((y)-1))/(y))
193 181
182/*
183 * XFS wrapper structure for sysfs support. It depends on external data
184 * structures and is embedded in various internal data structures to implement
 185 * the XFS sysfs object hierarchy. Define it here for broad access throughout
186 * the codebase.
187 */
188struct xfs_kobj {
189 struct kobject kobject;
190 struct completion complete;
191};
192
194/* Kernel uid/gid conversion. These are used to convert to/from the on disk 193/* Kernel uid/gid conversion. These are used to convert to/from the on disk
195 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. 194 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
196 * The conversion here is type only, the value will remain the same since we 195 * The conversion here is type only, the value will remain the same since we
@@ -331,7 +330,7 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
331{ 330{
332 x += y - 1; 331 x += y - 1;
333 do_div(x, y); 332 do_div(x, y);
334 return(x * y); 333 return x * y;
335} 334}
336 335
337static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) 336static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 292308dede6d..ca4fd5bd8522 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -34,6 +34,7 @@
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35#include "xfs_fsops.h" 35#include "xfs_fsops.h"
36#include "xfs_cksum.h" 36#include "xfs_cksum.h"
37#include "xfs_sysfs.h"
37 38
38kmem_zone_t *xfs_log_ticket_zone; 39kmem_zone_t *xfs_log_ticket_zone;
39 40
@@ -283,7 +284,7 @@ xlog_grant_head_wait(
283 return 0; 284 return 0;
284shutdown: 285shutdown:
285 list_del_init(&tic->t_queue); 286 list_del_init(&tic->t_queue);
286 return XFS_ERROR(EIO); 287 return -EIO;
287} 288}
288 289
289/* 290/*
@@ -377,7 +378,7 @@ xfs_log_regrant(
377 int error = 0; 378 int error = 0;
378 379
379 if (XLOG_FORCED_SHUTDOWN(log)) 380 if (XLOG_FORCED_SHUTDOWN(log))
380 return XFS_ERROR(EIO); 381 return -EIO;
381 382
382 XFS_STATS_INC(xs_try_logspace); 383 XFS_STATS_INC(xs_try_logspace);
383 384
@@ -446,7 +447,7 @@ xfs_log_reserve(
446 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); 447 ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
447 448
448 if (XLOG_FORCED_SHUTDOWN(log)) 449 if (XLOG_FORCED_SHUTDOWN(log))
449 return XFS_ERROR(EIO); 450 return -EIO;
450 451
451 XFS_STATS_INC(xs_try_logspace); 452 XFS_STATS_INC(xs_try_logspace);
452 453
@@ -454,7 +455,7 @@ xfs_log_reserve(
454 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 455 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
455 KM_SLEEP | KM_MAYFAIL); 456 KM_SLEEP | KM_MAYFAIL);
456 if (!tic) 457 if (!tic)
457 return XFS_ERROR(ENOMEM); 458 return -ENOMEM;
458 459
459 tic->t_trans_type = t_type; 460 tic->t_trans_type = t_type;
460 *ticp = tic; 461 *ticp = tic;
@@ -590,7 +591,7 @@ xfs_log_release_iclog(
590{ 591{
591 if (xlog_state_release_iclog(mp->m_log, iclog)) { 592 if (xlog_state_release_iclog(mp->m_log, iclog)) {
592 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 593 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
593 return EIO; 594 return -EIO;
594 } 595 }
595 596
596 return 0; 597 return 0;
@@ -628,7 +629,7 @@ xfs_log_mount(
628 629
629 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); 630 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
630 if (IS_ERR(mp->m_log)) { 631 if (IS_ERR(mp->m_log)) {
631 error = -PTR_ERR(mp->m_log); 632 error = PTR_ERR(mp->m_log);
632 goto out; 633 goto out;
633 } 634 }
634 635
@@ -652,18 +653,18 @@ xfs_log_mount(
652 xfs_warn(mp, 653 xfs_warn(mp,
653 "Log size %d blocks too small, minimum size is %d blocks", 654 "Log size %d blocks too small, minimum size is %d blocks",
654 mp->m_sb.sb_logblocks, min_logfsbs); 655 mp->m_sb.sb_logblocks, min_logfsbs);
655 error = EINVAL; 656 error = -EINVAL;
656 } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { 657 } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
657 xfs_warn(mp, 658 xfs_warn(mp,
658 "Log size %d blocks too large, maximum size is %lld blocks", 659 "Log size %d blocks too large, maximum size is %lld blocks",
659 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); 660 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
660 error = EINVAL; 661 error = -EINVAL;
661 } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { 662 } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
662 xfs_warn(mp, 663 xfs_warn(mp,
663 "log size %lld bytes too large, maximum size is %lld bytes", 664 "log size %lld bytes too large, maximum size is %lld bytes",
664 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), 665 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
665 XFS_MAX_LOG_BYTES); 666 XFS_MAX_LOG_BYTES);
666 error = EINVAL; 667 error = -EINVAL;
667 } 668 }
668 if (error) { 669 if (error) {
669 if (xfs_sb_version_hascrc(&mp->m_sb)) { 670 if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -707,6 +708,11 @@ xfs_log_mount(
707 } 708 }
708 } 709 }
709 710
711 error = xfs_sysfs_init(&mp->m_log->l_kobj, &xfs_log_ktype, &mp->m_kobj,
712 "log");
713 if (error)
714 goto out_destroy_ail;
715
710 /* Normal transactions can now occur */ 716 /* Normal transactions can now occur */
711 mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY; 717 mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
712 718
@@ -947,6 +953,9 @@ xfs_log_unmount(
947 xfs_log_quiesce(mp); 953 xfs_log_quiesce(mp);
948 954
949 xfs_trans_ail_destroy(mp); 955 xfs_trans_ail_destroy(mp);
956
957 xfs_sysfs_del(&mp->m_log->l_kobj);
958
950 xlog_dealloc_log(mp->m_log); 959 xlog_dealloc_log(mp->m_log);
951} 960}
952 961
@@ -1313,7 +1322,7 @@ xlog_alloc_log(
1313 xlog_in_core_t *iclog, *prev_iclog=NULL; 1322 xlog_in_core_t *iclog, *prev_iclog=NULL;
1314 xfs_buf_t *bp; 1323 xfs_buf_t *bp;
1315 int i; 1324 int i;
1316 int error = ENOMEM; 1325 int error = -ENOMEM;
1317 uint log2_size = 0; 1326 uint log2_size = 0;
1318 1327
1319 log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL); 1328 log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
@@ -1340,7 +1349,7 @@ xlog_alloc_log(
1340 xlog_grant_head_init(&log->l_reserve_head); 1349 xlog_grant_head_init(&log->l_reserve_head);
1341 xlog_grant_head_init(&log->l_write_head); 1350 xlog_grant_head_init(&log->l_write_head);
1342 1351
1343 error = EFSCORRUPTED; 1352 error = -EFSCORRUPTED;
1344 if (xfs_sb_version_hassector(&mp->m_sb)) { 1353 if (xfs_sb_version_hassector(&mp->m_sb)) {
1345 log2_size = mp->m_sb.sb_logsectlog; 1354 log2_size = mp->m_sb.sb_logsectlog;
1346 if (log2_size < BBSHIFT) { 1355 if (log2_size < BBSHIFT) {
@@ -1369,8 +1378,14 @@ xlog_alloc_log(
1369 1378
1370 xlog_get_iclog_buffer_size(mp, log); 1379 xlog_get_iclog_buffer_size(mp, log);
1371 1380
1372 error = ENOMEM; 1381 /*
1373 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); 1382 * Use a NULL block for the extra log buffer used during splits so that
1383 * it will trigger errors if we ever try to do IO on it without first
1384 * having set it up properly.
1385 */
1386 error = -ENOMEM;
1387 bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
1388 BTOBB(log->l_iclog_size), 0);
1374 if (!bp) 1389 if (!bp)
1375 goto out_free_log; 1390 goto out_free_log;
1376 1391
@@ -1463,7 +1478,7 @@ out_free_iclog:
1463out_free_log: 1478out_free_log:
1464 kmem_free(log); 1479 kmem_free(log);
1465out: 1480out:
1466 return ERR_PTR(-error); 1481 return ERR_PTR(error);
1467} /* xlog_alloc_log */ 1482} /* xlog_alloc_log */
1468 1483
1469 1484
@@ -1661,7 +1676,7 @@ xlog_bdstrat(
1661 1676
1662 xfs_buf_lock(bp); 1677 xfs_buf_lock(bp);
1663 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1678 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1664 xfs_buf_ioerror(bp, EIO); 1679 xfs_buf_ioerror(bp, -EIO);
1665 xfs_buf_stale(bp); 1680 xfs_buf_stale(bp);
1666 xfs_buf_ioend(bp, 0); 1681 xfs_buf_ioend(bp, 0);
1667 /* 1682 /*
@@ -2360,7 +2375,7 @@ xlog_write(
2360 2375
2361 ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags); 2376 ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
2362 if (!ophdr) 2377 if (!ophdr)
2363 return XFS_ERROR(EIO); 2378 return -EIO;
2364 2379
2365 xlog_write_adv_cnt(&ptr, &len, &log_offset, 2380 xlog_write_adv_cnt(&ptr, &len, &log_offset,
2366 sizeof(struct xlog_op_header)); 2381 sizeof(struct xlog_op_header));
@@ -2859,7 +2874,7 @@ restart:
2859 spin_lock(&log->l_icloglock); 2874 spin_lock(&log->l_icloglock);
2860 if (XLOG_FORCED_SHUTDOWN(log)) { 2875 if (XLOG_FORCED_SHUTDOWN(log)) {
2861 spin_unlock(&log->l_icloglock); 2876 spin_unlock(&log->l_icloglock);
2862 return XFS_ERROR(EIO); 2877 return -EIO;
2863 } 2878 }
2864 2879
2865 iclog = log->l_iclog; 2880 iclog = log->l_iclog;
@@ -3047,7 +3062,7 @@ xlog_state_release_iclog(
3047 int sync = 0; /* do we sync? */ 3062 int sync = 0; /* do we sync? */
3048 3063
3049 if (iclog->ic_state & XLOG_STATE_IOERROR) 3064 if (iclog->ic_state & XLOG_STATE_IOERROR)
3050 return XFS_ERROR(EIO); 3065 return -EIO;
3051 3066
3052 ASSERT(atomic_read(&iclog->ic_refcnt) > 0); 3067 ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
3053 if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) 3068 if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
@@ -3055,7 +3070,7 @@ xlog_state_release_iclog(
3055 3070
3056 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3071 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3057 spin_unlock(&log->l_icloglock); 3072 spin_unlock(&log->l_icloglock);
3058 return XFS_ERROR(EIO); 3073 return -EIO;
3059 } 3074 }
3060 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || 3075 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
3061 iclog->ic_state == XLOG_STATE_WANT_SYNC); 3076 iclog->ic_state == XLOG_STATE_WANT_SYNC);
@@ -3172,7 +3187,7 @@ _xfs_log_force(
3172 iclog = log->l_iclog; 3187 iclog = log->l_iclog;
3173 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3188 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3174 spin_unlock(&log->l_icloglock); 3189 spin_unlock(&log->l_icloglock);
3175 return XFS_ERROR(EIO); 3190 return -EIO;
3176 } 3191 }
3177 3192
3178 /* If the head iclog is not active nor dirty, we just attach 3193 /* If the head iclog is not active nor dirty, we just attach
@@ -3210,7 +3225,7 @@ _xfs_log_force(
3210 spin_unlock(&log->l_icloglock); 3225 spin_unlock(&log->l_icloglock);
3211 3226
3212 if (xlog_state_release_iclog(log, iclog)) 3227 if (xlog_state_release_iclog(log, iclog))
3213 return XFS_ERROR(EIO); 3228 return -EIO;
3214 3229
3215 if (log_flushed) 3230 if (log_flushed)
3216 *log_flushed = 1; 3231 *log_flushed = 1;
@@ -3246,7 +3261,7 @@ maybe_sleep:
3246 */ 3261 */
3247 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3262 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3248 spin_unlock(&log->l_icloglock); 3263 spin_unlock(&log->l_icloglock);
3249 return XFS_ERROR(EIO); 3264 return -EIO;
3250 } 3265 }
3251 XFS_STATS_INC(xs_log_force_sleep); 3266 XFS_STATS_INC(xs_log_force_sleep);
3252 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3267 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
@@ -3256,7 +3271,7 @@ maybe_sleep:
3256 * and the memory read should be atomic. 3271 * and the memory read should be atomic.
3257 */ 3272 */
3258 if (iclog->ic_state & XLOG_STATE_IOERROR) 3273 if (iclog->ic_state & XLOG_STATE_IOERROR)
3259 return XFS_ERROR(EIO); 3274 return -EIO;
3260 if (log_flushed) 3275 if (log_flushed)
3261 *log_flushed = 1; 3276 *log_flushed = 1;
3262 } else { 3277 } else {
@@ -3324,7 +3339,7 @@ try_again:
3324 iclog = log->l_iclog; 3339 iclog = log->l_iclog;
3325 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3340 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3326 spin_unlock(&log->l_icloglock); 3341 spin_unlock(&log->l_icloglock);
3327 return XFS_ERROR(EIO); 3342 return -EIO;
3328 } 3343 }
3329 3344
3330 do { 3345 do {
@@ -3375,7 +3390,7 @@ try_again:
3375 xlog_state_switch_iclogs(log, iclog, 0); 3390 xlog_state_switch_iclogs(log, iclog, 0);
3376 spin_unlock(&log->l_icloglock); 3391 spin_unlock(&log->l_icloglock);
3377 if (xlog_state_release_iclog(log, iclog)) 3392 if (xlog_state_release_iclog(log, iclog))
3378 return XFS_ERROR(EIO); 3393 return -EIO;
3379 if (log_flushed) 3394 if (log_flushed)
3380 *log_flushed = 1; 3395 *log_flushed = 1;
3381 spin_lock(&log->l_icloglock); 3396 spin_lock(&log->l_icloglock);
@@ -3390,7 +3405,7 @@ try_again:
3390 */ 3405 */
3391 if (iclog->ic_state & XLOG_STATE_IOERROR) { 3406 if (iclog->ic_state & XLOG_STATE_IOERROR) {
3392 spin_unlock(&log->l_icloglock); 3407 spin_unlock(&log->l_icloglock);
3393 return XFS_ERROR(EIO); 3408 return -EIO;
3394 } 3409 }
3395 XFS_STATS_INC(xs_log_force_sleep); 3410 XFS_STATS_INC(xs_log_force_sleep);
3396 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3411 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
@@ -3400,7 +3415,7 @@ try_again:
3400 * and the memory read should be atomic. 3415 * and the memory read should be atomic.
3401 */ 3416 */
3402 if (iclog->ic_state & XLOG_STATE_IOERROR) 3417 if (iclog->ic_state & XLOG_STATE_IOERROR)
3403 return XFS_ERROR(EIO); 3418 return -EIO;
3404 3419
3405 if (log_flushed) 3420 if (log_flushed)
3406 *log_flushed = 1; 3421 *log_flushed = 1;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b3425b34e3d5..f6b79e5325dd 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -78,8 +78,6 @@ xlog_cil_init_post_recovery(
78{ 78{
79 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); 79 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
80 log->l_cilp->xc_ctx->sequence = 1; 80 log->l_cilp->xc_ctx->sequence = 1;
81 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
82 log->l_curr_block);
83} 81}
84 82
85/* 83/*
@@ -634,7 +632,7 @@ out_abort_free_ticket:
634 xfs_log_ticket_put(tic); 632 xfs_log_ticket_put(tic);
635out_abort: 633out_abort:
636 xlog_cil_committed(ctx, XFS_LI_ABORTED); 634 xlog_cil_committed(ctx, XFS_LI_ABORTED);
637 return XFS_ERROR(EIO); 635 return -EIO;
638} 636}
639 637
640static void 638static void
@@ -928,12 +926,12 @@ xlog_cil_init(
928 926
929 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); 927 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
930 if (!cil) 928 if (!cil)
931 return ENOMEM; 929 return -ENOMEM;
932 930
933 ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL); 931 ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
934 if (!ctx) { 932 if (!ctx) {
935 kmem_free(cil); 933 kmem_free(cil);
936 return ENOMEM; 934 return -ENOMEM;
937 } 935 }
938 936
939 INIT_WORK(&cil->xc_push_work, xlog_cil_push_work); 937 INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9bc403a9e54f..db7cbdeb2b42 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -405,6 +405,8 @@ struct xlog {
405 struct xlog_grant_head l_reserve_head; 405 struct xlog_grant_head l_reserve_head;
406 struct xlog_grant_head l_write_head; 406 struct xlog_grant_head l_write_head;
407 407
408 struct xfs_kobj l_kobj;
409
408 /* The following field are used for debugging; need to hold icloglock */ 410 /* The following field are used for debugging; need to hold icloglock */
409#ifdef DEBUG 411#ifdef DEBUG
410 char *l_iclog_bak[XLOG_MAX_ICLOGS]; 412 char *l_iclog_bak[XLOG_MAX_ICLOGS];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 981af0f6504b..1fd5787add99 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -179,7 +179,7 @@ xlog_bread_noalign(
179 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 179 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
180 nbblks); 180 nbblks);
181 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 181 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
182 return EFSCORRUPTED; 182 return -EFSCORRUPTED;
183 } 183 }
184 184
185 blk_no = round_down(blk_no, log->l_sectBBsize); 185 blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -194,7 +194,7 @@ xlog_bread_noalign(
194 bp->b_error = 0; 194 bp->b_error = 0;
195 195
196 if (XFS_FORCED_SHUTDOWN(log->l_mp)) 196 if (XFS_FORCED_SHUTDOWN(log->l_mp))
197 return XFS_ERROR(EIO); 197 return -EIO;
198 198
199 xfs_buf_iorequest(bp); 199 xfs_buf_iorequest(bp);
200 error = xfs_buf_iowait(bp); 200 error = xfs_buf_iowait(bp);
@@ -268,7 +268,7 @@ xlog_bwrite(
268 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 268 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
269 nbblks); 269 nbblks);
270 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 270 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
271 return EFSCORRUPTED; 271 return -EFSCORRUPTED;
272 } 272 }
273 273
274 blk_no = round_down(blk_no, log->l_sectBBsize); 274 blk_no = round_down(blk_no, log->l_sectBBsize);
@@ -330,14 +330,14 @@ xlog_header_check_recover(
330 xlog_header_check_dump(mp, head); 330 xlog_header_check_dump(mp, head);
331 XFS_ERROR_REPORT("xlog_header_check_recover(1)", 331 XFS_ERROR_REPORT("xlog_header_check_recover(1)",
332 XFS_ERRLEVEL_HIGH, mp); 332 XFS_ERRLEVEL_HIGH, mp);
333 return XFS_ERROR(EFSCORRUPTED); 333 return -EFSCORRUPTED;
334 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 334 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
335 xfs_warn(mp, 335 xfs_warn(mp,
336 "dirty log entry has mismatched uuid - can't recover"); 336 "dirty log entry has mismatched uuid - can't recover");
337 xlog_header_check_dump(mp, head); 337 xlog_header_check_dump(mp, head);
338 XFS_ERROR_REPORT("xlog_header_check_recover(2)", 338 XFS_ERROR_REPORT("xlog_header_check_recover(2)",
339 XFS_ERRLEVEL_HIGH, mp); 339 XFS_ERRLEVEL_HIGH, mp);
340 return XFS_ERROR(EFSCORRUPTED); 340 return -EFSCORRUPTED;
341 } 341 }
342 return 0; 342 return 0;
343} 343}
@@ -364,7 +364,7 @@ xlog_header_check_mount(
364 xlog_header_check_dump(mp, head); 364 xlog_header_check_dump(mp, head);
365 XFS_ERROR_REPORT("xlog_header_check_mount", 365 XFS_ERROR_REPORT("xlog_header_check_mount",
366 XFS_ERRLEVEL_HIGH, mp); 366 XFS_ERRLEVEL_HIGH, mp);
367 return XFS_ERROR(EFSCORRUPTED); 367 return -EFSCORRUPTED;
368 } 368 }
369 return 0; 369 return 0;
370} 370}
@@ -462,7 +462,7 @@ xlog_find_verify_cycle(
462 while (!(bp = xlog_get_bp(log, bufblks))) { 462 while (!(bp = xlog_get_bp(log, bufblks))) {
463 bufblks >>= 1; 463 bufblks >>= 1;
464 if (bufblks < log->l_sectBBsize) 464 if (bufblks < log->l_sectBBsize)
465 return ENOMEM; 465 return -ENOMEM;
466 } 466 }
467 467
468 for (i = start_blk; i < start_blk + nbblks; i += bufblks) { 468 for (i = start_blk; i < start_blk + nbblks; i += bufblks) {
@@ -524,7 +524,7 @@ xlog_find_verify_log_record(
524 524
525 if (!(bp = xlog_get_bp(log, num_blks))) { 525 if (!(bp = xlog_get_bp(log, num_blks))) {
526 if (!(bp = xlog_get_bp(log, 1))) 526 if (!(bp = xlog_get_bp(log, 1)))
527 return ENOMEM; 527 return -ENOMEM;
528 smallmem = 1; 528 smallmem = 1;
529 } else { 529 } else {
530 error = xlog_bread(log, start_blk, num_blks, bp, &offset); 530 error = xlog_bread(log, start_blk, num_blks, bp, &offset);
@@ -539,7 +539,7 @@ xlog_find_verify_log_record(
539 xfs_warn(log->l_mp, 539 xfs_warn(log->l_mp,
540 "Log inconsistent (didn't find previous header)"); 540 "Log inconsistent (didn't find previous header)");
541 ASSERT(0); 541 ASSERT(0);
542 error = XFS_ERROR(EIO); 542 error = -EIO;
543 goto out; 543 goto out;
544 } 544 }
545 545
@@ -564,7 +564,7 @@ xlog_find_verify_log_record(
564 * will be called again for the end of the physical log. 564 * will be called again for the end of the physical log.
565 */ 565 */
566 if (i == -1) { 566 if (i == -1) {
567 error = -1; 567 error = 1;
568 goto out; 568 goto out;
569 } 569 }
570 570
@@ -628,7 +628,12 @@ xlog_find_head(
628 int error, log_bbnum = log->l_logBBsize; 628 int error, log_bbnum = log->l_logBBsize;
629 629
630 /* Is the end of the log device zeroed? */ 630 /* Is the end of the log device zeroed? */
631 if ((error = xlog_find_zeroed(log, &first_blk)) == -1) { 631 error = xlog_find_zeroed(log, &first_blk);
632 if (error < 0) {
633 xfs_warn(log->l_mp, "empty log check failed");
634 return error;
635 }
636 if (error == 1) {
632 *return_head_blk = first_blk; 637 *return_head_blk = first_blk;
633 638
634 /* Is the whole lot zeroed? */ 639 /* Is the whole lot zeroed? */
@@ -641,15 +646,12 @@ xlog_find_head(
641 } 646 }
642 647
643 return 0; 648 return 0;
644 } else if (error) {
645 xfs_warn(log->l_mp, "empty log check failed");
646 return error;
647 } 649 }
648 650
649 first_blk = 0; /* get cycle # of 1st block */ 651 first_blk = 0; /* get cycle # of 1st block */
650 bp = xlog_get_bp(log, 1); 652 bp = xlog_get_bp(log, 1);
651 if (!bp) 653 if (!bp)
652 return ENOMEM; 654 return -ENOMEM;
653 655
654 error = xlog_bread(log, 0, 1, bp, &offset); 656 error = xlog_bread(log, 0, 1, bp, &offset);
655 if (error) 657 if (error)
@@ -818,29 +820,29 @@ validate_head:
818 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ 820 start_blk = head_blk - num_scan_bblks; /* don't read head_blk */
819 821
820 /* start ptr at last block ptr before head_blk */ 822 /* start ptr at last block ptr before head_blk */
821 if ((error = xlog_find_verify_log_record(log, start_blk, 823 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
822 &head_blk, 0)) == -1) { 824 if (error == 1)
823 error = XFS_ERROR(EIO); 825 error = -EIO;
824 goto bp_err; 826 if (error)
825 } else if (error)
826 goto bp_err; 827 goto bp_err;
827 } else { 828 } else {
828 start_blk = 0; 829 start_blk = 0;
829 ASSERT(head_blk <= INT_MAX); 830 ASSERT(head_blk <= INT_MAX);
830 if ((error = xlog_find_verify_log_record(log, start_blk, 831 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
831 &head_blk, 0)) == -1) { 832 if (error < 0)
833 goto bp_err;
834 if (error == 1) {
832 /* We hit the beginning of the log during our search */ 835 /* We hit the beginning of the log during our search */
833 start_blk = log_bbnum - (num_scan_bblks - head_blk); 836 start_blk = log_bbnum - (num_scan_bblks - head_blk);
834 new_blk = log_bbnum; 837 new_blk = log_bbnum;
835 ASSERT(start_blk <= INT_MAX && 838 ASSERT(start_blk <= INT_MAX &&
836 (xfs_daddr_t) log_bbnum-start_blk >= 0); 839 (xfs_daddr_t) log_bbnum-start_blk >= 0);
837 ASSERT(head_blk <= INT_MAX); 840 ASSERT(head_blk <= INT_MAX);
838 if ((error = xlog_find_verify_log_record(log, 841 error = xlog_find_verify_log_record(log, start_blk,
839 start_blk, &new_blk, 842 &new_blk, (int)head_blk);
840 (int)head_blk)) == -1) { 843 if (error == 1)
841 error = XFS_ERROR(EIO); 844 error = -EIO;
842 goto bp_err; 845 if (error)
843 } else if (error)
844 goto bp_err; 846 goto bp_err;
845 if (new_blk != log_bbnum) 847 if (new_blk != log_bbnum)
846 head_blk = new_blk; 848 head_blk = new_blk;
@@ -911,7 +913,7 @@ xlog_find_tail(
911 913
912 bp = xlog_get_bp(log, 1); 914 bp = xlog_get_bp(log, 1);
913 if (!bp) 915 if (!bp)
914 return ENOMEM; 916 return -ENOMEM;
915 if (*head_blk == 0) { /* special case */ 917 if (*head_blk == 0) { /* special case */
916 error = xlog_bread(log, 0, 1, bp, &offset); 918 error = xlog_bread(log, 0, 1, bp, &offset);
917 if (error) 919 if (error)
@@ -961,7 +963,7 @@ xlog_find_tail(
961 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); 963 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
962 xlog_put_bp(bp); 964 xlog_put_bp(bp);
963 ASSERT(0); 965 ASSERT(0);
964 return XFS_ERROR(EIO); 966 return -EIO;
965 } 967 }
966 968
967 /* find blk_no of tail of log */ 969 /* find blk_no of tail of log */
@@ -1092,8 +1094,8 @@ done:
1092 * 1094 *
1093 * Return: 1095 * Return:
1094 * 0 => the log is completely written to 1096 * 0 => the log is completely written to
1095 * -1 => use *blk_no as the first block of the log 1097 * 1 => use *blk_no as the first block of the log
1096 * >0 => error has occurred 1098 * <0 => error has occurred
1097 */ 1099 */
1098STATIC int 1100STATIC int
1099xlog_find_zeroed( 1101xlog_find_zeroed(
@@ -1112,7 +1114,7 @@ xlog_find_zeroed(
1112 /* check totally zeroed log */ 1114 /* check totally zeroed log */
1113 bp = xlog_get_bp(log, 1); 1115 bp = xlog_get_bp(log, 1);
1114 if (!bp) 1116 if (!bp)
1115 return ENOMEM; 1117 return -ENOMEM;
1116 error = xlog_bread(log, 0, 1, bp, &offset); 1118 error = xlog_bread(log, 0, 1, bp, &offset);
1117 if (error) 1119 if (error)
1118 goto bp_err; 1120 goto bp_err;
@@ -1121,7 +1123,7 @@ xlog_find_zeroed(
1121 if (first_cycle == 0) { /* completely zeroed log */ 1123 if (first_cycle == 0) { /* completely zeroed log */
1122 *blk_no = 0; 1124 *blk_no = 0;
1123 xlog_put_bp(bp); 1125 xlog_put_bp(bp);
1124 return -1; 1126 return 1;
1125 } 1127 }
1126 1128
1127 /* check partially zeroed log */ 1129 /* check partially zeroed log */
@@ -1141,7 +1143,7 @@ xlog_find_zeroed(
1141 */ 1143 */
1142 xfs_warn(log->l_mp, 1144 xfs_warn(log->l_mp,
1143 "Log inconsistent or not a log (last==0, first!=1)"); 1145 "Log inconsistent or not a log (last==0, first!=1)");
1144 error = XFS_ERROR(EINVAL); 1146 error = -EINVAL;
1145 goto bp_err; 1147 goto bp_err;
1146 } 1148 }
1147 1149
@@ -1179,19 +1181,18 @@ xlog_find_zeroed(
1179 * Potentially backup over partial log record write. We don't need 1181 * Potentially backup over partial log record write. We don't need
1180 * to search the end of the log because we know it is zero. 1182 * to search the end of the log because we know it is zero.
1181 */ 1183 */
1182 if ((error = xlog_find_verify_log_record(log, start_blk, 1184 error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0);
1183 &last_blk, 0)) == -1) { 1185 if (error == 1)
1184 error = XFS_ERROR(EIO); 1186 error = -EIO;
1185 goto bp_err; 1187 if (error)
1186 } else if (error) 1188 goto bp_err;
1187 goto bp_err;
1188 1189
1189 *blk_no = last_blk; 1190 *blk_no = last_blk;
1190bp_err: 1191bp_err:
1191 xlog_put_bp(bp); 1192 xlog_put_bp(bp);
1192 if (error) 1193 if (error)
1193 return error; 1194 return error;
1194 return -1; 1195 return 1;
1195} 1196}
1196 1197
1197/* 1198/*
@@ -1251,7 +1252,7 @@ xlog_write_log_records(
1251 while (!(bp = xlog_get_bp(log, bufblks))) { 1252 while (!(bp = xlog_get_bp(log, bufblks))) {
1252 bufblks >>= 1; 1253 bufblks >>= 1;
1253 if (bufblks < sectbb) 1254 if (bufblks < sectbb)
1254 return ENOMEM; 1255 return -ENOMEM;
1255 } 1256 }
1256 1257
1257 /* We may need to do a read at the start to fill in part of 1258 /* We may need to do a read at the start to fill in part of
@@ -1354,7 +1355,7 @@ xlog_clear_stale_blocks(
1354 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) { 1355 if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
1355 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)", 1356 XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
1356 XFS_ERRLEVEL_LOW, log->l_mp); 1357 XFS_ERRLEVEL_LOW, log->l_mp);
1357 return XFS_ERROR(EFSCORRUPTED); 1358 return -EFSCORRUPTED;
1358 } 1359 }
1359 tail_distance = tail_block + (log->l_logBBsize - head_block); 1360 tail_distance = tail_block + (log->l_logBBsize - head_block);
1360 } else { 1361 } else {
@@ -1366,7 +1367,7 @@ xlog_clear_stale_blocks(
1366 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){ 1367 if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
1367 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)", 1368 XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
1368 XFS_ERRLEVEL_LOW, log->l_mp); 1369 XFS_ERRLEVEL_LOW, log->l_mp);
1369 return XFS_ERROR(EFSCORRUPTED); 1370 return -EFSCORRUPTED;
1370 } 1371 }
1371 tail_distance = tail_block - head_block; 1372 tail_distance = tail_block - head_block;
1372 } 1373 }
@@ -1551,7 +1552,7 @@ xlog_recover_add_to_trans(
1551 xfs_warn(log->l_mp, "%s: bad header magic number", 1552 xfs_warn(log->l_mp, "%s: bad header magic number",
1552 __func__); 1553 __func__);
1553 ASSERT(0); 1554 ASSERT(0);
1554 return XFS_ERROR(EIO); 1555 return -EIO;
1555 } 1556 }
1556 if (len == sizeof(xfs_trans_header_t)) 1557 if (len == sizeof(xfs_trans_header_t))
1557 xlog_recover_add_item(&trans->r_itemq); 1558 xlog_recover_add_item(&trans->r_itemq);
@@ -1581,7 +1582,7 @@ xlog_recover_add_to_trans(
1581 in_f->ilf_size); 1582 in_f->ilf_size);
1582 ASSERT(0); 1583 ASSERT(0);
1583 kmem_free(ptr); 1584 kmem_free(ptr);
1584 return XFS_ERROR(EIO); 1585 return -EIO;
1585 } 1586 }
1586 1587
1587 item->ri_total = in_f->ilf_size; 1588 item->ri_total = in_f->ilf_size;
@@ -1702,7 +1703,7 @@ xlog_recover_reorder_trans(
1702 */ 1703 */
1703 if (!list_empty(&sort_list)) 1704 if (!list_empty(&sort_list))
1704 list_splice_init(&sort_list, &trans->r_itemq); 1705 list_splice_init(&sort_list, &trans->r_itemq);
1705 error = XFS_ERROR(EIO); 1706 error = -EIO;
1706 goto out; 1707 goto out;
1707 } 1708 }
1708 } 1709 }
@@ -1943,7 +1944,7 @@ xlog_recover_do_inode_buffer(
1943 item, bp); 1944 item, bp);
1944 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", 1945 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
1945 XFS_ERRLEVEL_LOW, mp); 1946 XFS_ERRLEVEL_LOW, mp);
1946 return XFS_ERROR(EFSCORRUPTED); 1947 return -EFSCORRUPTED;
1947 } 1948 }
1948 1949
1949 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1950 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
@@ -2125,6 +2126,17 @@ xlog_recover_validate_buf_type(
2125 __uint16_t magic16; 2126 __uint16_t magic16;
2126 __uint16_t magicda; 2127 __uint16_t magicda;
2127 2128
2129 /*
2130 * We can only do post recovery validation on items on CRC enabled
2131 * fielsystems as we need to know when the buffer was written to be able
2132 * to determine if we should have replayed the item. If we replay old
2133 * metadata over a newer buffer, then it will enter a temporarily
2134 * inconsistent state resulting in verification failures. Hence for now
2135 * just avoid the verification stage for non-crc filesystems
2136 */
2137 if (!xfs_sb_version_hascrc(&mp->m_sb))
2138 return;
2139
2128 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); 2140 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
2129 magic16 = be16_to_cpu(*(__be16*)bp->b_addr); 2141 magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
2130 magicda = be16_to_cpu(info->magic); 2142 magicda = be16_to_cpu(info->magic);
@@ -2162,8 +2174,6 @@ xlog_recover_validate_buf_type(
2162 bp->b_ops = &xfs_agf_buf_ops; 2174 bp->b_ops = &xfs_agf_buf_ops;
2163 break; 2175 break;
2164 case XFS_BLFT_AGFL_BUF: 2176 case XFS_BLFT_AGFL_BUF:
2165 if (!xfs_sb_version_hascrc(&mp->m_sb))
2166 break;
2167 if (magic32 != XFS_AGFL_MAGIC) { 2177 if (magic32 != XFS_AGFL_MAGIC) {
2168 xfs_warn(mp, "Bad AGFL block magic!"); 2178 xfs_warn(mp, "Bad AGFL block magic!");
2169 ASSERT(0); 2179 ASSERT(0);
@@ -2196,10 +2206,6 @@ xlog_recover_validate_buf_type(
2196#endif 2206#endif
2197 break; 2207 break;
2198 case XFS_BLFT_DINO_BUF: 2208 case XFS_BLFT_DINO_BUF:
2199 /*
2200 * we get here with inode allocation buffers, not buffers that
2201 * track unlinked list changes.
2202 */
2203 if (magic16 != XFS_DINODE_MAGIC) { 2209 if (magic16 != XFS_DINODE_MAGIC) {
2204 xfs_warn(mp, "Bad INODE block magic!"); 2210 xfs_warn(mp, "Bad INODE block magic!");
2205 ASSERT(0); 2211 ASSERT(0);
@@ -2279,8 +2285,6 @@ xlog_recover_validate_buf_type(
2279 bp->b_ops = &xfs_attr3_leaf_buf_ops; 2285 bp->b_ops = &xfs_attr3_leaf_buf_ops;
2280 break; 2286 break;
2281 case XFS_BLFT_ATTR_RMT_BUF: 2287 case XFS_BLFT_ATTR_RMT_BUF:
2282 if (!xfs_sb_version_hascrc(&mp->m_sb))
2283 break;
2284 if (magic32 != XFS_ATTR3_RMT_MAGIC) { 2288 if (magic32 != XFS_ATTR3_RMT_MAGIC) {
2285 xfs_warn(mp, "Bad attr remote magic!"); 2289 xfs_warn(mp, "Bad attr remote magic!");
2286 ASSERT(0); 2290 ASSERT(0);
@@ -2387,16 +2391,7 @@ xlog_recover_do_reg_buffer(
2387 /* Shouldn't be any more regions */ 2391 /* Shouldn't be any more regions */
2388 ASSERT(i == item->ri_total); 2392 ASSERT(i == item->ri_total);
2389 2393
2390 /* 2394 xlog_recover_validate_buf_type(mp, bp, buf_f);
2391 * We can only do post recovery validation on items on CRC enabled
2392 * fielsystems as we need to know when the buffer was written to be able
2393 * to determine if we should have replayed the item. If we replay old
2394 * metadata over a newer buffer, then it will enter a temporarily
2395 * inconsistent state resulting in verification failures. Hence for now
2396 * just avoid the verification stage for non-crc filesystems
2397 */
2398 if (xfs_sb_version_hascrc(&mp->m_sb))
2399 xlog_recover_validate_buf_type(mp, bp, buf_f);
2400} 2395}
2401 2396
2402/* 2397/*
@@ -2404,8 +2399,11 @@ xlog_recover_do_reg_buffer(
2404 * Simple algorithm: if we have found a QUOTAOFF log item of the same type 2399 * Simple algorithm: if we have found a QUOTAOFF log item of the same type
2405 * (ie. USR or GRP), then just toss this buffer away; don't recover it. 2400 * (ie. USR or GRP), then just toss this buffer away; don't recover it.
2406 * Else, treat it as a regular buffer and do recovery. 2401 * Else, treat it as a regular buffer and do recovery.
2402 *
2403 * Return false if the buffer was tossed and true if we recovered the buffer to
2404 * indicate to the caller if the buffer needs writing.
2407 */ 2405 */
2408STATIC void 2406STATIC bool
2409xlog_recover_do_dquot_buffer( 2407xlog_recover_do_dquot_buffer(
2410 struct xfs_mount *mp, 2408 struct xfs_mount *mp,
2411 struct xlog *log, 2409 struct xlog *log,
@@ -2420,9 +2418,8 @@ xlog_recover_do_dquot_buffer(
2420 /* 2418 /*
2421 * Filesystems are required to send in quota flags at mount time. 2419 * Filesystems are required to send in quota flags at mount time.
2422 */ 2420 */
2423 if (mp->m_qflags == 0) { 2421 if (!mp->m_qflags)
2424 return; 2422 return false;
2425 }
2426 2423
2427 type = 0; 2424 type = 0;
2428 if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF) 2425 if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
@@ -2435,9 +2432,10 @@ xlog_recover_do_dquot_buffer(
2435 * This type of quotas was turned off, so ignore this buffer 2432 * This type of quotas was turned off, so ignore this buffer
2436 */ 2433 */
2437 if (log->l_quotaoffs_flag & type) 2434 if (log->l_quotaoffs_flag & type)
2438 return; 2435 return false;
2439 2436
2440 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2437 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2438 return true;
2441} 2439}
2442 2440
2443/* 2441/*
@@ -2496,7 +2494,7 @@ xlog_recover_buffer_pass2(
2496 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, 2494 bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
2497 buf_flags, NULL); 2495 buf_flags, NULL);
2498 if (!bp) 2496 if (!bp)
2499 return XFS_ERROR(ENOMEM); 2497 return -ENOMEM;
2500 error = bp->b_error; 2498 error = bp->b_error;
2501 if (error) { 2499 if (error) {
2502 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); 2500 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
@@ -2504,23 +2502,44 @@ xlog_recover_buffer_pass2(
2504 } 2502 }
2505 2503
2506 /* 2504 /*
2507 * recover the buffer only if we get an LSN from it and it's less than 2505 * Recover the buffer only if we get an LSN from it and it's less than
2508 * the lsn of the transaction we are replaying. 2506 * the lsn of the transaction we are replaying.
2507 *
2508 * Note that we have to be extremely careful of readahead here.
2509 * Readahead does not attach verfiers to the buffers so if we don't
2510 * actually do any replay after readahead because of the LSN we found
2511 * in the buffer if more recent than that current transaction then we
2512 * need to attach the verifier directly. Failure to do so can lead to
2513 * future recovery actions (e.g. EFI and unlinked list recovery) can
2514 * operate on the buffers and they won't get the verifier attached. This
2515 * can lead to blocks on disk having the correct content but a stale
2516 * CRC.
2517 *
2518 * It is safe to assume these clean buffers are currently up to date.
2519 * If the buffer is dirtied by a later transaction being replayed, then
2520 * the verifier will be reset to match whatever recover turns that
2521 * buffer into.
2509 */ 2522 */
2510 lsn = xlog_recover_get_buf_lsn(mp, bp); 2523 lsn = xlog_recover_get_buf_lsn(mp, bp);
2511 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) 2524 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
2525 xlog_recover_validate_buf_type(mp, bp, buf_f);
2512 goto out_release; 2526 goto out_release;
2527 }
2513 2528
2514 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 2529 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
2515 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2530 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2531 if (error)
2532 goto out_release;
2516 } else if (buf_f->blf_flags & 2533 } else if (buf_f->blf_flags &
2517 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 2534 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
2518 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2535 bool dirty;
2536
2537 dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2538 if (!dirty)
2539 goto out_release;
2519 } else { 2540 } else {
2520 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2541 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2521 } 2542 }
2522 if (error)
2523 goto out_release;
2524 2543
2525 /* 2544 /*
2526 * Perform delayed write on the buffer. Asynchronous writes will be 2545 * Perform delayed write on the buffer. Asynchronous writes will be
@@ -2598,7 +2617,7 @@ xfs_recover_inode_owner_change(
2598 2617
2599 ip = xfs_inode_alloc(mp, in_f->ilf_ino); 2618 ip = xfs_inode_alloc(mp, in_f->ilf_ino);
2600 if (!ip) 2619 if (!ip)
2601 return ENOMEM; 2620 return -ENOMEM;
2602 2621
2603 /* instantiate the inode */ 2622 /* instantiate the inode */
2604 xfs_dinode_from_disk(&ip->i_d, dip); 2623 xfs_dinode_from_disk(&ip->i_d, dip);
@@ -2676,7 +2695,7 @@ xlog_recover_inode_pass2(
2676 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0, 2695 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
2677 &xfs_inode_buf_ops); 2696 &xfs_inode_buf_ops);
2678 if (!bp) { 2697 if (!bp) {
2679 error = ENOMEM; 2698 error = -ENOMEM;
2680 goto error; 2699 goto error;
2681 } 2700 }
2682 error = bp->b_error; 2701 error = bp->b_error;
@@ -2697,7 +2716,7 @@ xlog_recover_inode_pass2(
2697 __func__, dip, bp, in_f->ilf_ino); 2716 __func__, dip, bp, in_f->ilf_ino);
2698 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 2717 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
2699 XFS_ERRLEVEL_LOW, mp); 2718 XFS_ERRLEVEL_LOW, mp);
2700 error = EFSCORRUPTED; 2719 error = -EFSCORRUPTED;
2701 goto out_release; 2720 goto out_release;
2702 } 2721 }
2703 dicp = item->ri_buf[1].i_addr; 2722 dicp = item->ri_buf[1].i_addr;
@@ -2707,7 +2726,7 @@ xlog_recover_inode_pass2(
2707 __func__, item, in_f->ilf_ino); 2726 __func__, item, in_f->ilf_ino);
2708 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 2727 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
2709 XFS_ERRLEVEL_LOW, mp); 2728 XFS_ERRLEVEL_LOW, mp);
2710 error = EFSCORRUPTED; 2729 error = -EFSCORRUPTED;
2711 goto out_release; 2730 goto out_release;
2712 } 2731 }
2713 2732
@@ -2764,7 +2783,7 @@ xlog_recover_inode_pass2(
2764 "%s: Bad regular inode log record, rec ptr 0x%p, " 2783 "%s: Bad regular inode log record, rec ptr 0x%p, "
2765 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2784 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2766 __func__, item, dip, bp, in_f->ilf_ino); 2785 __func__, item, dip, bp, in_f->ilf_ino);
2767 error = EFSCORRUPTED; 2786 error = -EFSCORRUPTED;
2768 goto out_release; 2787 goto out_release;
2769 } 2788 }
2770 } else if (unlikely(S_ISDIR(dicp->di_mode))) { 2789 } else if (unlikely(S_ISDIR(dicp->di_mode))) {
@@ -2777,7 +2796,7 @@ xlog_recover_inode_pass2(
2777 "%s: Bad dir inode log record, rec ptr 0x%p, " 2796 "%s: Bad dir inode log record, rec ptr 0x%p, "
2778 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2797 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2779 __func__, item, dip, bp, in_f->ilf_ino); 2798 __func__, item, dip, bp, in_f->ilf_ino);
2780 error = EFSCORRUPTED; 2799 error = -EFSCORRUPTED;
2781 goto out_release; 2800 goto out_release;
2782 } 2801 }
2783 } 2802 }
@@ -2790,7 +2809,7 @@ xlog_recover_inode_pass2(
2790 __func__, item, dip, bp, in_f->ilf_ino, 2809 __func__, item, dip, bp, in_f->ilf_ino,
2791 dicp->di_nextents + dicp->di_anextents, 2810 dicp->di_nextents + dicp->di_anextents,
2792 dicp->di_nblocks); 2811 dicp->di_nblocks);
2793 error = EFSCORRUPTED; 2812 error = -EFSCORRUPTED;
2794 goto out_release; 2813 goto out_release;
2795 } 2814 }
2796 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { 2815 if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
@@ -2800,7 +2819,7 @@ xlog_recover_inode_pass2(
2800 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " 2819 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2801 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, 2820 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
2802 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); 2821 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
2803 error = EFSCORRUPTED; 2822 error = -EFSCORRUPTED;
2804 goto out_release; 2823 goto out_release;
2805 } 2824 }
2806 isize = xfs_icdinode_size(dicp->di_version); 2825 isize = xfs_icdinode_size(dicp->di_version);
@@ -2810,7 +2829,7 @@ xlog_recover_inode_pass2(
2810 xfs_alert(mp, 2829 xfs_alert(mp,
2811 "%s: Bad inode log record length %d, rec ptr 0x%p", 2830 "%s: Bad inode log record length %d, rec ptr 0x%p",
2812 __func__, item->ri_buf[1].i_len, item); 2831 __func__, item->ri_buf[1].i_len, item);
2813 error = EFSCORRUPTED; 2832 error = -EFSCORRUPTED;
2814 goto out_release; 2833 goto out_release;
2815 } 2834 }
2816 2835
@@ -2898,7 +2917,7 @@ xlog_recover_inode_pass2(
2898 default: 2917 default:
2899 xfs_warn(log->l_mp, "%s: Invalid flag", __func__); 2918 xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
2900 ASSERT(0); 2919 ASSERT(0);
2901 error = EIO; 2920 error = -EIO;
2902 goto out_release; 2921 goto out_release;
2903 } 2922 }
2904 } 2923 }
@@ -2919,7 +2938,7 @@ out_release:
2919error: 2938error:
2920 if (need_free) 2939 if (need_free)
2921 kmem_free(in_f); 2940 kmem_free(in_f);
2922 return XFS_ERROR(error); 2941 return error;
2923} 2942}
2924 2943
2925/* 2944/*
@@ -2946,7 +2965,7 @@ xlog_recover_quotaoff_pass1(
2946 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT) 2965 if (qoff_f->qf_flags & XFS_GQUOTA_ACCT)
2947 log->l_quotaoffs_flag |= XFS_DQ_GROUP; 2966 log->l_quotaoffs_flag |= XFS_DQ_GROUP;
2948 2967
2949 return (0); 2968 return 0;
2950} 2969}
2951 2970
2952/* 2971/*
@@ -2971,17 +2990,17 @@ xlog_recover_dquot_pass2(
2971 * Filesystems are required to send in quota flags at mount time. 2990 * Filesystems are required to send in quota flags at mount time.
2972 */ 2991 */
2973 if (mp->m_qflags == 0) 2992 if (mp->m_qflags == 0)
2974 return (0); 2993 return 0;
2975 2994
2976 recddq = item->ri_buf[1].i_addr; 2995 recddq = item->ri_buf[1].i_addr;
2977 if (recddq == NULL) { 2996 if (recddq == NULL) {
2978 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); 2997 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
2979 return XFS_ERROR(EIO); 2998 return -EIO;
2980 } 2999 }
2981 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { 3000 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
2982 xfs_alert(log->l_mp, "dquot too small (%d) in %s.", 3001 xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
2983 item->ri_buf[1].i_len, __func__); 3002 item->ri_buf[1].i_len, __func__);
2984 return XFS_ERROR(EIO); 3003 return -EIO;
2985 } 3004 }
2986 3005
2987 /* 3006 /*
@@ -2990,7 +3009,7 @@ xlog_recover_dquot_pass2(
2990 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); 3009 type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
2991 ASSERT(type); 3010 ASSERT(type);
2992 if (log->l_quotaoffs_flag & type) 3011 if (log->l_quotaoffs_flag & type)
2993 return (0); 3012 return 0;
2994 3013
2995 /* 3014 /*
2996 * At this point we know that quota was _not_ turned off. 3015 * At this point we know that quota was _not_ turned off.
@@ -3007,12 +3026,19 @@ xlog_recover_dquot_pass2(
3007 error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 3026 error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
3008 "xlog_recover_dquot_pass2 (log copy)"); 3027 "xlog_recover_dquot_pass2 (log copy)");
3009 if (error) 3028 if (error)
3010 return XFS_ERROR(EIO); 3029 return -EIO;
3011 ASSERT(dq_f->qlf_len == 1); 3030 ASSERT(dq_f->qlf_len == 1);
3012 3031
3032 /*
3033 * At this point we are assuming that the dquots have been allocated
3034 * and hence the buffer has valid dquots stamped in it. It should,
3035 * therefore, pass verifier validation. If the dquot is bad, then the
3036 * we'll return an error here, so we don't need to specifically check
3037 * the dquot in the buffer after the verifier has run.
3038 */
3013 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno, 3039 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dq_f->qlf_blkno,
3014 XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp, 3040 XFS_FSB_TO_BB(mp, dq_f->qlf_len), 0, &bp,
3015 NULL); 3041 &xfs_dquot_buf_ops);
3016 if (error) 3042 if (error)
3017 return error; 3043 return error;
3018 3044
@@ -3020,18 +3046,6 @@ xlog_recover_dquot_pass2(
3020 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); 3046 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset);
3021 3047
3022 /* 3048 /*
3023 * At least the magic num portion should be on disk because this
3024 * was among a chunk of dquots created earlier, and we did some
3025 * minimal initialization then.
3026 */
3027 error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
3028 "xlog_recover_dquot_pass2");
3029 if (error) {
3030 xfs_buf_relse(bp);
3031 return XFS_ERROR(EIO);
3032 }
3033
3034 /*
3035 * If the dquot has an LSN in it, recover the dquot only if it's less 3049 * If the dquot has an LSN in it, recover the dquot only if it's less
3036 * than the lsn of the transaction we are replaying. 3050 * than the lsn of the transaction we are replaying.
3037 */ 3051 */
@@ -3178,38 +3192,38 @@ xlog_recover_do_icreate_pass2(
3178 icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; 3192 icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
3179 if (icl->icl_type != XFS_LI_ICREATE) { 3193 if (icl->icl_type != XFS_LI_ICREATE) {
3180 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); 3194 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
3181 return EINVAL; 3195 return -EINVAL;
3182 } 3196 }
3183 3197
3184 if (icl->icl_size != 1) { 3198 if (icl->icl_size != 1) {
3185 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); 3199 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
3186 return EINVAL; 3200 return -EINVAL;
3187 } 3201 }
3188 3202
3189 agno = be32_to_cpu(icl->icl_ag); 3203 agno = be32_to_cpu(icl->icl_ag);
3190 if (agno >= mp->m_sb.sb_agcount) { 3204 if (agno >= mp->m_sb.sb_agcount) {
3191 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); 3205 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
3192 return EINVAL; 3206 return -EINVAL;
3193 } 3207 }
3194 agbno = be32_to_cpu(icl->icl_agbno); 3208 agbno = be32_to_cpu(icl->icl_agbno);
3195 if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { 3209 if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
3196 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); 3210 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
3197 return EINVAL; 3211 return -EINVAL;
3198 } 3212 }
3199 isize = be32_to_cpu(icl->icl_isize); 3213 isize = be32_to_cpu(icl->icl_isize);
3200 if (isize != mp->m_sb.sb_inodesize) { 3214 if (isize != mp->m_sb.sb_inodesize) {
3201 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); 3215 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
3202 return EINVAL; 3216 return -EINVAL;
3203 } 3217 }
3204 count = be32_to_cpu(icl->icl_count); 3218 count = be32_to_cpu(icl->icl_count);
3205 if (!count) { 3219 if (!count) {
3206 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); 3220 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
3207 return EINVAL; 3221 return -EINVAL;
3208 } 3222 }
3209 length = be32_to_cpu(icl->icl_length); 3223 length = be32_to_cpu(icl->icl_length);
3210 if (!length || length >= mp->m_sb.sb_agblocks) { 3224 if (!length || length >= mp->m_sb.sb_agblocks) {
3211 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); 3225 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
3212 return EINVAL; 3226 return -EINVAL;
3213 } 3227 }
3214 3228
3215 /* existing allocation is fixed value */ 3229 /* existing allocation is fixed value */
@@ -3218,7 +3232,7 @@ xlog_recover_do_icreate_pass2(
3218 if (count != mp->m_ialloc_inos || 3232 if (count != mp->m_ialloc_inos ||
3219 length != mp->m_ialloc_blks) { 3233 length != mp->m_ialloc_blks) {
3220 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); 3234 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
3221 return EINVAL; 3235 return -EINVAL;
3222 } 3236 }
3223 3237
3224 /* 3238 /*
@@ -3389,7 +3403,7 @@ xlog_recover_commit_pass1(
3389 xfs_warn(log->l_mp, "%s: invalid item type (%d)", 3403 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
3390 __func__, ITEM_TYPE(item)); 3404 __func__, ITEM_TYPE(item));
3391 ASSERT(0); 3405 ASSERT(0);
3392 return XFS_ERROR(EIO); 3406 return -EIO;
3393 } 3407 }
3394} 3408}
3395 3409
@@ -3425,7 +3439,7 @@ xlog_recover_commit_pass2(
3425 xfs_warn(log->l_mp, "%s: invalid item type (%d)", 3439 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
3426 __func__, ITEM_TYPE(item)); 3440 __func__, ITEM_TYPE(item));
3427 ASSERT(0); 3441 ASSERT(0);
3428 return XFS_ERROR(EIO); 3442 return -EIO;
3429 } 3443 }
3430} 3444}
3431 3445
@@ -3560,7 +3574,7 @@ xlog_recover_process_data(
3560 3574
3561 /* check the log format matches our own - else we can't recover */ 3575 /* check the log format matches our own - else we can't recover */
3562 if (xlog_header_check_recover(log->l_mp, rhead)) 3576 if (xlog_header_check_recover(log->l_mp, rhead))
3563 return (XFS_ERROR(EIO)); 3577 return -EIO;
3564 3578
3565 while ((dp < lp) && num_logops) { 3579 while ((dp < lp) && num_logops) {
3566 ASSERT(dp + sizeof(xlog_op_header_t) <= lp); 3580 ASSERT(dp + sizeof(xlog_op_header_t) <= lp);
@@ -3571,7 +3585,7 @@ xlog_recover_process_data(
3571 xfs_warn(log->l_mp, "%s: bad clientid 0x%x", 3585 xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
3572 __func__, ohead->oh_clientid); 3586 __func__, ohead->oh_clientid);
3573 ASSERT(0); 3587 ASSERT(0);
3574 return (XFS_ERROR(EIO)); 3588 return -EIO;
3575 } 3589 }
3576 tid = be32_to_cpu(ohead->oh_tid); 3590 tid = be32_to_cpu(ohead->oh_tid);
3577 hash = XLOG_RHASH(tid); 3591 hash = XLOG_RHASH(tid);
@@ -3585,7 +3599,7 @@ xlog_recover_process_data(
3585 xfs_warn(log->l_mp, "%s: bad length 0x%x", 3599 xfs_warn(log->l_mp, "%s: bad length 0x%x",
3586 __func__, be32_to_cpu(ohead->oh_len)); 3600 __func__, be32_to_cpu(ohead->oh_len));
3587 WARN_ON(1); 3601 WARN_ON(1);
3588 return (XFS_ERROR(EIO)); 3602 return -EIO;
3589 } 3603 }
3590 flags = ohead->oh_flags & ~XLOG_END_TRANS; 3604 flags = ohead->oh_flags & ~XLOG_END_TRANS;
3591 if (flags & XLOG_WAS_CONT_TRANS) 3605 if (flags & XLOG_WAS_CONT_TRANS)
@@ -3607,7 +3621,7 @@ xlog_recover_process_data(
3607 xfs_warn(log->l_mp, "%s: bad transaction", 3621 xfs_warn(log->l_mp, "%s: bad transaction",
3608 __func__); 3622 __func__);
3609 ASSERT(0); 3623 ASSERT(0);
3610 error = XFS_ERROR(EIO); 3624 error = -EIO;
3611 break; 3625 break;
3612 case 0: 3626 case 0:
3613 case XLOG_CONTINUE_TRANS: 3627 case XLOG_CONTINUE_TRANS:
@@ -3618,7 +3632,7 @@ xlog_recover_process_data(
3618 xfs_warn(log->l_mp, "%s: bad flag 0x%x", 3632 xfs_warn(log->l_mp, "%s: bad flag 0x%x",
3619 __func__, flags); 3633 __func__, flags);
3620 ASSERT(0); 3634 ASSERT(0);
3621 error = XFS_ERROR(EIO); 3635 error = -EIO;
3622 break; 3636 break;
3623 } 3637 }
3624 if (error) { 3638 if (error) {
@@ -3669,7 +3683,7 @@ xlog_recover_process_efi(
3669 */ 3683 */
3670 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 3684 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
3671 xfs_efi_release(efip, efip->efi_format.efi_nextents); 3685 xfs_efi_release(efip, efip->efi_format.efi_nextents);
3672 return XFS_ERROR(EIO); 3686 return -EIO;
3673 } 3687 }
3674 } 3688 }
3675 3689
@@ -3969,7 +3983,7 @@ xlog_unpack_data_crc(
3969 * CRC protection by punting an error back up the stack. 3983 * CRC protection by punting an error back up the stack.
3970 */ 3984 */
3971 if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) 3985 if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
3972 return EFSCORRUPTED; 3986 return -EFSCORRUPTED;
3973 } 3987 }
3974 3988
3975 return 0; 3989 return 0;
@@ -4018,14 +4032,14 @@ xlog_valid_rec_header(
4018 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) { 4032 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
4019 XFS_ERROR_REPORT("xlog_valid_rec_header(1)", 4033 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
4020 XFS_ERRLEVEL_LOW, log->l_mp); 4034 XFS_ERRLEVEL_LOW, log->l_mp);
4021 return XFS_ERROR(EFSCORRUPTED); 4035 return -EFSCORRUPTED;
4022 } 4036 }
4023 if (unlikely( 4037 if (unlikely(
4024 (!rhead->h_version || 4038 (!rhead->h_version ||
4025 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 4039 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
4026 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", 4040 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
4027 __func__, be32_to_cpu(rhead->h_version)); 4041 __func__, be32_to_cpu(rhead->h_version));
4028 return XFS_ERROR(EIO); 4042 return -EIO;
4029 } 4043 }
4030 4044
4031 /* LR body must have data or it wouldn't have been written */ 4045 /* LR body must have data or it wouldn't have been written */
@@ -4033,12 +4047,12 @@ xlog_valid_rec_header(
4033 if (unlikely( hlen <= 0 || hlen > INT_MAX )) { 4047 if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
4034 XFS_ERROR_REPORT("xlog_valid_rec_header(2)", 4048 XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
4035 XFS_ERRLEVEL_LOW, log->l_mp); 4049 XFS_ERRLEVEL_LOW, log->l_mp);
4036 return XFS_ERROR(EFSCORRUPTED); 4050 return -EFSCORRUPTED;
4037 } 4051 }
4038 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) { 4052 if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
4039 XFS_ERROR_REPORT("xlog_valid_rec_header(3)", 4053 XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
4040 XFS_ERRLEVEL_LOW, log->l_mp); 4054 XFS_ERRLEVEL_LOW, log->l_mp);
4041 return XFS_ERROR(EFSCORRUPTED); 4055 return -EFSCORRUPTED;
4042 } 4056 }
4043 return 0; 4057 return 0;
4044} 4058}
@@ -4081,7 +4095,7 @@ xlog_do_recovery_pass(
4081 */ 4095 */
4082 hbp = xlog_get_bp(log, 1); 4096 hbp = xlog_get_bp(log, 1);
4083 if (!hbp) 4097 if (!hbp)
4084 return ENOMEM; 4098 return -ENOMEM;
4085 4099
4086 error = xlog_bread(log, tail_blk, 1, hbp, &offset); 4100 error = xlog_bread(log, tail_blk, 1, hbp, &offset);
4087 if (error) 4101 if (error)
@@ -4110,11 +4124,11 @@ xlog_do_recovery_pass(
4110 } 4124 }
4111 4125
4112 if (!hbp) 4126 if (!hbp)
4113 return ENOMEM; 4127 return -ENOMEM;
4114 dbp = xlog_get_bp(log, BTOBB(h_size)); 4128 dbp = xlog_get_bp(log, BTOBB(h_size));
4115 if (!dbp) { 4129 if (!dbp) {
4116 xlog_put_bp(hbp); 4130 xlog_put_bp(hbp);
4117 return ENOMEM; 4131 return -ENOMEM;
4118 } 4132 }
4119 4133
4120 memset(rhash, 0, sizeof(rhash)); 4134 memset(rhash, 0, sizeof(rhash));
@@ -4388,7 +4402,7 @@ xlog_do_recover(
4388 * If IO errors happened during recovery, bail out. 4402 * If IO errors happened during recovery, bail out.
4389 */ 4403 */
4390 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 4404 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
4391 return (EIO); 4405 return -EIO;
4392 } 4406 }
4393 4407
4394 /* 4408 /*
@@ -4415,7 +4429,7 @@ xlog_do_recover(
4415 4429
4416 if (XFS_FORCED_SHUTDOWN(log->l_mp)) { 4430 if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
4417 xfs_buf_relse(bp); 4431 xfs_buf_relse(bp);
4418 return XFS_ERROR(EIO); 4432 return -EIO;
4419 } 4433 }
4420 4434
4421 xfs_buf_iorequest(bp); 4435 xfs_buf_iorequest(bp);
@@ -4492,7 +4506,7 @@ xlog_recover(
4492"Please recover the log on a kernel that supports the unknown features.", 4506"Please recover the log on a kernel that supports the unknown features.",
4493 (log->l_mp->m_sb.sb_features_log_incompat & 4507 (log->l_mp->m_sb.sb_features_log_incompat &
4494 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); 4508 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
4495 return EINVAL; 4509 return -EINVAL;
4496 } 4510 }
4497 4511
4498 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 4512 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 3507cd0ec400..fbf0384a466f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -42,6 +42,7 @@
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43#include "xfs_icache.h" 43#include "xfs_icache.h"
44#include "xfs_dinode.h" 44#include "xfs_dinode.h"
45#include "xfs_sysfs.h"
45 46
46 47
47#ifdef HAVE_PERCPU_SB 48#ifdef HAVE_PERCPU_SB
@@ -60,6 +61,8 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
60static int xfs_uuid_table_size; 61static int xfs_uuid_table_size;
61static uuid_t *xfs_uuid_table; 62static uuid_t *xfs_uuid_table;
62 63
64extern struct kset *xfs_kset;
65
63/* 66/*
64 * See if the UUID is unique among mounted XFS filesystems. 67 * See if the UUID is unique among mounted XFS filesystems.
65 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 68 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -76,7 +79,7 @@ xfs_uuid_mount(
76 79
77 if (uuid_is_nil(uuid)) { 80 if (uuid_is_nil(uuid)) {
78 xfs_warn(mp, "Filesystem has nil UUID - can't mount"); 81 xfs_warn(mp, "Filesystem has nil UUID - can't mount");
79 return XFS_ERROR(EINVAL); 82 return -EINVAL;
80 } 83 }
81 84
82 mutex_lock(&xfs_uuid_table_mutex); 85 mutex_lock(&xfs_uuid_table_mutex);
@@ -104,7 +107,7 @@ xfs_uuid_mount(
104 out_duplicate: 107 out_duplicate:
105 mutex_unlock(&xfs_uuid_table_mutex); 108 mutex_unlock(&xfs_uuid_table_mutex);
106 xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid); 109 xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
107 return XFS_ERROR(EINVAL); 110 return -EINVAL;
108} 111}
109 112
110STATIC void 113STATIC void
@@ -173,13 +176,9 @@ xfs_sb_validate_fsb_count(
173 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); 176 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
174 ASSERT(sbp->sb_blocklog >= BBSHIFT); 177 ASSERT(sbp->sb_blocklog >= BBSHIFT);
175 178
176#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ 179 /* Limited by ULONG_MAX of page cache index */
177 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) 180 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
178 return EFBIG; 181 return -EFBIG;
179#else /* Limited by UINT_MAX of sectors */
180 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
181 return EFBIG;
182#endif
183 return 0; 182 return 0;
184} 183}
185 184
@@ -250,9 +249,9 @@ xfs_initialize_perag(
250 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 249 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
251 250
252 if (mp->m_flags & XFS_MOUNT_32BITINODES) 251 if (mp->m_flags & XFS_MOUNT_32BITINODES)
253 index = xfs_set_inode32(mp); 252 index = xfs_set_inode32(mp, agcount);
254 else 253 else
255 index = xfs_set_inode64(mp); 254 index = xfs_set_inode64(mp, agcount);
256 255
257 if (maxagi) 256 if (maxagi)
258 *maxagi = index; 257 *maxagi = index;
@@ -308,15 +307,15 @@ reread:
308 if (!bp) { 307 if (!bp) {
309 if (loud) 308 if (loud)
310 xfs_warn(mp, "SB buffer read failed"); 309 xfs_warn(mp, "SB buffer read failed");
311 return EIO; 310 return -EIO;
312 } 311 }
313 if (bp->b_error) { 312 if (bp->b_error) {
314 error = bp->b_error; 313 error = bp->b_error;
315 if (loud) 314 if (loud)
316 xfs_warn(mp, "SB validate failed with error %d.", error); 315 xfs_warn(mp, "SB validate failed with error %d.", error);
317 /* bad CRC means corrupted metadata */ 316 /* bad CRC means corrupted metadata */
318 if (error == EFSBADCRC) 317 if (error == -EFSBADCRC)
319 error = EFSCORRUPTED; 318 error = -EFSCORRUPTED;
320 goto release_buf; 319 goto release_buf;
321 } 320 }
322 321
@@ -324,7 +323,6 @@ reread:
324 * Initialize the mount structure from the superblock. 323 * Initialize the mount structure from the superblock.
325 */ 324 */
326 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 325 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
327 xfs_sb_quota_from_disk(sbp);
328 326
329 /* 327 /*
330 * If we haven't validated the superblock, do so now before we try 328 * If we haven't validated the superblock, do so now before we try
@@ -333,7 +331,7 @@ reread:
333 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 331 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
334 if (loud) 332 if (loud)
335 xfs_warn(mp, "Invalid superblock magic number"); 333 xfs_warn(mp, "Invalid superblock magic number");
336 error = EINVAL; 334 error = -EINVAL;
337 goto release_buf; 335 goto release_buf;
338 } 336 }
339 337
@@ -344,7 +342,7 @@ reread:
344 if (loud) 342 if (loud)
345 xfs_warn(mp, "device supports %u byte sectors (not %u)", 343 xfs_warn(mp, "device supports %u byte sectors (not %u)",
346 sector_size, sbp->sb_sectsize); 344 sector_size, sbp->sb_sectsize);
347 error = ENOSYS; 345 error = -ENOSYS;
348 goto release_buf; 346 goto release_buf;
349 } 347 }
350 348
@@ -392,7 +390,7 @@ xfs_update_alignment(xfs_mount_t *mp)
392 xfs_warn(mp, 390 xfs_warn(mp,
393 "alignment check failed: sunit/swidth vs. blocksize(%d)", 391 "alignment check failed: sunit/swidth vs. blocksize(%d)",
394 sbp->sb_blocksize); 392 sbp->sb_blocksize);
395 return XFS_ERROR(EINVAL); 393 return -EINVAL;
396 } else { 394 } else {
397 /* 395 /*
398 * Convert the stripe unit and width to FSBs. 396 * Convert the stripe unit and width to FSBs.
@@ -402,14 +400,14 @@ xfs_update_alignment(xfs_mount_t *mp)
402 xfs_warn(mp, 400 xfs_warn(mp,
403 "alignment check failed: sunit/swidth vs. agsize(%d)", 401 "alignment check failed: sunit/swidth vs. agsize(%d)",
404 sbp->sb_agblocks); 402 sbp->sb_agblocks);
405 return XFS_ERROR(EINVAL); 403 return -EINVAL;
406 } else if (mp->m_dalign) { 404 } else if (mp->m_dalign) {
407 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 405 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
408 } else { 406 } else {
409 xfs_warn(mp, 407 xfs_warn(mp,
410 "alignment check failed: sunit(%d) less than bsize(%d)", 408 "alignment check failed: sunit(%d) less than bsize(%d)",
411 mp->m_dalign, sbp->sb_blocksize); 409 mp->m_dalign, sbp->sb_blocksize);
412 return XFS_ERROR(EINVAL); 410 return -EINVAL;
413 } 411 }
414 } 412 }
415 413
@@ -429,7 +427,7 @@ xfs_update_alignment(xfs_mount_t *mp)
429 } else { 427 } else {
430 xfs_warn(mp, 428 xfs_warn(mp,
431 "cannot change alignment: superblock does not support data alignment"); 429 "cannot change alignment: superblock does not support data alignment");
432 return XFS_ERROR(EINVAL); 430 return -EINVAL;
433 } 431 }
434 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && 432 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
435 xfs_sb_version_hasdalign(&mp->m_sb)) { 433 xfs_sb_version_hasdalign(&mp->m_sb)) {
@@ -556,14 +554,14 @@ xfs_check_sizes(xfs_mount_t *mp)
556 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 554 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
557 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 555 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
558 xfs_warn(mp, "filesystem size mismatch detected"); 556 xfs_warn(mp, "filesystem size mismatch detected");
559 return XFS_ERROR(EFBIG); 557 return -EFBIG;
560 } 558 }
561 bp = xfs_buf_read_uncached(mp->m_ddev_targp, 559 bp = xfs_buf_read_uncached(mp->m_ddev_targp,
562 d - XFS_FSS_TO_BB(mp, 1), 560 d - XFS_FSS_TO_BB(mp, 1),
563 XFS_FSS_TO_BB(mp, 1), 0, NULL); 561 XFS_FSS_TO_BB(mp, 1), 0, NULL);
564 if (!bp) { 562 if (!bp) {
565 xfs_warn(mp, "last sector read failed"); 563 xfs_warn(mp, "last sector read failed");
566 return EIO; 564 return -EIO;
567 } 565 }
568 xfs_buf_relse(bp); 566 xfs_buf_relse(bp);
569 567
@@ -571,14 +569,14 @@ xfs_check_sizes(xfs_mount_t *mp)
571 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 569 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
572 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 570 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
573 xfs_warn(mp, "log size mismatch detected"); 571 xfs_warn(mp, "log size mismatch detected");
574 return XFS_ERROR(EFBIG); 572 return -EFBIG;
575 } 573 }
576 bp = xfs_buf_read_uncached(mp->m_logdev_targp, 574 bp = xfs_buf_read_uncached(mp->m_logdev_targp,
577 d - XFS_FSB_TO_BB(mp, 1), 575 d - XFS_FSB_TO_BB(mp, 1),
578 XFS_FSB_TO_BB(mp, 1), 0, NULL); 576 XFS_FSB_TO_BB(mp, 1), 0, NULL);
579 if (!bp) { 577 if (!bp) {
580 xfs_warn(mp, "log device read failed"); 578 xfs_warn(mp, "log device read failed");
581 return EIO; 579 return -EIO;
582 } 580 }
583 xfs_buf_relse(bp); 581 xfs_buf_relse(bp);
584 } 582 }
@@ -731,10 +729,15 @@ xfs_mountfs(
731 729
732 xfs_set_maxicount(mp); 730 xfs_set_maxicount(mp);
733 731
734 error = xfs_uuid_mount(mp); 732 mp->m_kobj.kobject.kset = xfs_kset;
733 error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
735 if (error) 734 if (error)
736 goto out; 735 goto out;
737 736
737 error = xfs_uuid_mount(mp);
738 if (error)
739 goto out_remove_sysfs;
740
738 /* 741 /*
739 * Set the minimum read and write sizes 742 * Set the minimum read and write sizes
740 */ 743 */
@@ -816,7 +819,7 @@ xfs_mountfs(
816 if (!sbp->sb_logblocks) { 819 if (!sbp->sb_logblocks) {
817 xfs_warn(mp, "no log defined"); 820 xfs_warn(mp, "no log defined");
818 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); 821 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
819 error = XFS_ERROR(EFSCORRUPTED); 822 error = -EFSCORRUPTED;
820 goto out_free_perag; 823 goto out_free_perag;
821 } 824 }
822 825
@@ -855,7 +858,7 @@ xfs_mountfs(
855 !mp->m_sb.sb_inprogress) { 858 !mp->m_sb.sb_inprogress) {
856 error = xfs_initialize_perag_data(mp, sbp->sb_agcount); 859 error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
857 if (error) 860 if (error)
858 goto out_fail_wait; 861 goto out_log_dealloc;
859 } 862 }
860 863
861 /* 864 /*
@@ -876,7 +879,7 @@ xfs_mountfs(
876 xfs_iunlock(rip, XFS_ILOCK_EXCL); 879 xfs_iunlock(rip, XFS_ILOCK_EXCL);
877 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 880 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
878 mp); 881 mp);
879 error = XFS_ERROR(EFSCORRUPTED); 882 error = -EFSCORRUPTED;
880 goto out_rele_rip; 883 goto out_rele_rip;
881 } 884 }
882 mp->m_rootip = rip; /* save it */ 885 mp->m_rootip = rip; /* save it */
@@ -927,7 +930,7 @@ xfs_mountfs(
927 xfs_notice(mp, "resetting quota flags"); 930 xfs_notice(mp, "resetting quota flags");
928 error = xfs_mount_reset_sbqflags(mp); 931 error = xfs_mount_reset_sbqflags(mp);
929 if (error) 932 if (error)
930 return error; 933 goto out_rtunmount;
931 } 934 }
932 } 935 }
933 936
@@ -989,6 +992,8 @@ xfs_mountfs(
989 xfs_da_unmount(mp); 992 xfs_da_unmount(mp);
990 out_remove_uuid: 993 out_remove_uuid:
991 xfs_uuid_unmount(mp); 994 xfs_uuid_unmount(mp);
995 out_remove_sysfs:
996 xfs_sysfs_del(&mp->m_kobj);
992 out: 997 out:
993 return error; 998 return error;
994} 999}
@@ -1071,6 +1076,8 @@ xfs_unmountfs(
1071 xfs_errortag_clearall(mp, 0); 1076 xfs_errortag_clearall(mp, 0);
1072#endif 1077#endif
1073 xfs_free_perag(mp); 1078 xfs_free_perag(mp);
1079
1080 xfs_sysfs_del(&mp->m_kobj);
1074} 1081}
1075 1082
1076int 1083int
@@ -1152,7 +1159,7 @@ xfs_mod_incore_sb_unlocked(
1152 lcounter += delta; 1159 lcounter += delta;
1153 if (lcounter < 0) { 1160 if (lcounter < 0) {
1154 ASSERT(0); 1161 ASSERT(0);
1155 return XFS_ERROR(EINVAL); 1162 return -EINVAL;
1156 } 1163 }
1157 mp->m_sb.sb_icount = lcounter; 1164 mp->m_sb.sb_icount = lcounter;
1158 return 0; 1165 return 0;
@@ -1161,7 +1168,7 @@ xfs_mod_incore_sb_unlocked(
1161 lcounter += delta; 1168 lcounter += delta;
1162 if (lcounter < 0) { 1169 if (lcounter < 0) {
1163 ASSERT(0); 1170 ASSERT(0);
1164 return XFS_ERROR(EINVAL); 1171 return -EINVAL;
1165 } 1172 }
1166 mp->m_sb.sb_ifree = lcounter; 1173 mp->m_sb.sb_ifree = lcounter;
1167 return 0; 1174 return 0;
@@ -1191,7 +1198,7 @@ xfs_mod_incore_sb_unlocked(
1191 * blocks if were allowed to. 1198 * blocks if were allowed to.
1192 */ 1199 */
1193 if (!rsvd) 1200 if (!rsvd)
1194 return XFS_ERROR(ENOSPC); 1201 return -ENOSPC;
1195 1202
1196 lcounter = (long long)mp->m_resblks_avail + delta; 1203 lcounter = (long long)mp->m_resblks_avail + delta;
1197 if (lcounter >= 0) { 1204 if (lcounter >= 0) {
@@ -1202,7 +1209,7 @@ xfs_mod_incore_sb_unlocked(
1202 "Filesystem \"%s\": reserve blocks depleted! " 1209 "Filesystem \"%s\": reserve blocks depleted! "
1203 "Consider increasing reserve pool size.", 1210 "Consider increasing reserve pool size.",
1204 mp->m_fsname); 1211 mp->m_fsname);
1205 return XFS_ERROR(ENOSPC); 1212 return -ENOSPC;
1206 } 1213 }
1207 1214
1208 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 1215 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
@@ -1211,7 +1218,7 @@ xfs_mod_incore_sb_unlocked(
1211 lcounter = (long long)mp->m_sb.sb_frextents; 1218 lcounter = (long long)mp->m_sb.sb_frextents;
1212 lcounter += delta; 1219 lcounter += delta;
1213 if (lcounter < 0) { 1220 if (lcounter < 0) {
1214 return XFS_ERROR(ENOSPC); 1221 return -ENOSPC;
1215 } 1222 }
1216 mp->m_sb.sb_frextents = lcounter; 1223 mp->m_sb.sb_frextents = lcounter;
1217 return 0; 1224 return 0;
@@ -1220,7 +1227,7 @@ xfs_mod_incore_sb_unlocked(
1220 lcounter += delta; 1227 lcounter += delta;
1221 if (lcounter < 0) { 1228 if (lcounter < 0) {
1222 ASSERT(0); 1229 ASSERT(0);
1223 return XFS_ERROR(EINVAL); 1230 return -EINVAL;
1224 } 1231 }
1225 mp->m_sb.sb_dblocks = lcounter; 1232 mp->m_sb.sb_dblocks = lcounter;
1226 return 0; 1233 return 0;
@@ -1229,7 +1236,7 @@ xfs_mod_incore_sb_unlocked(
1229 scounter += delta; 1236 scounter += delta;
1230 if (scounter < 0) { 1237 if (scounter < 0) {
1231 ASSERT(0); 1238 ASSERT(0);
1232 return XFS_ERROR(EINVAL); 1239 return -EINVAL;
1233 } 1240 }
1234 mp->m_sb.sb_agcount = scounter; 1241 mp->m_sb.sb_agcount = scounter;
1235 return 0; 1242 return 0;
@@ -1238,7 +1245,7 @@ xfs_mod_incore_sb_unlocked(
1238 scounter += delta; 1245 scounter += delta;
1239 if (scounter < 0) { 1246 if (scounter < 0) {
1240 ASSERT(0); 1247 ASSERT(0);
1241 return XFS_ERROR(EINVAL); 1248 return -EINVAL;
1242 } 1249 }
1243 mp->m_sb.sb_imax_pct = scounter; 1250 mp->m_sb.sb_imax_pct = scounter;
1244 return 0; 1251 return 0;
@@ -1247,7 +1254,7 @@ xfs_mod_incore_sb_unlocked(
1247 scounter += delta; 1254 scounter += delta;
1248 if (scounter < 0) { 1255 if (scounter < 0) {
1249 ASSERT(0); 1256 ASSERT(0);
1250 return XFS_ERROR(EINVAL); 1257 return -EINVAL;
1251 } 1258 }
1252 mp->m_sb.sb_rextsize = scounter; 1259 mp->m_sb.sb_rextsize = scounter;
1253 return 0; 1260 return 0;
@@ -1256,7 +1263,7 @@ xfs_mod_incore_sb_unlocked(
1256 scounter += delta; 1263 scounter += delta;
1257 if (scounter < 0) { 1264 if (scounter < 0) {
1258 ASSERT(0); 1265 ASSERT(0);
1259 return XFS_ERROR(EINVAL); 1266 return -EINVAL;
1260 } 1267 }
1261 mp->m_sb.sb_rbmblocks = scounter; 1268 mp->m_sb.sb_rbmblocks = scounter;
1262 return 0; 1269 return 0;
@@ -1265,7 +1272,7 @@ xfs_mod_incore_sb_unlocked(
1265 lcounter += delta; 1272 lcounter += delta;
1266 if (lcounter < 0) { 1273 if (lcounter < 0) {
1267 ASSERT(0); 1274 ASSERT(0);
1268 return XFS_ERROR(EINVAL); 1275 return -EINVAL;
1269 } 1276 }
1270 mp->m_sb.sb_rblocks = lcounter; 1277 mp->m_sb.sb_rblocks = lcounter;
1271 return 0; 1278 return 0;
@@ -1274,7 +1281,7 @@ xfs_mod_incore_sb_unlocked(
1274 lcounter += delta; 1281 lcounter += delta;
1275 if (lcounter < 0) { 1282 if (lcounter < 0) {
1276 ASSERT(0); 1283 ASSERT(0);
1277 return XFS_ERROR(EINVAL); 1284 return -EINVAL;
1278 } 1285 }
1279 mp->m_sb.sb_rextents = lcounter; 1286 mp->m_sb.sb_rextents = lcounter;
1280 return 0; 1287 return 0;
@@ -1283,13 +1290,13 @@ xfs_mod_incore_sb_unlocked(
1283 scounter += delta; 1290 scounter += delta;
1284 if (scounter < 0) { 1291 if (scounter < 0) {
1285 ASSERT(0); 1292 ASSERT(0);
1286 return XFS_ERROR(EINVAL); 1293 return -EINVAL;
1287 } 1294 }
1288 mp->m_sb.sb_rextslog = scounter; 1295 mp->m_sb.sb_rextslog = scounter;
1289 return 0; 1296 return 0;
1290 default: 1297 default:
1291 ASSERT(0); 1298 ASSERT(0);
1292 return XFS_ERROR(EINVAL); 1299 return -EINVAL;
1293 } 1300 }
1294} 1301}
1295 1302
@@ -1452,7 +1459,7 @@ xfs_dev_is_read_only(
1452 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { 1459 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
1453 xfs_notice(mp, "%s required on read-only device.", message); 1460 xfs_notice(mp, "%s required on read-only device.", message);
1454 xfs_notice(mp, "write access unavailable, cannot proceed."); 1461 xfs_notice(mp, "write access unavailable, cannot proceed.");
1455 return EROFS; 1462 return -EROFS;
1456 } 1463 }
1457 return 0; 1464 return 0;
1458} 1465}
@@ -1995,7 +2002,7 @@ slow_path:
1995 * (e.g. lots of space just got freed). After that 2002 * (e.g. lots of space just got freed). After that
1996 * we are done. 2003 * we are done.
1997 */ 2004 */
1998 if (ret != ENOSPC) 2005 if (ret != -ENOSPC)
1999 xfs_icsb_balance_counter(mp, field, 0); 2006 xfs_icsb_balance_counter(mp, field, 0);
2000 xfs_icsb_unlock(mp); 2007 xfs_icsb_unlock(mp);
2001 return ret; 2008 return ret;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7295a0b7c343..b0447c86e7e2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -166,6 +166,7 @@ typedef struct xfs_mount {
166 on the next remount,rw */ 166 on the next remount,rw */
167 int64_t m_low_space[XFS_LOWSP_MAX]; 167 int64_t m_low_space[XFS_LOWSP_MAX];
168 /* low free space thresholds */ 168 /* low free space thresholds */
169 struct xfs_kobj m_kobj;
169 170
170 struct workqueue_struct *m_data_workqueue; 171 struct workqueue_struct *m_data_workqueue;
171 struct workqueue_struct *m_unwritten_workqueue; 172 struct workqueue_struct *m_unwritten_workqueue;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index f99b4933dc22..1eb6f3df698c 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -337,20 +337,20 @@ xfs_mru_cache_create(
337 *mrup = NULL; 337 *mrup = NULL;
338 338
339 if (!mrup || !grp_count || !lifetime_ms || !free_func) 339 if (!mrup || !grp_count || !lifetime_ms || !free_func)
340 return EINVAL; 340 return -EINVAL;
341 341
342 if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) 342 if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
343 return EINVAL; 343 return -EINVAL;
344 344
345 if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP))) 345 if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
346 return ENOMEM; 346 return -ENOMEM;
347 347
348 /* An extra list is needed to avoid reaping up to a grp_time early. */ 348 /* An extra list is needed to avoid reaping up to a grp_time early. */
349 mru->grp_count = grp_count + 1; 349 mru->grp_count = grp_count + 1;
350 mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); 350 mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
351 351
352 if (!mru->lists) { 352 if (!mru->lists) {
353 err = ENOMEM; 353 err = -ENOMEM;
354 goto exit; 354 goto exit;
355 } 355 }
356 356
@@ -434,16 +434,16 @@ xfs_mru_cache_insert(
434 434
435 ASSERT(mru && mru->lists); 435 ASSERT(mru && mru->lists);
436 if (!mru || !mru->lists) 436 if (!mru || !mru->lists)
437 return EINVAL; 437 return -EINVAL;
438 438
439 if (radix_tree_preload(GFP_KERNEL)) 439 if (radix_tree_preload(GFP_KERNEL))
440 return ENOMEM; 440 return -ENOMEM;
441 441
442 INIT_LIST_HEAD(&elem->list_node); 442 INIT_LIST_HEAD(&elem->list_node);
443 elem->key = key; 443 elem->key = key;
444 444
445 spin_lock(&mru->lock); 445 spin_lock(&mru->lock);
446 error = -radix_tree_insert(&mru->store, key, elem); 446 error = radix_tree_insert(&mru->store, key, elem);
447 radix_tree_preload_end(); 447 radix_tree_preload_end();
448 if (!error) 448 if (!error)
449 _xfs_mru_cache_list_insert(mru, elem); 449 _xfs_mru_cache_list_insert(mru, elem);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 6d26759c779a..10232102b4a6 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -98,18 +98,18 @@ restart:
98 next_index = be32_to_cpu(dqp->q_core.d_id) + 1; 98 next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
99 99
100 error = execute(batch[i], data); 100 error = execute(batch[i], data);
101 if (error == EAGAIN) { 101 if (error == -EAGAIN) {
102 skipped++; 102 skipped++;
103 continue; 103 continue;
104 } 104 }
105 if (error && last_error != EFSCORRUPTED) 105 if (error && last_error != -EFSCORRUPTED)
106 last_error = error; 106 last_error = error;
107 } 107 }
108 108
109 mutex_unlock(&qi->qi_tree_lock); 109 mutex_unlock(&qi->qi_tree_lock);
110 110
111 /* bail out if the filesystem is corrupted. */ 111 /* bail out if the filesystem is corrupted. */
112 if (last_error == EFSCORRUPTED) { 112 if (last_error == -EFSCORRUPTED) {
113 skipped = 0; 113 skipped = 0;
114 break; 114 break;
115 } 115 }
@@ -138,7 +138,7 @@ xfs_qm_dqpurge(
138 xfs_dqlock(dqp); 138 xfs_dqlock(dqp);
139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { 139 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
140 xfs_dqunlock(dqp); 140 xfs_dqunlock(dqp);
141 return EAGAIN; 141 return -EAGAIN;
142 } 142 }
143 143
144 dqp->dq_flags |= XFS_DQ_FREEING; 144 dqp->dq_flags |= XFS_DQ_FREEING;
@@ -221,100 +221,6 @@ xfs_qm_unmount(
221 } 221 }
222} 222}
223 223
224
225/*
226 * This is called from xfs_mountfs to start quotas and initialize all
227 * necessary data structures like quotainfo. This is also responsible for
228 * running a quotacheck as necessary. We are guaranteed that the superblock
229 * is consistently read in at this point.
230 *
231 * If we fail here, the mount will continue with quota turned off. We don't
232 * need to inidicate success or failure at all.
233 */
234void
235xfs_qm_mount_quotas(
236 xfs_mount_t *mp)
237{
238 int error = 0;
239 uint sbf;
240
241 /*
242 * If quotas on realtime volumes is not supported, we disable
243 * quotas immediately.
244 */
245 if (mp->m_sb.sb_rextents) {
246 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
247 mp->m_qflags = 0;
248 goto write_changes;
249 }
250
251 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
252
253 /*
254 * Allocate the quotainfo structure inside the mount struct, and
255 * create quotainode(s), and change/rev superblock if necessary.
256 */
257 error = xfs_qm_init_quotainfo(mp);
258 if (error) {
259 /*
260 * We must turn off quotas.
261 */
262 ASSERT(mp->m_quotainfo == NULL);
263 mp->m_qflags = 0;
264 goto write_changes;
265 }
266 /*
267 * If any of the quotas are not consistent, do a quotacheck.
268 */
269 if (XFS_QM_NEED_QUOTACHECK(mp)) {
270 error = xfs_qm_quotacheck(mp);
271 if (error) {
272 /* Quotacheck failed and disabled quotas. */
273 return;
274 }
275 }
276 /*
277 * If one type of quotas is off, then it will lose its
278 * quotachecked status, since we won't be doing accounting for
279 * that type anymore.
280 */
281 if (!XFS_IS_UQUOTA_ON(mp))
282 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
283 if (!XFS_IS_GQUOTA_ON(mp))
284 mp->m_qflags &= ~XFS_GQUOTA_CHKD;
285 if (!XFS_IS_PQUOTA_ON(mp))
286 mp->m_qflags &= ~XFS_PQUOTA_CHKD;
287
288 write_changes:
289 /*
290 * We actually don't have to acquire the m_sb_lock at all.
291 * This can only be called from mount, and that's single threaded. XXX
292 */
293 spin_lock(&mp->m_sb_lock);
294 sbf = mp->m_sb.sb_qflags;
295 mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
296 spin_unlock(&mp->m_sb_lock);
297
298 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
299 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
300 /*
301 * We could only have been turning quotas off.
302 * We aren't in very good shape actually because
303 * the incore structures are convinced that quotas are
304 * off, but the on disk superblock doesn't know that !
305 */
306 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
307 xfs_alert(mp, "%s: Superblock update failed!",
308 __func__);
309 }
310 }
311
312 if (error) {
313 xfs_warn(mp, "Failed to initialize disk quotas.");
314 return;
315 }
316}
317
318/* 224/*
319 * Called from the vfsops layer. 225 * Called from the vfsops layer.
320 */ 226 */
@@ -671,7 +577,7 @@ xfs_qm_init_quotainfo(
671 577
672 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); 578 qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
673 579
674 error = -list_lru_init(&qinf->qi_lru); 580 error = list_lru_init(&qinf->qi_lru);
675 if (error) 581 if (error)
676 goto out_free_qinf; 582 goto out_free_qinf;
677 583
@@ -995,7 +901,7 @@ xfs_qm_dqiter_bufs(
995 * will leave a trace in the log indicating corruption has 901 * will leave a trace in the log indicating corruption has
996 * been detected. 902 * been detected.
997 */ 903 */
998 if (error == EFSCORRUPTED) { 904 if (error == -EFSCORRUPTED) {
999 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 905 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1000 XFS_FSB_TO_DADDR(mp, bno), 906 XFS_FSB_TO_DADDR(mp, bno),
1001 mp->m_quotainfo->qi_dqchunklen, 0, &bp, 907 mp->m_quotainfo->qi_dqchunklen, 0, &bp,
@@ -1005,6 +911,12 @@ xfs_qm_dqiter_bufs(
1005 if (error) 911 if (error)
1006 break; 912 break;
1007 913
914 /*
915 * A corrupt buffer might not have a verifier attached, so
916 * make sure we have the correct one attached before writeback
917 * occurs.
918 */
919 bp->b_ops = &xfs_dquot_buf_ops;
1008 xfs_qm_reset_dqcounts(mp, bp, firstid, type); 920 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1009 xfs_buf_delwri_queue(bp, buffer_list); 921 xfs_buf_delwri_queue(bp, buffer_list);
1010 xfs_buf_relse(bp); 922 xfs_buf_relse(bp);
@@ -1090,7 +1002,7 @@ xfs_qm_dqiterate(
1090 xfs_buf_readahead(mp->m_ddev_targp, 1002 xfs_buf_readahead(mp->m_ddev_targp,
1091 XFS_FSB_TO_DADDR(mp, rablkno), 1003 XFS_FSB_TO_DADDR(mp, rablkno),
1092 mp->m_quotainfo->qi_dqchunklen, 1004 mp->m_quotainfo->qi_dqchunklen,
1093 NULL); 1005 &xfs_dquot_buf_ops);
1094 rablkno++; 1006 rablkno++;
1095 } 1007 }
1096 } 1008 }
@@ -1138,8 +1050,8 @@ xfs_qm_quotacheck_dqadjust(
1138 /* 1050 /*
1139 * Shouldn't be able to turn off quotas here. 1051 * Shouldn't be able to turn off quotas here.
1140 */ 1052 */
1141 ASSERT(error != ESRCH); 1053 ASSERT(error != -ESRCH);
1142 ASSERT(error != ENOENT); 1054 ASSERT(error != -ENOENT);
1143 return error; 1055 return error;
1144 } 1056 }
1145 1057
@@ -1226,7 +1138,7 @@ xfs_qm_dqusage_adjust(
1226 */ 1138 */
1227 if (xfs_is_quota_inode(&mp->m_sb, ino)) { 1139 if (xfs_is_quota_inode(&mp->m_sb, ino)) {
1228 *res = BULKSTAT_RV_NOTHING; 1140 *res = BULKSTAT_RV_NOTHING;
1229 return XFS_ERROR(EINVAL); 1141 return -EINVAL;
1230 } 1142 }
1231 1143
1232 /* 1144 /*
@@ -1330,7 +1242,7 @@ out_unlock:
1330 * Walk thru all the filesystem inodes and construct a consistent view 1242 * Walk thru all the filesystem inodes and construct a consistent view
1331 * of the disk quota world. If the quotacheck fails, disable quotas. 1243 * of the disk quota world. If the quotacheck fails, disable quotas.
1332 */ 1244 */
1333int 1245STATIC int
1334xfs_qm_quotacheck( 1246xfs_qm_quotacheck(
1335 xfs_mount_t *mp) 1247 xfs_mount_t *mp)
1336{ 1248{
@@ -1463,7 +1375,100 @@ xfs_qm_quotacheck(
1463 } 1375 }
1464 } else 1376 } else
1465 xfs_notice(mp, "Quotacheck: Done."); 1377 xfs_notice(mp, "Quotacheck: Done.");
1466 return (error); 1378 return error;
1379}
1380
1381/*
1382 * This is called from xfs_mountfs to start quotas and initialize all
1383 * necessary data structures like quotainfo. This is also responsible for
1384 * running a quotacheck as necessary. We are guaranteed that the superblock
1385 * is consistently read in at this point.
1386 *
1387 * If we fail here, the mount will continue with quota turned off. We don't
1388 * need to inidicate success or failure at all.
1389 */
1390void
1391xfs_qm_mount_quotas(
1392 struct xfs_mount *mp)
1393{
1394 int error = 0;
1395 uint sbf;
1396
1397 /*
1398 * If quotas on realtime volumes is not supported, we disable
1399 * quotas immediately.
1400 */
1401 if (mp->m_sb.sb_rextents) {
1402 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
1403 mp->m_qflags = 0;
1404 goto write_changes;
1405 }
1406
1407 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1408
1409 /*
1410 * Allocate the quotainfo structure inside the mount struct, and
1411 * create quotainode(s), and change/rev superblock if necessary.
1412 */
1413 error = xfs_qm_init_quotainfo(mp);
1414 if (error) {
1415 /*
1416 * We must turn off quotas.
1417 */
1418 ASSERT(mp->m_quotainfo == NULL);
1419 mp->m_qflags = 0;
1420 goto write_changes;
1421 }
1422 /*
1423 * If any of the quotas are not consistent, do a quotacheck.
1424 */
1425 if (XFS_QM_NEED_QUOTACHECK(mp)) {
1426 error = xfs_qm_quotacheck(mp);
1427 if (error) {
1428 /* Quotacheck failed and disabled quotas. */
1429 return;
1430 }
1431 }
1432 /*
1433 * If one type of quotas is off, then it will lose its
1434 * quotachecked status, since we won't be doing accounting for
1435 * that type anymore.
1436 */
1437 if (!XFS_IS_UQUOTA_ON(mp))
1438 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
1439 if (!XFS_IS_GQUOTA_ON(mp))
1440 mp->m_qflags &= ~XFS_GQUOTA_CHKD;
1441 if (!XFS_IS_PQUOTA_ON(mp))
1442 mp->m_qflags &= ~XFS_PQUOTA_CHKD;
1443
1444 write_changes:
1445 /*
1446 * We actually don't have to acquire the m_sb_lock at all.
1447 * This can only be called from mount, and that's single threaded. XXX
1448 */
1449 spin_lock(&mp->m_sb_lock);
1450 sbf = mp->m_sb.sb_qflags;
1451 mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
1452 spin_unlock(&mp->m_sb_lock);
1453
1454 if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
1455 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
1456 /*
1457 * We could only have been turning quotas off.
1458 * We aren't in very good shape actually because
1459 * the incore structures are convinced that quotas are
1460 * off, but the on disk superblock doesn't know that !
1461 */
1462 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
1463 xfs_alert(mp, "%s: Superblock update failed!",
1464 __func__);
1465 }
1466 }
1467
1468 if (error) {
1469 xfs_warn(mp, "Failed to initialize disk quotas.");
1470 return;
1471 }
1467} 1472}
1468 1473
1469/* 1474/*
@@ -1493,7 +1498,7 @@ xfs_qm_init_quotainos(
1493 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 1498 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1494 0, 0, &uip); 1499 0, 0, &uip);
1495 if (error) 1500 if (error)
1496 return XFS_ERROR(error); 1501 return error;
1497 } 1502 }
1498 if (XFS_IS_GQUOTA_ON(mp) && 1503 if (XFS_IS_GQUOTA_ON(mp) &&
1499 mp->m_sb.sb_gquotino != NULLFSINO) { 1504 mp->m_sb.sb_gquotino != NULLFSINO) {
@@ -1563,7 +1568,7 @@ error_rele:
1563 IRELE(gip); 1568 IRELE(gip);
1564 if (pip) 1569 if (pip)
1565 IRELE(pip); 1570 IRELE(pip);
1566 return XFS_ERROR(error); 1571 return error;
1567} 1572}
1568 1573
1569STATIC void 1574STATIC void
@@ -1679,7 +1684,7 @@ xfs_qm_vop_dqalloc(
1679 XFS_QMOPT_DOWARN, 1684 XFS_QMOPT_DOWARN,
1680 &uq); 1685 &uq);
1681 if (error) { 1686 if (error) {
1682 ASSERT(error != ENOENT); 1687 ASSERT(error != -ENOENT);
1683 return error; 1688 return error;
1684 } 1689 }
1685 /* 1690 /*
@@ -1706,7 +1711,7 @@ xfs_qm_vop_dqalloc(
1706 XFS_QMOPT_DOWARN, 1711 XFS_QMOPT_DOWARN,
1707 &gq); 1712 &gq);
1708 if (error) { 1713 if (error) {
1709 ASSERT(error != ENOENT); 1714 ASSERT(error != -ENOENT);
1710 goto error_rele; 1715 goto error_rele;
1711 } 1716 }
1712 xfs_dqunlock(gq); 1717 xfs_dqunlock(gq);
@@ -1726,7 +1731,7 @@ xfs_qm_vop_dqalloc(
1726 XFS_QMOPT_DOWARN, 1731 XFS_QMOPT_DOWARN,
1727 &pq); 1732 &pq);
1728 if (error) { 1733 if (error) {
1729 ASSERT(error != ENOENT); 1734 ASSERT(error != -ENOENT);
1730 goto error_rele; 1735 goto error_rele;
1731 } 1736 }
1732 xfs_dqunlock(pq); 1737 xfs_dqunlock(pq);
@@ -1895,7 +1900,7 @@ xfs_qm_vop_chown_reserve(
1895 -((xfs_qcnt_t)delblks), 0, blkflags); 1900 -((xfs_qcnt_t)delblks), 0, blkflags);
1896 } 1901 }
1897 1902
1898 return (0); 1903 return 0;
1899} 1904}
1900 1905
1901int 1906int
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 797fd4636273..3a07a937e232 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -157,7 +157,6 @@ struct xfs_dquot_acct {
157#define XFS_QM_RTBWARNLIMIT 5 157#define XFS_QM_RTBWARNLIMIT 5
158 158
159extern void xfs_qm_destroy_quotainfo(struct xfs_mount *); 159extern void xfs_qm_destroy_quotainfo(struct xfs_mount *);
160extern int xfs_qm_quotacheck(struct xfs_mount *);
161extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t); 160extern int xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t);
162 161
163/* dquot stuff */ 162/* dquot stuff */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index e9be63abd8d2..2c61e61b0205 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -117,7 +117,7 @@ xfs_qm_newmount(
117 (uquotaondisk ? " usrquota" : ""), 117 (uquotaondisk ? " usrquota" : ""),
118 (gquotaondisk ? " grpquota" : ""), 118 (gquotaondisk ? " grpquota" : ""),
119 (pquotaondisk ? " prjquota" : "")); 119 (pquotaondisk ? " prjquota" : ""));
120 return XFS_ERROR(EPERM); 120 return -EPERM;
121 } 121 }
122 122
123 if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { 123 if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index bbc813caba4c..80f2d77d929a 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -64,10 +64,10 @@ xfs_qm_scall_quotaoff(
64 /* 64 /*
65 * No file system can have quotas enabled on disk but not in core. 65 * No file system can have quotas enabled on disk but not in core.
66 * Note that quota utilities (like quotaoff) _expect_ 66 * Note that quota utilities (like quotaoff) _expect_
67 * errno == EEXIST here. 67 * errno == -EEXIST here.
68 */ 68 */
69 if ((mp->m_qflags & flags) == 0) 69 if ((mp->m_qflags & flags) == 0)
70 return XFS_ERROR(EEXIST); 70 return -EEXIST;
71 error = 0; 71 error = 0;
72 72
73 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); 73 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
@@ -94,7 +94,7 @@ xfs_qm_scall_quotaoff(
94 94
95 /* XXX what to do if error ? Revert back to old vals incore ? */ 95 /* XXX what to do if error ? Revert back to old vals incore ? */
96 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); 96 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
97 return (error); 97 return error;
98 } 98 }
99 99
100 dqtype = 0; 100 dqtype = 0;
@@ -198,7 +198,7 @@ xfs_qm_scall_quotaoff(
198 if (mp->m_qflags == 0) { 198 if (mp->m_qflags == 0) {
199 mutex_unlock(&q->qi_quotaofflock); 199 mutex_unlock(&q->qi_quotaofflock);
200 xfs_qm_destroy_quotainfo(mp); 200 xfs_qm_destroy_quotainfo(mp);
201 return (0); 201 return 0;
202 } 202 }
203 203
204 /* 204 /*
@@ -278,13 +278,13 @@ xfs_qm_scall_trunc_qfiles(
278 xfs_mount_t *mp, 278 xfs_mount_t *mp,
279 uint flags) 279 uint flags)
280{ 280{
281 int error = EINVAL; 281 int error = -EINVAL;
282 282
283 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 || 283 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 ||
284 (flags & ~XFS_DQ_ALLTYPES)) { 284 (flags & ~XFS_DQ_ALLTYPES)) {
285 xfs_debug(mp, "%s: flags=%x m_qflags=%x", 285 xfs_debug(mp, "%s: flags=%x m_qflags=%x",
286 __func__, flags, mp->m_qflags); 286 __func__, flags, mp->m_qflags);
287 return XFS_ERROR(EINVAL); 287 return -EINVAL;
288 } 288 }
289 289
290 if (flags & XFS_DQ_USER) { 290 if (flags & XFS_DQ_USER) {
@@ -328,7 +328,7 @@ xfs_qm_scall_quotaon(
328 if (flags == 0) { 328 if (flags == 0) {
329 xfs_debug(mp, "%s: zero flags, m_qflags=%x", 329 xfs_debug(mp, "%s: zero flags, m_qflags=%x",
330 __func__, mp->m_qflags); 330 __func__, mp->m_qflags);
331 return XFS_ERROR(EINVAL); 331 return -EINVAL;
332 } 332 }
333 333
334 /* No fs can turn on quotas with a delayed effect */ 334 /* No fs can turn on quotas with a delayed effect */
@@ -351,13 +351,13 @@ xfs_qm_scall_quotaon(
351 xfs_debug(mp, 351 xfs_debug(mp,
352 "%s: Can't enforce without acct, flags=%x sbflags=%x", 352 "%s: Can't enforce without acct, flags=%x sbflags=%x",
353 __func__, flags, mp->m_sb.sb_qflags); 353 __func__, flags, mp->m_sb.sb_qflags);
354 return XFS_ERROR(EINVAL); 354 return -EINVAL;
355 } 355 }
356 /* 356 /*
357 * If everything's up to-date incore, then don't waste time. 357 * If everything's up to-date incore, then don't waste time.
358 */ 358 */
359 if ((mp->m_qflags & flags) == flags) 359 if ((mp->m_qflags & flags) == flags)
360 return XFS_ERROR(EEXIST); 360 return -EEXIST;
361 361
362 /* 362 /*
363 * Change sb_qflags on disk but not incore mp->qflags 363 * Change sb_qflags on disk but not incore mp->qflags
@@ -372,11 +372,11 @@ xfs_qm_scall_quotaon(
372 * There's nothing to change if it's the same. 372 * There's nothing to change if it's the same.
373 */ 373 */
374 if ((qf & flags) == flags && sbflags == 0) 374 if ((qf & flags) == flags && sbflags == 0)
375 return XFS_ERROR(EEXIST); 375 return -EEXIST;
376 sbflags |= XFS_SB_QFLAGS; 376 sbflags |= XFS_SB_QFLAGS;
377 377
378 if ((error = xfs_qm_write_sb_changes(mp, sbflags))) 378 if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
379 return (error); 379 return error;
380 /* 380 /*
381 * If we aren't trying to switch on quota enforcement, we are done. 381 * If we aren't trying to switch on quota enforcement, we are done.
382 */ 382 */
@@ -387,10 +387,10 @@ xfs_qm_scall_quotaon(
387 ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != 387 ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
388 (mp->m_qflags & XFS_GQUOTA_ACCT)) || 388 (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
389 (flags & XFS_ALL_QUOTA_ENFD) == 0) 389 (flags & XFS_ALL_QUOTA_ENFD) == 0)
390 return (0); 390 return 0;
391 391
392 if (! XFS_IS_QUOTA_RUNNING(mp)) 392 if (! XFS_IS_QUOTA_RUNNING(mp))
393 return XFS_ERROR(ESRCH); 393 return -ESRCH;
394 394
395 /* 395 /*
396 * Switch on quota enforcement in core. 396 * Switch on quota enforcement in core.
@@ -399,7 +399,7 @@ xfs_qm_scall_quotaon(
399 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); 399 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
400 mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); 400 mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
401 401
402 return (0); 402 return 0;
403} 403}
404 404
405 405
@@ -426,7 +426,7 @@ xfs_qm_scall_getqstat(
426 if (!xfs_sb_version_hasquota(&mp->m_sb)) { 426 if (!xfs_sb_version_hasquota(&mp->m_sb)) {
427 out->qs_uquota.qfs_ino = NULLFSINO; 427 out->qs_uquota.qfs_ino = NULLFSINO;
428 out->qs_gquota.qfs_ino = NULLFSINO; 428 out->qs_gquota.qfs_ino = NULLFSINO;
429 return (0); 429 return 0;
430 } 430 }
431 431
432 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & 432 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
@@ -514,7 +514,7 @@ xfs_qm_scall_getqstatv(
514 out->qs_uquota.qfs_ino = NULLFSINO; 514 out->qs_uquota.qfs_ino = NULLFSINO;
515 out->qs_gquota.qfs_ino = NULLFSINO; 515 out->qs_gquota.qfs_ino = NULLFSINO;
516 out->qs_pquota.qfs_ino = NULLFSINO; 516 out->qs_pquota.qfs_ino = NULLFSINO;
517 return (0); 517 return 0;
518 } 518 }
519 519
520 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & 520 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
@@ -595,7 +595,7 @@ xfs_qm_scall_setqlim(
595 xfs_qcnt_t hard, soft; 595 xfs_qcnt_t hard, soft;
596 596
597 if (newlim->d_fieldmask & ~XFS_DQ_MASK) 597 if (newlim->d_fieldmask & ~XFS_DQ_MASK)
598 return EINVAL; 598 return -EINVAL;
599 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) 599 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
600 return 0; 600 return 0;
601 601
@@ -615,7 +615,7 @@ xfs_qm_scall_setqlim(
615 */ 615 */
616 error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp); 616 error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
617 if (error) { 617 if (error) {
618 ASSERT(error != ENOENT); 618 ASSERT(error != -ENOENT);
619 goto out_unlock; 619 goto out_unlock;
620 } 620 }
621 xfs_dqunlock(dqp); 621 xfs_dqunlock(dqp);
@@ -758,7 +758,7 @@ xfs_qm_log_quotaoff_end(
758 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); 758 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
759 if (error) { 759 if (error) {
760 xfs_trans_cancel(tp, 0); 760 xfs_trans_cancel(tp, 0);
761 return (error); 761 return error;
762 } 762 }
763 763
764 qoffi = xfs_trans_get_qoff_item(tp, startqoff, 764 qoffi = xfs_trans_get_qoff_item(tp, startqoff,
@@ -772,7 +772,7 @@ xfs_qm_log_quotaoff_end(
772 */ 772 */
773 xfs_trans_set_sync(tp); 773 xfs_trans_set_sync(tp);
774 error = xfs_trans_commit(tp, 0); 774 error = xfs_trans_commit(tp, 0);
775 return (error); 775 return error;
776} 776}
777 777
778 778
@@ -822,7 +822,7 @@ error0:
822 spin_unlock(&mp->m_sb_lock); 822 spin_unlock(&mp->m_sb_lock);
823 } 823 }
824 *qoffstartp = qoffi; 824 *qoffstartp = qoffi;
825 return (error); 825 return error;
826} 826}
827 827
828 828
@@ -850,7 +850,7 @@ xfs_qm_scall_getquota(
850 * our utility programs are concerned. 850 * our utility programs are concerned.
851 */ 851 */
852 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { 852 if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
853 error = XFS_ERROR(ENOENT); 853 error = -ENOENT;
854 goto out_put; 854 goto out_put;
855 } 855 }
856 856
@@ -953,7 +953,7 @@ xfs_qm_export_flags(
953 uflags |= FS_QUOTA_GDQ_ENFD; 953 uflags |= FS_QUOTA_GDQ_ENFD;
954 if (flags & XFS_PQUOTA_ENFD) 954 if (flags & XFS_PQUOTA_ENFD)
955 uflags |= FS_QUOTA_PDQ_ENFD; 955 uflags |= FS_QUOTA_PDQ_ENFD;
956 return (uflags); 956 return uflags;
957} 957}
958 958
959 959
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 2ad1b9822e92..b238027df987 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -51,7 +51,7 @@ xfs_fs_get_xstate(
51 51
52 if (!XFS_IS_QUOTA_RUNNING(mp)) 52 if (!XFS_IS_QUOTA_RUNNING(mp))
53 return -ENOSYS; 53 return -ENOSYS;
54 return -xfs_qm_scall_getqstat(mp, fqs); 54 return xfs_qm_scall_getqstat(mp, fqs);
55} 55}
56 56
57STATIC int 57STATIC int
@@ -63,7 +63,7 @@ xfs_fs_get_xstatev(
63 63
64 if (!XFS_IS_QUOTA_RUNNING(mp)) 64 if (!XFS_IS_QUOTA_RUNNING(mp))
65 return -ENOSYS; 65 return -ENOSYS;
66 return -xfs_qm_scall_getqstatv(mp, fqs); 66 return xfs_qm_scall_getqstatv(mp, fqs);
67} 67}
68 68
69STATIC int 69STATIC int
@@ -95,11 +95,11 @@ xfs_fs_set_xstate(
95 95
96 switch (op) { 96 switch (op) {
97 case Q_XQUOTAON: 97 case Q_XQUOTAON:
98 return -xfs_qm_scall_quotaon(mp, flags); 98 return xfs_qm_scall_quotaon(mp, flags);
99 case Q_XQUOTAOFF: 99 case Q_XQUOTAOFF:
100 if (!XFS_IS_QUOTA_ON(mp)) 100 if (!XFS_IS_QUOTA_ON(mp))
101 return -EINVAL; 101 return -EINVAL;
102 return -xfs_qm_scall_quotaoff(mp, flags); 102 return xfs_qm_scall_quotaoff(mp, flags);
103 } 103 }
104 104
105 return -EINVAL; 105 return -EINVAL;
@@ -112,7 +112,7 @@ xfs_fs_rm_xquota(
112{ 112{
113 struct xfs_mount *mp = XFS_M(sb); 113 struct xfs_mount *mp = XFS_M(sb);
114 unsigned int flags = 0; 114 unsigned int flags = 0;
115 115
116 if (sb->s_flags & MS_RDONLY) 116 if (sb->s_flags & MS_RDONLY)
117 return -EROFS; 117 return -EROFS;
118 118
@@ -123,11 +123,11 @@ xfs_fs_rm_xquota(
123 flags |= XFS_DQ_USER; 123 flags |= XFS_DQ_USER;
124 if (uflags & FS_GROUP_QUOTA) 124 if (uflags & FS_GROUP_QUOTA)
125 flags |= XFS_DQ_GROUP; 125 flags |= XFS_DQ_GROUP;
126 if (uflags & FS_USER_QUOTA) 126 if (uflags & FS_PROJ_QUOTA)
127 flags |= XFS_DQ_PROJ; 127 flags |= XFS_DQ_PROJ;
128 128
129 return -xfs_qm_scall_trunc_qfiles(mp, flags); 129 return xfs_qm_scall_trunc_qfiles(mp, flags);
130} 130}
131 131
132STATIC int 132STATIC int
133xfs_fs_get_dqblk( 133xfs_fs_get_dqblk(
@@ -142,7 +142,7 @@ xfs_fs_get_dqblk(
142 if (!XFS_IS_QUOTA_ON(mp)) 142 if (!XFS_IS_QUOTA_ON(mp))
143 return -ESRCH; 143 return -ESRCH;
144 144
145 return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), 145 return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
146 xfs_quota_type(qid.type), fdq); 146 xfs_quota_type(qid.type), fdq);
147} 147}
148 148
@@ -161,7 +161,7 @@ xfs_fs_set_dqblk(
161 if (!XFS_IS_QUOTA_ON(mp)) 161 if (!XFS_IS_QUOTA_ON(mp))
162 return -ESRCH; 162 return -ESRCH;
163 163
164 return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), 164 return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
165 xfs_quota_type(qid.type), fdq); 165 xfs_quota_type(qid.type), fdq);
166} 166}
167 167
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ec5ca65c6211..909e143b87ae 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -863,7 +863,7 @@ xfs_growfs_rt_alloc(
863 XFS_BMAPI_METADATA, &firstblock, 863 XFS_BMAPI_METADATA, &firstblock,
864 resblks, &map, &nmap, &flist); 864 resblks, &map, &nmap, &flist);
865 if (!error && nmap < 1) 865 if (!error && nmap < 1)
866 error = XFS_ERROR(ENOSPC); 866 error = -ENOSPC;
867 if (error) 867 if (error)
868 goto error_cancel; 868 goto error_cancel;
869 /* 869 /*
@@ -903,7 +903,7 @@ xfs_growfs_rt_alloc(
903 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 903 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
904 mp->m_bsize, 0); 904 mp->m_bsize, 0);
905 if (bp == NULL) { 905 if (bp == NULL) {
906 error = XFS_ERROR(EIO); 906 error = -EIO;
907error_cancel: 907error_cancel:
908 xfs_trans_cancel(tp, cancelflags); 908 xfs_trans_cancel(tp, cancelflags);
909 goto error; 909 goto error;
@@ -944,9 +944,9 @@ xfs_growfs_rt(
944 xfs_buf_t *bp; /* temporary buffer */ 944 xfs_buf_t *bp; /* temporary buffer */
945 int error; /* error return value */ 945 int error; /* error return value */
946 xfs_mount_t *nmp; /* new (fake) mount structure */ 946 xfs_mount_t *nmp; /* new (fake) mount structure */
947 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ 947 xfs_rfsblock_t nrblocks; /* new number of realtime blocks */
948 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ 948 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
949 xfs_drtbno_t nrextents; /* new number of realtime extents */ 949 xfs_rtblock_t nrextents; /* new number of realtime extents */
950 uint8_t nrextslog; /* new log2 of sb_rextents */ 950 uint8_t nrextslog; /* new log2 of sb_rextents */
951 xfs_extlen_t nrsumblocks; /* new number of summary blocks */ 951 xfs_extlen_t nrsumblocks; /* new number of summary blocks */
952 uint nrsumlevels; /* new rt summary levels */ 952 uint nrsumlevels; /* new rt summary levels */
@@ -962,11 +962,11 @@ xfs_growfs_rt(
962 * Initial error checking. 962 * Initial error checking.
963 */ 963 */
964 if (!capable(CAP_SYS_ADMIN)) 964 if (!capable(CAP_SYS_ADMIN))
965 return XFS_ERROR(EPERM); 965 return -EPERM;
966 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || 966 if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
967 (nrblocks = in->newblocks) <= sbp->sb_rblocks || 967 (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
968 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) 968 (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
969 return XFS_ERROR(EINVAL); 969 return -EINVAL;
970 if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks))) 970 if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
971 return error; 971 return error;
972 /* 972 /*
@@ -976,7 +976,7 @@ xfs_growfs_rt(
976 XFS_FSB_TO_BB(mp, nrblocks - 1), 976 XFS_FSB_TO_BB(mp, nrblocks - 1),
977 XFS_FSB_TO_BB(mp, 1), 0, NULL); 977 XFS_FSB_TO_BB(mp, 1), 0, NULL);
978 if (!bp) 978 if (!bp)
979 return EIO; 979 return -EIO;
980 if (bp->b_error) { 980 if (bp->b_error) {
981 error = bp->b_error; 981 error = bp->b_error;
982 xfs_buf_relse(bp); 982 xfs_buf_relse(bp);
@@ -1001,7 +1001,7 @@ xfs_growfs_rt(
1001 * since we'll log basically the whole summary file at once. 1001 * since we'll log basically the whole summary file at once.
1002 */ 1002 */
1003 if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) 1003 if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
1004 return XFS_ERROR(EINVAL); 1004 return -EINVAL;
1005 /* 1005 /*
1006 * Get the old block counts for bitmap and summary inodes. 1006 * Get the old block counts for bitmap and summary inodes.
1007 * These can't change since other growfs callers are locked out. 1007 * These can't change since other growfs callers are locked out.
@@ -1208,7 +1208,7 @@ xfs_rtallocate_extent(
1208 len, &sumbp, &sb, prod, &r); 1208 len, &sumbp, &sb, prod, &r);
1209 break; 1209 break;
1210 default: 1210 default:
1211 error = EIO; 1211 error = -EIO;
1212 ASSERT(0); 1212 ASSERT(0);
1213 } 1213 }
1214 if (error) 1214 if (error)
@@ -1247,7 +1247,7 @@ xfs_rtmount_init(
1247 if (mp->m_rtdev_targp == NULL) { 1247 if (mp->m_rtdev_targp == NULL) {
1248 xfs_warn(mp, 1248 xfs_warn(mp,
1249 "Filesystem has a realtime volume, use rtdev=device option"); 1249 "Filesystem has a realtime volume, use rtdev=device option");
1250 return XFS_ERROR(ENODEV); 1250 return -ENODEV;
1251 } 1251 }
1252 mp->m_rsumlevels = sbp->sb_rextslog + 1; 1252 mp->m_rsumlevels = sbp->sb_rextslog + 1;
1253 mp->m_rsumsize = 1253 mp->m_rsumsize =
@@ -1263,7 +1263,7 @@ xfs_rtmount_init(
1263 xfs_warn(mp, "realtime mount -- %llu != %llu", 1263 xfs_warn(mp, "realtime mount -- %llu != %llu",
1264 (unsigned long long) XFS_BB_TO_FSB(mp, d), 1264 (unsigned long long) XFS_BB_TO_FSB(mp, d),
1265 (unsigned long long) mp->m_sb.sb_rblocks); 1265 (unsigned long long) mp->m_sb.sb_rblocks);
1266 return XFS_ERROR(EFBIG); 1266 return -EFBIG;
1267 } 1267 }
1268 bp = xfs_buf_read_uncached(mp->m_rtdev_targp, 1268 bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
1269 d - XFS_FSB_TO_BB(mp, 1), 1269 d - XFS_FSB_TO_BB(mp, 1),
@@ -1272,7 +1272,7 @@ xfs_rtmount_init(
1272 xfs_warn(mp, "realtime device size check failed"); 1272 xfs_warn(mp, "realtime device size check failed");
1273 if (bp) 1273 if (bp)
1274 xfs_buf_relse(bp); 1274 xfs_buf_relse(bp);
1275 return EIO; 1275 return -EIO;
1276 } 1276 }
1277 xfs_buf_relse(bp); 1277 xfs_buf_relse(bp);
1278 return 0; 1278 return 0;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 752b63d10300..c642795324af 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -132,7 +132,7 @@ xfs_rtmount_init(
132 return 0; 132 return 0;
133 133
134 xfs_warn(mp, "Not built with CONFIG_XFS_RT"); 134 xfs_warn(mp, "Not built with CONFIG_XFS_RT");
135 return ENOSYS; 135 return -ENOSYS;
136} 136}
137# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 137# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
138# define xfs_rtunmount_inodes(m) 138# define xfs_rtunmount_inodes(m)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8f0333b3f7a0..b194652033cd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -61,6 +61,7 @@
61static const struct super_operations xfs_super_operations; 61static const struct super_operations xfs_super_operations;
62static kmem_zone_t *xfs_ioend_zone; 62static kmem_zone_t *xfs_ioend_zone;
63mempool_t *xfs_ioend_pool; 63mempool_t *xfs_ioend_pool;
64struct kset *xfs_kset;
64 65
65#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ 66#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
66#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ 67#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
@@ -185,7 +186,7 @@ xfs_parseargs(
185 */ 186 */
186 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); 187 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
187 if (!mp->m_fsname) 188 if (!mp->m_fsname)
188 return ENOMEM; 189 return -ENOMEM;
189 mp->m_fsname_len = strlen(mp->m_fsname) + 1; 190 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
190 191
191 /* 192 /*
@@ -204,9 +205,6 @@ xfs_parseargs(
204 */ 205 */
205 mp->m_flags |= XFS_MOUNT_BARRIER; 206 mp->m_flags |= XFS_MOUNT_BARRIER;
206 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 207 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
207#if !XFS_BIG_INUMS
208 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
209#endif
210 208
211 /* 209 /*
212 * These can be overridden by the mount option parsing. 210 * These can be overridden by the mount option parsing.
@@ -227,57 +225,57 @@ xfs_parseargs(
227 if (!value || !*value) { 225 if (!value || !*value) {
228 xfs_warn(mp, "%s option requires an argument", 226 xfs_warn(mp, "%s option requires an argument",
229 this_char); 227 this_char);
230 return EINVAL; 228 return -EINVAL;
231 } 229 }
232 if (kstrtoint(value, 10, &mp->m_logbufs)) 230 if (kstrtoint(value, 10, &mp->m_logbufs))
233 return EINVAL; 231 return -EINVAL;
234 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 232 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
235 if (!value || !*value) { 233 if (!value || !*value) {
236 xfs_warn(mp, "%s option requires an argument", 234 xfs_warn(mp, "%s option requires an argument",
237 this_char); 235 this_char);
238 return EINVAL; 236 return -EINVAL;
239 } 237 }
240 if (suffix_kstrtoint(value, 10, &mp->m_logbsize)) 238 if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
241 return EINVAL; 239 return -EINVAL;
242 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 240 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
243 if (!value || !*value) { 241 if (!value || !*value) {
244 xfs_warn(mp, "%s option requires an argument", 242 xfs_warn(mp, "%s option requires an argument",
245 this_char); 243 this_char);
246 return EINVAL; 244 return -EINVAL;
247 } 245 }
248 mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); 246 mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
249 if (!mp->m_logname) 247 if (!mp->m_logname)
250 return ENOMEM; 248 return -ENOMEM;
251 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 249 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
252 xfs_warn(mp, "%s option not allowed on this system", 250 xfs_warn(mp, "%s option not allowed on this system",
253 this_char); 251 this_char);
254 return EINVAL; 252 return -EINVAL;
255 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 253 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
256 if (!value || !*value) { 254 if (!value || !*value) {
257 xfs_warn(mp, "%s option requires an argument", 255 xfs_warn(mp, "%s option requires an argument",
258 this_char); 256 this_char);
259 return EINVAL; 257 return -EINVAL;
260 } 258 }
261 mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); 259 mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
262 if (!mp->m_rtname) 260 if (!mp->m_rtname)
263 return ENOMEM; 261 return -ENOMEM;
264 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 262 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
265 if (!value || !*value) { 263 if (!value || !*value) {
266 xfs_warn(mp, "%s option requires an argument", 264 xfs_warn(mp, "%s option requires an argument",
267 this_char); 265 this_char);
268 return EINVAL; 266 return -EINVAL;
269 } 267 }
270 if (kstrtoint(value, 10, &iosize)) 268 if (kstrtoint(value, 10, &iosize))
271 return EINVAL; 269 return -EINVAL;
272 iosizelog = ffs(iosize) - 1; 270 iosizelog = ffs(iosize) - 1;
273 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 271 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
274 if (!value || !*value) { 272 if (!value || !*value) {
275 xfs_warn(mp, "%s option requires an argument", 273 xfs_warn(mp, "%s option requires an argument",
276 this_char); 274 this_char);
277 return EINVAL; 275 return -EINVAL;
278 } 276 }
279 if (suffix_kstrtoint(value, 10, &iosize)) 277 if (suffix_kstrtoint(value, 10, &iosize))
280 return EINVAL; 278 return -EINVAL;
281 iosizelog = ffs(iosize) - 1; 279 iosizelog = ffs(iosize) - 1;
282 } else if (!strcmp(this_char, MNTOPT_GRPID) || 280 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
283 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 281 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -297,27 +295,22 @@ xfs_parseargs(
297 if (!value || !*value) { 295 if (!value || !*value) {
298 xfs_warn(mp, "%s option requires an argument", 296 xfs_warn(mp, "%s option requires an argument",
299 this_char); 297 this_char);
300 return EINVAL; 298 return -EINVAL;
301 } 299 }
302 if (kstrtoint(value, 10, &dsunit)) 300 if (kstrtoint(value, 10, &dsunit))
303 return EINVAL; 301 return -EINVAL;
304 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 302 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
305 if (!value || !*value) { 303 if (!value || !*value) {
306 xfs_warn(mp, "%s option requires an argument", 304 xfs_warn(mp, "%s option requires an argument",
307 this_char); 305 this_char);
308 return EINVAL; 306 return -EINVAL;
309 } 307 }
310 if (kstrtoint(value, 10, &dswidth)) 308 if (kstrtoint(value, 10, &dswidth))
311 return EINVAL; 309 return -EINVAL;
312 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { 310 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
313 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 311 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
314 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 312 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
315 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; 313 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
316#if !XFS_BIG_INUMS
317 xfs_warn(mp, "%s option not allowed on this system",
318 this_char);
319 return EINVAL;
320#endif
321 } else if (!strcmp(this_char, MNTOPT_NOUUID)) { 314 } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
322 mp->m_flags |= XFS_MOUNT_NOUUID; 315 mp->m_flags |= XFS_MOUNT_NOUUID;
323 } else if (!strcmp(this_char, MNTOPT_BARRIER)) { 316 } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
@@ -390,7 +383,7 @@ xfs_parseargs(
390 "irixsgid is now a sysctl(2) variable, option is deprecated."); 383 "irixsgid is now a sysctl(2) variable, option is deprecated.");
391 } else { 384 } else {
392 xfs_warn(mp, "unknown mount option [%s].", this_char); 385 xfs_warn(mp, "unknown mount option [%s].", this_char);
393 return EINVAL; 386 return -EINVAL;
394 } 387 }
395 } 388 }
396 389
@@ -400,32 +393,32 @@ xfs_parseargs(
400 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && 393 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
401 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 394 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
402 xfs_warn(mp, "no-recovery mounts must be read-only."); 395 xfs_warn(mp, "no-recovery mounts must be read-only.");
403 return EINVAL; 396 return -EINVAL;
404 } 397 }
405 398
406 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { 399 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
407 xfs_warn(mp, 400 xfs_warn(mp,
408 "sunit and swidth options incompatible with the noalign option"); 401 "sunit and swidth options incompatible with the noalign option");
409 return EINVAL; 402 return -EINVAL;
410 } 403 }
411 404
412#ifndef CONFIG_XFS_QUOTA 405#ifndef CONFIG_XFS_QUOTA
413 if (XFS_IS_QUOTA_RUNNING(mp)) { 406 if (XFS_IS_QUOTA_RUNNING(mp)) {
414 xfs_warn(mp, "quota support not available in this kernel."); 407 xfs_warn(mp, "quota support not available in this kernel.");
415 return EINVAL; 408 return -EINVAL;
416 } 409 }
417#endif 410#endif
418 411
419 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 412 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
420 xfs_warn(mp, "sunit and swidth must be specified together"); 413 xfs_warn(mp, "sunit and swidth must be specified together");
421 return EINVAL; 414 return -EINVAL;
422 } 415 }
423 416
424 if (dsunit && (dswidth % dsunit != 0)) { 417 if (dsunit && (dswidth % dsunit != 0)) {
425 xfs_warn(mp, 418 xfs_warn(mp,
426 "stripe width (%d) must be a multiple of the stripe unit (%d)", 419 "stripe width (%d) must be a multiple of the stripe unit (%d)",
427 dswidth, dsunit); 420 dswidth, dsunit);
428 return EINVAL; 421 return -EINVAL;
429 } 422 }
430 423
431done: 424done:
@@ -446,7 +439,7 @@ done:
446 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 439 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
447 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", 440 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
448 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 441 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
449 return XFS_ERROR(EINVAL); 442 return -EINVAL;
450 } 443 }
451 if (mp->m_logbsize != -1 && 444 if (mp->m_logbsize != -1 &&
452 mp->m_logbsize != 0 && 445 mp->m_logbsize != 0 &&
@@ -456,7 +449,7 @@ done:
456 xfs_warn(mp, 449 xfs_warn(mp,
457 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 450 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
458 mp->m_logbsize); 451 mp->m_logbsize);
459 return XFS_ERROR(EINVAL); 452 return -EINVAL;
460 } 453 }
461 454
462 if (iosizelog) { 455 if (iosizelog) {
@@ -465,7 +458,7 @@ done:
465 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", 458 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
466 iosizelog, XFS_MIN_IO_LOG, 459 iosizelog, XFS_MIN_IO_LOG,
467 XFS_MAX_IO_LOG); 460 XFS_MAX_IO_LOG);
468 return XFS_ERROR(EINVAL); 461 return -EINVAL;
469 } 462 }
470 463
471 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; 464 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
@@ -597,15 +590,20 @@ xfs_max_file_offset(
597 return (((__uint64_t)pagefactor) << bitshift) - 1; 590 return (((__uint64_t)pagefactor) << bitshift) - 1;
598} 591}
599 592
593/*
594 * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
595 * because in the growfs case, mp->m_sb.sb_agcount is not updated
596 * yet to the potentially higher ag count.
597 */
600xfs_agnumber_t 598xfs_agnumber_t
601xfs_set_inode32(struct xfs_mount *mp) 599xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
602{ 600{
603 xfs_agnumber_t index = 0; 601 xfs_agnumber_t index = 0;
604 xfs_agnumber_t maxagi = 0; 602 xfs_agnumber_t maxagi = 0;
605 xfs_sb_t *sbp = &mp->m_sb; 603 xfs_sb_t *sbp = &mp->m_sb;
606 xfs_agnumber_t max_metadata; 604 xfs_agnumber_t max_metadata;
607 xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0); 605 xfs_agino_t agino;
608 xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino); 606 xfs_ino_t ino;
609 xfs_perag_t *pag; 607 xfs_perag_t *pag;
610 608
611 /* Calculate how much should be reserved for inodes to meet 609 /* Calculate how much should be reserved for inodes to meet
@@ -620,10 +618,12 @@ xfs_set_inode32(struct xfs_mount *mp)
620 do_div(icount, sbp->sb_agblocks); 618 do_div(icount, sbp->sb_agblocks);
621 max_metadata = icount; 619 max_metadata = icount;
622 } else { 620 } else {
623 max_metadata = sbp->sb_agcount; 621 max_metadata = agcount;
624 } 622 }
625 623
626 for (index = 0; index < sbp->sb_agcount; index++) { 624 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
625
626 for (index = 0; index < agcount; index++) {
627 ino = XFS_AGINO_TO_INO(mp, index, agino); 627 ino = XFS_AGINO_TO_INO(mp, index, agino);
628 628
629 if (ino > XFS_MAXINUMBER_32) { 629 if (ino > XFS_MAXINUMBER_32) {
@@ -648,11 +648,11 @@ xfs_set_inode32(struct xfs_mount *mp)
648} 648}
649 649
650xfs_agnumber_t 650xfs_agnumber_t
651xfs_set_inode64(struct xfs_mount *mp) 651xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
652{ 652{
653 xfs_agnumber_t index = 0; 653 xfs_agnumber_t index = 0;
654 654
655 for (index = 0; index < mp->m_sb.sb_agcount; index++) { 655 for (index = 0; index < agcount; index++) {
656 struct xfs_perag *pag; 656 struct xfs_perag *pag;
657 657
658 pag = xfs_perag_get(mp, index); 658 pag = xfs_perag_get(mp, index);
@@ -686,7 +686,7 @@ xfs_blkdev_get(
686 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); 686 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
687 } 687 }
688 688
689 return -error; 689 return error;
690} 690}
691 691
692STATIC void 692STATIC void
@@ -756,7 +756,7 @@ xfs_open_devices(
756 if (rtdev == ddev || rtdev == logdev) { 756 if (rtdev == ddev || rtdev == logdev) {
757 xfs_warn(mp, 757 xfs_warn(mp,
758 "Cannot mount filesystem with identical rtdev and ddev/logdev."); 758 "Cannot mount filesystem with identical rtdev and ddev/logdev.");
759 error = EINVAL; 759 error = -EINVAL;
760 goto out_close_rtdev; 760 goto out_close_rtdev;
761 } 761 }
762 } 762 }
@@ -764,7 +764,7 @@ xfs_open_devices(
764 /* 764 /*
765 * Setup xfs_mount buffer target pointers 765 * Setup xfs_mount buffer target pointers
766 */ 766 */
767 error = ENOMEM; 767 error = -ENOMEM;
768 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); 768 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
769 if (!mp->m_ddev_targp) 769 if (!mp->m_ddev_targp)
770 goto out_close_rtdev; 770 goto out_close_rtdev;
@@ -1188,6 +1188,7 @@ xfs_fs_remount(
1188 char *options) 1188 char *options)
1189{ 1189{
1190 struct xfs_mount *mp = XFS_M(sb); 1190 struct xfs_mount *mp = XFS_M(sb);
1191 xfs_sb_t *sbp = &mp->m_sb;
1191 substring_t args[MAX_OPT_ARGS]; 1192 substring_t args[MAX_OPT_ARGS];
1192 char *p; 1193 char *p;
1193 int error; 1194 int error;
@@ -1208,10 +1209,10 @@ xfs_fs_remount(
1208 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1209 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1209 break; 1210 break;
1210 case Opt_inode64: 1211 case Opt_inode64:
1211 mp->m_maxagi = xfs_set_inode64(mp); 1212 mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
1212 break; 1213 break;
1213 case Opt_inode32: 1214 case Opt_inode32:
1214 mp->m_maxagi = xfs_set_inode32(mp); 1215 mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
1215 break; 1216 break;
1216 default: 1217 default:
1217 /* 1218 /*
@@ -1295,7 +1296,7 @@ xfs_fs_freeze(
1295 1296
1296 xfs_save_resvblks(mp); 1297 xfs_save_resvblks(mp);
1297 xfs_quiesce_attr(mp); 1298 xfs_quiesce_attr(mp);
1298 return -xfs_fs_log_dummy(mp); 1299 return xfs_fs_log_dummy(mp);
1299} 1300}
1300 1301
1301STATIC int 1302STATIC int
@@ -1314,7 +1315,7 @@ xfs_fs_show_options(
1314 struct seq_file *m, 1315 struct seq_file *m,
1315 struct dentry *root) 1316 struct dentry *root)
1316{ 1317{
1317 return -xfs_showargs(XFS_M(root->d_sb), m); 1318 return xfs_showargs(XFS_M(root->d_sb), m);
1318} 1319}
1319 1320
1320/* 1321/*
@@ -1336,14 +1337,14 @@ xfs_finish_flags(
1336 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1337 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1337 xfs_warn(mp, 1338 xfs_warn(mp,
1338 "logbuf size must be greater than or equal to log stripe size"); 1339 "logbuf size must be greater than or equal to log stripe size");
1339 return XFS_ERROR(EINVAL); 1340 return -EINVAL;
1340 } 1341 }
1341 } else { 1342 } else {
1342 /* Fail a mount if the logbuf is larger than 32K */ 1343 /* Fail a mount if the logbuf is larger than 32K */
1343 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1344 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1344 xfs_warn(mp, 1345 xfs_warn(mp,
1345 "logbuf size for version 1 logs must be 16K or 32K"); 1346 "logbuf size for version 1 logs must be 16K or 32K");
1346 return XFS_ERROR(EINVAL); 1347 return -EINVAL;
1347 } 1348 }
1348 } 1349 }
1349 1350
@@ -1355,7 +1356,7 @@ xfs_finish_flags(
1355 xfs_warn(mp, 1356 xfs_warn(mp,
1356"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.", 1357"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
1357 MNTOPT_NOATTR2, MNTOPT_ATTR2); 1358 MNTOPT_NOATTR2, MNTOPT_ATTR2);
1358 return XFS_ERROR(EINVAL); 1359 return -EINVAL;
1359 } 1360 }
1360 1361
1361 /* 1362 /*
@@ -1372,7 +1373,7 @@ xfs_finish_flags(
1372 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { 1373 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1373 xfs_warn(mp, 1374 xfs_warn(mp,
1374 "cannot mount a read-only filesystem as read-write"); 1375 "cannot mount a read-only filesystem as read-write");
1375 return XFS_ERROR(EROFS); 1376 return -EROFS;
1376 } 1377 }
1377 1378
1378 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 1379 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
@@ -1380,7 +1381,7 @@ xfs_finish_flags(
1380 !xfs_sb_version_has_pquotino(&mp->m_sb)) { 1381 !xfs_sb_version_has_pquotino(&mp->m_sb)) {
1381 xfs_warn(mp, 1382 xfs_warn(mp,
1382 "Super block does not support project and group quota together"); 1383 "Super block does not support project and group quota together");
1383 return XFS_ERROR(EINVAL); 1384 return -EINVAL;
1384 } 1385 }
1385 1386
1386 return 0; 1387 return 0;
@@ -1394,7 +1395,7 @@ xfs_fs_fill_super(
1394{ 1395{
1395 struct inode *root; 1396 struct inode *root;
1396 struct xfs_mount *mp = NULL; 1397 struct xfs_mount *mp = NULL;
1397 int flags = 0, error = ENOMEM; 1398 int flags = 0, error = -ENOMEM;
1398 1399
1399 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1400 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1400 if (!mp) 1401 if (!mp)
@@ -1428,11 +1429,11 @@ xfs_fs_fill_super(
1428 if (error) 1429 if (error)
1429 goto out_free_fsname; 1430 goto out_free_fsname;
1430 1431
1431 error = -xfs_init_mount_workqueues(mp); 1432 error = xfs_init_mount_workqueues(mp);
1432 if (error) 1433 if (error)
1433 goto out_close_devices; 1434 goto out_close_devices;
1434 1435
1435 error = -xfs_icsb_init_counters(mp); 1436 error = xfs_icsb_init_counters(mp);
1436 if (error) 1437 if (error)
1437 goto out_destroy_workqueues; 1438 goto out_destroy_workqueues;
1438 1439
@@ -1474,12 +1475,12 @@ xfs_fs_fill_super(
1474 1475
1475 root = igrab(VFS_I(mp->m_rootip)); 1476 root = igrab(VFS_I(mp->m_rootip));
1476 if (!root) { 1477 if (!root) {
1477 error = ENOENT; 1478 error = -ENOENT;
1478 goto out_unmount; 1479 goto out_unmount;
1479 } 1480 }
1480 sb->s_root = d_make_root(root); 1481 sb->s_root = d_make_root(root);
1481 if (!sb->s_root) { 1482 if (!sb->s_root) {
1482 error = ENOMEM; 1483 error = -ENOMEM;
1483 goto out_unmount; 1484 goto out_unmount;
1484 } 1485 }
1485 1486
@@ -1499,7 +1500,7 @@ out_destroy_workqueues:
1499 xfs_free_fsname(mp); 1500 xfs_free_fsname(mp);
1500 kfree(mp); 1501 kfree(mp);
1501 out: 1502 out:
1502 return -error; 1503 return error;
1503 1504
1504 out_unmount: 1505 out_unmount:
1505 xfs_filestream_unmount(mp); 1506 xfs_filestream_unmount(mp);
@@ -1761,9 +1762,15 @@ init_xfs_fs(void)
1761 if (error) 1762 if (error)
1762 goto out_cleanup_procfs; 1763 goto out_cleanup_procfs;
1763 1764
1765 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
1766 if (!xfs_kset) {
1767 error = -ENOMEM;
1768 goto out_sysctl_unregister;;
1769 }
1770
1764 error = xfs_qm_init(); 1771 error = xfs_qm_init();
1765 if (error) 1772 if (error)
1766 goto out_sysctl_unregister; 1773 goto out_kset_unregister;
1767 1774
1768 error = register_filesystem(&xfs_fs_type); 1775 error = register_filesystem(&xfs_fs_type);
1769 if (error) 1776 if (error)
@@ -1772,6 +1779,8 @@ init_xfs_fs(void)
1772 1779
1773 out_qm_exit: 1780 out_qm_exit:
1774 xfs_qm_exit(); 1781 xfs_qm_exit();
1782 out_kset_unregister:
1783 kset_unregister(xfs_kset);
1775 out_sysctl_unregister: 1784 out_sysctl_unregister:
1776 xfs_sysctl_unregister(); 1785 xfs_sysctl_unregister();
1777 out_cleanup_procfs: 1786 out_cleanup_procfs:
@@ -1793,6 +1802,7 @@ exit_xfs_fs(void)
1793{ 1802{
1794 xfs_qm_exit(); 1803 xfs_qm_exit();
1795 unregister_filesystem(&xfs_fs_type); 1804 unregister_filesystem(&xfs_fs_type);
1805 kset_unregister(xfs_kset);
1796 xfs_sysctl_unregister(); 1806 xfs_sysctl_unregister();
1797 xfs_cleanup_procfs(); 1807 xfs_cleanup_procfs();
1798 xfs_buf_terminate(); 1808 xfs_buf_terminate();
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index bbe3d15a7904..2b830c2f322e 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -44,16 +44,6 @@ extern void xfs_qm_exit(void);
44# define XFS_REALTIME_STRING 44# define XFS_REALTIME_STRING
45#endif 45#endif
46 46
47#if XFS_BIG_BLKNOS
48# if XFS_BIG_INUMS
49# define XFS_BIGFS_STRING "large block/inode numbers, "
50# else
51# define XFS_BIGFS_STRING "large block numbers, "
52# endif
53#else
54# define XFS_BIGFS_STRING
55#endif
56
57#ifdef DEBUG 47#ifdef DEBUG
58# define XFS_DBG_STRING "debug" 48# define XFS_DBG_STRING "debug"
59#else 49#else
@@ -64,7 +54,6 @@ extern void xfs_qm_exit(void);
64#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ 54#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
65 XFS_SECURITY_STRING \ 55 XFS_SECURITY_STRING \
66 XFS_REALTIME_STRING \ 56 XFS_REALTIME_STRING \
67 XFS_BIGFS_STRING \
68 XFS_DBG_STRING /* DBG must be last */ 57 XFS_DBG_STRING /* DBG must be last */
69 58
70struct xfs_inode; 59struct xfs_inode;
@@ -76,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
76 65
77extern void xfs_flush_inodes(struct xfs_mount *mp); 66extern void xfs_flush_inodes(struct xfs_mount *mp);
78extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 67extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
79extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *); 68extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount);
80extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *); 69extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount);
81 70
82extern const struct export_operations xfs_export_operations; 71extern const struct export_operations xfs_export_operations;
83extern const struct xattr_handler *xfs_xattr_handlers[]; 72extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index d69363c833e1..6a944a2cd36f 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -76,15 +76,15 @@ xfs_readlink_bmap(
76 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, 76 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
77 &xfs_symlink_buf_ops); 77 &xfs_symlink_buf_ops);
78 if (!bp) 78 if (!bp)
79 return XFS_ERROR(ENOMEM); 79 return -ENOMEM;
80 error = bp->b_error; 80 error = bp->b_error;
81 if (error) { 81 if (error) {
82 xfs_buf_ioerror_alert(bp, __func__); 82 xfs_buf_ioerror_alert(bp, __func__);
83 xfs_buf_relse(bp); 83 xfs_buf_relse(bp);
84 84
85 /* bad CRC means corrupted metadata */ 85 /* bad CRC means corrupted metadata */
86 if (error == EFSBADCRC) 86 if (error == -EFSBADCRC)
87 error = EFSCORRUPTED; 87 error = -EFSCORRUPTED;
88 goto out; 88 goto out;
89 } 89 }
90 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); 90 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -95,7 +95,7 @@ xfs_readlink_bmap(
95 if (xfs_sb_version_hascrc(&mp->m_sb)) { 95 if (xfs_sb_version_hascrc(&mp->m_sb)) {
96 if (!xfs_symlink_hdr_ok(ip->i_ino, offset, 96 if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
97 byte_cnt, bp)) { 97 byte_cnt, bp)) {
98 error = EFSCORRUPTED; 98 error = -EFSCORRUPTED;
99 xfs_alert(mp, 99 xfs_alert(mp,
100"symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)", 100"symlink header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
101 offset, byte_cnt, ip->i_ino); 101 offset, byte_cnt, ip->i_ino);
@@ -135,7 +135,7 @@ xfs_readlink(
135 trace_xfs_readlink(ip); 135 trace_xfs_readlink(ip);
136 136
137 if (XFS_FORCED_SHUTDOWN(mp)) 137 if (XFS_FORCED_SHUTDOWN(mp))
138 return XFS_ERROR(EIO); 138 return -EIO;
139 139
140 xfs_ilock(ip, XFS_ILOCK_SHARED); 140 xfs_ilock(ip, XFS_ILOCK_SHARED);
141 141
@@ -148,7 +148,7 @@ xfs_readlink(
148 __func__, (unsigned long long) ip->i_ino, 148 __func__, (unsigned long long) ip->i_ino,
149 (long long) pathlen); 149 (long long) pathlen);
150 ASSERT(0); 150 ASSERT(0);
151 error = XFS_ERROR(EFSCORRUPTED); 151 error = -EFSCORRUPTED;
152 goto out; 152 goto out;
153 } 153 }
154 154
@@ -203,14 +203,14 @@ xfs_symlink(
203 trace_xfs_symlink(dp, link_name); 203 trace_xfs_symlink(dp, link_name);
204 204
205 if (XFS_FORCED_SHUTDOWN(mp)) 205 if (XFS_FORCED_SHUTDOWN(mp))
206 return XFS_ERROR(EIO); 206 return -EIO;
207 207
208 /* 208 /*
209 * Check component lengths of the target path name. 209 * Check component lengths of the target path name.
210 */ 210 */
211 pathlen = strlen(target_path); 211 pathlen = strlen(target_path);
212 if (pathlen >= MAXPATHLEN) /* total string too long */ 212 if (pathlen >= MAXPATHLEN) /* total string too long */
213 return XFS_ERROR(ENAMETOOLONG); 213 return -ENAMETOOLONG;
214 214
215 udqp = gdqp = NULL; 215 udqp = gdqp = NULL;
216 prid = xfs_get_initial_prid(dp); 216 prid = xfs_get_initial_prid(dp);
@@ -238,7 +238,7 @@ xfs_symlink(
238 fs_blocks = xfs_symlink_blocks(mp, pathlen); 238 fs_blocks = xfs_symlink_blocks(mp, pathlen);
239 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 239 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
240 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0); 240 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
241 if (error == ENOSPC && fs_blocks == 0) { 241 if (error == -ENOSPC && fs_blocks == 0) {
242 resblks = 0; 242 resblks = 0;
243 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); 243 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
244 } 244 }
@@ -254,7 +254,7 @@ xfs_symlink(
254 * Check whether the directory allows new symlinks or not. 254 * Check whether the directory allows new symlinks or not.
255 */ 255 */
256 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 256 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
257 error = XFS_ERROR(EPERM); 257 error = -EPERM;
258 goto error_return; 258 goto error_return;
259 } 259 }
260 260
@@ -284,7 +284,7 @@ xfs_symlink(
284 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 284 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
285 prid, resblks > 0, &ip, NULL); 285 prid, resblks > 0, &ip, NULL);
286 if (error) { 286 if (error) {
287 if (error == ENOSPC) 287 if (error == -ENOSPC)
288 goto error_return; 288 goto error_return;
289 goto error1; 289 goto error1;
290 } 290 }
@@ -348,7 +348,7 @@ xfs_symlink(
348 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 348 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
349 BTOBB(byte_cnt), 0); 349 BTOBB(byte_cnt), 0);
350 if (!bp) { 350 if (!bp) {
351 error = ENOMEM; 351 error = -ENOMEM;
352 goto error2; 352 goto error2;
353 } 353 }
354 bp->b_ops = &xfs_symlink_buf_ops; 354 bp->b_ops = &xfs_symlink_buf_ops;
@@ -489,7 +489,7 @@ xfs_inactive_symlink_rmt(
489 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), 489 XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
490 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); 490 XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
491 if (!bp) { 491 if (!bp) {
492 error = ENOMEM; 492 error = -ENOMEM;
493 goto error_bmap_cancel; 493 goto error_bmap_cancel;
494 } 494 }
495 xfs_trans_binval(tp, bp); 495 xfs_trans_binval(tp, bp);
@@ -562,7 +562,7 @@ xfs_inactive_symlink(
562 trace_xfs_inactive_symlink(ip); 562 trace_xfs_inactive_symlink(ip);
563 563
564 if (XFS_FORCED_SHUTDOWN(mp)) 564 if (XFS_FORCED_SHUTDOWN(mp))
565 return XFS_ERROR(EIO); 565 return -EIO;
566 566
567 xfs_ilock(ip, XFS_ILOCK_EXCL); 567 xfs_ilock(ip, XFS_ILOCK_EXCL);
568 568
@@ -580,7 +580,7 @@ xfs_inactive_symlink(
580 __func__, (unsigned long long)ip->i_ino, pathlen); 580 __func__, (unsigned long long)ip->i_ino, pathlen);
581 xfs_iunlock(ip, XFS_ILOCK_EXCL); 581 xfs_iunlock(ip, XFS_ILOCK_EXCL);
582 ASSERT(0); 582 ASSERT(0);
583 return XFS_ERROR(EFSCORRUPTED); 583 return -EFSCORRUPTED;
584 } 584 }
585 585
586 if (ip->i_df.if_flags & XFS_IFINLINE) { 586 if (ip->i_df.if_flags & XFS_IFINLINE) {
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
new file mode 100644
index 000000000000..9835139ce1ec
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.c
@@ -0,0 +1,165 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "xfs.h"
20#include "xfs_sysfs.h"
21#include "xfs_log_format.h"
22#include "xfs_log.h"
23#include "xfs_log_priv.h"
24
25struct xfs_sysfs_attr {
26 struct attribute attr;
27 ssize_t (*show)(char *buf, void *data);
28 ssize_t (*store)(const char *buf, size_t count, void *data);
29};
30
31static inline struct xfs_sysfs_attr *
32to_attr(struct attribute *attr)
33{
34 return container_of(attr, struct xfs_sysfs_attr, attr);
35}
36
37#define XFS_SYSFS_ATTR_RW(name) \
38 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
39#define XFS_SYSFS_ATTR_RO(name) \
40 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
41
42#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
43
44/*
45 * xfs_mount kobject. This currently has no attributes and thus no need for show
46 * and store helpers. The mp kobject serves as the per-mount parent object that
47 * is identified by the fsname under sysfs.
48 */
49
50struct kobj_type xfs_mp_ktype = {
51 .release = xfs_sysfs_release,
52};
53
54/* xlog */
55
56STATIC ssize_t
57log_head_lsn_show(
58 char *buf,
59 void *data)
60{
61 struct xlog *log = data;
62 int cycle;
63 int block;
64
65 spin_lock(&log->l_icloglock);
66 cycle = log->l_curr_cycle;
67 block = log->l_curr_block;
68 spin_unlock(&log->l_icloglock);
69
70 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
71}
72XFS_SYSFS_ATTR_RO(log_head_lsn);
73
74STATIC ssize_t
75log_tail_lsn_show(
76 char *buf,
77 void *data)
78{
79 struct xlog *log = data;
80 int cycle;
81 int block;
82
83 xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
84 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
85}
86XFS_SYSFS_ATTR_RO(log_tail_lsn);
87
88STATIC ssize_t
89reserve_grant_head_show(
90 char *buf,
91 void *data)
92{
93 struct xlog *log = data;
94 int cycle;
95 int bytes;
96
97 xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
98 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
99}
100XFS_SYSFS_ATTR_RO(reserve_grant_head);
101
102STATIC ssize_t
103write_grant_head_show(
104 char *buf,
105 void *data)
106{
107 struct xlog *log = data;
108 int cycle;
109 int bytes;
110
111 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
112 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
113}
114XFS_SYSFS_ATTR_RO(write_grant_head);
115
116static struct attribute *xfs_log_attrs[] = {
117 ATTR_LIST(log_head_lsn),
118 ATTR_LIST(log_tail_lsn),
119 ATTR_LIST(reserve_grant_head),
120 ATTR_LIST(write_grant_head),
121 NULL,
122};
123
124static inline struct xlog *
125to_xlog(struct kobject *kobject)
126{
127 struct xfs_kobj *kobj = to_kobj(kobject);
128 return container_of(kobj, struct xlog, l_kobj);
129}
130
131STATIC ssize_t
132xfs_log_show(
133 struct kobject *kobject,
134 struct attribute *attr,
135 char *buf)
136{
137 struct xlog *log = to_xlog(kobject);
138 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
139
140 return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
141}
142
143STATIC ssize_t
144xfs_log_store(
145 struct kobject *kobject,
146 struct attribute *attr,
147 const char *buf,
148 size_t count)
149{
150 struct xlog *log = to_xlog(kobject);
151 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
152
153 return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
154}
155
156static struct sysfs_ops xfs_log_ops = {
157 .show = xfs_log_show,
158 .store = xfs_log_store,
159};
160
161struct kobj_type xfs_log_ktype = {
162 .release = xfs_sysfs_release,
163 .sysfs_ops = &xfs_log_ops,
164 .default_attrs = xfs_log_attrs,
165};
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
new file mode 100644
index 000000000000..54a2091183c0
--- /dev/null
+++ b/fs/xfs/xfs_sysfs.h
@@ -0,0 +1,59 @@
1/*
2 * Copyright (c) 2014 Red Hat, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#ifndef __XFS_SYSFS_H__
20#define __XFS_SYSFS_H__
21
22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
23extern struct kobj_type xfs_log_ktype; /* xlog */
24
25static inline struct xfs_kobj *
26to_kobj(struct kobject *kobject)
27{
28 return container_of(kobject, struct xfs_kobj, kobject);
29}
30
31static inline void
32xfs_sysfs_release(struct kobject *kobject)
33{
34 struct xfs_kobj *kobj = to_kobj(kobject);
35 complete(&kobj->complete);
36}
37
38static inline int
39xfs_sysfs_init(
40 struct xfs_kobj *kobj,
41 struct kobj_type *ktype,
42 struct xfs_kobj *parent_kobj,
43 const char *name)
44{
45 init_completion(&kobj->complete);
46 return kobject_init_and_add(&kobj->kobject, ktype,
47 &parent_kobj->kobject, "%s", name);
48}
49
50static inline void
51xfs_sysfs_del(
52 struct xfs_kobj *kobj)
53{
54 kobject_del(&kobj->kobject);
55 kobject_put(&kobj->kobject);
56 wait_for_completion(&kobj->complete);
57}
58
59#endif /* __XFS_SYSFS_H__ */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index d03932564ccb..30e8e3410955 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -190,7 +190,7 @@ xfs_trans_reserve(
190 -((int64_t)blocks), rsvd); 190 -((int64_t)blocks), rsvd);
191 if (error != 0) { 191 if (error != 0) {
192 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 192 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
193 return (XFS_ERROR(ENOSPC)); 193 return -ENOSPC;
194 } 194 }
195 tp->t_blk_res += blocks; 195 tp->t_blk_res += blocks;
196 } 196 }
@@ -241,7 +241,7 @@ xfs_trans_reserve(
241 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 241 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
242 -((int64_t)rtextents), rsvd); 242 -((int64_t)rtextents), rsvd);
243 if (error) { 243 if (error) {
244 error = XFS_ERROR(ENOSPC); 244 error = -ENOSPC;
245 goto undo_log; 245 goto undo_log;
246 } 246 }
247 tp->t_rtx_res += rtextents; 247 tp->t_rtx_res += rtextents;
@@ -874,7 +874,7 @@ xfs_trans_commit(
874 goto out_unreserve; 874 goto out_unreserve;
875 875
876 if (XFS_FORCED_SHUTDOWN(mp)) { 876 if (XFS_FORCED_SHUTDOWN(mp)) {
877 error = XFS_ERROR(EIO); 877 error = -EIO;
878 goto out_unreserve; 878 goto out_unreserve;
879 } 879 }
880 880
@@ -917,7 +917,7 @@ out_unreserve:
917 if (tp->t_ticket) { 917 if (tp->t_ticket) {
918 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 918 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
919 if (commit_lsn == -1 && !error) 919 if (commit_lsn == -1 && !error)
920 error = XFS_ERROR(EIO); 920 error = -EIO;
921 } 921 }
922 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 922 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
923 xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); 923 xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
@@ -1024,7 +1024,7 @@ xfs_trans_roll(
1024 */ 1024 */
1025 error = xfs_trans_commit(trans, 0); 1025 error = xfs_trans_commit(trans, 0);
1026 if (error) 1026 if (error)
1027 return (error); 1027 return error;
1028 1028
1029 trans = *tpp; 1029 trans = *tpp;
1030 1030
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index cb0f3a84cc68..859482f53b5a 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -762,7 +762,7 @@ xfs_trans_ail_init(
762 762
763 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); 763 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
764 if (!ailp) 764 if (!ailp)
765 return ENOMEM; 765 return -ENOMEM;
766 766
767 ailp->xa_mount = mp; 767 ailp->xa_mount = mp;
768 INIT_LIST_HEAD(&ailp->xa_ail); 768 INIT_LIST_HEAD(&ailp->xa_ail);
@@ -781,7 +781,7 @@ xfs_trans_ail_init(
781 781
782out_free_ailp: 782out_free_ailp:
783 kmem_free(ailp); 783 kmem_free(ailp);
784 return ENOMEM; 784 return -ENOMEM;
785} 785}
786 786
787void 787void
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index b8eef0549f3f..96c898e7ac9a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -166,7 +166,7 @@ xfs_trans_get_buf_map(
166 ASSERT(atomic_read(&bip->bli_refcount) > 0); 166 ASSERT(atomic_read(&bip->bli_refcount) > 0);
167 bip->bli_recur++; 167 bip->bli_recur++;
168 trace_xfs_trans_get_buf_recur(bip); 168 trace_xfs_trans_get_buf_recur(bip);
169 return (bp); 169 return bp;
170 } 170 }
171 171
172 bp = xfs_buf_get_map(target, map, nmaps, flags); 172 bp = xfs_buf_get_map(target, map, nmaps, flags);
@@ -178,7 +178,7 @@ xfs_trans_get_buf_map(
178 178
179 _xfs_trans_bjoin(tp, bp, 1); 179 _xfs_trans_bjoin(tp, bp, 1);
180 trace_xfs_trans_get_buf(bp->b_fspriv); 180 trace_xfs_trans_get_buf(bp->b_fspriv);
181 return (bp); 181 return bp;
182} 182}
183 183
184/* 184/*
@@ -201,9 +201,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
201 * Default to just trying to lock the superblock buffer 201 * Default to just trying to lock the superblock buffer
202 * if tp is NULL. 202 * if tp is NULL.
203 */ 203 */
204 if (tp == NULL) { 204 if (tp == NULL)
205 return (xfs_getsb(mp, flags)); 205 return xfs_getsb(mp, flags);
206 }
207 206
208 /* 207 /*
209 * If the superblock buffer already has this transaction 208 * If the superblock buffer already has this transaction
@@ -218,7 +217,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
218 ASSERT(atomic_read(&bip->bli_refcount) > 0); 217 ASSERT(atomic_read(&bip->bli_refcount) > 0);
219 bip->bli_recur++; 218 bip->bli_recur++;
220 trace_xfs_trans_getsb_recur(bip); 219 trace_xfs_trans_getsb_recur(bip);
221 return (bp); 220 return bp;
222 } 221 }
223 222
224 bp = xfs_getsb(mp, flags); 223 bp = xfs_getsb(mp, flags);
@@ -227,7 +226,7 @@ xfs_trans_getsb(xfs_trans_t *tp,
227 226
228 _xfs_trans_bjoin(tp, bp, 1); 227 _xfs_trans_bjoin(tp, bp, 1);
229 trace_xfs_trans_getsb(bp->b_fspriv); 228 trace_xfs_trans_getsb(bp->b_fspriv);
230 return (bp); 229 return bp;
231} 230}
232 231
233#ifdef DEBUG 232#ifdef DEBUG
@@ -267,7 +266,7 @@ xfs_trans_read_buf_map(
267 bp = xfs_buf_read_map(target, map, nmaps, flags, ops); 266 bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
268 if (!bp) 267 if (!bp)
269 return (flags & XBF_TRYLOCK) ? 268 return (flags & XBF_TRYLOCK) ?
270 EAGAIN : XFS_ERROR(ENOMEM); 269 -EAGAIN : -ENOMEM;
271 270
272 if (bp->b_error) { 271 if (bp->b_error) {
273 error = bp->b_error; 272 error = bp->b_error;
@@ -277,8 +276,8 @@ xfs_trans_read_buf_map(
277 xfs_buf_relse(bp); 276 xfs_buf_relse(bp);
278 277
279 /* bad CRC means corrupted metadata */ 278 /* bad CRC means corrupted metadata */
280 if (error == EFSBADCRC) 279 if (error == -EFSBADCRC)
281 error = EFSCORRUPTED; 280 error = -EFSCORRUPTED;
282 return error; 281 return error;
283 } 282 }
284#ifdef DEBUG 283#ifdef DEBUG
@@ -287,7 +286,7 @@ xfs_trans_read_buf_map(
287 if (((xfs_req_num++) % xfs_error_mod) == 0) { 286 if (((xfs_req_num++) % xfs_error_mod) == 0) {
288 xfs_buf_relse(bp); 287 xfs_buf_relse(bp);
289 xfs_debug(mp, "Returning error!"); 288 xfs_debug(mp, "Returning error!");
290 return XFS_ERROR(EIO); 289 return -EIO;
291 } 290 }
292 } 291 }
293 } 292 }
@@ -343,8 +342,8 @@ xfs_trans_read_buf_map(
343 xfs_force_shutdown(tp->t_mountp, 342 xfs_force_shutdown(tp->t_mountp,
344 SHUTDOWN_META_IO_ERROR); 343 SHUTDOWN_META_IO_ERROR);
345 /* bad CRC means corrupted metadata */ 344 /* bad CRC means corrupted metadata */
346 if (error == EFSBADCRC) 345 if (error == -EFSBADCRC)
347 error = EFSCORRUPTED; 346 error = -EFSCORRUPTED;
348 return error; 347 return error;
349 } 348 }
350 } 349 }
@@ -355,7 +354,7 @@ xfs_trans_read_buf_map(
355 if (XFS_FORCED_SHUTDOWN(mp)) { 354 if (XFS_FORCED_SHUTDOWN(mp)) {
356 trace_xfs_trans_read_buf_shut(bp, _RET_IP_); 355 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
357 *bpp = NULL; 356 *bpp = NULL;
358 return XFS_ERROR(EIO); 357 return -EIO;
359 } 358 }
360 359
361 360
@@ -372,7 +371,7 @@ xfs_trans_read_buf_map(
372 if (bp == NULL) { 371 if (bp == NULL) {
373 *bpp = NULL; 372 *bpp = NULL;
374 return (flags & XBF_TRYLOCK) ? 373 return (flags & XBF_TRYLOCK) ?
375 0 : XFS_ERROR(ENOMEM); 374 0 : -ENOMEM;
376 } 375 }
377 if (bp->b_error) { 376 if (bp->b_error) {
378 error = bp->b_error; 377 error = bp->b_error;
@@ -384,8 +383,8 @@ xfs_trans_read_buf_map(
384 xfs_buf_relse(bp); 383 xfs_buf_relse(bp);
385 384
386 /* bad CRC means corrupted metadata */ 385 /* bad CRC means corrupted metadata */
387 if (error == EFSBADCRC) 386 if (error == -EFSBADCRC)
388 error = EFSCORRUPTED; 387 error = -EFSCORRUPTED;
389 return error; 388 return error;
390 } 389 }
391#ifdef DEBUG 390#ifdef DEBUG
@@ -396,7 +395,7 @@ xfs_trans_read_buf_map(
396 SHUTDOWN_META_IO_ERROR); 395 SHUTDOWN_META_IO_ERROR);
397 xfs_buf_relse(bp); 396 xfs_buf_relse(bp);
398 xfs_debug(mp, "Returning trans error!"); 397 xfs_debug(mp, "Returning trans error!");
399 return XFS_ERROR(EIO); 398 return -EIO;
400 } 399 }
401 } 400 }
402 } 401 }
@@ -414,7 +413,7 @@ shutdown_abort:
414 trace_xfs_trans_read_buf_shut(bp, _RET_IP_); 413 trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
415 xfs_buf_relse(bp); 414 xfs_buf_relse(bp);
416 *bpp = NULL; 415 *bpp = NULL;
417 return XFS_ERROR(EIO); 416 return -EIO;
418} 417}
419 418
420/* 419/*
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 41172861e857..846e061c2e98 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -722,8 +722,8 @@ xfs_trans_dqresv(
722error_return: 722error_return:
723 xfs_dqunlock(dqp); 723 xfs_dqunlock(dqp);
724 if (flags & XFS_QMOPT_ENOSPC) 724 if (flags & XFS_QMOPT_ENOSPC)
725 return ENOSPC; 725 return -ENOSPC;
726 return EDQUOT; 726 return -EDQUOT;
727} 727}
728 728
729 729
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 65c6e6650b1a..b79dc66b2ecd 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -38,43 +38,18 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
38typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 38typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
39typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 39typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
40 40
41/*
42 * These types are 64 bits on disk but are either 32 or 64 bits in memory.
43 * Disk based types:
44 */
45typedef __uint64_t xfs_dfsbno_t; /* blockno in filesystem (agno|agbno) */
46typedef __uint64_t xfs_drfsbno_t; /* blockno in filesystem (raw) */
47typedef __uint64_t xfs_drtbno_t; /* extent (block) in realtime area */
48typedef __uint64_t xfs_dfiloff_t; /* block number in a file */
49typedef __uint64_t xfs_dfilblks_t; /* number of blocks in a file */
50
51/*
52 * Memory based types are conditional.
53 */
54#if XFS_BIG_BLKNOS
55typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */ 41typedef __uint64_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
56typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ 42typedef __uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
57typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ 43typedef __uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
58typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
59#else
60typedef __uint32_t xfs_fsblock_t; /* blockno in filesystem (agno|agbno) */
61typedef __uint32_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
62typedef __uint32_t xfs_rtblock_t; /* extent (block) in realtime area */
63typedef __int32_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
64#endif
65typedef __uint64_t xfs_fileoff_t; /* block number in a file */ 44typedef __uint64_t xfs_fileoff_t; /* block number in a file */
66typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
67typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ 45typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
68 46
47typedef __int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
48typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */
69 49
70/* 50/*
71 * Null values for the types. 51 * Null values for the types.
72 */ 52 */
73#define NULLDFSBNO ((xfs_dfsbno_t)-1)
74#define NULLDRFSBNO ((xfs_drfsbno_t)-1)
75#define NULLDRTBNO ((xfs_drtbno_t)-1)
76#define NULLDFILOFF ((xfs_dfiloff_t)-1)
77
78#define NULLFSBLOCK ((xfs_fsblock_t)-1) 53#define NULLFSBLOCK ((xfs_fsblock_t)-1)
79#define NULLRFSBLOCK ((xfs_rfsblock_t)-1) 54#define NULLRFSBLOCK ((xfs_rfsblock_t)-1)
80#define NULLRTBLOCK ((xfs_rtblock_t)-1) 55#define NULLRTBLOCK ((xfs_rtblock_t)-1)
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
deleted file mode 100644
index e8a77383c0d5..000000000000
--- a/fs/xfs/xfs_vnode.h
+++ /dev/null
@@ -1,46 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_VNODE_H__
19#define __XFS_VNODE_H__
20
21#include "xfs_fs.h"
22
23struct file;
24struct xfs_inode;
25struct attrlist_cursor_kern;
26
27/*
28 * Flags for read/write calls - same values as IRIX
29 */
30#define IO_ISDIRECT 0x00004 /* bypass page cache */
31#define IO_INVIS 0x00020 /* don't update inode timestamps */
32
33#define XFS_IO_FLAGS \
34 { IO_ISDIRECT, "DIRECT" }, \
35 { IO_INVIS, "INVIS"}
36
37/*
38 * Some useful predicates.
39 */
40#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
41#define VN_CACHED(vp) (vp->i_mapping->nrpages)
42#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
43 PAGECACHE_TAG_DIRTY)
44
45
46#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 78ed92a46fdd..93455b998041 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -49,7 +49,7 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
49 value = NULL; 49 value = NULL;
50 } 50 }
51 51
52 error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); 52 error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
53 if (error) 53 if (error)
54 return error; 54 return error;
55 return asize; 55 return asize;
@@ -71,8 +71,8 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
71 xflags |= ATTR_REPLACE; 71 xflags |= ATTR_REPLACE;
72 72
73 if (!value) 73 if (!value)
74 return -xfs_attr_remove(ip, (unsigned char *)name, xflags); 74 return xfs_attr_remove(ip, (unsigned char *)name, xflags);
75 return -xfs_attr_set(ip, (unsigned char *)name, 75 return xfs_attr_set(ip, (unsigned char *)name,
76 (void *)value, size, xflags); 76 (void *)value, size, xflags);
77} 77}
78 78