Diffstat (limited to 'fs')
-rw-r--r-- fs/9p/Kconfig | 3
-rw-r--r-- fs/9p/acl.c | 37
-rw-r--r-- fs/9p/acl.h | 20
-rw-r--r-- fs/9p/fid.c | 17
-rw-r--r-- fs/9p/v9fs.c | 34
-rw-r--r-- fs/9p/v9fs.h | 10
-rw-r--r-- fs/9p/vfs_dentry.c | 1
-rw-r--r-- fs/9p/vfs_dir.c | 92
-rw-r--r-- fs/9p/vfs_file.c | 15
-rw-r--r-- fs/9p/vfs_inode.c | 9
-rw-r--r-- fs/9p/vfs_inode_dotl.c | 96
-rw-r--r-- fs/9p/vfs_super.c | 2
-rw-r--r-- fs/9p/xattr.c | 33
-rw-r--r-- fs/9p/xattr.h | 2
-rw-r--r-- fs/Kconfig | 10
-rw-r--r-- fs/adfs/Kconfig | 4
-rw-r--r-- fs/adfs/dir.c | 2
-rw-r--r-- fs/affs/Kconfig | 4
-rw-r--r-- fs/affs/amigaffs.c | 3
-rw-r--r-- fs/affs/dir.c | 2
-rw-r--r-- fs/afs/Kconfig | 7
-rw-r--r-- fs/afs/afs.h | 11
-rw-r--r-- fs/afs/dir.c | 4
-rw-r--r-- fs/afs/flock.c | 4
-rw-r--r-- fs/afs/fsclient.c | 14
-rw-r--r-- fs/afs/inode.c | 6
-rw-r--r-- fs/afs/super.c | 6
-rw-r--r-- fs/afs/write.c | 7
-rw-r--r-- fs/aio.c | 10
-rw-r--r-- fs/anon_inodes.c | 10
-rw-r--r-- fs/autofs4/autofs_i.h | 2
-rw-r--r-- fs/autofs4/dev-ioctl.c | 2
-rw-r--r-- fs/autofs4/root.c | 6
-rw-r--r-- fs/befs/Kconfig | 4
-rw-r--r-- fs/befs/linuxvfs.c | 2
-rw-r--r-- fs/bfs/Kconfig | 4
-rw-r--r-- fs/bfs/dir.c | 2
-rw-r--r-- fs/binfmt_aout.c | 4
-rw-r--r-- fs/binfmt_elf.c | 16
-rw-r--r-- fs/binfmt_elf_fdpic.c | 11
-rw-r--r-- fs/binfmt_flat.c | 2
-rw-r--r-- fs/binfmt_misc.c | 4
-rw-r--r-- fs/bio.c | 2
-rw-r--r-- fs/block_dev.c | 9
-rw-r--r-- fs/btrfs/Kconfig | 3
-rw-r--r-- fs/btrfs/export.c | 4
-rw-r--r-- fs/btrfs/extent-tree.c | 54
-rw-r--r-- fs/btrfs/extent_map.c | 14
-rw-r--r-- fs/btrfs/extent_map.h | 1
-rw-r--r-- fs/btrfs/file-item.c | 4
-rw-r--r-- fs/btrfs/file.c | 43
-rw-r--r-- fs/btrfs/free-space-cache.c | 20
-rw-r--r-- fs/btrfs/inode.c | 141
-rw-r--r-- fs/btrfs/ioctl.c | 186
-rw-r--r-- fs/btrfs/ordered-data.c | 13
-rw-r--r-- fs/btrfs/qgroup.c | 20
-rw-r--r-- fs/btrfs/relocation.c | 4
-rw-r--r-- fs/btrfs/scrub.c | 25
-rw-r--r-- fs/btrfs/send.c | 6
-rw-r--r-- fs/btrfs/super.c | 2
-rw-r--r-- fs/btrfs/transaction.c | 47
-rw-r--r-- fs/btrfs/tree-log.c | 10
-rw-r--r-- fs/btrfs/volumes.c | 26
-rw-r--r-- fs/buffer.c | 21
-rw-r--r-- fs/ceph/Kconfig | 4
-rw-r--r-- fs/ceph/addr.c | 50
-rw-r--r-- fs/ceph/caps.c | 49
-rw-r--r-- fs/ceph/dir.c | 6
-rw-r--r-- fs/ceph/export.c | 4
-rw-r--r-- fs/ceph/file.c | 18
-rw-r--r-- fs/ceph/inode.c | 63
-rw-r--r-- fs/ceph/ioctl.c | 22
-rw-r--r-- fs/ceph/locks.c | 2
-rw-r--r-- fs/ceph/mds_client.c | 37
-rw-r--r-- fs/ceph/mds_client.h | 10
-rw-r--r-- fs/ceph/mdsmap.c | 12
-rw-r--r-- fs/ceph/strings.c | 4
-rw-r--r-- fs/ceph/super.c | 7
-rw-r--r-- fs/ceph/super.h | 14
-rw-r--r-- fs/ceph/xattr.c | 214
-rw-r--r-- fs/cifs/Kconfig | 8
-rw-r--r-- fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r-- fs/cifs/cifs_fs_sb.h | 8
-rw-r--r-- fs/cifs/cifs_spnego.c | 6
-rw-r--r-- fs/cifs/cifsacl.c | 47
-rw-r--r-- fs/cifs/cifsfs.c | 21
-rw-r--r-- fs/cifs/cifsglob.h | 24
-rw-r--r-- fs/cifs/cifspdu.h | 1
-rw-r--r-- fs/cifs/cifsproto.h | 9
-rw-r--r-- fs/cifs/cifssmb.c | 10
-rw-r--r-- fs/cifs/connect.c | 68
-rw-r--r-- fs/cifs/dir.c | 18
-rw-r--r-- fs/cifs/file.c | 175
-rw-r--r-- fs/cifs/inode.c | 61
-rw-r--r-- fs/cifs/ioctl.c | 2
-rw-r--r-- fs/cifs/link.c | 2
-rw-r--r-- fs/cifs/misc.c | 2
-rw-r--r-- fs/cifs/readdir.c | 12
-rw-r--r-- fs/cifs/smb1ops.c | 8
-rw-r--r-- fs/cifs/smb2ops.c | 2
-rw-r--r-- fs/cifs/transport.c | 6
-rw-r--r-- fs/coda/cache.c | 4
-rw-r--r-- fs/coda/coda_fs_i.h | 2
-rw-r--r-- fs/coda/coda_linux.c | 8
-rw-r--r-- fs/coda/dir.c | 2
-rw-r--r-- fs/coda/file.c | 12
-rw-r--r-- fs/coda/inode.c | 8
-rw-r--r-- fs/coda/pioctl.c | 2
-rw-r--r-- fs/coda/psdev.c | 7
-rw-r--r-- fs/coda/upcall.c | 10
-rw-r--r-- fs/compat.c | 52
-rw-r--r-- fs/compat_ioctl.c | 2
-rw-r--r-- fs/configfs/dir.c | 7
-rw-r--r-- fs/coredump.c | 6
-rw-r--r-- fs/cramfs/inode.c | 2
-rw-r--r-- fs/dcache.c | 89
-rw-r--r-- fs/debugfs/inode.c | 3
-rw-r--r-- fs/devpts/inode.c | 18
-rw-r--r-- fs/direct-io.c | 2
-rw-r--r-- fs/dlm/config.c | 2
-rw-r--r-- fs/dlm/dlm_internal.h | 3
-rw-r--r-- fs/dlm/lock.c | 33
-rw-r--r-- fs/dlm/lockspace.c | 1
-rw-r--r-- fs/dlm/lowcomms.c | 11
-rw-r--r-- fs/dlm/recover.c | 52
-rw-r--r-- fs/dlm/user.c | 8
-rw-r--r-- fs/ecryptfs/Kconfig | 4
-rw-r--r-- fs/ecryptfs/ecryptfs_kernel.h | 6
-rw-r--r-- fs/ecryptfs/file.c | 4
-rw-r--r-- fs/ecryptfs/inode.c | 3
-rw-r--r-- fs/ecryptfs/messaging.c | 6
-rw-r--r-- fs/ecryptfs/read_write.c | 6
-rw-r--r-- fs/efs/Kconfig | 4
-rw-r--r-- fs/efs/dir.c | 2
-rw-r--r-- fs/exec.c | 23
-rw-r--r-- fs/exofs/dir.c | 2
-rw-r--r-- fs/exportfs/expfs.c | 3
-rw-r--r-- fs/ext2/balloc.c | 28
-rw-r--r-- fs/ext2/dir.c | 2
-rw-r--r-- fs/ext2/inode.c | 12
-rw-r--r-- fs/ext2/ioctl.c | 2
-rw-r--r-- fs/ext2/super.c | 2
-rw-r--r-- fs/ext2/xattr.c | 4
-rw-r--r-- fs/ext3/dir.c | 8
-rw-r--r-- fs/ext3/inode.c | 16
-rw-r--r-- fs/ext3/ioctl.c | 2
-rw-r--r-- fs/ext3/namei.c | 5
-rw-r--r-- fs/ext3/resize.c | 12
-rw-r--r-- fs/ext3/super.c | 52
-rw-r--r-- fs/ext3/xattr.c | 4
-rw-r--r-- fs/ext4/Kconfig | 2
-rw-r--r-- fs/ext4/acl.c | 7
-rw-r--r-- fs/ext4/balloc.c | 13
-rw-r--r-- fs/ext4/dir.c | 9
-rw-r--r-- fs/ext4/ext4.h | 123
-rw-r--r-- fs/ext4/ext4_extents.h | 6
-rw-r--r-- fs/ext4/ext4_jbd2.c | 102
-rw-r--r-- fs/ext4/ext4_jbd2.h | 51
-rw-r--r-- fs/ext4/extents.c | 316
-rw-r--r-- fs/ext4/extents_status.c | 631
-rw-r--r-- fs/ext4/extents_status.h | 86
-rw-r--r-- fs/ext4/file.c | 18
-rw-r--r-- fs/ext4/hash.c | 6
-rw-r--r-- fs/ext4/ialloc.c | 29
-rw-r--r-- fs/ext4/indirect.c | 259
-rw-r--r-- fs/ext4/inline.c | 14
-rw-r--r-- fs/ext4/inode.c | 680
-rw-r--r-- fs/ext4/ioctl.c | 15
-rw-r--r-- fs/ext4/mballoc.c | 69
-rw-r--r-- fs/ext4/mballoc.h | 4
-rw-r--r-- fs/ext4/migrate.c | 15
-rw-r--r-- fs/ext4/mmp.c | 4
-rw-r--r-- fs/ext4/move_extent.c | 16
-rw-r--r-- fs/ext4/namei.c | 501
-rw-r--r-- fs/ext4/page-io.c | 85
-rw-r--r-- fs/ext4/resize.c | 36
-rw-r--r-- fs/ext4/super.c | 487
-rw-r--r-- fs/ext4/xattr.c | 23
-rw-r--r-- fs/ext4/xattr.h | 68
-rw-r--r-- fs/f2fs/acl.c | 13
-rw-r--r-- fs/f2fs/checkpoint.c | 66
-rw-r--r-- fs/f2fs/data.c | 18
-rw-r--r-- fs/f2fs/debug.c | 54
-rw-r--r-- fs/f2fs/dir.c | 47
-rw-r--r-- fs/f2fs/f2fs.h | 64
-rw-r--r-- fs/f2fs/file.c | 61
-rw-r--r-- fs/f2fs/gc.c | 186
-rw-r--r-- fs/f2fs/gc.h | 21
-rw-r--r-- fs/f2fs/hash.c | 18
-rw-r--r-- fs/f2fs/inode.c | 57
-rw-r--r-- fs/f2fs/namei.c | 34
-rw-r--r-- fs/f2fs/node.c | 76
-rw-r--r-- fs/f2fs/recovery.c | 32
-rw-r--r-- fs/f2fs/segment.c | 77
-rw-r--r-- fs/f2fs/segment.h | 16
-rw-r--r-- fs/f2fs/super.c | 192
-rw-r--r-- fs/f2fs/xattr.c | 7
-rw-r--r-- fs/fat/dir.c | 6
-rw-r--r-- fs/fat/fat.h | 2
-rw-r--r-- fs/fat/file.c | 4
-rw-r--r-- fs/fat/inode.c | 77
-rw-r--r-- fs/fat/nfs.c | 3
-rw-r--r-- fs/fcntl.c | 2
-rw-r--r-- fs/file.c | 4
-rw-r--r-- fs/file_table.c | 31
-rw-r--r-- fs/freevxfs/vxfs_lookup.c | 2
-rw-r--r-- fs/fs-writeback.c | 60
-rw-r--r-- fs/fscache/cookie.c | 11
-rw-r--r-- fs/fuse/Kconfig | 16
-rw-r--r-- fs/fuse/control.c | 2
-rw-r--r-- fs/fuse/cuse.c | 46
-rw-r--r-- fs/fuse/dev.c | 133
-rw-r--r-- fs/fuse/dir.c | 261
-rw-r--r-- fs/fuse/file.c | 243
-rw-r--r-- fs/fuse/fuse_i.h | 74
-rw-r--r-- fs/fuse/inode.c | 18
-rw-r--r-- fs/gfs2/acl.c | 2
-rw-r--r-- fs/gfs2/aops.c | 17
-rw-r--r-- fs/gfs2/bmap.c | 32
-rw-r--r-- fs/gfs2/dir.c | 32
-rw-r--r-- fs/gfs2/export.c | 4
-rw-r--r-- fs/gfs2/file.c | 23
-rw-r--r-- fs/gfs2/glock.c | 116
-rw-r--r-- fs/gfs2/glops.c | 4
-rw-r--r-- fs/gfs2/incore.h | 11
-rw-r--r-- fs/gfs2/inode.c | 40
-rw-r--r-- fs/gfs2/lock_dlm.c | 8
-rw-r--r-- fs/gfs2/log.c | 76
-rw-r--r-- fs/gfs2/log.h | 12
-rw-r--r-- fs/gfs2/lops.c | 83
-rw-r--r-- fs/gfs2/lops.h | 14
-rw-r--r-- fs/gfs2/meta_io.c | 35
-rw-r--r-- fs/gfs2/meta_io.h | 3
-rw-r--r-- fs/gfs2/ops_fstype.c | 4
-rw-r--r-- fs/gfs2/quota.c | 142
-rw-r--r-- fs/gfs2/quota.h | 15
-rw-r--r-- fs/gfs2/rgrp.c | 55
-rw-r--r-- fs/gfs2/super.c | 76
-rw-r--r-- fs/gfs2/super.h | 3
-rw-r--r-- fs/gfs2/sys.c | 80
-rw-r--r-- fs/gfs2/trans.c | 124
-rw-r--r-- fs/gfs2/trans.h | 3
-rw-r--r-- fs/gfs2/util.c | 3
-rw-r--r-- fs/gfs2/xattr.c | 40
-rw-r--r-- fs/hfs/Kconfig | 4
-rw-r--r-- fs/hfs/dir.c | 2
-rw-r--r-- fs/hfs/inode.c | 2
-rw-r--r-- fs/hfsplus/Makefile | 4
-rw-r--r-- fs/hfsplus/attributes.c | 399
-rw-r--r-- fs/hfsplus/bfind.c | 93
-rw-r--r-- fs/hfsplus/bnode.c | 8
-rw-r--r-- fs/hfsplus/brec.c | 23
-rw-r--r-- fs/hfsplus/btree.c | 8
-rw-r--r-- fs/hfsplus/catalog.c | 36
-rw-r--r-- fs/hfsplus/dir.c | 57
-rw-r--r-- fs/hfsplus/extents.c | 4
-rw-r--r-- fs/hfsplus/hfsplus_fs.h | 52
-rw-r--r-- fs/hfsplus/hfsplus_raw.h | 68
-rw-r--r-- fs/hfsplus/inode.c | 20
-rw-r--r-- fs/hfsplus/ioctl.c | 112
-rw-r--r-- fs/hfsplus/super.c | 56
-rw-r--r-- fs/hfsplus/unicode.c | 7
-rw-r--r-- fs/hfsplus/xattr.c | 709
-rw-r--r-- fs/hfsplus/xattr.h | 60
-rw-r--r-- fs/hfsplus/xattr_security.c | 104
-rw-r--r-- fs/hfsplus/xattr_trusted.c | 63
-rw-r--r-- fs/hfsplus/xattr_user.c | 63
-rw-r--r-- fs/hostfs/hostfs_kern.c | 10
-rw-r--r-- fs/hpfs/dir.c | 4
-rw-r--r-- fs/hpfs/file.c | 2
-rw-r--r-- fs/hpfs/inode.c | 2
-rw-r--r-- fs/hppfs/hppfs.c | 8
-rw-r--r-- fs/hugetlbfs/inode.c | 33
-rw-r--r-- fs/inode.c | 21
-rw-r--r-- fs/ioctl.c | 12
-rw-r--r-- fs/isofs/compress.c | 2
-rw-r--r-- fs/isofs/dir.c | 2
-rw-r--r-- fs/isofs/export.c | 4
-rw-r--r-- fs/jbd/journal.c | 3
-rw-r--r-- fs/jbd2/commit.c | 8
-rw-r--r-- fs/jbd2/journal.c | 66
-rw-r--r-- fs/jbd2/transaction.c | 29
-rw-r--r-- fs/jffs2/Kconfig | 10
-rw-r--r-- fs/jffs2/dir.c | 4
-rw-r--r-- fs/jfs/ioctl.c | 2
-rw-r--r-- fs/jfs/jfs_dtree.c | 2
-rw-r--r-- fs/jfs/super.c | 2
-rw-r--r-- fs/lockd/clntlock.c | 2
-rw-r--r-- fs/lockd/clntproc.c | 5
-rw-r--r-- fs/lockd/host.c | 29
-rw-r--r-- fs/lockd/svclock.c | 16
-rw-r--r-- fs/lockd/svcsubs.c | 9
-rw-r--r-- fs/locks.c | 24
-rw-r--r-- fs/logfs/Kconfig | 4
-rw-r--r-- fs/logfs/dir.c | 4
-rw-r--r-- fs/logfs/file.c | 2
-rw-r--r-- fs/minix/dir.c | 2
-rw-r--r-- fs/namei.c | 67
-rw-r--r-- fs/namespace.c | 63
-rw-r--r-- fs/ncpfs/dir.c | 10
-rw-r--r-- fs/ncpfs/inode.c | 59
-rw-r--r-- fs/ncpfs/ioctl.c | 29
-rw-r--r-- fs/ncpfs/mmap.c | 2
-rw-r--r-- fs/ncpfs/ncp_fs_sb.h | 6
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c | 1
-rw-r--r-- fs/nfs/callback_proc.c | 61
-rw-r--r-- fs/nfs/client.c | 1
-rw-r--r-- fs/nfs/delegation.c | 154
-rw-r--r-- fs/nfs/delegation.h | 1
-rw-r--r-- fs/nfs/dir.c | 64
-rw-r--r-- fs/nfs/file.c | 2
-rw-r--r-- fs/nfs/getroot.c | 3
-rw-r--r-- fs/nfs/idmap.c | 55
-rw-r--r-- fs/nfs/inode.c | 21
-rw-r--r-- fs/nfs/internal.h | 1
-rw-r--r-- fs/nfs/namespace.c | 20
-rw-r--r-- fs/nfs/nfs2xdr.c | 19
-rw-r--r-- fs/nfs/nfs3proc.c | 2
-rw-r--r-- fs/nfs/nfs3xdr.c | 18
-rw-r--r-- fs/nfs/nfs4_fs.h | 4
-rw-r--r-- fs/nfs/nfs4client.c | 75
-rw-r--r-- fs/nfs/nfs4file.c | 2
-rw-r--r-- fs/nfs/nfs4proc.c | 149
-rw-r--r-- fs/nfs/nfs4state.c | 33
-rw-r--r-- fs/nfs/nfs4super.c | 6
-rw-r--r-- fs/nfs/nfs4xdr.c | 16
-rw-r--r-- fs/nfs/objlayout/objio_osd.c | 1
-rw-r--r-- fs/nfs/pnfs.c | 152
-rw-r--r-- fs/nfs/pnfs.h | 7
-rw-r--r-- fs/nfs/pnfs_dev.c | 9
-rw-r--r-- fs/nfs/proc.c | 2
-rw-r--r-- fs/nfs/read.c | 10
-rw-r--r-- fs/nfs/super.c | 79
-rw-r--r-- fs/nfs/unlink.c | 5
-rw-r--r-- fs/nfs/write.c | 10
-rw-r--r-- fs/nfs_common/nfsacl.c | 41
-rw-r--r-- fs/nfsd/Kconfig | 4
-rw-r--r-- fs/nfsd/acl.h | 2
-rw-r--r-- fs/nfsd/auth.c | 12
-rw-r--r-- fs/nfsd/auth.h | 6
-rw-r--r-- fs/nfsd/export.c | 22
-rw-r--r-- fs/nfsd/fault_inject.c | 6
-rw-r--r-- fs/nfsd/idmap.h | 8
-rw-r--r-- fs/nfsd/nfs2acl.c | 23
-rw-r--r-- fs/nfsd/nfs3proc.c | 5
-rw-r--r-- fs/nfsd/nfs3xdr.c | 24
-rw-r--r-- fs/nfsd/nfs4acl.c | 63
-rw-r--r-- fs/nfsd/nfs4idmap.c | 38
-rw-r--r-- fs/nfsd/nfs4recover.c | 4
-rw-r--r-- fs/nfsd/nfs4state.c | 13
-rw-r--r-- fs/nfsd/nfs4xdr.c | 58
-rw-r--r-- fs/nfsd/nfscache.c | 3
-rw-r--r-- fs/nfsd/nfsctl.c | 2
-rw-r--r-- fs/nfsd/nfsd.h | 6
-rw-r--r-- fs/nfsd/nfsproc.c | 12
-rw-r--r-- fs/nfsd/nfssvc.c | 6
-rw-r--r-- fs/nfsd/nfsxdr.c | 21
-rw-r--r-- fs/nfsd/state.h | 4
-rw-r--r-- fs/nfsd/vfs.c | 14
-rw-r--r-- fs/nfsd/vfs.h | 8
-rw-r--r-- fs/nfsd/xdr.h | 2
-rw-r--r-- fs/nfsd/xdr3.h | 2
-rw-r--r-- fs/nilfs2/Kconfig | 3
-rw-r--r-- fs/nilfs2/dir.c | 2
-rw-r--r-- fs/nilfs2/file.c | 4
-rw-r--r-- fs/nilfs2/ioctl.c | 7
-rw-r--r-- fs/nilfs2/namei.c | 4
-rw-r--r-- fs/notify/dnotify/dnotify.c | 4
-rw-r--r-- fs/notify/fanotify/fanotify_user.c | 2
-rw-r--r-- fs/notify/fsnotify.c | 3
-rw-r--r-- fs/notify/inode_mark.c | 19
-rw-r--r-- fs/notify/inotify/inotify_fsnotify.c | 1
-rw-r--r-- fs/notify/inotify/inotify_user.c | 28
-rw-r--r-- fs/notify/vfsmount_mark.c | 19
-rw-r--r-- fs/ntfs/dir.c | 2
-rw-r--r-- fs/ocfs2/acl.c | 31
-rw-r--r-- fs/ocfs2/alloc.c | 3
-rw-r--r-- fs/ocfs2/aops.c | 7
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c | 6
-rw-r--r-- fs/ocfs2/cluster/tcp.c | 40
-rw-r--r-- fs/ocfs2/dcache.c | 3
-rw-r--r-- fs/ocfs2/dir.c | 5
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c | 4
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c | 2
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c | 6
-rw-r--r-- fs/ocfs2/dlmfs/dlmfs.c | 6
-rw-r--r-- fs/ocfs2/dlmglue.c | 13
-rw-r--r-- fs/ocfs2/export.c | 4
-rw-r--r-- fs/ocfs2/extent_map.c | 3
-rw-r--r-- fs/ocfs2/file.c | 25
-rw-r--r-- fs/ocfs2/inode.c | 12
-rw-r--r-- fs/ocfs2/ioctl.c | 4
-rw-r--r-- fs/ocfs2/journal.c | 10
-rw-r--r-- fs/ocfs2/localalloc.c | 8
-rw-r--r-- fs/ocfs2/mmap.c | 10
-rw-r--r-- fs/ocfs2/move_extents.c | 2
-rw-r--r-- fs/ocfs2/namei.c | 4
-rw-r--r-- fs/ocfs2/refcounttree.c | 6
-rw-r--r-- fs/ocfs2/stack_o2cb.c | 2
-rw-r--r-- fs/ocfs2/suballoc.c | 7
-rw-r--r-- fs/ocfs2/suballoc.h | 2
-rw-r--r-- fs/ocfs2/super.c | 6
-rw-r--r-- fs/ocfs2/symlink.c | 2
-rw-r--r-- fs/ocfs2/sysfile.c | 3
-rw-r--r-- fs/ocfs2/xattr.c | 2
-rw-r--r-- fs/omfs/dir.c | 4
-rw-r--r-- fs/open.c | 34
-rw-r--r-- fs/openpromfs/inode.c | 2
-rw-r--r-- fs/pipe.c | 20
-rw-r--r-- fs/proc/Makefile | 3
-rw-r--r-- fs/proc/array.c | 4
-rw-r--r-- fs/proc/base.c | 48
-rw-r--r-- fs/proc/generic.c | 58
-rw-r--r-- fs/proc/inode.c | 44
-rw-r--r-- fs/proc/internal.h | 3
-rw-r--r-- fs/proc/kcore.c | 3
-rw-r--r-- fs/proc/meminfo.c | 6
-rw-r--r-- fs/proc/nommu.c | 2
-rw-r--r-- fs/proc/proc_devtree.c | 13
-rw-r--r-- fs/proc/proc_net.c | 16
-rw-r--r-- fs/proc/proc_sysctl.c | 23
-rw-r--r-- fs/proc/task_mmu.c | 6
-rw-r--r-- fs/proc/task_nommu.c | 2
-rw-r--r-- fs/proc/vmcore.c | 35
-rw-r--r-- fs/pstore/inode.c | 18
-rw-r--r-- fs/pstore/platform.c | 35
-rw-r--r-- fs/pstore/ram.c | 24
-rw-r--r-- fs/pstore/ram_core.c | 9
-rw-r--r-- fs/qnx4/dir.c | 2
-rw-r--r-- fs/qnx6/dir.c | 2
-rw-r--r-- fs/qnx6/inode.c | 2
-rw-r--r-- fs/ramfs/file-nommu.c | 2
-rw-r--r-- fs/ramfs/inode.c | 1
-rw-r--r-- fs/read_write.c | 8
-rw-r--r-- fs/readdir.c | 2
-rw-r--r-- fs/reiserfs/file.c | 2
-rw-r--r-- fs/reiserfs/inode.c | 4
-rw-r--r-- fs/reiserfs/ioctl.c | 2
-rw-r--r-- fs/reiserfs/procfs.c | 2
-rw-r--r-- fs/romfs/super.c | 2
-rw-r--r-- fs/select.c | 1
-rw-r--r-- fs/seq_file.c | 42
-rw-r--r-- fs/splice.c | 11
-rw-r--r-- fs/squashfs/dir.c | 2
-rw-r--r-- fs/stat.c | 13
-rw-r--r-- fs/super.c | 8
-rw-r--r-- fs/sync.c | 2
-rw-r--r-- fs/sysfs/bin.c | 9
-rw-r--r-- fs/sysfs/group.c | 42
-rw-r--r-- fs/sysfs/mount.c | 2
-rw-r--r-- fs/sysfs/symlink.c | 45
-rw-r--r-- fs/sysfs/sysfs.h | 2
-rw-r--r-- fs/sysv/dir.c | 2
-rw-r--r-- fs/timerfd.c | 85
-rw-r--r-- fs/ubifs/debug.c | 8
-rw-r--r-- fs/ubifs/dir.c | 2
-rw-r--r-- fs/ubifs/file.c | 3
-rw-r--r-- fs/ubifs/ioctl.c | 2
-rw-r--r-- fs/ubifs/lpt_commit.c | 14
-rw-r--r-- fs/ubifs/orphan.c | 12
-rw-r--r-- fs/ubifs/tnc_commit.c | 2
-rw-r--r-- fs/ubifs/ubifs.h | 6
-rw-r--r-- fs/udf/dir.c | 2
-rw-r--r-- fs/udf/file.c | 6
-rw-r--r-- fs/udf/inode.c | 86
-rw-r--r-- fs/udf/namei.c | 4
-rw-r--r-- fs/udf/super.c | 14
-rw-r--r-- fs/udf/udf_i.h | 16
-rw-r--r-- fs/udf/udf_sb.h | 5
-rw-r--r-- fs/udf/udfdecl.h | 5
-rw-r--r-- fs/ufs/Kconfig | 2
-rw-r--r-- fs/ufs/dir.c | 2
-rw-r--r-- fs/xfs/Kconfig | 4
-rw-r--r-- fs/xfs/xfs_alloc.c | 2
-rw-r--r-- fs/xfs/xfs_aops.c | 2
-rw-r--r-- fs/xfs/xfs_attr.c | 9
-rw-r--r-- fs/xfs/xfs_bmap.c | 124
-rw-r--r-- fs/xfs/xfs_buf.c | 34
-rw-r--r-- fs/xfs/xfs_buf.h | 6
-rw-r--r-- fs/xfs/xfs_buf_item.c | 177
-rw-r--r-- fs/xfs/xfs_buf_item.h | 16
-rw-r--r-- fs/xfs/xfs_dfrag.c | 12
-rw-r--r-- fs/xfs/xfs_dir2_block.c | 6
-rw-r--r-- fs/xfs/xfs_dquot.c | 12
-rw-r--r-- fs/xfs/xfs_export.c | 4
-rw-r--r-- fs/xfs/xfs_file.c | 4
-rw-r--r-- fs/xfs/xfs_fsops.c | 4
-rw-r--r-- fs/xfs/xfs_ialloc.c | 4
-rw-r--r-- fs/xfs/xfs_inode.c | 6
-rw-r--r-- fs/xfs/xfs_inode.h | 1
-rw-r--r-- fs/xfs/xfs_inode_item.c | 16
-rw-r--r-- fs/xfs/xfs_inode_item.h | 4
-rw-r--r-- fs/xfs/xfs_ioctl.c | 6
-rw-r--r-- fs/xfs/xfs_ioctl32.c | 2
-rw-r--r-- fs/xfs/xfs_iomap.c | 86
-rw-r--r-- fs/xfs/xfs_log.c | 10
-rw-r--r-- fs/xfs/xfs_log_recover.c | 3
-rw-r--r-- fs/xfs/xfs_mount.c | 14
-rw-r--r-- fs/xfs/xfs_mount.h | 9
-rw-r--r-- fs/xfs/xfs_qm.c | 7
-rw-r--r-- fs/xfs/xfs_qm_bhv.c | 2
-rw-r--r-- fs/xfs/xfs_qm_syscalls.c | 32
-rw-r--r-- fs/xfs/xfs_super.c | 29
-rw-r--r-- fs/xfs/xfs_trace.h | 1
-rw-r--r-- fs/xfs/xfs_trans.c | 376
-rw-r--r-- fs/xfs/xfs_trans.h | 18
-rw-r--r-- fs/xfs/xfs_trans_ail.c | 14
-rw-r--r-- fs/xfs/xfs_trans_buf.c | 27
-rw-r--r-- fs/xfs/xfs_trans_dquot.c | 10
-rw-r--r-- fs/xfs/xfs_trans_inode.c | 41
-rw-r--r-- fs/xfs/xfs_types.h | 1
-rw-r--r-- fs/xfs/xfs_vnodeops.c | 12
512 files changed, 9750 insertions, 6059 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 0a93dc1cb4ac..55abfd62654a 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -11,8 +11,7 @@ config 9P_FS
11 11
12if 9P_FS 12if 9P_FS
13config 9P_FSCACHE 13config 9P_FSCACHE
14 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support"
15 depends on EXPERIMENTAL
16 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y 15 depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y
17 help 16 help
18 Choose Y here to enable persistent, read-only local 17 Choose Y here to enable persistent, read-only local
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 15b679166201..7af425f53bee 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -23,6 +23,7 @@
23#include "acl.h" 23#include "acl.h"
24#include "v9fs.h" 24#include "v9fs.h"
25#include "v9fs_vfs.h" 25#include "v9fs_vfs.h"
26#include "fid.h"
26 27
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) 28static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 29{
@@ -113,16 +114,12 @@ struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type)
113 114
114} 115}
115 116
116static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl) 117static int v9fs_set_acl(struct p9_fid *fid, int type, struct posix_acl *acl)
117{ 118{
118 int retval; 119 int retval;
119 char *name; 120 char *name;
120 size_t size; 121 size_t size;
121 void *buffer; 122 void *buffer;
122 struct inode *inode = dentry->d_inode;
123
124 set_cached_acl(inode, type, acl);
125
126 if (!acl) 123 if (!acl)
127 return 0; 124 return 0;
128 125
@@ -144,17 +141,16 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
144 default: 141 default:
145 BUG(); 142 BUG();
146 } 143 }
147 retval = v9fs_xattr_set(dentry, name, buffer, size, 0); 144 retval = v9fs_fid_xattr_set(fid, name, buffer, size, 0);
148err_free_out: 145err_free_out:
149 kfree(buffer); 146 kfree(buffer);
150 return retval; 147 return retval;
151} 148}
152 149
153int v9fs_acl_chmod(struct dentry *dentry) 150int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid)
154{ 151{
155 int retval = 0; 152 int retval = 0;
156 struct posix_acl *acl; 153 struct posix_acl *acl;
157 struct inode *inode = dentry->d_inode;
158 154
159 if (S_ISLNK(inode->i_mode)) 155 if (S_ISLNK(inode->i_mode))
160 return -EOPNOTSUPP; 156 return -EOPNOTSUPP;
@@ -163,25 +159,30 @@ int v9fs_acl_chmod(struct dentry *dentry)
163 retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); 159 retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
164 if (retval) 160 if (retval)
165 return retval; 161 return retval;
166 retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, acl); 162 set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
163 retval = v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl);
167 posix_acl_release(acl); 164 posix_acl_release(acl);
168 } 165 }
169 return retval; 166 return retval;
170} 167}
171 168
172int v9fs_set_create_acl(struct dentry *dentry, 169int v9fs_set_create_acl(struct inode *inode, struct p9_fid *fid,
173 struct posix_acl **dpacl, struct posix_acl **pacl) 170 struct posix_acl *dacl, struct posix_acl *acl)
174{ 171{
175 if (dentry) { 172 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
176 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, *dpacl); 173 set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
177 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, *pacl); 174 v9fs_set_acl(fid, ACL_TYPE_DEFAULT, dacl);
178 } 175 v9fs_set_acl(fid, ACL_TYPE_ACCESS, acl);
179 posix_acl_release(*dpacl);
180 posix_acl_release(*pacl);
181 *dpacl = *pacl = NULL;
182 return 0; 176 return 0;
183} 177}
184 178
179void v9fs_put_acl(struct posix_acl *dacl,
180 struct posix_acl *acl)
181{
182 posix_acl_release(dacl);
183 posix_acl_release(acl);
184}
185
185int v9fs_acl_mode(struct inode *dir, umode_t *modep, 186int v9fs_acl_mode(struct inode *dir, umode_t *modep,
186 struct posix_acl **dpacl, struct posix_acl **pacl) 187 struct posix_acl **dpacl, struct posix_acl **pacl)
187{ 188{
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 559556411965..e4f7e882272b 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -17,27 +17,33 @@
17#ifdef CONFIG_9P_FS_POSIX_ACL 17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *); 18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); 19extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
20extern int v9fs_acl_chmod(struct dentry *); 20extern int v9fs_acl_chmod(struct inode *, struct p9_fid *);
21extern int v9fs_set_create_acl(struct dentry *, 21extern int v9fs_set_create_acl(struct inode *, struct p9_fid *,
22 struct posix_acl **, struct posix_acl **); 22 struct posix_acl *, struct posix_acl *);
23extern int v9fs_acl_mode(struct inode *dir, umode_t *modep, 23extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
24 struct posix_acl **dpacl, struct posix_acl **pacl); 24 struct posix_acl **dpacl, struct posix_acl **pacl);
25extern void v9fs_put_acl(struct posix_acl *dacl, struct posix_acl *acl);
25#else 26#else
26#define v9fs_iop_get_acl NULL 27#define v9fs_iop_get_acl NULL
27static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) 28static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
28{ 29{
29 return 0; 30 return 0;
30} 31}
31static inline int v9fs_acl_chmod(struct dentry *dentry) 32static inline int v9fs_acl_chmod(struct inode *inode, struct p9_fid *fid)
32{ 33{
33 return 0; 34 return 0;
34} 35}
35static inline int v9fs_set_create_acl(struct dentry *dentry, 36static inline int v9fs_set_create_acl(struct inode *inode,
36 struct posix_acl **dpacl, 37 struct p9_fid *fid,
37 struct posix_acl **pacl) 38 struct posix_acl *dacl,
39 struct posix_acl *acl)
38{ 40{
39 return 0; 41 return 0;
40} 42}
43static inline void v9fs_put_acl(struct posix_acl *dacl,
44 struct posix_acl *acl)
45{
46}
41static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep, 47static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep,
42 struct posix_acl **dpacl, 48 struct posix_acl **dpacl,
43 struct posix_acl **pacl) 49 struct posix_acl **pacl)
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index da8eefbe830d..afd4724b2d92 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -74,19 +74,20 @@ int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
74 * 74 *
75 */ 75 */
76 76
77static struct p9_fid *v9fs_fid_find(struct dentry *dentry, u32 uid, int any) 77static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
78{ 78{
79 struct v9fs_dentry *dent; 79 struct v9fs_dentry *dent;
80 struct p9_fid *fid, *ret; 80 struct p9_fid *fid, *ret;
81 81
82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", 82 p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n",
83 dentry->d_name.name, dentry, uid, any); 83 dentry->d_name.name, dentry, from_kuid(&init_user_ns, uid),
84 any);
84 dent = (struct v9fs_dentry *) dentry->d_fsdata; 85 dent = (struct v9fs_dentry *) dentry->d_fsdata;
85 ret = NULL; 86 ret = NULL;
86 if (dent) { 87 if (dent) {
87 spin_lock(&dent->lock); 88 spin_lock(&dent->lock);
88 list_for_each_entry(fid, &dent->fidlist, dlist) { 89 list_for_each_entry(fid, &dent->fidlist, dlist) {
89 if (any || fid->uid == uid) { 90 if (any || uid_eq(fid->uid, uid)) {
90 ret = fid; 91 ret = fid;
91 break; 92 break;
92 } 93 }
@@ -126,7 +127,7 @@ err_out:
126} 127}
127 128
128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, 129static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 uid_t uid, int any) 130 kuid_t uid, int any)
130{ 131{
131 struct dentry *ds; 132 struct dentry *ds;
132 char **wnames, *uname; 133 char **wnames, *uname;
@@ -233,7 +234,7 @@ err_out:
233 234
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) 235struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{ 236{
236 uid_t uid; 237 kuid_t uid;
237 int any, access; 238 int any, access;
238 struct v9fs_session_info *v9ses; 239 struct v9fs_session_info *v9ses;
239 240
@@ -253,7 +254,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
253 break; 254 break;
254 255
255 default: 256 default:
256 uid = ~0; 257 uid = INVALID_UID;
257 any = 0; 258 any = 0;
258 break; 259 break;
259 } 260 }
@@ -272,7 +273,7 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
272 return ret; 273 return ret;
273} 274}
274 275
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) 276static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, kuid_t uid)
276{ 277{
277 struct p9_fid *fid, *ret; 278 struct p9_fid *fid, *ret;
278 279
@@ -289,7 +290,7 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
289 int err; 290 int err;
290 struct p9_fid *fid; 291 struct p9_fid *fid;
291 292
292 fid = v9fs_fid_clone_with_uid(dentry, 0); 293 fid = v9fs_fid_clone_with_uid(dentry, GLOBAL_ROOT_UID);
293 if (IS_ERR(fid)) 294 if (IS_ERR(fid))
294 goto error_out; 295 goto error_out;
295 /* 296 /*
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index d934f04e7736..58e6cbce4156 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -161,7 +161,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
161 ret = r; 161 ret = r;
162 continue; 162 continue;
163 } 163 }
164 v9ses->dfltuid = option; 164 v9ses->dfltuid = make_kuid(current_user_ns(), option);
165 if (!uid_valid(v9ses->dfltuid)) {
166 p9_debug(P9_DEBUG_ERROR,
167 "uid field, but not a uid?\n");
168 ret = -EINVAL;
169 continue;
170 }
165 break; 171 break;
166 case Opt_dfltgid: 172 case Opt_dfltgid:
167 r = match_int(&args[0], &option); 173 r = match_int(&args[0], &option);
@@ -171,7 +177,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
171 ret = r; 177 ret = r;
172 continue; 178 continue;
173 } 179 }
174 v9ses->dfltgid = option; 180 v9ses->dfltgid = make_kgid(current_user_ns(), option);
181 if (!gid_valid(v9ses->dfltgid)) {
182 p9_debug(P9_DEBUG_ERROR,
183 "gid field, but not a gid?\n");
184 ret = -EINVAL;
185 continue;
186 }
175 break; 187 break;
176 case Opt_afid: 188 case Opt_afid:
177 r = match_int(&args[0], &option); 189 r = match_int(&args[0], &option);
@@ -248,8 +260,9 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
248 else if (strcmp(s, "client") == 0) { 260 else if (strcmp(s, "client") == 0) {
249 v9ses->flags |= V9FS_ACCESS_CLIENT; 261 v9ses->flags |= V9FS_ACCESS_CLIENT;
250 } else { 262 } else {
263 uid_t uid;
251 v9ses->flags |= V9FS_ACCESS_SINGLE; 264 v9ses->flags |= V9FS_ACCESS_SINGLE;
252 v9ses->uid = simple_strtoul(s, &e, 10); 265 uid = simple_strtoul(s, &e, 10);
253 if (*e != '\0') { 266 if (*e != '\0') {
254 ret = -EINVAL; 267 ret = -EINVAL;
255 pr_info("Unknown access argument %s\n", 268 pr_info("Unknown access argument %s\n",
@@ -257,6 +270,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
257 kfree(s); 270 kfree(s);
258 goto free_and_return; 271 goto free_and_return;
259 } 272 }
273 v9ses->uid = make_kuid(current_user_ns(), uid);
274 if (!uid_valid(v9ses->uid)) {
275 ret = -EINVAL;
276 pr_info("Uknown uid %s\n", s);
277 kfree(s);
278 goto free_and_return;
279 }
260 } 280 }
261 281
262 kfree(s); 282 kfree(s);
@@ -319,7 +339,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
319 list_add(&v9ses->slist, &v9fs_sessionlist); 339 list_add(&v9ses->slist, &v9fs_sessionlist);
320 spin_unlock(&v9fs_sessionlist_lock); 340 spin_unlock(&v9fs_sessionlist_lock);
321 341
322 v9ses->uid = ~0; 342 v9ses->uid = INVALID_UID;
323 v9ses->dfltuid = V9FS_DEFUID; 343 v9ses->dfltuid = V9FS_DEFUID;
324 v9ses->dfltgid = V9FS_DEFGID; 344 v9ses->dfltgid = V9FS_DEFGID;
325 345
@@ -364,7 +384,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
364 384
365 v9ses->flags &= ~V9FS_ACCESS_MASK; 385 v9ses->flags &= ~V9FS_ACCESS_MASK;
366 v9ses->flags |= V9FS_ACCESS_ANY; 386 v9ses->flags |= V9FS_ACCESS_ANY;
367 v9ses->uid = ~0; 387 v9ses->uid = INVALID_UID;
368 } 388 }
369 if (!v9fs_proto_dotl(v9ses) || 389 if (!v9fs_proto_dotl(v9ses) ||
370 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) { 390 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
@@ -375,7 +395,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
375 v9ses->flags &= ~V9FS_ACL_MASK; 395 v9ses->flags &= ~V9FS_ACL_MASK;
376 } 396 }
377 397
378 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 398 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, INVALID_UID,
379 v9ses->aname); 399 v9ses->aname);
380 if (IS_ERR(fid)) { 400 if (IS_ERR(fid)) {
381 retval = PTR_ERR(fid); 401 retval = PTR_ERR(fid);
@@ -387,7 +407,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
387 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE) 407 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE)
388 fid->uid = v9ses->uid; 408 fid->uid = v9ses->uid;
389 else 409 else
390 fid->uid = ~0; 410 fid->uid = INVALID_UID;
391 411
392#ifdef CONFIG_9P_FSCACHE 412#ifdef CONFIG_9P_FSCACHE
393 /* register the session for caching */ 413 /* register the session for caching */
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 34c59f14a1c9..a8e127c89627 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -109,9 +109,9 @@ struct v9fs_session_info {
109 char *uname; /* user name to mount as */ 109 char *uname; /* user name to mount as */
110 char *aname; /* name of remote hierarchy being mounted */ 110 char *aname; /* name of remote hierarchy being mounted */
111 unsigned int maxdata; /* max data for client interface */ 111 unsigned int maxdata; /* max data for client interface */
112 unsigned int dfltuid; /* default uid/muid for legacy support */ 112 kuid_t dfltuid; /* default uid/muid for legacy support */
113 unsigned int dfltgid; /* default gid for legacy support */ 113 kgid_t dfltgid; /* default gid for legacy support */
114 u32 uid; /* if ACCESS_SINGLE, the uid that has access */ 114 kuid_t uid; /* if ACCESS_SINGLE, the uid that has access */
115 struct p9_client *clnt; /* 9p client */ 115 struct p9_client *clnt; /* 9p client */
116 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
117 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
@@ -165,8 +165,8 @@ extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
165#define V9FS_PORT 564 165#define V9FS_PORT 564
166#define V9FS_DEFUSER "nobody" 166#define V9FS_DEFUSER "nobody"
167#define V9FS_DEFANAME "" 167#define V9FS_DEFANAME ""
168#define V9FS_DEFUID (-2) 168#define V9FS_DEFUID KUIDT_INIT(-2)
169#define V9FS_DEFGID (-2) 169#define V9FS_DEFGID KGIDT_INIT(-2)
170 170
171static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) 171static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
172{ 172{
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 64600b5d0522..9ad68628522c 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -137,6 +137,7 @@ out_valid:
137 137
138const struct dentry_operations v9fs_cached_dentry_operations = { 138const struct dentry_operations v9fs_cached_dentry_operations = {
139 .d_revalidate = v9fs_lookup_revalidate, 139 .d_revalidate = v9fs_lookup_revalidate,
140 .d_weak_revalidate = v9fs_lookup_revalidate,
140 .d_delete = v9fs_cached_dentry_delete, 141 .d_delete = v9fs_cached_dentry_delete,
141 .d_release = v9fs_dentry_release, 142 .d_release = v9fs_dentry_release,
142}; 143};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index ff911e779651..be1e34adc3c6 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -52,10 +52,9 @@
52 */ 52 */
53 53
54struct p9_rdir { 54struct p9_rdir {
55 struct mutex mutex;
56 int head; 55 int head;
57 int tail; 56 int tail;
58 uint8_t *buf; 57 uint8_t buf[];
59}; 58};
60 59
61/** 60/**
@@ -93,33 +92,12 @@ static void p9stat_init(struct p9_wstat *stbuf)
93 * 92 *
94 */ 93 */
95 94
96static int v9fs_alloc_rdir_buf(struct file *filp, int buflen) 95static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen)
97{ 96{
98 struct p9_rdir *rdir; 97 struct p9_fid *fid = filp->private_data;
99 struct p9_fid *fid; 98 if (!fid->rdir)
100 int err = 0; 99 fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
101 100 return fid->rdir;
102 fid = filp->private_data;
103 if (!fid->rdir) {
104 rdir = kmalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL);
105
106 if (rdir == NULL) {
107 err = -ENOMEM;
108 goto exit;
109 }
110 spin_lock(&filp->f_dentry->d_lock);
111 if (!fid->rdir) {
112 rdir->buf = (uint8_t *)rdir + sizeof(struct p9_rdir);
113 mutex_init(&rdir->mutex);
114 rdir->head = rdir->tail = 0;
115 fid->rdir = (void *) rdir;
116 rdir = NULL;
117 }
118 spin_unlock(&filp->f_dentry->d_lock);
119 kfree(rdir);
120 }
121exit:
122 return err;
123} 101}
124 102
125/** 103/**
@@ -145,20 +123,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
145 123
146 buflen = fid->clnt->msize - P9_IOHDRSZ; 124 buflen = fid->clnt->msize - P9_IOHDRSZ;
147 125
148 err = v9fs_alloc_rdir_buf(filp, buflen); 126 rdir = v9fs_alloc_rdir_buf(filp, buflen);
149 if (err) 127 if (!rdir)
150 goto exit; 128 return -ENOMEM;
151 rdir = (struct p9_rdir *) fid->rdir;
152 129
153 err = mutex_lock_interruptible(&rdir->mutex); 130 while (1) {
154 if (err)
155 return err;
156 while (err == 0) {
157 if (rdir->tail == rdir->head) { 131 if (rdir->tail == rdir->head) {
158 err = v9fs_file_readn(filp, rdir->buf, NULL, 132 err = v9fs_file_readn(filp, rdir->buf, NULL,
159 buflen, filp->f_pos); 133 buflen, filp->f_pos);
160 if (err <= 0) 134 if (err <= 0)
161 goto unlock_and_exit; 135 return err;
162 136
163 rdir->head = 0; 137 rdir->head = 0;
164 rdir->tail = err; 138 rdir->tail = err;
@@ -169,9 +143,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
169 rdir->tail - rdir->head, &st); 143 rdir->tail - rdir->head, &st);
170 if (err) { 144 if (err) {
171 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 145 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
172 err = -EIO;
173 p9stat_free(&st); 146 p9stat_free(&st);
174 goto unlock_and_exit; 147 return -EIO;
175 } 148 }
176 reclen = st.size+2; 149 reclen = st.size+2;
177 150
@@ -180,19 +153,13 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
180 153
181 p9stat_free(&st); 154 p9stat_free(&st);
182 155
183 if (over) { 156 if (over)
184 err = 0; 157 return 0;
185 goto unlock_and_exit; 158
186 }
187 rdir->head += reclen; 159 rdir->head += reclen;
188 filp->f_pos += reclen; 160 filp->f_pos += reclen;
189 } 161 }
190 } 162 }
191
192unlock_and_exit:
193 mutex_unlock(&rdir->mutex);
194exit:
195 return err;
196} 163}
197 164
198/** 165/**
@@ -218,21 +185,16 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
218 185
219 buflen = fid->clnt->msize - P9_READDIRHDRSZ; 186 buflen = fid->clnt->msize - P9_READDIRHDRSZ;
220 187
221 err = v9fs_alloc_rdir_buf(filp, buflen); 188 rdir = v9fs_alloc_rdir_buf(filp, buflen);
222 if (err) 189 if (!rdir)
223 goto exit; 190 return -ENOMEM;
224 rdir = (struct p9_rdir *) fid->rdir;
225 191
226 err = mutex_lock_interruptible(&rdir->mutex); 192 while (1) {
227 if (err)
228 return err;
229
230 while (err == 0) {
231 if (rdir->tail == rdir->head) { 193 if (rdir->tail == rdir->head) {
232 err = p9_client_readdir(fid, rdir->buf, buflen, 194 err = p9_client_readdir(fid, rdir->buf, buflen,
233 filp->f_pos); 195 filp->f_pos);
234 if (err <= 0) 196 if (err <= 0)
235 goto unlock_and_exit; 197 return err;
236 198
237 rdir->head = 0; 199 rdir->head = 0;
238 rdir->tail = err; 200 rdir->tail = err;
@@ -245,8 +207,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
245 &curdirent); 207 &curdirent);
246 if (err < 0) { 208 if (err < 0) {
247 p9_debug(P9_DEBUG_VFS, "returned %d\n", err); 209 p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
248 err = -EIO; 210 return -EIO;
249 goto unlock_and_exit;
250 } 211 }
251 212
252 /* d_off in dirent structure tracks the offset into 213 /* d_off in dirent structure tracks the offset into
@@ -261,20 +222,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
261 curdirent.d_type); 222 curdirent.d_type);
262 oldoffset = curdirent.d_off; 223 oldoffset = curdirent.d_off;
263 224
264 if (over) { 225 if (over)
265 err = 0; 226 return 0;
266 goto unlock_and_exit;
267 }
268 227
269 filp->f_pos = curdirent.d_off; 228 filp->f_pos = curdirent.d_off;
270 rdir->head += err; 229 rdir->head += err;
271 } 230 }
272 } 231 }
273
274unlock_and_exit:
275 mutex_unlock(&rdir->mutex);
276exit:
277 return err;
278} 232}
279 233
280 234
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index c2483e97beee..d384a8b77ee8 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -80,10 +80,6 @@ int v9fs_file_open(struct inode *inode, struct file *file)
80 p9_client_clunk(fid); 80 p9_client_clunk(fid);
81 return err; 81 return err;
82 } 82 }
83 if (file->f_flags & O_TRUNC) {
84 i_size_write(inode, 0);
85 inode->i_blocks = 0;
86 }
87 if ((file->f_flags & O_APPEND) && 83 if ((file->f_flags & O_APPEND) &&
88 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))) 84 (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)))
89 generic_file_llseek(file, 0, SEEK_END); 85 generic_file_llseek(file, 0, SEEK_END);
@@ -133,7 +129,7 @@ out_error:
133static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) 129static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
134{ 130{
135 int res = 0; 131 int res = 0;
136 struct inode *inode = filp->f_path.dentry->d_inode; 132 struct inode *inode = file_inode(filp);
137 133
138 p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); 134 p9_debug(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
139 135
@@ -302,7 +298,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
302 298
303static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) 299static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
304{ 300{
305 struct inode *inode = filp->f_path.dentry->d_inode; 301 struct inode *inode = file_inode(filp);
306 int ret = -ENOLCK; 302 int ret = -ENOLCK;
307 303
308 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", 304 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
@@ -338,7 +334,7 @@ out_err:
338static int v9fs_file_flock_dotl(struct file *filp, int cmd, 334static int v9fs_file_flock_dotl(struct file *filp, int cmd,
339 struct file_lock *fl) 335 struct file_lock *fl)
340{ 336{
341 struct inode *inode = filp->f_path.dentry->d_inode; 337 struct inode *inode = file_inode(filp);
342 int ret = -ENOLCK; 338 int ret = -ENOLCK;
343 339
344 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", 340 p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n",
@@ -529,7 +525,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
529 if (!count) 525 if (!count)
530 goto out; 526 goto out;
531 527
532 retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode, 528 retval = v9fs_file_write_internal(file_inode(filp),
533 filp->private_data, 529 filp->private_data,
534 data, count, &origin, 1); 530 data, count, &origin, 1);
535 /* update offset on successful write */ 531 /* update offset on successful write */
@@ -604,7 +600,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
604 struct v9fs_inode *v9inode; 600 struct v9fs_inode *v9inode;
605 struct page *page = vmf->page; 601 struct page *page = vmf->page;
606 struct file *filp = vma->vm_file; 602 struct file *filp = vma->vm_file;
607 struct inode *inode = filp->f_path.dentry->d_inode; 603 struct inode *inode = file_inode(filp);
608 604
609 605
610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", 606 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
@@ -620,6 +616,7 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
620 lock_page(page); 616 lock_page(page);
621 if (page->mapping != inode->i_mapping) 617 if (page->mapping != inode->i_mapping)
622 goto out_unlock; 618 goto out_unlock;
619 wait_for_stable_page(page);
623 620
624 return VM_FAULT_LOCKED; 621 return VM_FAULT_LOCKED;
625out_unlock: 622out_unlock:
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 890bed538f9b..b5340c829de1 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -192,9 +192,6 @@ int v9fs_uflags2omode(int uflags, int extended)
192 break; 192 break;
193 } 193 }
194 194
195 if (uflags & O_TRUNC)
196 ret |= P9_OTRUNC;
197
198 if (extended) { 195 if (extended) {
199 if (uflags & O_EXCL) 196 if (uflags & O_EXCL)
200 ret |= P9_OEXCL; 197 ret |= P9_OEXCL;
@@ -228,9 +225,9 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
228 wstat->uid = NULL; 225 wstat->uid = NULL;
229 wstat->gid = NULL; 226 wstat->gid = NULL;
230 wstat->muid = NULL; 227 wstat->muid = NULL;
231 wstat->n_uid = ~0; 228 wstat->n_uid = INVALID_UID;
232 wstat->n_gid = ~0; 229 wstat->n_gid = INVALID_GID;
233 wstat->n_muid = ~0; 230 wstat->n_muid = INVALID_UID;
234 wstat->extension = NULL; 231 wstat->extension = NULL;
235} 232}
236 233
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 40895546e103..61e4fa70a6fa 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -57,7 +57,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
57 * group of the new file system object. 57 * group of the new file system object.
58 */ 58 */
59 59
60static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) 60static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
61{ 61{
62 BUG_ON(dir_inode == NULL); 62 BUG_ON(dir_inode == NULL);
63 63
@@ -186,7 +186,6 @@ static int v9fs_mapped_dotl_flags(int flags)
186 { O_CREAT, P9_DOTL_CREATE }, 186 { O_CREAT, P9_DOTL_CREATE },
187 { O_EXCL, P9_DOTL_EXCL }, 187 { O_EXCL, P9_DOTL_EXCL },
188 { O_NOCTTY, P9_DOTL_NOCTTY }, 188 { O_NOCTTY, P9_DOTL_NOCTTY },
189 { O_TRUNC, P9_DOTL_TRUNC },
190 { O_APPEND, P9_DOTL_APPEND }, 189 { O_APPEND, P9_DOTL_APPEND },
191 { O_NONBLOCK, P9_DOTL_NONBLOCK }, 190 { O_NONBLOCK, P9_DOTL_NONBLOCK },
192 { O_DSYNC, P9_DOTL_DSYNC }, 191 { O_DSYNC, P9_DOTL_DSYNC },
@@ -246,7 +245,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
246 int *opened) 245 int *opened)
247{ 246{
248 int err = 0; 247 int err = 0;
249 gid_t gid; 248 kgid_t gid;
250 umode_t mode; 249 umode_t mode;
251 char *name = NULL; 250 char *name = NULL;
252 struct p9_qid qid; 251 struct p9_qid qid;
@@ -268,8 +267,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
268 } 267 }
269 268
270 /* Only creates */ 269 /* Only creates */
271 if (!(flags & O_CREAT) || dentry->d_inode) 270 if (!(flags & O_CREAT))
272 return finish_no_open(file, res); 271 return finish_no_open(file, res);
272 else if (dentry->d_inode) {
273 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
274 return -EEXIST;
275 else
276 return finish_no_open(file, res);
277 }
273 278
274 v9ses = v9fs_inode2v9ses(dir); 279 v9ses = v9fs_inode2v9ses(dir);
275 280
@@ -325,14 +330,14 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
325 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); 330 p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
326 goto error; 331 goto error;
327 } 332 }
333 /* Now set the ACL based on the default value */
334 v9fs_set_create_acl(inode, fid, dacl, pacl);
335
328 err = v9fs_fid_add(dentry, fid); 336 err = v9fs_fid_add(dentry, fid);
329 if (err < 0) 337 if (err < 0)
330 goto error; 338 goto error;
331 d_instantiate(dentry, inode); 339 d_instantiate(dentry, inode);
332 340
333 /* Now set the ACL based on the default value */
334 v9fs_set_create_acl(dentry, &dacl, &pacl);
335
336 v9inode = V9FS_I(inode); 341 v9inode = V9FS_I(inode);
337 mutex_lock(&v9inode->v_mutex); 342 mutex_lock(&v9inode->v_mutex);
338 if (v9ses->cache && !v9inode->writeback_fid && 343 if (v9ses->cache && !v9inode->writeback_fid &&
@@ -364,6 +369,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
364#endif 369#endif
365 *opened |= FILE_CREATED; 370 *opened |= FILE_CREATED;
366out: 371out:
372 v9fs_put_acl(dacl, pacl);
367 dput(res); 373 dput(res);
368 return err; 374 return err;
369 375
@@ -373,7 +379,6 @@ error:
373err_clunk_old_fid: 379err_clunk_old_fid:
374 if (ofid) 380 if (ofid)
375 p9_client_clunk(ofid); 381 p9_client_clunk(ofid);
376 v9fs_set_create_acl(NULL, &dacl, &pacl);
377 goto out; 382 goto out;
378} 383}
379 384
@@ -391,7 +396,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
391 int err; 396 int err;
392 struct v9fs_session_info *v9ses; 397 struct v9fs_session_info *v9ses;
393 struct p9_fid *fid = NULL, *dfid = NULL; 398 struct p9_fid *fid = NULL, *dfid = NULL;
394 gid_t gid; 399 kgid_t gid;
395 char *name; 400 char *name;
396 umode_t mode; 401 umode_t mode;
397 struct inode *inode; 402 struct inode *inode;
@@ -430,17 +435,17 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
430 if (err < 0) 435 if (err < 0)
431 goto error; 436 goto error;
432 437
438 fid = p9_client_walk(dfid, 1, &name, 1);
439 if (IS_ERR(fid)) {
440 err = PTR_ERR(fid);
441 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
442 err);
443 fid = NULL;
444 goto error;
445 }
446
433 /* instantiate inode and assign the unopened fid to the dentry */ 447 /* instantiate inode and assign the unopened fid to the dentry */
434 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 448 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
435 fid = p9_client_walk(dfid, 1, &name, 1);
436 if (IS_ERR(fid)) {
437 err = PTR_ERR(fid);
438 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
439 err);
440 fid = NULL;
441 goto error;
442 }
443
444 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 449 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
445 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
446 err = PTR_ERR(inode); 451 err = PTR_ERR(inode);
@@ -451,6 +456,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
451 err = v9fs_fid_add(dentry, fid); 456 err = v9fs_fid_add(dentry, fid);
452 if (err < 0) 457 if (err < 0)
453 goto error; 458 goto error;
459 v9fs_set_create_acl(inode, fid, dacl, pacl);
454 d_instantiate(dentry, inode); 460 d_instantiate(dentry, inode);
455 fid = NULL; 461 fid = NULL;
456 } else { 462 } else {
@@ -464,16 +470,15 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
464 err = PTR_ERR(inode); 470 err = PTR_ERR(inode);
465 goto error; 471 goto error;
466 } 472 }
473 v9fs_set_create_acl(inode, fid, dacl, pacl);
467 d_instantiate(dentry, inode); 474 d_instantiate(dentry, inode);
468 } 475 }
469 /* Now set the ACL based on the default value */
470 v9fs_set_create_acl(dentry, &dacl, &pacl);
471 inc_nlink(dir); 476 inc_nlink(dir);
472 v9fs_invalidate_inode_attr(dir); 477 v9fs_invalidate_inode_attr(dir);
473error: 478error:
474 if (fid) 479 if (fid)
475 p9_client_clunk(fid); 480 p9_client_clunk(fid);
476 v9fs_set_create_acl(NULL, &dacl, &pacl); 481 v9fs_put_acl(dacl, pacl);
477 return err; 482 return err;
478} 483}
479 484
@@ -567,10 +572,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
567 struct v9fs_session_info *v9ses; 572 struct v9fs_session_info *v9ses;
568 struct p9_fid *fid; 573 struct p9_fid *fid;
569 struct p9_iattr_dotl p9attr; 574 struct p9_iattr_dotl p9attr;
575 struct inode *inode = dentry->d_inode;
570 576
571 p9_debug(P9_DEBUG_VFS, "\n"); 577 p9_debug(P9_DEBUG_VFS, "\n");
572 578
573 retval = inode_change_ok(dentry->d_inode, iattr); 579 retval = inode_change_ok(inode, iattr);
574 if (retval) 580 if (retval)
575 return retval; 581 return retval;
576 582
@@ -591,23 +597,23 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
591 return PTR_ERR(fid); 597 return PTR_ERR(fid);
592 598
593 /* Write all dirty data */ 599 /* Write all dirty data */
594 if (S_ISREG(dentry->d_inode->i_mode)) 600 if (S_ISREG(inode->i_mode))
595 filemap_write_and_wait(dentry->d_inode->i_mapping); 601 filemap_write_and_wait(inode->i_mapping);
596 602
597 retval = p9_client_setattr(fid, &p9attr); 603 retval = p9_client_setattr(fid, &p9attr);
598 if (retval < 0) 604 if (retval < 0)
599 return retval; 605 return retval;
600 606
601 if ((iattr->ia_valid & ATTR_SIZE) && 607 if ((iattr->ia_valid & ATTR_SIZE) &&
602 iattr->ia_size != i_size_read(dentry->d_inode)) 608 iattr->ia_size != i_size_read(inode))
603 truncate_setsize(dentry->d_inode, iattr->ia_size); 609 truncate_setsize(inode, iattr->ia_size);
604 610
605 v9fs_invalidate_inode_attr(dentry->d_inode); 611 v9fs_invalidate_inode_attr(inode);
606 setattr_copy(dentry->d_inode, iattr); 612 setattr_copy(inode, iattr);
607 mark_inode_dirty(dentry->d_inode); 613 mark_inode_dirty(inode);
608 if (iattr->ia_valid & ATTR_MODE) { 614 if (iattr->ia_valid & ATTR_MODE) {
609 /* We also want to update ACL when we update mode bits */ 615 /* We also want to update ACL when we update mode bits */
610 retval = v9fs_acl_chmod(dentry); 616 retval = v9fs_acl_chmod(inode, fid);
611 if (retval < 0) 617 if (retval < 0)
612 return retval; 618 return retval;
613 } 619 }
@@ -692,7 +698,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
692 const char *symname) 698 const char *symname)
693{ 699{
694 int err; 700 int err;
695 gid_t gid; 701 kgid_t gid;
696 char *name; 702 char *name;
697 struct p9_qid qid; 703 struct p9_qid qid;
698 struct inode *inode; 704 struct inode *inode;
@@ -832,7 +838,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
832 dev_t rdev) 838 dev_t rdev)
833{ 839{
834 int err; 840 int err;
835 gid_t gid; 841 kgid_t gid;
836 char *name; 842 char *name;
837 umode_t mode; 843 umode_t mode;
838 struct v9fs_session_info *v9ses; 844 struct v9fs_session_info *v9ses;
@@ -875,17 +881,17 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
875 goto error; 881 goto error;
876 882
877 v9fs_invalidate_inode_attr(dir); 883 v9fs_invalidate_inode_attr(dir);
884 fid = p9_client_walk(dfid, 1, &name, 1);
885 if (IS_ERR(fid)) {
886 err = PTR_ERR(fid);
887 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
888 err);
889 fid = NULL;
890 goto error;
891 }
892
878 /* instantiate inode and assign the unopened fid to the dentry */ 893 /* instantiate inode and assign the unopened fid to the dentry */
879 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 894 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
880 fid = p9_client_walk(dfid, 1, &name, 1);
881 if (IS_ERR(fid)) {
882 err = PTR_ERR(fid);
883 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
884 err);
885 fid = NULL;
886 goto error;
887 }
888
889 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); 895 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
890 if (IS_ERR(inode)) { 896 if (IS_ERR(inode)) {
891 err = PTR_ERR(inode); 897 err = PTR_ERR(inode);
@@ -893,6 +899,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
893 err); 899 err);
894 goto error; 900 goto error;
895 } 901 }
902 v9fs_set_create_acl(inode, fid, dacl, pacl);
896 err = v9fs_fid_add(dentry, fid); 903 err = v9fs_fid_add(dentry, fid);
897 if (err < 0) 904 if (err < 0)
898 goto error; 905 goto error;
@@ -908,14 +915,13 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
908 err = PTR_ERR(inode); 915 err = PTR_ERR(inode);
909 goto error; 916 goto error;
910 } 917 }
918 v9fs_set_create_acl(inode, fid, dacl, pacl);
911 d_instantiate(dentry, inode); 919 d_instantiate(dentry, inode);
912 } 920 }
913 /* Now set the ACL based on the default value */
914 v9fs_set_create_acl(dentry, &dacl, &pacl);
915error: 921error:
916 if (fid) 922 if (fid)
917 p9_client_clunk(fid); 923 p9_client_clunk(fid);
918 v9fs_set_create_acl(NULL, &dacl, &pacl); 924 v9fs_put_acl(dacl, pacl);
919 return err; 925 return err;
920} 926}
921 927
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 137d50396898..91dad63e5a2d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -363,5 +363,5 @@ struct file_system_type v9fs_fs_type = {
363 .mount = v9fs_mount, 363 .mount = v9fs_mount,
364 .kill_sb = v9fs_kill_super, 364 .kill_sb = v9fs_kill_super,
365 .owner = THIS_MODULE, 365 .owner = THIS_MODULE,
366 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT, 366 .fs_flags = FS_RENAME_DOES_D_MOVE,
367}; 367};
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 29653b70a9c3..c45e016b190f 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -111,19 +111,26 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
111int v9fs_xattr_set(struct dentry *dentry, const char *name, 111int v9fs_xattr_set(struct dentry *dentry, const char *name,
112 const void *value, size_t value_len, int flags) 112 const void *value, size_t value_len, int flags)
113{ 113{
114 struct p9_fid *fid = v9fs_fid_lookup(dentry);
115 if (IS_ERR(fid))
116 return PTR_ERR(fid);
117 return v9fs_fid_xattr_set(fid, name, value, value_len, flags);
118}
119
120int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
121 const void *value, size_t value_len, int flags)
122{
114 u64 offset = 0; 123 u64 offset = 0;
115 int retval, msize, write_count; 124 int retval, msize, write_count;
116 struct p9_fid *fid = NULL;
117 125
118 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", 126 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
119 name, value_len, flags); 127 name, value_len, flags);
120 128
121 fid = v9fs_fid_clone(dentry); 129 /* Clone it */
122 if (IS_ERR(fid)) { 130 fid = p9_client_walk(fid, 0, NULL, 1);
123 retval = PTR_ERR(fid); 131 if (IS_ERR(fid))
124 fid = NULL; 132 return PTR_ERR(fid);
125 goto error; 133
126 }
127 /* 134 /*
128 * On success fid points to xattr 135 * On success fid points to xattr
129 */ 136 */
@@ -131,7 +138,8 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
131 if (retval < 0) { 138 if (retval < 0) {
132 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n", 139 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
133 retval); 140 retval);
134 goto error; 141 p9_client_clunk(fid);
142 return retval;
135 } 143 }
136 msize = fid->clnt->msize; 144 msize = fid->clnt->msize;
137 while (value_len) { 145 while (value_len) {
@@ -144,17 +152,12 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
144 if (write_count < 0) { 152 if (write_count < 0) {
145 /* error in xattr write */ 153 /* error in xattr write */
146 retval = write_count; 154 retval = write_count;
147 goto error; 155 break;
148 } 156 }
149 offset += write_count; 157 offset += write_count;
150 value_len -= write_count; 158 value_len -= write_count;
151 } 159 }
152 /* Total read xattr bytes */ 160 return p9_client_clunk(fid);
153 retval = offset;
154error:
155 if (fid)
156 retval = p9_client_clunk(fid);
157 return retval;
158} 161}
159 162
160ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) 163ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index eaa837c53bd5..eec348a3df71 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -27,6 +27,8 @@ extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
27 void *, size_t); 27 void *, size_t);
28extern ssize_t v9fs_xattr_get(struct dentry *, const char *, 28extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
29 void *, size_t); 29 void *, size_t);
30extern int v9fs_fid_xattr_set(struct p9_fid *, const char *,
31 const void *, size_t, int);
30extern int v9fs_xattr_set(struct dentry *, const char *, 32extern int v9fs_xattr_set(struct dentry *, const char *,
31 const void *, size_t, int); 33 const void *, size_t, int);
32extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t); 34extern ssize_t v9fs_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/Kconfig b/fs/Kconfig
index cfe512fd1caf..780725a463b1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -68,16 +68,6 @@ source "fs/quota/Kconfig"
68source "fs/autofs4/Kconfig" 68source "fs/autofs4/Kconfig"
69source "fs/fuse/Kconfig" 69source "fs/fuse/Kconfig"
70 70
71config CUSE
72 tristate "Character device in Userspace support"
73 depends on FUSE_FS
74 help
75 This FUSE extension allows character devices to be
76 implemented in userspace.
77
78 If you want to develop or use userspace character device
79 based on CUSE, answer Y or M.
80
81config GENERIC_ACL 71config GENERIC_ACL
82 bool 72 bool
83 select FS_POSIX_ACL 73 select FS_POSIX_ACL
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index e55182a74605..c5a7787dd5e9 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,6 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
6 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index b3be2e7c5643..9cf874ce8336 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -19,7 +19,7 @@ static DEFINE_RWLOCK(adfs_dir_lock);
19static int 19static int
20adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 20adfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
21{ 21{
22 struct inode *inode = filp->f_path.dentry->d_inode; 22 struct inode *inode = file_inode(filp);
23 struct super_block *sb = inode->i_sb; 23 struct super_block *sb = inode->i_sb;
24 struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; 24 struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir;
25 struct object_info obj; 25 struct object_info obj;
diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig
index cfad9afb4762..a04d9e848d05 100644
--- a/fs/affs/Kconfig
+++ b/fs/affs/Kconfig
@@ -1,6 +1,6 @@
1config AFFS_FS 1config AFFS_FS
2 tristate "Amiga FFS file system support (EXPERIMENTAL)" 2 tristate "Amiga FFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 The Fast File System (FFS) is the common file system used on hard 5 The Fast File System (FFS) is the common file system used on hard
6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y 6 disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20). Say Y
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index eb82ee53ee0b..d9a43674cb94 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -125,9 +125,8 @@ static void
125affs_fix_dcache(struct inode *inode, u32 entry_ino) 125affs_fix_dcache(struct inode *inode, u32 entry_ino)
126{ 126{
127 struct dentry *dentry; 127 struct dentry *dentry;
128 struct hlist_node *p;
129 spin_lock(&inode->i_lock); 128 spin_lock(&inode->i_lock);
130 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 129 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
131 if (entry_ino == (u32)(long)dentry->d_fsdata) { 130 if (entry_ino == (u32)(long)dentry->d_fsdata) {
132 dentry->d_fsdata = (void *)inode->i_ino; 131 dentry->d_fsdata = (void *)inode->i_ino;
133 break; 132 break;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 8ca8f3a55599..fd11a6d608ee 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -42,7 +42,7 @@ const struct inode_operations affs_dir_inode_operations = {
42static int 42static int
43affs_readdir(struct file *filp, void *dirent, filldir_t filldir) 43affs_readdir(struct file *filp, void *dirent, filldir_t filldir)
44{ 44{
45 struct inode *inode = filp->f_path.dentry->d_inode; 45 struct inode *inode = file_inode(filp);
46 struct super_block *sb = inode->i_sb; 46 struct super_block *sb = inode->i_sb;
47 struct buffer_head *dir_bh; 47 struct buffer_head *dir_bh;
48 struct buffer_head *fh_bh; 48 struct buffer_head *fh_bh;
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 8f975f25b486..ebba3b18e5da 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -1,6 +1,6 @@
1config AFS_FS 1config AFS_FS
2 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2 tristate "Andrew File System support (AFS)"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select AF_RXRPC 4 select AF_RXRPC
5 select DNS_RESOLVER 5 select DNS_RESOLVER
6 help 6 help
@@ -22,8 +22,7 @@ config AFS_DEBUG
22 If unsure, say N. 22 If unsure, say N.
23 23
24config AFS_FSCACHE 24config AFS_FSCACHE
25 bool "Provide AFS client caching support (EXPERIMENTAL)" 25 bool "Provide AFS client caching support"
26 depends on EXPERIMENTAL
27 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y 26 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
28 help 27 help
29 Say Y here if you want AFS data to be cached locally on disk through 28 Say Y here if you want AFS data to be cached locally on disk through
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index c548aa346f0d..3c462ff6db63 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -119,8 +119,8 @@ struct afs_file_status {
119 u64 size; /* file size */ 119 u64 size; /* file size */
120 afs_dataversion_t data_version; /* current data version */ 120 afs_dataversion_t data_version; /* current data version */
121 u32 author; /* author ID */ 121 u32 author; /* author ID */
122 u32 owner; /* owner ID */ 122 kuid_t owner; /* owner ID */
123 u32 group; /* group ID */ 123 kgid_t group; /* group ID */
124 afs_access_t caller_access; /* access rights for authenticated caller */ 124 afs_access_t caller_access; /* access rights for authenticated caller */
125 afs_access_t anon_access; /* access rights for unauthenticated caller */ 125 afs_access_t anon_access; /* access rights for unauthenticated caller */
126 umode_t mode; /* UNIX mode */ 126 umode_t mode; /* UNIX mode */
@@ -133,13 +133,6 @@ struct afs_file_status {
133/* 133/*
134 * AFS file status change request 134 * AFS file status change request
135 */ 135 */
136struct afs_store_status {
137 u32 mask; /* which bits of the struct are set */
138 u32 mtime_client; /* last time client changed data */
139 u32 owner; /* owner ID */
140 u32 group; /* group ID */
141 umode_t mode; /* UNIX mode */
142};
143 136
144#define AFS_SET_MTIME 0x01 /* set the mtime */ 137#define AFS_SET_MTIME 0x01 /* set the mtime */
145#define AFS_SET_OWNER 0x02 /* set the owner ID */ 138#define AFS_SET_OWNER 0x02 /* set the owner ID */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index db477906ba4f..7a465ed04444 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -393,12 +393,12 @@ static int afs_readdir(struct file *file, void *cookie, filldir_t filldir)
393 int ret; 393 int ret;
394 394
395 _enter("{%Ld,{%lu}}", 395 _enter("{%Ld,{%lu}}",
396 file->f_pos, file->f_path.dentry->d_inode->i_ino); 396 file->f_pos, file_inode(file)->i_ino);
397 397
398 ASSERT(file->private_data != NULL); 398 ASSERT(file->private_data != NULL);
399 399
400 fpos = file->f_pos; 400 fpos = file->f_pos;
401 ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, 401 ret = afs_dir_iterate(file_inode(file), &fpos,
402 cookie, filldir, file->private_data); 402 cookie, filldir, file->private_data);
403 file->f_pos = fpos; 403 file->f_pos = fpos;
404 404
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 757d664575dd..2497bf306c70 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -514,7 +514,7 @@ error:
514 */ 514 */
515int afs_lock(struct file *file, int cmd, struct file_lock *fl) 515int afs_lock(struct file *file, int cmd, struct file_lock *fl)
516{ 516{
517 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 517 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
518 518
519 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}", 519 _enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
520 vnode->fid.vid, vnode->fid.vnode, cmd, 520 vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -537,7 +537,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
537 */ 537 */
538int afs_flock(struct file *file, int cmd, struct file_lock *fl) 538int afs_flock(struct file *file, int cmd, struct file_lock *fl)
539{ 539{
540 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 540 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
541 541
542 _enter("{%x:%u},%d,{t=%x,fl=%x}", 542 _enter("{%x:%u},%d,{t=%x,fl=%x}",
543 vnode->fid.vid, vnode->fid.vnode, cmd, 543 vnode->fid.vid, vnode->fid.vnode, cmd,
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index b960ff05ea0b..c2e930ec2888 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -42,6 +42,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
42 umode_t mode; 42 umode_t mode;
43 u64 data_version, size; 43 u64 data_version, size;
44 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */ 44 u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
45 kuid_t owner;
46 kgid_t group;
45 47
46#define EXTRACT(DST) \ 48#define EXTRACT(DST) \
47 do { \ 49 do { \
@@ -56,7 +58,9 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
56 size = ntohl(*bp++); 58 size = ntohl(*bp++);
57 data_version = ntohl(*bp++); 59 data_version = ntohl(*bp++);
58 EXTRACT(status->author); 60 EXTRACT(status->author);
59 EXTRACT(status->owner); 61 owner = make_kuid(&init_user_ns, ntohl(*bp++));
62 changed |= !uid_eq(owner, status->owner);
63 status->owner = owner;
60 EXTRACT(status->caller_access); /* call ticket dependent */ 64 EXTRACT(status->caller_access); /* call ticket dependent */
61 EXTRACT(status->anon_access); 65 EXTRACT(status->anon_access);
62 EXTRACT(status->mode); 66 EXTRACT(status->mode);
@@ -65,7 +69,9 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
65 bp++; /* seg size */ 69 bp++; /* seg size */
66 status->mtime_client = ntohl(*bp++); 70 status->mtime_client = ntohl(*bp++);
67 status->mtime_server = ntohl(*bp++); 71 status->mtime_server = ntohl(*bp++);
68 EXTRACT(status->group); 72 group = make_kgid(&init_user_ns, ntohl(*bp++));
73 changed |= !gid_eq(group, status->group);
74 status->group = group;
69 bp++; /* sync counter */ 75 bp++; /* sync counter */
70 data_version |= (u64) ntohl(*bp++) << 32; 76 data_version |= (u64) ntohl(*bp++) << 32;
71 EXTRACT(status->lock_count); 77 EXTRACT(status->lock_count);
@@ -181,12 +187,12 @@ static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
181 187
182 if (attr->ia_valid & ATTR_UID) { 188 if (attr->ia_valid & ATTR_UID) {
183 mask |= AFS_SET_OWNER; 189 mask |= AFS_SET_OWNER;
184 owner = attr->ia_uid; 190 owner = from_kuid(&init_user_ns, attr->ia_uid);
185 } 191 }
186 192
187 if (attr->ia_valid & ATTR_GID) { 193 if (attr->ia_valid & ATTR_GID) {
188 mask |= AFS_SET_GROUP; 194 mask |= AFS_SET_GROUP;
189 group = attr->ia_gid; 195 group = from_kgid(&init_user_ns, attr->ia_gid);
190 } 196 }
191 197
192 if (attr->ia_valid & ATTR_MODE) { 198 if (attr->ia_valid & ATTR_MODE) {
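The AFS status fields move from raw u32 owner/group IDs to kuid_t/kgid_t above, while the wire format stays a plain 32-bit integer: the decode path converts with make_kuid()/make_kgid() against &init_user_ns, and the encode path converts back with from_kuid()/from_kgid(). A minimal sketch of that round trip, with illustrative values rather than code taken from the patch:

	u32 wire_uid = 1000;					/* ID as carried on the wire */
	kuid_t owner = make_kuid(&init_user_ns, wire_uid);	/* wire value -> kernel-internal kuid_t */
	u32 back     = from_kuid(&init_user_ns, owner);		/* kernel-internal kuid_t -> wire value */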
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 95cffd38239f..789bc253b5f6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -69,7 +69,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
69 69
70 set_nlink(inode, vnode->status.nlink); 70 set_nlink(inode, vnode->status.nlink);
71 inode->i_uid = vnode->status.owner; 71 inode->i_uid = vnode->status.owner;
72 inode->i_gid = 0; 72 inode->i_gid = GLOBAL_ROOT_GID;
73 inode->i_size = vnode->status.size; 73 inode->i_size = vnode->status.size;
74 inode->i_ctime.tv_sec = vnode->status.mtime_server; 74 inode->i_ctime.tv_sec = vnode->status.mtime_server;
75 inode->i_ctime.tv_nsec = 0; 75 inode->i_ctime.tv_nsec = 0;
@@ -175,8 +175,8 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
176 inode->i_op = &afs_autocell_inode_operations; 176 inode->i_op = &afs_autocell_inode_operations;
177 set_nlink(inode, 2); 177 set_nlink(inode, 2);
178 inode->i_uid = 0; 178 inode->i_uid = GLOBAL_ROOT_UID;
179 inode->i_gid = 0; 179 inode->i_gid = GLOBAL_ROOT_GID;
180 inode->i_ctime.tv_sec = get_seconds(); 180 inode->i_ctime.tv_sec = get_seconds();
181 inode->i_ctime.tv_nsec = 0; 181 inode->i_ctime.tv_nsec = 0;
182 inode->i_atime = inode->i_mtime = inode->i_ctime; 182 inode->i_atime = inode->i_mtime = inode->i_ctime;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 43165009428d..7c31ec399575 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -24,6 +24,8 @@
24#include <linux/parser.h> 24#include <linux/parser.h>
25#include <linux/statfs.h> 25#include <linux/statfs.h>
26#include <linux/sched.h> 26#include <linux/sched.h>
27#include <linux/nsproxy.h>
28#include <net/net_namespace.h>
27#include "internal.h" 29#include "internal.h"
28 30
29#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 31#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
@@ -363,6 +365,10 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
363 365
364 memset(&params, 0, sizeof(params)); 366 memset(&params, 0, sizeof(params));
365 367
368 ret = -EINVAL;
369 if (current->nsproxy->net_ns != &init_net)
370 goto error;
371
366 /* parse the options and device name */ 372 /* parse the options and device name */
367 if (options) { 373 if (options) {
368 ret = afs_parse_options(&params, options, &dev_name); 374 ret = afs_parse_options(&params, options, &dev_name);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9aa52d93c73c..7e03eadb40c0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -120,7 +120,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
120 struct page **pagep, void **fsdata) 120 struct page **pagep, void **fsdata)
121{ 121{
122 struct afs_writeback *candidate, *wb; 122 struct afs_writeback *candidate, *wb;
123 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 123 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
124 struct page *page; 124 struct page *page;
125 struct key *key = file->private_data; 125 struct key *key = file->private_data;
126 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 126 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
@@ -245,7 +245,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
245 loff_t pos, unsigned len, unsigned copied, 245 loff_t pos, unsigned len, unsigned copied,
246 struct page *page, void *fsdata) 246 struct page *page, void *fsdata)
247{ 247{
248 struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); 248 struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
249 loff_t i_size, maybe_i_size; 249 loff_t i_size, maybe_i_size;
250 250
251 _enter("{%x:%u},{%lx}", 251 _enter("{%x:%u},{%lx}",
@@ -627,8 +627,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
627ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, 627ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
628 unsigned long nr_segs, loff_t pos) 628 unsigned long nr_segs, loff_t pos)
629{ 629{
630 struct dentry *dentry = iocb->ki_filp->f_path.dentry; 630 struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
631 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
632 ssize_t result; 631 ssize_t result;
633 size_t count = iov_length(iov, nr_segs); 632 size_t count = iov_length(iov, nr_segs);
634 633
diff --git a/fs/aio.c b/fs/aio.c
index 71f613cf4a85..3f941f2a3059 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -101,7 +101,7 @@ static int aio_setup_ring(struct kioctx *ctx)
101 struct aio_ring *ring; 101 struct aio_ring *ring;
102 struct aio_ring_info *info = &ctx->ring_info; 102 struct aio_ring_info *info = &ctx->ring_info;
103 unsigned nr_events = ctx->max_reqs; 103 unsigned nr_events = ctx->max_reqs;
104 unsigned long size; 104 unsigned long size, populate;
105 int nr_pages; 105 int nr_pages;
106 106
107 /* Compensate for the ring buffer's head/tail overlap entry */ 107 /* Compensate for the ring buffer's head/tail overlap entry */
@@ -129,7 +129,8 @@ static int aio_setup_ring(struct kioctx *ctx)
129 down_write(&ctx->mm->mmap_sem); 129 down_write(&ctx->mm->mmap_sem);
130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 130 info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
131 PROT_READ|PROT_WRITE, 131 PROT_READ|PROT_WRITE,
132 MAP_ANONYMOUS|MAP_PRIVATE, 0); 132 MAP_ANONYMOUS|MAP_PRIVATE, 0,
133 &populate);
133 if (IS_ERR((void *)info->mmap_base)) { 134 if (IS_ERR((void *)info->mmap_base)) {
134 up_write(&ctx->mm->mmap_sem); 135 up_write(&ctx->mm->mmap_sem);
135 info->mmap_size = 0; 136 info->mmap_size = 0;
@@ -147,6 +148,8 @@ static int aio_setup_ring(struct kioctx *ctx)
147 aio_free_ring(ctx); 148 aio_free_ring(ctx);
148 return -EAGAIN; 149 return -EAGAIN;
149 } 150 }
151 if (populate)
152 mm_populate(info->mmap_base, populate);
150 153
151 ctx->user_id = info->mmap_base; 154 ctx->user_id = info->mmap_base;
152 155
@@ -588,11 +591,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
588{ 591{
589 struct mm_struct *mm = current->mm; 592 struct mm_struct *mm = current->mm;
590 struct kioctx *ctx, *ret = NULL; 593 struct kioctx *ctx, *ret = NULL;
591 struct hlist_node *n;
592 594
593 rcu_read_lock(); 595 rcu_read_lock();
594 596
595 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) { 597 hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
596 /* 598 /*
597 * RCU protects us against accessing freed memory but 599 * RCU protects us against accessing freed memory but
598 * we have to be careful not to get a reference when the 600 * we have to be careful not to get a reference when the
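The lookup_ioctx() hunk above follows the tree-wide hlist iterator change in this series: the hlist_for_each_entry*() macros no longer take a separate struct hlist_node * cursor, only the entry pointer, the list head and the member name, which is why the local "n" disappears. Sketch of the call site before and after:

	struct kioctx *ctx;

	/* old form needed an extra cursor: hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) */
	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
		/* the entry pointer itself now acts as the cursor */
	}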
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 28d39fb84ae3..47a65df8c871 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -131,7 +131,6 @@ struct file *anon_inode_getfile(const char *name,
131 struct qstr this; 131 struct qstr this;
132 struct path path; 132 struct path path;
133 struct file *file; 133 struct file *file;
134 int error;
135 134
136 if (IS_ERR(anon_inode_inode)) 135 if (IS_ERR(anon_inode_inode))
137 return ERR_PTR(-ENODEV); 136 return ERR_PTR(-ENODEV);
@@ -143,7 +142,7 @@ struct file *anon_inode_getfile(const char *name,
143 * Link the inode to a directory entry by creating a unique name 142 * Link the inode to a directory entry by creating a unique name
144 * using the inode sequence number. 143 * using the inode sequence number.
145 */ 144 */
146 error = -ENOMEM; 145 file = ERR_PTR(-ENOMEM);
147 this.name = name; 146 this.name = name;
148 this.len = strlen(name); 147 this.len = strlen(name);
149 this.hash = 0; 148 this.hash = 0;
@@ -160,15 +159,12 @@ struct file *anon_inode_getfile(const char *name,
160 159
161 d_instantiate(path.dentry, anon_inode_inode); 160 d_instantiate(path.dentry, anon_inode_inode);
162 161
163 error = -ENFILE;
164 file = alloc_file(&path, OPEN_FMODE(flags), fops); 162 file = alloc_file(&path, OPEN_FMODE(flags), fops);
165 if (!file) 163 if (IS_ERR(file))
166 goto err_dput; 164 goto err_dput;
167 file->f_mapping = anon_inode_inode->i_mapping; 165 file->f_mapping = anon_inode_inode->i_mapping;
168 166
169 file->f_pos = 0;
170 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); 167 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
171 file->f_version = 0;
172 file->private_data = priv; 168 file->private_data = priv;
173 169
174 return file; 170 return file;
@@ -177,7 +173,7 @@ err_dput:
177 path_put(&path); 173 path_put(&path);
178err_module: 174err_module:
179 module_put(fops->owner); 175 module_put(fops->owner);
180 return ERR_PTR(error); 176 return file;
181} 177}
182EXPORT_SYMBOL_GPL(anon_inode_getfile); 178EXPORT_SYMBOL_GPL(anon_inode_getfile);
183 179
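anon_inode_getfile() now relies on alloc_file() reporting failure as an ERR_PTR-encoded pointer instead of NULL, so the local "error" variable goes away and the same pointer can be returned on both the success and failure paths. The calling convention, sketched:

	file = alloc_file(&path, OPEN_FMODE(flags), fops);
	if (IS_ERR(file))	/* the error code is encoded in the pointer itself */
		goto err_dput;	/* and is propagated simply by returning that pointer */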
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b785e7707959..3f1128b37e46 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -273,7 +273,7 @@ static inline int autofs_prepare_pipe(struct file *pipe)
273{ 273{
274 if (!pipe->f_op || !pipe->f_op->write) 274 if (!pipe->f_op || !pipe->f_op->write)
275 return -EINVAL; 275 return -EINVAL;
276 if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode)) 276 if (!S_ISFIFO(file_inode(pipe)->i_mode))
277 return -EINVAL; 277 return -EINVAL;
278 /* We want a packet pipe */ 278 /* We want a packet pipe */
279 pipe->f_flags |= O_DIRECT; 279 pipe->f_flags |= O_DIRECT;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9f68a37bb2b2..743c7c2c949d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -159,7 +159,7 @@ static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f)
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (f) { 161 if (f) {
162 inode = f->f_path.dentry->d_inode; 162 inode = file_inode(f);
163 sbi = autofs4_sbi(inode->i_sb); 163 sbi = autofs4_sbi(inode->i_sb);
164 } 164 }
165 return sbi; 165 return sbi;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c93447604da8..230bd2aad4f4 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -587,7 +587,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
587 587
588 /* This allows root to remove symlinks */ 588 /* This allows root to remove symlinks */
589 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) 589 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
590 return -EACCES; 590 return -EPERM;
591 591
592 if (atomic_dec_and_test(&ino->count)) { 592 if (atomic_dec_and_test(&ino->count)) {
593 p_ino = autofs4_dentry_ino(dentry->d_parent); 593 p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -874,7 +874,7 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
874static long autofs4_root_ioctl(struct file *filp, 874static long autofs4_root_ioctl(struct file *filp,
875 unsigned int cmd, unsigned long arg) 875 unsigned int cmd, unsigned long arg)
876{ 876{
877 struct inode *inode = filp->f_dentry->d_inode; 877 struct inode *inode = file_inode(filp);
878 return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); 878 return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
879} 879}
880 880
@@ -882,7 +882,7 @@ static long autofs4_root_ioctl(struct file *filp,
882static long autofs4_root_compat_ioctl(struct file *filp, 882static long autofs4_root_compat_ioctl(struct file *filp,
883 unsigned int cmd, unsigned long arg) 883 unsigned int cmd, unsigned long arg)
884{ 884{
885 struct inode *inode = filp->f_path.dentry->d_inode; 885 struct inode *inode = file_inode(filp);
886 int ret; 886 int ret;
887 887
888 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) 888 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
diff --git a/fs/befs/Kconfig b/fs/befs/Kconfig
index 7835d30f211f..edc5cc2aefad 100644
--- a/fs/befs/Kconfig
+++ b/fs/befs/Kconfig
@@ -1,6 +1,6 @@
1config BEFS_FS 1config BEFS_FS
2 tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)" 2 tristate "BeOS file system (BeFS) support (read only)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 The BeOS File System (BeFS) is the native file system of Be, Inc's 6 The BeOS File System (BeFS) is the native file system of Be, Inc's
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 2b3bda8d5e68..c8f4e25eb9e2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -213,7 +213,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
213static int 213static int
214befs_readdir(struct file *filp, void *dirent, filldir_t filldir) 214befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
215{ 215{
216 struct inode *inode = filp->f_path.dentry->d_inode; 216 struct inode *inode = file_inode(filp);
217 struct super_block *sb = inode->i_sb; 217 struct super_block *sb = inode->i_sb;
218 befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; 218 befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
219 befs_off_t value; 219 befs_off_t value;
diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig
index c2336c62024f..3728a6479c64 100644
--- a/fs/bfs/Kconfig
+++ b/fs/bfs/Kconfig
@@ -1,6 +1,6 @@
1config BFS_FS 1config BFS_FS
2 tristate "BFS file system support (EXPERIMENTAL)" 2 tristate "BFS file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 help 4 help
5 Boot File System (BFS) is a file system used under SCO UnixWare to 5 Boot File System (BFS) is a file system used under SCO UnixWare to
6 allow the bootloader access to the kernel image and other important 6 allow the bootloader access to the kernel image and other important
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 2785ef91191a..3f422f6bb5ca 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -28,7 +28,7 @@ static struct buffer_head *bfs_find_entry(struct inode *dir,
28 28
29static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) 29static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
30{ 30{
31 struct inode *dir = f->f_path.dentry->d_inode; 31 struct inode *dir = file_inode(f);
32 struct buffer_head *bh; 32 struct buffer_head *bh;
33 struct bfs_dirent *de; 33 struct bfs_dirent *de;
34 struct bfs_sb_info *info = BFS_SB(dir->i_sb); 34 struct bfs_sb_info *info = BFS_SB(dir->i_sb);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 6043567b95c2..bbc8f8827eac 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -214,7 +214,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
214 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && 214 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
215 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || 215 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
216 N_TRSIZE(ex) || N_DRSIZE(ex) || 216 N_TRSIZE(ex) || N_DRSIZE(ex) ||
217 i_size_read(bprm->file->f_path.dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { 217 i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
218 return -ENOEXEC; 218 return -ENOEXEC;
219 } 219 }
220 220
@@ -367,7 +367,7 @@ static int load_aout_library(struct file *file)
367 int retval; 367 int retval;
368 struct exec ex; 368 struct exec ex;
369 369
370 inode = file->f_path.dentry->d_inode; 370 inode = file_inode(file);
371 371
372 retval = -ENOEXEC; 372 retval = -ENOEXEC;
373 error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); 373 error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..a5702d74d2bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -33,6 +33,7 @@
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/utsname.h> 34#include <linux/utsname.h>
35#include <linux/coredump.h> 35#include <linux/coredump.h>
36#include <linux/sched.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include <asm/param.h> 38#include <asm/param.h>
38#include <asm/page.h> 39#include <asm/page.h>
@@ -1140,7 +1141,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1140 1141
1141 /* By default, dump shared memory if mapped from an anonymous file. */ 1142 /* By default, dump shared memory if mapped from an anonymous file. */
1142 if (vma->vm_flags & VM_SHARED) { 1143 if (vma->vm_flags & VM_SHARED) {
1143 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? 1144 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1144 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED)) 1145 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1145 goto whole; 1146 goto whole;
1146 return 0; 1147 return 0;
@@ -1248,7 +1249,7 @@ static int writenote(struct memelfnote *men, struct file *file,
1248#undef DUMP_WRITE 1249#undef DUMP_WRITE
1249 1250
1250static void fill_elf_header(struct elfhdr *elf, int segs, 1251static void fill_elf_header(struct elfhdr *elf, int segs,
1251 u16 machine, u32 flags, u8 osabi) 1252 u16 machine, u32 flags)
1252{ 1253{
1253 memset(elf, 0, sizeof(*elf)); 1254 memset(elf, 0, sizeof(*elf));
1254 1255
@@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1320 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1321 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1321 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1322 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1322 } else { 1323 } else {
1323 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1324 cputime_t utime, stime;
1324 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1325
1326 task_cputime(p, &utime, &stime);
1327 cputime_to_timeval(utime, &prstatus->pr_utime);
1328 cputime_to_timeval(stime, &prstatus->pr_stime);
1325 } 1329 }
1326 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1330 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1327 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1331 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
@@ -1630,7 +1634,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1630 * Initialize the ELF file header. 1634 * Initialize the ELF file header.
1631 */ 1635 */
1632 fill_elf_header(elf, phdrs, 1636 fill_elf_header(elf, phdrs,
1633 view->e_machine, view->e_flags, view->ei_osabi); 1637 view->e_machine, view->e_flags);
1634 1638
1635 /* 1639 /*
1636 * Allocate a structure for each thread. 1640 * Allocate a structure for each thread.
@@ -1870,7 +1874,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1870 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 1874 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871 1875
1872 /* Set up header */ 1876 /* Set up header */
1873 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI); 1877 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1874 1878
1875 /* 1879 /*
1876 * Set up the notes in similar form to SVR4 core dumps made 1880 * Set up the notes in similar form to SVR4 core dumps made
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index dc84732e554f..9c13e023e2b7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -909,7 +909,7 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
909 909
910dynamic_error: 910dynamic_error:
911 printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n", 911 printk("ELF FDPIC %s with invalid DYNAMIC section (inode=%lu)\n",
912 what, file->f_path.dentry->d_inode->i_ino); 912 what, file_inode(file)->i_ino);
913 return -ELIBBAD; 913 return -ELIBBAD;
914} 914}
915 915
@@ -1219,7 +1219,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
1219 1219
1220 /* By default, dump shared memory if mapped from an anonymous file. */ 1220 /* By default, dump shared memory if mapped from an anonymous file. */
1221 if (vma->vm_flags & VM_SHARED) { 1221 if (vma->vm_flags & VM_SHARED) {
1222 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0) { 1222 if (file_inode(vma->vm_file)->i_nlink == 0) {
1223 dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags); 1223 dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
1224 kdcore("%08lx: %08lx: %s (share)", vma->vm_start, 1224 kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
1225 vma->vm_flags, dump_ok ? "yes" : "no"); 1225 vma->vm_flags, dump_ok ? "yes" : "no");
@@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime); 1375 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime); 1376 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1377 } else { 1377 } else {
1378 cputime_to_timeval(p->utime, &prstatus->pr_utime); 1378 cputime_t utime, stime;
1379 cputime_to_timeval(p->stime, &prstatus->pr_stime); 1379
1380 task_cputime(p, &utime, &stime);
1381 cputime_to_timeval(utime, &prstatus->pr_utime);
1382 cputime_to_timeval(stime, &prstatus->pr_stime);
1380 } 1383 }
1381 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); 1384 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); 1385 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b56371981d16..2036d21baaef 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -438,7 +438,7 @@ static int load_flat_file(struct linux_binprm * bprm,
438 int ret; 438 int ret;
439 439
440 hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */ 440 hdr = ((struct flat_hdr *) bprm->buf); /* exec-header */
441 inode = bprm->file->f_path.dentry->d_inode; 441 inode = file_inode(bprm->file);
442 442
443 text_len = ntohl(hdr->data_start); 443 text_len = ntohl(hdr->data_start);
444 data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start); 444 data_len = ntohl(hdr->data_end) - ntohl(hdr->data_start);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 0c8869fdd14e..fecbbf3f8ff2 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -531,7 +531,7 @@ static void kill_node(Node *e)
531static ssize_t 531static ssize_t
532bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 532bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
533{ 533{
534 Node *e = file->f_path.dentry->d_inode->i_private; 534 Node *e = file_inode(file)->i_private;
535 ssize_t res; 535 ssize_t res;
536 char *page; 536 char *page;
537 537
@@ -550,7 +550,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
550 size_t count, loff_t *ppos) 550 size_t count, loff_t *ppos)
551{ 551{
552 struct dentry *root; 552 struct dentry *root;
553 Node *e = file->f_path.dentry->d_inode->i_private; 553 Node *e = file_inode(file)->i_private;
554 int res = parse_command(buffer, count); 554 int res = parse_command(buffer, count);
555 555
556 switch (res) { 556 switch (res) {
diff --git a/fs/bio.c b/fs/bio.c
index b96fc6ce4855..bb5768f59b32 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error)
1428 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) 1428 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1429 error = -EIO; 1429 error = -EIO;
1430 1430
1431 trace_block_bio_complete(bio, error);
1432
1431 if (bio->bi_end_io) 1433 if (bio->bi_end_io)
1432 bio->bi_end_io(bio, error); 1434 bio->bi_end_io(bio, error);
1433} 1435}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 172f8491a2bd..aea605c98ba6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -318,7 +318,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
318 318
319/* 319/*
320 * private llseek: 320 * private llseek:
321 * for a block special file file->f_path.dentry->d_inode->i_size is zero 321 * for a block special file file_inode(file)->i_size is zero
322 * so we compute the size by hand (just as in block_read/write above) 322 * so we compute the size by hand (just as in block_read/write above)
323 */ 323 */
324static loff_t block_llseek(struct file *file, loff_t offset, int whence) 324static loff_t block_llseek(struct file *file, loff_t offset, int whence)
@@ -994,6 +994,7 @@ int revalidate_disk(struct gendisk *disk)
994 994
995 mutex_lock(&bdev->bd_mutex); 995 mutex_lock(&bdev->bd_mutex);
996 check_disk_size_change(disk, bdev); 996 check_disk_size_change(disk, bdev);
997 bdev->bd_invalidated = 0;
997 mutex_unlock(&bdev->bd_mutex); 998 mutex_unlock(&bdev->bd_mutex);
998 bdput(bdev); 999 bdput(bdev);
999 return ret; 1000 return ret;
@@ -1032,7 +1033,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
1032{ 1033{
1033 unsigned bsize = bdev_logical_block_size(bdev); 1034 unsigned bsize = bdev_logical_block_size(bdev);
1034 1035
1035 bdev->bd_inode->i_size = size; 1036 mutex_lock(&bdev->bd_inode->i_mutex);
1037 i_size_write(bdev->bd_inode, size);
1038 mutex_unlock(&bdev->bd_inode->i_mutex);
1036 while (bsize < PAGE_CACHE_SIZE) { 1039 while (bsize < PAGE_CACHE_SIZE) {
1037 if (size & bsize) 1040 if (size & bsize)
1038 break; 1041 break;
@@ -1117,7 +1120,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1117 } 1120 }
1118 } 1121 }
1119 1122
1120 if (!ret && !bdev->bd_openers) { 1123 if (!ret) {
1121 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1124 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1122 bdi = blk_get_backing_dev_info(bdev); 1125 bdi = blk_get_backing_dev_info(bdev);
1123 if (bdi == NULL) 1126 if (bdi == NULL)
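bd_set_size() now updates the block device inode size with i_size_write() under i_mutex rather than assigning i_size directly; i_size_write() is assumed to require caller-side serialization (on 32-bit SMP it updates a seqcount that concurrent writers would corrupt), which is presumably why the mutex is taken here. Sketch of the pattern:

	mutex_lock(&bdev->bd_inode->i_mutex);	/* serialize writers of i_size */
	i_size_write(bdev->bd_inode, size);	/* seqcount-protected update of the 64-bit size */
	mutex_unlock(&bdev->bd_inode->i_mutex);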
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index d33f01c08b60..ccd25ba7a9ac 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format" 2 tristate "Btrfs filesystem Unstable disk format"
3 depends on EXPERIMENTAL
4 select LIBCRC32C 3 select LIBCRC32C
5 select ZLIB_INFLATE 4 select ZLIB_INFLATE
6 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 614f34a899c2..81ee29eeb7ca 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -22,10 +22,10 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
22 22
23 if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { 23 if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
24 *max_len = BTRFS_FID_SIZE_CONNECTABLE; 24 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
25 return 255; 25 return FILEID_INVALID;
26 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { 26 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
27 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; 27 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
28 return 255; 28 return FILEID_INVALID;
29 } 29 }
30 30
31 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 31 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f6..cf54bdfee334 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root,
3689 return 0; 3689 return 0;
3690} 3690}
3691 3691
3692static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
3693 unsigned long nr_pages,
3694 enum wb_reason reason)
3695{
3696 if (!writeback_in_progress(sb->s_bdi) &&
3697 down_read_trylock(&sb->s_umount)) {
3698 writeback_inodes_sb_nr(sb, nr_pages, reason);
3699 up_read(&sb->s_umount);
3700 return 1;
3701 }
3702
3703 return 0;
3704}
3705
3706/* 3692/*
3707 * shrink metadata reservation for delalloc 3693 * shrink metadata reservation for delalloc
3708 */ 3694 */
@@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
3735 while (delalloc_bytes && loops < 3) { 3721 while (delalloc_bytes && loops < 3) {
3736 max_reclaim = min(delalloc_bytes, to_reclaim); 3722 max_reclaim = min(delalloc_bytes, to_reclaim);
3737 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; 3723 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
3738 writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, 3724 try_to_writeback_inodes_sb_nr(root->fs_info->sb,
3739 nr_pages, 3725 nr_pages,
3740 WB_REASON_FS_FREE_SPACE); 3726 WB_REASON_FS_FREE_SPACE);
3741 3727
3742 /* 3728 /*
3743 * We need to wait for the async pages to actually start before 3729 * We need to wait for the async pages to actually start before
@@ -3997,7 +3983,7 @@ again:
3997 * We make the other tasks wait for the flush only when we can flush 3983 * We make the other tasks wait for the flush only when we can flush
3998 * all things. 3984 * all things.
3999 */ 3985 */
4000 if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { 3986 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4001 flushing = true; 3987 flushing = true;
4002 space_info->flush = 1; 3988 space_info->flush = 1;
4003 } 3989 }
@@ -4534,7 +4520,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4534 unsigned nr_extents = 0; 4520 unsigned nr_extents = 0;
4535 int extra_reserve = 0; 4521 int extra_reserve = 0;
4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 4522 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
4537 int ret; 4523 int ret = 0;
4538 bool delalloc_lock = true; 4524 bool delalloc_lock = true;
4539 4525
4540 /* If we are a free space inode we need to not flush since we will be in 4526 /* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4565,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4579 csum_bytes = BTRFS_I(inode)->csum_bytes; 4565 csum_bytes = BTRFS_I(inode)->csum_bytes;
4580 spin_unlock(&BTRFS_I(inode)->lock); 4566 spin_unlock(&BTRFS_I(inode)->lock);
4581 4567
4582 if (root->fs_info->quota_enabled) { 4568 if (root->fs_info->quota_enabled)
4583 ret = btrfs_qgroup_reserve(root, num_bytes + 4569 ret = btrfs_qgroup_reserve(root, num_bytes +
4584 nr_extents * root->leafsize); 4570 nr_extents * root->leafsize);
4585 if (ret) {
4586 spin_lock(&BTRFS_I(inode)->lock);
4587 calc_csum_metadata_size(inode, num_bytes, 0);
4588 spin_unlock(&BTRFS_I(inode)->lock);
4589 if (delalloc_lock)
4590 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4591 return ret;
4592 }
4593 }
4594 4571
4595 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4572 /*
4573 * ret != 0 here means the qgroup reservation failed, we go straight to
4574 * the shared error handling then.
4575 */
4576 if (ret == 0)
4577 ret = reserve_metadata_bytes(root, block_rsv,
4578 to_reserve, flush);
4579
4596 if (ret) { 4580 if (ret) {
4597 u64 to_free = 0; 4581 u64 to_free = 0;
4598 unsigned dropped; 4582 unsigned dropped;
@@ -5560,7 +5544,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5560 int empty_cluster = 2 * 1024 * 1024; 5544 int empty_cluster = 2 * 1024 * 1024;
5561 struct btrfs_space_info *space_info; 5545 struct btrfs_space_info *space_info;
5562 int loop = 0; 5546 int loop = 0;
5563 int index = 0; 5547 int index = __get_raid_index(data);
5564 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5548 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5565 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5549 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5566 bool found_uncached_bg = false; 5550 bool found_uncached_bg = false;
@@ -6524,7 +6508,7 @@ reada:
6524} 6508}
6525 6509
6526/* 6510/*
6527 * hepler to process tree block while walking down the tree. 6511 * helper to process tree block while walking down the tree.
6528 * 6512 *
6529 * when wc->stage == UPDATE_BACKREF, this function updates 6513 * when wc->stage == UPDATE_BACKREF, this function updates
6530 * back refs for pointers in the block. 6514 * back refs for pointers in the block.
@@ -6599,7 +6583,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6599} 6583}
6600 6584
6601/* 6585/*
6602 * hepler to process tree block pointer. 6586 * helper to process tree block pointer.
6603 * 6587 *
6604 * when wc->stage == DROP_REFERENCE, this function checks 6588 * when wc->stage == DROP_REFERENCE, this function checks
6605 * reference count of the block pointed to. if the block 6589 * reference count of the block pointed to. if the block
@@ -6737,7 +6721,7 @@ skip:
6737} 6721}
6738 6722
6739/* 6723/*
6740 * hepler to process tree block while walking up the tree. 6724 * helper to process tree block while walking up the tree.
6741 * 6725 *
6742 * when wc->stage == DROP_REFERENCE, this function drops 6726 * when wc->stage == DROP_REFERENCE, this function drops
6743 * reference count on the block. 6727 * reference count on the block.
@@ -6788,11 +6772,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6788 &wc->flags[level]); 6772 &wc->flags[level]);
6789 if (ret < 0) { 6773 if (ret < 0) {
6790 btrfs_tree_unlock_rw(eb, path->locks[level]); 6774 btrfs_tree_unlock_rw(eb, path->locks[level]);
6775 path->locks[level] = 0;
6791 return ret; 6776 return ret;
6792 } 6777 }
6793 BUG_ON(wc->refs[level] == 0); 6778 BUG_ON(wc->refs[level] == 0);
6794 if (wc->refs[level] == 1) { 6779 if (wc->refs[level] == 1) {
6795 btrfs_tree_unlock_rw(eb, path->locks[level]); 6780 btrfs_tree_unlock_rw(eb, path->locks[level]);
6781 path->locks[level] = 0;
6796 return 1; 6782 return 1;
6797 } 6783 }
6798 } 6784 }
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f169d6b11d7f..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) 171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
172 return 0; 172 return 0;
173 173
174 if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
175 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
176 return 0;
177
174 if (extent_map_end(prev) == next->start && 178 if (extent_map_end(prev) == next->start &&
175 prev->flags == next->flags && 179 prev->flags == next->flags &&
176 prev->bdev == next->bdev && 180 prev->bdev == next->bdev &&
@@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
255 if (!em) 259 if (!em)
256 goto out; 260 goto out;
257 261
258 list_move(&em->list, &tree->modified_extents); 262 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
263 list_move(&em->list, &tree->modified_extents);
259 em->generation = gen; 264 em->generation = gen;
260 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 265 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
261 em->mod_start = em->start; 266 em->mod_start = em->start;
@@ -280,6 +285,13 @@ out:
280 285
281} 286}
282 287
288void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
289{
290 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
291 if (em->in_tree)
292 try_merge_map(tree, em);
293}
294
283/** 295/**
284 * add_extent_mapping - add new extent map to the extent tree 296 * add_extent_mapping - add new extent map to the extent tree
285 * @tree: tree to insert new map in 297 * @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 922943ce29e8..c6598c89cff8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
69int __init extent_map_init(void); 69int __init extent_map_init(void);
70void extent_map_exit(void); 70void extent_map_exit(void);
71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); 71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
72void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
72struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 73struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
73 u64 start, u64 len); 74 u64 start, u64 len);
74#endif 75#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index bd38cef42358..94aa53b38721 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
460 if (!contig) 460 if (!contig)
461 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 461 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
462 462
463 if (!contig && (offset >= ordered->file_offset + ordered->len || 463 if (offset >= ordered->file_offset + ordered->len ||
464 offset < ordered->file_offset)) { 464 offset < ordered->file_offset) {
465 unsigned long bytes_left; 465 unsigned long bytes_left;
466 sums->len = this_sum_bytes; 466 sums->len = this_sum_bytes;
467 this_sum_bytes = 0; 467 this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43edb..4b241fe9d2fe 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
293 struct btrfs_key key; 293 struct btrfs_key key;
294 struct btrfs_ioctl_defrag_range_args range; 294 struct btrfs_ioctl_defrag_range_args range;
295 int num_defrag; 295 int num_defrag;
296 int index;
297 int ret;
296 298
297 /* get the inode */ 299 /* get the inode */
298 key.objectid = defrag->root; 300 key.objectid = defrag->root;
299 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 301 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
300 key.offset = (u64)-1; 302 key.offset = (u64)-1;
303
304 index = srcu_read_lock(&fs_info->subvol_srcu);
305
301 inode_root = btrfs_read_fs_root_no_name(fs_info, &key); 306 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
302 if (IS_ERR(inode_root)) { 307 if (IS_ERR(inode_root)) {
303 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 308 ret = PTR_ERR(inode_root);
304 return PTR_ERR(inode_root); 309 goto cleanup;
310 }
311 if (btrfs_root_refs(&inode_root->root_item) == 0) {
312 ret = -ENOENT;
313 goto cleanup;
305 } 314 }
306 315
307 key.objectid = defrag->ino; 316 key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
309 key.offset = 0; 318 key.offset = 0;
310 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); 319 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
311 if (IS_ERR(inode)) { 320 if (IS_ERR(inode)) {
312 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 321 ret = PTR_ERR(inode);
313 return PTR_ERR(inode); 322 goto cleanup;
314 } 323 }
324 srcu_read_unlock(&fs_info->subvol_srcu, index);
315 325
316 /* do a chunk of defrag */ 326 /* do a chunk of defrag */
317 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 327 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
346 356
347 iput(inode); 357 iput(inode);
348 return 0; 358 return 0;
359cleanup:
360 srcu_read_unlock(&fs_info->subvol_srcu, index);
361 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
362 return ret;
349} 363}
350 364
351/* 365/*
@@ -1211,7 +1225,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1211 struct extent_state *cached_state = NULL; 1225 struct extent_state *cached_state = NULL;
1212 int i; 1226 int i;
1213 unsigned long index = pos >> PAGE_CACHE_SHIFT; 1227 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1214 struct inode *inode = fdentry(file)->d_inode; 1228 struct inode *inode = file_inode(file);
1215 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1229 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
1216 int err = 0; 1230 int err = 0;
1217 int faili = 0; 1231 int faili = 0;
@@ -1298,7 +1312,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1298 struct iov_iter *i, 1312 struct iov_iter *i,
1299 loff_t pos) 1313 loff_t pos)
1300{ 1314{
1301 struct inode *inode = fdentry(file)->d_inode; 1315 struct inode *inode = file_inode(file);
1302 struct btrfs_root *root = BTRFS_I(inode)->root; 1316 struct btrfs_root *root = BTRFS_I(inode)->root;
1303 struct page **pages = NULL; 1317 struct page **pages = NULL;
1304 unsigned long first_index; 1318 unsigned long first_index;
@@ -1486,7 +1500,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1486 unsigned long nr_segs, loff_t pos) 1500 unsigned long nr_segs, loff_t pos)
1487{ 1501{
1488 struct file *file = iocb->ki_filp; 1502 struct file *file = iocb->ki_filp;
1489 struct inode *inode = fdentry(file)->d_inode; 1503 struct inode *inode = file_inode(file);
1490 struct btrfs_root *root = BTRFS_I(inode)->root; 1504 struct btrfs_root *root = BTRFS_I(inode)->root;
1491 loff_t *ppos = &iocb->ki_pos; 1505 loff_t *ppos = &iocb->ki_pos;
1492 u64 start_pos; 1506 u64 start_pos;
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1594 if (err < 0 && num_written > 0) 1608 if (err < 0 && num_written > 0)
1595 num_written = err; 1609 num_written = err;
1596 } 1610 }
1597out: 1611
1598 if (sync) 1612 if (sync)
1599 atomic_dec(&BTRFS_I(inode)->sync_writers); 1613 atomic_dec(&BTRFS_I(inode)->sync_writers);
1614out:
1600 sb_end_write(inode->i_sb); 1615 sb_end_write(inode->i_sb);
1601 current->backing_dev_info = NULL; 1616 current->backing_dev_info = NULL;
1602 return num_written ? num_written : err; 1617 return num_written ? num_written : err;
@@ -2087,7 +2102,7 @@ out:
2087static long btrfs_fallocate(struct file *file, int mode, 2102static long btrfs_fallocate(struct file *file, int mode,
2088 loff_t offset, loff_t len) 2103 loff_t offset, loff_t len)
2089{ 2104{
2090 struct inode *inode = file->f_path.dentry->d_inode; 2105 struct inode *inode = file_inode(file);
2091 struct extent_state *cached_state = NULL; 2106 struct extent_state *cached_state = NULL;
2092 u64 cur_offset; 2107 u64 cur_offset;
2093 u64 last_byte; 2108 u64 last_byte;
@@ -2241,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2241 if (lockend <= lockstart) 2256 if (lockend <= lockstart)
2242 lockend = lockstart + root->sectorsize; 2257 lockend = lockstart + root->sectorsize;
2243 2258
2259 lockend--;
2244 len = lockend - lockstart + 1; 2260 len = lockend - lockstart + 1;
2245 2261
2246 len = max_t(u64, len, root->sectorsize); 2262 len = max_t(u64, len, root->sectorsize);
@@ -2307,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2307 } 2323 }
2308 } 2324 }
2309 2325
2310 *offset = start; 2326 if (!test_bit(EXTENT_FLAG_PREALLOC,
2311 free_extent_map(em); 2327 &em->flags)) {
2312 break; 2328 *offset = start;
2329 free_extent_map(em);
2330 break;
2331 }
2313 } 2332 }
2314 } 2333 }
2315 2334
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 59ea2e4349c9..0be7a8742a43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1862{ 1862{
1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1864 struct btrfs_free_space *info; 1864 struct btrfs_free_space *info;
1865 int ret = 0; 1865 int ret;
1866 bool re_search = false;
1866 1867
1867 spin_lock(&ctl->tree_lock); 1868 spin_lock(&ctl->tree_lock);
1868 1869
1869again: 1870again:
1871 ret = 0;
1870 if (!bytes) 1872 if (!bytes)
1871 goto out_lock; 1873 goto out_lock;
1872 1874
@@ -1879,17 +1881,17 @@ again:
1879 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1881 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1880 1, 0); 1882 1, 0);
1881 if (!info) { 1883 if (!info) {
1882 /* the tree logging code might be calling us before we 1884 /*
1883 * have fully loaded the free space rbtree for this 1885 * If we found a partial bit of our free space in a
1884 * block group. So it is possible the entry won't 1886 * bitmap but then couldn't find the other part this may
1885 * be in the rbtree yet at all. The caching code 1887 * be a problem, so WARN about it.
1886 * will make sure not to put it in the rbtree if
1887 * the logging code has pinned it.
1888 */ 1888 */
1889 WARN_ON(re_search);
1889 goto out_lock; 1890 goto out_lock;
1890 } 1891 }
1891 } 1892 }
1892 1893
1894 re_search = false;
1893 if (!info->bitmap) { 1895 if (!info->bitmap) {
1894 unlink_free_space(ctl, info); 1896 unlink_free_space(ctl, info);
1895 if (offset == info->offset) { 1897 if (offset == info->offset) {
@@ -1935,8 +1937,10 @@ again:
1935 } 1937 }
1936 1938
1937 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1939 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1938 if (ret == -EAGAIN) 1940 if (ret == -EAGAIN) {
1941 re_search = true;
1939 goto again; 1942 goto again;
1943 }
1940 BUG_ON(ret); /* logic error */ 1944 BUG_ON(ret); /* logic error */
1941out_lock: 1945out_lock:
1942 spin_unlock(&ctl->tree_lock); 1946 spin_unlock(&ctl->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e6..55c07b650378 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
89}; 89};
90 90
91static int btrfs_setsize(struct inode *inode, loff_t newsize); 91static int btrfs_setsize(struct inode *inode, struct iattr *attr);
92static int btrfs_truncate(struct inode *inode); 92static int btrfs_truncate(struct inode *inode);
93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); 93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
94static noinline int cow_file_range(struct inode *inode, 94static noinline int cow_file_range(struct inode *inode,
@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2478 continue; 2478 continue;
2479 } 2479 }
2480 nr_truncate++; 2480 nr_truncate++;
2481
2482 /* 1 for the orphan item deletion. */
2483 trans = btrfs_start_transaction(root, 1);
2484 if (IS_ERR(trans)) {
2485 ret = PTR_ERR(trans);
2486 goto out;
2487 }
2488 ret = btrfs_orphan_add(trans, inode);
2489 btrfs_end_transaction(trans, root);
2490 if (ret)
2491 goto out;
2492
2481 ret = btrfs_truncate(inode); 2493 ret = btrfs_truncate(inode);
2482 } else { 2494 } else {
2483 nr_unlink++; 2495 nr_unlink++;
@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3665 block_end - cur_offset, 0); 3677 block_end - cur_offset, 0);
3666 if (IS_ERR(em)) { 3678 if (IS_ERR(em)) {
3667 err = PTR_ERR(em); 3679 err = PTR_ERR(em);
3680 em = NULL;
3668 break; 3681 break;
3669 } 3682 }
3670 last_byte = min(extent_map_end(em), block_end); 3683 last_byte = min(extent_map_end(em), block_end);
@@ -3748,16 +3761,27 @@ next:
3748 return err; 3761 return err;
3749} 3762}
3750 3763
3751static int btrfs_setsize(struct inode *inode, loff_t newsize) 3764static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3752{ 3765{
3753 struct btrfs_root *root = BTRFS_I(inode)->root; 3766 struct btrfs_root *root = BTRFS_I(inode)->root;
3754 struct btrfs_trans_handle *trans; 3767 struct btrfs_trans_handle *trans;
3755 loff_t oldsize = i_size_read(inode); 3768 loff_t oldsize = i_size_read(inode);
3769 loff_t newsize = attr->ia_size;
3770 int mask = attr->ia_valid;
3756 int ret; 3771 int ret;
3757 3772
3758 if (newsize == oldsize) 3773 if (newsize == oldsize)
3759 return 0; 3774 return 0;
3760 3775
3776 /*
3777 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
3778 * special case where we need to update the times despite not having
3779 * these flags set. For all other operations the VFS set these flags
3780 * explicitly if it wants a timestamp update.
3781 */
3782 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
3783 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
3784
3761 if (newsize > oldsize) { 3785 if (newsize > oldsize) {
3762 truncate_pagecache(inode, oldsize, newsize); 3786 truncate_pagecache(inode, oldsize, newsize);
3763 ret = btrfs_cont_expand(inode, oldsize, newsize); 3787 ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3783 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 3807 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
3784 &BTRFS_I(inode)->runtime_flags); 3808 &BTRFS_I(inode)->runtime_flags);
3785 3809
3810 /*
3811 * 1 for the orphan item we're going to add
3812 * 1 for the orphan item deletion.
3813 */
3814 trans = btrfs_start_transaction(root, 2);
3815 if (IS_ERR(trans))
3816 return PTR_ERR(trans);
3817
3818 /*
3819 * We need to do this in case we fail at _any_ point during the
3820 * actual truncate. Once we do the truncate_setsize we could
3821 * invalidate pages which forces any outstanding ordered io to
3822 * be instantly completed which will give us extents that need
3823 * to be truncated. If we fail to get an orphan inode down we
3824 * could have left over extents that were never meant to live,
 3825 * so we need to guarantee from this point on that everything
3826 * will be consistent.
3827 */
3828 ret = btrfs_orphan_add(trans, inode);
3829 btrfs_end_transaction(trans, root);
3830 if (ret)
3831 return ret;
3832
3786 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3787 truncate_setsize(inode, newsize); 3834 truncate_setsize(inode, newsize);
3788 ret = btrfs_truncate(inode); 3835 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode);
3789 } 3838 }
3790 3839
3791 return ret; 3840 return ret;
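Condensed, the shrinking path now does the orphan bookkeeping before any pagecache is thrown away. A minimal sketch of that ordering, using only the helpers visible in the hunk above (the function name is made up for the illustration):

        static int btrfs_shrink_with_orphan(struct inode *inode, loff_t newsize)
        {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                struct btrfs_trans_handle *trans;
                int ret;

                /* 1 for the orphan item we're going to add, 1 for its later deletion */
                trans = btrfs_start_transaction(root, 2);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);

                /* pin the inode on the orphan list before any pagecache is invalidated */
                ret = btrfs_orphan_add(trans, inode);
                btrfs_end_transaction(trans, root);
                if (ret)
                        return ret;

                truncate_setsize(inode, newsize);
                ret = btrfs_truncate(inode);
                if (ret && inode->i_nlink)
                        btrfs_orphan_del(NULL, inode);  /* truncate failed: drop the in-memory orphan */
                return ret;
        }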
@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3805 return err; 3854 return err;
3806 3855
3807 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3856 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3808 err = btrfs_setsize(inode, attr->ia_size); 3857 err = btrfs_setsize(inode, attr);
3809 if (err) 3858 if (err)
3810 return err; 3859 return err;
3811 } 3860 }
@@ -4342,7 +4391,7 @@ unsigned char btrfs_filetype_table[] = {
4342static int btrfs_real_readdir(struct file *filp, void *dirent, 4391static int btrfs_real_readdir(struct file *filp, void *dirent,
4343 filldir_t filldir) 4392 filldir_t filldir)
4344{ 4393{
4345 struct inode *inode = filp->f_dentry->d_inode; 4394 struct inode *inode = file_inode(filp);
4346 struct btrfs_root *root = BTRFS_I(inode)->root; 4395 struct btrfs_root *root = BTRFS_I(inode)->root;
4347 struct btrfs_item *item; 4396 struct btrfs_item *item;
4348 struct btrfs_dir_item *di; 4397 struct btrfs_dir_item *di;
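This hunk is part of a conversion that runs through the rest of the series: the open-coded ways of reaching a file's inode are replaced by the file_inode() accessor, a small helper that returns the inode backing a struct file. Before/after, taken from hunks in this diff:

        /* before */
        struct inode *inode = filp->f_dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;

        /* after */
        struct inode *inode = file_inode(filp);
        struct btrfs_root *root = BTRFS_I(file_inode(file))->root;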
@@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5572 return em; 5621 return em;
5573 if (em) { 5622 if (em) {
5574 /* 5623 /*
5575 * if our em maps to a hole, there might 5624 * if our em maps to
5576 * actually be delalloc bytes behind it 5625 * - a hole or
5626 * - a pre-alloc extent,
5627 * there might actually be delalloc bytes behind it.
5577 */ 5628 */
5578 if (em->block_start != EXTENT_MAP_HOLE) 5629 if (em->block_start != EXTENT_MAP_HOLE &&
5630 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5579 return em; 5631 return em;
5580 else 5632 else
5581 hole_em = em; 5633 hole_em = em;
@@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5657 */ 5709 */
5658 em->block_start = hole_em->block_start; 5710 em->block_start = hole_em->block_start;
5659 em->block_len = hole_len; 5711 em->block_len = hole_len;
5712 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
5713 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
5660 } else { 5714 } else {
5661 em->start = range_start; 5715 em->start = range_start;
5662 em->len = found; 5716 em->len = found;
@@ -6737,7 +6791,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6737int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 6791int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6738{ 6792{
6739 struct page *page = vmf->page; 6793 struct page *page = vmf->page;
6740 struct inode *inode = fdentry(vma->vm_file)->d_inode; 6794 struct inode *inode = file_inode(vma->vm_file);
6741 struct btrfs_root *root = BTRFS_I(inode)->root; 6795 struct btrfs_root *root = BTRFS_I(inode)->root;
6742 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 6796 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6743 struct btrfs_ordered_extent *ordered; 6797 struct btrfs_ordered_extent *ordered;
@@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
6915 6969
6916 /* 6970 /*
6917 * 1 for the truncate slack space 6971 * 1 for the truncate slack space
6918 * 1 for the orphan item we're going to add
6919 * 1 for the orphan item deletion
6920 * 1 for updating the inode. 6972 * 1 for updating the inode.
6921 */ 6973 */
6922 trans = btrfs_start_transaction(root, 4); 6974 trans = btrfs_start_transaction(root, 2);
6923 if (IS_ERR(trans)) { 6975 if (IS_ERR(trans)) {
6924 err = PTR_ERR(trans); 6976 err = PTR_ERR(trans);
6925 goto out; 6977 goto out;
@@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
6930 min_size); 6982 min_size);
6931 BUG_ON(ret); 6983 BUG_ON(ret);
6932 6984
6933 ret = btrfs_orphan_add(trans, inode);
6934 if (ret) {
6935 btrfs_end_transaction(trans, root);
6936 goto out;
6937 }
6938
6939 /* 6985 /*
6940 * setattr is responsible for setting the ordered_data_close flag, 6986 * setattr is responsible for setting the ordered_data_close flag,
6941 * but that is only tested during the last file release. That 6987 * but that is only tested during the last file release. That
@@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
7004 ret = btrfs_orphan_del(trans, inode); 7050 ret = btrfs_orphan_del(trans, inode);
7005 if (ret) 7051 if (ret)
7006 err = ret; 7052 err = ret;
7007 } else if (ret && inode->i_nlink > 0) {
7008 /*
7009 * Failed to do the truncate, remove us from the in memory
7010 * orphan list.
7011 */
7012 ret = btrfs_orphan_del(NULL, inode);
7013 } 7053 }
7014 7054
7015 if (trans) { 7055 if (trans) {
@@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
7531 */ 7571 */
7532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 7572int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7533{ 7573{
7534 struct list_head *head = &root->fs_info->delalloc_inodes;
7535 struct btrfs_inode *binode; 7574 struct btrfs_inode *binode;
7536 struct inode *inode; 7575 struct inode *inode;
7537 struct btrfs_delalloc_work *work, *next; 7576 struct btrfs_delalloc_work *work, *next;
7538 struct list_head works; 7577 struct list_head works;
7578 struct list_head splice;
7539 int ret = 0; 7579 int ret = 0;
7540 7580
7541 if (root->fs_info->sb->s_flags & MS_RDONLY) 7581 if (root->fs_info->sb->s_flags & MS_RDONLY)
7542 return -EROFS; 7582 return -EROFS;
7543 7583
7544 INIT_LIST_HEAD(&works); 7584 INIT_LIST_HEAD(&works);
7545 7585 INIT_LIST_HEAD(&splice);
7586again:
7546 spin_lock(&root->fs_info->delalloc_lock); 7587 spin_lock(&root->fs_info->delalloc_lock);
7547 while (!list_empty(head)) { 7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7548 binode = list_entry(head->next, struct btrfs_inode, 7589 while (!list_empty(&splice)) {
7590 binode = list_entry(splice.next, struct btrfs_inode,
7549 delalloc_inodes); 7591 delalloc_inodes);
7592
7593 list_del_init(&binode->delalloc_inodes);
7594
7550 inode = igrab(&binode->vfs_inode); 7595 inode = igrab(&binode->vfs_inode);
7551 if (!inode) 7596 if (!inode)
7552 list_del_init(&binode->delalloc_inodes); 7597 continue;
7598
7599 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes);
7553 spin_unlock(&root->fs_info->delalloc_lock); 7601 spin_unlock(&root->fs_info->delalloc_lock);
7554 if (inode) { 7602
7555 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 7603 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
7556 if (!work) { 7604 if (unlikely(!work)) {
7557 ret = -ENOMEM; 7605 ret = -ENOMEM;
7558 goto out; 7606 goto out;
7559 }
7560 list_add_tail(&work->list, &works);
7561 btrfs_queue_worker(&root->fs_info->flush_workers,
7562 &work->work);
7563 } 7607 }
7608 list_add_tail(&work->list, &works);
7609 btrfs_queue_worker(&root->fs_info->flush_workers,
7610 &work->work);
7611
7564 cond_resched(); 7612 cond_resched();
7565 spin_lock(&root->fs_info->delalloc_lock); 7613 spin_lock(&root->fs_info->delalloc_lock);
7566 } 7614 }
7567 spin_unlock(&root->fs_info->delalloc_lock); 7615 spin_unlock(&root->fs_info->delalloc_lock);
7568 7616
7617 list_for_each_entry_safe(work, next, &works, list) {
7618 list_del_init(&work->list);
7619 btrfs_wait_and_free_delalloc_work(work);
7620 }
7621
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7569 /* the filemap_flush will queue IO into the worker threads, but 7629 /* the filemap_flush will queue IO into the worker threads, but
7570 * we have to make sure the IO is actually started and that 7630 * we have to make sure the IO is actually started and that
7571 * ordered extents get created before we return 7631 * ordered extents get created before we return
@@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7578 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 7638 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
7579 } 7639 }
7580 atomic_dec(&root->fs_info->async_submit_draining); 7640 atomic_dec(&root->fs_info->async_submit_draining);
7641 return 0;
7581out: 7642out:
7582 list_for_each_entry_safe(work, next, &works, list) { 7643 list_for_each_entry_safe(work, next, &works, list) {
7583 list_del_init(&work->list); 7644 list_del_init(&work->list);
7584 btrfs_wait_and_free_delalloc_work(work); 7645 btrfs_wait_and_free_delalloc_work(work);
7585 } 7646 }
7647
7648 if (!list_empty_careful(&splice)) {
7649 spin_lock(&root->fs_info->delalloc_lock);
7650 list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
7651 spin_unlock(&root->fs_info->delalloc_lock);
7652 }
7586 return ret; 7653 return ret;
7587} 7654}
7588 7655
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f05..c3f09f71bedd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -152,7 +152,7 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
152 152
153static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 153static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
154{ 154{
155 struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode); 155 struct btrfs_inode *ip = BTRFS_I(file_inode(file));
156 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 156 unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
157 157
158 if (copy_to_user(arg, &flags, sizeof(flags))) 158 if (copy_to_user(arg, &flags, sizeof(flags)))
@@ -177,7 +177,7 @@ static int check_flags(unsigned int flags)
177 177
178static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 178static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
179{ 179{
180 struct inode *inode = file->f_path.dentry->d_inode; 180 struct inode *inode = file_inode(file);
181 struct btrfs_inode *ip = BTRFS_I(inode); 181 struct btrfs_inode *ip = BTRFS_I(inode);
182 struct btrfs_root *root = ip->root; 182 struct btrfs_root *root = ip->root;
183 struct btrfs_trans_handle *trans; 183 struct btrfs_trans_handle *trans;
@@ -310,7 +310,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
310 310
311static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 311static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
312{ 312{
313 struct inode *inode = file->f_path.dentry->d_inode; 313 struct inode *inode = file_inode(file);
314 314
315 return put_user(inode->i_generation, arg); 315 return put_user(inode->i_generation, arg);
316} 316}
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
515 515
516 BUG_ON(ret); 516 BUG_ON(ret);
517 517
518 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
519fail: 518fail:
520 if (async_transid) { 519 if (async_transid) {
521 *async_transid = trans->transid; 520 *async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
525 } 524 }
526 if (err && !ret) 525 if (err && !ret)
527 ret = err; 526 ret = err;
527
528 if (!ret)
529 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
530
528 return ret; 531 return ret;
529} 532}
530 533
@@ -1317,7 +1320,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1317 u64 new_size; 1320 u64 new_size;
1318 u64 old_size; 1321 u64 old_size;
1319 u64 devid = 1; 1322 u64 devid = 1;
1320 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 1323 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
1321 struct btrfs_ioctl_vol_args *vol_args; 1324 struct btrfs_ioctl_vol_args *vol_args;
1322 struct btrfs_trans_handle *trans; 1325 struct btrfs_trans_handle *trans;
1323 struct btrfs_device *device = NULL; 1326 struct btrfs_device *device = NULL;
@@ -1339,7 +1342,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1339 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1342 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1340 1)) { 1343 1)) {
1341 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 1344 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1342 return -EINPROGRESS; 1345 mnt_drop_write_file(file);
1346 return -EINVAL;
1343 } 1347 }
1344 1348
1345 mutex_lock(&root->fs_info->volume_mutex); 1349 mutex_lock(&root->fs_info->volume_mutex);
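Several ioctls in this file (resize, dev add/remove, defrag, balance) serialize against each other through fs_info->mutually_exclusive_operation_running. The idiom is a try-lock built from atomic_xchg(); this series also changes the busy return code from -EINPROGRESS to -EINVAL and makes sure mnt_want_write_file() is undone on that path. The bare pattern, using the calls from the hunks:

        if (atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
                /* some other add/delete/balance/replace/resize is already running */
                mnt_drop_write_file(file);
                return -EINVAL;
        }

        /* ... the exclusive device operation ... */

        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
        mnt_drop_write_file(file);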
@@ -1362,6 +1366,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1362 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1366 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1363 (unsigned long long)devid); 1367 (unsigned long long)devid);
1364 } 1368 }
1369
1365 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1370 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1366 if (!device) { 1371 if (!device) {
1367 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1372 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@@ -1369,9 +1374,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1369 ret = -EINVAL; 1374 ret = -EINVAL;
1370 goto out_free; 1375 goto out_free;
1371 } 1376 }
1372 if (device->fs_devices && device->fs_devices->seeding) { 1377
1378 if (!device->writeable) {
1373 printk(KERN_INFO "btrfs: resizer unable to apply on " 1379 printk(KERN_INFO "btrfs: resizer unable to apply on "
1374 "seeding device %llu\n", 1380 "readonly device %llu\n",
1375 (unsigned long long)devid); 1381 (unsigned long long)devid);
1376 ret = -EINVAL; 1382 ret = -EINVAL;
1377 goto out_free; 1383 goto out_free;
@@ -1443,8 +1449,8 @@ out_free:
1443 kfree(vol_args); 1449 kfree(vol_args);
1444out: 1450out:
1445 mutex_unlock(&root->fs_info->volume_mutex); 1451 mutex_unlock(&root->fs_info->volume_mutex);
1446 mnt_drop_write_file(file);
1447 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1452 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
1453 mnt_drop_write_file(file);
1448 return ret; 1454 return ret;
1449} 1455}
1450 1456
@@ -1483,8 +1489,8 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1483 goto out_drop_write; 1489 goto out_drop_write;
1484 } 1490 }
1485 1491
1486 src_inode = src.file->f_path.dentry->d_inode; 1492 src_inode = file_inode(src.file);
1487 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1493 if (src_inode->i_sb != file_inode(file)->i_sb) {
1488 printk(KERN_INFO "btrfs: Snapshot src from " 1494 printk(KERN_INFO "btrfs: Snapshot src from "
1489 "another FS\n"); 1495 "another FS\n");
1490 ret = -EINVAL; 1496 ret = -EINVAL;
@@ -1576,7 +1582,7 @@ out:
1576static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1582static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1577 void __user *arg) 1583 void __user *arg)
1578{ 1584{
1579 struct inode *inode = fdentry(file)->d_inode; 1585 struct inode *inode = file_inode(file);
1580 struct btrfs_root *root = BTRFS_I(inode)->root; 1586 struct btrfs_root *root = BTRFS_I(inode)->root;
1581 int ret = 0; 1587 int ret = 0;
1582 u64 flags = 0; 1588 u64 flags = 0;
@@ -1598,7 +1604,7 @@ static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
1598static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1604static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1599 void __user *arg) 1605 void __user *arg)
1600{ 1606{
1601 struct inode *inode = fdentry(file)->d_inode; 1607 struct inode *inode = file_inode(file);
1602 struct btrfs_root *root = BTRFS_I(inode)->root; 1608 struct btrfs_root *root = BTRFS_I(inode)->root;
1603 struct btrfs_trans_handle *trans; 1609 struct btrfs_trans_handle *trans;
1604 u64 root_flags; 1610 u64 root_flags;
@@ -1892,7 +1898,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
1892 if (IS_ERR(args)) 1898 if (IS_ERR(args))
1893 return PTR_ERR(args); 1899 return PTR_ERR(args);
1894 1900
1895 inode = fdentry(file)->d_inode; 1901 inode = file_inode(file);
1896 ret = search_ioctl(inode, args); 1902 ret = search_ioctl(inode, args);
1897 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1903 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1898 ret = -EFAULT; 1904 ret = -EFAULT;
@@ -2002,7 +2008,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
2002 if (IS_ERR(args)) 2008 if (IS_ERR(args))
2003 return PTR_ERR(args); 2009 return PTR_ERR(args);
2004 2010
2005 inode = fdentry(file)->d_inode; 2011 inode = file_inode(file);
2006 2012
2007 if (args->treeid == 0) 2013 if (args->treeid == 0)
2008 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2014 args->treeid = BTRFS_I(inode)->root->root_key.objectid;
@@ -2095,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2095 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2101 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2096 if (err) 2102 if (err)
2097 goto out_dput; 2103 goto out_dput;
2098
2099 /* check if subvolume may be deleted by a non-root user */
2100 err = btrfs_may_delete(dir, dentry, 1);
2101 if (err)
2102 goto out_dput;
2103 } 2104 }
2104 2105
2106 /* check if subvolume may be deleted by a user */
2107 err = btrfs_may_delete(dir, dentry, 1);
2108 if (err)
2109 goto out_dput;
2110
2105 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2111 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2106 err = -EINVAL; 2112 err = -EINVAL;
2107 goto out_dput; 2113 goto out_dput;
@@ -2178,24 +2184,25 @@ out:
2178 2184
2179static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2185static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2180{ 2186{
2181 struct inode *inode = fdentry(file)->d_inode; 2187 struct inode *inode = file_inode(file);
2182 struct btrfs_root *root = BTRFS_I(inode)->root; 2188 struct btrfs_root *root = BTRFS_I(inode)->root;
2183 struct btrfs_ioctl_defrag_range_args *range; 2189 struct btrfs_ioctl_defrag_range_args *range;
2184 int ret; 2190 int ret;
2185 2191
2186 if (btrfs_root_readonly(root)) 2192 ret = mnt_want_write_file(file);
2187 return -EROFS; 2193 if (ret)
2194 return ret;
2188 2195
2189 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2196 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2190 1)) { 2197 1)) {
2191 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2198 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2192 return -EINPROGRESS; 2199 mnt_drop_write_file(file);
2200 return -EINVAL;
2193 } 2201 }
2194 ret = mnt_want_write_file(file); 2202
2195 if (ret) { 2203 if (btrfs_root_readonly(root)) {
2196 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 2204 ret = -EROFS;
2197 0); 2205 goto out;
2198 return ret;
2199 } 2206 }
2200 2207
2201 switch (inode->i_mode & S_IFMT) { 2208 switch (inode->i_mode & S_IFMT) {
@@ -2237,7 +2244,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2237 /* the rest are all set to zero by kzalloc */ 2244 /* the rest are all set to zero by kzalloc */
2238 range->len = (u64)-1; 2245 range->len = (u64)-1;
2239 } 2246 }
2240 ret = btrfs_defrag_file(fdentry(file)->d_inode, file, 2247 ret = btrfs_defrag_file(file_inode(file), file,
2241 range, 0, 0); 2248 range, 0, 0);
2242 if (ret > 0) 2249 if (ret > 0)
2243 ret = 0; 2250 ret = 0;
@@ -2247,8 +2254,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2247 ret = -EINVAL; 2254 ret = -EINVAL;
2248 } 2255 }
2249out: 2256out:
2250 mnt_drop_write_file(file);
2251 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2257 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2258 mnt_drop_write_file(file);
2252 return ret; 2259 return ret;
2253} 2260}
2254 2261
@@ -2263,7 +2270,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2263 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2270 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2264 1)) { 2271 1)) {
2265 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2272 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2266 return -EINPROGRESS; 2273 return -EINVAL;
2267 } 2274 }
2268 2275
2269 mutex_lock(&root->fs_info->volume_mutex); 2276 mutex_lock(&root->fs_info->volume_mutex);
@@ -2285,7 +2292,7 @@ out:
2285 2292
2286static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 2293static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2287{ 2294{
2288 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 2295 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
2289 struct btrfs_ioctl_vol_args *vol_args; 2296 struct btrfs_ioctl_vol_args *vol_args;
2290 int ret; 2297 int ret;
2291 2298
@@ -2300,7 +2307,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2300 1)) { 2307 1)) {
2301 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2308 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2302 mnt_drop_write_file(file); 2309 mnt_drop_write_file(file);
2303 return -EINPROGRESS; 2310 return -EINVAL;
2304 } 2311 }
2305 2312
2306 mutex_lock(&root->fs_info->volume_mutex); 2313 mutex_lock(&root->fs_info->volume_mutex);
@@ -2316,8 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2316 kfree(vol_args); 2323 kfree(vol_args);
2317out: 2324out:
2318 mutex_unlock(&root->fs_info->volume_mutex); 2325 mutex_unlock(&root->fs_info->volume_mutex);
2319 mnt_drop_write_file(file);
2320 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2326 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2327 mnt_drop_write_file(file);
2321 return ret; 2328 return ret;
2322} 2329}
2323 2330
@@ -2408,7 +2415,7 @@ out:
2408static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2415static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2409 u64 off, u64 olen, u64 destoff) 2416 u64 off, u64 olen, u64 destoff)
2410{ 2417{
2411 struct inode *inode = fdentry(file)->d_inode; 2418 struct inode *inode = file_inode(file);
2412 struct btrfs_root *root = BTRFS_I(inode)->root; 2419 struct btrfs_root *root = BTRFS_I(inode)->root;
2413 struct fd src_file; 2420 struct fd src_file;
2414 struct inode *src; 2421 struct inode *src;
@@ -2454,7 +2461,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2454 if (src_file.file->f_path.mnt != file->f_path.mnt) 2461 if (src_file.file->f_path.mnt != file->f_path.mnt)
2455 goto out_fput; 2462 goto out_fput;
2456 2463
2457 src = src_file.file->f_dentry->d_inode; 2464 src = file_inode(src_file.file);
2458 2465
2459 ret = -EINVAL; 2466 ret = -EINVAL;
2460 if (src == inode) 2467 if (src == inode)
@@ -2816,7 +2823,7 @@ static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
2816 */ 2823 */
2817static long btrfs_ioctl_trans_start(struct file *file) 2824static long btrfs_ioctl_trans_start(struct file *file)
2818{ 2825{
2819 struct inode *inode = fdentry(file)->d_inode; 2826 struct inode *inode = file_inode(file);
2820 struct btrfs_root *root = BTRFS_I(inode)->root; 2827 struct btrfs_root *root = BTRFS_I(inode)->root;
2821 struct btrfs_trans_handle *trans; 2828 struct btrfs_trans_handle *trans;
2822 int ret; 2829 int ret;
@@ -2856,7 +2863,7 @@ out:
2856 2863
2857static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 2864static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
2858{ 2865{
2859 struct inode *inode = fdentry(file)->d_inode; 2866 struct inode *inode = file_inode(file);
2860 struct btrfs_root *root = BTRFS_I(inode)->root; 2867 struct btrfs_root *root = BTRFS_I(inode)->root;
2861 struct btrfs_root *new_root; 2868 struct btrfs_root *new_root;
2862 struct btrfs_dir_item *di; 2869 struct btrfs_dir_item *di;
@@ -3080,7 +3087,7 @@ out:
3080 */ 3087 */
3081long btrfs_ioctl_trans_end(struct file *file) 3088long btrfs_ioctl_trans_end(struct file *file)
3082{ 3089{
3083 struct inode *inode = fdentry(file)->d_inode; 3090 struct inode *inode = file_inode(file);
3084 struct btrfs_root *root = BTRFS_I(inode)->root; 3091 struct btrfs_root *root = BTRFS_I(inode)->root;
3085 struct btrfs_trans_handle *trans; 3092 struct btrfs_trans_handle *trans;
3086 3093
@@ -3142,7 +3149,7 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root,
3142 3149
3143static long btrfs_ioctl_scrub(struct file *file, void __user *arg) 3150static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
3144{ 3151{
3145 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3152 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3146 struct btrfs_ioctl_scrub_args *sa; 3153 struct btrfs_ioctl_scrub_args *sa;
3147 int ret; 3154 int ret;
3148 3155
@@ -3433,12 +3440,12 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3433 3440
3434static long btrfs_ioctl_balance(struct file *file, void __user *arg) 3441static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3435{ 3442{
3436 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3443 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3437 struct btrfs_fs_info *fs_info = root->fs_info; 3444 struct btrfs_fs_info *fs_info = root->fs_info;
3438 struct btrfs_ioctl_balance_args *bargs; 3445 struct btrfs_ioctl_balance_args *bargs;
3439 struct btrfs_balance_control *bctl; 3446 struct btrfs_balance_control *bctl;
3447 bool need_unlock; /* for mut. excl. ops lock */
3440 int ret; 3448 int ret;
3441 int need_to_clear_lock = 0;
3442 3449
3443 if (!capable(CAP_SYS_ADMIN)) 3450 if (!capable(CAP_SYS_ADMIN))
3444 return -EPERM; 3451 return -EPERM;
@@ -3447,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3447 if (ret) 3454 if (ret)
3448 return ret; 3455 return ret;
3449 3456
3450 mutex_lock(&fs_info->volume_mutex); 3457again:
3458 if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
3459 mutex_lock(&fs_info->volume_mutex);
3460 mutex_lock(&fs_info->balance_mutex);
3461 need_unlock = true;
3462 goto locked;
3463 }
3464
3465 /*
 3466 * mut. excl. ops lock is locked. Three possibilities:
3467 * (1) some other op is running
3468 * (2) balance is running
3469 * (3) balance is paused -- special case (think resume)
3470 */
3451 mutex_lock(&fs_info->balance_mutex); 3471 mutex_lock(&fs_info->balance_mutex);
3472 if (fs_info->balance_ctl) {
3473 /* this is either (2) or (3) */
3474 if (!atomic_read(&fs_info->balance_running)) {
3475 mutex_unlock(&fs_info->balance_mutex);
3476 if (!mutex_trylock(&fs_info->volume_mutex))
3477 goto again;
3478 mutex_lock(&fs_info->balance_mutex);
3479
3480 if (fs_info->balance_ctl &&
3481 !atomic_read(&fs_info->balance_running)) {
3482 /* this is (3) */
3483 need_unlock = false;
3484 goto locked;
3485 }
3486
3487 mutex_unlock(&fs_info->balance_mutex);
3488 mutex_unlock(&fs_info->volume_mutex);
3489 goto again;
3490 } else {
3491 /* this is (2) */
3492 mutex_unlock(&fs_info->balance_mutex);
3493 ret = -EINPROGRESS;
3494 goto out;
3495 }
3496 } else {
3497 /* this is (1) */
3498 mutex_unlock(&fs_info->balance_mutex);
3499 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3500 ret = -EINVAL;
3501 goto out;
3502 }
3503
3504locked:
3505 BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
3452 3506
3453 if (arg) { 3507 if (arg) {
3454 bargs = memdup_user(arg, sizeof(*bargs)); 3508 bargs = memdup_user(arg, sizeof(*bargs));
3455 if (IS_ERR(bargs)) { 3509 if (IS_ERR(bargs)) {
3456 ret = PTR_ERR(bargs); 3510 ret = PTR_ERR(bargs);
3457 goto out; 3511 goto out_unlock;
3458 } 3512 }
3459 3513
3460 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3514 if (bargs->flags & BTRFS_BALANCE_RESUME) {
@@ -3474,13 +3528,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3474 bargs = NULL; 3528 bargs = NULL;
3475 } 3529 }
3476 3530
3477 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 3531 if (fs_info->balance_ctl) {
3478 1)) {
3479 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3480 ret = -EINPROGRESS; 3532 ret = -EINPROGRESS;
3481 goto out_bargs; 3533 goto out_bargs;
3482 } 3534 }
3483 need_to_clear_lock = 1;
3484 3535
3485 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3536 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3486 if (!bctl) { 3537 if (!bctl) {
@@ -3501,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3501 } 3552 }
3502 3553
3503do_balance: 3554do_balance:
3504 ret = btrfs_balance(bctl, bargs);
3505 /* 3555 /*
3506 * bctl is freed in __cancel_balance or in free_fs_info if 3556 * Ownership of bctl and mutually_exclusive_operation_running
 3507 * restriper was paused all the way until unmount 3557 * goes to btrfs_balance. bctl is freed in __cancel_balance,
3558 * or, if restriper was paused all the way until unmount, in
3559 * free_fs_info. mutually_exclusive_operation_running is
3560 * cleared in __cancel_balance.
3508 */ 3561 */
3562 need_unlock = false;
3563
3564 ret = btrfs_balance(bctl, bargs);
3565
3509 if (arg) { 3566 if (arg) {
3510 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3567 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3511 ret = -EFAULT; 3568 ret = -EFAULT;
@@ -3513,12 +3570,12 @@ do_balance:
3513 3570
3514out_bargs: 3571out_bargs:
3515 kfree(bargs); 3572 kfree(bargs);
3516out: 3573out_unlock:
3517 if (need_to_clear_lock)
3518 atomic_set(&root->fs_info->mutually_exclusive_operation_running,
3519 0);
3520 mutex_unlock(&fs_info->balance_mutex); 3574 mutex_unlock(&fs_info->balance_mutex);
3521 mutex_unlock(&fs_info->volume_mutex); 3575 mutex_unlock(&fs_info->volume_mutex);
3576 if (need_unlock)
3577 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3578out:
3522 mnt_drop_write_file(file); 3579 mnt_drop_write_file(file);
3523 return ret; 3580 return ret;
3524} 3581}
@@ -3573,7 +3630,7 @@ out:
3573 3630
3574static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) 3631static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3575{ 3632{
3576 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3633 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3577 struct btrfs_ioctl_quota_ctl_args *sa; 3634 struct btrfs_ioctl_quota_ctl_args *sa;
3578 struct btrfs_trans_handle *trans = NULL; 3635 struct btrfs_trans_handle *trans = NULL;
3579 int ret; 3636 int ret;
@@ -3632,7 +3689,7 @@ drop_write:
3632 3689
3633static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) 3690static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
3634{ 3691{
3635 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3692 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3636 struct btrfs_ioctl_qgroup_assign_args *sa; 3693 struct btrfs_ioctl_qgroup_assign_args *sa;
3637 struct btrfs_trans_handle *trans; 3694 struct btrfs_trans_handle *trans;
3638 int ret; 3695 int ret;
@@ -3679,7 +3736,7 @@ drop_write:
3679 3736
3680static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 3737static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3681{ 3738{
3682 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3739 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3683 struct btrfs_ioctl_qgroup_create_args *sa; 3740 struct btrfs_ioctl_qgroup_create_args *sa;
3684 struct btrfs_trans_handle *trans; 3741 struct btrfs_trans_handle *trans;
3685 int ret; 3742 int ret;
@@ -3698,6 +3755,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3698 goto drop_write; 3755 goto drop_write;
3699 } 3756 }
3700 3757
3758 if (!sa->qgroupid) {
3759 ret = -EINVAL;
3760 goto out;
3761 }
3762
3701 trans = btrfs_join_transaction(root); 3763 trans = btrfs_join_transaction(root);
3702 if (IS_ERR(trans)) { 3764 if (IS_ERR(trans)) {
3703 ret = PTR_ERR(trans); 3765 ret = PTR_ERR(trans);
@@ -3725,7 +3787,7 @@ drop_write:
3725 3787
3726static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 3788static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
3727{ 3789{
3728 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3790 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3729 struct btrfs_ioctl_qgroup_limit_args *sa; 3791 struct btrfs_ioctl_qgroup_limit_args *sa;
3730 struct btrfs_trans_handle *trans; 3792 struct btrfs_trans_handle *trans;
3731 int ret; 3793 int ret;
@@ -3775,7 +3837,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
3775 void __user *arg) 3837 void __user *arg)
3776{ 3838{
3777 struct btrfs_ioctl_received_subvol_args *sa = NULL; 3839 struct btrfs_ioctl_received_subvol_args *sa = NULL;
3778 struct inode *inode = fdentry(file)->d_inode; 3840 struct inode *inode = file_inode(file);
3779 struct btrfs_root *root = BTRFS_I(inode)->root; 3841 struct btrfs_root *root = BTRFS_I(inode)->root;
3780 struct btrfs_root_item *root_item = &root->root_item; 3842 struct btrfs_root_item *root_item = &root->root_item;
3781 struct btrfs_trans_handle *trans; 3843 struct btrfs_trans_handle *trans;
@@ -3855,7 +3917,7 @@ out:
3855long btrfs_ioctl(struct file *file, unsigned int 3917long btrfs_ioctl(struct file *file, unsigned int
3856 cmd, unsigned long arg) 3918 cmd, unsigned long arg)
3857{ 3919{
3858 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 3920 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
3859 void __user *argp = (void __user *)arg; 3921 void __user *argp = (void __user *)arg;
3860 3922
3861 switch (cmd) { 3923 switch (cmd) {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
836 * if the disk i_size is already at the inode->i_size, or 836 * if the disk i_size is already at the inode->i_size, or
837 * this ordered extent is inside the disk i_size, we're done 837 * this ordered extent is inside the disk i_size, we're done
838 */ 838 */
839 if (disk_i_size == i_size || offset <= disk_i_size) { 839 if (disk_i_size == i_size)
840 goto out;
841
842 /*
843 * We still need to update disk_i_size if outstanding_isize is greater
844 * than disk_i_size.
845 */
846 if (offset <= disk_i_size &&
847 (!ordered || ordered->outstanding_isize <= disk_i_size))
840 goto out; 848 goto out;
841 }
842 849
843 /* 850 /*
844 * walk backward from this ordered extent to disk_i_size. 851 * walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
870 break; 877 break;
871 if (test->file_offset >= i_size) 878 if (test->file_offset >= i_size)
872 break; 879 break;
873 if (test->file_offset >= disk_i_size) { 880 if (entry_end(test) > disk_i_size) {
874 /* 881 /*
875 * we don't update disk_i_size now, so record this 882 * we don't update disk_i_size now, so record this
876 * undealt i_size. Or we will not know the real 883 * undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe9d02c45f8e..a5c856234323 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -379,6 +379,13 @@ next1:
379 379
380 ret = add_relation_rb(fs_info, found_key.objectid, 380 ret = add_relation_rb(fs_info, found_key.objectid,
381 found_key.offset); 381 found_key.offset);
382 if (ret == -ENOENT) {
383 printk(KERN_WARNING
384 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
385 (unsigned long long)found_key.objectid,
386 (unsigned long long)found_key.offset);
387 ret = 0; /* ignore the error */
388 }
382 if (ret) 389 if (ret)
383 goto out; 390 goto out;
384next2: 391next2:
@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
956 struct btrfs_fs_info *fs_info, u64 qgroupid) 963 struct btrfs_fs_info *fs_info, u64 qgroupid)
957{ 964{
958 struct btrfs_root *quota_root; 965 struct btrfs_root *quota_root;
966 struct btrfs_qgroup *qgroup;
959 int ret = 0; 967 int ret = 0;
960 968
961 quota_root = fs_info->quota_root; 969 quota_root = fs_info->quota_root;
962 if (!quota_root) 970 if (!quota_root)
963 return -EINVAL; 971 return -EINVAL;
964 972
973 /* check if there are no relations to this qgroup */
974 spin_lock(&fs_info->qgroup_lock);
975 qgroup = find_qgroup_rb(fs_info, qgroupid);
976 if (qgroup) {
977 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
978 spin_unlock(&fs_info->qgroup_lock);
979 return -EBUSY;
980 }
981 }
982 spin_unlock(&fs_info->qgroup_lock);
983
965 ret = del_qgroup_item(trans, quota_root, qgroupid); 984 ret = del_qgroup_item(trans, quota_root, qgroupid);
966 985
967 spin_lock(&fs_info->qgroup_lock); 986 spin_lock(&fs_info->qgroup_lock);
968 del_qgroup_rb(quota_root->fs_info, qgroupid); 987 del_qgroup_rb(quota_root->fs_info, qgroupid);
969
970 spin_unlock(&fs_info->qgroup_lock); 988 spin_unlock(&fs_info->qgroup_lock);
971 989
972 return ret; 990 return ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 300e09ac3659..17c306bf177a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3472,7 +3472,7 @@ out:
3472} 3472}
3473 3473
3474/* 3474/*
3475 * hepler to find all tree blocks that reference a given data extent 3475 * helper to find all tree blocks that reference a given data extent
3476 */ 3476 */
3477static noinline_for_stack 3477static noinline_for_stack
3478int add_data_references(struct reloc_control *rc, 3478int add_data_references(struct reloc_control *rc,
@@ -3566,7 +3566,7 @@ int add_data_references(struct reloc_control *rc,
3566} 3566}
3567 3567
3568/* 3568/*
3569 * hepler to find next unprocessed extent 3569 * helper to find next unprocessed extent
3570 */ 3570 */
3571static noinline_for_stack 3571static noinline_for_stack
3572int find_next_extent(struct btrfs_trans_handle *trans, 3572int find_next_extent(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
580 int corrected = 0; 580 int corrected = 0;
581 struct btrfs_key key; 581 struct btrfs_key key;
582 struct inode *inode = NULL; 582 struct inode *inode = NULL;
583 struct btrfs_fs_info *fs_info;
583 u64 end = offset + PAGE_SIZE - 1; 584 u64 end = offset + PAGE_SIZE - 1;
584 struct btrfs_root *local_root; 585 struct btrfs_root *local_root;
586 int srcu_index;
585 587
586 key.objectid = root; 588 key.objectid = root;
587 key.type = BTRFS_ROOT_ITEM_KEY; 589 key.type = BTRFS_ROOT_ITEM_KEY;
588 key.offset = (u64)-1; 590 key.offset = (u64)-1;
589 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); 591
590 if (IS_ERR(local_root)) 592 fs_info = fixup->root->fs_info;
593 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
594
595 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
596 if (IS_ERR(local_root)) {
597 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
591 return PTR_ERR(local_root); 598 return PTR_ERR(local_root);
599 }
592 600
593 key.type = BTRFS_INODE_ITEM_KEY; 601 key.type = BTRFS_INODE_ITEM_KEY;
594 key.objectid = inum; 602 key.objectid = inum;
595 key.offset = 0; 603 key.offset = 0;
596 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); 604 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
605 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
597 if (IS_ERR(inode)) 606 if (IS_ERR(inode))
598 return PTR_ERR(inode); 607 return PTR_ERR(inode);
599 608
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
606 } 615 }
607 616
608 if (PageUptodate(page)) { 617 if (PageUptodate(page)) {
609 struct btrfs_fs_info *fs_info;
610 if (PageDirty(page)) { 618 if (PageDirty(page)) {
611 /* 619 /*
612 * we need to write the data to the defect sector. the 620 * we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3180 u64 physical_for_dev_replace; 3188 u64 physical_for_dev_replace;
3181 u64 len; 3189 u64 len;
3182 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3190 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3191 int srcu_index;
3183 3192
3184 key.objectid = root; 3193 key.objectid = root;
3185 key.type = BTRFS_ROOT_ITEM_KEY; 3194 key.type = BTRFS_ROOT_ITEM_KEY;
3186 key.offset = (u64)-1; 3195 key.offset = (u64)-1;
3196
3197 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3198
3187 local_root = btrfs_read_fs_root_no_name(fs_info, &key); 3199 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3188 if (IS_ERR(local_root)) 3200 if (IS_ERR(local_root)) {
3201 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3189 return PTR_ERR(local_root); 3202 return PTR_ERR(local_root);
3203 }
3190 3204
3191 key.type = BTRFS_INODE_ITEM_KEY; 3205 key.type = BTRFS_INODE_ITEM_KEY;
3192 key.objectid = inum; 3206 key.objectid = inum;
3193 key.offset = 0; 3207 key.offset = 0;
3194 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 3208 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3209 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3195 if (IS_ERR(inode)) 3210 if (IS_ERR(inode))
3196 return PTR_ERR(inode); 3211 return PTR_ERR(inode);
3197 3212
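Both scrub paths above now resolve the subvolume root under the subvol_srcu read-side lock, so the root cannot be dropped between looking it up and grabbing the inode that pins it. The shape of the fix, using the calls from the hunks:

        srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

        local_root = btrfs_read_fs_root_no_name(fs_info, &key);
        if (IS_ERR(local_root)) {
                srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
                return PTR_ERR(local_root);
        }

        key.type = BTRFS_INODE_ITEM_KEY;
        key.objectid = inum;
        key.offset = 0;
        inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
        srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);    /* inode now holds its own reference */
        if (IS_ERR(inode))
                return PTR_ERR(inode);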
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad40..f4ab7a9260eb 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
1814 (unsigned long)nce->ino); 1814 (unsigned long)nce->ino);
1815 if (!nce_head) { 1815 if (!nce_head) {
1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1817 if (!nce_head) 1817 if (!nce_head) {
1818 kfree(nce);
1818 return -ENOMEM; 1819 return -ENOMEM;
1820 }
1819 INIT_LIST_HEAD(nce_head); 1821 INIT_LIST_HEAD(nce_head);
1820 1822
1821 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1823 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
@@ -4542,7 +4544,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4542 if (!capable(CAP_SYS_ADMIN)) 4544 if (!capable(CAP_SYS_ADMIN))
4543 return -EPERM; 4545 return -EPERM;
4544 4546
4545 send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root; 4547 send_root = BTRFS_I(file_inode(mnt_file))->root;
4546 fs_info = send_root->fs_info; 4548 fs_info = send_root->fs_info;
4547 4549
4548 arg = memdup_user(arg_, sizeof(*arg)); 4550 arg = memdup_user(arg_, sizeof(*arg));
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 99545df1b86c..d8982e9601d3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
267 function, line, errstr); 267 function, line, errstr);
268 return; 268 return;
269 } 269 }
270 trans->transaction->aborted = errno; 270 ACCESS_ONCE(trans->transaction->aborted) = errno;
271 __btrfs_std_error(root->fs_info, function, line, errno, NULL); 271 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
272} 272}
273/* 273/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87fac9a21ea5..4c0067c4f76d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -112,7 +112,6 @@ loop:
112 * to redo the trans_no_join checks above 112 * to redo the trans_no_join checks above
113 */ 113 */
114 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 114 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
115 cur_trans = fs_info->running_transaction;
116 goto loop; 115 goto loop;
117 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 116 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
118 spin_unlock(&fs_info->trans_lock); 117 spin_unlock(&fs_info->trans_lock);
@@ -333,12 +332,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
333 &root->fs_info->trans_block_rsv, 332 &root->fs_info->trans_block_rsv,
334 num_bytes, flush); 333 num_bytes, flush);
335 if (ret) 334 if (ret)
336 return ERR_PTR(ret); 335 goto reserve_fail;
337 } 336 }
338again: 337again:
339 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 338 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
340 if (!h) 339 if (!h) {
341 return ERR_PTR(-ENOMEM); 340 ret = -ENOMEM;
341 goto alloc_fail;
342 }
342 343
343 /* 344 /*
344 * If we are JOIN_NOLOCK we're already committing a transaction and 345 * If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +366,7 @@ again:
365 if (ret < 0) { 366 if (ret < 0) {
366 /* We must get the transaction if we are JOIN_NOLOCK. */ 367 /* We must get the transaction if we are JOIN_NOLOCK. */
367 BUG_ON(type == TRANS_JOIN_NOLOCK); 368 BUG_ON(type == TRANS_JOIN_NOLOCK);
368 369 goto join_fail;
369 if (type < TRANS_JOIN_NOLOCK)
370 sb_end_intwrite(root->fs_info->sb);
371 kmem_cache_free(btrfs_trans_handle_cachep, h);
372 return ERR_PTR(ret);
373 } 370 }
374 371
375 cur_trans = root->fs_info->running_transaction; 372 cur_trans = root->fs_info->running_transaction;
@@ -410,6 +407,19 @@ got_it:
410 if (!current->journal_info && type != TRANS_USERSPACE) 407 if (!current->journal_info && type != TRANS_USERSPACE)
411 current->journal_info = h; 408 current->journal_info = h;
412 return h; 409 return h;
410
411join_fail:
412 if (type < TRANS_JOIN_NOLOCK)
413 sb_end_intwrite(root->fs_info->sb);
414 kmem_cache_free(btrfs_trans_handle_cachep, h);
415alloc_fail:
416 if (num_bytes)
417 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
418 num_bytes);
419reserve_fail:
420 if (qgroup_reserved)
421 btrfs_qgroup_free(root, qgroup_reserved);
422 return ERR_PTR(ret);
413} 423}
414 424
415struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 425struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
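start_transaction() now unwinds its setup in the conventional staged-goto style: each step that can fail jumps to a label that releases only what was acquired before it, with the labels falling through in reverse order. Schematically (hypothetical helper names; the real calls are the block reservation, handle allocation and join shown above):

        ret = take_reservation(root, num_bytes);
        if (ret)
                goto reserve_fail;

        h = alloc_handle();
        if (!h) {
                ret = -ENOMEM;
                goto alloc_fail;
        }

        ret = join_transaction(root, type);
        if (ret < 0)
                goto join_fail;

        return h;                       /* success: nothing to undo */

join_fail:
        free_handle(h);
alloc_fail:
        release_reservation(root, num_bytes);
reserve_fail:
        release_qgroup_reservation(root, qgroup_reserved);
        return ERR_PTR(ret);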
@@ -1468,7 +1478,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1468 goto cleanup_transaction; 1478 goto cleanup_transaction;
1469 } 1479 }
1470 1480
1471 if (cur_trans->aborted) { 1481 /* Stop the commit early if ->aborted is set */
1482 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1472 ret = cur_trans->aborted; 1483 ret = cur_trans->aborted;
1473 goto cleanup_transaction; 1484 goto cleanup_transaction;
1474 } 1485 }
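The commit path now reads ->aborted through ACCESS_ONCE() at every check (and writes it the same way in __btrfs_abort_transaction above), so the compiler cannot cache the flag in a register across the long stretches between checks. Assuming the macro's usual definition, the pattern is:

        #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

        /* each check really re-reads memory, even after earlier reads */
        if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
                ret = cur_trans->aborted;
                goto cleanup_transaction;
        }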
@@ -1574,6 +1585,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1574 wait_event(cur_trans->writer_wait, 1585 wait_event(cur_trans->writer_wait,
1575 atomic_read(&cur_trans->num_writers) == 1); 1586 atomic_read(&cur_trans->num_writers) == 1);
1576 1587
1588 /* ->aborted might be set after the previous check, so check it */
1589 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1590 ret = cur_trans->aborted;
1591 goto cleanup_transaction;
1592 }
1577 /* 1593 /*
1578 * the reloc mutex makes sure that we stop 1594 * the reloc mutex makes sure that we stop
1579 * the balancing code from coming in and moving 1595 * the balancing code from coming in and moving
@@ -1657,6 +1673,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1657 goto cleanup_transaction; 1673 goto cleanup_transaction;
1658 } 1674 }
1659 1675
1676 /*
1677 * The tasks which save the space cache and inode cache may also
1678 * update ->aborted, check it.
1679 */
1680 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1681 ret = cur_trans->aborted;
1682 mutex_unlock(&root->fs_info->tree_log_mutex);
1683 mutex_unlock(&root->fs_info->reloc_mutex);
1684 goto cleanup_transaction;
1685 }
1686
1660 btrfs_prepare_extent_commit(trans, root); 1687 btrfs_prepare_extent_commit(trans, root);
1661 1688
1662 cur_trans = root->fs_info->running_transaction; 1689 cur_trans = root->fs_info->running_transaction;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 83186c7e45d4..9027bb1e7466 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3357 if (skip_csum) 3357 if (skip_csum)
3358 return 0; 3358 return 0;
3359 3359
3360 if (em->compress_type) {
3361 csum_offset = 0;
3362 csum_len = block_len;
3363 }
3364
3360 /* block start is already adjusted for the file extent offset. */ 3365 /* block start is already adjusted for the file extent offset. */
3361 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3362 em->block_start + csum_offset, 3367 em->block_start + csum_offset,
@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3410 em = list_entry(extents.next, struct extent_map, list); 3415 em = list_entry(extents.next, struct extent_map, list);
3411 3416
3412 list_del_init(&em->list); 3417 list_del_init(&em->list);
3413 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
3414 3418
3415 /* 3419 /*
3416 * If we had an error we just need to delete everybody from our 3420 * If we had an error we just need to delete everybody from our
3417 * private list. 3421 * private list.
3418 */ 3422 */
3419 if (ret) { 3423 if (ret) {
3424 clear_em_logging(tree, em);
3420 free_extent_map(em); 3425 free_extent_map(em);
3421 continue; 3426 continue;
3422 } 3427 }
@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3424 write_unlock(&tree->lock); 3429 write_unlock(&tree->lock);
3425 3430
3426 ret = log_one_extent(trans, inode, root, em, path); 3431 ret = log_one_extent(trans, inode, root, em, path);
3427 free_extent_map(em);
3428 write_lock(&tree->lock); 3432 write_lock(&tree->lock);
3433 clear_em_logging(tree, em);
3434 free_extent_map(em);
3429 } 3435 }
3430 WARN_ON(!list_empty(&extents)); 3436 WARN_ON(!list_empty(&extents));
3431 write_unlock(&tree->lock); 3437 write_unlock(&tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cce6aa74012..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1431 } 1431 }
1432 } else { 1432 } else {
1433 ret = btrfs_get_bdev_and_sb(device_path, 1433 ret = btrfs_get_bdev_and_sb(device_path,
1434 FMODE_READ | FMODE_EXCL, 1434 FMODE_WRITE | FMODE_EXCL,
1435 root->fs_info->bdev_holder, 0, 1435 root->fs_info->bdev_holder, 0,
1436 &bdev, &bh); 1436 &bdev, &bh);
1437 if (ret) 1437 if (ret)
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1556 ret = 0; 1556 ret = 0;
1557 1557
1558 /* Notify udev that device has changed */ 1558 /* Notify udev that device has changed */
1559 btrfs_kobject_uevent(bdev, KOBJ_CHANGE); 1559 if (bdev)
1560 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
1560 1561
1561error_brelse: 1562error_brelse:
1562 brelse(bh); 1563 brelse(bh);
@@ -2614,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2614 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2615 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2615 chunk_used = btrfs_block_group_used(&cache->item); 2616 chunk_used = btrfs_block_group_used(&cache->item);
2616 2617
2617 user_thresh = div_factor_fine(cache->key.offset, bargs->usage); 2618 if (bargs->usage == 0)
2619 user_thresh = 0;
2620 else if (bargs->usage > 100)
2621 user_thresh = cache->key.offset;
2622 else
2623 user_thresh = div_factor_fine(cache->key.offset,
2624 bargs->usage);
2625
2618 if (chunk_used < user_thresh) 2626 if (chunk_used < user_thresh)
2619 ret = 0; 2627 ret = 0;
2620 2628
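chunk_usage_filter() keeps a chunk for relocation only if its used bytes stay under a threshold derived from the balance "usage" argument. div_factor_fine(len, factor) is essentially len * factor / 100, so the two degenerate inputs are now special-cased instead of being fed into that division. A worked example under that assumption:

        u64 len = 1024ULL * 1024 * 1024;        /* a 1 GiB chunk */
        u64 user_thresh;

        user_thresh = div_factor_fine(len, 10); /* usage=10: 107374182 bytes, ~102 MiB */
        /* usage == 0   is clamped to 0   -> only completely empty chunks pass */
        /* usage  > 100 is clamped to len -> every chunk passes                */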
@@ -2959,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
2959 unset_balance_control(fs_info); 2967 unset_balance_control(fs_info);
2960 ret = del_balance_item(fs_info->tree_root); 2968 ret = del_balance_item(fs_info->tree_root);
2961 BUG_ON(ret); 2969 BUG_ON(ret);
2970
2971 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
2962} 2972}
2963 2973
2964void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 2974void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -3138,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138out: 3148out:
3139 if (bctl->flags & BTRFS_BALANCE_RESUME) 3149 if (bctl->flags & BTRFS_BALANCE_RESUME)
3140 __cancel_balance(fs_info); 3150 __cancel_balance(fs_info);
3141 else 3151 else {
3142 kfree(bctl); 3152 kfree(bctl);
3153 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3154 }
3143 return ret; 3155 return ret;
3144} 3156}
3145 3157
@@ -3156,7 +3168,6 @@ static int balance_kthread(void *data)
3156 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3168 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3157 } 3169 }
3158 3170
3159 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3160 mutex_unlock(&fs_info->balance_mutex); 3171 mutex_unlock(&fs_info->balance_mutex);
3161 mutex_unlock(&fs_info->volume_mutex); 3172 mutex_unlock(&fs_info->volume_mutex);
3162 3173
@@ -3179,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3179 return 0; 3190 return 0;
3180 } 3191 }
3181 3192
3182 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3183 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 3193 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
3184 if (IS_ERR(tsk)) 3194 if (IS_ERR(tsk))
3185 return PTR_ERR(tsk); 3195 return PTR_ERR(tsk);
@@ -3233,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
3233 btrfs_balance_sys(leaf, item, &disk_bargs); 3243 btrfs_balance_sys(leaf, item, &disk_bargs);
3234 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 3244 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
3235 3245
3246 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3247
3236 mutex_lock(&fs_info->volume_mutex); 3248 mutex_lock(&fs_info->volume_mutex);
3237 mutex_lock(&fs_info->balance_mutex); 3249 mutex_lock(&fs_info->balance_mutex);
3238 3250
@@ -3496,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3496 { 1, 1, 2, 2, 2, 2 /* raid1 */ }, 3508 { 1, 1, 2, 2, 2, 2 /* raid1 */ },
3497 { 1, 2, 1, 1, 1, 2 /* dup */ }, 3509 { 1, 2, 1, 1, 1, 2 /* dup */ },
3498 { 1, 1, 0, 2, 1, 1 /* raid0 */ }, 3510 { 1, 1, 0, 2, 1, 1 /* raid0 */ },
3499 { 1, 1, 0, 1, 1, 1 /* single */ }, 3511 { 1, 1, 1, 1, 1, 1 /* single */ },
3500}; 3512};
3501 3513
3502static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3514static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/buffer.c b/fs/buffer.c
index c017a2dfb909..b4dcb34c9635 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h> 43#include <linux/bit_spinlock.h>
44#include <trace/events/block.h>
44 45
45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
46 47
@@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
53} 54}
54EXPORT_SYMBOL(init_buffer); 55EXPORT_SYMBOL(init_buffer);
55 56
57inline void touch_buffer(struct buffer_head *bh)
58{
59 trace_block_touch_buffer(bh);
60 mark_page_accessed(bh->b_page);
61}
62EXPORT_SYMBOL(touch_buffer);
63
56static int sleep_on_buffer(void *word) 64static int sleep_on_buffer(void *word)
57{ 65{
58 io_schedule(); 66 io_schedule();
@@ -1113,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh)
1113{ 1121{
1114 WARN_ON_ONCE(!buffer_uptodate(bh)); 1122 WARN_ON_ONCE(!buffer_uptodate(bh));
1115 1123
1124 trace_block_dirty_buffer(bh);
1125
1116 /* 1126 /*
1117 * Very *carefully* optimize the it-is-already-dirty case. 1127 * Very *carefully* optimize the it-is-already-dirty case.
1118 * 1128 *
@@ -2332,7 +2342,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2332 get_block_t get_block) 2342 get_block_t get_block)
2333{ 2343{
2334 struct page *page = vmf->page; 2344 struct page *page = vmf->page;
2335 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 2345 struct inode *inode = file_inode(vma->vm_file);
2336 unsigned long end; 2346 unsigned long end;
2337 loff_t size; 2347 loff_t size;
2338 int ret; 2348 int ret;
@@ -2359,7 +2369,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2359 if (unlikely(ret < 0)) 2369 if (unlikely(ret < 0))
2360 goto out_unlock; 2370 goto out_unlock;
2361 set_page_dirty(page); 2371 set_page_dirty(page);
2362 wait_on_page_writeback(page); 2372 wait_for_stable_page(page);
2363 return 0; 2373 return 0;
2364out_unlock: 2374out_unlock:
2365 unlock_page(page); 2375 unlock_page(page);
@@ -2371,7 +2381,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2371 get_block_t get_block) 2381 get_block_t get_block)
2372{ 2382{
2373 int ret; 2383 int ret;
2374 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; 2384 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
2375 2385
2376 sb_start_pagefault(sb); 2386 sb_start_pagefault(sb);
2377 2387
@@ -2935,6 +2945,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
2935 void *kaddr = kmap_atomic(bh->b_page); 2945 void *kaddr = kmap_atomic(bh->b_page);
2936 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); 2946 memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
2937 kunmap_atomic(kaddr); 2947 kunmap_atomic(kaddr);
2948 flush_dcache_page(bh->b_page);
2938 } 2949 }
2939} 2950}
2940 2951
@@ -3226,7 +3237,7 @@ static struct kmem_cache *bh_cachep __read_mostly;
3226 * Once the number of bh's in the machine exceeds this level, we start 3237 * Once the number of bh's in the machine exceeds this level, we start
3227 * stripping them in writeback. 3238 * stripping them in writeback.
3228 */ 3239 */
3229static int max_buffer_heads; 3240static unsigned long max_buffer_heads;
3230 3241
3231int buffer_heads_over_limit; 3242int buffer_heads_over_limit;
3232 3243
@@ -3342,7 +3353,7 @@ EXPORT_SYMBOL(bh_submit_read);
3342 3353
3343void __init buffer_init(void) 3354void __init buffer_init(void)
3344{ 3355{
3345 int nrpages; 3356 unsigned long nrpages;
3346 3357
3347 bh_cachep = kmem_cache_create("buffer_head", 3358 bh_cachep = kmem_cache_create("buffer_head",
3348 sizeof(struct buffer_head), 0, 3359 sizeof(struct buffer_head), 0,
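The final buffer.c hunks widen max_buffer_heads and the nrpages temporary in buffer_init() from int to unsigned long. The limit is computed as roughly 10% of buffer-cache pages times the number of buffer_heads per page, which can overflow a 32-bit int on very large machines; a rough userspace check of that arithmetic (the 16 TiB figure and the ~104-byte buffer_head size are assumptions for illustration):

#include <stdio.h>
#include <limits.h>

int main(void)
{
    /* hypothetical 16 TiB of lowmem with 4 KiB pages -- illustrative numbers only */
    unsigned long long total_pages = (16ULL << 40) / 4096;
    unsigned long long nrpages = total_pages * 10 / 100;   /* ~10%, as in buffer_init() */
    unsigned long long per_page = 4096 / 104;              /* assumed sizeof(struct buffer_head) */

    unsigned long long limit = nrpages * per_page;
    printf("buffer_heads limit: %llu\n", limit);
    printf("fits in int? %s (INT_MAX = %d)\n",
           limit <= INT_MAX ? "yes" : "no", INT_MAX);
    return 0;
}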
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 9eb134ea6eb2..49bc78243db9 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -1,6 +1,6 @@
1config CEPH_FS 1config CEPH_FS
2 tristate "Ceph distributed file system (EXPERIMENTAL)" 2 tristate "Ceph distributed file system"
3 depends on INET && EXPERIMENTAL 3 depends on INET
4 select CEPH_LIB 4 select CEPH_LIB
5 select LIBCRC32C 5 select LIBCRC32C
6 select CRYPTO_AES 6 select CRYPTO_AES
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 064d1a68d2c1..a60ea977af6f 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -195,7 +195,7 @@ static int ceph_releasepage(struct page *page, gfp_t g)
195 */ 195 */
196static int readpage_nounlock(struct file *filp, struct page *page) 196static int readpage_nounlock(struct file *filp, struct page *page)
197{ 197{
198 struct inode *inode = filp->f_dentry->d_inode; 198 struct inode *inode = file_inode(filp);
199 struct ceph_inode_info *ci = ceph_inode(inode); 199 struct ceph_inode_info *ci = ceph_inode(inode);
200 struct ceph_osd_client *osdc = 200 struct ceph_osd_client *osdc =
201 &ceph_inode_to_client(inode)->client->osdc; 201 &ceph_inode_to_client(inode)->client->osdc;
@@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page)
236static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) 236static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
237{ 237{
238 struct inode *inode = req->r_inode; 238 struct inode *inode = req->r_inode;
239 struct ceph_osd_reply_head *replyhead; 239 int rc = req->r_result;
240 int rc, bytes; 240 int bytes = le32_to_cpu(msg->hdr.data_len);
241 int i; 241 int i;
242 242
243 /* parse reply */
244 replyhead = msg->front.iov_base;
245 WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
246 rc = le32_to_cpu(replyhead->result);
247 bytes = le32_to_cpu(msg->hdr.data_len);
248
249 dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); 243 dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
250 244
251 /* unlock all pages, zeroing any data we didn't read */ 245 /* unlock all pages, zeroing any data we didn't read */
@@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
315 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 309 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
316 NULL, 0, 310 NULL, 0,
317 ci->i_truncate_seq, ci->i_truncate_size, 311 ci->i_truncate_seq, ci->i_truncate_size,
318 NULL, false, 1, 0); 312 NULL, false, 0);
319 if (IS_ERR(req)) 313 if (IS_ERR(req))
320 return PTR_ERR(req); 314 return PTR_ERR(req);
321 315
@@ -370,7 +364,7 @@ out:
370static int ceph_readpages(struct file *file, struct address_space *mapping, 364static int ceph_readpages(struct file *file, struct address_space *mapping,
371 struct list_head *page_list, unsigned nr_pages) 365 struct list_head *page_list, unsigned nr_pages)
372{ 366{
373 struct inode *inode = file->f_dentry->d_inode; 367 struct inode *inode = file_inode(file);
374 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 368 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
375 int rc = 0; 369 int rc = 0;
376 int max = 0; 370 int max = 0;
@@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
492 &ci->i_layout, snapc, 486 &ci->i_layout, snapc,
493 page_off, len, 487 page_off, len,
494 ci->i_truncate_seq, ci->i_truncate_size, 488 ci->i_truncate_seq, ci->i_truncate_size,
495 &inode->i_mtime, 489 &inode->i_mtime, &page, 1);
496 &page, 1, 0, 0, true);
497 if (err < 0) { 490 if (err < 0) {
498 dout("writepage setting page/mapping error %d %p\n", err, page); 491 dout("writepage setting page/mapping error %d %p\n", err, page);
499 SetPageError(page); 492 SetPageError(page);
@@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req,
554 struct ceph_msg *msg) 547 struct ceph_msg *msg)
555{ 548{
556 struct inode *inode = req->r_inode; 549 struct inode *inode = req->r_inode;
557 struct ceph_osd_reply_head *replyhead;
558 struct ceph_osd_op *op;
559 struct ceph_inode_info *ci = ceph_inode(inode); 550 struct ceph_inode_info *ci = ceph_inode(inode);
560 unsigned wrote; 551 unsigned wrote;
561 struct page *page; 552 struct page *page;
562 int i; 553 int i;
563 struct ceph_snap_context *snapc = req->r_snapc; 554 struct ceph_snap_context *snapc = req->r_snapc;
564 struct address_space *mapping = inode->i_mapping; 555 struct address_space *mapping = inode->i_mapping;
565 __s32 rc = -EIO; 556 int rc = req->r_result;
566 u64 bytes = 0; 557 u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
567 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 558 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
568 long writeback_stat; 559 long writeback_stat;
569 unsigned issued = ceph_caps_issued(ci); 560 unsigned issued = ceph_caps_issued(ci);
570 561
571 /* parse reply */
572 replyhead = msg->front.iov_base;
573 WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
574 op = (void *)(replyhead + 1);
575 rc = le32_to_cpu(replyhead->result);
576 bytes = le64_to_cpu(op->extent.length);
577
578 if (rc >= 0) { 562 if (rc >= 0) {
579 /* 563 /*
580 * Assume we wrote the pages we originally sent. The 564 * Assume we wrote the pages we originally sent. The
@@ -741,8 +725,6 @@ retry:
741 struct page *page; 725 struct page *page;
742 int want; 726 int want;
743 u64 offset, len; 727 u64 offset, len;
744 struct ceph_osd_request_head *reqhead;
745 struct ceph_osd_op *op;
746 long writeback_stat; 728 long writeback_stat;
747 729
748 next = 0; 730 next = 0;
@@ -838,7 +820,7 @@ get_more_pages:
838 snapc, do_sync, 820 snapc, do_sync,
839 ci->i_truncate_seq, 821 ci->i_truncate_seq,
840 ci->i_truncate_size, 822 ci->i_truncate_size,
841 &inode->i_mtime, true, 1, 0); 823 &inode->i_mtime, true, 0);
842 824
843 if (IS_ERR(req)) { 825 if (IS_ERR(req)) {
844 rc = PTR_ERR(req); 826 rc = PTR_ERR(req);
@@ -906,10 +888,8 @@ get_more_pages:
906 888
907 /* revise final length, page count */ 889 /* revise final length, page count */
908 req->r_num_pages = locked_pages; 890 req->r_num_pages = locked_pages;
909 reqhead = req->r_request->front.iov_base; 891 req->r_request_ops[0].extent.length = cpu_to_le64(len);
910 op = (void *)(reqhead + 1); 892 req->r_request_ops[0].payload_len = cpu_to_le32(len);
911 op->extent.length = cpu_to_le64(len);
912 op->payload_len = cpu_to_le32(len);
913 req->r_request->hdr.data_len = cpu_to_le32(len); 893 req->r_request->hdr.data_len = cpu_to_le32(len);
914 894
915 rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); 895 rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
@@ -977,7 +957,7 @@ static int ceph_update_writeable_page(struct file *file,
977 loff_t pos, unsigned len, 957 loff_t pos, unsigned len,
978 struct page *page) 958 struct page *page)
979{ 959{
980 struct inode *inode = file->f_dentry->d_inode; 960 struct inode *inode = file_inode(file);
981 struct ceph_inode_info *ci = ceph_inode(inode); 961 struct ceph_inode_info *ci = ceph_inode(inode);
982 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 962 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
983 loff_t page_off = pos & PAGE_CACHE_MASK; 963 loff_t page_off = pos & PAGE_CACHE_MASK;
@@ -1086,7 +1066,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
1086 loff_t pos, unsigned len, unsigned flags, 1066 loff_t pos, unsigned len, unsigned flags,
1087 struct page **pagep, void **fsdata) 1067 struct page **pagep, void **fsdata)
1088{ 1068{
1089 struct inode *inode = file->f_dentry->d_inode; 1069 struct inode *inode = file_inode(file);
1090 struct ceph_inode_info *ci = ceph_inode(inode); 1070 struct ceph_inode_info *ci = ceph_inode(inode);
1091 struct ceph_file_info *fi = file->private_data; 1071 struct ceph_file_info *fi = file->private_data;
1092 struct page *page; 1072 struct page *page;
@@ -1144,7 +1124,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
1144 loff_t pos, unsigned len, unsigned copied, 1124 loff_t pos, unsigned len, unsigned copied,
1145 struct page *page, void *fsdata) 1125 struct page *page, void *fsdata)
1146{ 1126{
1147 struct inode *inode = file->f_dentry->d_inode; 1127 struct inode *inode = file_inode(file);
1148 struct ceph_inode_info *ci = ceph_inode(inode); 1128 struct ceph_inode_info *ci = ceph_inode(inode);
1149 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 1129 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1150 struct ceph_mds_client *mdsc = fsc->mdsc; 1130 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1228,7 +1208,7 @@ const struct address_space_operations ceph_aops = {
1228 */ 1208 */
1229static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1209static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1230{ 1210{
1231 struct inode *inode = vma->vm_file->f_dentry->d_inode; 1211 struct inode *inode = file_inode(vma->vm_file);
1232 struct page *page = vmf->page; 1212 struct page *page = vmf->page;
1233 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 1213 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1234 loff_t off = page_offset(page); 1214 loff_t off = page_offset(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a1d9bb30c1bf..78e2f575247d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -611,8 +611,16 @@ retry:
611 611
612 if (flags & CEPH_CAP_FLAG_AUTH) 612 if (flags & CEPH_CAP_FLAG_AUTH)
613 ci->i_auth_cap = cap; 613 ci->i_auth_cap = cap;
614 else if (ci->i_auth_cap == cap) 614 else if (ci->i_auth_cap == cap) {
615 ci->i_auth_cap = NULL; 615 ci->i_auth_cap = NULL;
616 spin_lock(&mdsc->cap_dirty_lock);
617 if (!list_empty(&ci->i_dirty_item)) {
618 dout(" moving %p to cap_dirty_migrating\n", inode);
619 list_move(&ci->i_dirty_item,
620 &mdsc->cap_dirty_migrating);
621 }
622 spin_unlock(&mdsc->cap_dirty_lock);
623 }
616 624
617 dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", 625 dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
618 inode, ceph_vinop(inode), cap, ceph_cap_string(issued), 626 inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
@@ -930,7 +938,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
930 u64 size, u64 max_size, 938 u64 size, u64 max_size,
931 struct timespec *mtime, struct timespec *atime, 939 struct timespec *mtime, struct timespec *atime,
932 u64 time_warp_seq, 940 u64 time_warp_seq,
933 uid_t uid, gid_t gid, umode_t mode, 941 kuid_t uid, kgid_t gid, umode_t mode,
934 u64 xattr_version, 942 u64 xattr_version,
935 struct ceph_buffer *xattrs_buf, 943 struct ceph_buffer *xattrs_buf,
936 u64 follows) 944 u64 follows)
@@ -974,8 +982,8 @@ static int send_cap_msg(struct ceph_mds_session *session,
974 ceph_encode_timespec(&fc->atime, atime); 982 ceph_encode_timespec(&fc->atime, atime);
975 fc->time_warp_seq = cpu_to_le32(time_warp_seq); 983 fc->time_warp_seq = cpu_to_le32(time_warp_seq);
976 984
977 fc->uid = cpu_to_le32(uid); 985 fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
978 fc->gid = cpu_to_le32(gid); 986 fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
979 fc->mode = cpu_to_le32(mode); 987 fc->mode = cpu_to_le32(mode);
980 988
981 fc->xattr_version = cpu_to_le64(xattr_version); 989 fc->xattr_version = cpu_to_le64(xattr_version);
@@ -1081,8 +1089,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1081 struct timespec mtime, atime; 1089 struct timespec mtime, atime;
1082 int wake = 0; 1090 int wake = 0;
1083 umode_t mode; 1091 umode_t mode;
1084 uid_t uid; 1092 kuid_t uid;
1085 gid_t gid; 1093 kgid_t gid;
1086 struct ceph_mds_session *session; 1094 struct ceph_mds_session *session;
1087 u64 xattr_version = 0; 1095 u64 xattr_version = 0;
1088 struct ceph_buffer *xattr_blob = NULL; 1096 struct ceph_buffer *xattr_blob = NULL;
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1460 struct ceph_mds_client *mdsc = fsc->mdsc; 1468 struct ceph_mds_client *mdsc = fsc->mdsc;
1461 struct inode *inode = &ci->vfs_inode; 1469 struct inode *inode = &ci->vfs_inode;
1462 struct ceph_cap *cap; 1470 struct ceph_cap *cap;
1463 int file_wanted, used; 1471 int file_wanted, used, cap_used;
1464 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ 1472 int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
1465 int issued, implemented, want, retain, revoking, flushing = 0; 1473 int issued, implemented, want, retain, revoking, flushing = 0;
1466 int mds = -1; /* keep track of how far we've gone through i_caps list 1474 int mds = -1; /* keep track of how far we've gone through i_caps list
@@ -1563,9 +1571,14 @@ retry_locked:
1563 1571
1564 /* NOTE: no side-effects allowed, until we take s_mutex */ 1572 /* NOTE: no side-effects allowed, until we take s_mutex */
1565 1573
1574 cap_used = used;
1575 if (ci->i_auth_cap && cap != ci->i_auth_cap)
1576 cap_used &= ~ci->i_auth_cap->issued;
1577
1566 revoking = cap->implemented & ~cap->issued; 1578 revoking = cap->implemented & ~cap->issued;
1567 dout(" mds%d cap %p issued %s implemented %s revoking %s\n", 1579 dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
1568 cap->mds, cap, ceph_cap_string(cap->issued), 1580 cap->mds, cap, ceph_cap_string(cap->issued),
1581 ceph_cap_string(cap_used),
1569 ceph_cap_string(cap->implemented), 1582 ceph_cap_string(cap->implemented),
1570 ceph_cap_string(revoking)); 1583 ceph_cap_string(revoking));
1571 1584
@@ -1593,7 +1606,7 @@ retry_locked:
1593 } 1606 }
1594 1607
1595 /* completed revocation? going down and there are no caps? */ 1608 /* completed revocation? going down and there are no caps? */
1596 if (revoking && (revoking & used) == 0) { 1609 if (revoking && (revoking & cap_used) == 0) {
1597 dout("completed revocation of %s\n", 1610 dout("completed revocation of %s\n",
1598 ceph_cap_string(cap->implemented & ~cap->issued)); 1611 ceph_cap_string(cap->implemented & ~cap->issued));
1599 goto ack; 1612 goto ack;
@@ -1670,8 +1683,8 @@ ack:
1670 sent++; 1683 sent++;
1671 1684
1672 /* __send_cap drops i_ceph_lock */ 1685 /* __send_cap drops i_ceph_lock */
1673 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, 1686 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
1674 retain, flushing, NULL); 1687 want, retain, flushing, NULL);
1675 goto retry; /* retake i_ceph_lock and restart our cap scan. */ 1688 goto retry; /* retake i_ceph_lock and restart our cap scan. */
1676 } 1689 }
1677 1690
@@ -2359,10 +2372,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2359 2372
2360 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 2373 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
2361 inode->i_mode = le32_to_cpu(grant->mode); 2374 inode->i_mode = le32_to_cpu(grant->mode);
2362 inode->i_uid = le32_to_cpu(grant->uid); 2375 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
2363 inode->i_gid = le32_to_cpu(grant->gid); 2376 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
2364 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 2377 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
2365 inode->i_uid, inode->i_gid); 2378 from_kuid(&init_user_ns, inode->i_uid),
2379 from_kgid(&init_user_ns, inode->i_gid));
2366 } 2380 }
2367 2381
2368 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2382 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -2416,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2416 dout("mds wanted %s -> %s\n", 2430 dout("mds wanted %s -> %s\n",
2417 ceph_cap_string(le32_to_cpu(grant->wanted)), 2431 ceph_cap_string(le32_to_cpu(grant->wanted)),
2418 ceph_cap_string(wanted)); 2432 ceph_cap_string(wanted));
2419 grant->wanted = cpu_to_le32(wanted); 2433 /* imported cap may not have correct mds_wanted */
2434 if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
2435 check_caps = 1;
2420 } 2436 }
2421 2437
2422 cap->seq = seq; 2438 cap->seq = seq;
@@ -2820,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2820 dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, 2836 dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
2821 (unsigned)seq); 2837 (unsigned)seq);
2822 2838
2839 if (op == CEPH_CAP_OP_IMPORT)
2840 ceph_add_cap_releases(mdsc, session);
2841
2823 /* lookup ino */ 2842 /* lookup ino */
2824 inode = ceph_find_inode(sb, vino); 2843 inode = ceph_find_inode(sb, vino);
2825 ci = ceph_inode(inode); 2844 ci = ceph_inode(inode);
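In the ceph_check_caps() hunks above, a non-auth cap's revocation is now judged against cap_used, i.e. the used bits minus whatever the auth cap has issued, so use that is fully covered by the auth MDS no longer blocks acknowledging a revoke from another MDS. A small self-contained sketch of that bitmask test, with made-up cap bit values standing in for the real CEPH_CAP_* layout:

#include <stdio.h>

/* illustrative cap bits only; the real CEPH_CAP_* encoding is richer */
#define CAP_FILE_RD 0x1
#define CAP_FILE_WR 0x2

int main(void)
{
    unsigned used        = CAP_FILE_RD | CAP_FILE_WR; /* caps the inode currently uses */
    unsigned auth_issued = CAP_FILE_WR;                /* issued by the auth MDS */
    unsigned revoking    = CAP_FILE_WR;                /* being revoked by a non-auth MDS */

    unsigned cap_used = used & ~auth_issued;            /* ignore use backed by the auth cap */

    printf("old test, revocation complete: %s\n", (revoking & used) == 0 ? "yes" : "no");
    printf("new test, revocation complete: %s\n", (revoking & cap_used) == 0 ? "yes" : "no");
    return 0;
}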
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8c1aabe93b67..6d797f46d772 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -238,7 +238,7 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name,
238static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) 238static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
239{ 239{
240 struct ceph_file_info *fi = filp->private_data; 240 struct ceph_file_info *fi = filp->private_data;
241 struct inode *inode = filp->f_dentry->d_inode; 241 struct inode *inode = file_inode(filp);
242 struct ceph_inode_info *ci = ceph_inode(inode); 242 struct ceph_inode_info *ci = ceph_inode(inode);
243 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 243 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
244 struct ceph_mds_client *mdsc = fsc->mdsc; 244 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1138,7 +1138,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1138 loff_t *ppos) 1138 loff_t *ppos)
1139{ 1139{
1140 struct ceph_file_info *cf = file->private_data; 1140 struct ceph_file_info *cf = file->private_data;
1141 struct inode *inode = file->f_dentry->d_inode; 1141 struct inode *inode = file_inode(file);
1142 struct ceph_inode_info *ci = ceph_inode(inode); 1142 struct ceph_inode_info *ci = ceph_inode(inode);
1143 int left; 1143 int left;
1144 const int bufsize = 1024; 1144 const int bufsize = 1024;
@@ -1188,7 +1188,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1188static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end, 1188static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1189 int datasync) 1189 int datasync)
1190{ 1190{
1191 struct inode *inode = file->f_path.dentry->d_inode; 1191 struct inode *inode = file_inode(file);
1192 struct ceph_inode_info *ci = ceph_inode(inode); 1192 struct ceph_inode_info *ci = ceph_inode(inode);
1193 struct list_head *head = &ci->i_unsafe_dirops; 1193 struct list_head *head = &ci->i_unsafe_dirops;
1194 struct ceph_mds_request *req; 1194 struct ceph_mds_request *req;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index ca3ab3f9ca70..16796be53ca5 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -81,7 +81,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
81 if (parent_inode) { 81 if (parent_inode) {
82 /* nfsd wants connectable */ 82 /* nfsd wants connectable */
83 *max_len = connected_handle_length; 83 *max_len = connected_handle_length;
84 type = 255; 84 type = FILEID_INVALID;
85 } else { 85 } else {
86 dout("encode_fh %p\n", dentry); 86 dout("encode_fh %p\n", dentry);
87 fh->ino = ceph_ino(inode); 87 fh->ino = ceph_ino(inode);
@@ -90,7 +90,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
90 } 90 }
91 } else { 91 } else {
92 *max_len = handle_length; 92 *max_len = handle_length;
93 type = 255; 93 type = FILEID_INVALID;
94 } 94 }
95 if (dentry) 95 if (dentry)
96 dput(dentry); 96 dput(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e51558fca3a3..bf338d9b67e3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
243 err = ceph_mdsc_do_request(mdsc, 243 err = ceph_mdsc_do_request(mdsc,
244 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, 244 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
245 req); 245 req);
246 if (err)
247 goto out_err;
248
246 err = ceph_handle_snapdir(req, dentry, err); 249 err = ceph_handle_snapdir(req, dentry, err);
247 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) 250 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
248 err = ceph_handle_notrace_create(dir, dentry); 251 err = ceph_handle_notrace_create(dir, dentry);
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
263 err = finish_no_open(file, dn); 266 err = finish_no_open(file, dn);
264 } else { 267 } else {
265 dout("atomic_open finish_open on dn %p\n", dn); 268 dout("atomic_open finish_open on dn %p\n", dn);
269 if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
270 *opened |= FILE_CREATED;
271 }
266 err = finish_open(file, dentry, ceph_open, opened); 272 err = finish_open(file, dentry, ceph_open, opened);
267 } 273 }
268 274
@@ -393,7 +399,7 @@ more:
393static ssize_t ceph_sync_read(struct file *file, char __user *data, 399static ssize_t ceph_sync_read(struct file *file, char __user *data,
394 unsigned len, loff_t *poff, int *checkeof) 400 unsigned len, loff_t *poff, int *checkeof)
395{ 401{
396 struct inode *inode = file->f_dentry->d_inode; 402 struct inode *inode = file_inode(file);
397 struct page **pages; 403 struct page **pages;
398 u64 off = *poff; 404 u64 off = *poff;
399 int num_pages, ret; 405 int num_pages, ret;
@@ -466,7 +472,7 @@ static void sync_write_commit(struct ceph_osd_request *req,
466static ssize_t ceph_sync_write(struct file *file, const char __user *data, 472static ssize_t ceph_sync_write(struct file *file, const char __user *data,
467 size_t left, loff_t *offset) 473 size_t left, loff_t *offset)
468{ 474{
469 struct inode *inode = file->f_dentry->d_inode; 475 struct inode *inode = file_inode(file);
470 struct ceph_inode_info *ci = ceph_inode(inode); 476 struct ceph_inode_info *ci = ceph_inode(inode);
471 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 477 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
472 struct ceph_osd_request *req; 478 struct ceph_osd_request *req;
@@ -483,7 +489,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
483 int ret; 489 int ret;
484 struct timespec mtime = CURRENT_TIME; 490 struct timespec mtime = CURRENT_TIME;
485 491
486 if (ceph_snap(file->f_dentry->d_inode) != CEPH_NOSNAP) 492 if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
487 return -EROFS; 493 return -EROFS;
488 494
489 dout("sync_write on file %p %lld~%u %s\n", file, *offset, 495 dout("sync_write on file %p %lld~%u %s\n", file, *offset,
@@ -535,7 +541,7 @@ more:
535 ci->i_snap_realm->cached_context, 541 ci->i_snap_realm->cached_context,
536 do_sync, 542 do_sync,
537 ci->i_truncate_seq, ci->i_truncate_size, 543 ci->i_truncate_seq, ci->i_truncate_size,
538 &mtime, false, 2, page_align); 544 &mtime, false, page_align);
539 if (IS_ERR(req)) 545 if (IS_ERR(req))
540 return PTR_ERR(req); 546 return PTR_ERR(req);
541 547
@@ -637,7 +643,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
637 struct ceph_file_info *fi = filp->private_data; 643 struct ceph_file_info *fi = filp->private_data;
638 loff_t *ppos = &iocb->ki_pos; 644 loff_t *ppos = &iocb->ki_pos;
639 size_t len = iov->iov_len; 645 size_t len = iov->iov_len;
640 struct inode *inode = filp->f_dentry->d_inode; 646 struct inode *inode = file_inode(filp);
641 struct ceph_inode_info *ci = ceph_inode(inode); 647 struct ceph_inode_info *ci = ceph_inode(inode);
642 void __user *base = iov->iov_base; 648 void __user *base = iov->iov_base;
643 ssize_t ret; 649 ssize_t ret;
@@ -707,7 +713,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
707{ 713{
708 struct file *file = iocb->ki_filp; 714 struct file *file = iocb->ki_filp;
709 struct ceph_file_info *fi = file->private_data; 715 struct ceph_file_info *fi = file->private_data;
710 struct inode *inode = file->f_dentry->d_inode; 716 struct inode *inode = file_inode(file);
711 struct ceph_inode_info *ci = ceph_inode(inode); 717 struct ceph_inode_info *ci = ceph_inode(inode);
712 struct ceph_osd_client *osdc = 718 struct ceph_osd_client *osdc =
713 &ceph_sb_to_client(inode->i_sb)->client->osdc; 719 &ceph_sb_to_client(inode->i_sb)->client->osdc;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2971eaa65cdc..851814d951cd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -612,10 +612,11 @@ static int fill_inode(struct inode *inode,
612 612
613 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { 613 if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
614 inode->i_mode = le32_to_cpu(info->mode); 614 inode->i_mode = le32_to_cpu(info->mode);
615 inode->i_uid = le32_to_cpu(info->uid); 615 inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
616 inode->i_gid = le32_to_cpu(info->gid); 616 inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
617 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 617 dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
618 inode->i_uid, inode->i_gid); 618 from_kuid(&init_user_ns, inode->i_uid),
619 from_kgid(&init_user_ns, inode->i_gid));
619 } 620 }
620 621
621 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 622 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
@@ -1130,8 +1131,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1130 req->r_request_started); 1131 req->r_request_started);
1131 dout(" final dn %p\n", dn); 1132 dout(" final dn %p\n", dn);
1132 i++; 1133 i++;
1133 } else if (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || 1134 } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
1134 req->r_op == CEPH_MDS_OP_MKSNAP) { 1135 req->r_op == CEPH_MDS_OP_MKSNAP) && !req->r_aborted) {
1135 struct dentry *dn = req->r_dentry; 1136 struct dentry *dn = req->r_dentry;
1136 1137
1137 /* fill out a snapdir LOOKUPSNAP dentry */ 1138 /* fill out a snapdir LOOKUPSNAP dentry */
@@ -1195,6 +1196,39 @@ done:
1195/* 1196/*
1196 * Prepopulate our cache with readdir results, leases, etc. 1197 * Prepopulate our cache with readdir results, leases, etc.
1197 */ 1198 */
1199static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
1200 struct ceph_mds_session *session)
1201{
1202 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
1203 int i, err = 0;
1204
1205 for (i = 0; i < rinfo->dir_nr; i++) {
1206 struct ceph_vino vino;
1207 struct inode *in;
1208 int rc;
1209
1210 vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
1211 vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
1212
1213 in = ceph_get_inode(req->r_dentry->d_sb, vino);
1214 if (IS_ERR(in)) {
1215 err = PTR_ERR(in);
1216 dout("new_inode badness got %d\n", err);
1217 continue;
1218 }
1219 rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
1220 req->r_request_started, -1,
1221 &req->r_caps_reservation);
1222 if (rc < 0) {
1223 pr_err("fill_inode badness on %p got %d\n", in, rc);
1224 err = rc;
1225 continue;
1226 }
1227 }
1228
1229 return err;
1230}
1231
1198int ceph_readdir_prepopulate(struct ceph_mds_request *req, 1232int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1199 struct ceph_mds_session *session) 1233 struct ceph_mds_session *session)
1200{ 1234{
@@ -1209,6 +1243,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1209 u64 frag = le32_to_cpu(rhead->args.readdir.frag); 1243 u64 frag = le32_to_cpu(rhead->args.readdir.frag);
1210 struct ceph_dentry_info *di; 1244 struct ceph_dentry_info *di;
1211 1245
1246 if (req->r_aborted)
1247 return readdir_prepopulate_inodes_only(req, session);
1248
1212 if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { 1249 if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
1213 snapdir = ceph_get_snapdir(parent->d_inode); 1250 snapdir = ceph_get_snapdir(parent->d_inode);
1214 parent = d_find_alias(snapdir); 1251 parent = d_find_alias(snapdir);
@@ -1565,26 +1602,30 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1565 1602
1566 if (ia_valid & ATTR_UID) { 1603 if (ia_valid & ATTR_UID) {
1567 dout("setattr %p uid %d -> %d\n", inode, 1604 dout("setattr %p uid %d -> %d\n", inode,
1568 inode->i_uid, attr->ia_uid); 1605 from_kuid(&init_user_ns, inode->i_uid),
1606 from_kuid(&init_user_ns, attr->ia_uid));
1569 if (issued & CEPH_CAP_AUTH_EXCL) { 1607 if (issued & CEPH_CAP_AUTH_EXCL) {
1570 inode->i_uid = attr->ia_uid; 1608 inode->i_uid = attr->ia_uid;
1571 dirtied |= CEPH_CAP_AUTH_EXCL; 1609 dirtied |= CEPH_CAP_AUTH_EXCL;
1572 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1610 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
1573 attr->ia_uid != inode->i_uid) { 1611 !uid_eq(attr->ia_uid, inode->i_uid)) {
1574 req->r_args.setattr.uid = cpu_to_le32(attr->ia_uid); 1612 req->r_args.setattr.uid = cpu_to_le32(
1613 from_kuid(&init_user_ns, attr->ia_uid));
1575 mask |= CEPH_SETATTR_UID; 1614 mask |= CEPH_SETATTR_UID;
1576 release |= CEPH_CAP_AUTH_SHARED; 1615 release |= CEPH_CAP_AUTH_SHARED;
1577 } 1616 }
1578 } 1617 }
1579 if (ia_valid & ATTR_GID) { 1618 if (ia_valid & ATTR_GID) {
1580 dout("setattr %p gid %d -> %d\n", inode, 1619 dout("setattr %p gid %d -> %d\n", inode,
1581 inode->i_gid, attr->ia_gid); 1620 from_kgid(&init_user_ns, inode->i_gid),
1621 from_kgid(&init_user_ns, attr->ia_gid));
1582 if (issued & CEPH_CAP_AUTH_EXCL) { 1622 if (issued & CEPH_CAP_AUTH_EXCL) {
1583 inode->i_gid = attr->ia_gid; 1623 inode->i_gid = attr->ia_gid;
1584 dirtied |= CEPH_CAP_AUTH_EXCL; 1624 dirtied |= CEPH_CAP_AUTH_EXCL;
1585 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 1625 } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
1586 attr->ia_gid != inode->i_gid) { 1626 !gid_eq(attr->ia_gid, inode->i_gid)) {
1587 req->r_args.setattr.gid = cpu_to_le32(attr->ia_gid); 1627 req->r_args.setattr.gid = cpu_to_le32(
1628 from_kgid(&init_user_ns, attr->ia_gid));
1588 mask |= CEPH_SETATTR_GID; 1629 mask |= CEPH_SETATTR_GID;
1589 release |= CEPH_CAP_AUTH_SHARED; 1630 release |= CEPH_CAP_AUTH_SHARED;
1590 } 1631 }
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 36549a46e311..4a989345b37b 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -16,11 +16,11 @@
16 */ 16 */
17static long ceph_ioctl_get_layout(struct file *file, void __user *arg) 17static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
18{ 18{
19 struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode); 19 struct ceph_inode_info *ci = ceph_inode(file_inode(file));
20 struct ceph_ioctl_layout l; 20 struct ceph_ioctl_layout l;
21 int err; 21 int err;
22 22
23 err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); 23 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
24 if (!err) { 24 if (!err) {
25 l.stripe_unit = ceph_file_layout_su(ci->i_layout); 25 l.stripe_unit = ceph_file_layout_su(ci->i_layout);
26 l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout); 26 l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
@@ -63,12 +63,12 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
63 63
64static long ceph_ioctl_set_layout(struct file *file, void __user *arg) 64static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
65{ 65{
66 struct inode *inode = file->f_dentry->d_inode; 66 struct inode *inode = file_inode(file);
67 struct inode *parent_inode; 67 struct inode *parent_inode;
68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 68 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
69 struct ceph_mds_request *req; 69 struct ceph_mds_request *req;
70 struct ceph_ioctl_layout l; 70 struct ceph_ioctl_layout l;
71 struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode); 71 struct ceph_inode_info *ci = ceph_inode(file_inode(file));
72 struct ceph_ioctl_layout nl; 72 struct ceph_ioctl_layout nl;
73 int err; 73 int err;
74 74
@@ -76,7 +76,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
76 return -EFAULT; 76 return -EFAULT;
77 77
78 /* validate changed params against current layout */ 78 /* validate changed params against current layout */
79 err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT); 79 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT);
80 if (err) 80 if (err)
81 return err; 81 return err;
82 82
@@ -136,7 +136,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
136 */ 136 */
137static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) 137static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
138{ 138{
139 struct inode *inode = file->f_dentry->d_inode; 139 struct inode *inode = file_inode(file);
140 struct ceph_mds_request *req; 140 struct ceph_mds_request *req;
141 struct ceph_ioctl_layout l; 141 struct ceph_ioctl_layout l;
142 int err; 142 int err;
@@ -179,13 +179,12 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
179static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) 179static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
180{ 180{
181 struct ceph_ioctl_dataloc dl; 181 struct ceph_ioctl_dataloc dl;
182 struct inode *inode = file->f_dentry->d_inode; 182 struct inode *inode = file_inode(file);
183 struct ceph_inode_info *ci = ceph_inode(inode); 183 struct ceph_inode_info *ci = ceph_inode(inode);
184 struct ceph_osd_client *osdc = 184 struct ceph_osd_client *osdc =
185 &ceph_sb_to_client(inode->i_sb)->client->osdc; 185 &ceph_sb_to_client(inode->i_sb)->client->osdc;
186 u64 len = 1, olen; 186 u64 len = 1, olen;
187 u64 tmp; 187 u64 tmp;
188 struct ceph_object_layout ol;
189 struct ceph_pg pgid; 188 struct ceph_pg pgid;
190 int r; 189 int r;
191 190
@@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
194 return -EFAULT; 193 return -EFAULT;
195 194
196 down_read(&osdc->map_sem); 195 down_read(&osdc->map_sem);
197 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, 196 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
198 &dl.object_no, &dl.object_offset, 197 &dl.object_no, &dl.object_offset,
199 &olen); 198 &olen);
200 if (r < 0) 199 if (r < 0)
@@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
209 208
210 snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", 209 snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
211 ceph_ino(inode), dl.object_no); 210 ceph_ino(inode), dl.object_no);
212 ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, 211 ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
213 osdc->osdmap); 212 osdc->osdmap);
214 213
215 pgid = ol.ol_pgid;
216 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); 214 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
217 if (dl.osd >= 0) { 215 if (dl.osd >= 0) {
218 struct ceph_entity_addr *a = 216 struct ceph_entity_addr *a =
@@ -234,7 +232,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
234static long ceph_ioctl_lazyio(struct file *file) 232static long ceph_ioctl_lazyio(struct file *file)
235{ 233{
236 struct ceph_file_info *fi = file->private_data; 234 struct ceph_file_info *fi = file->private_data;
237 struct inode *inode = file->f_dentry->d_inode; 235 struct inode *inode = file_inode(file);
238 struct ceph_inode_info *ci = ceph_inode(inode); 236 struct ceph_inode_info *ci = ceph_inode(inode);
239 237
240 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { 238 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 80576d05d687..202dd3d68be0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -13,7 +13,7 @@
13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 int cmd, u8 wait, struct file_lock *fl) 14 int cmd, u8 wait, struct file_lock *fl)
15{ 15{
16 struct inode *inode = file->f_dentry->d_inode; 16 struct inode *inode = file_inode(file);
17 struct ceph_mds_client *mdsc = 17 struct ceph_mds_client *mdsc =
18 ceph_sb_to_client(inode->i_sb)->mdsc; 18 ceph_sb_to_client(inode->i_sb)->mdsc;
19 struct ceph_mds_request *req; 19 struct ceph_mds_request *req;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9165eb8309eb..442880d099c9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -233,6 +233,30 @@ bad:
233} 233}
234 234
235/* 235/*
236 * parse create results
237 */
238static int parse_reply_info_create(void **p, void *end,
239 struct ceph_mds_reply_info_parsed *info,
240 int features)
241{
242 if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
243 if (*p == end) {
244 info->has_create_ino = false;
245 } else {
246 info->has_create_ino = true;
247 info->ino = ceph_decode_64(p);
248 }
249 }
250
251 if (unlikely(*p != end))
252 goto bad;
253 return 0;
254
255bad:
256 return -EIO;
257}
258
259/*
236 * parse extra results 260 * parse extra results
237 */ 261 */
238static int parse_reply_info_extra(void **p, void *end, 262static int parse_reply_info_extra(void **p, void *end,
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
241{ 265{
242 if (info->head->op == CEPH_MDS_OP_GETFILELOCK) 266 if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
243 return parse_reply_info_filelock(p, end, info, features); 267 return parse_reply_info_filelock(p, end, info, features);
244 else 268 else if (info->head->op == CEPH_MDS_OP_READDIR)
245 return parse_reply_info_dir(p, end, info, features); 269 return parse_reply_info_dir(p, end, info, features);
270 else if (info->head->op == CEPH_MDS_OP_CREATE)
271 return parse_reply_info_create(p, end, info, features);
272 else
273 return -EIO;
246} 274}
247 275
248/* 276/*
@@ -1658,8 +1686,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1658 1686
1659 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 1687 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
1660 head->op = cpu_to_le32(req->r_op); 1688 head->op = cpu_to_le32(req->r_op);
1661 head->caller_uid = cpu_to_le32(req->r_uid); 1689 head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
1662 head->caller_gid = cpu_to_le32(req->r_gid); 1690 head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
1663 head->args = req->r_args; 1691 head->args = req->r_args;
1664 1692
1665 ceph_encode_filepath(&p, end, ino1, path1); 1693 ceph_encode_filepath(&p, end, ino1, path1);
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2170 mutex_lock(&req->r_fill_mutex); 2198 mutex_lock(&req->r_fill_mutex);
2171 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2199 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2172 if (err == 0) { 2200 if (err == 0) {
2173 if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && 2201 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
2202 req->r_op == CEPH_MDS_OP_LSSNAP) &&
2174 rinfo->dir_nr) 2203 rinfo->dir_nr)
2175 ceph_readdir_prepopulate(req, req->r_session); 2204 ceph_readdir_prepopulate(req, req->r_session);
2176 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2205 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
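parse_reply_info_create() above decodes an optional trailing field: when the peer advertises CEPH_FEATURE_REPLY_CREATE_INODE, an empty remainder means no created ino, a 64-bit value carries the ino, and any other leftover bytes are an error. A self-contained sketch of that decode pattern (the buffer layout, helper names, and the little-endian-host shortcut are assumptions for illustration, not the ceph_decode_64() wire helpers):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t decode_le64(const unsigned char **p)
{
    uint64_t v;
    memcpy(&v, *p, sizeof(v));   /* assume little-endian host for brevity */
    *p += sizeof(v);
    return v;
}

static int parse_create_reply(const unsigned char *p, const unsigned char *end,
                              int has_feature, uint64_t *ino, int *has_ino)
{
    *has_ino = 0;
    if (has_feature) {
        if (p == end) {
            /* peer supports the feature but sent nothing: no created ino */
        } else if (end - p >= 8) {
            *ino = decode_le64(&p);
            *has_ino = 1;
        }
    }
    if (p != end)
        return -1;               /* trailing garbage -> treat like -EIO */
    return 0;
}

int main(void)
{
    unsigned char buf[8] = { 0x39, 0x05, 0, 0, 0, 0, 0, 0 };  /* ino 0x539 */
    uint64_t ino;
    int has_ino;

    if (parse_create_reply(buf, buf + sizeof(buf), 1, &ino, &has_ino) == 0 && has_ino)
        printf("created ino %#llx\n", (unsigned long long)ino);
    return 0;
}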
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index dd26846dd71d..c2a19fbbe517 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
74 struct ceph_mds_reply_info_in *dir_in; 74 struct ceph_mds_reply_info_in *dir_in;
75 u8 dir_complete, dir_end; 75 u8 dir_complete, dir_end;
76 }; 76 };
77
78 /* for create results */
79 struct {
80 bool has_create_ino;
81 u64 ino;
82 };
77 }; 83 };
78 84
79 /* encoded blob describing snapshot contexts for certain 85 /* encoded blob describing snapshot contexts for certain
@@ -184,8 +190,8 @@ struct ceph_mds_request {
184 190
185 union ceph_mds_request_args r_args; 191 union ceph_mds_request_args r_args;
186 int r_fmode; /* file mode, if expecting cap */ 192 int r_fmode; /* file mode, if expecting cap */
187 uid_t r_uid; 193 kuid_t r_uid;
188 gid_t r_gid; 194 kgid_t r_gid;
189 195
190 /* for choosing which mds to send this request to */ 196 /* for choosing which mds to send this request to */
191 int r_direct_mode; 197 int r_direct_mode;
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 73b7d44e8a35..0d3c9240c61b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
59 return ERR_PTR(-ENOMEM); 59 return ERR_PTR(-ENOMEM);
60 60
61 ceph_decode_16_safe(p, end, version, bad); 61 ceph_decode_16_safe(p, end, version, bad);
62 if (version > 3) {
63 pr_warning("got mdsmap version %d > 3, failing", version);
64 goto bad;
65 }
62 66
63 ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); 67 ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
64 m->m_epoch = ceph_decode_32(p); 68 m->m_epoch = ceph_decode_32(p);
@@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
144 /* pg_pools */ 148 /* pg_pools */
145 ceph_decode_32_safe(p, end, n, bad); 149 ceph_decode_32_safe(p, end, n, bad);
146 m->m_num_data_pg_pools = n; 150 m->m_num_data_pg_pools = n;
147 m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); 151 m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
148 if (!m->m_data_pg_pools) 152 if (!m->m_data_pg_pools)
149 goto badmem; 153 goto badmem;
150 ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); 154 ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
151 for (i = 0; i < n; i++) 155 for (i = 0; i < n; i++)
152 m->m_data_pg_pools[i] = ceph_decode_32(p); 156 m->m_data_pg_pools[i] = ceph_decode_64(p);
153 m->m_cas_pg_pool = ceph_decode_32(p); 157 m->m_cas_pg_pool = ceph_decode_64(p);
154 158
155 /* ok, we don't care about the rest. */ 159 /* ok, we don't care about the rest. */
156 dout("mdsmap_decode success epoch %u\n", m->m_epoch); 160 dout("mdsmap_decode success epoch %u\n", m->m_epoch);
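The mdsmap hunk widens the data pg pool ids (and m_cas_pg_pool) from 32 to 64 bits, so both the allocation and the ceph_decode_need() length check switch from sizeof(u32) to sizeof(u64) for the n pools plus the CAS pool. The bounds arithmetic, with an illustrative pool count:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t n = 12;                                  /* number of data pools, for illustration */

    size_t need_old = sizeof(uint32_t) * (n + 1);     /* 32-bit pool ids: 52 bytes */
    size_t need_new = sizeof(uint64_t) * (n + 1);     /* 64-bit pool ids: 104 bytes */

    printf("bytes needed, u32 ids: %zu\n", need_old);
    printf("bytes needed, u64 ids: %zu\n", need_new);
    return 0;
}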
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index cd5097d7c804..89fa4a940a0f 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s)
15 case CEPH_MDS_STATE_BOOT: return "up:boot"; 15 case CEPH_MDS_STATE_BOOT: return "up:boot";
16 case CEPH_MDS_STATE_STANDBY: return "up:standby"; 16 case CEPH_MDS_STATE_STANDBY: return "up:standby";
17 case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; 17 case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay";
18 case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay";
18 case CEPH_MDS_STATE_CREATING: return "up:creating"; 19 case CEPH_MDS_STATE_CREATING: return "up:creating";
19 case CEPH_MDS_STATE_STARTING: return "up:starting"; 20 case CEPH_MDS_STATE_STARTING: return "up:starting";
20 /* up and in */ 21 /* up and in */
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op)
50 case CEPH_MDS_OP_LOOKUP: return "lookup"; 51 case CEPH_MDS_OP_LOOKUP: return "lookup";
51 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; 52 case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash";
52 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; 53 case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent";
54 case CEPH_MDS_OP_LOOKUPINO: return "lookupino";
53 case CEPH_MDS_OP_GETATTR: return "getattr"; 55 case CEPH_MDS_OP_GETATTR: return "getattr";
54 case CEPH_MDS_OP_SETXATTR: return "setxattr"; 56 case CEPH_MDS_OP_SETXATTR: return "setxattr";
55 case CEPH_MDS_OP_SETATTR: return "setattr"; 57 case CEPH_MDS_OP_SETATTR: return "setattr";
56 case CEPH_MDS_OP_RMXATTR: return "rmxattr"; 58 case CEPH_MDS_OP_RMXATTR: return "rmxattr";
 59 case CEPH_MDS_OP_SETLAYOUT: return "setlayout";
60 case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout";
57 case CEPH_MDS_OP_READDIR: return "readdir"; 61 case CEPH_MDS_OP_READDIR: return "readdir";
58 case CEPH_MDS_OP_MKNOD: return "mknod"; 62 case CEPH_MDS_OP_MKNOD: return "mknod";
59 case CEPH_MDS_OP_LINK: return "link"; 63 case CEPH_MDS_OP_LINK: return "link";
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index e86aa9948124..9fe17c6c2876 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
71 /* 71 /*
72 * express utilization in terms of large blocks to avoid 72 * express utilization in terms of large blocks to avoid
73 * overflow on 32-bit machines. 73 * overflow on 32-bit machines.
74 *
75 * NOTE: for the time being, we make bsize == frsize to humor
76 * not-yet-ancient versions of glibc that are broken.
77 * Someday, we will probably want to report a real block
78 * size... whatever that may mean for a network file system!
74 */ 79 */
75 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; 80 buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
81 buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
76 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); 82 buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
77 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 83 buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
78 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); 84 buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
@@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
80 buf->f_files = le64_to_cpu(st.num_objects); 86 buf->f_files = le64_to_cpu(st.num_objects);
81 buf->f_ffree = -1; 87 buf->f_ffree = -1;
82 buf->f_namelen = NAME_MAX; 88 buf->f_namelen = NAME_MAX;
83 buf->f_frsize = PAGE_CACHE_SIZE;
84 89
85 /* leave fsid little-endian, regardless of host endianness */ 90 /* leave fsid little-endian, regardless of host endianness */
86 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); 91 fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 66ebe720e40d..c7b309723dcc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -21,7 +21,7 @@
21 21
22/* large granularity for statfs utilization stats to facilitate 22/* large granularity for statfs utilization stats to facilitate
23 * large volume sizes on 32-bit machines. */ 23 * large volume sizes on 32-bit machines. */
24#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ 24#define CEPH_BLOCK_SHIFT 22 /* 4 MB */
25#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) 25#define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT)
26 26
27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ 27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
@@ -138,8 +138,8 @@ struct ceph_cap_snap {
138 struct ceph_snap_context *context; 138 struct ceph_snap_context *context;
139 139
140 umode_t mode; 140 umode_t mode;
141 uid_t uid; 141 kuid_t uid;
142 gid_t gid; 142 kgid_t gid;
143 143
144 struct ceph_buffer *xattr_blob; 144 struct ceph_buffer *xattr_blob;
145 u64 xattr_version; 145 u64 xattr_version;
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
798/* file.c */ 798/* file.c */
799extern const struct file_operations ceph_file_fops; 799extern const struct file_operations ceph_file_fops;
800extern const struct address_space_operations ceph_aops; 800extern const struct address_space_operations ceph_aops;
801extern int ceph_copy_to_page_vector(struct page **pages, 801
802 const char *data,
803 loff_t off, size_t len);
804extern int ceph_copy_from_page_vector(struct page **pages,
805 char *data,
806 loff_t off, size_t len);
807extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
808extern int ceph_open(struct inode *inode, struct file *file); 802extern int ceph_open(struct inode *inode, struct file *file);
809extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, 803extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
810 struct file *file, unsigned flags, umode_t mode, 804 struct file *file, unsigned flags, umode_t mode,
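Taken together, the super.c and super.h changes report statfs utilization in 4 MB units (CEPH_BLOCK_SHIFT raised from 20 to 22) and make f_frsize match f_bsize rather than PAGE_CACHE_SIZE, since some glibc versions derive sizes from f_frsize. A quick check of the shift arithmetic used in ceph_statfs(), with an illustrative kb value from the monitors:

#include <stdio.h>
#include <stdint.h>

#define CEPH_BLOCK_SHIFT 22          /* 4 MB blocks, as in the new super.h */

int main(void)
{
    uint64_t kb = 10ULL << 30;       /* 10 * 2^30 KiB = 10 TiB reported by the monitors */

    /* each KiB is 2^10 bytes and each block is 2^CEPH_BLOCK_SHIFT bytes,
     * so converting KiB to blocks shifts right by CEPH_BLOCK_SHIFT - 10 */
    uint64_t blocks = kb >> (CEPH_BLOCK_SHIFT - 10);

    printf("f_bsize = f_frsize = %u bytes\n", 1u << CEPH_BLOCK_SHIFT);
    printf("f_blocks = %llu (10 TiB in 4 MiB units)\n", (unsigned long long)blocks);
    return 0;
}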
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 2c2ae5be9902..9b6b2b6dd164 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -29,9 +29,94 @@ struct ceph_vxattr {
29 size_t name_size; /* strlen(name) + 1 (for '\0') */ 29 size_t name_size; /* strlen(name) + 1 (for '\0') */
30 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, 30 size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
31 size_t size); 31 size_t size);
32 bool readonly; 32 bool readonly, hidden;
33 bool (*exists_cb)(struct ceph_inode_info *ci);
33}; 34};
34 35
36/* layouts */
37
38static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
39{
40 size_t s;
41 char *p = (char *)&ci->i_layout;
42
43 for (s = 0; s < sizeof(ci->i_layout); s++, p++)
44 if (*p)
45 return true;
46 return false;
47}
48
49static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
50 size_t size)
51{
52 int ret;
53 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
54 struct ceph_osd_client *osdc = &fsc->client->osdc;
55 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
56 const char *pool_name;
57
58 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
59 down_read(&osdc->map_sem);
60 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
61 if (pool_name)
62 ret = snprintf(val, size,
63 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
64 (unsigned long long)ceph_file_layout_su(ci->i_layout),
65 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
66 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
67 pool_name);
68 else
69 ret = snprintf(val, size,
70 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
71 (unsigned long long)ceph_file_layout_su(ci->i_layout),
72 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
73 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
74 (unsigned long long)pool);
75
76 up_read(&osdc->map_sem);
77 return ret;
78}
79
80static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
81 char *val, size_t size)
82{
83 return snprintf(val, size, "%lld",
84 (unsigned long long)ceph_file_layout_su(ci->i_layout));
85}
86
87static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
88 char *val, size_t size)
89{
90 return snprintf(val, size, "%lld",
91 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
92}
93
94static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
95 char *val, size_t size)
96{
97 return snprintf(val, size, "%lld",
98 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
99}
100
101static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
102 char *val, size_t size)
103{
104 int ret;
105 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
106 struct ceph_osd_client *osdc = &fsc->client->osdc;
107 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
108 const char *pool_name;
109
110 down_read(&osdc->map_sem);
111 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
112 if (pool_name)
113 ret = snprintf(val, size, "%s", pool_name);
114 else
115 ret = snprintf(val, size, "%lld", (unsigned long long)pool);
116 up_read(&osdc->map_sem);
117 return ret;
118}
119
35/* directories */ 120/* directories */
36 121
37static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, 122static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
83 (long)ci->i_rctime.tv_nsec); 168 (long)ci->i_rctime.tv_nsec);
84} 169}
85 170
86#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
87 171
88#define XATTR_NAME_CEPH(_type, _name) \ 172#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
89 { \ 173#define CEPH_XATTR_NAME2(_type, _name, _name2) \
90 .name = CEPH_XATTR_NAME(_type, _name), \ 174 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
91 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ 175
92 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ 176#define XATTR_NAME_CEPH(_type, _name) \
93 .readonly = true, \ 177 { \
94 } 178 .name = CEPH_XATTR_NAME(_type, _name), \
179 .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
180 .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
181 .readonly = true, \
182 .hidden = false, \
183 .exists_cb = NULL, \
184 }
185#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
186 { \
187 .name = CEPH_XATTR_NAME2(_type, _name, _field), \
188 .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
189 .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
190 .readonly = false, \
191 .hidden = true, \
192 .exists_cb = ceph_vxattrcb_layout_exists, \
193 }
95 194
96static struct ceph_vxattr ceph_dir_vxattrs[] = { 195static struct ceph_vxattr ceph_dir_vxattrs[] = {
196 {
197 .name = "ceph.dir.layout",
198 .name_size = sizeof("ceph.dir.layout"),
199 .getxattr_cb = ceph_vxattrcb_layout,
200 .readonly = false,
201 .hidden = false,
202 .exists_cb = ceph_vxattrcb_layout_exists,
203 },
204 XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
205 XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
206 XATTR_LAYOUT_FIELD(dir, layout, object_size),
207 XATTR_LAYOUT_FIELD(dir, layout, pool),
97 XATTR_NAME_CEPH(dir, entries), 208 XATTR_NAME_CEPH(dir, entries),
98 XATTR_NAME_CEPH(dir, files), 209 XATTR_NAME_CEPH(dir, files),
99 XATTR_NAME_CEPH(dir, subdirs), 210 XATTR_NAME_CEPH(dir, subdirs),
@@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
102 XATTR_NAME_CEPH(dir, rsubdirs), 213 XATTR_NAME_CEPH(dir, rsubdirs),
103 XATTR_NAME_CEPH(dir, rbytes), 214 XATTR_NAME_CEPH(dir, rbytes),
104 XATTR_NAME_CEPH(dir, rctime), 215 XATTR_NAME_CEPH(dir, rctime),
105 { 0 } /* Required table terminator */ 216 { .name = NULL, 0 } /* Required table terminator */
106}; 217};
107static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ 218static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
108 219
109/* files */ 220/* files */
110 221
111static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
112 size_t size)
113{
114 int ret;
115
116 ret = snprintf(val, size,
117 "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n",
118 (unsigned long long)ceph_file_layout_su(ci->i_layout),
119 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
120 (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
121 return ret;
122}
123
124static struct ceph_vxattr ceph_file_vxattrs[] = { 222static struct ceph_vxattr ceph_file_vxattrs[] = {
125 XATTR_NAME_CEPH(file, layout),
126 /* The following extended attribute name is deprecated */
127 { 223 {
128 .name = XATTR_CEPH_PREFIX "layout", 224 .name = "ceph.file.layout",
129 .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), 225 .name_size = sizeof("ceph.file.layout"),
130 .getxattr_cb = ceph_vxattrcb_file_layout, 226 .getxattr_cb = ceph_vxattrcb_layout,
131 .readonly = true, 227 .readonly = false,
228 .hidden = false,
229 .exists_cb = ceph_vxattrcb_layout_exists,
132 }, 230 },
133 { 0 } /* Required table terminator */ 231 XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
232 XATTR_LAYOUT_FIELD(file, layout, stripe_count),
233 XATTR_LAYOUT_FIELD(file, layout, object_size),
234 XATTR_LAYOUT_FIELD(file, layout, pool),
235 { .name = NULL, 0 } /* Required table terminator */
134}; 236};
135static size_t ceph_file_vxattrs_name_size; /* total size of all names */ 237static size_t ceph_file_vxattrs_name_size; /* total size of all names */
136 238
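The CEPH_XATTR_NAME2() / XATTR_LAYOUT_FIELD() macros introduced above build the per-field xattr names ("ceph.dir.layout.stripe_unit" and friends) entirely at compile time, and the tables store sizeof of the resulting literal, so name_size includes the trailing NUL. A self-contained sketch of that construction; only the prefix value and the macro body are taken from the patch, the rest is scaffolding.

#include <stdio.h>

#define XATTR_CEPH_PREFIX "ceph."

/* Token stringification plus adjacent string-literal concatenation,
 * exactly the trick used by CEPH_XATTR_NAME2() in the hunk above. */
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

int main(void)
{
	/* sizeof on the literal counts the terminating NUL, which is why
	 * the .name_size fields are strlen(name) + 1. */
	printf("%s -> %zu bytes including the NUL\n",
	       CEPH_XATTR_NAME2(dir, layout, stripe_unit),
	       sizeof(CEPH_XATTR_NAME2(dir, layout, stripe_unit)));
	return 0;
}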
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
164 size_t size = 0; 266 size_t size = 0;
165 267
166 for (vxattr = vxattrs; vxattr->name; vxattr++) 268 for (vxattr = vxattrs; vxattr->name; vxattr++)
167 size += vxattr->name_size; 269 if (!vxattr->hidden)
270 size += vxattr->name_size;
168 271
169 return size; 272 return size;
170} 273}
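vxattrs_name_size() now skips hidden entries, so names such as the ceph.*.layout.* fields no longer inflate the buffer size reported to listxattr. A standalone walk over a NULL-terminated table with the same rule; struct vxattr here is a trimmed-down stand-in for the kernel structure.

#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>

struct vxattr {
	const char *name;	/* NULL terminates the table */
	size_t name_size;	/* strlen(name) + 1 */
	bool hidden;		/* not reported by listxattr */
};

static size_t visible_names_size(const struct vxattr *v)
{
	size_t size = 0;

	for (; v->name; v++)
		if (!v->hidden)
			size += v->name_size;
	return size;
}

int main(void)
{
	const struct vxattr table[] = {
		{ "ceph.dir.layout", sizeof("ceph.dir.layout"), false },
		{ "ceph.dir.layout.pool", sizeof("ceph.dir.layout.pool"), true },
		{ "ceph.dir.entries", sizeof("ceph.dir.entries"), false },
		{ NULL, 0, false },
	};

	printf("%zu bytes of visible names\n", visible_names_size(table));
	return 0;
}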
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
572 if (!ceph_is_valid_xattr(name)) 675 if (!ceph_is_valid_xattr(name))
573 return -ENODATA; 676 return -ENODATA;
574 677
575 /* let's see if a virtual xattr was requested */
576 vxattr = ceph_match_vxattr(inode, name);
577
578 spin_lock(&ci->i_ceph_lock); 678 spin_lock(&ci->i_ceph_lock);
579 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 679 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
580 ci->i_xattrs.version, ci->i_xattrs.index_version); 680 ci->i_xattrs.version, ci->i_xattrs.index_version);
581 681
682 /* let's see if a virtual xattr was requested */
683 vxattr = ceph_match_vxattr(inode, name);
684 if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
685 err = vxattr->getxattr_cb(ci, value, size);
686 goto out;
687 }
688
582 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && 689 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
583 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 690 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
584 goto get_xattr; 691 goto get_xattr;
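The getxattr change moves the vxattr lookup under i_ceph_lock and serves a matched virtual xattr unless its exists_cb says the attribute is absent on this inode; the old readonly and fallback paths further down are then removed. The double negation in that test is easy to misread, so here is the same predicate in isolation; vxattr_visible() and the callback names are illustrative.

#include <stdio.h>
#include <stdbool.h>

struct vxattr_like {
	bool (*exists_cb)(const void *obj);	/* NULL means "always exists" */
};

/* Mirrors the ceph_getxattr() condition: a matched virtual xattr is
 * answered from its callback unless it has an exists_cb that reports
 * the attribute as missing for this particular inode. */
static bool vxattr_visible(const struct vxattr_like *vx, const void *obj)
{
	return !(vx->exists_cb && !vx->exists_cb(obj));
}

static bool never_exists(const void *obj) { (void)obj; return false; }

int main(void)
{
	struct vxattr_like plain = { NULL };
	struct vxattr_like gated = { never_exists };

	printf("%d %d\n", vxattr_visible(&plain, NULL),
	       vxattr_visible(&gated, NULL));	/* prints: 1 0 */
	return 0;
}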
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
592 699
593 spin_lock(&ci->i_ceph_lock); 700 spin_lock(&ci->i_ceph_lock);
594 701
595 if (vxattr && vxattr->readonly) {
596 err = vxattr->getxattr_cb(ci, value, size);
597 goto out;
598 }
599
600 err = __build_xattrs(inode); 702 err = __build_xattrs(inode);
601 if (err < 0) 703 if (err < 0)
602 goto out; 704 goto out;
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
604get_xattr: 706get_xattr:
605 err = -ENODATA; /* == ENOATTR */ 707 err = -ENODATA; /* == ENOATTR */
606 xattr = __get_xattr(ci, name); 708 xattr = __get_xattr(ci, name);
607 if (!xattr) { 709 if (!xattr)
608 if (vxattr)
609 err = vxattr->getxattr_cb(ci, value, size);
610 goto out; 710 goto out;
611 }
612 711
613 err = -ERANGE; 712 err = -ERANGE;
614 if (size && size < xattr->val_len) 713 if (size && size < xattr->val_len)
@@ -664,23 +763,30 @@ list_xattr:
664 vir_namelen = ceph_vxattrs_name_size(vxattrs); 763 vir_namelen = ceph_vxattrs_name_size(vxattrs);
665 764
666 /* adding 1 byte per each variable due to the null termination */ 765 /* adding 1 byte per each variable due to the null termination */
667 namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; 766 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
668 err = -ERANGE; 767 err = -ERANGE;
669 if (size && namelen > size) 768 if (size && vir_namelen + namelen > size)
670 goto out; 769 goto out;
671 770
672 err = namelen; 771 err = namelen + vir_namelen;
673 if (size == 0) 772 if (size == 0)
674 goto out; 773 goto out;
675 774
676 names = __copy_xattr_names(ci, names); 775 names = __copy_xattr_names(ci, names);
677 776
678 /* virtual xattr names, too */ 777 /* virtual xattr names, too */
679 if (vxattrs) 778 err = namelen;
779 if (vxattrs) {
680 for (i = 0; vxattrs[i].name; i++) { 780 for (i = 0; vxattrs[i].name; i++) {
681 len = sprintf(names, "%s", vxattrs[i].name); 781 if (!vxattrs[i].hidden &&
682 names += len + 1; 782 !(vxattrs[i].exists_cb &&
783 !vxattrs[i].exists_cb(ci))) {
784 len = sprintf(names, "%s", vxattrs[i].name);
785 names += len + 1;
786 err += len + 1;
787 }
683 } 788 }
789 }
684 790
685out: 791out:
686 spin_unlock(&ci->i_ceph_lock); 792 spin_unlock(&ci->i_ceph_lock);
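The listxattr hunk keeps the real-xattr length and the virtual-name length separate: the -ERANGE check uses their sum, virtual names are appended only when visible and existing, and err accumulates the bytes actually emitted. The packing convention itself, names back to back with their NULs and a size==0 probe returning the required length, looks like this in plain C; pack_names() and the -1 error code are stand-ins.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

static ssize_t pack_names(char *buf, size_t size,
			  const char *const *names, size_t n)
{
	size_t need = 0;

	for (size_t i = 0; i < n; i++)
		need += strlen(names[i]) + 1;	/* name plus its NUL */
	if (size == 0)
		return need;			/* length-probe mode */
	if (need > size)
		return -1;			/* stands in for -ERANGE */
	for (size_t i = 0; i < n; i++) {
		size_t len = strlen(names[i]) + 1;

		memcpy(buf, names[i], len);
		buf += len;
	}
	return need;
}

int main(void)
{
	const char *names[] = { "ceph.dir.layout", "ceph.dir.entries" };
	char buf[64];

	printf("%zd bytes packed\n", pack_names(buf, sizeof(buf), names, 2));
	return 0;
}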
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
782 if (vxattr && vxattr->readonly) 888 if (vxattr && vxattr->readonly)
783 return -EOPNOTSUPP; 889 return -EOPNOTSUPP;
784 890
891 /* pass any unhandled ceph.* xattrs through to the MDS */
892 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
893 goto do_sync_unlocked;
894
785 /* preallocate memory for xattr name, value, index node */ 895 /* preallocate memory for xattr name, value, index node */
786 err = -ENOMEM; 896 err = -ENOMEM;
787 newname = kmemdup(name, name_len + 1, GFP_NOFS); 897 newname = kmemdup(name, name_len + 1, GFP_NOFS);
@@ -838,6 +948,7 @@ retry:
838 948
839do_sync: 949do_sync:
840 spin_unlock(&ci->i_ceph_lock); 950 spin_unlock(&ci->i_ceph_lock);
951do_sync_unlocked:
841 err = ceph_sync_setxattr(dentry, name, value, size, flags); 952 err = ceph_sync_setxattr(dentry, name, value, size, flags);
842out: 953out:
843 kfree(newname); 954 kfree(newname);
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
892 if (vxattr && vxattr->readonly) 1003 if (vxattr && vxattr->readonly)
893 return -EOPNOTSUPP; 1004 return -EOPNOTSUPP;
894 1005
1006 /* pass any unhandled ceph.* xattrs through to the MDS */
1007 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1008 goto do_sync_unlocked;
1009
895 err = -ENOMEM; 1010 err = -ENOMEM;
896 spin_lock(&ci->i_ceph_lock); 1011 spin_lock(&ci->i_ceph_lock);
897retry: 1012retry:
@@ -931,6 +1046,7 @@ retry:
931 return err; 1046 return err;
932do_sync: 1047do_sync:
933 spin_unlock(&ci->i_ceph_lock); 1048 spin_unlock(&ci->i_ceph_lock);
1049do_sync_unlocked:
934 err = ceph_send_removexattr(dentry, name); 1050 err = ceph_send_removexattr(dentry, name);
935out: 1051out:
936 return err; 1052 return err;
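Both ceph_setxattr() and ceph_removexattr() now short-circuit to the do_sync_unlocked label for any ceph.*-prefixed name they do not handle themselves, so unknown ceph attributes are forwarded to the MDS instead of being stored as ordinary xattrs. The prefix test is a plain bounded compare:

#include <stdio.h>
#include <string.h>

#define XATTR_CEPH_PREFIX	"ceph."
#define XATTR_CEPH_PREFIX_LEN	(sizeof(XATTR_CEPH_PREFIX) - 1)

/* Same check as in the setxattr/removexattr hunks: does the name start
 * with the "ceph." namespace prefix? */
static int is_ceph_name(const char *name)
{
	return strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) == 0;
}

int main(void)
{
	printf("%d %d\n", is_ceph_name("ceph.dir.layout"),
	       is_ceph_name("user.comment"));	/* prints: 1 0 */
	return 0;
}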
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 21ff76c22a17..2906ee276408 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -155,14 +155,14 @@ config CIFS_DFS_UPCALL
155 points. If unsure, say N. 155 points. If unsure, say N.
156 156
157config CIFS_NFSD_EXPORT 157config CIFS_NFSD_EXPORT
158 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 158 bool "Allow nfsd to export CIFS file system"
159 depends on CIFS && EXPERIMENTAL && BROKEN 159 depends on CIFS && BROKEN
160 help 160 help
161 Allows NFS server to export a CIFS mounted share (nfsd over cifs) 161 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
162 162
163config CIFS_SMB2 163config CIFS_SMB2
164 bool "SMB2 network file system support (EXPERIMENTAL)" 164 bool "SMB2 network file system support"
165 depends on CIFS && EXPERIMENTAL && INET 165 depends on CIFS && INET
166 select NLS 166 select NLS
167 select KEYS 167 select KEYS
168 select FSCACHE 168 select FSCACHE
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ce5cbd717bfc..210fce2df308 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
226compose_mount_options_err: 226compose_mount_options_err:
227 kfree(mountdata); 227 kfree(mountdata);
228 mountdata = ERR_PTR(rc); 228 mountdata = ERR_PTR(rc);
229 kfree(*devname);
230 *devname = NULL;
229 goto compose_mount_options_out; 231 goto compose_mount_options_out;
230} 232}
231 233
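The cifs_dfs_ref.c fix frees *devname on the error path and clears the caller's pointer so nothing downstream can use or double-free it. The same error-path discipline in a standalone form; build_options() and its allocations are purely illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int build_options(char **devname, char **mountdata)
{
	*devname = strdup("//server/share");
	*mountdata = malloc(128);
	if (!*devname || !*mountdata) {
		free(*mountdata);
		*mountdata = NULL;
		free(*devname);
		*devname = NULL;	/* leave no dangling pointer behind */
		return -1;
	}
	snprintf(*mountdata, 128, "unc=%s", *devname);
	return 0;
}

int main(void)
{
	char *devname, *mountdata;

	if (build_options(&devname, &mountdata) == 0) {
		printf("%s\n", mountdata);
		free(mountdata);
		free(devname);
	}
	return 0;
}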
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index c865bfdfe819..37e4a72a7d1c 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -55,10 +55,10 @@ struct cifs_sb_info {
55 unsigned int wsize; 55 unsigned int wsize;
56 unsigned long actimeo; /* attribute cache timeout (jiffies) */ 56 unsigned long actimeo; /* attribute cache timeout (jiffies) */
57 atomic_t active; 57 atomic_t active;
58 uid_t mnt_uid; 58 kuid_t mnt_uid;
59 gid_t mnt_gid; 59 kgid_t mnt_gid;
60 uid_t mnt_backupuid; 60 kuid_t mnt_backupuid;
61 gid_t mnt_backupgid; 61 kgid_t mnt_backupgid;
62 umode_t mnt_file_mode; 62 umode_t mnt_file_mode;
63 umode_t mnt_dir_mode; 63 umode_t mnt_dir_mode;
64 unsigned int mnt_cifs_flags; 64 unsigned int mnt_cifs_flags;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 086f381d6489..10e774761299 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -149,10 +149,12 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
149 goto out; 149 goto out;
150 150
151 dp = description + strlen(description); 151 dp = description + strlen(description);
152 sprintf(dp, ";uid=0x%x", sesInfo->linux_uid); 152 sprintf(dp, ";uid=0x%x",
153 from_kuid_munged(&init_user_ns, sesInfo->linux_uid));
153 154
154 dp = description + strlen(description); 155 dp = description + strlen(description);
155 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); 156 sprintf(dp, ";creduid=0x%x",
157 from_kuid_munged(&init_user_ns, sesInfo->cred_uid));
156 158
157 if (sesInfo->user_name) { 159 if (sesInfo->user_name) {
158 dp = description + strlen(description); 160 dp = description + strlen(description);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 5cbd00e74067..f1e3f25fe004 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -266,8 +266,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
266 struct key *sidkey; 266 struct key *sidkey;
267 char *sidstr; 267 char *sidstr;
268 const struct cred *saved_cred; 268 const struct cred *saved_cred;
269 uid_t fuid = cifs_sb->mnt_uid; 269 kuid_t fuid = cifs_sb->mnt_uid;
270 gid_t fgid = cifs_sb->mnt_gid; 270 kgid_t fgid = cifs_sb->mnt_gid;
271 271
272 /* 272 /*
273 * If we have too many subauthorities, then something is really wrong. 273 * If we have too many subauthorities, then something is really wrong.
@@ -297,6 +297,7 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
297 * probably a safe assumption but might be better to check based on 297 * probably a safe assumption but might be better to check based on
298 * sidtype. 298 * sidtype.
299 */ 299 */
300 BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
300 if (sidkey->datalen != sizeof(uid_t)) { 301 if (sidkey->datalen != sizeof(uid_t)) {
301 rc = -EIO; 302 rc = -EIO;
302 cFYI(1, "%s: Downcall contained malformed key " 303 cFYI(1, "%s: Downcall contained malformed key "
@@ -305,10 +306,21 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
305 goto out_key_put; 306 goto out_key_put;
306 } 307 }
307 308
308 if (sidtype == SIDOWNER) 309 if (sidtype == SIDOWNER) {
309 memcpy(&fuid, &sidkey->payload.value, sizeof(uid_t)); 310 kuid_t uid;
310 else 311 uid_t id;
311 memcpy(&fgid, &sidkey->payload.value, sizeof(gid_t)); 312 memcpy(&id, &sidkey->payload.value, sizeof(uid_t));
313 uid = make_kuid(&init_user_ns, id);
314 if (uid_valid(uid))
315 fuid = uid;
316 } else {
317 kgid_t gid;
318 gid_t id;
319 memcpy(&id, &sidkey->payload.value, sizeof(gid_t));
320 gid = make_kgid(&init_user_ns, id);
321 if (gid_valid(gid))
322 fgid = gid;
323 }
312 324
313out_key_put: 325out_key_put:
314 key_put(sidkey); 326 key_put(sidkey);
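sid_to_id() now converts the raw id from the keyring upcall into a kuid_t/kgid_t with make_kuid()/make_kgid() and only overrides the mount defaults when the mapping is valid in the initial user namespace. The kernel types cannot be used from userspace, so the snippet below is a deliberately toy model of that convert-validate-override pattern; toy_kuid_t, toy_make_kuid() and the 65536 cutoff are invented purely for illustration.

#include <stdio.h>
#include <stdbool.h>

typedef struct { long val; } toy_kuid_t;	/* stand-in for kuid_t */

static toy_kuid_t toy_make_kuid(unsigned long id)
{
	/* Pretend this namespace only maps ids below 65536. */
	if (id >= 65536)
		return (toy_kuid_t){ -1 };	/* plays the role of INVALID_UID */
	return (toy_kuid_t){ (long)id };
}

static bool toy_uid_valid(toy_kuid_t uid)
{
	return uid.val >= 0;
}

int main(void)
{
	toy_kuid_t fuid = toy_make_kuid(99);		/* mount default */
	toy_kuid_t mapped = toy_make_kuid(70000);	/* bogus upcall value */

	if (toy_uid_valid(mapped))
		fuid = mapped;		/* override only when the mapping holds */
	printf("owner uid: %ld\n", fuid.val);
	return 0;
}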
@@ -346,7 +358,8 @@ init_cifs_idmap(void)
346 if (!cred) 358 if (!cred)
347 return -ENOMEM; 359 return -ENOMEM;
348 360
349 keyring = keyring_alloc(".cifs_idmap", 0, 0, cred, 361 keyring = keyring_alloc(".cifs_idmap",
362 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
350 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 363 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
351 KEY_USR_VIEW | KEY_USR_READ, 364 KEY_USR_VIEW | KEY_USR_READ,
352 KEY_ALLOC_NOT_IN_QUOTA, NULL); 365 KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -774,7 +787,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
774 787
775/* Convert permission bits from mode to equivalent CIFS ACL */ 788/* Convert permission bits from mode to equivalent CIFS ACL */
776static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, 789static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
777 __u32 secdesclen, __u64 nmode, uid_t uid, gid_t gid, int *aclflag) 790 __u32 secdesclen, __u64 nmode, kuid_t uid, kgid_t gid, int *aclflag)
778{ 791{
779 int rc = 0; 792 int rc = 0;
780 __u32 dacloffset; 793 __u32 dacloffset;
@@ -806,17 +819,19 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
806 *aclflag = CIFS_ACL_DACL; 819 *aclflag = CIFS_ACL_DACL;
807 } else { 820 } else {
808 memcpy(pnntsd, pntsd, secdesclen); 821 memcpy(pnntsd, pntsd, secdesclen);
809 if (uid != NO_CHANGE_32) { /* chown */ 822 if (uid_valid(uid)) { /* chown */
823 uid_t id;
810 owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + 824 owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
811 le32_to_cpu(pnntsd->osidoffset)); 825 le32_to_cpu(pnntsd->osidoffset));
812 nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid), 826 nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
813 GFP_KERNEL); 827 GFP_KERNEL);
814 if (!nowner_sid_ptr) 828 if (!nowner_sid_ptr)
815 return -ENOMEM; 829 return -ENOMEM;
816 rc = id_to_sid(uid, SIDOWNER, nowner_sid_ptr); 830 id = from_kuid(&init_user_ns, uid);
831 rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
817 if (rc) { 832 if (rc) {
818 cFYI(1, "%s: Mapping error %d for owner id %d", 833 cFYI(1, "%s: Mapping error %d for owner id %d",
819 __func__, rc, uid); 834 __func__, rc, id);
820 kfree(nowner_sid_ptr); 835 kfree(nowner_sid_ptr);
821 return rc; 836 return rc;
822 } 837 }
@@ -824,17 +839,19 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
824 kfree(nowner_sid_ptr); 839 kfree(nowner_sid_ptr);
825 *aclflag = CIFS_ACL_OWNER; 840 *aclflag = CIFS_ACL_OWNER;
826 } 841 }
827 if (gid != NO_CHANGE_32) { /* chgrp */ 842 if (gid_valid(gid)) { /* chgrp */
843 gid_t id;
828 group_sid_ptr = (struct cifs_sid *)((char *)pnntsd + 844 group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
829 le32_to_cpu(pnntsd->gsidoffset)); 845 le32_to_cpu(pnntsd->gsidoffset));
830 ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid), 846 ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
831 GFP_KERNEL); 847 GFP_KERNEL);
832 if (!ngroup_sid_ptr) 848 if (!ngroup_sid_ptr)
833 return -ENOMEM; 849 return -ENOMEM;
834 rc = id_to_sid(gid, SIDGROUP, ngroup_sid_ptr); 850 id = from_kgid(&init_user_ns, gid);
851 rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
835 if (rc) { 852 if (rc) {
836 cFYI(1, "%s: Mapping error %d for group id %d", 853 cFYI(1, "%s: Mapping error %d for group id %d",
837 __func__, rc, gid); 854 __func__, rc, id);
838 kfree(ngroup_sid_ptr); 855 kfree(ngroup_sid_ptr);
839 return rc; 856 return rc;
840 } 857 }
@@ -1002,7 +1019,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
1002/* Convert mode bits to an ACL so we can update the ACL on the server */ 1019/* Convert mode bits to an ACL so we can update the ACL on the server */
1003int 1020int
1004id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, 1021id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
1005 uid_t uid, gid_t gid) 1022 kuid_t uid, kgid_t gid)
1006{ 1023{
1007 int rc = 0; 1024 int rc = 0;
1008 int aclflag = CIFS_ACL_DACL; /* default flag to set */ 1025 int aclflag = CIFS_ACL_DACL; /* default flag to set */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f653835d067b..4bad7b16271f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -228,7 +228,6 @@ cifs_alloc_inode(struct super_block *sb)
228 cifs_set_oplock_level(cifs_inode, 0); 228 cifs_set_oplock_level(cifs_inode, 0);
229 cifs_inode->delete_pending = false; 229 cifs_inode->delete_pending = false;
230 cifs_inode->invalid_mapping = false; 230 cifs_inode->invalid_mapping = false;
231 cifs_inode->leave_pages_clean = false;
232 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 231 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
233 cifs_inode->server_eof = 0; 232 cifs_inode->server_eof = 0;
234 cifs_inode->uniqueid = 0; 233 cifs_inode->uniqueid = 0;
@@ -376,13 +375,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
376 (int)(srcaddr->sa_family)); 375 (int)(srcaddr->sa_family));
377 } 376 }
378 377
379 seq_printf(s, ",uid=%u", cifs_sb->mnt_uid); 378 seq_printf(s, ",uid=%u",
379 from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid));
380 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 380 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
381 seq_printf(s, ",forceuid"); 381 seq_printf(s, ",forceuid");
382 else 382 else
383 seq_printf(s, ",noforceuid"); 383 seq_printf(s, ",noforceuid");
384 384
385 seq_printf(s, ",gid=%u", cifs_sb->mnt_gid); 385 seq_printf(s, ",gid=%u",
386 from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid));
386 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 387 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
387 seq_printf(s, ",forcegid"); 388 seq_printf(s, ",forcegid");
388 else 389 else
@@ -437,9 +438,13 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
437 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) 438 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
438 seq_printf(s, ",noperm"); 439 seq_printf(s, ",noperm");
439 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) 440 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
440 seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid); 441 seq_printf(s, ",backupuid=%u",
442 from_kuid_munged(&init_user_ns,
443 cifs_sb->mnt_backupuid));
441 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) 444 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
442 seq_printf(s, ",backupgid=%u", cifs_sb->mnt_backupgid); 445 seq_printf(s, ",backupgid=%u",
446 from_kgid_munged(&init_user_ns,
447 cifs_sb->mnt_backupgid));
443 448
444 seq_printf(s, ",rsize=%u", cifs_sb->rsize); 449 seq_printf(s, ",rsize=%u", cifs_sb->rsize);
445 seq_printf(s, ",wsize=%u", cifs_sb->wsize); 450 seq_printf(s, ",wsize=%u", cifs_sb->wsize);
@@ -678,7 +683,7 @@ out_nls:
678static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 683static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
679 unsigned long nr_segs, loff_t pos) 684 unsigned long nr_segs, loff_t pos)
680{ 685{
681 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 686 struct inode *inode = file_inode(iocb->ki_filp);
682 ssize_t written; 687 ssize_t written;
683 int rc; 688 int rc;
684 689
@@ -702,7 +707,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
702 */ 707 */
703 if (whence != SEEK_SET && whence != SEEK_CUR) { 708 if (whence != SEEK_SET && whence != SEEK_CUR) {
704 int rc; 709 int rc;
705 struct inode *inode = file->f_path.dentry->d_inode; 710 struct inode *inode = file_inode(file);
706 711
707 /* 712 /*
708 * We need to be sure that all dirty pages are written and the 713 * We need to be sure that all dirty pages are written and the
@@ -734,7 +739,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
734{ 739{
735 /* note that this is called by vfs setlease with lock_flocks held 740 /* note that this is called by vfs setlease with lock_flocks held
736 to protect *lease from going away */ 741 to protect *lease from going away */
737 struct inode *inode = file->f_path.dentry->d_inode; 742 struct inode *inode = file_inode(file);
738 struct cifsFileInfo *cfile = file->private_data; 743 struct cifsFileInfo *cfile = file->private_data;
739 744
740 if (!(S_ISREG(inode->i_mode))) 745 if (!(S_ISREG(inode->i_mode)))
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index aea1eec64911..4f07f6fbe494 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -386,6 +386,7 @@ struct smb_version_values {
386 unsigned int cap_unix; 386 unsigned int cap_unix;
387 unsigned int cap_nt_find; 387 unsigned int cap_nt_find;
388 unsigned int cap_large_files; 388 unsigned int cap_large_files;
389 unsigned int oplock_read;
389}; 390};
390 391
391#define HEADER_SIZE(server) (server->vals->header_size) 392#define HEADER_SIZE(server) (server->vals->header_size)
@@ -399,11 +400,11 @@ struct smb_vol {
399 char *iocharset; /* local code page for mapping to and from Unicode */ 400 char *iocharset; /* local code page for mapping to and from Unicode */
400 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */ 401 char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
401 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */ 402 char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
402 uid_t cred_uid; 403 kuid_t cred_uid;
403 uid_t linux_uid; 404 kuid_t linux_uid;
404 gid_t linux_gid; 405 kgid_t linux_gid;
405 uid_t backupuid; 406 kuid_t backupuid;
406 gid_t backupgid; 407 kgid_t backupgid;
407 umode_t file_mode; 408 umode_t file_mode;
408 umode_t dir_mode; 409 umode_t dir_mode;
409 unsigned secFlg; 410 unsigned secFlg;
@@ -702,8 +703,8 @@ struct cifs_ses {
702 char *serverNOS; /* name of network operating system of server */ 703 char *serverNOS; /* name of network operating system of server */
703 char *serverDomain; /* security realm of server */ 704 char *serverDomain; /* security realm of server */
704 __u64 Suid; /* remote smb uid */ 705 __u64 Suid; /* remote smb uid */
705 uid_t linux_uid; /* overriding owner of files on the mount */ 706 kuid_t linux_uid; /* overriding owner of files on the mount */
706 uid_t cred_uid; /* owner of credentials */ 707 kuid_t cred_uid; /* owner of credentials */
707 unsigned int capabilities; 708 unsigned int capabilities;
708 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 709 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
709 TCP names - will ipv6 and sctp addresses fit? */ 710 TCP names - will ipv6 and sctp addresses fit? */
@@ -837,7 +838,7 @@ struct cifs_tcon {
837 */ 838 */
838struct tcon_link { 839struct tcon_link {
839 struct rb_node tl_rbnode; 840 struct rb_node tl_rbnode;
840 uid_t tl_uid; 841 kuid_t tl_uid;
841 unsigned long tl_flags; 842 unsigned long tl_flags;
842#define TCON_LINK_MASTER 0 843#define TCON_LINK_MASTER 0
843#define TCON_LINK_PENDING 1 844#define TCON_LINK_PENDING 1
@@ -930,7 +931,7 @@ struct cifsFileInfo {
930 struct list_head tlist; /* pointer to next fid owned by tcon */ 931 struct list_head tlist; /* pointer to next fid owned by tcon */
931 struct list_head flist; /* next fid (file instance) for this inode */ 932 struct list_head flist; /* next fid (file instance) for this inode */
932 struct cifs_fid_locks *llist; /* brlocks held by this fid */ 933 struct cifs_fid_locks *llist; /* brlocks held by this fid */
933 unsigned int uid; /* allows finding which FileInfo structure */ 934 kuid_t uid; /* allows finding which FileInfo structure */
934 __u32 pid; /* process id who opened file */ 935 __u32 pid; /* process id who opened file */
935 struct cifs_fid fid; /* file id from remote */ 936 struct cifs_fid fid; /* file id from remote */
936 /* BB add lock scope info here if needed */ ; 937 /* BB add lock scope info here if needed */ ;
@@ -1030,7 +1031,6 @@ struct cifsInodeInfo {
1030 bool clientCanCacheAll; /* read and writebehind oplock */ 1031 bool clientCanCacheAll; /* read and writebehind oplock */
1031 bool delete_pending; /* DELETE_ON_CLOSE is set */ 1032 bool delete_pending; /* DELETE_ON_CLOSE is set */
1032 bool invalid_mapping; /* pagecache is invalid */ 1033 bool invalid_mapping; /* pagecache is invalid */
1033 bool leave_pages_clean; /* protected by i_mutex, not set pages dirty */
1034 unsigned long time; /* jiffies of last update of inode */ 1034 unsigned long time; /* jiffies of last update of inode */
1035 u64 server_eof; /* current file size on server -- protected by i_lock */ 1035 u64 server_eof; /* current file size on server -- protected by i_lock */
1036 u64 uniqueid; /* server inode number */ 1036 u64 uniqueid; /* server inode number */
@@ -1245,8 +1245,8 @@ struct cifs_fattr {
1245 u64 cf_eof; 1245 u64 cf_eof;
1246 u64 cf_bytes; 1246 u64 cf_bytes;
1247 u64 cf_createtime; 1247 u64 cf_createtime;
1248 uid_t cf_uid; 1248 kuid_t cf_uid;
1249 gid_t cf_gid; 1249 kgid_t cf_gid;
1250 umode_t cf_mode; 1250 umode_t cf_mode;
1251 dev_t cf_rdev; 1251 dev_t cf_rdev;
1252 unsigned int cf_nlink; 1252 unsigned int cf_nlink;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b9d59a948a2c..e996ff6b26d1 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -277,7 +277,6 @@
277#define CIFS_NO_HANDLE 0xFFFF 277#define CIFS_NO_HANDLE 0xFFFF
278 278
279#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL 279#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
280#define NO_CHANGE_32 0xFFFFFFFFUL
281 280
282/* IPC$ in ASCII */ 281/* IPC$ in ASCII */
283#define CIFS_IPC_RESOURCE "\x49\x50\x43\x24" 282#define CIFS_IPC_RESOURCE "\x49\x50\x43\x24"
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1988c1baa224..f450f0683ddd 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -46,7 +46,8 @@ extern void _free_xid(unsigned int);
46({ \ 46({ \
47 unsigned int __xid = _get_xid(); \ 47 unsigned int __xid = _get_xid(); \
48 cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \ 48 cFYI(1, "CIFS VFS: in %s as Xid: %u with uid: %d", \
49 __func__, __xid, current_fsuid()); \ 49 __func__, __xid, \
50 from_kuid(&init_user_ns, current_fsuid())); \
50 __xid; \ 51 __xid; \
51}) 52})
52 53
@@ -161,7 +162,7 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
161 struct cifs_fattr *fattr, struct inode *inode, 162 struct cifs_fattr *fattr, struct inode *inode,
162 const char *path, const __u16 *pfid); 163 const char *path, const __u16 *pfid);
163extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64, 164extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64,
164 uid_t, gid_t); 165 kuid_t, kgid_t);
165extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 166extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
166 const char *, u32 *); 167 const char *, u32 *);
167extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, 168extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
@@ -304,8 +305,8 @@ struct cifs_unix_set_info_args {
304 __u64 atime; 305 __u64 atime;
305 __u64 mtime; 306 __u64 mtime;
306 __u64 mode; 307 __u64 mode;
307 __u64 uid; 308 kuid_t uid;
308 __u64 gid; 309 kgid_t gid;
309 dev_t device; 310 dev_t device;
310}; 311};
311 312
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 76d0d2998850..00e12f2d626b 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -5819,8 +5819,14 @@ static void
5819cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset, 5819cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5820 const struct cifs_unix_set_info_args *args) 5820 const struct cifs_unix_set_info_args *args)
5821{ 5821{
5822 u64 uid = NO_CHANGE_64, gid = NO_CHANGE_64;
5822 u64 mode = args->mode; 5823 u64 mode = args->mode;
5823 5824
5825 if (uid_valid(args->uid))
5826 uid = from_kuid(&init_user_ns, args->uid);
5827 if (gid_valid(args->gid))
5828 gid = from_kgid(&init_user_ns, args->gid);
5829
5824 /* 5830 /*
5825 * Samba server ignores set of file size to zero due to bugs in some 5831 * Samba server ignores set of file size to zero due to bugs in some
5826 * older clients, but we should be precise - we use SetFileSize to 5832 * older clients, but we should be precise - we use SetFileSize to
@@ -5833,8 +5839,8 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5833 data_offset->LastStatusChange = cpu_to_le64(args->ctime); 5839 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5834 data_offset->LastAccessTime = cpu_to_le64(args->atime); 5840 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5835 data_offset->LastModificationTime = cpu_to_le64(args->mtime); 5841 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5836 data_offset->Uid = cpu_to_le64(args->uid); 5842 data_offset->Uid = cpu_to_le64(uid);
5837 data_offset->Gid = cpu_to_le64(args->gid); 5843 data_offset->Gid = cpu_to_le64(gid);
5838 /* better to leave device as zero when it is */ 5844 /* better to leave device as zero when it is */
5839 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); 5845 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5840 data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); 5846 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
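cifs_fill_unix_set_info() does the reverse translation at the protocol boundary: an invalid kuid/kgid now means "leave the owner alone" and is encoded as the all-ones NO_CHANGE_64 sentinel, while a valid id is mapped back to a plain number with from_kuid()/from_kgid(). A small sketch of that encoding step, with a plain long and a bool standing in for kuid_t and uid_valid():

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL

static uint64_t encode_owner(long uid, bool valid)
{
	/* Valid id -> send it; invalid id -> tell the server "no change". */
	return valid ? (uint64_t)uid : NO_CHANGE_64;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)encode_owner(1000, true));
	printf("%#llx\n", (unsigned long long)encode_owner(0, false));
	return 0;
}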
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 17c3643e5950..4474a57f30ab 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -987,6 +987,41 @@ static int get_option_ul(substring_t args[], unsigned long *option)
987 return rc; 987 return rc;
988} 988}
989 989
990static int get_option_uid(substring_t args[], kuid_t *result)
991{
992 unsigned long value;
993 kuid_t uid;
994 int rc;
995
996 rc = get_option_ul(args, &value);
997 if (rc)
998 return rc;
999
1000 uid = make_kuid(current_user_ns(), value);
1001 if (!uid_valid(uid))
1002 return -EINVAL;
1003
1004 *result = uid;
1005 return 0;
1006}
1007
1008static int get_option_gid(substring_t args[], kgid_t *result)
1009{
1010 unsigned long value;
1011 kgid_t gid;
1012 int rc;
1013
1014 rc = get_option_ul(args, &value);
1015 if (rc)
1016 return rc;
1017
1018 gid = make_kgid(current_user_ns(), value);
1019 if (!gid_valid(gid))
1020 return -EINVAL;
1021
1022 *result = gid;
1023 return 0;
1024}
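get_option_uid() and get_option_gid() wrap the old numeric parse with a namespace mapping and reject values that do not map, so the mount fails up front instead of storing an unusable id. A userspace sketch of the same parse-then-validate flow; valid_in_ns() is a made-up stand-in for make_kuid() plus uid_valid().

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

static int valid_in_ns(unsigned long id)
{
	return id <= 0xFFFFFFFEUL;	/* toy rule: anything but (uid_t)-1 */
}

static int parse_uid_option(const char *arg, unsigned long *out)
{
	char *end;
	unsigned long value;

	errno = 0;
	value = strtoul(arg, &end, 0);
	if (errno || *end != '\0')
		return -EINVAL;		/* not a number at all */
	if (!valid_in_ns(value))
		return -EINVAL;		/* a number, but unmappable */
	*out = value;
	return 0;
}

int main(void)
{
	unsigned long uid;

	if (parse_uid_option("1000", &uid) == 0)
		printf("uid=%lu\n", uid);
	if (parse_uid_option("bogus", &uid) != 0)
		printf("rejected\n");
	return 0;
}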
990 1025
991static int cifs_parse_security_flavors(char *value, 1026static int cifs_parse_security_flavors(char *value,
992 struct smb_vol *vol) 1027 struct smb_vol *vol)
@@ -1424,47 +1459,42 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1424 1459
1425 /* Numeric Values */ 1460 /* Numeric Values */
1426 case Opt_backupuid: 1461 case Opt_backupuid:
1427 if (get_option_ul(args, &option)) { 1462 if (get_option_uid(args, &vol->backupuid)) {
1428 cERROR(1, "%s: Invalid backupuid value", 1463 cERROR(1, "%s: Invalid backupuid value",
1429 __func__); 1464 __func__);
1430 goto cifs_parse_mount_err; 1465 goto cifs_parse_mount_err;
1431 } 1466 }
1432 vol->backupuid = option;
1433 vol->backupuid_specified = true; 1467 vol->backupuid_specified = true;
1434 break; 1468 break;
1435 case Opt_backupgid: 1469 case Opt_backupgid:
1436 if (get_option_ul(args, &option)) { 1470 if (get_option_gid(args, &vol->backupgid)) {
1437 cERROR(1, "%s: Invalid backupgid value", 1471 cERROR(1, "%s: Invalid backupgid value",
1438 __func__); 1472 __func__);
1439 goto cifs_parse_mount_err; 1473 goto cifs_parse_mount_err;
1440 } 1474 }
1441 vol->backupgid = option;
1442 vol->backupgid_specified = true; 1475 vol->backupgid_specified = true;
1443 break; 1476 break;
1444 case Opt_uid: 1477 case Opt_uid:
1445 if (get_option_ul(args, &option)) { 1478 if (get_option_uid(args, &vol->linux_uid)) {
1446 cERROR(1, "%s: Invalid uid value", 1479 cERROR(1, "%s: Invalid uid value",
1447 __func__); 1480 __func__);
1448 goto cifs_parse_mount_err; 1481 goto cifs_parse_mount_err;
1449 } 1482 }
1450 vol->linux_uid = option;
1451 uid_specified = true; 1483 uid_specified = true;
1452 break; 1484 break;
1453 case Opt_cruid: 1485 case Opt_cruid:
1454 if (get_option_ul(args, &option)) { 1486 if (get_option_uid(args, &vol->cred_uid)) {
1455 cERROR(1, "%s: Invalid cruid value", 1487 cERROR(1, "%s: Invalid cruid value",
1456 __func__); 1488 __func__);
1457 goto cifs_parse_mount_err; 1489 goto cifs_parse_mount_err;
1458 } 1490 }
1459 vol->cred_uid = option;
1460 break; 1491 break;
1461 case Opt_gid: 1492 case Opt_gid:
1462 if (get_option_ul(args, &option)) { 1493 if (get_option_gid(args, &vol->linux_gid)) {
1463 cERROR(1, "%s: Invalid gid value", 1494 cERROR(1, "%s: Invalid gid value",
1464 __func__); 1495 __func__);
1465 goto cifs_parse_mount_err; 1496 goto cifs_parse_mount_err;
1466 } 1497 }
1467 vol->linux_gid = option;
1468 gid_specified = true; 1498 gid_specified = true;
1469 break; 1499 break;
1470 case Opt_file_mode: 1500 case Opt_file_mode:
@@ -1917,7 +1947,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1917 } 1947 }
1918 case AF_INET6: { 1948 case AF_INET6: {
1919 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; 1949 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
1920 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; 1950 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
1921 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); 1951 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
1922 } 1952 }
1923 default: 1953 default:
@@ -2241,7 +2271,7 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
2241{ 2271{
2242 switch (ses->server->secType) { 2272 switch (ses->server->secType) {
2243 case Kerberos: 2273 case Kerberos:
2244 if (vol->cred_uid != ses->cred_uid) 2274 if (!uid_eq(vol->cred_uid, ses->cred_uid))
2245 return 0; 2275 return 0;
2246 break; 2276 break;
2247 default: 2277 default:
@@ -2713,7 +2743,7 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
2713 if (new->rsize && new->rsize < old->rsize) 2743 if (new->rsize && new->rsize < old->rsize)
2714 return 0; 2744 return 0;
2715 2745
2716 if (old->mnt_uid != new->mnt_uid || old->mnt_gid != new->mnt_gid) 2746 if (!uid_eq(old->mnt_uid, new->mnt_uid) || !gid_eq(old->mnt_gid, new->mnt_gid))
2717 return 0; 2747 return 0;
2718 2748
2719 if (old->mnt_file_mode != new->mnt_file_mode || 2749 if (old->mnt_file_mode != new->mnt_file_mode ||
@@ -3919,7 +3949,7 @@ cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses)
3919} 3949}
3920 3950
3921static struct cifs_tcon * 3951static struct cifs_tcon *
3922cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) 3952cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
3923{ 3953{
3924 int rc; 3954 int rc;
3925 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); 3955 struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
@@ -3989,7 +4019,7 @@ cifs_sb_tcon_pending_wait(void *unused)
3989 4019
3990/* find and return a tlink with given uid */ 4020/* find and return a tlink with given uid */
3991static struct tcon_link * 4021static struct tcon_link *
3992tlink_rb_search(struct rb_root *root, uid_t uid) 4022tlink_rb_search(struct rb_root *root, kuid_t uid)
3993{ 4023{
3994 struct rb_node *node = root->rb_node; 4024 struct rb_node *node = root->rb_node;
3995 struct tcon_link *tlink; 4025 struct tcon_link *tlink;
@@ -3997,9 +4027,9 @@ tlink_rb_search(struct rb_root *root, uid_t uid)
3997 while (node) { 4027 while (node) {
3998 tlink = rb_entry(node, struct tcon_link, tl_rbnode); 4028 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3999 4029
4000 if (tlink->tl_uid > uid) 4030 if (uid_gt(tlink->tl_uid, uid))
4001 node = node->rb_left; 4031 node = node->rb_left;
4002 else if (tlink->tl_uid < uid) 4032 else if (uid_lt(tlink->tl_uid, uid))
4003 node = node->rb_right; 4033 node = node->rb_right;
4004 else 4034 else
4005 return tlink; 4035 return tlink;
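tlink_rb_search() keeps working exactly as before; only the key comparisons switch from raw < and > to uid_gt()/uid_lt() so the tree can be ordered on the opaque kuid_t type. The shape of the walk, shown here on a plain binary search tree with unsigned keys rather than the kernel rbtree API:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	unsigned int uid;
	struct node *left, *right;
};

/* Explicit comparison helpers, mirroring uid_gt()/uid_lt(). */
static bool uid_gt(unsigned int a, unsigned int b) { return a > b; }
static bool uid_lt(unsigned int a, unsigned int b) { return a < b; }

static struct node *uid_search(struct node *n, unsigned int uid)
{
	while (n) {
		if (uid_gt(n->uid, uid))
			n = n->left;		/* sought key is smaller */
		else if (uid_lt(n->uid, uid))
			n = n->right;		/* sought key is larger */
		else
			return n;
	}
	return NULL;
}

int main(void)
{
	struct node low = { 500, NULL, NULL }, high = { 2000, NULL, NULL };
	struct node root = { 1000, &low, &high };

	printf("%s\n", uid_search(&root, 2000) ? "found" : "missing");
	return 0;
}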
@@ -4018,7 +4048,7 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
4018 tlink = rb_entry(*new, struct tcon_link, tl_rbnode); 4048 tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
4019 parent = *new; 4049 parent = *new;
4020 4050
4021 if (tlink->tl_uid > new_tlink->tl_uid) 4051 if (uid_gt(tlink->tl_uid, new_tlink->tl_uid))
4022 new = &((*new)->rb_left); 4052 new = &((*new)->rb_left);
4023 else 4053 else
4024 new = &((*new)->rb_right); 4054 new = &((*new)->rb_right);
@@ -4048,7 +4078,7 @@ struct tcon_link *
4048cifs_sb_tlink(struct cifs_sb_info *cifs_sb) 4078cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
4049{ 4079{
4050 int ret; 4080 int ret;
4051 uid_t fsuid = current_fsuid(); 4081 kuid_t fsuid = current_fsuid();
4052 struct tcon_link *tlink, *newtlink; 4082 struct tcon_link *tlink, *newtlink;
4053 4083
4054 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 4084 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 8719bbe0dcc3..1cd016217448 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -342,14 +342,14 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
342 342
343 *created |= FILE_CREATED; 343 *created |= FILE_CREATED;
344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
345 args.uid = (__u64) current_fsuid(); 345 args.uid = current_fsuid();
346 if (inode->i_mode & S_ISGID) 346 if (inode->i_mode & S_ISGID)
347 args.gid = (__u64) inode->i_gid; 347 args.gid = inode->i_gid;
348 else 348 else
349 args.gid = (__u64) current_fsgid(); 349 args.gid = current_fsgid();
350 } else { 350 } else {
351 args.uid = NO_CHANGE_64; 351 args.uid = INVALID_UID; /* no change */
352 args.gid = NO_CHANGE_64; 352 args.gid = INVALID_GID; /* no change */
353 } 353 }
354 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid, 354 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid,
355 current->tgid); 355 current->tgid);
@@ -588,11 +588,11 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
588 .device = device_number, 588 .device = device_number,
589 }; 589 };
590 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 590 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
591 args.uid = (__u64) current_fsuid(); 591 args.uid = current_fsuid();
592 args.gid = (__u64) current_fsgid(); 592 args.gid = current_fsgid();
593 } else { 593 } else {
594 args.uid = NO_CHANGE_64; 594 args.uid = INVALID_UID; /* no change */
595 args.gid = NO_CHANGE_64; 595 args.gid = INVALID_GID; /* no change */
596 } 596 }
597 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args, 597 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
598 cifs_sb->local_nls, 598 cifs_sb->local_nls,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0a6677ba212b..c16d2a018ab8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -238,6 +238,23 @@ out:
238 return rc; 238 return rc;
239} 239}
240 240
241static bool
242cifs_has_mand_locks(struct cifsInodeInfo *cinode)
243{
244 struct cifs_fid_locks *cur;
245 bool has_locks = false;
246
247 down_read(&cinode->lock_sem);
248 list_for_each_entry(cur, &cinode->llist, llist) {
249 if (!list_empty(&cur->locks)) {
250 has_locks = true;
251 break;
252 }
253 }
254 up_read(&cinode->lock_sem);
255 return has_locks;
256}
257
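cifs_has_mand_locks() answers one question: does any open fid on this inode currently hold a byte-range lock? It walks the per-fid lock lists under lock_sem and stops at the first non-empty one. Stripped of the kernel list and rwsem machinery, the logic reduces to the sketch below, with arrays replacing the linked lists.

#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>

struct fid_locks {
	size_t nlocks;	/* number of brlocks held through this fid */
};

static bool has_mand_locks(const struct fid_locks *fids, size_t nfids)
{
	for (size_t i = 0; i < nfids; i++)
		if (fids[i].nlocks != 0)
			return true;	/* first hit is enough */
	return false;
}

int main(void)
{
	const struct fid_locks fids[] = { { 0 }, { 3 }, { 0 } };

	printf("%s\n", has_mand_locks(fids, 3) ? "locked" : "clean");
	return 0;
}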
241struct cifsFileInfo * 258struct cifsFileInfo *
242cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, 259cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
243 struct tcon_link *tlink, __u32 oplock) 260 struct tcon_link *tlink, __u32 oplock)
@@ -248,6 +265,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
248 struct cifsFileInfo *cfile; 265 struct cifsFileInfo *cfile;
249 struct cifs_fid_locks *fdlocks; 266 struct cifs_fid_locks *fdlocks;
250 struct cifs_tcon *tcon = tlink_tcon(tlink); 267 struct cifs_tcon *tcon = tlink_tcon(tlink);
268 struct TCP_Server_Info *server = tcon->ses->server;
251 269
252 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 270 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253 if (cfile == NULL) 271 if (cfile == NULL)
@@ -276,12 +294,22 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
276 INIT_WORK(&cfile->oplock_break, cifs_oplock_break); 294 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277 mutex_init(&cfile->fh_mutex); 295 mutex_init(&cfile->fh_mutex);
278 296
297 /*
298 * If the server returned a read oplock and we have mandatory brlocks,
299 * set oplock level to None.
300 */
301 if (oplock == server->vals->oplock_read &&
302 cifs_has_mand_locks(cinode)) {
303 cFYI(1, "Reset oplock val from read to None due to mand locks");
304 oplock = 0;
305 }
306
279 spin_lock(&cifs_file_list_lock); 307 spin_lock(&cifs_file_list_lock);
280 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE) 308 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
281 oplock = fid->pending_open->oplock; 309 oplock = fid->pending_open->oplock;
282 list_del(&fid->pending_open->olist); 310 list_del(&fid->pending_open->olist);
283 311
284 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock); 312 server->ops->set_fid(cfile, fid, oplock);
285 313
286 list_add(&cfile->tlist, &tcon->openFileList); 314 list_add(&cfile->tlist, &tcon->openFileList);
287 /* if readable file instance put first in list*/ 315 /* if readable file instance put first in list*/
@@ -487,8 +515,8 @@ int cifs_open(struct inode *inode, struct file *file)
487 */ 515 */
488 struct cifs_unix_set_info_args args = { 516 struct cifs_unix_set_info_args args = {
489 .mode = inode->i_mode, 517 .mode = inode->i_mode,
490 .uid = NO_CHANGE_64, 518 .uid = INVALID_UID, /* no change */
491 .gid = NO_CHANGE_64, 519 .gid = INVALID_GID, /* no change */
492 .ctime = NO_CHANGE_64, 520 .ctime = NO_CHANGE_64,
493 .atime = NO_CHANGE_64, 521 .atime = NO_CHANGE_64,
494 .mtime = NO_CHANGE_64, 522 .mtime = NO_CHANGE_64,
@@ -919,7 +947,7 @@ static int
919cifs_posix_lock_test(struct file *file, struct file_lock *flock) 947cifs_posix_lock_test(struct file *file, struct file_lock *flock)
920{ 948{
921 int rc = 0; 949 int rc = 0;
922 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 950 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
923 unsigned char saved_type = flock->fl_type; 951 unsigned char saved_type = flock->fl_type;
924 952
925 if ((flock->fl_flags & FL_POSIX) == 0) 953 if ((flock->fl_flags & FL_POSIX) == 0)
@@ -946,7 +974,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
946static int 974static int
947cifs_posix_lock_set(struct file *file, struct file_lock *flock) 975cifs_posix_lock_set(struct file *file, struct file_lock *flock)
948{ 976{
949 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 977 struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
950 int rc = 1; 978 int rc = 1;
951 979
952 if ((flock->fl_flags & FL_POSIX) == 0) 980 if ((flock->fl_flags & FL_POSIX) == 0)
@@ -1422,6 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1422 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1450 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1423 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1451 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424 struct TCP_Server_Info *server = tcon->ses->server; 1452 struct TCP_Server_Info *server = tcon->ses->server;
1453 struct inode *inode = cfile->dentry->d_inode;
1425 1454
1426 if (posix_lck) { 1455 if (posix_lck) {
1427 int posix_lock_type; 1456 int posix_lock_type;
@@ -1459,6 +1488,21 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1459 if (!rc) 1488 if (!rc)
1460 goto out; 1489 goto out;
1461 1490
1491 /*
1492 * Windows 7 server can delay breaking lease from read to None
1493 * if we set a byte-range lock on a file - break it explicitly
1494 * before sending the lock to the server to be sure the next
1495 * read won't conflict with non-overlapted locks due to
1496 * pagereading.
1497 */
1498 if (!CIFS_I(inode)->clientCanCacheAll &&
1499 CIFS_I(inode)->clientCanCacheRead) {
1500 cifs_invalidate_mapping(inode);
1501 cFYI(1, "Set no oplock for inode=%p due to mand locks",
1502 inode);
1503 CIFS_I(inode)->clientCanCacheRead = false;
1504 }
1505
1462 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, 1506 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1463 type, 1, 0, wait_flag); 1507 type, 1, 0, wait_flag);
1464 if (rc) { 1508 if (rc) {
@@ -1504,7 +1548,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1504 1548
1505 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1549 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1506 netfid = cfile->fid.netfid; 1550 netfid = cfile->fid.netfid;
1507 cinode = CIFS_I(file->f_path.dentry->d_inode); 1551 cinode = CIFS_I(file_inode(file));
1508 1552
1509 if (cap_unix(tcon->ses) && 1553 if (cap_unix(tcon->ses) &&
1510 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && 1554 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
@@ -1649,7 +1693,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1649 are always at the end of the list but since the first entry might 1693 are always at the end of the list but since the first entry might
1650 have a close pending, we go through the whole list */ 1694 have a close pending, we go through the whole list */
1651 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1695 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1652 if (fsuid_only && open_file->uid != current_fsuid()) 1696 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1653 continue; 1697 continue;
1654 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { 1698 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1655 if (!open_file->invalidHandle) { 1699 if (!open_file->invalidHandle) {
@@ -1702,7 +1746,7 @@ refind_writable:
1702 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { 1746 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1703 if (!any_available && open_file->pid != current->tgid) 1747 if (!any_available && open_file->pid != current->tgid)
1704 continue; 1748 continue;
1705 if (fsuid_only && open_file->uid != current_fsuid()) 1749 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1706 continue; 1750 continue;
1707 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { 1751 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1708 if (!open_file->invalidHandle) { 1752 if (!open_file->invalidHandle) {
@@ -2103,15 +2147,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
2103 } else { 2147 } else {
2104 rc = copied; 2148 rc = copied;
2105 pos += copied; 2149 pos += copied;
2106 /* 2150 set_page_dirty(page);
2107 * When we use strict cache mode and cifs_strict_writev was run
2108 * with level II oplock (indicated by leave_pages_clean field of
2109 * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev
2110 * sent the data to the server itself.
2111 */
2112 if (!CIFS_I(inode)->leave_pages_clean ||
2113 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO))
2114 set_page_dirty(page);
2115 } 2151 }
2116 2152
2117 if (rc > 0) { 2153 if (rc > 0) {
@@ -2135,7 +2171,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2135 struct cifs_tcon *tcon; 2171 struct cifs_tcon *tcon;
2136 struct TCP_Server_Info *server; 2172 struct TCP_Server_Info *server;
2137 struct cifsFileInfo *smbfile = file->private_data; 2173 struct cifsFileInfo *smbfile = file->private_data;
2138 struct inode *inode = file->f_path.dentry->d_inode; 2174 struct inode *inode = file_inode(file);
2139 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2175 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2140 2176
2141 rc = filemap_write_and_wait_range(inode->i_mapping, start, end); 2177 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2210,7 +2246,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2210 */ 2246 */
2211int cifs_flush(struct file *file, fl_owner_t id) 2247int cifs_flush(struct file *file, fl_owner_t id)
2212{ 2248{
2213 struct inode *inode = file->f_path.dentry->d_inode; 2249 struct inode *inode = file_inode(file);
2214 int rc = 0; 2250 int rc = 0;
2215 2251
2216 if (file->f_mode & FMODE_WRITE) 2252 if (file->f_mode & FMODE_WRITE)
@@ -2444,7 +2480,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2444 ssize_t written; 2480 ssize_t written;
2445 struct inode *inode; 2481 struct inode *inode;
2446 2482
2447 inode = iocb->ki_filp->f_path.dentry->d_inode; 2483 inode = file_inode(iocb->ki_filp);
2448 2484
2449 /* 2485 /*
2450 * BB - optimize the way when signing is disabled. We can drop this 2486 * BB - optimize the way when signing is disabled. We can drop this
@@ -2462,8 +2498,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2462} 2498}
2463 2499
2464static ssize_t 2500static ssize_t
2465cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov, 2501cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2466 unsigned long nr_segs, loff_t pos, bool cache_ex) 2502 unsigned long nr_segs, loff_t pos)
2467{ 2503{
2468 struct file *file = iocb->ki_filp; 2504 struct file *file = iocb->ki_filp;
2469 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 2505 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2485,12 +2521,8 @@ cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
2485 server->vals->exclusive_lock_type, NULL, 2521 server->vals->exclusive_lock_type, NULL,
2486 CIFS_WRITE_OP)) { 2522 CIFS_WRITE_OP)) {
2487 mutex_lock(&inode->i_mutex); 2523 mutex_lock(&inode->i_mutex);
2488 if (!cache_ex)
2489 cinode->leave_pages_clean = true;
2490 rc = __generic_file_aio_write(iocb, iov, nr_segs, 2524 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2491 &iocb->ki_pos); 2525 &iocb->ki_pos);
2492 if (!cache_ex)
2493 cinode->leave_pages_clean = false;
2494 mutex_unlock(&inode->i_mutex); 2526 mutex_unlock(&inode->i_mutex);
2495 } 2527 }
2496 2528
@@ -2511,66 +2543,38 @@ ssize_t
2511cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 2543cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2512 unsigned long nr_segs, loff_t pos) 2544 unsigned long nr_segs, loff_t pos)
2513{ 2545{
2514 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2546 struct inode *inode = file_inode(iocb->ki_filp);
2515 struct cifsInodeInfo *cinode = CIFS_I(inode); 2547 struct cifsInodeInfo *cinode = CIFS_I(inode);
2516 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2548 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2517 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 2549 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2518 iocb->ki_filp->private_data; 2550 iocb->ki_filp->private_data;
2519 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 2551 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2520 ssize_t written, written2; 2552 ssize_t written;
2521 /*
2522 * We need to store clientCanCacheAll here to prevent race
2523 * conditions - this value can be changed during an execution
2524 * of generic_file_aio_write. For CIFS it can be changed from
2525 * true to false only, but for SMB2 it can be changed both from
2526 * true to false and vice versa. So, we can end up with a data
2527 * stored in the cache, not marked dirty and not sent to the
2528 * server if this value changes its state from false to true
2529 * after cifs_write_end.
2530 */
2531 bool cache_ex = cinode->clientCanCacheAll;
2532 bool cache_read = cinode->clientCanCacheRead;
2533 int rc;
2534 loff_t saved_pos;
2535 2553
2536 if (cache_ex) { 2554 if (cinode->clientCanCacheAll) {
2537 if (cap_unix(tcon->ses) && 2555 if (cap_unix(tcon->ses) &&
2538 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && 2556 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2539 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( 2557 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2540 tcon->fsUnixInfo.Capability)))
2541 return generic_file_aio_write(iocb, iov, nr_segs, pos); 2558 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2542 return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex); 2559 return cifs_writev(iocb, iov, nr_segs, pos);
2543 } 2560 }
2544
2545 /* 2561 /*
2546 * For files without exclusive oplock in strict cache mode we need to 2562 * For non-oplocked files in strict cache mode we need to write the data
2547 * write the data to the server exactly from the pos to pos+len-1 rather 2563 * to the server exactly from the pos to pos+len-1 rather than flush all
2548 * than flush all affected pages because it may cause a error with 2564 * affected pages because it may cause a error with mandatory locks on
2549 * mandatory locks on these pages but not on the region from pos to 2565 * these pages but not on the region from pos to ppos+len-1.
2550 * ppos+len-1.
2551 */ 2566 */
2552 written = cifs_user_writev(iocb, iov, nr_segs, pos); 2567 written = cifs_user_writev(iocb, iov, nr_segs, pos);
2553 if (!cache_read || written <= 0) 2568 if (written > 0 && cinode->clientCanCacheRead) {
2554 return written; 2569 /*
2555 2570 * Windows 7 server can delay breaking level2 oplock if a write
2556 saved_pos = iocb->ki_pos; 2571 * request comes - break it on the client to prevent reading
2557 iocb->ki_pos = pos; 2572 * an old data.
2558 /* we have a read oplock - need to store a data in the page cache */ 2573 */
2559 if (cap_unix(tcon->ses) && 2574 cifs_invalidate_mapping(inode);
2560 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) && 2575 cFYI(1, "Set no oplock for inode=%p after a write operation",
2561 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu( 2576 inode);
2562 tcon->fsUnixInfo.Capability))) 2577 cinode->clientCanCacheRead = false;
2563 written2 = generic_file_aio_write(iocb, iov, nr_segs, pos);
2564 else
2565 written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos,
2566 cache_ex);
2567 /* errors occured during writing - invalidate the page cache */
2568 if (written2 < 0) {
2569 rc = cifs_invalidate_mapping(inode);
2570 if (rc)
2571 written = (ssize_t)rc;
2572 else
2573 iocb->ki_pos = saved_pos;
2574 } 2578 }
2575 return written; 2579 return written;
2576} 2580}
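The rewritten cifs_strict_writev() drops the leave_pages_clean juggling: with an exclusive oplock the write still goes through the page cache, otherwise the data is written straight to the server and, if a level II (read) oplock was held, the mapping is invalidated and the oplock given up so later reads cannot return stale pages. A control-flow sketch with the I/O and locking stubbed out; struct state and strict_write() are invented names.

#include <stdio.h>
#include <stdbool.h>

struct state {
	bool cache_all;		/* exclusive oplock held */
	bool cache_read;	/* level II (read) oplock held */
	bool invalidated;
};

static long strict_write(struct state *st, long nbytes)
{
	if (st->cache_all)
		return nbytes;		/* cached, generic write path */

	/* Write-through path, as in cifs_user_writev(). */
	long written = nbytes;

	if (written > 0 && st->cache_read) {
		st->invalidated = true;	/* cifs_invalidate_mapping() */
		st->cache_read = false;	/* stop trusting cached reads */
	}
	return written;
}

int main(void)
{
	struct state st = { false, true, false };

	printf("wrote %ld, invalidated=%d\n",
	       strict_write(&st, 4096), st.invalidated);
	return 0;
}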
@@ -2911,7 +2915,7 @@ ssize_t
2911cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 2915cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2912 unsigned long nr_segs, loff_t pos) 2916 unsigned long nr_segs, loff_t pos)
2913{ 2917{
2914 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2918 struct inode *inode = file_inode(iocb->ki_filp);
2915 struct cifsInodeInfo *cinode = CIFS_I(inode); 2919 struct cifsInodeInfo *cinode = CIFS_I(inode);
2916 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2920 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2917 struct cifsFileInfo *cfile = (struct cifsFileInfo *) 2921 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
@@ -3059,7 +3063,7 @@ static struct vm_operations_struct cifs_file_vm_ops = {
3059int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 3063int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3060{ 3064{
3061 int rc, xid; 3065 int rc, xid;
3062 struct inode *inode = file->f_path.dentry->d_inode; 3066 struct inode *inode = file_inode(file);
3063 3067
3064 xid = get_xid(); 3068 xid = get_xid();
3065 3069
@@ -3352,7 +3356,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
3352 int rc; 3356 int rc;
3353 3357
3354 /* Is the page cached? */ 3358 /* Is the page cached? */
3355 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page); 3359 rc = cifs_readpage_from_fscache(file_inode(file), page);
3356 if (rc == 0) 3360 if (rc == 0)
3357 goto read_complete; 3361 goto read_complete;
3358 3362
@@ -3367,8 +3371,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
3367 else 3371 else
3368 cFYI(1, "Bytes read %d", rc); 3372 cFYI(1, "Bytes read %d", rc);
3369 3373
3370 file->f_path.dentry->d_inode->i_atime = 3374 file_inode(file)->i_atime =
3371 current_fs_time(file->f_path.dentry->d_inode->i_sb); 3375 current_fs_time(file_inode(file)->i_sb);
3372 3376
3373 if (PAGE_CACHE_SIZE > rc) 3377 if (PAGE_CACHE_SIZE > rc)
3374 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc); 3378 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
@@ -3377,7 +3381,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
3377 SetPageUptodate(page); 3381 SetPageUptodate(page);
3378 3382
3379 /* send this page to the cache */ 3383 /* send this page to the cache */
3380 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page); 3384 cifs_readpage_to_fscache(file_inode(file), page);
3381 3385
3382 rc = 0; 3386 rc = 0;
3383 3387
@@ -3577,6 +3581,13 @@ void cifs_oplock_break(struct work_struct *work)
3577 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 3581 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3578 int rc = 0; 3582 int rc = 0;
3579 3583
3584 if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3585 cifs_has_mand_locks(cinode)) {
3586 cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
3587 inode);
3588 cinode->clientCanCacheRead = false;
3589 }
3590
3580 if (inode && S_ISREG(inode->i_mode)) { 3591 if (inode && S_ISREG(inode->i_mode)) {
3581 if (cinode->clientCanCacheRead) 3592 if (cinode->clientCanCacheRead)
3582 break_lease(inode, O_RDONLY); 3593 break_lease(inode, O_RDONLY);
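
A minimal sketch of the accessor pattern the hunks above switch to, assuming the file_inode() helper from <linux/fs.h> introduced in this series; example_dump_size() is an illustrative name, not part of the patch.

#include <linux/fs.h>

/* Illustrative caller: file_inode() hides the f_path.dentry->d_inode
 * indirection and also works for files that have no dentry of their own. */
static loff_t example_dump_size(struct file *filp)
{
	struct inode *inode = file_inode(filp);

	return i_size_read(inode);
}
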
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ed6208ff85a7..83f2606c76d0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -244,15 +244,25 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
244 break; 244 break;
245 } 245 }
246 246
247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 247 fattr->cf_uid = cifs_sb->mnt_uid;
248 fattr->cf_uid = cifs_sb->mnt_uid; 248 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) {
249 else 249 u64 id = le64_to_cpu(info->Uid);
250 fattr->cf_uid = le64_to_cpu(info->Uid); 250 if (id < ((uid_t)-1)) {
251 251 kuid_t uid = make_kuid(&init_user_ns, id);
252 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 252 if (uid_valid(uid))
253 fattr->cf_gid = cifs_sb->mnt_gid; 253 fattr->cf_uid = uid;
254 else 254 }
255 fattr->cf_gid = le64_to_cpu(info->Gid); 255 }
256
257 fattr->cf_gid = cifs_sb->mnt_gid;
258 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) {
259 u64 id = le64_to_cpu(info->Gid);
260 if (id < ((gid_t)-1)) {
261 kgid_t gid = make_kgid(&init_user_ns, id);
262 if (gid_valid(gid))
263 fattr->cf_gid = gid;
264 }
265 }
256 266
257 fattr->cf_nlink = le64_to_cpu(info->Nlinks); 267 fattr->cf_nlink = le64_to_cpu(info->Nlinks);
258} 268}
@@ -289,7 +299,7 @@ cifs_get_file_info_unix(struct file *filp)
289 unsigned int xid; 299 unsigned int xid;
290 FILE_UNIX_BASIC_INFO find_data; 300 FILE_UNIX_BASIC_INFO find_data;
291 struct cifs_fattr fattr; 301 struct cifs_fattr fattr;
292 struct inode *inode = filp->f_path.dentry->d_inode; 302 struct inode *inode = file_inode(filp);
293 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 303 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
294 struct cifsFileInfo *cfile = filp->private_data; 304 struct cifsFileInfo *cfile = filp->private_data;
295 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 305 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -558,7 +568,7 @@ cifs_get_file_info(struct file *filp)
558 unsigned int xid; 568 unsigned int xid;
559 FILE_ALL_INFO find_data; 569 FILE_ALL_INFO find_data;
560 struct cifs_fattr fattr; 570 struct cifs_fattr fattr;
561 struct inode *inode = filp->f_path.dentry->d_inode; 571 struct inode *inode = file_inode(filp);
562 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 572 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
563 struct cifsFileInfo *cfile = filp->private_data; 573 struct cifsFileInfo *cfile = filp->private_data;
564 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 574 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
@@ -806,10 +816,9 @@ static bool
806inode_has_hashed_dentries(struct inode *inode) 816inode_has_hashed_dentries(struct inode *inode)
807{ 817{
808 struct dentry *dentry; 818 struct dentry *dentry;
809 struct hlist_node *p;
810 819
811 spin_lock(&inode->i_lock); 820 spin_lock(&inode->i_lock);
812 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 821 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
813 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 822 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
814 spin_unlock(&inode->i_lock); 823 spin_unlock(&inode->i_lock);
815 return true; 824 return true;
@@ -1245,14 +1254,14 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
1245 .device = 0, 1254 .device = 0,
1246 }; 1255 };
1247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1256 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
1248 args.uid = (__u64)current_fsuid(); 1257 args.uid = current_fsuid();
1249 if (parent->i_mode & S_ISGID) 1258 if (parent->i_mode & S_ISGID)
1250 args.gid = (__u64)parent->i_gid; 1259 args.gid = parent->i_gid;
1251 else 1260 else
1252 args.gid = (__u64)current_fsgid(); 1261 args.gid = current_fsgid();
1253 } else { 1262 } else {
1254 args.uid = NO_CHANGE_64; 1263 args.uid = INVALID_UID; /* no change */
1255 args.gid = NO_CHANGE_64; 1264 args.gid = INVALID_GID; /* no change */
1256 } 1265 }
1257 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, 1266 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
1258 cifs_sb->local_nls, 1267 cifs_sb->local_nls,
@@ -1678,7 +1687,7 @@ cifs_invalidate_mapping(struct inode *inode)
1678int cifs_revalidate_file_attr(struct file *filp) 1687int cifs_revalidate_file_attr(struct file *filp)
1679{ 1688{
1680 int rc = 0; 1689 int rc = 0;
1681 struct inode *inode = filp->f_path.dentry->d_inode; 1690 struct inode *inode = file_inode(filp);
1682 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 1691 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
1683 1692
1684 if (!cifs_inode_needs_reval(inode)) 1693 if (!cifs_inode_needs_reval(inode))
@@ -1735,7 +1744,7 @@ out:
1735int cifs_revalidate_file(struct file *filp) 1744int cifs_revalidate_file(struct file *filp)
1736{ 1745{
1737 int rc; 1746 int rc;
1738 struct inode *inode = filp->f_path.dentry->d_inode; 1747 struct inode *inode = file_inode(filp);
1739 1748
1740 rc = cifs_revalidate_file_attr(filp); 1749 rc = cifs_revalidate_file_attr(filp);
1741 if (rc) 1750 if (rc)
@@ -2013,12 +2022,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
2013 if (attrs->ia_valid & ATTR_UID) 2022 if (attrs->ia_valid & ATTR_UID)
2014 args->uid = attrs->ia_uid; 2023 args->uid = attrs->ia_uid;
2015 else 2024 else
2016 args->uid = NO_CHANGE_64; 2025 args->uid = INVALID_UID; /* no change */
2017 2026
2018 if (attrs->ia_valid & ATTR_GID) 2027 if (attrs->ia_valid & ATTR_GID)
2019 args->gid = attrs->ia_gid; 2028 args->gid = attrs->ia_gid;
2020 else 2029 else
2021 args->gid = NO_CHANGE_64; 2030 args->gid = INVALID_GID; /* no change */
2022 2031
2023 if (attrs->ia_valid & ATTR_ATIME) 2032 if (attrs->ia_valid & ATTR_ATIME)
2024 args->atime = cifs_UnixTimeToNT(attrs->ia_atime); 2033 args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
@@ -2086,8 +2095,8 @@ static int
2086cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) 2095cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2087{ 2096{
2088 unsigned int xid; 2097 unsigned int xid;
2089 uid_t uid = NO_CHANGE_32; 2098 kuid_t uid = INVALID_UID;
2090 gid_t gid = NO_CHANGE_32; 2099 kgid_t gid = INVALID_GID;
2091 struct inode *inode = direntry->d_inode; 2100 struct inode *inode = direntry->d_inode;
2092 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2101 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2093 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 2102 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
@@ -2146,7 +2155,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2146 2155
2147#ifdef CONFIG_CIFS_ACL 2156#ifdef CONFIG_CIFS_ACL
2148 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2157 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2149 if (uid != NO_CHANGE_32 || gid != NO_CHANGE_32) { 2158 if (uid_valid(uid) || gid_valid(gid)) {
2150 rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64, 2159 rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64,
2151 uid, gid); 2160 uid, gid);
2152 if (rc) { 2161 if (rc) {
@@ -2170,7 +2179,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2170#ifdef CONFIG_CIFS_ACL 2179#ifdef CONFIG_CIFS_ACL
2171 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2180 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2172 rc = id_mode_to_cifs_acl(inode, full_path, mode, 2181 rc = id_mode_to_cifs_acl(inode, full_path, mode,
2173 NO_CHANGE_32, NO_CHANGE_32); 2182 INVALID_UID, INVALID_GID);
2174 if (rc) { 2183 if (rc) {
2175 cFYI(1, "%s: Setting ACL failed with error: %d", 2184 cFYI(1, "%s: Setting ACL failed with error: %d",
2176 __func__, rc); 2185 __func__, rc);
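
A minimal sketch of the id-mapping fallback that cifs_unix_basic_to_fattr() now applies, assuming the make_kuid()/uid_valid() helpers from <linux/uidgid.h>; example_pick_uid(), default_uid and wire_uid are illustrative names.

#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Start from the mount-wide default owner and only override it when the
 * server-supplied numeric id maps to a valid kuid_t in init_user_ns. */
static kuid_t example_pick_uid(kuid_t default_uid, u64 wire_uid)
{
	kuid_t uid = default_uid;

	if (wire_uid < ((uid_t)-1)) {
		kuid_t mapped = make_kuid(&init_user_ns, (uid_t)wire_uid);

		if (uid_valid(mapped))
			uid = mapped;
	}
	return uid;
}
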
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index fd5009d56f9f..6c9f1214cf0b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -30,7 +30,7 @@
30 30
31long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) 31long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
32{ 32{
33 struct inode *inode = filep->f_dentry->d_inode; 33 struct inode *inode = file_inode(filep);
34 int rc = -ENOTTY; /* strange error - but the precedent */ 34 int rc = -ENOTTY; /* strange error - but the precedent */
35 unsigned int xid; 35 unsigned int xid;
36 struct cifs_sb_info *cifs_sb; 36 struct cifs_sb_info *cifs_sb;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 51dc2fb6e854..9f6c4c45d21e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -76,7 +76,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
76 } 76 }
77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); 77 rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len);
78 if (rc) { 78 if (rc) {
79 cERROR(1, "%s: Could not update iwth link_str", __func__); 79 cERROR(1, "%s: Could not update with link_str", __func__);
80 goto symlink_hash_err; 80 goto symlink_hash_err;
81 } 81 }
82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash); 82 rc = crypto_shash_final(&sdescmd5->shash, md5_hash);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3a00c0d0cead..1b15bf839f37 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,7 +569,7 @@ bool
569backup_cred(struct cifs_sb_info *cifs_sb) 569backup_cred(struct cifs_sb_info *cifs_sb)
570{ 570{
571 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) { 571 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) {
572 if (cifs_sb->mnt_backupuid == current_fsuid()) 572 if (uid_eq(cifs_sb->mnt_backupuid, current_fsuid()))
573 return true; 573 return true;
574 } 574 }
575 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) { 575 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) {
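
A minimal sketch of the kuid_t comparison style used in the backup_cred() hunk above, assuming uid_eq() from <linux/uidgid.h> and current_fsuid() from <linux/cred.h>; example_is_backup_user() and backup_uid are illustrative names.

#include <linux/types.h>
#include <linux/cred.h>
#include <linux/uidgid.h>

/* kuid_t becomes an opaque type with the user-namespace conversion, so it
 * is compared with uid_eq() instead of '=='. */
static bool example_is_backup_user(kuid_t backup_uid)
{
	return uid_eq(backup_uid, current_fsuid());
}
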
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index cdd6ff48246b..df40cc5fd13a 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -82,12 +82,10 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
82 82
83 cFYI(1, "%s: for %s", __func__, name->name); 83 cFYI(1, "%s: for %s", __func__, name->name);
84 84
85 if (parent->d_op && parent->d_op->d_hash) 85 dentry = d_hash_and_lookup(parent, name);
86 parent->d_op->d_hash(parent, parent->d_inode, name); 86 if (unlikely(IS_ERR(dentry)))
87 else 87 return;
88 name->hash = full_name_hash(name->name, name->len);
89 88
90 dentry = d_lookup(parent, name);
91 if (dentry) { 89 if (dentry) {
92 int err; 90 int err;
93 91
@@ -505,7 +503,7 @@ static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode)
505 whether we can use the cached search results from the previous search */ 503 whether we can use the cached search results from the previous search */
506static int is_dir_changed(struct file *file) 504static int is_dir_changed(struct file *file)
507{ 505{
508 struct inode *inode = file->f_path.dentry->d_inode; 506 struct inode *inode = file_inode(file);
509 struct cifsInodeInfo *cifsInfo = CIFS_I(inode); 507 struct cifsInodeInfo *cifsInfo = CIFS_I(inode);
510 508
511 if (cifsInfo->time == 0) 509 if (cifsInfo->time == 0)
@@ -778,7 +776,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
778 switch ((int) file->f_pos) { 776 switch ((int) file->f_pos) {
779 case 0: 777 case 0:
780 if (filldir(direntry, ".", 1, file->f_pos, 778 if (filldir(direntry, ".", 1, file->f_pos,
781 file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) { 779 file_inode(file)->i_ino, DT_DIR) < 0) {
782 cERROR(1, "Filldir for current dir failed"); 780 cERROR(1, "Filldir for current dir failed");
783 rc = -ENOMEM; 781 rc = -ENOMEM;
784 break; 782 break;
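
A minimal sketch of the d_hash_and_lookup() calling convention that cifs_prime_dcache() switches to above, assuming the dcache API after this series; example_lookup_child() is an illustrative name.

#include <linux/dcache.h>
#include <linux/err.h>

/* d_hash_and_lookup() now hashes the name itself and may return
 * ERR_PTR(-error) from a filesystem ->d_hash() hook, so callers check
 * IS_ERR() before dereferencing; NULL still means "not in the dcache". */
static struct dentry *example_lookup_child(struct dentry *parent,
					   struct qstr *name)
{
	struct dentry *dentry = d_hash_and_lookup(parent, name);

	if (IS_ERR(dentry))
		return NULL;	/* treat a rejected name as a miss */
	return dentry;
}
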
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index a5d234c8d5d9..47bc5a87f94e 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -53,6 +53,13 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
53 mutex_unlock(&server->srv_mutex); 53 mutex_unlock(&server->srv_mutex);
54 return rc; 54 return rc;
55 } 55 }
56
57 /*
58 * The response to this call was already factored into the sequence
59 * number when the call went out, so we must adjust it back downward
60 * after signing here.
61 */
62 --server->sequence_number;
56 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); 63 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
57 mutex_unlock(&server->srv_mutex); 64 mutex_unlock(&server->srv_mutex);
58 65
@@ -952,4 +959,5 @@ struct smb_version_values smb1_values = {
952 .cap_unix = CAP_UNIX, 959 .cap_unix = CAP_UNIX,
953 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND, 960 .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
954 .cap_large_files = CAP_LARGE_FILES, 961 .cap_large_files = CAP_LARGE_FILES,
962 .oplock_read = OPLOCK_READ,
955}; 963};
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d79de7bc4435..c9c7aa7ed966 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -708,6 +708,7 @@ struct smb_version_values smb20_values = {
708 .cap_unix = 0, 708 .cap_unix = 0,
709 .cap_nt_find = SMB2_NT_FIND, 709 .cap_nt_find = SMB2_NT_FIND,
710 .cap_large_files = SMB2_LARGE_FILES, 710 .cap_large_files = SMB2_LARGE_FILES,
711 .oplock_read = SMB2_OPLOCK_LEVEL_II,
711}; 712};
712 713
713struct smb_version_values smb21_values = { 714struct smb_version_values smb21_values = {
@@ -725,6 +726,7 @@ struct smb_version_values smb21_values = {
725 .cap_unix = 0, 726 .cap_unix = 0,
726 .cap_nt_find = SMB2_NT_FIND, 727 .cap_nt_find = SMB2_NT_FIND,
727 .cap_large_files = SMB2_LARGE_FILES, 728 .cap_large_files = SMB2_LARGE_FILES,
729 .oplock_read = SMB2_OPLOCK_LEVEL_II,
728}; 730};
729 731
730struct smb_version_values smb30_values = { 732struct smb_version_values smb30_values = {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 76d974c952fe..1a528680ec5a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -144,9 +144,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
144 144
145 *sent = 0; 145 *sent = 0;
146 146
147 if (ssocket == NULL)
148 return -ENOTSOCK; /* BB eventually add reconnect code here */
149
150 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr; 147 smb_msg.msg_name = (struct sockaddr *) &server->dstaddr;
151 smb_msg.msg_namelen = sizeof(struct sockaddr); 148 smb_msg.msg_namelen = sizeof(struct sockaddr);
152 smb_msg.msg_control = NULL; 149 smb_msg.msg_control = NULL;
@@ -291,6 +288,9 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
291 struct socket *ssocket = server->ssocket; 288 struct socket *ssocket = server->ssocket;
292 int val = 1; 289 int val = 1;
293 290
291 if (ssocket == NULL)
292 return -ENOTSOCK;
293
294 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length); 294 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
295 dump_smb(iov[0].iov_base, iov[0].iov_len); 295 dump_smb(iov[0].iov_base, iov[0].iov_len);
296 296
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 958ae0e0ff8c..1da168c61d35 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -33,7 +33,7 @@ void coda_cache_enter(struct inode *inode, int mask)
33 33
34 spin_lock(&cii->c_lock); 34 spin_lock(&cii->c_lock);
35 cii->c_cached_epoch = atomic_read(&permission_epoch); 35 cii->c_cached_epoch = atomic_read(&permission_epoch);
36 if (cii->c_uid != current_fsuid()) { 36 if (!uid_eq(cii->c_uid, current_fsuid())) {
37 cii->c_uid = current_fsuid(); 37 cii->c_uid = current_fsuid();
38 cii->c_cached_perm = mask; 38 cii->c_cached_perm = mask;
39 } else 39 } else
@@ -65,7 +65,7 @@ int coda_cache_check(struct inode *inode, int mask)
65 65
66 spin_lock(&cii->c_lock); 66 spin_lock(&cii->c_lock);
67 hit = (mask & cii->c_cached_perm) == mask && 67 hit = (mask & cii->c_cached_perm) == mask &&
68 cii->c_uid == current_fsuid() && 68 uid_eq(cii->c_uid, current_fsuid()) &&
69 cii->c_cached_epoch == atomic_read(&permission_epoch); 69 cii->c_cached_epoch == atomic_read(&permission_epoch);
70 spin_unlock(&cii->c_lock); 70 spin_unlock(&cii->c_lock);
71 71
diff --git a/fs/coda/coda_fs_i.h b/fs/coda/coda_fs_i.h
index b24fdfd8a3f0..c64075213218 100644
--- a/fs/coda/coda_fs_i.h
+++ b/fs/coda/coda_fs_i.h
@@ -25,7 +25,7 @@ struct coda_inode_info {
25 u_short c_flags; /* flags (see below) */ 25 u_short c_flags; /* flags (see below) */
26 unsigned int c_mapcount; /* nr of times this inode is mapped */ 26 unsigned int c_mapcount; /* nr of times this inode is mapped */
27 unsigned int c_cached_epoch; /* epoch for cached permissions */ 27 unsigned int c_cached_epoch; /* epoch for cached permissions */
28 vuid_t c_uid; /* fsuid for cached permissions */ 28 kuid_t c_uid; /* fsuid for cached permissions */
29 unsigned int c_cached_perm; /* cached access permissions */ 29 unsigned int c_cached_perm; /* cached access permissions */
30 spinlock_t c_lock; 30 spinlock_t c_lock;
31 struct inode vfs_inode; 31 struct inode vfs_inode;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 854ace712685..2849f41e72a2 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -100,9 +100,9 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
100 if (attr->va_mode != (u_short) -1) 100 if (attr->va_mode != (u_short) -1)
101 inode->i_mode = attr->va_mode | inode_type; 101 inode->i_mode = attr->va_mode | inode_type;
102 if (attr->va_uid != -1) 102 if (attr->va_uid != -1)
103 inode->i_uid = (uid_t) attr->va_uid; 103 inode->i_uid = make_kuid(&init_user_ns, (uid_t) attr->va_uid);
104 if (attr->va_gid != -1) 104 if (attr->va_gid != -1)
105 inode->i_gid = (gid_t) attr->va_gid; 105 inode->i_gid = make_kgid(&init_user_ns, (gid_t) attr->va_gid);
106 if (attr->va_nlink != -1) 106 if (attr->va_nlink != -1)
107 set_nlink(inode, attr->va_nlink); 107 set_nlink(inode, attr->va_nlink);
108 if (attr->va_size != -1) 108 if (attr->va_size != -1)
@@ -171,10 +171,10 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
171 vattr->va_mode = iattr->ia_mode; 171 vattr->va_mode = iattr->ia_mode;
172 } 172 }
173 if ( valid & ATTR_UID ) { 173 if ( valid & ATTR_UID ) {
174 vattr->va_uid = (vuid_t) iattr->ia_uid; 174 vattr->va_uid = (vuid_t) from_kuid(&init_user_ns, iattr->ia_uid);
175 } 175 }
176 if ( valid & ATTR_GID ) { 176 if ( valid & ATTR_GID ) {
177 vattr->va_gid = (vgid_t) iattr->ia_gid; 177 vattr->va_gid = (vgid_t) from_kgid(&init_user_ns, iattr->ia_gid);
178 } 178 }
179 if ( valid & ATTR_SIZE ) { 179 if ( valid & ATTR_SIZE ) {
180 vattr->va_size = iattr->ia_size; 180 vattr->va_size = iattr->ia_size;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 49fe52d25600..b7d3a05c062c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -397,7 +397,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
397 * We can't use vfs_readdir because we have to keep the file 397 * We can't use vfs_readdir because we have to keep the file
398 * position in sync between the coda_file and the host_file. 398 * position in sync between the coda_file and the host_file.
399 * and as such we need grab the inode mutex. */ 399 * and as such we need grab the inode mutex. */
400 struct inode *host_inode = host_file->f_path.dentry->d_inode; 400 struct inode *host_inode = file_inode(host_file);
401 401
402 mutex_lock(&host_inode->i_mutex); 402 mutex_lock(&host_inode->i_mutex);
403 host_file->f_pos = coda_file->f_pos; 403 host_file->f_pos = coda_file->f_pos;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 8edd404e6419..fa4c100bdc7d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -66,7 +66,7 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
66static ssize_t 66static ssize_t
67coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos) 67coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos)
68{ 68{
69 struct inode *host_inode, *coda_inode = coda_file->f_path.dentry->d_inode; 69 struct inode *host_inode, *coda_inode = file_inode(coda_file);
70 struct coda_file_info *cfi; 70 struct coda_file_info *cfi;
71 struct file *host_file; 71 struct file *host_file;
72 ssize_t ret; 72 ssize_t ret;
@@ -78,7 +78,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
78 if (!host_file->f_op || !host_file->f_op->write) 78 if (!host_file->f_op || !host_file->f_op->write)
79 return -EINVAL; 79 return -EINVAL;
80 80
81 host_inode = host_file->f_path.dentry->d_inode; 81 host_inode = file_inode(host_file);
82 mutex_lock(&coda_inode->i_mutex); 82 mutex_lock(&coda_inode->i_mutex);
83 83
84 ret = host_file->f_op->write(host_file, buf, count, ppos); 84 ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -106,8 +106,8 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
106 if (!host_file->f_op || !host_file->f_op->mmap) 106 if (!host_file->f_op || !host_file->f_op->mmap)
107 return -ENODEV; 107 return -ENODEV;
108 108
109 coda_inode = coda_file->f_path.dentry->d_inode; 109 coda_inode = file_inode(coda_file);
110 host_inode = host_file->f_path.dentry->d_inode; 110 host_inode = file_inode(host_file);
111 111
112 cii = ITOC(coda_inode); 112 cii = ITOC(coda_inode);
113 spin_lock(&cii->c_lock); 113 spin_lock(&cii->c_lock);
@@ -178,7 +178,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
178 err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), 178 err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode),
179 coda_flags, coda_file->f_cred->fsuid); 179 coda_flags, coda_file->f_cred->fsuid);
180 180
181 host_inode = cfi->cfi_container->f_path.dentry->d_inode; 181 host_inode = file_inode(cfi->cfi_container);
182 cii = ITOC(coda_inode); 182 cii = ITOC(coda_inode);
183 183
184 /* did we mmap this file? */ 184 /* did we mmap this file? */
@@ -202,7 +202,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
202int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync) 202int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
203{ 203{
204 struct file *host_file; 204 struct file *host_file;
205 struct inode *coda_inode = coda_file->f_path.dentry->d_inode; 205 struct inode *coda_inode = file_inode(coda_file);
206 struct coda_file_info *cfi; 206 struct coda_file_info *cfi;
207 int err; 207 int err;
208 208
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index be2aa4909487..dada9d0abede 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -20,6 +20,7 @@
20#include <linux/file.h> 20#include <linux/file.h>
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/pid_namespace.h>
23 24
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25 26
@@ -48,7 +49,7 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
48 return NULL; 49 return NULL;
49 memset(&ei->c_fid, 0, sizeof(struct CodaFid)); 50 memset(&ei->c_fid, 0, sizeof(struct CodaFid));
50 ei->c_flags = 0; 51 ei->c_flags = 0;
51 ei->c_uid = 0; 52 ei->c_uid = GLOBAL_ROOT_UID;
52 ei->c_cached_perm = 0; 53 ei->c_cached_perm = 0;
53 spin_lock_init(&ei->c_lock); 54 spin_lock_init(&ei->c_lock);
54 return &ei->vfs_inode; 55 return &ei->vfs_inode;
@@ -129,7 +130,7 @@ static int get_device_index(struct coda_mount_data *data)
129 f = fdget(data->fd); 130 f = fdget(data->fd);
130 if (!f.file) 131 if (!f.file)
131 goto Ebadf; 132 goto Ebadf;
132 inode = f.file->f_path.dentry->d_inode; 133 inode = file_inode(f.file);
133 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { 134 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
134 fdput(f); 135 fdput(f);
135 goto Ebadf; 136 goto Ebadf;
@@ -157,6 +158,9 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent)
157 int error; 158 int error;
158 int idx; 159 int idx;
159 160
161 if (task_active_pid_ns(current) != &init_pid_ns)
162 return -EINVAL;
163
160 idx = get_device_index((struct coda_mount_data *) data); 164 idx = get_device_index((struct coda_mount_data *) data);
161 165
162 /* Ignore errors in data, for backward compatibility */ 166 /* Ignore errors in data, for backward compatibility */
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index ee0981f1375b..3f5de96bbb58 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -52,7 +52,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
52 struct path path; 52 struct path path;
53 int error; 53 int error;
54 struct PioctlData data; 54 struct PioctlData data;
55 struct inode *inode = filp->f_dentry->d_inode; 55 struct inode *inode = file_inode(filp);
56 struct inode *target_inode = NULL; 56 struct inode *target_inode = NULL;
57 struct coda_inode_info *cnp; 57 struct coda_inode_info *cnp;
58 58
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 761d5b31b18d..ebc2bae6c289 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -37,6 +37,7 @@
37#include <linux/list.h> 37#include <linux/list.h>
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/device.h> 39#include <linux/device.h>
40#include <linux/pid_namespace.h>
40#include <asm/io.h> 41#include <asm/io.h>
41#include <asm/poll.h> 42#include <asm/poll.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -266,6 +267,12 @@ static int coda_psdev_open(struct inode * inode, struct file * file)
266 struct venus_comm *vcp; 267 struct venus_comm *vcp;
267 int idx, err; 268 int idx, err;
268 269
270 if (task_active_pid_ns(current) != &init_pid_ns)
271 return -EINVAL;
272
273 if (current_user_ns() != &init_user_ns)
274 return -EINVAL;
275
269 idx = iminor(inode); 276 idx = iminor(inode);
270 if (idx < 0 || idx >= MAX_CODADEVS) 277 if (idx < 0 || idx >= MAX_CODADEVS)
271 return -ENODEV; 278 return -ENODEV;
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 0c68fd31fbf2..3a731976dc5e 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -50,9 +50,9 @@ static void *alloc_upcall(int opcode, int size)
50 return ERR_PTR(-ENOMEM); 50 return ERR_PTR(-ENOMEM);
51 51
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = task_pid_nr_ns(current, &init_pid_ns);
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr_ns(current, &init_pid_ns);
55 inp->ih.uid = current_fsuid(); 55 inp->ih.uid = from_kuid(&init_user_ns, current_fsuid());
56 56
57 return (void*)inp; 57 return (void*)inp;
58} 58}
@@ -157,7 +157,7 @@ int venus_lookup(struct super_block *sb, struct CodaFid *fid,
157} 157}
158 158
159int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, 159int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
160 vuid_t uid) 160 kuid_t uid)
161{ 161{
162 union inputArgs *inp; 162 union inputArgs *inp;
163 union outputArgs *outp; 163 union outputArgs *outp;
@@ -166,7 +166,7 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 insize = SIZE(release); 166 insize = SIZE(release);
167 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
168 168
169 inp->ih.uid = uid; 169 inp->ih.uid = from_kuid(&init_user_ns, uid);
170 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
171 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
172 172
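
A minimal sketch of the conversion applied in alloc_upcall() and venus_close() above, assuming from_kuid() from <linux/uidgid.h>; example_uid_for_upcall() and the u32 wire format are illustrative.

#include <linux/types.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Translate the in-kernel kuid_t back to a plain numeric uid in
 * init_user_ns before handing it to the userspace cache manager. */
static u32 example_uid_for_upcall(kuid_t uid)
{
	return from_kuid(&init_user_ns, uid);
}
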
diff --git a/fs/compat.c b/fs/compat.c
index 015e1e1f87c6..fe40fde29111 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1278,8 +1278,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1278 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1279 * O_LARGEFILE flag. 1279 * O_LARGEFILE flag.
1280 */ 1280 */
1281asmlinkage long 1281COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
1282compat_sys_open(const char __user *filename, int flags, umode_t mode)
1283{ 1282{
1284 return do_sys_open(AT_FDCWD, filename, flags, mode); 1283 return do_sys_open(AT_FDCWD, filename, flags, mode);
1285} 1284}
@@ -1288,8 +1287,7 @@ compat_sys_open(const char __user *filename, int flags, umode_t mode)
1288 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the 1287 * Exactly like fs/open.c:sys_openat(), except that it doesn't set the
1289 * O_LARGEFILE flag. 1288 * O_LARGEFILE flag.
1290 */ 1289 */
1291asmlinkage long 1290COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
1292compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
1293{ 1291{
1294 return do_sys_open(dfd, filename, flags, mode); 1292 return do_sys_open(dfd, filename, flags, mode);
1295} 1293}
@@ -1739,55 +1737,13 @@ asmlinkage long compat_sys_signalfd(int ufd,
1739} 1737}
1740#endif /* CONFIG_SIGNALFD */ 1738#endif /* CONFIG_SIGNALFD */
1741 1739
1742#ifdef CONFIG_TIMERFD
1743
1744asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
1745 const struct compat_itimerspec __user *utmr,
1746 struct compat_itimerspec __user *otmr)
1747{
1748 int error;
1749 struct itimerspec t;
1750 struct itimerspec __user *ut;
1751
1752 if (get_compat_itimerspec(&t, utmr))
1753 return -EFAULT;
1754 ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
1755 if (copy_to_user(&ut[0], &t, sizeof(t)))
1756 return -EFAULT;
1757 error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
1758 if (!error && otmr)
1759 error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
1760 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1761
1762 return error;
1763}
1764
1765asmlinkage long compat_sys_timerfd_gettime(int ufd,
1766 struct compat_itimerspec __user *otmr)
1767{
1768 int error;
1769 struct itimerspec t;
1770 struct itimerspec __user *ut;
1771
1772 ut = compat_alloc_user_space(sizeof(struct itimerspec));
1773 error = sys_timerfd_gettime(ufd, ut);
1774 if (!error)
1775 error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
1776 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
1777
1778 return error;
1779}
1780
1781#endif /* CONFIG_TIMERFD */
1782
1783#ifdef CONFIG_FHANDLE 1740#ifdef CONFIG_FHANDLE
1784/* 1741/*
1785 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it 1742 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
1786 * doesn't set the O_LARGEFILE flag. 1743 * doesn't set the O_LARGEFILE flag.
1787 */ 1744 */
1788asmlinkage long 1745COMPAT_SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
1789compat_sys_open_by_handle_at(int mountdirfd, 1746 struct file_handle __user *, handle, int, flags)
1790 struct file_handle __user *handle, int flags)
1791{ 1747{
1792 return do_handle_open(mountdirfd, handle, flags); 1748 return do_handle_open(mountdirfd, handle, flags);
1793} 1749}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e2f57a007029..3ced75f765ca 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1582,7 +1582,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1582 case FIBMAP: 1582 case FIBMAP:
1583 case FIGETBSZ: 1583 case FIGETBSZ:
1584 case FIONREAD: 1584 case FIONREAD:
1585 if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode)) 1585 if (S_ISREG(file_inode(f.file)->i_mode))
1586 break; 1586 break;
1587 /*FALL THROUGH*/ 1587 /*FALL THROUGH*/
1588 1588
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 712b10f64c70..7aabc6ad4e9b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1037,10 +1037,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
1037static int configfs_depend_prep(struct dentry *origin, 1037static int configfs_depend_prep(struct dentry *origin,
1038 struct config_item *target) 1038 struct config_item *target)
1039{ 1039{
1040 struct configfs_dirent *child_sd, *sd = origin->d_fsdata; 1040 struct configfs_dirent *child_sd, *sd;
1041 int ret = 0; 1041 int ret = 0;
1042 1042
1043 BUG_ON(!origin || !sd); 1043 BUG_ON(!origin || !origin->d_fsdata);
1044 sd = origin->d_fsdata;
1044 1045
1045 if (sd->s_element == target) /* Boo-yah */ 1046 if (sd->s_element == target) /* Boo-yah */
1046 goto out; 1047 goto out;
@@ -1625,7 +1626,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
1625 if (offset >= 0) 1626 if (offset >= 0)
1626 break; 1627 break;
1627 default: 1628 default:
1628 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 1629 mutex_unlock(&file_inode(file)->i_mutex);
1629 return -EINVAL; 1630 return -EINVAL;
1630 } 1631 }
1631 if (offset != file->f_pos) { 1632 if (offset != file->f_pos) {
diff --git a/fs/coredump.c b/fs/coredump.c
index 177493272a61..c6479658d487 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -411,7 +411,7 @@ static void wait_for_dump_helpers(struct file *file)
411{ 411{
412 struct pipe_inode_info *pipe; 412 struct pipe_inode_info *pipe;
413 413
414 pipe = file->f_path.dentry->d_inode->i_pipe; 414 pipe = file_inode(file)->i_pipe;
415 415
416 pipe_lock(pipe); 416 pipe_lock(pipe);
417 pipe->readers++; 417 pipe->readers++;
@@ -501,7 +501,7 @@ void do_coredump(siginfo_t *siginfo)
501 * so we dump it as root in mode 2, and only into a controlled 501 * so we dump it as root in mode 2, and only into a controlled
502 * environment (pipe handler or fully qualified path). 502 * environment (pipe handler or fully qualified path).
503 */ 503 */
504 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { 504 if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
505 /* Setuid core dump mode */ 505 /* Setuid core dump mode */
506 flag = O_EXCL; /* Stop rewrite attacks */ 506 flag = O_EXCL; /* Stop rewrite attacks */
507 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ 507 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
@@ -600,7 +600,7 @@ void do_coredump(siginfo_t *siginfo)
600 if (IS_ERR(cprm.file)) 600 if (IS_ERR(cprm.file))
601 goto fail_unlock; 601 goto fail_unlock;
602 602
603 inode = cprm.file->f_path.dentry->d_inode; 603 inode = file_inode(cprm.file);
604 if (inode->i_nlink > 1) 604 if (inode->i_nlink > 1)
605 goto close_fail; 605 goto close_fail;
606 if (d_unhashed(cprm.file->f_path.dentry)) 606 if (d_unhashed(cprm.file->f_path.dentry))
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index c6c3f91ecf06..3ceb9ec976e1 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -351,7 +351,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
351 */ 351 */
352static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 352static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
353{ 353{
354 struct inode *inode = filp->f_path.dentry->d_inode; 354 struct inode *inode = file_inode(filp);
355 struct super_block *sb = inode->i_sb; 355 struct super_block *sb = inode->i_sb;
356 char *buf; 356 char *buf;
357 unsigned int offset; 357 unsigned int offset;
diff --git a/fs/dcache.c b/fs/dcache.c
index 19153a0a810c..fbfae008ba44 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -675,11 +675,10 @@ EXPORT_SYMBOL(dget_parent);
675static struct dentry *__d_find_alias(struct inode *inode, int want_discon) 675static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
676{ 676{
677 struct dentry *alias, *discon_alias; 677 struct dentry *alias, *discon_alias;
678 struct hlist_node *p;
679 678
680again: 679again:
681 discon_alias = NULL; 680 discon_alias = NULL;
682 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { 681 hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
683 spin_lock(&alias->d_lock); 682 spin_lock(&alias->d_lock);
684 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 683 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
685 if (IS_ROOT(alias) && 684 if (IS_ROOT(alias) &&
@@ -730,10 +729,9 @@ EXPORT_SYMBOL(d_find_alias);
730void d_prune_aliases(struct inode *inode) 729void d_prune_aliases(struct inode *inode)
731{ 730{
732 struct dentry *dentry; 731 struct dentry *dentry;
733 struct hlist_node *p;
734restart: 732restart:
735 spin_lock(&inode->i_lock); 733 spin_lock(&inode->i_lock);
736 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 734 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
737 spin_lock(&dentry->d_lock); 735 spin_lock(&dentry->d_lock);
738 if (!dentry->d_count) { 736 if (!dentry->d_count) {
739 __dget_dlock(dentry); 737 __dget_dlock(dentry);
@@ -1358,6 +1356,7 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1358 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH | 1356 WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH |
1359 DCACHE_OP_COMPARE | 1357 DCACHE_OP_COMPARE |
1360 DCACHE_OP_REVALIDATE | 1358 DCACHE_OP_REVALIDATE |
1359 DCACHE_OP_WEAK_REVALIDATE |
1361 DCACHE_OP_DELETE )); 1360 DCACHE_OP_DELETE ));
1362 dentry->d_op = op; 1361 dentry->d_op = op;
1363 if (!op) 1362 if (!op)
@@ -1368,6 +1367,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1368 dentry->d_flags |= DCACHE_OP_COMPARE; 1367 dentry->d_flags |= DCACHE_OP_COMPARE;
1369 if (op->d_revalidate) 1368 if (op->d_revalidate)
1370 dentry->d_flags |= DCACHE_OP_REVALIDATE; 1369 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1370 if (op->d_weak_revalidate)
1371 dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1371 if (op->d_delete) 1372 if (op->d_delete)
1372 dentry->d_flags |= DCACHE_OP_DELETE; 1373 dentry->d_flags |= DCACHE_OP_DELETE;
1373 if (op->d_prune) 1374 if (op->d_prune)
@@ -1440,14 +1441,13 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1440 int len = entry->d_name.len; 1441 int len = entry->d_name.len;
1441 const char *name = entry->d_name.name; 1442 const char *name = entry->d_name.name;
1442 unsigned int hash = entry->d_name.hash; 1443 unsigned int hash = entry->d_name.hash;
1443 struct hlist_node *p;
1444 1444
1445 if (!inode) { 1445 if (!inode) {
1446 __d_instantiate(entry, NULL); 1446 __d_instantiate(entry, NULL);
1447 return NULL; 1447 return NULL;
1448 } 1448 }
1449 1449
1450 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { 1450 hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
1451 /* 1451 /*
1452 * Don't need alias->d_lock here, because aliases with 1452 * Don't need alias->d_lock here, because aliases with
1453 * d_parent == entry->d_parent are not subject to name or 1453 * d_parent == entry->d_parent are not subject to name or
@@ -1672,7 +1672,6 @@ EXPORT_SYMBOL(d_splice_alias);
1672struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, 1672struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1673 struct qstr *name) 1673 struct qstr *name)
1674{ 1674{
1675 int error;
1676 struct dentry *found; 1675 struct dentry *found;
1677 struct dentry *new; 1676 struct dentry *new;
1678 1677
@@ -1681,10 +1680,12 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1681 * if not go ahead and create it now. 1680 * if not go ahead and create it now.
1682 */ 1681 */
1683 found = d_hash_and_lookup(dentry->d_parent, name); 1682 found = d_hash_and_lookup(dentry->d_parent, name);
1683 if (unlikely(IS_ERR(found)))
1684 goto err_out;
1684 if (!found) { 1685 if (!found) {
1685 new = d_alloc(dentry->d_parent, name); 1686 new = d_alloc(dentry->d_parent, name);
1686 if (!new) { 1687 if (!new) {
1687 error = -ENOMEM; 1688 found = ERR_PTR(-ENOMEM);
1688 goto err_out; 1689 goto err_out;
1689 } 1690 }
1690 1691
@@ -1725,7 +1726,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1725 1726
1726err_out: 1727err_out:
1727 iput(inode); 1728 iput(inode);
1728 return ERR_PTR(error); 1729 return found;
1729} 1730}
1730EXPORT_SYMBOL(d_add_ci); 1731EXPORT_SYMBOL(d_add_ci);
1731 1732
@@ -1889,7 +1890,7 @@ seqretry:
1889 * dentry is returned. The caller must use dput to free the entry when it has 1890 * dentry is returned. The caller must use dput to free the entry when it has
1890 * finished using it. %NULL is returned if the dentry does not exist. 1891 * finished using it. %NULL is returned if the dentry does not exist.
1891 */ 1892 */
1892struct dentry *d_lookup(struct dentry *parent, struct qstr *name) 1893struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
1893{ 1894{
1894 struct dentry *dentry; 1895 struct dentry *dentry;
1895 unsigned seq; 1896 unsigned seq;
@@ -1919,7 +1920,7 @@ EXPORT_SYMBOL(d_lookup);
1919 * 1920 *
1920 * __d_lookup callers must be commented. 1921 * __d_lookup callers must be commented.
1921 */ 1922 */
1922struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) 1923struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1923{ 1924{
1924 unsigned int len = name->len; 1925 unsigned int len = name->len;
1925 unsigned int hash = name->hash; 1926 unsigned int hash = name->hash;
@@ -1997,12 +1998,10 @@ next:
1997 * @dir: Directory to search in 1998 * @dir: Directory to search in
1998 * @name: qstr of name we wish to find 1999 * @name: qstr of name we wish to find
1999 * 2000 *
2000 * On hash failure or on lookup failure NULL is returned. 2001 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2001 */ 2002 */
2002struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name) 2003struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2003{ 2004{
2004 struct dentry *dentry = NULL;
2005
2006 /* 2005 /*
2007 * Check for a fs-specific hash function. Note that we must 2006 * Check for a fs-specific hash function. Note that we must
2008 * calculate the standard hash first, as the d_op->d_hash() 2007 * calculate the standard hash first, as the d_op->d_hash()
@@ -2010,13 +2009,13 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2010 */ 2009 */
2011 name->hash = full_name_hash(name->name, name->len); 2010 name->hash = full_name_hash(name->name, name->len);
2012 if (dir->d_flags & DCACHE_OP_HASH) { 2011 if (dir->d_flags & DCACHE_OP_HASH) {
2013 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0) 2012 int err = dir->d_op->d_hash(dir, dir->d_inode, name);
2014 goto out; 2013 if (unlikely(err < 0))
2014 return ERR_PTR(err);
2015 } 2015 }
2016 dentry = d_lookup(dir, name); 2016 return d_lookup(dir, name);
2017out:
2018 return dentry;
2019} 2017}
2018EXPORT_SYMBOL(d_hash_and_lookup);
2020 2019
2021/** 2020/**
2022 * d_validate - verify dentry provided from insecure source (deprecated) 2021 * d_validate - verify dentry provided from insecure source (deprecated)
@@ -2394,7 +2393,7 @@ out_err:
2394 */ 2393 */
2395static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2394static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2396{ 2395{
2397 struct dentry *dparent, *aparent; 2396 struct dentry *dparent;
2398 2397
2399 dentry_lock_for_move(anon, dentry); 2398 dentry_lock_for_move(anon, dentry);
2400 2399
@@ -2402,24 +2401,15 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2402 write_seqcount_begin(&anon->d_seq); 2401 write_seqcount_begin(&anon->d_seq);
2403 2402
2404 dparent = dentry->d_parent; 2403 dparent = dentry->d_parent;
2405 aparent = anon->d_parent;
2406 2404
2407 switch_names(dentry, anon); 2405 switch_names(dentry, anon);
2408 swap(dentry->d_name.hash, anon->d_name.hash); 2406 swap(dentry->d_name.hash, anon->d_name.hash);
2409 2407
2410 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2408 dentry->d_parent = dentry;
2411 list_del(&dentry->d_u.d_child); 2409 list_del_init(&dentry->d_u.d_child);
2412 if (!IS_ROOT(dentry)) 2410 anon->d_parent = dparent;
2413 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2414 else
2415 INIT_LIST_HEAD(&dentry->d_u.d_child);
2416
2417 anon->d_parent = (dparent == dentry) ? anon : dparent;
2418 list_del(&anon->d_u.d_child); 2411 list_del(&anon->d_u.d_child);
2419 if (!IS_ROOT(anon)) 2412 list_add(&anon->d_u.d_child, &dparent->d_subdirs);
2420 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2421 else
2422 INIT_LIST_HEAD(&anon->d_u.d_child);
2423 2413
2424 write_seqcount_end(&dentry->d_seq); 2414 write_seqcount_end(&dentry->d_seq);
2425 write_seqcount_end(&anon->d_seq); 2415 write_seqcount_end(&anon->d_seq);
@@ -2722,37 +2712,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
2722} 2712}
2723EXPORT_SYMBOL(d_path); 2713EXPORT_SYMBOL(d_path);
2724 2714
2725/**
2726 * d_path_with_unreachable - return the path of a dentry
2727 * @path: path to report
2728 * @buf: buffer to return value in
2729 * @buflen: buffer length
2730 *
2731 * The difference from d_path() is that this prepends "(unreachable)"
2732 * to paths which are unreachable from the current process' root.
2733 */
2734char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2735{
2736 char *res = buf + buflen;
2737 struct path root;
2738 int error;
2739
2740 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2741 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2742
2743 get_fs_root(current->fs, &root);
2744 write_seqlock(&rename_lock);
2745 error = path_with_deleted(path, &root, &res, &buflen);
2746 if (error > 0)
2747 error = prepend_unreachable(&res, &buflen);
2748 write_sequnlock(&rename_lock);
2749 path_put(&root);
2750 if (error)
2751 res = ERR_PTR(error);
2752
2753 return res;
2754}
2755
2756/* 2715/*
2757 * Helper function for dentry_operations.d_dname() members 2716 * Helper function for dentry_operations.d_dname() members
2758 */ 2717 */
@@ -3035,7 +2994,7 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
3035 ino_t ino = 0; 2994 ino_t ino = 0;
3036 2995
3037 dentry = d_hash_and_lookup(dir, name); 2996 dentry = d_hash_and_lookup(dir, name);
3038 if (dentry) { 2997 if (!IS_ERR_OR_NULL(dentry)) {
3039 if (dentry->d_inode) 2998 if (dentry->d_inode)
3040 ino = dentry->d_inode->i_ino; 2999 ino = dentry->d_inode->i_ino;
3041 dput(dentry); 3000 dput(dentry);
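
A minimal sketch of the hlist_for_each_entry() form that the dcache and cifs hunks above migrate to, assuming the three-argument iterator from <linux/list.h>; struct example_node, example_sum() and the link member are illustrative.

#include <linux/list.h>

struct example_node {
	int value;
	struct hlist_node link;
};

/* The iterator now takes only the cursor, the head and the member name,
 * so the extra 'struct hlist_node *p' locals disappear. */
static int example_sum(struct hlist_head *head)
{
	struct example_node *n;
	int sum = 0;

	hlist_for_each_entry(n, head, link)
		sum += n->value;
	return sum;
}
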
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 153bb1e42e63..0c4f80b447fb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -176,7 +176,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
176 opts->uid = uid; 176 opts->uid = uid;
177 break; 177 break;
178 case Opt_gid: 178 case Opt_gid:
179 if (match_octal(&args[0], &option)) 179 if (match_int(&args[0], &option))
180 return -EINVAL; 180 return -EINVAL;
181 gid = make_kgid(current_user_ns(), option); 181 gid = make_kgid(current_user_ns(), option);
182 if (!gid_valid(gid)) 182 if (!gid_valid(gid))
@@ -322,7 +322,6 @@ static struct dentry *__create_file(const char *name, umode_t mode,
322 if (!parent) 322 if (!parent)
323 parent = debugfs_mount->mnt_root; 323 parent = debugfs_mount->mnt_root;
324 324
325 dentry = NULL;
326 mutex_lock(&parent->d_inode->i_mutex); 325 mutex_lock(&parent->d_inode->i_mutex);
327 dentry = lookup_one_len(name, parent, strlen(name)); 326 dentry = lookup_one_len(name, parent, strlen(name));
328 if (!IS_ERR(dentry)) { 327 if (!IS_ERR(dentry)) {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 472e6befc54d..073d30b9d1ac 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -243,6 +243,13 @@ static int mknod_ptmx(struct super_block *sb)
243 struct dentry *root = sb->s_root; 243 struct dentry *root = sb->s_root;
244 struct pts_fs_info *fsi = DEVPTS_SB(sb); 244 struct pts_fs_info *fsi = DEVPTS_SB(sb);
245 struct pts_mount_opts *opts = &fsi->mount_opts; 245 struct pts_mount_opts *opts = &fsi->mount_opts;
246 kuid_t root_uid;
247 kgid_t root_gid;
248
249 root_uid = make_kuid(current_user_ns(), 0);
250 root_gid = make_kgid(current_user_ns(), 0);
251 if (!uid_valid(root_uid) || !gid_valid(root_gid))
252 return -EINVAL;
246 253
247 mutex_lock(&root->d_inode->i_mutex); 254 mutex_lock(&root->d_inode->i_mutex);
248 255
@@ -273,6 +280,8 @@ static int mknod_ptmx(struct super_block *sb)
273 280
274 mode = S_IFCHR|opts->ptmxmode; 281 mode = S_IFCHR|opts->ptmxmode;
275 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); 282 init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
283 inode->i_uid = root_uid;
284 inode->i_gid = root_gid;
276 285
277 d_add(dentry, inode); 286 d_add(dentry, inode);
278 287
@@ -438,6 +447,12 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
438 if (error) 447 if (error)
439 return ERR_PTR(error); 448 return ERR_PTR(error);
440 449
450 /* Require newinstance for all user namespace mounts to ensure
451 * the mount options are not changed.
452 */
453 if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
454 return ERR_PTR(-EINVAL);
455
441 if (opts.newinstance) 456 if (opts.newinstance)
442 s = sget(fs_type, NULL, set_anon_super, flags, NULL); 457 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
443 else 458 else
@@ -491,6 +506,9 @@ static struct file_system_type devpts_fs_type = {
491 .name = "devpts", 506 .name = "devpts",
492 .mount = devpts_mount, 507 .mount = devpts_mount,
493 .kill_sb = devpts_kill_sb, 508 .kill_sb = devpts_kill_sb,
509#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
510 .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
511#endif
494}; 512};
495 513
496/* 514/*
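
A minimal sketch of the guard added to mknod_ptmx() above, assuming make_kuid()/make_kgid() from <linux/uidgid.h> and current_user_ns() from <linux/cred.h>; example_resolve_root() is an illustrative name.

#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Resolve uid/gid 0 in the mounting user namespace and refuse to create
 * the ptmx node when that namespace has no mapping for root. */
static int example_resolve_root(kuid_t *uid, kgid_t *gid)
{
	kuid_t root_uid = make_kuid(current_user_ns(), 0);
	kgid_t root_gid = make_kgid(current_user_ns(), 0);

	if (!uid_valid(root_uid) || !gid_valid(root_gid))
		return -EINVAL;

	*uid = root_uid;
	*gid = root_gid;
	return 0;
}
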
diff --git a/fs/direct-io.c b/fs/direct-io.c
index cf5b44b10c67..f853263cf74f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -261,9 +261,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
261 dio->end_io(dio->iocb, offset, transferred, 261 dio->end_io(dio->iocb, offset, transferred,
262 dio->private, ret, is_async); 262 dio->private, ret, is_async);
263 } else { 263 } else {
264 inode_dio_done(dio->inode);
264 if (is_async) 265 if (is_async)
265 aio_complete(dio->iocb, ret, 0); 266 aio_complete(dio->iocb, ret, 0);
266 inode_dio_done(dio->inode);
267 } 267 }
268 268
269 return ret; 269 return ret;
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index a0387dd8b1f0..7d58d5b112b5 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -158,7 +158,7 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
158 unsigned int x; 158 unsigned int x;
159 159
160 if (!capable(CAP_SYS_ADMIN)) 160 if (!capable(CAP_SYS_ADMIN))
161 return -EACCES; 161 return -EPERM;
162 162
163 x = simple_strtoul(buf, NULL, 0); 163 x = simple_strtoul(buf, NULL, 0);
164 164
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 77c0f70f8fe8..e7665c31f7b1 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -96,10 +96,13 @@ do { \
96} 96}
97 97
98 98
99#define DLM_RTF_SHRINK 0x00000001
100
99struct dlm_rsbtable { 101struct dlm_rsbtable {
100 struct rb_root keep; 102 struct rb_root keep;
101 struct rb_root toss; 103 struct rb_root toss;
102 spinlock_t lock; 104 spinlock_t lock;
105 uint32_t flags;
103}; 106};
104 107
105 108
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index a579f30f237d..1b1146670c4b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1132,6 +1132,7 @@ static void toss_rsb(struct kref *kref)
1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep); 1132 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss); 1133 rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
1134 r->res_toss_time = jiffies; 1134 r->res_toss_time = jiffies;
1135 ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
1135 if (r->res_lvbptr) { 1136 if (r->res_lvbptr) {
1136 dlm_free_lvb(r->res_lvbptr); 1137 dlm_free_lvb(r->res_lvbptr);
1137 r->res_lvbptr = NULL; 1138 r->res_lvbptr = NULL;
@@ -1182,7 +1183,7 @@ static void detach_lkb(struct dlm_lkb *lkb)
1182static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 1183static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
1183{ 1184{
1184 struct dlm_lkb *lkb; 1185 struct dlm_lkb *lkb;
1185 int rv, id; 1186 int rv;
1186 1187
1187 lkb = dlm_allocate_lkb(ls); 1188 lkb = dlm_allocate_lkb(ls);
1188 if (!lkb) 1189 if (!lkb)
@@ -1198,19 +1199,13 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
1198 mutex_init(&lkb->lkb_cb_mutex); 1199 mutex_init(&lkb->lkb_cb_mutex);
1199 INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work); 1200 INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
1200 1201
1201 retry: 1202 idr_preload(GFP_NOFS);
1202 rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
1203 if (!rv)
1204 return -ENOMEM;
1205
1206 spin_lock(&ls->ls_lkbidr_spin); 1203 spin_lock(&ls->ls_lkbidr_spin);
1207 rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id); 1204 rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT);
1208 if (!rv) 1205 if (rv >= 0)
1209 lkb->lkb_id = id; 1206 lkb->lkb_id = rv;
1210 spin_unlock(&ls->ls_lkbidr_spin); 1207 spin_unlock(&ls->ls_lkbidr_spin);
1211 1208 idr_preload_end();
1212 if (rv == -EAGAIN)
1213 goto retry;
1214 1209
1215 if (rv < 0) { 1210 if (rv < 0) {
1216 log_error(ls, "create_lkb idr error %d", rv); 1211 log_error(ls, "create_lkb idr error %d", rv);
@@ -1659,11 +1654,18 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1659 char *name; 1654 char *name;
1660 int our_nodeid = dlm_our_nodeid(); 1655 int our_nodeid = dlm_our_nodeid();
1661 int remote_count = 0; 1656 int remote_count = 0;
1657 int need_shrink = 0;
1662 int i, len, rv; 1658 int i, len, rv;
1663 1659
1664 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX); 1660 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
1665 1661
1666 spin_lock(&ls->ls_rsbtbl[b].lock); 1662 spin_lock(&ls->ls_rsbtbl[b].lock);
1663
1664 if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1665 spin_unlock(&ls->ls_rsbtbl[b].lock);
1666 return;
1667 }
1668
1667 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) { 1669 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1668 next = rb_next(n); 1670 next = rb_next(n);
1669 r = rb_entry(n, struct dlm_rsb, res_hashnode); 1671 r = rb_entry(n, struct dlm_rsb, res_hashnode);
@@ -1679,6 +1681,8 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1679 continue; 1681 continue;
1680 } 1682 }
1681 1683
1684 need_shrink = 1;
1685
1682 if (!time_after_eq(jiffies, r->res_toss_time + 1686 if (!time_after_eq(jiffies, r->res_toss_time +
1683 dlm_config.ci_toss_secs * HZ)) { 1687 dlm_config.ci_toss_secs * HZ)) {
1684 continue; 1688 continue;
@@ -1710,6 +1714,11 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
1710 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss); 1714 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1711 dlm_free_rsb(r); 1715 dlm_free_rsb(r);
1712 } 1716 }
1717
1718 if (need_shrink)
1719 ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1720 else
1721 ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
1713 spin_unlock(&ls->ls_rsbtbl[b].lock); 1722 spin_unlock(&ls->ls_rsbtbl[b].lock);
1714 1723
1715 /* 1724 /*
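
A minimal sketch of the idr_preload()/idr_alloc() pattern that create_lkb() and recover_idr_add() adopt above, assuming the post-idr_pre_get() API from <linux/idr.h>; example_assign_id() and its lock/idr parameters are illustrative.

#include <linux/idr.h>
#include <linux/spinlock.h>

/* Preload outside the spinlock, then allocate an id starting at 1 with
 * GFP_NOWAIT while the lock is held; the old idr_pre_get()/
 * idr_get_new_above() retry loop is no longer needed. */
static int example_assign_id(struct idr *idr, spinlock_t *lock, void *object)
{
	int id;

	idr_preload(GFP_NOFS);
	spin_lock(lock);
	id = idr_alloc(idr, object, 1, 0, GFP_NOWAIT);
	spin_unlock(lock);
	idr_preload_end();

	return id;	/* >= 1 on success, negative errno on failure */
}
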
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 2e99fb0c9737..3ca79d3253b9 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -796,7 +796,6 @@ static int release_lockspace(struct dlm_ls *ls, int force)
796 */ 796 */
797 797
798 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls); 798 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
799 idr_remove_all(&ls->ls_lkbidr);
800 idr_destroy(&ls->ls_lkbidr); 799 idr_destroy(&ls->ls_lkbidr);
801 800
802 /* 801 /*
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index dd87a31bcc21..4f5ad246582f 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -177,12 +177,11 @@ static inline int nodeid_hash(int nodeid)
 static struct connection *__find_con(int nodeid)
 {
 	int r;
-	struct hlist_node *h;
 	struct connection *con;
 
 	r = nodeid_hash(nodeid);
 
-	hlist_for_each_entry(con, h, &connection_hash[r], list) {
+	hlist_for_each_entry(con, &connection_hash[r], list) {
 		if (con->nodeid == nodeid)
 			return con;
 	}
@@ -232,13 +231,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
 static void foreach_conn(void (*conn_func)(struct connection *c))
 {
 	int i;
-	struct hlist_node *h, *n;
+	struct hlist_node *n;
 	struct connection *con;
 
 	for (i = 0; i < CONN_HASH_SIZE; i++) {
-		hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){
+		hlist_for_each_entry_safe(con, n, &connection_hash[i], list)
 			conn_func(con);
-		}
 	}
 }
 
@@ -257,13 +255,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
 static struct connection *assoc2con(int assoc_id)
 {
 	int i;
-	struct hlist_node *h;
 	struct connection *con;
 
 	mutex_lock(&connections_lock);
 
 	for (i = 0 ; i < CONN_HASH_SIZE; i++) {
-		hlist_for_each_entry(con, h, &connection_hash[i], list) {
+		hlist_for_each_entry(con, &connection_hash[i], list) {
 			if (con->sctp_assoc == assoc_id) {
 				mutex_unlock(&connections_lock);
 				return con;
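The lowcomms.c hunks above reflect the hash-list iterator rework in this series: hlist_for_each_entry() and friends now use the entry pointer itself as the cursor, so the extra struct hlist_node * variable disappears (the _safe variant keeps only the struct hlist_node *n lookahead). A minimal sketch of the new calling convention; struct conn, bucket and find_conn() are made-up names for illustration, not DLM code:

#include <linux/list.h>

struct conn {
	int nodeid;
	struct hlist_node list;		/* linkage into one hash bucket */
};

static HLIST_HEAD(bucket);		/* a single bucket, for illustration */

static struct conn *find_conn(int nodeid)
{
	struct conn *c;

	/* new style: no separate struct hlist_node *pos cursor */
	hlist_for_each_entry(c, &bucket, list) {
		if (c->nodeid == nodeid)
			return c;
	}
	return NULL;
}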
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index aedea28a86a1..a6bc63f6e31b 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -305,27 +305,26 @@ static int recover_idr_empty(struct dlm_ls *ls)
 static int recover_idr_add(struct dlm_rsb *r)
 {
 	struct dlm_ls *ls = r->res_ls;
-	int rv, id;
-
-	rv = idr_pre_get(&ls->ls_recover_idr, GFP_NOFS);
-	if (!rv)
-		return -ENOMEM;
+	int rv;
 
+	idr_preload(GFP_NOFS);
 	spin_lock(&ls->ls_recover_idr_lock);
 	if (r->res_id) {
-		spin_unlock(&ls->ls_recover_idr_lock);
-		return -1;
-	}
-	rv = idr_get_new_above(&ls->ls_recover_idr, r, 1, &id);
-	if (rv) {
-		spin_unlock(&ls->ls_recover_idr_lock);
-		return rv;
+		rv = -1;
+		goto out_unlock;
 	}
-	r->res_id = id;
+	rv = idr_alloc(&ls->ls_recover_idr, r, 1, 0, GFP_NOWAIT);
+	if (rv < 0)
+		goto out_unlock;
+
+	r->res_id = rv;
 	ls->ls_recover_list_count++;
 	dlm_hold_rsb(r);
+	rv = 0;
+out_unlock:
 	spin_unlock(&ls->ls_recover_idr_lock);
-	return 0;
+	idr_preload_end();
+	return rv;
 }
 
 static void recover_idr_del(struct dlm_rsb *r)
@@ -351,24 +350,21 @@ static struct dlm_rsb *recover_idr_find(struct dlm_ls *ls, uint64_t id)
 	return r;
 }
 
-static int recover_idr_clear_rsb(int id, void *p, void *data)
+static void recover_idr_clear(struct dlm_ls *ls)
 {
-	struct dlm_ls *ls = data;
-	struct dlm_rsb *r = p;
+	struct dlm_rsb *r;
+	int id;
 
-	r->res_id = 0;
-	r->res_recover_locks_count = 0;
-	ls->ls_recover_list_count--;
+	spin_lock(&ls->ls_recover_idr_lock);
 
-	dlm_put_rsb(r);
-	return 0;
-}
+	idr_for_each_entry(&ls->ls_recover_idr, r, id) {
+		idr_remove(&ls->ls_recover_idr, id);
+		r->res_id = 0;
+		r->res_recover_locks_count = 0;
+		ls->ls_recover_list_count--;
 
-static void recover_idr_clear(struct dlm_ls *ls)
-{
-	spin_lock(&ls->ls_recover_idr_lock);
-	idr_for_each(&ls->ls_recover_idr, recover_idr_clear_rsb, ls);
-	idr_remove_all(&ls->ls_recover_idr);
+		dlm_put_rsb(r);
+	}
 
 	if (ls->ls_recover_list_count != 0) {
 		log_error(ls, "warning: recover_list_count %d",
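The recover_idr_add() rewrite above is the standard conversion from the old two-step idr_pre_get()/idr_get_new_above() API to idr_preload()/idr_alloc(): memory is preallocated outside the spinlock, then the id is allocated atomically under it, and idr_alloc() returns the new id (or a negative errno) directly. A minimal sketch of that pattern; my_idr, my_lock and my_idr_add() are made-up names:

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_lock);

static int my_idr_add(void *obj)
{
	int id;

	idr_preload(GFP_NOFS);			/* preallocate outside the lock */
	spin_lock(&my_lock);
	/* ids start at 1; an upper bound of 0 means "no limit" */
	id = idr_alloc(&my_idr, obj, 1, 0, GFP_NOWAIT);
	spin_unlock(&my_lock);
	idr_preload_end();

	return id < 0 ? id : 0;			/* negative errno on failure */
}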
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 #endif
 		return -EINVAL;
 
-#ifdef CONFIG_COMPAT
-	if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN)
-#else
+	/*
+	 * can't compare against COMPAT/dlm_write_request32 because
+	 * we don't yet know if is64bit is zero
+	 */
 	if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
-#endif
 		return -EINVAL;
 
 	kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index cc16562654de..e15ef38c24fa 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
 config ECRYPT_FS
-	tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
+	tristate "eCrypt filesystem layer support"
+	depends on KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
 	select CRYPTO_ECB
 	select CRYPTO_CBC
 	select CRYPTO_MD5
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index cfb4b9fed520..7e2c6f5d7985 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -509,6 +509,12 @@ ecryptfs_dentry_to_lower_mnt(struct dentry *dentry)
 	return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt;
 }
 
+static inline struct path *
+ecryptfs_dentry_to_lower_path(struct dentry *dentry)
+{
+	return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path;
+}
+
 static inline void
 ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
 {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index d45ba4568128..53acc9d0c138 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -118,7 +118,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 
 	lower_file = ecryptfs_file_to_lower(file);
 	lower_file->f_pos = file->f_pos;
-	inode = file->f_path.dentry->d_inode;
+	inode = file_inode(file);
 	memset(&buf, 0, sizeof(buf));
 	buf.dirent = dirent;
 	buf.dentry = file->f_path.dentry;
@@ -133,7 +133,7 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir)
 		goto out;
 	if (rc >= 0)
 		fsstack_copy_attr_atime(inode,
-					lower_file->f_path.dentry->d_inode);
+					file_inode(lower_file));
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index cc7709e7c508..e0f07fb6d56b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1027,8 +1027,7 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct kstat lower_stat;
 	int rc;
 
-	rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
-			 ecryptfs_dentry_to_lower(dentry), &lower_stat);
+	rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat);
 	if (!rc) {
 		fsstack_copy_attr_all(dentry->d_inode,
 				      ecryptfs_inode_to_lower(dentry->d_inode));
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 5fa2471796c2..8d7a577ae497 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -115,10 +115,9 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
  */
 int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
 {
-	struct hlist_node *elem;
 	int rc;
 
-	hlist_for_each_entry(*daemon, elem,
+	hlist_for_each_entry(*daemon,
 			    &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
 			    euid_chain) {
 		if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) {
@@ -445,7 +444,6 @@ void ecryptfs_release_messaging(void)
 		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
 	}
 	if (ecryptfs_daemon_hash) {
-		struct hlist_node *elem;
 		struct ecryptfs_daemon *daemon;
 		int i;
 
@@ -453,7 +451,7 @@ void ecryptfs_release_messaging(void)
 		for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
 			int rc;
 
-			hlist_for_each_entry(daemon, elem,
+			hlist_for_each_entry(daemon,
 					     &ecryptfs_daemon_hash[i],
 					     euid_chain) {
 				rc = ecryptfs_exorcise_daemon(daemon);
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index b2a34a192f4f..6a160539cd23 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -40,16 +40,12 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
 			 loff_t offset, size_t size)
 {
 	struct file *lower_file;
-	mm_segment_t fs_save;
 	ssize_t rc;
 
 	lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
 	if (!lower_file)
 		return -EIO;
-	fs_save = get_fs();
-	set_fs(get_ds());
-	rc = vfs_write(lower_file, data, size, &offset);
-	set_fs(fs_save);
+	rc = kernel_write(lower_file, data, size, offset);
 	mark_inode_dirty_sync(ecryptfs_inode);
 	return rc;
 }
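The ecryptfs_write_lower() change above drops the get_fs()/set_fs(get_ds())/vfs_write() address-limit dance in favour of kernel_write(), which takes a kernel buffer and an offset directly. A minimal sketch of a caller, mirroring the kernel_write(file, buf, count, pos) call in the hunk; write_at() is a made-up name, not an eCryptfs helper:

#include <linux/fs.h>

/* Sketch: write a kernel buffer to an already-open file at a given offset. */
static ssize_t write_at(struct file *file, const char *buf, size_t count,
			loff_t pos)
{
	return kernel_write(file, buf, count, pos);	/* no set_fs() juggling needed */
}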
diff --git a/fs/efs/Kconfig b/fs/efs/Kconfig
index 6ebfc1c207a8..d020e3c30fea 100644
--- a/fs/efs/Kconfig
+++ b/fs/efs/Kconfig
@@ -1,6 +1,6 @@
 config EFS_FS
-	tristate "EFS file system support (read only) (EXPERIMENTAL)"
-	depends on BLOCK && EXPERIMENTAL
+	tristate "EFS file system support (read only)"
+	depends on BLOCK
 	help
 	  EFS is an older file system used for non-ISO9660 CD-ROMs and hard
 	  disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index 7ee6f7e3a608..055a9e9ca747 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -20,7 +20,7 @@ const struct inode_operations efs_dir_inode_operations = {
 };
 
 static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) {
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct buffer_head *bh;
 
 	struct efs_dir *dirblock;
diff --git a/fs/exec.c b/fs/exec.c
index 18c45cac368f..a96a4885bbbf 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -123,7 +123,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 		goto out;
 
 	error = -EINVAL;
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
 	error = -EACCES;
@@ -355,7 +355,7 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
  * flags, permissions, and offset, so we use temporary values.  We'll update
  * them later in setup_arg_pages().
  */
-int bprm_mm_init(struct linux_binprm *bprm)
+static int bprm_mm_init(struct linux_binprm *bprm)
 {
 	int err;
 	struct mm_struct *mm = NULL;
@@ -434,8 +434,9 @@ static int count(struct user_arg_ptr argv, int max)
 			if (IS_ERR(p))
 				return -EFAULT;
 
-			if (i++ >= max)
+			if (i >= max)
 				return -E2BIG;
+			++i;
 
 			if (fatal_signal_pending(current))
 				return -ERESTARTNOHAND;
@@ -763,7 +764,7 @@ struct file *open_exec(const char *name)
 		goto out;
 
 	err = -EACCES;
-	if (!S_ISREG(file->f_path.dentry->d_inode->i_mode))
+	if (!S_ISREG(file_inode(file)->i_mode))
 		goto exit;
 
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
@@ -1097,7 +1098,7 @@ EXPORT_SYMBOL(flush_old_exec);
 
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
-	if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
+	if (inode_permission(file_inode(file), MAY_READ) < 0)
 		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 }
 EXPORT_SYMBOL(would_dump);
@@ -1110,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
 	if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
-		set_dumpable(current->mm, SUID_DUMPABLE_ENABLED);
+		set_dumpable(current->mm, SUID_DUMP_USER);
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
@@ -1269,7 +1270,7 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
 int prepare_binprm(struct linux_binprm *bprm)
 {
 	umode_t mode;
-	struct inode * inode = bprm->file->f_path.dentry->d_inode;
+	struct inode * inode = file_inode(bprm->file);
 	int retval;
 
 	mode = inode->i_mode;
@@ -1638,17 +1639,17 @@ EXPORT_SYMBOL(set_binfmt);
 void set_dumpable(struct mm_struct *mm, int value)
 {
 	switch (value) {
-	case SUID_DUMPABLE_DISABLED:
+	case SUID_DUMP_DISABLE:
 		clear_bit(MMF_DUMPABLE, &mm->flags);
 		smp_wmb();
 		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
 		break;
-	case SUID_DUMPABLE_ENABLED:
+	case SUID_DUMP_USER:
 		set_bit(MMF_DUMPABLE, &mm->flags);
 		smp_wmb();
 		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
 		break;
-	case SUID_DUMPABLE_SAFE:
+	case SUID_DUMP_ROOT:
 		set_bit(MMF_DUMP_SECURELY, &mm->flags);
 		smp_wmb();
 		set_bit(MMF_DUMPABLE, &mm->flags);
@@ -1661,7 +1662,7 @@ int __get_dumpable(unsigned long mm_flags)
 	int ret;
 
 	ret = mm_flags & MMF_DUMPABLE_MASK;
-	return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret;
+	return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
 }
 
 int get_dumpable(struct mm_struct *mm)
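Most of the mechanical churn in this series, here and in the *_readdir() and ioctl hunks below, replaces open-coded filp->f_path.dentry->d_inode (or filp->f_dentry->d_inode) chains with the file_inode() helper. A small sketch of the intended usage; is_regular_open_file() is illustrative, not a kernel function:

#include <linux/fs.h>

static bool is_regular_open_file(struct file *filp)
{
	struct inode *inode = file_inode(filp);	/* was filp->f_path.dentry->d_inode */

	return S_ISREG(inode->i_mode);
}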
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index c61e62ac231c..46375896cfc0 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -242,7 +242,7 @@ static int
 exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
 	unsigned long npages = dir_pages(inode);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 5df4bb4aab14..262fc9940982 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -44,14 +44,13 @@ find_acceptable_alias(struct dentry *result,
 {
 	struct dentry *dentry, *toput = NULL;
 	struct inode *inode;
-	struct hlist_node *p;
 
 	if (acceptable(context, result))
 		return result;
 
 	inode = result->d_inode;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 		dget(dentry);
 		spin_unlock(&inode->i_lock);
 		if (toput)
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 2616d0ea5c5c..9f9992b37924 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -159,15 +159,6 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
 	return bh;
 }
 
-static void release_blocks(struct super_block *sb, int count)
-{
-	if (count) {
-		struct ext2_sb_info *sbi = EXT2_SB(sb);
-
-		percpu_counter_add(&sbi->s_freeblocks_counter, count);
-	}
-}
-
 static void group_adjust_blocks(struct super_block *sb, int group_no,
 	struct ext2_group_desc *desc, struct buffer_head *bh, int count)
 {
@@ -568,8 +559,11 @@ do_more:
 	}
 error_return:
 	brelse(bitmap_bh);
-	release_blocks(sb, freed);
-	dquot_free_block_nodirty(inode, freed);
+	if (freed) {
+		percpu_counter_add(&sbi->s_freeblocks_counter, freed);
+		dquot_free_block_nodirty(inode, freed);
+		mark_inode_dirty(inode);
+	}
 }
 
 /**
@@ -1239,10 +1233,6 @@ ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
 
 	*errp = -ENOSPC;
 	sb = inode->i_sb;
-	if (!sb) {
-		printk("ext2_new_blocks: nonexistent device");
-		return 0;
-	}
 
 	/*
 	 * Check quota for allocation of this block.
@@ -1416,9 +1406,11 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	dquot_free_block_nodirty(inode, *count-num);
-	mark_inode_dirty(inode);
-	*count = num;
+	if (num < *count) {
+		dquot_free_block_nodirty(inode, *count-num);
+		mark_inode_dirty(inode);
+		*count = num;
+	}
 	return ret_block;
 
 io_error:
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0f4f5c929257..4237722bfd27 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -290,7 +290,7 @@ static int
 ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
 {
 	loff_t pos = filp->f_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	unsigned int offset = pos & ~PAGE_CACHE_MASK;
 	unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6363ac66fafa..c3881e56662e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -495,6 +495,10 @@ static int ext2_alloc_branch(struct inode *inode,
 	 * parent to disk.
 	 */
 	bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+	if (unlikely(!bh)) {
+		err = -ENOMEM;
+		goto failed;
+	}
 	branch[n].bh = bh;
 	lock_buffer(bh);
 	memset(bh->b_data, 0, blocksize);
@@ -523,6 +527,14 @@ static int ext2_alloc_branch(struct inode *inode,
 	}
 	*blks = num;
 	return err;
+
+failed:
+	for (i = 1; i < n; i++)
+		bforget(branch[i].bh);
+	for (i = 0; i < indirect_blks; i++)
+		ext2_free_blocks(inode, new_blocks[i], 1);
+	ext2_free_blocks(inode, new_blocks[i], num);
+	return err;
 }
 
 /**
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 2de655f5d625..5d46c09863f0 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -19,7 +19,7 @@
 
 long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ext2_inode_info *ei = EXT2_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index fa04d023177e..7f68c8114026 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1500,7 +1500,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 		bh = sb_bread(sb, tmp_bh.b_blocknr);
 	else
 		bh = sb_getblk(sb, tmp_bh.b_blocknr);
-	if (!bh) {
+	if (unlikely(!bh)) {
 		err = -EIO;
 		goto out;
 	}
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index b6754dbbce3c..2d7557db3ae8 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -662,10 +662,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		ea_idebug(inode, "creating block %d", block);
 
 		new_bh = sb_getblk(sb, block);
-		if (!new_bh) {
+		if (unlikely(!new_bh)) {
 			ext2_free_blocks(inode, block, 1);
 			mark_inode_dirty(inode);
-			error = -EIO;
+			error = -ENOMEM;
 			goto cleanup;
 		}
 		lock_buffer(new_bh);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index dd91264ba94f..87eccbbca255 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -99,7 +99,7 @@ static int ext3_readdir(struct file * filp,
 	int i, stored;
 	struct ext3_dir_entry_2 *de;
 	int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
@@ -114,7 +114,7 @@ static int ext3_readdir(struct file * filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+		EXT3_I(file_inode(filp))->i_flags &= ~EXT3_INDEX_FL;
 	}
 	stored = 0;
 	offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -457,7 +457,7 @@ static int call_filldir(struct file * filp, void * dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t curr_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block * sb;
 	int error;
 
@@ -487,7 +487,7 @@ static int ext3_dx_readdir(struct file * filp,
 			 void * dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct fname *fname;
 	int ret;
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b176d4253544..d512c4bc4ad7 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -676,6 +676,10 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
 		 * parent to disk.
 		 */
 		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (unlikely(!bh)) {
+			err = -ENOMEM;
+			goto failed;
+		}
 		branch[n].bh = bh;
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
@@ -717,7 +721,7 @@ failed:
 		BUFFER_TRACE(branch[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, branch[i].bh);
 	}
-	for (i = 0; i <indirect_blks; i++)
+	for (i = 0; i < indirect_blks; i++)
 		ext3_free_blocks(handle, inode, new_blocks[i], 1);
 
 	ext3_free_blocks(handle, inode, new_blocks[i], num);
@@ -1078,8 +1082,8 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
 	if (!err && buffer_mapped(&dummy)) {
 		struct buffer_head *bh;
 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-		if (!bh) {
-			*errp = -EIO;
+		if (unlikely(!bh)) {
+			*errp = -ENOMEM;
 			goto err;
 		}
 		if (buffer_new(&dummy)) {
@@ -2729,12 +2733,12 @@ static int __ext3_get_inode_loc(struct inode *inode,
 		return -EIO;
 
 	bh = sb_getblk(inode->i_sb, block);
-	if (!bh) {
+	if (unlikely(!bh)) {
 		ext3_error (inode->i_sb, "ext3_get_inode_loc",
 				"unable to read inode block - "
 				"inode=%lu, block="E3FSBLK,
 				inode->i_ino, block);
-		return -EIO;
+		return -ENOMEM;
 	}
 	if (!buffer_uptodate(bh)) {
 		lock_buffer(bh);
@@ -2783,7 +2787,7 @@ static int __ext3_get_inode_loc(struct inode *inode,
 
 			bitmap_bh = sb_getblk(inode->i_sb,
 					le32_to_cpu(desc->bg_inode_bitmap));
-			if (!bitmap_bh)
+			if (unlikely(!bitmap_bh))
 				goto make_io;
 
 			/*
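A recurring pattern in the ext2/ext3 hunks of this series: a NULL return from sb_getblk() is now treated as an allocation failure, so callers test it (often with unlikely()) and return -ENOMEM rather than -EIO. A minimal sketch of that calling convention; read_meta_block() is a made-up name:

#include <linux/buffer_head.h>

/* Sketch: fetch one metadata block or report memory pressure. */
static int read_meta_block(struct super_block *sb, sector_t blk,
			   struct buffer_head **out)
{
	struct buffer_head *bh;

	bh = sb_getblk(sb, blk);
	if (unlikely(!bh))
		return -ENOMEM;	/* NULL here means allocation failure, not I/O error */

	*out = bh;
	return 0;
}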
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 677a5c27dc69..4d96e9a64532 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -14,7 +14,7 @@
 
 long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct inode *inode = filp->f_dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct ext3_inode_info *ei = EXT3_I(inode);
 	unsigned int flags;
 	unsigned short rsv_window_size;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 890b8947c546..692de13e3596 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,7 +36,6 @@
 #define NAMEI_RA_CHUNKS  2
 #define NAMEI_RA_BLOCKS  4
 #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
 
 static struct buffer_head *ext3_append(handle_t *handle,
 					struct inode *inode,
@@ -624,7 +623,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 
 	dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
 		       start_minor_hash));
-	dir = dir_file->f_path.dentry->d_inode;
+	dir = file_inode(dir_file);
 	if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
 		hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
 		if (hinfo.hash_version <= DX_HASH_TEA)
@@ -638,7 +637,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	}
 	hinfo.hash = start_hash;
 	hinfo.minor_hash = 0;
-	frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
+	frame = dx_probe(NULL, file_inode(dir_file), &hinfo, frames, &err);
 	if (!frame)
 		return err;
 
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0f814f3450de..27105655502c 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -116,8 +116,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 	int err;
 
 	bh = sb_getblk(sb, blk);
-	if (!bh)
-		return ERR_PTR(-EIO);
+	if (unlikely(!bh))
+		return ERR_PTR(-ENOMEM);
 	if ((err = ext3_journal_get_write_access(handle, bh))) {
 		brelse(bh);
 		bh = ERR_PTR(err);
@@ -234,8 +234,8 @@ static int setup_new_group_blocks(struct super_block *sb,
 			goto exit_bh;
 
 		gdb = sb_getblk(sb, block);
-		if (!gdb) {
-			err = -EIO;
+		if (unlikely(!gdb)) {
+			err = -ENOMEM;
 			goto exit_bh;
 		}
 		if ((err = ext3_journal_get_write_access(handle, gdb))) {
@@ -722,8 +722,8 @@ static void update_backups(struct super_block *sb,
 			break;
 
 		bh = sb_getblk(sb, group * bpg + blk_off);
-		if (!bh) {
-			err = -EIO;
+		if (unlikely(!bh)) {
+			err = -ENOMEM;
 			break;
 		}
 		ext3_debug("update metadata backup %#04lx\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6e50223b3299..5546ca225ffe 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -916,21 +916,24 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
916 "Not enough memory for storing quotafile name"); 916 "Not enough memory for storing quotafile name");
917 return 0; 917 return 0;
918 } 918 }
919 if (sbi->s_qf_names[qtype] && 919 if (sbi->s_qf_names[qtype]) {
920 strcmp(sbi->s_qf_names[qtype], qname)) { 920 int same = !strcmp(sbi->s_qf_names[qtype], qname);
921 ext3_msg(sb, KERN_ERR, 921
922 "%s quota file already specified", QTYPE2NAME(qtype));
923 kfree(qname); 922 kfree(qname);
924 return 0; 923 if (!same) {
924 ext3_msg(sb, KERN_ERR,
925 "%s quota file already specified",
926 QTYPE2NAME(qtype));
927 }
928 return same;
925 } 929 }
926 sbi->s_qf_names[qtype] = qname; 930 if (strchr(qname, '/')) {
927 if (strchr(sbi->s_qf_names[qtype], '/')) {
928 ext3_msg(sb, KERN_ERR, 931 ext3_msg(sb, KERN_ERR,
929 "quotafile must be on filesystem root"); 932 "quotafile must be on filesystem root");
930 kfree(sbi->s_qf_names[qtype]); 933 kfree(qname);
931 sbi->s_qf_names[qtype] = NULL;
932 return 0; 934 return 0;
933 } 935 }
936 sbi->s_qf_names[qtype] = qname;
934 set_opt(sbi->s_mount_opt, QUOTA); 937 set_opt(sbi->s_mount_opt, QUOTA);
935 return 1; 938 return 1;
936} 939}
@@ -945,11 +948,10 @@ static int clear_qf_name(struct super_block *sb, int qtype) {
945 " when quota turned on"); 948 " when quota turned on");
946 return 0; 949 return 0;
947 } 950 }
948 /* 951 if (sbi->s_qf_names[qtype]) {
949 * The space will be released later when all options are confirmed 952 kfree(sbi->s_qf_names[qtype]);
950 * to be correct 953 sbi->s_qf_names[qtype] = NULL;
951 */ 954 }
952 sbi->s_qf_names[qtype] = NULL;
953 return 1; 955 return 1;
954} 956}
955#endif 957#endif
@@ -2065,6 +2067,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2065 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2067 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2066 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2068 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2067 "writeback"); 2069 "writeback");
2070 sb->s_flags |= MS_SNAP_STABLE;
2068 2071
2069 return 0; 2072 return 0;
2070 2073
@@ -2605,7 +2608,18 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2605#ifdef CONFIG_QUOTA 2608#ifdef CONFIG_QUOTA
2606 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 2609 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2607 for (i = 0; i < MAXQUOTAS; i++) 2610 for (i = 0; i < MAXQUOTAS; i++)
2608 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 2611 if (sbi->s_qf_names[i]) {
2612 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
2613 GFP_KERNEL);
2614 if (!old_opts.s_qf_names[i]) {
2615 int j;
2616
2617 for (j = 0; j < i; j++)
2618 kfree(old_opts.s_qf_names[j]);
2619 return -ENOMEM;
2620 }
2621 } else
2622 old_opts.s_qf_names[i] = NULL;
2609#endif 2623#endif
2610 2624
2611 /* 2625 /*
@@ -2698,9 +2712,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2698#ifdef CONFIG_QUOTA 2712#ifdef CONFIG_QUOTA
2699 /* Release old quota file names */ 2713 /* Release old quota file names */
2700 for (i = 0; i < MAXQUOTAS; i++) 2714 for (i = 0; i < MAXQUOTAS; i++)
2701 if (old_opts.s_qf_names[i] && 2715 kfree(old_opts.s_qf_names[i]);
2702 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2703 kfree(old_opts.s_qf_names[i]);
2704#endif 2716#endif
2705 if (enable_quota) 2717 if (enable_quota)
2706 dquot_resume(sb, -1); 2718 dquot_resume(sb, -1);
@@ -2714,9 +2726,7 @@ restore_opts:
2714#ifdef CONFIG_QUOTA 2726#ifdef CONFIG_QUOTA
2715 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 2727 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2716 for (i = 0; i < MAXQUOTAS; i++) { 2728 for (i = 0; i < MAXQUOTAS; i++) {
2717 if (sbi->s_qf_names[i] && 2729 kfree(sbi->s_qf_names[i]);
2718 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2719 kfree(sbi->s_qf_names[i]);
2720 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 2730 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2721 } 2731 }
2722#endif 2732#endif
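The ext3_remount() hunks above stop aliasing old_opts.s_qf_names[] to sbi's pointers and instead take kstrdup() copies, which is what makes the later unconditional kfree() on either array safe. A small sketch of that snapshot idea; snapshot_names() and NR_NAMES are illustrative names under the assumption of a fixed-size name array:

#include <linux/slab.h>
#include <linux/string.h>

#define NR_NAMES 2

/* Take private copies so old and new arrays can be freed independently. */
static int snapshot_names(char *dst[NR_NAMES], char *const src[NR_NAMES])
{
	int i, j;

	for (i = 0; i < NR_NAMES; i++) {
		dst[i] = NULL;
		if (!src[i])
			continue;
		dst[i] = kstrdup(src[i], GFP_KERNEL);
		if (!dst[i]) {
			for (j = 0; j < i; j++)
				kfree(dst[j]);
			return -ENOMEM;
		}
	}
	return 0;
}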
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d22ebb7a4f55..b1fc96383e08 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -813,10 +813,10 @@ inserted:
 			ea_idebug(inode, "creating block %d", block);
 
 			new_bh = sb_getblk(sb, block);
-			if (!new_bh) {
+			if (unlikely(!new_bh)) {
 getblk_failed:
 				ext3_free_blocks(handle, inode, block, 1);
-				error = -EIO;
+				error = -ENOMEM;
 				goto cleanup;
 			}
 			lock_buffer(new_bh);
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 0a475c881852..987358740cb9 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -41,6 +41,7 @@ config EXT4_USE_FOR_EXT23
 
 config EXT4_FS_POSIX_ACL
 	bool "Ext4 POSIX Access Control Lists"
+	depends on EXT4_FS
 	select FS_POSIX_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
@@ -53,6 +54,7 @@ config EXT4_FS_POSIX_ACL
 
 config EXT4_FS_SECURITY
 	bool "Ext4 Security Labels"
+	depends on EXT4_FS
 	help
 	  Security labels support alternative access control models
 	  implemented by security modules like SELinux.  This option
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e6e0d988439b..39a54a0e9fe4 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -324,8 +324,8 @@ ext4_acl_chmod(struct inode *inode)
 	if (error)
 		return error;
 retry:
-	handle = ext4_journal_start(inode,
-				    EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
+				    ext4_jbd2_credits_xattr(inode));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 		ext4_std_error(inode->i_sb, error);
@@ -422,7 +422,8 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		acl = NULL;
 
 retry:
-	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+	handle = ext4_journal_start(inode, EXT4_HT_XATTR,
+				    ext4_jbd2_credits_xattr(inode));
 	if (IS_ERR(handle)) {
 		error = PTR_ERR(handle);
 		goto release_and_out;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index cf1821784a16..2f2e0da1a6b7 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -358,7 +358,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
 }
 
 /**
- * ext4_read_block_bitmap()
+ * ext4_read_block_bitmap_nowait()
  * @sb:			super block
  * @block_group:	given block group
  *
@@ -457,6 +457,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 	struct buffer_head *bh;
 
 	bh = ext4_read_block_bitmap_nowait(sb, block_group);
+	if (!bh)
+		return NULL;
 	if (ext4_wait_block_bitmap(sb, block_group, bh)) {
 		put_bh(bh);
 		return NULL;
@@ -482,11 +484,16 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
 
 	free_clusters  = percpu_counter_read_positive(fcc);
 	dirty_clusters = percpu_counter_read_positive(dcc);
-	root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
+
+	/*
+	 * r_blocks_count should always be multiple of the cluster ratio so
+	 * we are safe to do a plane bit shift only.
+	 */
+	root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
 
 	if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
 				EXT4_FREECLUSTERS_WATERMARK) {
-		free_clusters  = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+		free_clusters  = percpu_counter_sum_positive(fcc);
 		dirty_clusters = percpu_counter_sum_positive(dcc);
 	}
 	/* Check whether we have space after accounting for current
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 80a28b297279..6dda04f05ef4 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -110,7 +110,7 @@ static int ext4_readdir(struct file *filp,
 	int i, stored;
 	struct ext4_dir_entry_2 *de;
 	int err;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	int ret = 0;
 	int dir_has_error = 0;
@@ -133,7 +133,7 @@ static int ext4_readdir(struct file *filp,
 		 * We don't set the inode dirty flag since it's not
 		 * critical that it get flushed back to the disk.
 		 */
-		ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
+		ext4_clear_inode_flag(file_inode(filp),
 				      EXT4_INODE_INDEX);
 	}
 	stored = 0;
@@ -185,6 +185,7 @@ static int ext4_readdir(struct file *filp,
 					 "at offset %llu",
 					 (unsigned long long)filp->f_pos);
 			filp->f_pos += sb->s_blocksize - offset;
+			brelse(bh);
 			continue;
 		}
 		set_buffer_verified(bh);
@@ -494,7 +495,7 @@ static int call_filldir(struct file *filp, void *dirent,
 {
 	struct dir_private_info *info = filp->private_data;
 	loff_t curr_pos;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct super_block *sb;
 	int error;
 
@@ -526,7 +527,7 @@ static int ext4_dx_readdir(struct file *filp,
 			 void *dirent, filldir_t filldir)
 {
 	struct dir_private_info *info = filp->private_data;
-	struct inode *inode = filp->f_path.dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct fname *fname;
 	int ret;
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8462eb3c33aa..6e16c1867959 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -194,8 +194,7 @@ struct mpage_da_data {
  */
 #define EXT4_IO_END_UNWRITTEN	0x0001
 #define EXT4_IO_END_ERROR	0x0002
-#define EXT4_IO_END_QUEUED	0x0004
-#define EXT4_IO_END_DIRECT	0x0008
+#define EXT4_IO_END_DIRECT	0x0004
 
 struct ext4_io_page {
 	struct page	*p_page;
@@ -215,10 +214,8 @@ typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
 	struct inode		*inode;		/* file being written to */
 	unsigned int		flag;		/* unwritten or not */
-	struct page		*page;		/* for writepage() path */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
-	struct work_struct	work;		/* data work queue */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
 	int			num_io_pages;	/* for writepages() */
@@ -582,6 +579,8 @@ enum {
 #define EXT4_GET_BLOCKS_KEEP_SIZE		0x0080
 	/* Do not take i_data_sem locking in ext4_map_blocks */
 #define EXT4_GET_BLOCKS_NO_LOCK			0x0100
+	/* Do not put hole in extent cache */
+#define EXT4_GET_BLOCKS_NO_PUT_HOLE		0x0200
 
 /*
  * Flags used by ext4_free_blocks
@@ -810,17 +809,6 @@ do { \
 
 #endif /* defined(__KERNEL__) || defined(__linux__) */
 
-/*
- * storage for cached extent
- * If ec_len == 0, then the cache is invalid.
- * If ec_start == 0, then the cache represents a gap (null mapping)
- */
-struct ext4_ext_cache {
-	ext4_fsblk_t	ec_start;
-	ext4_lblk_t	ec_block;
-	__u32		ec_len; /* must be 32bit to return holes */
-};
-
 #include "extents_status.h"
 
 /*
@@ -887,7 +875,6 @@ struct ext4_inode_info {
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
 
-	struct ext4_ext_cache i_cached_extent;
 	/*
 	 * File creation time. Its function is same as that of
 	 * struct timespec i_{a,c,m}time in the generic inode.
@@ -901,6 +888,8 @@ struct ext4_inode_info {
 	/* extents status tree */
 	struct ext4_es_tree i_es_tree;
 	rwlock_t i_es_lock;
+	struct list_head i_es_lru;
+	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
 
 	/* ialloc */
 	ext4_group_t	i_last_alloc_group;
@@ -930,6 +919,7 @@ struct ext4_inode_info {
 	spinlock_t i_completed_io_lock;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
+	struct work_struct i_unwritten_work;	/* deferred extent conversion */
 
 	spinlock_t i_block_reservation_lock;
 
@@ -985,7 +975,6 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
-#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
@@ -1316,6 +1305,11 @@ struct ext4_sb_info {
 
 	/* Precomputed FS UUID checksum for seeding other checksums */
 	__u32 s_csum_seed;
+
+	/* Reclaim extents from extent status tree */
+	struct shrinker s_es_shrinker;
+	struct list_head s_es_lru;
+	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2007,9 +2001,20 @@ extern int ext4fs_dirhash(const char *name, int len, struct
 			  dx_hash_info *hinfo);
 
 /* ialloc.c */
-extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
+extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
 				    const struct qstr *qstr, __u32 goal,
-				    uid_t *owner);
+				    uid_t *owner, int handle_type,
+				    unsigned int line_no, int nblocks);
+
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \
+	__ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
+			 0, 0, 0)
+#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
+				    type, nblocks)		   \
+	__ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
+			 (type), __LINE__, (nblocks))
+
+
 extern void ext4_free_inode(handle_t *, struct inode *);
 extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -2103,6 +2108,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
 extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
 extern void ext4_ind_truncate(struct inode *inode);
+extern int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length);
 
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -2151,6 +2157,8 @@ extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern void ext4_kvfree(void *ptr);
 extern int ext4_alloc_flex_bg_array(struct super_block *sb,
 				    ext4_group_t ngroup);
+extern const char *ext4_decode_error(struct super_block *sb, int errno,
+				     char nbuf[16]);
 extern __printf(4, 5)
 void __ext4_error(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
@@ -2227,6 +2235,8 @@ extern int ext4_group_desc_csum_verify(struct super_block *sb, __u32 group,
 				       struct ext4_group_desc *gdp);
 extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
 				     struct ext4_group_desc *gdp);
+extern int ext4_register_li_request(struct super_block *sb,
+				    ext4_group_t first_not_zeroed);
 
 static inline int ext4_has_group_desc_csum(struct super_block *sb)
 {
@@ -2454,6 +2464,75 @@ extern const struct file_operations ext4_file_operations;
 extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
 extern void ext4_unwritten_wait(struct inode *inode);
 
+/* inline.c */
+extern int ext4_has_inline_data(struct inode *inode);
+extern int ext4_get_inline_size(struct inode *inode);
+extern int ext4_get_max_inline_size(struct inode *inode);
+extern int ext4_find_inline_data_nolock(struct inode *inode);
+extern void ext4_write_inline_data(struct inode *inode,
+				   struct ext4_iloc *iloc,
+				   void *buffer, loff_t pos,
+				   unsigned int len);
+extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
+				    unsigned int len);
+extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
+				 unsigned int len);
+extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
+
+extern int ext4_readpage_inline(struct inode *inode, struct page *page);
+extern int ext4_try_to_write_inline_data(struct address_space *mapping,
+					 struct inode *inode,
+					 loff_t pos, unsigned len,
+					 unsigned flags,
+					 struct page **pagep);
+extern int ext4_write_inline_data_end(struct inode *inode,
+				      loff_t pos, unsigned len,
+				      unsigned copied,
+				      struct page *page);
+extern struct buffer_head *
+ext4_journalled_write_inline_data(struct inode *inode,
+				  unsigned len,
+				  struct page *page);
+extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
+					   struct inode *inode,
+					   loff_t pos, unsigned len,
+					   unsigned flags,
+					   struct page **pagep,
+					   void **fsdata);
+extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
+					  unsigned len, unsigned copied,
+					  struct page *page);
+extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
+				     struct inode *inode);
+extern int ext4_try_create_inline_dir(handle_t *handle,
+				      struct inode *parent,
+				      struct inode *inode);
+extern int ext4_read_inline_dir(struct file *filp,
+				void *dirent, filldir_t filldir,
+				int *has_inline_data);
+extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
+					const struct qstr *d_name,
+					struct ext4_dir_entry_2 **res_dir,
+					int *has_inline_data);
+extern int ext4_delete_inline_entry(handle_t *handle,
+				    struct inode *dir,
+				    struct ext4_dir_entry_2 *de_del,
+				    struct buffer_head *bh,
+				    int *has_inline_data);
+extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
+extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
+					struct ext4_dir_entry_2 **parent_de,
+					int *retval);
+extern int ext4_inline_data_fiemap(struct inode *inode,
+				   struct fiemap_extent_info *fieinfo,
+				   int *has_inline);
+extern int ext4_try_to_evict_inline_data(handle_t *handle,
+					 struct inode *inode,
+					 int needed);
+extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
+
+extern int ext4_convert_inline_data(struct inode *inode);
+
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
@@ -2520,6 +2599,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
 						  struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
+extern int ext4_find_delalloc_range(struct inode *inode,
+				    ext4_lblk_t lblk_start,
+				    ext4_lblk_t lblk_end);
 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       __u64 start, __u64 len);
@@ -2537,6 +2619,7 @@ extern void ext4_exit_pageio(void);
 extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 487fda12bc00..8643ff5bbeb7 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -193,12 +193,6 @@ static inline unsigned short ext_depth(struct inode *inode)
193 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); 193 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
194} 194}
195 195
196static inline void
197ext4_ext_invalidate_cache(struct inode *inode)
198{
199 EXT4_I(inode)->i_cached_extent.ec_len = 0;
200}
201
202static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) 196static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
203{ 197{
204 /* We can not have an uninitialized extent of zero length! */ 198 /* We can not have an uninitialized extent of zero length! */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b4323ba846b5..7058975e3a55 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -6,6 +6,108 @@
6 6
7#include <trace/events/ext4.h> 7#include <trace/events/ext4.h>
8 8
9/* Just increment the non-pointer handle value */
10static handle_t *ext4_get_nojournal(void)
11{
12 handle_t *handle = current->journal_info;
13 unsigned long ref_cnt = (unsigned long)handle;
14
15 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
16
17 ref_cnt++;
18 handle = (handle_t *)ref_cnt;
19
20 current->journal_info = handle;
21 return handle;
22}
23
24
25/* Decrement the non-pointer handle value */
26static void ext4_put_nojournal(handle_t *handle)
27{
28 unsigned long ref_cnt = (unsigned long)handle;
29
30 BUG_ON(ref_cnt == 0);
31
32 ref_cnt--;
33 handle = (handle_t *)ref_cnt;
34
35 current->journal_info = handle;
36}
37
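ext4_get_nojournal()/ext4_put_nojournal() above never allocate anything: when the filesystem runs without a journal, the nesting depth of "handles" is encoded directly in the handle_t pointer kept in current->journal_info. A standalone C sketch of that pointer-as-counter trick (the names, the global slot and the 4096 cap are illustrative, not the kernel API):

#include <assert.h>
#include <stdio.h>

typedef struct handle handle_t;     /* opaque; only the pointer value is used */

static handle_t *journal_info;      /* stands in for current->journal_info */
#define NOJOURNAL_MAX_REF_COUNT 4096UL

static handle_t *get_nojournal(void)
{
    unsigned long ref_cnt = (unsigned long)journal_info;

    assert(ref_cnt < NOJOURNAL_MAX_REF_COUNT);
    journal_info = (handle_t *)(ref_cnt + 1);   /* the "handle" is just a count */
    return journal_info;
}

static void put_nojournal(handle_t *handle)
{
    unsigned long ref_cnt = (unsigned long)handle;

    assert(ref_cnt != 0);
    journal_info = (handle_t *)(ref_cnt - 1);
}

int main(void)
{
    handle_t *outer = get_nojournal();  /* depth 1 */
    handle_t *inner = get_nojournal();  /* depth 2: nested "transaction" */

    put_nojournal(inner);
    put_nojournal(outer);
    printf("final depth: %lu\n", (unsigned long)journal_info);  /* 0 */
    return 0;
}
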
38/*
39 * Wrappers for jbd2_journal_start/end.
40 */
41handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
42 int type, int nblocks)
43{
44 journal_t *journal;
45
46 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
47 if (sb->s_flags & MS_RDONLY)
48 return ERR_PTR(-EROFS);
49
50 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
51 journal = EXT4_SB(sb)->s_journal;
52 if (!journal)
53 return ext4_get_nojournal();
54 /*
55 * Special case here: if the journal has aborted behind our
56 * backs (eg. EIO in the commit thread), then we still need to
57 * take the FS itself readonly cleanly.
58 */
59 if (is_journal_aborted(journal)) {
60 ext4_abort(sb, "Detected aborted journal");
61 return ERR_PTR(-EROFS);
62 }
63 return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line);
64}
65
66int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
67{
68 struct super_block *sb;
69 int err;
70 int rc;
71
72 if (!ext4_handle_valid(handle)) {
73 ext4_put_nojournal(handle);
74 return 0;
75 }
76 sb = handle->h_transaction->t_journal->j_private;
77 err = handle->h_err;
78 rc = jbd2_journal_stop(handle);
79
80 if (!err)
81 err = rc;
82 if (err)
83 __ext4_std_error(sb, where, line, err);
84 return err;
85}
86
87void ext4_journal_abort_handle(const char *caller, unsigned int line,
88 const char *err_fn, struct buffer_head *bh,
89 handle_t *handle, int err)
90{
91 char nbuf[16];
92 const char *errstr = ext4_decode_error(NULL, err, nbuf);
93
94 BUG_ON(!ext4_handle_valid(handle));
95
96 if (bh)
97 BUFFER_TRACE(bh, "abort");
98
99 if (!handle->h_err)
100 handle->h_err = err;
101
102 if (is_handle_aborted(handle))
103 return;
104
105 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
106 caller, line, errstr, err_fn);
107
108 jbd2_journal_abort_handle(handle);
109}
110
9int __ext4_journal_get_write_access(const char *where, unsigned int line, 111int __ext4_journal_get_write_access(const char *where, unsigned int line,
10 handle_t *handle, struct buffer_head *bh) 112 handle_t *handle, struct buffer_head *bh)
11{ 113{
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 7177f9b21cb2..4c216b1bf20c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -59,12 +59,6 @@
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
64 * generous. We can grow the delete transaction later if necessary. */
65
66#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
67
68/* Define an arbitrary limit for the amount of data we will anticipate 62/* Define an arbitrary limit for the amount of data we will anticipate
69 * writing to any given transaction. For unbounded transactions such as 63 * writing to any given transaction. For unbounded transactions such as
70 * write(2) and truncate(2) we can write more than this, but we always 64 * write(2) and truncate(2) we can write more than this, but we always
@@ -110,6 +104,36 @@
110#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 104#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
111#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 105#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
112 106
107static inline int ext4_jbd2_credits_xattr(struct inode *inode)
108{
109 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb);
110
111 /*
112 * In case of inline data, we may push out the data to a block,
113 * so we need to reserve credits for this eventuality
114 */
115 if (ext4_has_inline_data(inode))
116 credits += ext4_writepage_trans_blocks(inode) + 1;
117 return credits;
118}
119
120
121/*
122 * Ext4 handle operation types -- for logging purposes
123 */
124#define EXT4_HT_MISC 0
125#define EXT4_HT_INODE 1
126#define EXT4_HT_WRITE_PAGE 2
127#define EXT4_HT_MAP_BLOCKS 3
128#define EXT4_HT_DIR 4
129#define EXT4_HT_TRUNCATE 5
130#define EXT4_HT_QUOTA 6
131#define EXT4_HT_RESIZE 7
132#define EXT4_HT_MIGRATE 8
133#define EXT4_HT_MOVE_EXTENTS 9
134#define EXT4_HT_XATTR 10
135#define EXT4_HT_MAX 11
136
113/** 137/**
114 * struct ext4_journal_cb_entry - Base structure for callback information. 138 * struct ext4_journal_cb_entry - Base structure for callback information.
115 * 139 *
@@ -234,7 +258,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
234#define ext4_handle_dirty_super(handle, sb) \ 258#define ext4_handle_dirty_super(handle, sb) \
235 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) 259 __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
236 260
237handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 261handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
262 int type, int nblocks);
238int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); 263int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
239 264
240#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) 265#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -268,9 +293,17 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
268 return 1; 293 return 1;
269} 294}
270 295
271static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) 296#define ext4_journal_start_sb(sb, type, nblocks) \
297 __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks))
298
299#define ext4_journal_start(inode, type, nblocks) \
300 __ext4_journal_start((inode), __LINE__, (type), (nblocks))
301
302static inline handle_t *__ext4_journal_start(struct inode *inode,
303 unsigned int line, int type,
304 int nblocks)
272{ 305{
273 return ext4_journal_start_sb(inode->i_sb, nblocks); 306 return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks);
274} 307}
275 308
276#define ext4_journal_stop(handle) \ 309#define ext4_journal_stop(handle) \
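
The new __ext4_journal_start_sb()/__ext4_journal_start() pair threads the caller's line number and an EXT4_HT_* handle type through to tracing, and the ext4_journal_start*() macros capture __LINE__ at each call site. A standalone sketch of that wrapper pattern only (journal_start, HT_TRUNCATE and the printf stand in for the real jbd2 machinery):

#include <stdio.h>

#define HT_TRUNCATE 5   /* illustrative handle type, in the spirit of EXT4_HT_* */

/* The worker receives the call site and type so it can log or trace them. */
static void *__journal_start(const char *file, unsigned int line,
                             int type, int nblocks)
{
    printf("journal_start at %s:%u type=%d nblocks=%d\n",
           file, line, type, nblocks);
    return (void *)1;   /* dummy handle */
}

/* The macro, not the caller, supplies the location information. */
#define journal_start(type, nblocks) \
    __journal_start(__FILE__, __LINE__, (type), (nblocks))

int main(void)
{
    void *handle = journal_start(HT_TRUNCATE, 8);

    (void)handle;
    return 0;
}
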
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5ae1674ec12f..28dd8eeea6a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -112,7 +112,7 @@ static int ext4_split_extent_at(handle_t *handle,
112 int flags); 112 int flags);
113 113
114static int ext4_find_delayed_extent(struct inode *inode, 114static int ext4_find_delayed_extent(struct inode *inode,
115 struct ext4_ext_cache *newex); 115 struct extent_status *newes);
116 116
117static int ext4_ext_truncate_extend_restart(handle_t *handle, 117static int ext4_ext_truncate_extend_restart(handle_t *handle,
118 struct inode *inode, 118 struct inode *inode,
@@ -714,7 +714,6 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
714 eh->eh_magic = EXT4_EXT_MAGIC; 714 eh->eh_magic = EXT4_EXT_MAGIC;
715 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); 715 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
716 ext4_mark_inode_dirty(handle, inode); 716 ext4_mark_inode_dirty(handle, inode);
717 ext4_ext_invalidate_cache(inode);
718 return 0; 717 return 0;
719} 718}
720 719
@@ -725,6 +724,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
725 struct ext4_extent_header *eh; 724 struct ext4_extent_header *eh;
726 struct buffer_head *bh; 725 struct buffer_head *bh;
727 short int depth, i, ppos = 0, alloc = 0; 726 short int depth, i, ppos = 0, alloc = 0;
727 int ret;
728 728
729 eh = ext_inode_hdr(inode); 729 eh = ext_inode_hdr(inode);
730 depth = ext_depth(inode); 730 depth = ext_depth(inode);
@@ -752,12 +752,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
752 path[ppos].p_ext = NULL; 752 path[ppos].p_ext = NULL;
753 753
754 bh = sb_getblk(inode->i_sb, path[ppos].p_block); 754 bh = sb_getblk(inode->i_sb, path[ppos].p_block);
755 if (unlikely(!bh)) 755 if (unlikely(!bh)) {
756 ret = -ENOMEM;
756 goto err; 757 goto err;
758 }
757 if (!bh_uptodate_or_lock(bh)) { 759 if (!bh_uptodate_or_lock(bh)) {
758 trace_ext4_ext_load_extent(inode, block, 760 trace_ext4_ext_load_extent(inode, block,
759 path[ppos].p_block); 761 path[ppos].p_block);
760 if (bh_submit_read(bh) < 0) { 762 ret = bh_submit_read(bh);
763 if (ret < 0) {
761 put_bh(bh); 764 put_bh(bh);
762 goto err; 765 goto err;
763 } 766 }
@@ -768,13 +771,15 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
768 put_bh(bh); 771 put_bh(bh);
769 EXT4_ERROR_INODE(inode, 772 EXT4_ERROR_INODE(inode,
770 "ppos %d > depth %d", ppos, depth); 773 "ppos %d > depth %d", ppos, depth);
774 ret = -EIO;
771 goto err; 775 goto err;
772 } 776 }
773 path[ppos].p_bh = bh; 777 path[ppos].p_bh = bh;
774 path[ppos].p_hdr = eh; 778 path[ppos].p_hdr = eh;
775 i--; 779 i--;
776 780
777 if (ext4_ext_check_block(inode, eh, i, bh)) 781 ret = ext4_ext_check_block(inode, eh, i, bh);
782 if (ret < 0)
778 goto err; 783 goto err;
779 } 784 }
780 785
@@ -796,7 +801,7 @@ err:
796 ext4_ext_drop_refs(path); 801 ext4_ext_drop_refs(path);
797 if (alloc) 802 if (alloc)
798 kfree(path); 803 kfree(path);
799 return ERR_PTR(-EIO); 804 return ERR_PTR(ret);
800} 805}
801 806
802/* 807/*
@@ -950,8 +955,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
950 goto cleanup; 955 goto cleanup;
951 } 956 }
952 bh = sb_getblk(inode->i_sb, newblock); 957 bh = sb_getblk(inode->i_sb, newblock);
953 if (!bh) { 958 if (unlikely(!bh)) {
954 err = -EIO; 959 err = -ENOMEM;
955 goto cleanup; 960 goto cleanup;
956 } 961 }
957 lock_buffer(bh); 962 lock_buffer(bh);
@@ -1023,8 +1028,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1023 oldblock = newblock; 1028 oldblock = newblock;
1024 newblock = ablocks[--a]; 1029 newblock = ablocks[--a];
1025 bh = sb_getblk(inode->i_sb, newblock); 1030 bh = sb_getblk(inode->i_sb, newblock);
1026 if (!bh) { 1031 if (unlikely(!bh)) {
1027 err = -EIO; 1032 err = -ENOMEM;
1028 goto cleanup; 1033 goto cleanup;
1029 } 1034 }
1030 lock_buffer(bh); 1035 lock_buffer(bh);
@@ -1136,11 +1141,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1136 return err; 1141 return err;
1137 1142
1138 bh = sb_getblk(inode->i_sb, newblock); 1143 bh = sb_getblk(inode->i_sb, newblock);
1139 if (!bh) { 1144 if (unlikely(!bh))
1140 err = -EIO; 1145 return -ENOMEM;
1141 ext4_std_error(inode->i_sb, err);
1142 return err;
1143 }
1144 lock_buffer(bh); 1146 lock_buffer(bh);
1145 1147
1146 err = ext4_journal_get_create_access(handle, bh); 1148 err = ext4_journal_get_create_access(handle, bh);
@@ -1960,7 +1962,6 @@ cleanup:
1960 ext4_ext_drop_refs(npath); 1962 ext4_ext_drop_refs(npath);
1961 kfree(npath); 1963 kfree(npath);
1962 } 1964 }
1963 ext4_ext_invalidate_cache(inode);
1964 return err; 1965 return err;
1965} 1966}
1966 1967
@@ -1969,8 +1970,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
1969 struct fiemap_extent_info *fieinfo) 1970 struct fiemap_extent_info *fieinfo)
1970{ 1971{
1971 struct ext4_ext_path *path = NULL; 1972 struct ext4_ext_path *path = NULL;
1972 struct ext4_ext_cache newex;
1973 struct ext4_extent *ex; 1973 struct ext4_extent *ex;
1974 struct extent_status es;
1974 ext4_lblk_t next, next_del, start = 0, end = 0; 1975 ext4_lblk_t next, next_del, start = 0, end = 0;
1975 ext4_lblk_t last = block + num; 1976 ext4_lblk_t last = block + num;
1976 int exists, depth = 0, err = 0; 1977 int exists, depth = 0, err = 0;
@@ -2044,37 +2045,47 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2044 BUG_ON(end <= start); 2045 BUG_ON(end <= start);
2045 2046
2046 if (!exists) { 2047 if (!exists) {
2047 newex.ec_block = start; 2048 es.es_lblk = start;
2048 newex.ec_len = end - start; 2049 es.es_len = end - start;
2049 newex.ec_start = 0; 2050 es.es_pblk = 0;
2050 } else { 2051 } else {
2051 newex.ec_block = le32_to_cpu(ex->ee_block); 2052 es.es_lblk = le32_to_cpu(ex->ee_block);
2052 newex.ec_len = ext4_ext_get_actual_len(ex); 2053 es.es_len = ext4_ext_get_actual_len(ex);
2053 newex.ec_start = ext4_ext_pblock(ex); 2054 es.es_pblk = ext4_ext_pblock(ex);
2054 if (ext4_ext_is_uninitialized(ex)) 2055 if (ext4_ext_is_uninitialized(ex))
2055 flags |= FIEMAP_EXTENT_UNWRITTEN; 2056 flags |= FIEMAP_EXTENT_UNWRITTEN;
2056 } 2057 }
2057 2058
2058 /* 2059 /*
2059 * Find delayed extent and update newex accordingly. We call 2060 * Find delayed extent and update es accordingly. We call
2060 * it even in !exists case to find out whether newex is the 2061 * it even in !exists case to find out whether es is the
2061 * last existing extent or not. 2062 * last existing extent or not.
2062 */ 2063 */
2063 next_del = ext4_find_delayed_extent(inode, &newex); 2064 next_del = ext4_find_delayed_extent(inode, &es);
2064 if (!exists && next_del) { 2065 if (!exists && next_del) {
2065 exists = 1; 2066 exists = 1;
2066 flags |= FIEMAP_EXTENT_DELALLOC; 2067 flags |= FIEMAP_EXTENT_DELALLOC;
2067 } 2068 }
2068 up_read(&EXT4_I(inode)->i_data_sem); 2069 up_read(&EXT4_I(inode)->i_data_sem);
2069 2070
2070 if (unlikely(newex.ec_len == 0)) { 2071 if (unlikely(es.es_len == 0)) {
2071 EXT4_ERROR_INODE(inode, "newex.ec_len == 0"); 2072 EXT4_ERROR_INODE(inode, "es.es_len == 0");
2072 err = -EIO; 2073 err = -EIO;
2073 break; 2074 break;
2074 } 2075 }
2075 2076
2076 /* This is possible iff next == next_del == EXT_MAX_BLOCKS */ 2077 /*
2077 if (next == next_del) { 2078 * This is possible iff next == next_del == EXT_MAX_BLOCKS.
 2079 * We need to check next == EXT_MAX_BLOCKS because it is
 2080 * possible for an extent to carry both unwritten and delayed
 2081 * status: when a delayed allocated extent is later allocated
 2082 * by fallocate, the status tree will track both of them in a
 2083 * single extent.
 2084 *
 2085 * So we could return an unwritten and delayed extent, and
 2086 * its block is equal to 'next'.
2087 */
2088 if (next == next_del && next == EXT_MAX_BLOCKS) {
2078 flags |= FIEMAP_EXTENT_LAST; 2089 flags |= FIEMAP_EXTENT_LAST;
2079 if (unlikely(next_del != EXT_MAX_BLOCKS || 2090 if (unlikely(next_del != EXT_MAX_BLOCKS ||
2080 next != EXT_MAX_BLOCKS)) { 2091 next != EXT_MAX_BLOCKS)) {
@@ -2089,9 +2100,9 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2089 2100
2090 if (exists) { 2101 if (exists) {
2091 err = fiemap_fill_next_extent(fieinfo, 2102 err = fiemap_fill_next_extent(fieinfo,
2092 (__u64)newex.ec_block << blksize_bits, 2103 (__u64)es.es_lblk << blksize_bits,
2093 (__u64)newex.ec_start << blksize_bits, 2104 (__u64)es.es_pblk << blksize_bits,
2094 (__u64)newex.ec_len << blksize_bits, 2105 (__u64)es.es_len << blksize_bits,
2095 flags); 2106 flags);
2096 if (err < 0) 2107 if (err < 0)
2097 break; 2108 break;
@@ -2101,7 +2112,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2101 } 2112 }
2102 } 2113 }
2103 2114
2104 block = newex.ec_block + newex.ec_len; 2115 block = es.es_lblk + es.es_len;
2105 } 2116 }
2106 2117
2107 if (path) { 2118 if (path) {
@@ -2112,21 +2123,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2112 return err; 2123 return err;
2113} 2124}
2114 2125
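fiemap reports byte ranges while the extent status entry above stores block numbers, hence the (__u64) casts before the shifts by blksize_bits when es.es_lblk, es.es_pblk and es.es_len are handed to fiemap_fill_next_extent(). A small standalone illustration of that conversion (block size and values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    unsigned int blksize_bits = 12;         /* 4096-byte blocks */
    uint32_t es_lblk = 100, es_len = 8;     /* logical start and length, in blocks */
    uint64_t es_pblk = 34000;               /* physical start, in blocks */

    /* Cast to 64 bits *before* shifting, or a large block number overflows. */
    printf("logical %llu physical %llu length %llu (bytes)\n",
           (unsigned long long)es_lblk << blksize_bits,
           (unsigned long long)es_pblk << blksize_bits,
           (unsigned long long)es_len << blksize_bits);
    return 0;
}
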
2115static void
2116ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
2117 __u32 len, ext4_fsblk_t start)
2118{
2119 struct ext4_ext_cache *cex;
2120 BUG_ON(len == 0);
2121 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2122 trace_ext4_ext_put_in_cache(inode, block, len, start);
2123 cex = &EXT4_I(inode)->i_cached_extent;
2124 cex->ec_block = block;
2125 cex->ec_len = len;
2126 cex->ec_start = start;
2127 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2128}
2129
2130/* 2126/*
2131 * ext4_ext_put_gap_in_cache: 2127 * ext4_ext_put_gap_in_cache:
2132 * calculate boundaries of the gap that the requested block fits into 2128 * calculate boundaries of the gap that the requested block fits into
@@ -2143,9 +2139,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2143 2139
2144 ex = path[depth].p_ext; 2140 ex = path[depth].p_ext;
2145 if (ex == NULL) { 2141 if (ex == NULL) {
2146 /* there is no extent yet, so gap is [0;-] */ 2142 /*
2147 lblock = 0; 2143 * there is no extent yet, so gap is [0;-] and we
2148 len = EXT_MAX_BLOCKS; 2144 * don't cache it
2145 */
2149 ext_debug("cache gap(whole file):"); 2146 ext_debug("cache gap(whole file):");
2150 } else if (block < le32_to_cpu(ex->ee_block)) { 2147 } else if (block < le32_to_cpu(ex->ee_block)) {
2151 lblock = block; 2148 lblock = block;
@@ -2154,6 +2151,9 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2154 block, 2151 block,
2155 le32_to_cpu(ex->ee_block), 2152 le32_to_cpu(ex->ee_block),
2156 ext4_ext_get_actual_len(ex)); 2153 ext4_ext_get_actual_len(ex));
2154 if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
2155 ext4_es_insert_extent(inode, lblock, len, ~0,
2156 EXTENT_STATUS_HOLE);
2157 } else if (block >= le32_to_cpu(ex->ee_block) 2157 } else if (block >= le32_to_cpu(ex->ee_block)
2158 + ext4_ext_get_actual_len(ex)) { 2158 + ext4_ext_get_actual_len(ex)) {
2159 ext4_lblk_t next; 2159 ext4_lblk_t next;
@@ -2167,58 +2167,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2167 block); 2167 block);
2168 BUG_ON(next == lblock); 2168 BUG_ON(next == lblock);
2169 len = next - lblock; 2169 len = next - lblock;
2170 if (!ext4_find_delalloc_range(inode, lblock, lblock + len - 1))
2171 ext4_es_insert_extent(inode, lblock, len, ~0,
2172 EXTENT_STATUS_HOLE);
2170 } else { 2173 } else {
2171 lblock = len = 0; 2174 lblock = len = 0;
2172 BUG(); 2175 BUG();
2173 } 2176 }
2174 2177
2175 ext_debug(" -> %u:%lu\n", lblock, len); 2178 ext_debug(" -> %u:%lu\n", lblock, len);
2176 ext4_ext_put_in_cache(inode, lblock, len, 0);
2177}
2178
2179/*
2180 * ext4_ext_in_cache()
2181 * Checks to see if the given block is in the cache.
2182 * If it is, the cached extent is stored in the given
2183 * cache extent pointer.
2184 *
2185 * @inode: The files inode
2186 * @block: The block to look for in the cache
2187 * @ex: Pointer where the cached extent will be stored
2188 * if it contains block
2189 *
2190 * Return 0 if cache is invalid; 1 if the cache is valid
2191 */
2192static int
2193ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2194 struct ext4_extent *ex)
2195{
2196 struct ext4_ext_cache *cex;
2197 int ret = 0;
2198
2199 /*
2200 * We borrow i_block_reservation_lock to protect i_cached_extent
2201 */
2202 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
2203 cex = &EXT4_I(inode)->i_cached_extent;
2204
2205 /* has cache valid data? */
2206 if (cex->ec_len == 0)
2207 goto errout;
2208
2209 if (in_range(block, cex->ec_block, cex->ec_len)) {
2210 ex->ee_block = cpu_to_le32(cex->ec_block);
2211 ext4_ext_store_pblock(ex, cex->ec_start);
2212 ex->ee_len = cpu_to_le16(cex->ec_len);
2213 ext_debug("%u cached by %u:%u:%llu\n",
2214 block,
2215 cex->ec_block, cex->ec_len, cex->ec_start);
2216 ret = 1;
2217 }
2218errout:
2219 trace_ext4_ext_in_cache(inode, block, ret);
2220 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2221 return ret;
2222} 2179}
2223 2180
2224/* 2181/*
@@ -2653,13 +2610,11 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2653 ext_debug("truncate since %u to %u\n", start, end); 2610 ext_debug("truncate since %u to %u\n", start, end);
2654 2611
2655 /* probably first extent we're gonna free will be last in block */ 2612 /* probably first extent we're gonna free will be last in block */
2656 handle = ext4_journal_start(inode, depth + 1); 2613 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
2657 if (IS_ERR(handle)) 2614 if (IS_ERR(handle))
2658 return PTR_ERR(handle); 2615 return PTR_ERR(handle);
2659 2616
2660again: 2617again:
2661 ext4_ext_invalidate_cache(inode);
2662
2663 trace_ext4_ext_remove_space(inode, start, depth); 2618 trace_ext4_ext_remove_space(inode, start, depth);
2664 2619
2665 /* 2620 /*
@@ -3519,19 +3474,19 @@ out:
3519 * 3474 *
3520 * Return 1 if there is a delalloc block in the range, otherwise 0. 3475 * Return 1 if there is a delalloc block in the range, otherwise 0.
3521 */ 3476 */
3522static int ext4_find_delalloc_range(struct inode *inode, 3477int ext4_find_delalloc_range(struct inode *inode,
3523 ext4_lblk_t lblk_start, 3478 ext4_lblk_t lblk_start,
3524 ext4_lblk_t lblk_end) 3479 ext4_lblk_t lblk_end)
3525{ 3480{
3526 struct extent_status es; 3481 struct extent_status es;
3527 3482
3528 es.start = lblk_start; 3483 ext4_es_find_delayed_extent(inode, lblk_start, &es);
3529 ext4_es_find_extent(inode, &es); 3484 if (es.es_len == 0)
3530 if (es.len == 0)
3531 return 0; /* there is no delay extent in this tree */ 3485 return 0; /* there is no delay extent in this tree */
3532 else if (es.start <= lblk_start && lblk_start < es.start + es.len) 3486 else if (es.es_lblk <= lblk_start &&
3487 lblk_start < es.es_lblk + es.es_len)
3533 return 1; 3488 return 1;
3534 else if (lblk_start <= es.start && es.start <= lblk_end) 3489 else if (lblk_start <= es.es_lblk && es.es_lblk <= lblk_end)
3535 return 1; 3490 return 1;
3536 else 3491 else
3537 return 0; 3492 return 0;
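
ext4_find_delalloc_range() now answers purely from the extent status tree: fetch the first delayed extent at or after lblk_start, then test whether it overlaps the queried range. The same overlap test, restated as a standalone sketch (the struct layout and names are illustrative):

#include <stdio.h>
#include <stdint.h>

/*
 * Return 1 if the first delayed extent found (es_lblk/es_len, es_len == 0
 * meaning "none") overlaps the queried range [start, end], otherwise 0.
 */
static int delalloc_in_range(uint32_t start, uint32_t end,
                             uint32_t es_lblk, uint32_t es_len)
{
    if (es_len == 0)
        return 0;                               /* no delayed extent at all */
    if (es_lblk <= start && start < es_lblk + es_len)
        return 1;                               /* extent covers the start */
    if (start <= es_lblk && es_lblk <= end)
        return 1;                               /* extent begins inside the range */
    return 0;
}

int main(void)
{
    printf("%d\n", delalloc_in_range(10, 20, 15, 4));  /* 1: [15,18] overlaps */
    printf("%d\n", delalloc_in_range(10, 20, 25, 4));  /* 0: [25,28] is past it */
    return 0;
}
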
@@ -3656,6 +3611,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3656 ext4_set_io_unwritten_flag(inode, io); 3611 ext4_set_io_unwritten_flag(inode, io);
3657 else 3612 else
3658 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3613 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3614 map->m_flags |= EXT4_MAP_UNWRITTEN;
3659 if (ext4_should_dioread_nolock(inode)) 3615 if (ext4_should_dioread_nolock(inode))
3660 map->m_flags |= EXT4_MAP_UNINIT; 3616 map->m_flags |= EXT4_MAP_UNINIT;
3661 goto out; 3617 goto out;
@@ -3677,8 +3633,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3677 * repeat fallocate creation request 3633 * repeat fallocate creation request
3678 * we already have an unwritten extent 3634 * we already have an unwritten extent
3679 */ 3635 */
3680 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) 3636 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) {
3637 map->m_flags |= EXT4_MAP_UNWRITTEN;
3681 goto map_out; 3638 goto map_out;
3639 }
3682 3640
3683 /* buffered READ or buffered write_begin() lookup */ 3641 /* buffered READ or buffered write_begin() lookup */
3684 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3642 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -3898,35 +3856,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3898 map->m_lblk, map->m_len, inode->i_ino); 3856 map->m_lblk, map->m_len, inode->i_ino);
3899 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 3857 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
3900 3858
3901 /* check in cache */
3902 if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3903 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3904 if ((sbi->s_cluster_ratio > 1) &&
3905 ext4_find_delalloc_cluster(inode, map->m_lblk))
3906 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3907
3908 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3909 /*
3910 * block isn't allocated yet and
3911 * user doesn't want to allocate it
3912 */
3913 goto out2;
3914 }
3915 /* we should allocate requested block */
3916 } else {
3917 /* block is already allocated */
3918 if (sbi->s_cluster_ratio > 1)
3919 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3920 newblock = map->m_lblk
3921 - le32_to_cpu(newex.ee_block)
3922 + ext4_ext_pblock(&newex);
3923 /* number of remaining blocks in the extent */
3924 allocated = ext4_ext_get_actual_len(&newex) -
3925 (map->m_lblk - le32_to_cpu(newex.ee_block));
3926 goto out;
3927 }
3928 }
3929
3930 /* find extent for this block */ 3859 /* find extent for this block */
3931 path = ext4_ext_find_extent(inode, map->m_lblk, NULL); 3860 path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
3932 if (IS_ERR(path)) { 3861 if (IS_ERR(path)) {
@@ -3973,15 +3902,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3973 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, 3902 ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
3974 ee_block, ee_len, newblock); 3903 ee_block, ee_len, newblock);
3975 3904
3976 /* 3905 if (!ext4_ext_is_uninitialized(ex))
3977 * Do not put uninitialized extent
3978 * in the cache
3979 */
3980 if (!ext4_ext_is_uninitialized(ex)) {
3981 ext4_ext_put_in_cache(inode, ee_block,
3982 ee_len, ee_start);
3983 goto out; 3906 goto out;
3984 } 3907
3985 allocated = ext4_ext_handle_uninitialized_extents( 3908 allocated = ext4_ext_handle_uninitialized_extents(
3986 handle, inode, map, path, flags, 3909 handle, inode, map, path, flags,
3987 allocated, newblock); 3910 allocated, newblock);
@@ -4002,7 +3925,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4002 * put just found gap into cache to speed up 3925 * put just found gap into cache to speed up
4003 * subsequent requests 3926 * subsequent requests
4004 */ 3927 */
4005 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3928 if ((flags & EXT4_GET_BLOCKS_NO_PUT_HOLE) == 0)
3929 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
4006 goto out2; 3930 goto out2;
4007 } 3931 }
4008 3932
@@ -4108,6 +4032,7 @@ got_allocated_blocks:
4108 /* Mark uninitialized */ 4032 /* Mark uninitialized */
4109 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4033 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
4110 ext4_ext_mark_uninitialized(&newex); 4034 ext4_ext_mark_uninitialized(&newex);
4035 map->m_flags |= EXT4_MAP_UNWRITTEN;
4111 /* 4036 /*
4112 * io_end structure was created for every IO write to an 4037 * io_end structure was created for every IO write to an
4113 * uninitialized extent. To avoid unnecessary conversion, 4038 * uninitialized extent. To avoid unnecessary conversion,
@@ -4241,10 +4166,9 @@ got_allocated_blocks:
4241 * Cache the extent and update transaction to commit on fdatasync only 4166 * Cache the extent and update transaction to commit on fdatasync only
4242 * when it is _not_ an uninitialized extent. 4167 * when it is _not_ an uninitialized extent.
4243 */ 4168 */
4244 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { 4169 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
4245 ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
4246 ext4_update_inode_fsync_trans(handle, inode, 1); 4170 ext4_update_inode_fsync_trans(handle, inode, 1);
4247 } else 4171 else
4248 ext4_update_inode_fsync_trans(handle, inode, 0); 4172 ext4_update_inode_fsync_trans(handle, inode, 0);
4249out: 4173out:
4250 if (allocated > map->m_len) 4174 if (allocated > map->m_len)
@@ -4284,7 +4208,7 @@ void ext4_ext_truncate(struct inode *inode)
4284 * probably first extent we're gonna free will be last in block 4208 * probably first extent we're gonna free will be last in block
4285 */ 4209 */
4286 err = ext4_writepage_trans_blocks(inode); 4210 err = ext4_writepage_trans_blocks(inode);
4287 handle = ext4_journal_start(inode, err); 4211 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, err);
4288 if (IS_ERR(handle)) 4212 if (IS_ERR(handle))
4289 return; 4213 return;
4290 4214
@@ -4303,7 +4227,6 @@ void ext4_ext_truncate(struct inode *inode)
4303 goto out_stop; 4227 goto out_stop;
4304 4228
4305 down_write(&EXT4_I(inode)->i_data_sem); 4229 down_write(&EXT4_I(inode)->i_data_sem);
4306 ext4_ext_invalidate_cache(inode);
4307 4230
4308 ext4_discard_preallocations(inode); 4231 ext4_discard_preallocations(inode);
4309 4232
@@ -4386,7 +4309,7 @@ static void ext4_falloc_update_inode(struct inode *inode,
4386 */ 4309 */
4387long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 4310long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4388{ 4311{
4389 struct inode *inode = file->f_path.dentry->d_inode; 4312 struct inode *inode = file_inode(file);
4390 handle_t *handle; 4313 handle_t *handle;
4391 loff_t new_size; 4314 loff_t new_size;
4392 unsigned int max_blocks; 4315 unsigned int max_blocks;
@@ -4397,13 +4320,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4397 struct ext4_map_blocks map; 4320 struct ext4_map_blocks map;
4398 unsigned int credits, blkbits = inode->i_blkbits; 4321 unsigned int credits, blkbits = inode->i_blkbits;
4399 4322
4400 /*
4401 * currently supporting (pre)allocate mode for extent-based
4402 * files _only_
4403 */
4404 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4405 return -EOPNOTSUPP;
4406
4407 /* Return error if mode is not supported */ 4323 /* Return error if mode is not supported */
4408 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 4324 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
4409 return -EOPNOTSUPP; 4325 return -EOPNOTSUPP;
@@ -4415,6 +4331,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4415 if (ret) 4331 if (ret)
4416 return ret; 4332 return ret;
4417 4333
4334 /*
4335 * currently supporting (pre)allocate mode for extent-based
4336 * files _only_
4337 */
4338 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4339 return -EOPNOTSUPP;
4340
4418 trace_ext4_fallocate_enter(inode, offset, len, mode); 4341 trace_ext4_fallocate_enter(inode, offset, len, mode);
4419 map.m_lblk = offset >> blkbits; 4342 map.m_lblk = offset >> blkbits;
4420 /* 4343 /*
@@ -4451,7 +4374,8 @@ retry:
4451 while (ret >= 0 && ret < max_blocks) { 4374 while (ret >= 0 && ret < max_blocks) {
4452 map.m_lblk = map.m_lblk + ret; 4375 map.m_lblk = map.m_lblk + ret;
4453 map.m_len = max_blocks = max_blocks - ret; 4376 map.m_len = max_blocks = max_blocks - ret;
4454 handle = ext4_journal_start(inode, credits); 4377 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4378 credits);
4455 if (IS_ERR(handle)) { 4379 if (IS_ERR(handle)) {
4456 ret = PTR_ERR(handle); 4380 ret = PTR_ERR(handle);
4457 break; 4381 break;
@@ -4459,11 +4383,11 @@ retry:
4459 ret = ext4_map_blocks(handle, inode, &map, flags); 4383 ret = ext4_map_blocks(handle, inode, &map, flags);
4460 if (ret <= 0) { 4384 if (ret <= 0) {
4461#ifdef EXT4FS_DEBUG 4385#ifdef EXT4FS_DEBUG
4462 WARN_ON(ret <= 0); 4386 ext4_warning(inode->i_sb,
4463 printk(KERN_ERR "%s: ext4_ext_map_blocks " 4387 "inode #%lu: block %u: len %u: "
4464 "returned error inode#%lu, block=%u, " 4388 "ext4_ext_map_blocks returned %d",
4465 "max_blocks=%u", __func__, 4389 inode->i_ino, map.m_lblk,
4466 inode->i_ino, map.m_lblk, max_blocks); 4390 map.m_len, ret);
4467#endif 4391#endif
4468 ext4_mark_inode_dirty(handle, inode); 4392 ext4_mark_inode_dirty(handle, inode);
4469 ret2 = ext4_journal_stop(handle); 4393 ret2 = ext4_journal_stop(handle);
@@ -4529,21 +4453,19 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4529 while (ret >= 0 && ret < max_blocks) { 4453 while (ret >= 0 && ret < max_blocks) {
4530 map.m_lblk += ret; 4454 map.m_lblk += ret;
4531 map.m_len = (max_blocks -= ret); 4455 map.m_len = (max_blocks -= ret);
4532 handle = ext4_journal_start(inode, credits); 4456 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
4533 if (IS_ERR(handle)) { 4457 if (IS_ERR(handle)) {
4534 ret = PTR_ERR(handle); 4458 ret = PTR_ERR(handle);
4535 break; 4459 break;
4536 } 4460 }
4537 ret = ext4_map_blocks(handle, inode, &map, 4461 ret = ext4_map_blocks(handle, inode, &map,
4538 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 4462 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
4539 if (ret <= 0) { 4463 if (ret <= 0)
4540 WARN_ON(ret <= 0); 4464 ext4_warning(inode->i_sb,
4541 ext4_msg(inode->i_sb, KERN_ERR, 4465 "inode #%lu: block %u: len %u: "
4542 "%s:%d: inode #%lu: block %u: len %u: " 4466 "ext4_ext_map_blocks returned %d",
4543 "ext4_ext_map_blocks returned %d", 4467 inode->i_ino, map.m_lblk,
4544 __func__, __LINE__, inode->i_ino, map.m_lblk, 4468 map.m_len, ret);
4545 map.m_len, ret);
4546 }
4547 ext4_mark_inode_dirty(handle, inode); 4469 ext4_mark_inode_dirty(handle, inode);
4548 ret2 = ext4_journal_stop(handle); 4470 ret2 = ext4_journal_stop(handle);
4549 if (ret <= 0 || ret2 ) 4471 if (ret <= 0 || ret2 )
@@ -4553,42 +4475,48 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4553} 4475}
4554 4476
4555/* 4477/*
 4556 * If newex is not existing extent (newex->ec_start equals zero) find 4478 * If newes is not an existing extent (newes->es_pblk equals zero) find
4557 * delayed extent at start of newex and update newex accordingly and 4479 * delayed extent at start of newes and update newes accordingly and
4558 * return start of the next delayed extent. 4480 * return start of the next delayed extent.
4559 * 4481 *
 4560 * If newex is existing extent (newex->ec_start is not equal zero) 4482 * If newes is an existing extent (newes->es_pblk is not zero)
4561 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed 4483 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
4562 * extent found. Leave newex unmodified. 4484 * extent found. Leave newes unmodified.
4563 */ 4485 */
4564static int ext4_find_delayed_extent(struct inode *inode, 4486static int ext4_find_delayed_extent(struct inode *inode,
4565 struct ext4_ext_cache *newex) 4487 struct extent_status *newes)
4566{ 4488{
4567 struct extent_status es; 4489 struct extent_status es;
4568 ext4_lblk_t next_del; 4490 ext4_lblk_t block, next_del;
4569 4491
4570 es.start = newex->ec_block; 4492 ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
4571 next_del = ext4_es_find_extent(inode, &es);
4572 4493
4573 if (newex->ec_start == 0) { 4494 if (newes->es_pblk == 0) {
4574 /* 4495 /*
4575 * No extent in extent-tree contains block @newex->ec_start, 4496 * No extent in extent-tree contains block @newes->es_pblk,
4576 * then the block may stay in 1)a hole or 2)delayed-extent. 4497 * then the block may stay in 1)a hole or 2)delayed-extent.
4577 */ 4498 */
4578 if (es.len == 0) 4499 if (es.es_len == 0)
4579 /* A hole found. */ 4500 /* A hole found. */
4580 return 0; 4501 return 0;
4581 4502
4582 if (es.start > newex->ec_block) { 4503 if (es.es_lblk > newes->es_lblk) {
4583 /* A hole found. */ 4504 /* A hole found. */
4584 newex->ec_len = min(es.start - newex->ec_block, 4505 newes->es_len = min(es.es_lblk - newes->es_lblk,
4585 newex->ec_len); 4506 newes->es_len);
4586 return 0; 4507 return 0;
4587 } 4508 }
4588 4509
4589 newex->ec_len = es.start + es.len - newex->ec_block; 4510 newes->es_len = es.es_lblk + es.es_len - newes->es_lblk;
4590 } 4511 }
4591 4512
4513 block = newes->es_lblk + newes->es_len;
4514 ext4_es_find_delayed_extent(inode, block, &es);
4515 if (es.es_len == 0)
4516 next_del = EXT_MAX_BLOCKS;
4517 else
4518 next_del = es.es_lblk;
4519
4592 return next_del; 4520 return next_del;
4593} 4521}
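
For the !exists case, ext4_find_delayed_extent() either reports a hole, shrinking the queried length down to the next delayed extent, or trims the length to the part that really is delayed. A userspace sketch of that decision (a found length of zero means "nothing delayed at or after the block"):

#include <stdio.h>
#include <stdint.h>

struct found { uint32_t lblk, len; };   /* first delayed extent at/after the query */

static const char *classify(uint32_t q_lblk, uint32_t *q_len, struct found es)
{
    if (es.len == 0)
        return "hole";                          /* nothing delayed afterwards */
    if (es.lblk > q_lblk) {
        if (es.lblk - q_lblk < *q_len)
            *q_len = es.lblk - q_lblk;          /* shrink to the hole in front */
        return "hole";
    }
    *q_len = es.lblk + es.len - q_lblk;         /* delayed portion of the range */
    return "delayed";
}

int main(void)
{
    uint32_t len = 10;
    struct found es = { 15, 4 };
    const char *kind = classify(10, &len, es);

    printf("%s, len=%u\n", kind, len);          /* hole, len=5 */
    return 0;
}
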
4594/* fiemap flags we can handle specified here */ 4522/* fiemap flags we can handle specified here */
@@ -4643,7 +4571,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
4643 */ 4571 */
4644int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) 4572int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4645{ 4573{
4646 struct inode *inode = file->f_path.dentry->d_inode; 4574 struct inode *inode = file_inode(file);
4647 struct super_block *sb = inode->i_sb; 4575 struct super_block *sb = inode->i_sb;
4648 ext4_lblk_t first_block, stop_block; 4576 ext4_lblk_t first_block, stop_block;
4649 struct address_space *mapping = inode->i_mapping; 4577 struct address_space *mapping = inode->i_mapping;
@@ -4709,7 +4637,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4709 inode_dio_wait(inode); 4637 inode_dio_wait(inode);
4710 4638
4711 credits = ext4_writepage_trans_blocks(inode); 4639 credits = ext4_writepage_trans_blocks(inode);
4712 handle = ext4_journal_start(inode, credits); 4640 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
4713 if (IS_ERR(handle)) { 4641 if (IS_ERR(handle)) {
4714 err = PTR_ERR(handle); 4642 err = PTR_ERR(handle);
4715 goto out_dio; 4643 goto out_dio;
@@ -4786,14 +4714,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4786 goto out; 4714 goto out;
4787 4715
4788 down_write(&EXT4_I(inode)->i_data_sem); 4716 down_write(&EXT4_I(inode)->i_data_sem);
4789 ext4_ext_invalidate_cache(inode);
4790 ext4_discard_preallocations(inode); 4717 ext4_discard_preallocations(inode);
4791 4718
4792 err = ext4_es_remove_extent(inode, first_block, 4719 err = ext4_es_remove_extent(inode, first_block,
4793 stop_block - first_block); 4720 stop_block - first_block);
4794 err = ext4_ext_remove_space(inode, first_block, stop_block - 1); 4721 err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
4795 4722
4796 ext4_ext_invalidate_cache(inode);
4797 ext4_discard_preallocations(inode); 4723 ext4_discard_preallocations(inode);
4798 4724
4799 if (IS_SYNC(inode)) 4725 if (IS_SYNC(inode))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 564d981a2fcc..f768f4a98a2b 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -23,40 +23,53 @@
23 * (e.g. Reservation space warning), and provide extent-level locking. 23 * (e.g. Reservation space warning), and provide extent-level locking.
24 * Delay extent tree is the first step to achieve this goal. It is 24 * Delay extent tree is the first step to achieve this goal. It is
 25 * originally built by Yongqiang Yang. At that time it was called delay 25 * originally built by Yongqiang Yang. At that time it was called delay
 26 * extent tree, whose goal is only track delay extent in memory to 26 * extent tree, whose goal is only to track delayed extents in memory to
27 * simplify the implementation of fiemap and bigalloc, and introduce 27 * simplify the implementation of fiemap and bigalloc, and introduce
28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called 28 * lseek SEEK_DATA/SEEK_HOLE support. That is why it is still called
 29 * delay extent tree at the following comment. But for better 29 * delay extent tree at the first commit. But to better describe
 30 * understand what it does, it has been rename to extent status tree. 30 * what it does, it has been renamed to extent status tree.
31 * 31 *
32 * Currently the first step has been done. All delay extents are 32 * Step1:
33 * tracked in the tree. It maintains the delay extent when a delay 33 * Currently the first step has been done. All delayed extents are
34 * allocation is issued, and the delay extent is written out or 34 * tracked in the tree. It maintains the delayed extent when a delayed
35 * allocation is issued, and the delayed extent is written out or
35 * invalidated. Therefore the implementation of fiemap and bigalloc 36 * invalidated. Therefore the implementation of fiemap and bigalloc
36 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced. 37 * are simplified, and SEEK_DATA/SEEK_HOLE are introduced.
37 * 38 *
38 * The following comment describes the implemenmtation of extent 39 * The following comment describes the implemenmtation of extent
39 * status tree and future works. 40 * status tree and future works.
41 *
42 * Step2:
 43 * In this step all extent status is tracked by the extent status
 44 * tree. Thus, we can first try to look up a block mapping in this
 45 * tree before finding it in the extent tree, so the single extent
 46 * cache can be removed: the extent status tree does a better job.
 47 * Extents in the status tree are loaded on demand, so the tree may
 48 * not contain all of the extents in a file. Meanwhile we define a
 49 * shrinker to reclaim memory from the extent status tree, because a
 50 * fragmented extent tree would make the status tree cost too much
 51 * memory. Written/unwritten/hole extents in the tree are reclaimed
 52 * by this shrinker under high memory pressure. Delayed extents are
 53 * not reclaimed because fiemap, bigalloc, and seek_data/hole need them.
40 */ 54 */
41 55
42/* 56/*
43 * extents status tree implementation for ext4. 57 * Extent status tree implementation for ext4.
44 * 58 *
45 * 59 *
46 * ========================================================================== 60 * ==========================================================================
47 * Extents status encompass delayed extents and extent locks 61 * Extent status tree tracks all extent status.
48 * 62 *
 49 * 1. Why delayed extent implementation ? 63 * 1. Why do we need to implement an extent status tree?
50 * 64 *
51 * Without delayed extent, ext4 identifies a delayed extent by looking 65 * Without extent status tree, ext4 identifies a delayed extent by looking
52 * up page cache, this has several deficiencies - complicated, buggy, 66 * up page cache, this has several deficiencies - complicated, buggy,
53 * and inefficient code. 67 * and inefficient code.
54 * 68 *
55 * FIEMAP, SEEK_HOLE/DATA, bigalloc, punch hole and writeout all need 69 * FIEMAP, SEEK_HOLE/DATA, bigalloc, and writeout all need to know if a
 56 * to know if a block or a range of blocks belong to a delayed 70 * block or a range of blocks belong to a delayed extent.
57 * extent.
58 * 71 *
59 * Let us have a look at how they do without delayed extents implementation. 72 * Let us have a look at how they do without extent status tree.
60 * -- FIEMAP 73 * -- FIEMAP
61 * FIEMAP looks up page cache to identify delayed allocations from holes. 74 * FIEMAP looks up page cache to identify delayed allocations from holes.
62 * 75 *
@@ -68,47 +81,48 @@
68 * already under delayed allocation or not to determine whether 81 * already under delayed allocation or not to determine whether
69 * quota reserving is needed for the cluster. 82 * quota reserving is needed for the cluster.
70 * 83 *
71 * -- punch hole
72 * punch hole looks up page cache to identify a delayed extent.
73 *
74 * -- writeout 84 * -- writeout
75 * Writeout looks up whole page cache to see if a buffer is 85 * Writeout looks up whole page cache to see if a buffer is
 76 * mapped. If there are not very many delayed buffers, then it is 86 * mapped. If there are not very many delayed buffers, then it is
 77 * time consuming. 87 * time consuming.
78 * 88 *
79 * With delayed extents implementation, FIEMAP, SEEK_HOLE/DATA, 89 * With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
80 * bigalloc and writeout can figure out if a block or a range of 90 * bigalloc and writeout can figure out if a block or a range of
81 * blocks is under delayed allocation(belonged to a delayed extent) or 91 * blocks is under delayed allocation(belonged to a delayed extent) or
82 * not by searching the delayed extent tree. 92 * not by searching the extent tree.
83 * 93 *
84 * 94 *
85 * ========================================================================== 95 * ==========================================================================
 86 * 2. ext4 delayed extents implementation 96 * 2. Ext4 extent status tree implementation
97 *
98 * -- extent
 99 * An extent is a range of blocks which are contiguous logically and
 100 * physically. Unlike an extent in the extent tree, this extent is an
 101 * in-memory struct; there is no corresponding on-disk data. There is
 102 * no limit on the length of an extent, so an extent can contain as
 103 * many blocks as are contiguous logically and physically.
87 * 104 *
88 * -- delayed extent 105 * -- extent status tree
89 * A delayed extent is a range of blocks which are contiguous 106 * Every inode has an extent status tree and all allocation blocks
 90 * logically and under delayed allocation. Unlike extent in 107 * are added to the tree with different status. The extents in the
91 * ext4, delayed extent in ext4 is a in-memory struct, there is 108 * tree are ordered by logical block no.
92 * no corresponding on-disk data. There is no limit on length of
93 * delayed extent, so a delayed extent can contain as many blocks
94 * as they are contiguous logically.
95 * 109 *
 96 * -- delayed extent tree 110 * -- operations on an extent status tree
 97 * Every inode has a delayed extent tree and all under delayed 111 * There are three important operations on an extent status tree: find
 98 * allocation blocks are added to the tree as delayed extents. 112 * next extent, adding an extent (a range of blocks) and removing an extent.
99 * Delayed extents in the tree are ordered by logical block no.
100 * 113 *
 101 * -- operations on a delayed extent tree 114 * -- race on an extent status tree
102 * There are three operations on a delayed extent tree: find next 115 * Extent status tree is protected by inode->i_es_lock.
103 * delayed extent, adding a space(a range of blocks) and removing
104 * a space.
105 * 116 *
106 * -- race on a delayed extent tree 117 * -- memory consumption
107 * Delayed extent tree is protected inode->i_es_lock. 118 * Fragmented extent tree will make extent status tree cost too much
119 * memory. Hence, we will reclaim written/unwritten/hole extents from
 120 * the tree under heavy memory pressure.
108 * 121 *
109 * 122 *
110 * ========================================================================== 123 * ==========================================================================
111 * 3. performance analysis 124 * 3. Performance analysis
125 *
112 * -- overhead 126 * -- overhead
113 * 1. There is a cache extent for write access, so if writes are 127 * 1. There is a cache extent for write access, so if writes are
114 * not very random, adding space operaions are in O(1) time. 128 * not very random, adding space operaions are in O(1) time.
@@ -120,15 +134,21 @@
120 * 134 *
121 * ========================================================================== 135 * ==========================================================================
122 * 4. TODO list 136 * 4. TODO list
123 * -- Track all extent status
124 * 137 *
125 * -- Improve get block process 138 * -- Refactor delayed space reservation
126 * 139 *
127 * -- Extent-level locking 140 * -- Extent-level locking
128 */ 141 */
129 142
130static struct kmem_cache *ext4_es_cachep; 143static struct kmem_cache *ext4_es_cachep;
131 144
145static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan);
150static int ext4_es_reclaim_extents_count(struct super_block *sb);
151
132int __init ext4_init_es(void) 152int __init ext4_init_es(void)
133{ 153{
134 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT); 154 ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
@@ -161,7 +181,9 @@ static void ext4_es_print_tree(struct inode *inode)
161 while (node) { 181 while (node) {
162 struct extent_status *es; 182 struct extent_status *es;
163 es = rb_entry(node, struct extent_status, rb_node); 183 es = rb_entry(node, struct extent_status, rb_node);
164 printk(KERN_DEBUG " [%u/%u)", es->start, es->len); 184 printk(KERN_DEBUG " [%u/%u) %llu %llx",
185 es->es_lblk, es->es_len,
186 ext4_es_pblock(es), ext4_es_status(es));
165 node = rb_next(node); 187 node = rb_next(node);
166 } 188 }
167 printk(KERN_DEBUG "\n"); 189 printk(KERN_DEBUG "\n");
@@ -170,10 +192,10 @@ static void ext4_es_print_tree(struct inode *inode)
170#define ext4_es_print_tree(inode) 192#define ext4_es_print_tree(inode)
171#endif 193#endif
172 194
173static inline ext4_lblk_t extent_status_end(struct extent_status *es) 195static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
174{ 196{
175 BUG_ON(es->start + es->len < es->start); 197 BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
176 return es->start + es->len - 1; 198 return es->es_lblk + es->es_len - 1;
177} 199}
178 200
179/* 201/*
@@ -181,25 +203,25 @@ static inline ext4_lblk_t extent_status_end(struct extent_status *es)
181 * it can't be found, try to find next extent. 203 * it can't be found, try to find next extent.
182 */ 204 */
183static struct extent_status *__es_tree_search(struct rb_root *root, 205static struct extent_status *__es_tree_search(struct rb_root *root,
184 ext4_lblk_t offset) 206 ext4_lblk_t lblk)
185{ 207{
186 struct rb_node *node = root->rb_node; 208 struct rb_node *node = root->rb_node;
187 struct extent_status *es = NULL; 209 struct extent_status *es = NULL;
188 210
189 while (node) { 211 while (node) {
190 es = rb_entry(node, struct extent_status, rb_node); 212 es = rb_entry(node, struct extent_status, rb_node);
191 if (offset < es->start) 213 if (lblk < es->es_lblk)
192 node = node->rb_left; 214 node = node->rb_left;
193 else if (offset > extent_status_end(es)) 215 else if (lblk > ext4_es_end(es))
194 node = node->rb_right; 216 node = node->rb_right;
195 else 217 else
196 return es; 218 return es;
197 } 219 }
198 220
199 if (es && offset < es->start) 221 if (es && lblk < es->es_lblk)
200 return es; 222 return es;
201 223
202 if (es && offset > extent_status_end(es)) { 224 if (es && lblk > ext4_es_end(es)) {
203 node = rb_next(&es->rb_node); 225 node = rb_next(&es->rb_node);
204 return node ? rb_entry(node, struct extent_status, rb_node) : 226 return node ? rb_entry(node, struct extent_status, rb_node) :
205 NULL; 227 NULL;
@@ -209,79 +231,121 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
209} 231}
210 232
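__es_tree_search() returns the extent that contains the queried block if there is one, otherwise the next extent after it, or NULL. The same lookup contract over a plain sorted array, as a standalone sketch (the kernel walks an rbtree; the data structure is not the point here):

#include <stdio.h>
#include <stdint.h>

struct ext { uint32_t lblk, len; };

/* Return the index of the extent covering lblk, else the first extent
 * after lblk, else -1 ("nothing at or after this block"). */
static int es_search(const struct ext *v, int n, uint32_t lblk)
{
    for (int i = 0; i < n; i++) {
        if (lblk < v[i].lblk)
            return i;                       /* next extent after lblk */
        if (lblk <= v[i].lblk + v[i].len - 1)
            return i;                       /* extent covering lblk */
    }
    return -1;
}

int main(void)
{
    struct ext tree[] = { { 10, 5 }, { 30, 2 } };

    printf("%d %d %d\n",
           es_search(tree, 2, 12),   /*  0: covered by [10,14] */
           es_search(tree, 2, 20),   /*  1: next extent starts at 30 */
           es_search(tree, 2, 40));  /* -1: nothing at or after 40 */
    return 0;
}
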
211/* 233/*
212 * ext4_es_find_extent: find the 1st delayed extent covering @es->start 234 * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
213 * if it exists, otherwise, the next extent after @es->start. 235 * if it exists, otherwise, the next extent after @es->lblk.
214 * 236 *
215 * @inode: the inode which owns delayed extents 237 * @inode: the inode which owns delayed extents
238 * @lblk: the offset where we start to search
216 * @es: delayed extent that we found 239 * @es: delayed extent that we found
217 *
218 * Returns the first block of the next extent after es, otherwise
219 * EXT_MAX_BLOCKS if no delay extent is found.
220 * Delayed extent is returned via @es.
221 */ 240 */
222ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) 241void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
242 struct extent_status *es)
223{ 243{
224 struct ext4_es_tree *tree = NULL; 244 struct ext4_es_tree *tree = NULL;
225 struct extent_status *es1 = NULL; 245 struct extent_status *es1 = NULL;
226 struct rb_node *node; 246 struct rb_node *node;
227 ext4_lblk_t ret = EXT_MAX_BLOCKS;
228 247
229 trace_ext4_es_find_extent_enter(inode, es->start); 248 BUG_ON(es == NULL);
249 trace_ext4_es_find_delayed_extent_enter(inode, lblk);
230 250
231 read_lock(&EXT4_I(inode)->i_es_lock); 251 read_lock(&EXT4_I(inode)->i_es_lock);
232 tree = &EXT4_I(inode)->i_es_tree; 252 tree = &EXT4_I(inode)->i_es_tree;
233 253
234 /* find delay extent in cache firstly */ 254 /* find extent in cache firstly */
255 es->es_lblk = es->es_len = es->es_pblk = 0;
235 if (tree->cache_es) { 256 if (tree->cache_es) {
236 es1 = tree->cache_es; 257 es1 = tree->cache_es;
237 if (in_range(es->start, es1->start, es1->len)) { 258 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
238 es_debug("%u cached by [%u/%u)\n", 259 es_debug("%u cached by [%u/%u) %llu %llx\n",
239 es->start, es1->start, es1->len); 260 lblk, es1->es_lblk, es1->es_len,
261 ext4_es_pblock(es1), ext4_es_status(es1));
240 goto out; 262 goto out;
241 } 263 }
242 } 264 }
243 265
244 es->len = 0; 266 es1 = __es_tree_search(&tree->root, lblk);
245 es1 = __es_tree_search(&tree->root, es->start);
246 267
247out: 268out:
248 if (es1) { 269 if (es1 && !ext4_es_is_delayed(es1)) {
249 tree->cache_es = es1; 270 while ((node = rb_next(&es1->rb_node)) != NULL) {
250 es->start = es1->start;
251 es->len = es1->len;
252 node = rb_next(&es1->rb_node);
253 if (node) {
254 es1 = rb_entry(node, struct extent_status, rb_node); 271 es1 = rb_entry(node, struct extent_status, rb_node);
255 ret = es1->start; 272 if (ext4_es_is_delayed(es1))
273 break;
256 } 274 }
257 } 275 }
258 276
277 if (es1 && ext4_es_is_delayed(es1)) {
278 tree->cache_es = es1;
279 es->es_lblk = es1->es_lblk;
280 es->es_len = es1->es_len;
281 es->es_pblk = es1->es_pblk;
282 }
283
259 read_unlock(&EXT4_I(inode)->i_es_lock); 284 read_unlock(&EXT4_I(inode)->i_es_lock);
260 285
261 trace_ext4_es_find_extent_exit(inode, es, ret); 286 ext4_es_lru_add(inode);
262 return ret; 287 trace_ext4_es_find_delayed_extent_exit(inode, es);
263} 288}
264 289
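Because the tree now holds written, unwritten and hole extents as well, the lookup above may land on a non-delayed entry; the new code then steps through rb_next() until it reaches a delayed one, since only delayed extents are reported by this helper. A minimal sketch of that forward walk (an array stands in for the rbtree):

#include <stdio.h>

struct entry { int delayed; };

/* Starting at index i, return the index of the first delayed entry, or -1. */
static int first_delayed_from(const struct entry *v, int n, int i)
{
    while (i < n && !v[i].delayed)
        i++;                            /* rb_next() equivalent */
    return i < n ? i : -1;
}

int main(void)
{
    struct entry tree[] = { { 0 }, { 0 }, { 1 } };  /* written, hole, delayed */

    printf("%d\n", first_delayed_from(tree, 3, 0)); /* 2 */
    return 0;
}
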
265static struct extent_status * 290static struct extent_status *
266ext4_es_alloc_extent(ext4_lblk_t start, ext4_lblk_t len) 291ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
292 ext4_fsblk_t pblk)
267{ 293{
268 struct extent_status *es; 294 struct extent_status *es;
269 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); 295 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
270 if (es == NULL) 296 if (es == NULL)
271 return NULL; 297 return NULL;
272 es->start = start; 298 es->es_lblk = lblk;
273 es->len = len; 299 es->es_len = len;
300 es->es_pblk = pblk;
301
302 /*
303 * We don't count delayed extent because we never try to reclaim them
304 */
305 if (!ext4_es_is_delayed(es))
306 EXT4_I(inode)->i_es_lru_nr++;
307
274 return es; 308 return es;
275} 309}
276 310
277static void ext4_es_free_extent(struct extent_status *es) 311static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
278{ 312{
313 /* Decrease the lru counter when this es is not delayed */
314 if (!ext4_es_is_delayed(es)) {
315 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
316 EXT4_I(inode)->i_es_lru_nr--;
317 }
318
279 kmem_cache_free(ext4_es_cachep, es); 319 kmem_cache_free(ext4_es_cachep, es);
280} 320}
281 321
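i_es_lru_nr counts only the entries the shrinker may reclaim: it is incremented when a non-delayed extent is allocated and decremented when one is freed, while delayed extents are never counted because they must stay for fiemap, bigalloc and SEEK_DATA/SEEK_HOLE. A standalone sketch of that bookkeeping (the enum and names are illustrative):

#include <assert.h>
#include <stdio.h>

enum es_type { ES_WRITTEN, ES_UNWRITTEN, ES_HOLE, ES_DELAYED };

static unsigned long lru_nr;    /* stands in for EXT4_I(inode)->i_es_lru_nr */

static void account_alloc(enum es_type t)
{
    if (t != ES_DELAYED)
        lru_nr++;               /* reclaimable entry */
}

static void account_free(enum es_type t)
{
    if (t != ES_DELAYED) {
        assert(lru_nr != 0);
        lru_nr--;
    }
}

int main(void)
{
    account_alloc(ES_WRITTEN);
    account_alloc(ES_DELAYED);  /* not counted: never reclaimed */
    account_free(ES_WRITTEN);
    printf("reclaimable entries: %lu\n", lru_nr);   /* 0 */
    return 0;
}
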
322/*
323 * Check whether or not two extents can be merged
324 * Condition:
325 * - logical block number is contiguous
326 * - physical block number is contiguous
327 * - status is equal
328 */
329static int ext4_es_can_be_merged(struct extent_status *es1,
330 struct extent_status *es2)
331{
332 if (es1->es_lblk + es1->es_len != es2->es_lblk)
333 return 0;
334
335 if (ext4_es_status(es1) != ext4_es_status(es2))
336 return 0;
337
338 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
339 (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
340 return 0;
341
342 return 1;
343}
344
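The helpers used in this file (ext4_es_status(), ext4_es_pblock() and ext4_es_store_pblock()) suggest that the status bits share the 64-bit es_pblk field with the physical block number. A generic sketch of that kind of packing (the shift, mask and status values below are illustrative, not ext4's actual bit layout):

#include <stdio.h>
#include <stdint.h>

#define ES_SHIFT    60                              /* assumed: top bits hold status */
#define ES_MASK     (~((1ULL << ES_SHIFT) - 1))

static uint64_t es_store(uint64_t pblk, uint64_t status)
{
    return (pblk & ~ES_MASK) | (status << ES_SHIFT);
}

static uint64_t es_pblock(uint64_t v) { return v & ~ES_MASK; }
static uint64_t es_status(uint64_t v) { return v >> ES_SHIFT; }

int main(void)
{
    uint64_t v = es_store(123456789ULL, 0x4);       /* 0x4: say, "delayed" */

    printf("pblk=%llu status=%llx\n",
           (unsigned long long)es_pblock(v),
           (unsigned long long)es_status(v));
    return 0;
}
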
282static struct extent_status * 345static struct extent_status *
283ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es) 346ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
284{ 347{
348 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
285 struct extent_status *es1; 349 struct extent_status *es1;
286 struct rb_node *node; 350 struct rb_node *node;
287 351
@@ -290,10 +354,10 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
290 return es; 354 return es;
291 355
292 es1 = rb_entry(node, struct extent_status, rb_node); 356 es1 = rb_entry(node, struct extent_status, rb_node);
293 if (es->start == extent_status_end(es1) + 1) { 357 if (ext4_es_can_be_merged(es1, es)) {
294 es1->len += es->len; 358 es1->es_len += es->es_len;
295 rb_erase(&es->rb_node, &tree->root); 359 rb_erase(&es->rb_node, &tree->root);
296 ext4_es_free_extent(es); 360 ext4_es_free_extent(inode, es);
297 es = es1; 361 es = es1;
298 } 362 }
299 363
@@ -301,8 +365,9 @@ ext4_es_try_to_merge_left(struct ext4_es_tree *tree, struct extent_status *es)
301} 365}
302 366
303static struct extent_status * 367static struct extent_status *
304ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es) 368ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
305{ 369{
370 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
306 struct extent_status *es1; 371 struct extent_status *es1;
307 struct rb_node *node; 372 struct rb_node *node;
308 373
@@ -311,69 +376,57 @@ ext4_es_try_to_merge_right(struct ext4_es_tree *tree, struct extent_status *es)
311 return es; 376 return es;
312 377
313 es1 = rb_entry(node, struct extent_status, rb_node); 378 es1 = rb_entry(node, struct extent_status, rb_node);
314 if (es1->start == extent_status_end(es) + 1) { 379 if (ext4_es_can_be_merged(es, es1)) {
315 es->len += es1->len; 380 es->es_len += es1->es_len;
316 rb_erase(node, &tree->root); 381 rb_erase(node, &tree->root);
317 ext4_es_free_extent(es1); 382 ext4_es_free_extent(inode, es1);
318 } 383 }
319 384
320 return es; 385 return es;
321} 386}
322 387
323static int __es_insert_extent(struct ext4_es_tree *tree, ext4_lblk_t offset, 388static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
324 ext4_lblk_t len)
325{ 389{
390 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
326 struct rb_node **p = &tree->root.rb_node; 391 struct rb_node **p = &tree->root.rb_node;
327 struct rb_node *parent = NULL; 392 struct rb_node *parent = NULL;
328 struct extent_status *es; 393 struct extent_status *es;
329 ext4_lblk_t end = offset + len - 1;
330
331 BUG_ON(end < offset);
332 es = tree->cache_es;
333 if (es && offset == (extent_status_end(es) + 1)) {
334 es_debug("cached by [%u/%u)\n", es->start, es->len);
335 es->len += len;
336 es = ext4_es_try_to_merge_right(tree, es);
337 goto out;
338 } else if (es && es->start == end + 1) {
339 es_debug("cached by [%u/%u)\n", es->start, es->len);
340 es->start = offset;
341 es->len += len;
342 es = ext4_es_try_to_merge_left(tree, es);
343 goto out;
344 } else if (es && es->start <= offset &&
345 end <= extent_status_end(es)) {
346 es_debug("cached by [%u/%u)\n", es->start, es->len);
347 goto out;
348 }
349 394
350 while (*p) { 395 while (*p) {
351 parent = *p; 396 parent = *p;
352 es = rb_entry(parent, struct extent_status, rb_node); 397 es = rb_entry(parent, struct extent_status, rb_node);
353 398
354 if (offset < es->start) { 399 if (newes->es_lblk < es->es_lblk) {
355 if (es->start == end + 1) { 400 if (ext4_es_can_be_merged(newes, es)) {
356 es->start = offset; 401 /*
357 es->len += len; 402 * Here we can modify es_lblk directly
358 es = ext4_es_try_to_merge_left(tree, es); 403 * because it isn't overlapped.
404 */
405 es->es_lblk = newes->es_lblk;
406 es->es_len += newes->es_len;
407 if (ext4_es_is_written(es) ||
408 ext4_es_is_unwritten(es))
409 ext4_es_store_pblock(es,
410 newes->es_pblk);
411 es = ext4_es_try_to_merge_left(inode, es);
359 goto out; 412 goto out;
360 } 413 }
361 p = &(*p)->rb_left; 414 p = &(*p)->rb_left;
362 } else if (offset > extent_status_end(es)) { 415 } else if (newes->es_lblk > ext4_es_end(es)) {
363 if (offset == extent_status_end(es) + 1) { 416 if (ext4_es_can_be_merged(es, newes)) {
364 es->len += len; 417 es->es_len += newes->es_len;
365 es = ext4_es_try_to_merge_right(tree, es); 418 es = ext4_es_try_to_merge_right(inode, es);
366 goto out; 419 goto out;
367 } 420 }
368 p = &(*p)->rb_right; 421 p = &(*p)->rb_right;
369 } else { 422 } else {
370 if (extent_status_end(es) <= end) 423 BUG_ON(1);
371 es->len = offset - es->start + len; 424 return -EINVAL;
372 goto out;
373 } 425 }
374 } 426 }
375 427
376 es = ext4_es_alloc_extent(offset, len); 428 es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
429 newes->es_pblk);
377 if (!es) 430 if (!es)
378 return -ENOMEM; 431 return -ENOMEM;
379 rb_link_node(&es->rb_node, parent, p); 432 rb_link_node(&es->rb_node, parent, p);
@@ -385,85 +438,166 @@ out:
385} 438}
386 439
387/* 440/*
388 * ext4_es_insert_extent() adds a space to a delayed extent tree. 441 * ext4_es_insert_extent() adds a space to an extent status tree.
389 * Caller holds inode->i_es_lock.
390 * 442 *
391 * ext4_es_insert_extent is called by ext4_da_write_begin and 443 * ext4_es_insert_extent is called by ext4_da_write_begin and
392 * ext4_es_remove_extent. 444 * ext4_es_remove_extent.
393 * 445 *
394 * Return 0 on success, error code on failure. 446 * Return 0 on success, error code on failure.
395 */ 447 */
396int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t offset, 448int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
397 ext4_lblk_t len) 449 ext4_lblk_t len, ext4_fsblk_t pblk,
450 unsigned long long status)
398{ 451{
399 struct ext4_es_tree *tree; 452 struct extent_status newes;
453 ext4_lblk_t end = lblk + len - 1;
400 int err = 0; 454 int err = 0;
401 455
402 trace_ext4_es_insert_extent(inode, offset, len); 456 es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
403 es_debug("add [%u/%u) to extent status tree of inode %lu\n", 457 lblk, len, pblk, status, inode->i_ino);
404 offset, len, inode->i_ino); 458
459 if (!len)
460 return 0;
461
462 BUG_ON(end < lblk);
463
464 newes.es_lblk = lblk;
465 newes.es_len = len;
466 ext4_es_store_pblock(&newes, pblk);
467 ext4_es_store_status(&newes, status);
468 trace_ext4_es_insert_extent(inode, &newes);
405 469
406 write_lock(&EXT4_I(inode)->i_es_lock); 470 write_lock(&EXT4_I(inode)->i_es_lock);
407 tree = &EXT4_I(inode)->i_es_tree; 471 err = __es_remove_extent(inode, lblk, end);
408 err = __es_insert_extent(tree, offset, len); 472 if (err != 0)
473 goto error;
474 err = __es_insert_extent(inode, &newes);
475
476error:
409 write_unlock(&EXT4_I(inode)->i_es_lock); 477 write_unlock(&EXT4_I(inode)->i_es_lock);
410 478
479 ext4_es_lru_add(inode);
411 ext4_es_print_tree(inode); 480 ext4_es_print_tree(inode);
412 481
413 return err; 482 return err;
414} 483}
415 484
416/* 485/*
417 * ext4_es_remove_extent() removes a space from a delayed extent tree. 486 * ext4_es_lookup_extent() looks up an extent in the extent status tree.
418 * Caller holds inode->i_es_lock.
419 * 487 *
420 * Return 0 on success, error code on failure. 488 * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
489 *
490 * Return: 1 if found, 0 if not
421 */ 491 */
422int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset, 492int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
423 ext4_lblk_t len) 493 struct extent_status *es)
424{ 494{
425 struct rb_node *node;
426 struct ext4_es_tree *tree; 495 struct ext4_es_tree *tree;
496 struct extent_status *es1 = NULL;
497 struct rb_node *node;
498 int found = 0;
499
500 trace_ext4_es_lookup_extent_enter(inode, lblk);
501 es_debug("lookup extent in block %u\n", lblk);
502
503 tree = &EXT4_I(inode)->i_es_tree;
504 read_lock(&EXT4_I(inode)->i_es_lock);
505
506 /* find extent in the cache first */
507 es->es_lblk = es->es_len = es->es_pblk = 0;
508 if (tree->cache_es) {
509 es1 = tree->cache_es;
510 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
511 es_debug("%u cached by [%u/%u)\n",
512 lblk, es1->es_lblk, es1->es_len);
513 found = 1;
514 goto out;
515 }
516 }
517
518 node = tree->root.rb_node;
519 while (node) {
520 es1 = rb_entry(node, struct extent_status, rb_node);
521 if (lblk < es1->es_lblk)
522 node = node->rb_left;
523 else if (lblk > ext4_es_end(es1))
524 node = node->rb_right;
525 else {
526 found = 1;
527 break;
528 }
529 }
530
531out:
532 if (found) {
533 BUG_ON(!es1);
534 es->es_lblk = es1->es_lblk;
535 es->es_len = es1->es_len;
536 es->es_pblk = es1->es_pblk;
537 }
538
539 read_unlock(&EXT4_I(inode)->i_es_lock);
540
541 ext4_es_lru_add(inode);
542 trace_ext4_es_lookup_extent_exit(inode, es, found);
543 return found;
544}
545
546static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
547 ext4_lblk_t end)
548{
549 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
550 struct rb_node *node;
427 struct extent_status *es; 551 struct extent_status *es;
428 struct extent_status orig_es; 552 struct extent_status orig_es;
429 ext4_lblk_t len1, len2, end; 553 ext4_lblk_t len1, len2;
554 ext4_fsblk_t block;
430 int err = 0; 555 int err = 0;
431 556
432 trace_ext4_es_remove_extent(inode, offset, len); 557 es = __es_tree_search(&tree->root, lblk);
433 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
434 offset, len, inode->i_ino);
435
436 end = offset + len - 1;
437 BUG_ON(end < offset);
438 write_lock(&EXT4_I(inode)->i_es_lock);
439 tree = &EXT4_I(inode)->i_es_tree;
440 es = __es_tree_search(&tree->root, offset);
441 if (!es) 558 if (!es)
442 goto out; 559 goto out;
443 if (es->start > end) 560 if (es->es_lblk > end)
444 goto out; 561 goto out;
445 562
446 /* Simply invalidate cache_es. */ 563 /* Simply invalidate cache_es. */
447 tree->cache_es = NULL; 564 tree->cache_es = NULL;
448 565
449 orig_es.start = es->start; 566 orig_es.es_lblk = es->es_lblk;
450 orig_es.len = es->len; 567 orig_es.es_len = es->es_len;
451 len1 = offset > es->start ? offset - es->start : 0; 568 orig_es.es_pblk = es->es_pblk;
452 len2 = extent_status_end(es) > end ? 569
453 extent_status_end(es) - end : 0; 570 len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
571 len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
454 if (len1 > 0) 572 if (len1 > 0)
455 es->len = len1; 573 es->es_len = len1;
456 if (len2 > 0) { 574 if (len2 > 0) {
457 if (len1 > 0) { 575 if (len1 > 0) {
458 err = __es_insert_extent(tree, end + 1, len2); 576 struct extent_status newes;
577
578 newes.es_lblk = end + 1;
579 newes.es_len = len2;
580 if (ext4_es_is_written(&orig_es) ||
581 ext4_es_is_unwritten(&orig_es)) {
582 block = ext4_es_pblock(&orig_es) +
583 orig_es.es_len - len2;
584 ext4_es_store_pblock(&newes, block);
585 }
586 ext4_es_store_status(&newes, ext4_es_status(&orig_es));
587 err = __es_insert_extent(inode, &newes);
459 if (err) { 588 if (err) {
460 es->start = orig_es.start; 589 es->es_lblk = orig_es.es_lblk;
461 es->len = orig_es.len; 590 es->es_len = orig_es.es_len;
462 goto out; 591 goto out;
463 } 592 }
464 } else { 593 } else {
465 es->start = end + 1; 594 es->es_lblk = end + 1;
466 es->len = len2; 595 es->es_len = len2;
596 if (ext4_es_is_written(es) ||
597 ext4_es_is_unwritten(es)) {
598 block = orig_es.es_pblk + orig_es.es_len - len2;
599 ext4_es_store_pblock(es, block);
600 }
467 } 601 }
468 goto out; 602 goto out;
469 } 603 }
@@ -476,10 +610,10 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
476 es = NULL; 610 es = NULL;
477 } 611 }
478 612
479 while (es && extent_status_end(es) <= end) { 613 while (es && ext4_es_end(es) <= end) {
480 node = rb_next(&es->rb_node); 614 node = rb_next(&es->rb_node);
481 rb_erase(&es->rb_node, &tree->root); 615 rb_erase(&es->rb_node, &tree->root);
482 ext4_es_free_extent(es); 616 ext4_es_free_extent(inode, es);
483 if (!node) { 617 if (!node) {
484 es = NULL; 618 es = NULL;
485 break; 619 break;
@@ -487,14 +621,183 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t offset,
487 es = rb_entry(node, struct extent_status, rb_node); 621 es = rb_entry(node, struct extent_status, rb_node);
488 } 622 }
489 623
490 if (es && es->start < end + 1) { 624 if (es && es->es_lblk < end + 1) {
491 len1 = extent_status_end(es) - end; 625 ext4_lblk_t orig_len = es->es_len;
492 es->start = end + 1; 626
493 es->len = len1; 627 len1 = ext4_es_end(es) - end;
628 es->es_lblk = end + 1;
629 es->es_len = len1;
630 if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
631 block = es->es_pblk + orig_len - len1;
632 ext4_es_store_pblock(es, block);
633 }
494 } 634 }
495 635
496out: 636out:
637 return err;
638}
639
640/*
641 * ext4_es_remove_extent() removes a space from an extent status tree.
642 *
643 * Return 0 on success, error code on failure.
644 */
645int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
646 ext4_lblk_t len)
647{
648 ext4_lblk_t end;
649 int err = 0;
650
651 trace_ext4_es_remove_extent(inode, lblk, len);
652 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
653 lblk, len, inode->i_ino);
654
655 if (!len)
656 return err;
657
658 end = lblk + len - 1;
659 BUG_ON(end < lblk);
660
661 write_lock(&EXT4_I(inode)->i_es_lock);
662 err = __es_remove_extent(inode, lblk, end);
497 write_unlock(&EXT4_I(inode)->i_es_lock); 663 write_unlock(&EXT4_I(inode)->i_es_lock);
498 ext4_es_print_tree(inode); 664 ext4_es_print_tree(inode);
499 return err; 665 return err;
500} 666}
667
668static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
669{
670 struct ext4_sb_info *sbi = container_of(shrink,
671 struct ext4_sb_info, s_es_shrinker);
672 struct ext4_inode_info *ei;
673 struct list_head *cur, *tmp, scanned;
674 int nr_to_scan = sc->nr_to_scan;
675 int ret, nr_shrunk = 0;
676
677 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
678
679 if (!nr_to_scan)
680 return ext4_es_reclaim_extents_count(sbi->s_sb);
681
682 INIT_LIST_HEAD(&scanned);
683
684 spin_lock(&sbi->s_es_lru_lock);
685 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
686 list_move_tail(cur, &scanned);
687
688 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
689
690 read_lock(&ei->i_es_lock);
691 if (ei->i_es_lru_nr == 0) {
692 read_unlock(&ei->i_es_lock);
693 continue;
694 }
695 read_unlock(&ei->i_es_lock);
696
697 write_lock(&ei->i_es_lock);
698 ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
699 write_unlock(&ei->i_es_lock);
700
701 nr_shrunk += ret;
702 nr_to_scan -= ret;
703 if (nr_to_scan == 0)
704 break;
705 }
706 list_splice_tail(&scanned, &sbi->s_es_lru);
707 spin_unlock(&sbi->s_es_lru_lock);
708 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
709
710 return ext4_es_reclaim_extents_count(sbi->s_sb);
711}
712
713void ext4_es_register_shrinker(struct super_block *sb)
714{
715 struct ext4_sb_info *sbi;
716
717 sbi = EXT4_SB(sb);
718 INIT_LIST_HEAD(&sbi->s_es_lru);
719 spin_lock_init(&sbi->s_es_lru_lock);
720 sbi->s_es_shrinker.shrink = ext4_es_shrink;
721 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
722 register_shrinker(&sbi->s_es_shrinker);
723}
724
725void ext4_es_unregister_shrinker(struct super_block *sb)
726{
727 unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
728}
729
730void ext4_es_lru_add(struct inode *inode)
731{
732 struct ext4_inode_info *ei = EXT4_I(inode);
733 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
734
735 spin_lock(&sbi->s_es_lru_lock);
736 if (list_empty(&ei->i_es_lru))
737 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
738 else
739 list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
740 spin_unlock(&sbi->s_es_lru_lock);
741}
742
743void ext4_es_lru_del(struct inode *inode)
744{
745 struct ext4_inode_info *ei = EXT4_I(inode);
746 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
747
748 spin_lock(&sbi->s_es_lru_lock);
749 if (!list_empty(&ei->i_es_lru))
750 list_del_init(&ei->i_es_lru);
751 spin_unlock(&sbi->s_es_lru_lock);
752}
753
754static int ext4_es_reclaim_extents_count(struct super_block *sb)
755{
756 struct ext4_sb_info *sbi = EXT4_SB(sb);
757 struct ext4_inode_info *ei;
758 struct list_head *cur;
759 int nr_cached = 0;
760
761 spin_lock(&sbi->s_es_lru_lock);
762 list_for_each(cur, &sbi->s_es_lru) {
763 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
764 read_lock(&ei->i_es_lock);
765 nr_cached += ei->i_es_lru_nr;
766 read_unlock(&ei->i_es_lock);
767 }
768 spin_unlock(&sbi->s_es_lru_lock);
769 trace_ext4_es_reclaim_extents_count(sb, nr_cached);
770 return nr_cached;
771}
772
773static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
774 int nr_to_scan)
775{
776 struct inode *inode = &ei->vfs_inode;
777 struct ext4_es_tree *tree = &ei->i_es_tree;
778 struct rb_node *node;
779 struct extent_status *es;
780 int nr_shrunk = 0;
781
782 if (ei->i_es_lru_nr == 0)
783 return 0;
784
785 node = rb_first(&tree->root);
786 while (node != NULL) {
787 es = rb_entry(node, struct extent_status, rb_node);
788 node = rb_next(&es->rb_node);
789 /*
790 * We can't reclaim delayed extents from the status tree because
791 * fiemap, bigalloc, and seek_data/hole need to use them.
792 */
793 if (!ext4_es_is_delayed(es)) {
794 rb_erase(&es->rb_node, &tree->root);
795 ext4_es_free_extent(inode, es);
796 nr_shrunk++;
797 if (--nr_to_scan == 0)
798 break;
799 }
800 }
801 tree->cache_es = NULL;
802 return nr_shrunk;
803}
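
For readers tracing the new __es_remove_extent() above: when a removal range lands inside a cached extent, the len1/len2 arithmetic keeps a head piece and a tail piece, and the tail's physical start is recomputed from the original mapping (block = orig pblk + orig len - len2). A small user-space sketch of just that arithmetic, with illustrative names rather than the kernel types (not kernel code):

#include <stdint.h>
#include <stdio.h>

struct es { uint32_t lblk, len; uint64_t pblk; };	/* toy extent_status */

/* split extent e around the removed range [lblk, end], as __es_remove_extent() does */
static void split(struct es e, uint32_t lblk, uint32_t end)
{
	uint32_t es_end = e.lblk + e.len - 1;
	uint32_t len1 = lblk > e.lblk ? lblk - e.lblk : 0;
	uint32_t len2 = es_end > end ? es_end - end : 0;

	if (len1)
		printf("head: [%u/%u) at pblk %llu\n", e.lblk, len1,
		       (unsigned long long)e.pblk);
	if (len2)
		printf("tail: [%u/%u) at pblk %llu\n", end + 1, len2,
		       (unsigned long long)(e.pblk + e.len - len2));
}

int main(void)
{
	struct es e = { 100, 20, 5000 };	/* logical 100..119 mapped at pblk 5000 */

	split(e, 105, 109);	/* prints head [100/5) at 5000 and tail [110/10) at 5010 */
	return 0;
}
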
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 077f82db092a..f190dfe969da 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -20,10 +20,24 @@
20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) 20#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
21#endif 21#endif
22 22
23/*
24 * These flags live in the high bits of extent_status.es_pblk
25 */
26#define EXTENT_STATUS_WRITTEN (1ULL << 63)
27#define EXTENT_STATUS_UNWRITTEN (1ULL << 62)
28#define EXTENT_STATUS_DELAYED (1ULL << 61)
29#define EXTENT_STATUS_HOLE (1ULL << 60)
30
31#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
32 EXTENT_STATUS_UNWRITTEN | \
33 EXTENT_STATUS_DELAYED | \
34 EXTENT_STATUS_HOLE)
35
23struct extent_status { 36struct extent_status {
24 struct rb_node rb_node; 37 struct rb_node rb_node;
25 ext4_lblk_t start; /* first block extent covers */ 38 ext4_lblk_t es_lblk; /* first logical block extent covers */
26 ext4_lblk_t len; /* length of extent in block */ 39 ext4_lblk_t es_len; /* length of extent in block */
40 ext4_fsblk_t es_pblk; /* first physical block */
27}; 41};
28 42
29struct ext4_es_tree { 43struct ext4_es_tree {
@@ -35,11 +49,69 @@ extern int __init ext4_init_es(void);
35extern void ext4_exit_es(void); 49extern void ext4_exit_es(void);
36extern void ext4_es_init_tree(struct ext4_es_tree *tree); 50extern void ext4_es_init_tree(struct ext4_es_tree *tree);
37 51
38extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t start, 52extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
53 ext4_lblk_t len, ext4_fsblk_t pblk,
54 unsigned long long status);
55extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
39 ext4_lblk_t len); 56 ext4_lblk_t len);
40extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t start, 57extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
41 ext4_lblk_t len); 58 struct extent_status *es);
42extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, 59extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
43 struct extent_status *es); 60 struct extent_status *es);
61
62static inline int ext4_es_is_written(struct extent_status *es)
63{
64 return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0;
65}
66
67static inline int ext4_es_is_unwritten(struct extent_status *es)
68{
69 return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0;
70}
71
72static inline int ext4_es_is_delayed(struct extent_status *es)
73{
74 return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0;
75}
76
77static inline int ext4_es_is_hole(struct extent_status *es)
78{
79 return (es->es_pblk & EXTENT_STATUS_HOLE) != 0;
80}
81
82static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
83{
84 return (es->es_pblk & EXTENT_STATUS_FLAGS);
85}
86
87static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
88{
89 return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
90}
91
92static inline void ext4_es_store_pblock(struct extent_status *es,
93 ext4_fsblk_t pb)
94{
95 ext4_fsblk_t block;
96
97 block = (pb & ~EXTENT_STATUS_FLAGS) |
98 (es->es_pblk & EXTENT_STATUS_FLAGS);
99 es->es_pblk = block;
100}
101
102static inline void ext4_es_store_status(struct extent_status *es,
103 unsigned long long status)
104{
105 ext4_fsblk_t block;
106
107 block = (status & EXTENT_STATUS_FLAGS) |
108 (es->es_pblk & ~EXTENT_STATUS_FLAGS);
109 es->es_pblk = block;
110}
111
112extern void ext4_es_register_shrinker(struct super_block *sb);
113extern void ext4_es_unregister_shrinker(struct super_block *sb);
114extern void ext4_es_lru_add(struct inode *inode);
115extern void ext4_es_lru_del(struct inode *inode);
44 116
45#endif /* _EXT4_EXTENTS_STATUS_H */ 117#endif /* _EXT4_EXTENTS_STATUS_H */
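
The header above stores the four EXTENT_STATUS_* flags in the top bits of es_pblk, so a single 64-bit field carries both the physical block number and the extent state. A minimal user-space sketch of that packing, mirroring ext4_es_store_pblock()/ext4_es_store_status()/ext4_es_pblock() (assuming a 64-bit physical block type, as in the header; not kernel code):

#include <stdint.h>
#include <stdio.h>

#define ES_WRITTEN	(1ULL << 63)
#define ES_UNWRITTEN	(1ULL << 62)
#define ES_DELAYED	(1ULL << 61)
#define ES_HOLE		(1ULL << 60)
#define ES_FLAGS	(ES_WRITTEN | ES_UNWRITTEN | ES_DELAYED | ES_HOLE)

/* store a physical block number, preserving the flag bits */
static uint64_t store_pblock(uint64_t es_pblk, uint64_t pb)
{
	return (pb & ~ES_FLAGS) | (es_pblk & ES_FLAGS);
}

/* store the status flags, preserving the physical block number */
static uint64_t store_status(uint64_t es_pblk, uint64_t status)
{
	return (status & ES_FLAGS) | (es_pblk & ~ES_FLAGS);
}

int main(void)
{
	uint64_t es_pblk = 0;

	es_pblk = store_pblock(es_pblk, 123456);
	es_pblk = store_status(es_pblk, ES_WRITTEN);

	printf("pblk=%llu written=%d delayed=%d\n",
	       (unsigned long long)(es_pblk & ~ES_FLAGS),
	       (int)((es_pblk & ES_WRITTEN) != 0),
	       (int)((es_pblk & ES_DELAYED) != 0));
	return 0;
}
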
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 405565a62277..64848b595b24 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -167,7 +167,7 @@ static ssize_t
167ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 167ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
168 unsigned long nr_segs, loff_t pos) 168 unsigned long nr_segs, loff_t pos)
169{ 169{
170 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 170 struct inode *inode = file_inode(iocb->ki_filp);
171 ssize_t ret; 171 ssize_t ret;
172 172
173 /* 173 /*
@@ -240,7 +240,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
240 handle_t *handle; 240 handle_t *handle;
241 int err; 241 int err;
242 242
243 handle = ext4_journal_start_sb(sb, 1); 243 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
244 if (IS_ERR(handle)) 244 if (IS_ERR(handle))
245 return PTR_ERR(handle); 245 return PTR_ERR(handle);
246 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 246 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
@@ -464,10 +464,8 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
464 * If there is a delay extent at this offset, 464 * If there is a delay extent at this offset,
465 * it will be treated as data. 465 * it will be treated as data.
466 */ 466 */
467 es.start = last; 467 ext4_es_find_delayed_extent(inode, last, &es);
468 (void)ext4_es_find_extent(inode, &es); 468 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
469 if (last >= es.start &&
470 last < es.start + es.len) {
471 if (last != start) 469 if (last != start)
472 dataoff = last << blkbits; 470 dataoff = last << blkbits;
473 break; 471 break;
@@ -549,11 +547,9 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
549 * If there is a delay extent at this offset, 547 * If there is a delay extent at this offset,
550 * we will skip this extent. 548 * we will skip this extent.
551 */ 549 */
552 es.start = last; 550 ext4_es_find_delayed_extent(inode, last, &es);
553 (void)ext4_es_find_extent(inode, &es); 551 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
554 if (last >= es.start && 552 last = es.es_lblk + es.es_len;
555 last < es.start + es.len) {
556 last = es.start + es.len;
557 holeoff = last << blkbits; 553 holeoff = last << blkbits;
558 continue; 554 continue;
559 } 555 }
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index fa8e4911d354..3d586f02883e 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -155,11 +155,11 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
155 /* Check to see if the seed is all zero's */ 155 /* Check to see if the seed is all zero's */
156 if (hinfo->seed) { 156 if (hinfo->seed) {
157 for (i = 0; i < 4; i++) { 157 for (i = 0; i < 4; i++) {
158 if (hinfo->seed[i]) 158 if (hinfo->seed[i]) {
159 memcpy(buf, hinfo->seed, sizeof(buf));
159 break; 160 break;
161 }
160 } 162 }
161 if (i < 4)
162 memcpy(buf, hinfo->seed, sizeof(buf));
163 } 163 }
164 164
165 switch (hinfo->hash_version) { 165 switch (hinfo->hash_version) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3f32c8012447..32fd2b9075dd 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -634,8 +634,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
634 * For other inodes, search forward from the parent directory's block 634 * For other inodes, search forward from the parent directory's block
635 * group to find a free inode. 635 * group to find a free inode.
636 */ 636 */
637struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, 637struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
638 const struct qstr *qstr, __u32 goal, uid_t *owner) 638 umode_t mode, const struct qstr *qstr,
639 __u32 goal, uid_t *owner, int handle_type,
640 unsigned int line_no, int nblocks)
639{ 641{
640 struct super_block *sb; 642 struct super_block *sb;
641 struct buffer_head *inode_bitmap_bh = NULL; 643 struct buffer_head *inode_bitmap_bh = NULL;
@@ -725,6 +727,15 @@ repeat_in_this_group:
725 "inode=%lu", ino + 1); 727 "inode=%lu", ino + 1);
726 continue; 728 continue;
727 } 729 }
730 if (!handle) {
731 BUG_ON(nblocks <= 0);
732 handle = __ext4_journal_start_sb(dir->i_sb, line_no,
733 handle_type, nblocks);
734 if (IS_ERR(handle)) {
735 err = PTR_ERR(handle);
736 goto fail;
737 }
738 }
728 BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); 739 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
729 err = ext4_journal_get_write_access(handle, inode_bitmap_bh); 740 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
730 if (err) 741 if (err)
@@ -1017,17 +1028,17 @@ iget_failed:
1017 inode = NULL; 1028 inode = NULL;
1018bad_orphan: 1029bad_orphan:
1019 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); 1030 ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
1020 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 1031 printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
1021 bit, (unsigned long long)bitmap_bh->b_blocknr, 1032 bit, (unsigned long long)bitmap_bh->b_blocknr,
1022 ext4_test_bit(bit, bitmap_bh->b_data)); 1033 ext4_test_bit(bit, bitmap_bh->b_data));
1023 printk(KERN_NOTICE "inode=%p\n", inode); 1034 printk(KERN_WARNING "inode=%p\n", inode);
1024 if (inode) { 1035 if (inode) {
1025 printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", 1036 printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
1026 is_bad_inode(inode)); 1037 is_bad_inode(inode));
1027 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 1038 printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
1028 NEXT_ORPHAN(inode)); 1039 NEXT_ORPHAN(inode));
1029 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 1040 printk(KERN_WARNING "max_ino=%lu\n", max_ino);
1030 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); 1041 printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
1031 /* Avoid freeing blocks if we got a bad deleted inode */ 1042 /* Avoid freeing blocks if we got a bad deleted inode */
1032 if (inode->i_nlink == 0) 1043 if (inode->i_nlink == 0)
1033 inode->i_blocks = 0; 1044 inode->i_blocks = 0;
@@ -1137,7 +1148,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1137 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) 1148 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
1138 goto out; 1149 goto out;
1139 1150
1140 handle = ext4_journal_start_sb(sb, 1); 1151 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
1141 if (IS_ERR(handle)) { 1152 if (IS_ERR(handle)) {
1142 ret = PTR_ERR(handle); 1153 ret = PTR_ERR(handle);
1143 goto out; 1154 goto out;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 20862f96e8ae..c541ab8b64dd 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -146,6 +146,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
146 struct super_block *sb = inode->i_sb; 146 struct super_block *sb = inode->i_sb;
147 Indirect *p = chain; 147 Indirect *p = chain;
148 struct buffer_head *bh; 148 struct buffer_head *bh;
149 int ret = -EIO;
149 150
150 *err = 0; 151 *err = 0;
151 /* i_data is not going away, no lock needed */ 152 /* i_data is not going away, no lock needed */
@@ -154,8 +155,10 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
154 goto no_block; 155 goto no_block;
155 while (--depth) { 156 while (--depth) {
156 bh = sb_getblk(sb, le32_to_cpu(p->key)); 157 bh = sb_getblk(sb, le32_to_cpu(p->key));
157 if (unlikely(!bh)) 158 if (unlikely(!bh)) {
159 ret = -ENOMEM;
158 goto failure; 160 goto failure;
161 }
159 162
160 if (!bh_uptodate_or_lock(bh)) { 163 if (!bh_uptodate_or_lock(bh)) {
161 if (bh_submit_read(bh) < 0) { 164 if (bh_submit_read(bh) < 0) {
@@ -177,7 +180,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
177 return NULL; 180 return NULL;
178 181
179failure: 182failure:
180 *err = -EIO; 183 *err = ret;
181no_block: 184no_block:
182 return p; 185 return p;
183} 186}
@@ -355,9 +358,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
355 * for the first direct block 358 * for the first direct block
356 */ 359 */
357 new_blocks[index] = current_block; 360 new_blocks[index] = current_block;
358 printk(KERN_INFO "%s returned more blocks than " 361 WARN(1, KERN_INFO "%s returned more blocks than "
359 "requested\n", __func__); 362 "requested\n", __func__);
360 WARN_ON(1);
361 break; 363 break;
362 } 364 }
363 } 365 }
@@ -471,7 +473,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
471 */ 473 */
472 bh = sb_getblk(inode->i_sb, new_blocks[n-1]); 474 bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
473 if (unlikely(!bh)) { 475 if (unlikely(!bh)) {
474 err = -EIO; 476 err = -ENOMEM;
475 goto failed; 477 goto failed;
476 } 478 }
477 479
@@ -789,7 +791,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
789 791
790 if (final_size > inode->i_size) { 792 if (final_size > inode->i_size) {
791 /* Credits for sb + inode write */ 793 /* Credits for sb + inode write */
792 handle = ext4_journal_start(inode, 2); 794 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
793 if (IS_ERR(handle)) { 795 if (IS_ERR(handle)) {
794 ret = PTR_ERR(handle); 796 ret = PTR_ERR(handle);
795 goto out; 797 goto out;
@@ -849,7 +851,7 @@ locked:
849 int err; 851 int err;
850 852
851 /* Credits for sb + inode write */ 853 /* Credits for sb + inode write */
852 handle = ext4_journal_start(inode, 2); 854 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
853 if (IS_ERR(handle)) { 855 if (IS_ERR(handle)) {
854 /* This is really bad luck. We've written the data 856 /* This is really bad luck. We've written the data
855 * but cannot extend i_size. Bail out and pretend 857 * but cannot extend i_size. Bail out and pretend
@@ -948,7 +950,8 @@ static handle_t *start_transaction(struct inode *inode)
948{ 950{
949 handle_t *result; 951 handle_t *result;
950 952
951 result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)); 953 result = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
954 ext4_blocks_for_truncate(inode));
952 if (!IS_ERR(result)) 955 if (!IS_ERR(result))
953 return result; 956 return result;
954 957
@@ -1515,3 +1518,243 @@ out_stop:
1515 trace_ext4_truncate_exit(inode); 1518 trace_ext4_truncate_exit(inode);
1516} 1519}
1517 1520
1521static int free_hole_blocks(handle_t *handle, struct inode *inode,
1522 struct buffer_head *parent_bh, __le32 *i_data,
1523 int level, ext4_lblk_t first,
1524 ext4_lblk_t count, int max)
1525{
1526 struct buffer_head *bh = NULL;
1527 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1528 int ret = 0;
1529 int i, inc;
1530 ext4_lblk_t offset;
1531 __le32 blk;
1532
1533 inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
1534 for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
1535 if (offset >= count + first)
1536 break;
1537 if (*i_data == 0 || (offset + inc) <= first)
1538 continue;
1539 blk = *i_data;
1540 if (level > 0) {
1541 ext4_lblk_t first2;
1542 bh = sb_bread(inode->i_sb, blk);
1543 if (!bh) {
1544 EXT4_ERROR_INODE_BLOCK(inode, blk,
1545 "Read failure");
1546 return -EIO;
1547 }
1548 first2 = (first > offset) ? first - offset : 0;
1549 ret = free_hole_blocks(handle, inode, bh,
1550 (__le32 *)bh->b_data, level - 1,
1551 first2, count - offset,
1552 inode->i_sb->s_blocksize >> 2);
1553 if (ret) {
1554 brelse(bh);
1555 goto err;
1556 }
1557 }
1558 if (level == 0 ||
1559 (bh && all_zeroes((__le32 *)bh->b_data,
1560 (__le32 *)bh->b_data + addr_per_block))) {
1561 ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
1562 *i_data = 0;
1563 }
1564 brelse(bh);
1565 bh = NULL;
1566 }
1567
1568err:
1569 return ret;
1570}
1571
1572static int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
1573 ext4_lblk_t first, ext4_lblk_t stop)
1574{
1575 int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1576 int level, ret = 0;
1577 int num = EXT4_NDIR_BLOCKS;
1578 ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
1579 __le32 *i_data = EXT4_I(inode)->i_data;
1580
1581 count = stop - first;
1582 for (level = 0; level < 4; level++, max *= addr_per_block) {
1583 if (first < max) {
1584 ret = free_hole_blocks(handle, inode, NULL, i_data,
1585 level, first, count, num);
1586 if (ret)
1587 goto err;
1588 if (count > max - first)
1589 count -= max - first;
1590 else
1591 break;
1592 first = 0;
1593 } else {
1594 first -= max;
1595 }
1596 i_data += num;
1597 if (level == 0) {
1598 num = 1;
1599 max = 1;
1600 }
1601 }
1602
1603err:
1604 return ret;
1605}
1606
1607int ext4_ind_punch_hole(struct file *file, loff_t offset, loff_t length)
1608{
1609 struct inode *inode = file->f_path.dentry->d_inode;
1610 struct super_block *sb = inode->i_sb;
1611 ext4_lblk_t first_block, stop_block;
1612 struct address_space *mapping = inode->i_mapping;
1613 handle_t *handle = NULL;
1614 loff_t first_page, last_page, page_len;
1615 loff_t first_page_offset, last_page_offset;
1616 int err = 0;
1617
1618 /*
1619 * Write out all dirty pages to avoid race conditions
1620 * Then release them.
1621 */
1622 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
1623 err = filemap_write_and_wait_range(mapping,
1624 offset, offset + length - 1);
1625 if (err)
1626 return err;
1627 }
1628
1629 mutex_lock(&inode->i_mutex);
1630 /* It's not possible to punch a hole in an append-only file */
1631 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
1632 err = -EPERM;
1633 goto out_mutex;
1634 }
1635 if (IS_SWAPFILE(inode)) {
1636 err = -ETXTBSY;
1637 goto out_mutex;
1638 }
1639
1640 /* No need to punch hole beyond i_size */
1641 if (offset >= inode->i_size)
1642 goto out_mutex;
1643
1644 /*
1645 * If the hole extends beyond i_size, set the hole
1646 * to end after the page that contains i_size
1647 */
1648 if (offset + length > inode->i_size) {
1649 length = inode->i_size +
1650 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
1651 offset;
1652 }
1653
1654 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1655 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
1656
1657 first_page_offset = first_page << PAGE_CACHE_SHIFT;
1658 last_page_offset = last_page << PAGE_CACHE_SHIFT;
1659
1660 /* Now release the pages */
1661 if (last_page_offset > first_page_offset) {
1662 truncate_pagecache_range(inode, first_page_offset,
1663 last_page_offset - 1);
1664 }
1665
1666 /* Wait for all existing dio workers; newcomers will block on i_mutex */
1667 inode_dio_wait(inode);
1668
1669 handle = start_transaction(inode);
1670 if (IS_ERR(handle))
1671 goto out_mutex;
1672
1673 /*
1674 * Now we need to zero out the non-page-aligned data in the
1675 * pages at the start and tail of the hole, and unmap the buffer
1676 * heads for the block aligned regions of the page that were
1677 * completely zeroed.
1678 */
1679 if (first_page > last_page) {
1680 /*
1681 * If the file space being truncated is contained within a page
1682 * just zero out and unmap the middle of that page
1683 */
1684 err = ext4_discard_partial_page_buffers(handle,
1685 mapping, offset, length, 0);
1686 if (err)
1687 goto out;
1688 } else {
1689 /*
1690 * Zero out and unmap the partial page that contains
1691 * the start of the hole
1692 */
1693 page_len = first_page_offset - offset;
1694 if (page_len > 0) {
1695 err = ext4_discard_partial_page_buffers(handle, mapping,
1696 offset, page_len, 0);
1697 if (err)
1698 goto out;
1699 }
1700
1701 /*
1702 * Zero out and unmap the partial page that contains
1703 * the end of the hole
1704 */
1705 page_len = offset + length - last_page_offset;
1706 if (page_len > 0) {
1707 err = ext4_discard_partial_page_buffers(handle, mapping,
1708 last_page_offset, page_len, 0);
1709 if (err)
1710 goto out;
1711 }
1712 }
1713
1714 /*
1715 * If i_size is contained in the last page, we need to
1716 * unmap and zero the partial page after i_size
1717 */
1718 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
1719 inode->i_size % PAGE_CACHE_SIZE != 0) {
1720 page_len = PAGE_CACHE_SIZE -
1721 (inode->i_size & (PAGE_CACHE_SIZE - 1));
1722 if (page_len > 0) {
1723 err = ext4_discard_partial_page_buffers(handle,
1724 mapping, inode->i_size, page_len, 0);
1725 if (err)
1726 goto out;
1727 }
1728 }
1729
1730 first_block = (offset + sb->s_blocksize - 1) >>
1731 EXT4_BLOCK_SIZE_BITS(sb);
1732 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
1733
1734 if (first_block >= stop_block)
1735 goto out;
1736
1737 down_write(&EXT4_I(inode)->i_data_sem);
1738 ext4_discard_preallocations(inode);
1739
1740 err = ext4_es_remove_extent(inode, first_block,
1741 stop_block - first_block);
1742 err = ext4_free_hole_blocks(handle, inode, first_block, stop_block);
1743
1744 ext4_discard_preallocations(inode);
1745
1746 if (IS_SYNC(inode))
1747 ext4_handle_sync(handle);
1748
1749 up_write(&EXT4_I(inode)->i_data_sem);
1750
1751out:
1752 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
1753 ext4_mark_inode_dirty(handle, inode);
1754 ext4_journal_stop(handle);
1755
1756out_mutex:
1757 mutex_unlock(&inode->i_mutex);
1758
1759 return err;
1760}
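
As a quick check on the rounding used by ext4_ind_punch_hole() above: the hole is rounded inward to whole pages for the page-cache truncation and to whole blocks for the block freeing, so partial pages and blocks at either end are only zeroed, never released. A user-space sketch of the same arithmetic (assuming 4K pages and a 4K filesystem block; illustrative only, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT	12			/* assume 4K pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define BLKBITS		12			/* assume a 4K filesystem block */
#define BLKSIZE		(1UL << BLKBITS)

int main(void)
{
	unsigned long long offset = 5000, length = 20000;

	/* round the hole inward to page and block boundaries, as above */
	unsigned long long first_page = (offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long long last_page  = (offset + length) >> PAGE_SHIFT;
	unsigned long long first_block = (offset + BLKSIZE - 1) >> BLKBITS;
	unsigned long long stop_block  = (offset + length) >> BLKBITS;

	printf("pages  [%llu, %llu) released from the page cache\n",
	       first_page, last_page);
	printf("blocks [%llu, %llu) freed from the inode\n",
	       first_block, stop_block);
	return 0;
}
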
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 387c47c6cda9..c0fd1a123f7d 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -545,7 +545,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
545 return ret; 545 return ret;
546 546
547retry: 547retry:
548 handle = ext4_journal_start(inode, needed_blocks); 548 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
549 if (IS_ERR(handle)) { 549 if (IS_ERR(handle)) {
550 ret = PTR_ERR(handle); 550 ret = PTR_ERR(handle);
551 handle = NULL; 551 handle = NULL;
@@ -657,7 +657,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
657 * The possible write could happen in the inode, 657 * The possible write could happen in the inode,
658 * so try to reserve the space in inode first. 658 * so try to reserve the space in inode first.
659 */ 659 */
660 handle = ext4_journal_start(inode, 1); 660 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
661 if (IS_ERR(handle)) { 661 if (IS_ERR(handle)) {
662 ret = PTR_ERR(handle); 662 ret = PTR_ERR(handle);
663 handle = NULL; 663 handle = NULL;
@@ -853,7 +853,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
853 if (ret) 853 if (ret)
854 return ret; 854 return ret;
855 855
856 handle = ext4_journal_start(inode, 1); 856 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
857 if (IS_ERR(handle)) { 857 if (IS_ERR(handle)) {
858 ret = PTR_ERR(handle); 858 ret = PTR_ERR(handle);
859 handle = NULL; 859 handle = NULL;
@@ -1188,7 +1188,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
1188 1188
1189 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1189 data_bh = sb_getblk(inode->i_sb, map.m_pblk);
1190 if (!data_bh) { 1190 if (!data_bh) {
1191 error = -EIO; 1191 error = -ENOMEM;
1192 goto out_restore; 1192 goto out_restore;
1193 } 1193 }
1194 1194
@@ -1298,7 +1298,7 @@ int ext4_read_inline_dir(struct file *filp,
1298 int i, stored; 1298 int i, stored;
1299 struct ext4_dir_entry_2 *de; 1299 struct ext4_dir_entry_2 *de;
1300 struct super_block *sb; 1300 struct super_block *sb;
1301 struct inode *inode = filp->f_path.dentry->d_inode; 1301 struct inode *inode = file_inode(filp);
1302 int ret, inline_size = 0; 1302 int ret, inline_size = 0;
1303 struct ext4_iloc iloc; 1303 struct ext4_iloc iloc;
1304 void *dir_buf = NULL; 1304 void *dir_buf = NULL;
@@ -1770,7 +1770,7 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
1770 1770
1771 1771
1772 needed_blocks = ext4_writepage_trans_blocks(inode); 1772 needed_blocks = ext4_writepage_trans_blocks(inode);
1773 handle = ext4_journal_start(inode, needed_blocks); 1773 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks);
1774 if (IS_ERR(handle)) 1774 if (IS_ERR(handle))
1775 return; 1775 return;
1776 1776
@@ -1862,7 +1862,7 @@ int ext4_convert_inline_data(struct inode *inode)
1862 if (error) 1862 if (error)
1863 return error; 1863 return error;
1864 1864
1865 handle = ext4_journal_start(inode, needed_blocks); 1865 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
1866 if (IS_ERR(handle)) { 1866 if (IS_ERR(handle)) {
1867 error = PTR_ERR(handle); 1867 error = PTR_ERR(handle);
1868 goto out_free; 1868 goto out_free;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cbfe13bf5b2a..9ea0cde3fa9e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -132,10 +132,6 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
132} 132}
133 133
134static void ext4_invalidatepage(struct page *page, unsigned long offset); 134static void ext4_invalidatepage(struct page *page, unsigned long offset);
135static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
136 struct buffer_head *bh_result, int create);
137static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
138static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
139static int __ext4_journalled_writepage(struct page *page, unsigned int len); 135static int __ext4_journalled_writepage(struct page *page, unsigned int len);
140static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 136static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
141static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 137static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
@@ -238,7 +234,8 @@ void ext4_evict_inode(struct inode *inode)
238 * protection against it 234 * protection against it
239 */ 235 */
240 sb_start_intwrite(inode->i_sb); 236 sb_start_intwrite(inode->i_sb);
241 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); 237 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
238 ext4_blocks_for_truncate(inode)+3);
242 if (IS_ERR(handle)) { 239 if (IS_ERR(handle)) {
243 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 240 ext4_std_error(inode->i_sb, PTR_ERR(handle));
244 /* 241 /*
@@ -346,7 +343,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
346 spin_lock(&ei->i_block_reservation_lock); 343 spin_lock(&ei->i_block_reservation_lock);
347 trace_ext4_da_update_reserve_space(inode, used, quota_claim); 344 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
348 if (unlikely(used > ei->i_reserved_data_blocks)) { 345 if (unlikely(used > ei->i_reserved_data_blocks)) {
349 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 346 ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
350 "with only %d reserved data blocks", 347 "with only %d reserved data blocks",
351 __func__, inode->i_ino, used, 348 __func__, inode->i_ino, used,
352 ei->i_reserved_data_blocks); 349 ei->i_reserved_data_blocks);
@@ -355,10 +352,12 @@ void ext4_da_update_reserve_space(struct inode *inode,
355 } 352 }
356 353
357 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { 354 if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
358 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " 355 ext4_warning(inode->i_sb, "ino %lu, allocated %d "
359 "with only %d reserved metadata blocks\n", __func__, 356 "with only %d reserved metadata blocks "
360 inode->i_ino, ei->i_allocated_meta_blocks, 357 "(releasing %d blocks with reserved %d data blocks)",
361 ei->i_reserved_meta_blocks); 358 inode->i_ino, ei->i_allocated_meta_blocks,
359 ei->i_reserved_meta_blocks, used,
360 ei->i_reserved_data_blocks);
362 WARN_ON(1); 361 WARN_ON(1);
363 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks; 362 ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
364 } 363 }
@@ -508,12 +507,33 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
508int ext4_map_blocks(handle_t *handle, struct inode *inode, 507int ext4_map_blocks(handle_t *handle, struct inode *inode,
509 struct ext4_map_blocks *map, int flags) 508 struct ext4_map_blocks *map, int flags)
510{ 509{
510 struct extent_status es;
511 int retval; 511 int retval;
512 512
513 map->m_flags = 0; 513 map->m_flags = 0;
514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
515 "logical block %lu\n", inode->i_ino, flags, map->m_len, 515 "logical block %lu\n", inode->i_ino, flags, map->m_len,
516 (unsigned long) map->m_lblk); 516 (unsigned long) map->m_lblk);
517
518 /* Look up the extent status tree first */
519 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
520 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
521 map->m_pblk = ext4_es_pblock(&es) +
522 map->m_lblk - es.es_lblk;
523 map->m_flags |= ext4_es_is_written(&es) ?
524 EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
525 retval = es.es_len - (map->m_lblk - es.es_lblk);
526 if (retval > map->m_len)
527 retval = map->m_len;
528 map->m_len = retval;
529 } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
530 retval = 0;
531 } else {
532 BUG_ON(1);
533 }
534 goto found;
535 }
536
517 /* 537 /*
518 * Try to see if we can get the block without requesting a new 538 * Try to see if we can get the block without requesting a new
519 * file system block. 539 * file system block.
@@ -527,20 +547,27 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
527 retval = ext4_ind_map_blocks(handle, inode, map, flags & 547 retval = ext4_ind_map_blocks(handle, inode, map, flags &
528 EXT4_GET_BLOCKS_KEEP_SIZE); 548 EXT4_GET_BLOCKS_KEEP_SIZE);
529 } 549 }
550 if (retval > 0) {
551 int ret;
552 unsigned long long status;
553
554 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
557 ext4_find_delalloc_range(inode, map->m_lblk,
558 map->m_lblk + map->m_len - 1))
559 status |= EXTENT_STATUS_DELAYED;
560 ret = ext4_es_insert_extent(inode, map->m_lblk,
561 map->m_len, map->m_pblk, status);
562 if (ret < 0)
563 retval = ret;
564 }
530 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 565 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
531 up_read((&EXT4_I(inode)->i_data_sem)); 566 up_read((&EXT4_I(inode)->i_data_sem));
532 567
568found:
533 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 569 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
534 int ret; 570 int ret = check_block_validity(inode, map);
535 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
536 /* delayed alloc may be allocated by fallocate and
537 * coverted to initialized by directIO.
538 * we need to handle delayed extent here.
539 */
540 down_write((&EXT4_I(inode)->i_data_sem));
541 goto delayed_mapped;
542 }
543 ret = check_block_validity(inode, map);
544 if (ret != 0) 571 if (ret != 0)
545 return ret; 572 return ret;
546 } 573 }
@@ -560,16 +587,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
560 return retval; 587 return retval;
561 588
562 /* 589 /*
563 * When we call get_blocks without the create flag, the 590 * Here we clear m_flags because after allocating a new extent,
564 * BH_Unwritten flag could have gotten set if the blocks 591 * it will be set again.
565 * requested were part of a uninitialized extent. We need to
566 * clear this flag now that we are committed to convert all or
567 * part of the uninitialized extent to be an initialized
568 * extent. This is because we need to avoid the combination
569 * of BH_Unwritten and BH_Mapped flags being simultaneously
570 * set on the buffer_head.
571 */ 592 */
572 map->m_flags &= ~EXT4_MAP_UNWRITTEN; 593 map->m_flags &= ~EXT4_MAP_FLAGS;
573 594
574 /* 595 /*
575 * New blocks allocate and/or writing to uninitialized extent 596 * New blocks allocate and/or writing to uninitialized extent
@@ -615,18 +636,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
615 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 636 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
616 ext4_da_update_reserve_space(inode, retval, 1); 637 ext4_da_update_reserve_space(inode, retval, 1);
617 } 638 }
618 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { 639 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
619 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 640 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
620 641
621 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 642 if (retval > 0) {
622 int ret; 643 int ret;
623delayed_mapped: 644 unsigned long long status;
624 /* delayed allocation blocks has been allocated */ 645
625 ret = ext4_es_remove_extent(inode, map->m_lblk, 646 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
626 map->m_len); 647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
627 if (ret < 0) 648 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
628 retval = ret; 649 ext4_find_delalloc_range(inode, map->m_lblk,
629 } 650 map->m_lblk + map->m_len - 1))
651 status |= EXTENT_STATUS_DELAYED;
652 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
653 map->m_pblk, status);
654 if (ret < 0)
655 retval = ret;
630 } 656 }
631 657
632 up_write((&EXT4_I(inode)->i_data_sem)); 658 up_write((&EXT4_I(inode)->i_data_sem));
@@ -660,7 +686,8 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
660 if (map.m_len > DIO_MAX_BLOCKS) 686 if (map.m_len > DIO_MAX_BLOCKS)
661 map.m_len = DIO_MAX_BLOCKS; 687 map.m_len = DIO_MAX_BLOCKS;
662 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); 688 dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
663 handle = ext4_journal_start(inode, dio_credits); 689 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
690 dio_credits);
664 if (IS_ERR(handle)) { 691 if (IS_ERR(handle)) {
665 ret = PTR_ERR(handle); 692 ret = PTR_ERR(handle);
666 return ret; 693 return ret;
@@ -707,14 +734,16 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
707 /* ensure we send some value back into *errp */ 734 /* ensure we send some value back into *errp */
708 *errp = 0; 735 *errp = 0;
709 736
737 if (create && err == 0)
738 err = -ENOSPC; /* should never happen */
710 if (err < 0) 739 if (err < 0)
711 *errp = err; 740 *errp = err;
712 if (err <= 0) 741 if (err <= 0)
713 return NULL; 742 return NULL;
714 743
715 bh = sb_getblk(inode->i_sb, map.m_pblk); 744 bh = sb_getblk(inode->i_sb, map.m_pblk);
716 if (!bh) { 745 if (unlikely(!bh)) {
717 *errp = -EIO; 746 *errp = -ENOMEM;
718 return NULL; 747 return NULL;
719 } 748 }
720 if (map.m_flags & EXT4_MAP_NEW) { 749 if (map.m_flags & EXT4_MAP_NEW) {
@@ -808,11 +837,10 @@ int ext4_walk_page_buffers(handle_t *handle,
808 * and the commit_write(). So doing the jbd2_journal_start at the start of 837 * and the commit_write(). So doing the jbd2_journal_start at the start of
809 * prepare_write() is the right place. 838 * prepare_write() is the right place.
810 * 839 *
811 * Also, this function can nest inside ext4_writepage() -> 840 * Also, this function can nest inside ext4_writepage(). In that case, we
812 * block_write_full_page(). In that case, we *know* that ext4_writepage() 841 * *know* that ext4_writepage() has generated enough buffer credits to do the
813 * has generated enough buffer credits to do the whole page. So we won't 842 * whole page. So we won't block on the journal in that case, which is good,
814 * block on the journal in that case, which is good, because the caller may 843 * because the caller may be PF_MEMALLOC.
815 * be PF_MEMALLOC.
816 * 844 *
817 * By accident, ext4 can be reentered when a transaction is open via 845 * By accident, ext4 can be reentered when a transaction is open via
818 * quota file writes. If we were to commit the transaction while thus 846 * quota file writes. If we were to commit the transaction while thus
@@ -878,32 +906,40 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
878 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 906 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
879 flags, pagep); 907 flags, pagep);
880 if (ret < 0) 908 if (ret < 0)
881 goto out; 909 return ret;
882 if (ret == 1) { 910 if (ret == 1)
883 ret = 0; 911 return 0;
884 goto out;
885 }
886 } 912 }
887 913
888retry: 914 /*
889 handle = ext4_journal_start(inode, needed_blocks); 915 * grab_cache_page_write_begin() can take a long time if the
916 * system is thrashing due to memory pressure, or if the page
917 * is being written back. So grab it first before we start
918 * the transaction handle. This also allows us to allocate
919 * the page (if needed) without using GFP_NOFS.
920 */
921retry_grab:
922 page = grab_cache_page_write_begin(mapping, index, flags);
923 if (!page)
924 return -ENOMEM;
925 unlock_page(page);
926
927retry_journal:
928 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
890 if (IS_ERR(handle)) { 929 if (IS_ERR(handle)) {
891 ret = PTR_ERR(handle); 930 page_cache_release(page);
892 goto out; 931 return PTR_ERR(handle);
893 } 932 }
894 933
895 /* We cannot recurse into the filesystem as the transaction is already 934 lock_page(page);
896 * started */ 935 if (page->mapping != mapping) {
897 flags |= AOP_FLAG_NOFS; 936 /* The page got truncated from under us */
898 937 unlock_page(page);
899 page = grab_cache_page_write_begin(mapping, index, flags); 938 page_cache_release(page);
900 if (!page) {
901 ext4_journal_stop(handle); 939 ext4_journal_stop(handle);
902 ret = -ENOMEM; 940 goto retry_grab;
903 goto out;
904 } 941 }
905 942 wait_on_page_writeback(page);
906 *pagep = page;
907 943
908 if (ext4_should_dioread_nolock(inode)) 944 if (ext4_should_dioread_nolock(inode))
909 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 945 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -918,7 +954,6 @@ retry:
918 954
919 if (ret) { 955 if (ret) {
920 unlock_page(page); 956 unlock_page(page);
921 page_cache_release(page);
922 /* 957 /*
923 * __block_write_begin may have instantiated a few blocks 958 * __block_write_begin may have instantiated a few blocks
924 * outside i_size. Trim these off again. Don't need 959 * outside i_size. Trim these off again. Don't need
@@ -942,11 +977,14 @@ retry:
942 if (inode->i_nlink) 977 if (inode->i_nlink)
943 ext4_orphan_del(NULL, inode); 978 ext4_orphan_del(NULL, inode);
944 } 979 }
945 }
946 980
947 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 981 if (ret == -ENOSPC &&
948 goto retry; 982 ext4_should_retry_alloc(inode->i_sb, &retries))
949out: 983 goto retry_journal;
984 page_cache_release(page);
985 return ret;
986 }
987 *pagep = page;
950 return ret; 988 return ret;
951} 989}
952 990
@@ -1256,7 +1294,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1256 * function is called from invalidate page, it's 1294 * function is called from invalidate page, it's
1257 * harmless to return without any action. 1295 * harmless to return without any action.
1258 */ 1296 */
1259 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " 1297 ext4_warning(inode->i_sb, "ext4_da_release_space: "
1260 "ino %lu, to_free %d with only %d reserved " 1298 "ino %lu, to_free %d with only %d reserved "
1261 "data blocks", inode->i_ino, to_free, 1299 "data blocks", inode->i_ino, to_free,
1262 ei->i_reserved_data_blocks); 1300 ei->i_reserved_data_blocks);
@@ -1357,7 +1395,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1357 loff_t size = i_size_read(inode); 1395 loff_t size = i_size_read(inode);
1358 unsigned int len, block_start; 1396 unsigned int len, block_start;
1359 struct buffer_head *bh, *page_bufs = NULL; 1397 struct buffer_head *bh, *page_bufs = NULL;
1360 int journal_data = ext4_should_journal_data(inode);
1361 sector_t pblock = 0, cur_logical = 0; 1398 sector_t pblock = 0, cur_logical = 0;
1362 struct ext4_io_submit io_submit; 1399 struct ext4_io_submit io_submit;
1363 1400
@@ -1378,7 +1415,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1378 if (nr_pages == 0) 1415 if (nr_pages == 0)
1379 break; 1416 break;
1380 for (i = 0; i < nr_pages; i++) { 1417 for (i = 0; i < nr_pages; i++) {
1381 int commit_write = 0, skip_page = 0; 1418 int skip_page = 0;
1382 struct page *page = pvec.pages[i]; 1419 struct page *page = pvec.pages[i];
1383 1420
1384 index = page->index; 1421 index = page->index;
@@ -1400,27 +1437,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1400 BUG_ON(!PageLocked(page)); 1437 BUG_ON(!PageLocked(page));
1401 BUG_ON(PageWriteback(page)); 1438 BUG_ON(PageWriteback(page));
1402 1439
1403 /*
1404 * If the page does not have buffers (for
1405 * whatever reason), try to create them using
1406 * __block_write_begin. If this fails,
1407 * skip the page and move on.
1408 */
1409 if (!page_has_buffers(page)) {
1410 if (__block_write_begin(page, 0, len,
1411 noalloc_get_block_write)) {
1412 skip_page:
1413 unlock_page(page);
1414 continue;
1415 }
1416 commit_write = 1;
1417 }
1418
1419 bh = page_bufs = page_buffers(page); 1440 bh = page_bufs = page_buffers(page);
1420 block_start = 0; 1441 block_start = 0;
1421 do { 1442 do {
1422 if (!bh)
1423 goto skip_page;
1424 if (map && (cur_logical >= map->m_lblk) && 1443 if (map && (cur_logical >= map->m_lblk) &&
1425 (cur_logical <= (map->m_lblk + 1444 (cur_logical <= (map->m_lblk +
1426 (map->m_len - 1)))) { 1445 (map->m_len - 1)))) {
@@ -1448,33 +1467,14 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1448 pblock++; 1467 pblock++;
1449 } while (bh != page_bufs); 1468 } while (bh != page_bufs);
1450 1469
1451 if (skip_page) 1470 if (skip_page) {
1452 goto skip_page; 1471 unlock_page(page);
1453 1472 continue;
1454 if (commit_write) 1473 }
1455 /* mark the buffer_heads as dirty & uptodate */
1456 block_commit_write(page, 0, len);
1457 1474
1458 clear_page_dirty_for_io(page); 1475 clear_page_dirty_for_io(page);
1459 /* 1476 err = ext4_bio_write_page(&io_submit, page, len,
1460 * Delalloc doesn't support data journalling, 1477 mpd->wbc);
1461 * but eventually maybe we'll lift this
1462 * restriction.
1463 */
1464 if (unlikely(journal_data && PageChecked(page)))
1465 err = __ext4_journalled_writepage(page, len);
1466 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
1467 err = ext4_bio_write_page(&io_submit, page,
1468 len, mpd->wbc);
1469 else if (buffer_uninit(page_bufs)) {
1470 ext4_set_bh_endio(page_bufs, inode);
1471 err = block_write_full_page_endio(page,
1472 noalloc_get_block_write,
1473 mpd->wbc, ext4_end_io_buffer_write);
1474 } else
1475 err = block_write_full_page(page,
1476 noalloc_get_block_write, mpd->wbc);
1477
1478 if (!err) 1478 if (!err)
1479 mpd->pages_written++; 1479 mpd->pages_written++;
1480 /* 1480 /*
@@ -1640,7 +1640,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1640 (unsigned long long) next, 1640 (unsigned long long) next,
1641 mpd->b_size >> mpd->inode->i_blkbits, err); 1641 mpd->b_size >> mpd->inode->i_blkbits, err);
1642 ext4_msg(sb, KERN_CRIT, 1642 ext4_msg(sb, KERN_CRIT,
1643 "This should not happen!! Data will be lost\n"); 1643 "This should not happen!! Data will be lost");
1644 if (err == -ENOSPC) 1644 if (err == -ENOSPC)
1645 ext4_print_free_blocks(mpd->inode); 1645 ext4_print_free_blocks(mpd->inode);
1646 } 1646 }
@@ -1690,16 +1690,16 @@ submit_io:
1690 * 1690 *
1691 * @mpd->lbh - extent of blocks 1691 * @mpd->lbh - extent of blocks
1692 * @logical - logical number of the block in the file 1692 * @logical - logical number of the block in the file
1693 * @bh - bh of the block (used to access block's state) 1693 * @b_state - b_state of the buffer head added
1694 * 1694 *
1695 * the function is used to collect contig. blocks in same state 1695 * the function is used to collect contig. blocks in same state
1696 */ 1696 */
1697static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1697static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical,
1698 sector_t logical, size_t b_size,
1699 unsigned long b_state) 1698 unsigned long b_state)
1700{ 1699{
1701 sector_t next; 1700 sector_t next;
1702 int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; 1701 int blkbits = mpd->inode->i_blkbits;
1702 int nrblocks = mpd->b_size >> blkbits;
1703 1703
1704 /* 1704 /*
1705 * XXX Don't go larger than mballoc is willing to allocate 1705 * XXX Don't go larger than mballoc is willing to allocate
@@ -1707,11 +1707,11 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1707 * mpage_da_submit_io() into this function and then call 1707 * mpage_da_submit_io() into this function and then call
1708 * ext4_map_blocks() multiple times in a loop 1708 * ext4_map_blocks() multiple times in a loop
1709 */ 1709 */
1710 if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) 1710 if (nrblocks >= (8*1024*1024 >> blkbits))
1711 goto flush_it; 1711 goto flush_it;
1712 1712
1713 /* check if thereserved journal credits might overflow */ 1713 /* check if the reserved journal credits might overflow */
1714 if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { 1714 if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) {
1715 if (nrblocks >= EXT4_MAX_TRANS_DATA) { 1715 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1716 /* 1716 /*
1717 * With non-extent format we are limited by the journal 1717 * With non-extent format we are limited by the journal
@@ -1720,16 +1720,6 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1720 * nrblocks. So limit nrblocks. 1720 * nrblocks. So limit nrblocks.
1721 */ 1721 */
1722 goto flush_it; 1722 goto flush_it;
1723 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1724 EXT4_MAX_TRANS_DATA) {
1725 /*
1726 * Adding the new buffer_head would make it cross the
1727 * allowed limit for which we have journal credit
1728 * reserved. So limit the new bh->b_size
1729 */
1730 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1731 mpd->inode->i_blkbits;
1732 /* we will do mpage_da_submit_io in the next loop */
1733 } 1723 }
1734 } 1724 }
1735 /* 1725 /*
@@ -1737,7 +1727,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1737 */ 1727 */
1738 if (mpd->b_size == 0) { 1728 if (mpd->b_size == 0) {
1739 mpd->b_blocknr = logical; 1729 mpd->b_blocknr = logical;
1740 mpd->b_size = b_size; 1730 mpd->b_size = 1 << blkbits;
1741 mpd->b_state = b_state & BH_FLAGS; 1731 mpd->b_state = b_state & BH_FLAGS;
1742 return; 1732 return;
1743 } 1733 }
@@ -1747,7 +1737,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1747 * Can we merge the block to our big extent? 1737 * Can we merge the block to our big extent?
1748 */ 1738 */
1749 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { 1739 if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
1750 mpd->b_size += b_size; 1740 mpd->b_size += 1 << blkbits;
1751 return; 1741 return;
1752 } 1742 }
1753 1743
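mpage_add_bh_to_extent() above now grows the pending extent one block (1 << blkbits) at a time, merging a buffer only when it is logically contiguous and its BH state flags match, and flushing otherwise. A small userspace sketch of that accumulate-or-flush logic; the struct and flush_extent() are made up for the example, not ext4 code:

#include <stdio.h>

struct extent_acc {
        unsigned long start;    /* first logical block in the extent */
        unsigned long nblocks;  /* number of blocks accumulated so far */
        unsigned int state;     /* BH-style state flags shared by the extent */
};

static void flush_extent(struct extent_acc *acc)
{
        if (acc->nblocks)
                printf("submit extent: start %lu, len %lu, state %#x\n",
                       acc->start, acc->nblocks, acc->state);
        acc->nblocks = 0;
}

static void add_block(struct extent_acc *acc, unsigned long logical,
                      unsigned int state)
{
        if (acc->nblocks == 0) {                /* first block of a new extent */
                acc->start = logical;
                acc->nblocks = 1;
                acc->state = state;
                return;
        }
        if (logical == acc->start + acc->nblocks && state == acc->state) {
                acc->nblocks++;                 /* contiguous and same state: merge */
                return;
        }
        flush_extent(acc);                      /* discontiguous: flush, then restart */
        add_block(acc, logical, state);
}

int main(void)
{
        struct extent_acc acc = { 0, 0, 0 };
        unsigned long blocks[] = { 10, 11, 12, 20, 21 };

        for (unsigned int i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
                add_block(&acc, blocks[i], 0x1);
        flush_extent(&acc);
        return 0;
}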
@@ -1775,6 +1765,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1775 struct ext4_map_blocks *map, 1765 struct ext4_map_blocks *map,
1776 struct buffer_head *bh) 1766 struct buffer_head *bh)
1777{ 1767{
1768 struct extent_status es;
1778 int retval; 1769 int retval;
1779 sector_t invalid_block = ~((sector_t) 0xffff); 1770 sector_t invalid_block = ~((sector_t) 0xffff);
1780 1771
@@ -1785,6 +1776,42 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1785 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," 1776 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1786 "logical block %lu\n", inode->i_ino, map->m_len, 1777 "logical block %lu\n", inode->i_ino, map->m_len,
1787 (unsigned long) map->m_lblk); 1778 (unsigned long) map->m_lblk);
1779
1780 /* Lookup extent status tree firstly */
1781 if (ext4_es_lookup_extent(inode, iblock, &es)) {
1782
1783 if (ext4_es_is_hole(&es)) {
1784 retval = 0;
1785 down_read((&EXT4_I(inode)->i_data_sem));
1786 goto add_delayed;
1787 }
1788
1789 /*
1790 * Delayed extent could be allocated by fallocate.
1791 * So we need to check it.
1792 */
1793 if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
1794 map_bh(bh, inode->i_sb, invalid_block);
1795 set_buffer_new(bh);
1796 set_buffer_delay(bh);
1797 return 0;
1798 }
1799
1800 map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
1801 retval = es.es_len - (iblock - es.es_lblk);
1802 if (retval > map->m_len)
1803 retval = map->m_len;
1804 map->m_len = retval;
1805 if (ext4_es_is_written(&es))
1806 map->m_flags |= EXT4_MAP_MAPPED;
1807 else if (ext4_es_is_unwritten(&es))
1808 map->m_flags |= EXT4_MAP_UNWRITTEN;
1809 else
1810 BUG_ON(1);
1811
1812 return retval;
1813 }
1814
1788 /* 1815 /*
1789 * Try to see if we can get the block without requesting a new 1816 * Try to see if we can get the block without requesting a new
1790 * file system block. 1817 * file system block.
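The new fast path above consults the extent status tree before any block lookup: for a cached extent that is neither a hole nor purely delayed, the physical block is es_pblock + (iblock - es_lblk) and the returned length is clamped to both the cached extent and the requested map->m_len. A hedged userspace sketch of just that mapping arithmetic; struct cached_extent is a stand-in, not the real extent_status:

#include <stdio.h>

struct cached_extent {
        unsigned long lblk;     /* first logical block covered */
        unsigned long len;      /* number of blocks covered */
        unsigned long pblk;     /* first physical block */
};

/* Map iblock..iblock+max_len-1 through one cached extent.
 * Returns how many blocks were mapped and writes the physical start. */
static unsigned long map_through_extent(const struct cached_extent *es,
                                        unsigned long iblock,
                                        unsigned long max_len,
                                        unsigned long *pblk_out)
{
        unsigned long avail;

        if (iblock < es->lblk || iblock >= es->lblk + es->len)
                return 0;                       /* cache miss */
        *pblk_out = es->pblk + (iblock - es->lblk);
        avail = es->len - (iblock - es->lblk);
        return avail < max_len ? avail : max_len;
}

int main(void)
{
        struct cached_extent es = { .lblk = 100, .len = 8, .pblk = 5000 };
        unsigned long pblk, n;

        n = map_through_extent(&es, 103, 16, &pblk);
        printf("mapped %lu blocks starting at pblk %lu\n", n, pblk);
        return 0;
}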
@@ -1803,11 +1830,15 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1803 map->m_flags |= EXT4_MAP_FROM_CLUSTER; 1830 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
1804 retval = 0; 1831 retval = 0;
1805 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 1832 } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1806 retval = ext4_ext_map_blocks(NULL, inode, map, 0); 1833 retval = ext4_ext_map_blocks(NULL, inode, map,
1834 EXT4_GET_BLOCKS_NO_PUT_HOLE);
1807 else 1835 else
1808 retval = ext4_ind_map_blocks(NULL, inode, map, 0); 1836 retval = ext4_ind_map_blocks(NULL, inode, map,
1837 EXT4_GET_BLOCKS_NO_PUT_HOLE);
1809 1838
1839add_delayed:
1810 if (retval == 0) { 1840 if (retval == 0) {
1841 int ret;
1811 /* 1842 /*
1812 * XXX: __block_prepare_write() unmaps passed block, 1843 * XXX: __block_prepare_write() unmaps passed block,
1813 * is it OK? 1844 * is it OK?
@@ -1815,15 +1846,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1815 /* If the block was allocated from previously allocated cluster, 1846 /* If the block was allocated from previously allocated cluster,
1816 * then we dont need to reserve it again. */ 1847 * then we dont need to reserve it again. */
1817 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { 1848 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1818 retval = ext4_da_reserve_space(inode, iblock); 1849 ret = ext4_da_reserve_space(inode, iblock);
1819 if (retval) 1850 if (ret) {
1820 /* not enough space to reserve */ 1851 /* not enough space to reserve */
1852 retval = ret;
1821 goto out_unlock; 1853 goto out_unlock;
1854 }
1822 } 1855 }
1823 1856
1824 retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); 1857 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1825 if (retval) 1858 ~0, EXTENT_STATUS_DELAYED);
1859 if (ret) {
1860 retval = ret;
1826 goto out_unlock; 1861 goto out_unlock;
1862 }
1827 1863
1828 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served 1864 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1829 * and it should not appear on the bh->b_state. 1865 * and it should not appear on the bh->b_state.
@@ -1833,6 +1869,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1833 map_bh(bh, inode->i_sb, invalid_block); 1869 map_bh(bh, inode->i_sb, invalid_block);
1834 set_buffer_new(bh); 1870 set_buffer_new(bh);
1835 set_buffer_delay(bh); 1871 set_buffer_delay(bh);
1872 } else if (retval > 0) {
1873 int ret;
1874 unsigned long long status;
1875
1876 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
1879 map->m_pblk, status);
1880 if (ret != 0)
1881 retval = ret;
1836 } 1882 }
1837 1883
1838out_unlock: 1884out_unlock:
@@ -1890,27 +1936,6 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1890 return 0; 1936 return 0;
1891} 1937}
1892 1938
1893/*
1894 * This function is used as a standard get_block_t calback function
1895 * when there is no desire to allocate any blocks. It is used as a
1896 * callback function for block_write_begin() and block_write_full_page().
1897 * These functions should only try to map a single block at a time.
1898 *
1899 * Since this function doesn't do block allocations even if the caller
1900 * requests it by passing in create=1, it is critically important that
1901 * any caller checks to make sure that any buffer heads are returned
1902 * by this function are either all already mapped or marked for
1903 * delayed allocation before calling block_write_full_page(). Otherwise,
1904 * b_blocknr could be left unitialized, and the page write functions will
1905 * be taken by surprise.
1906 */
1907static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
1908 struct buffer_head *bh_result, int create)
1909{
1910 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
1911 return _ext4_get_block(inode, iblock, bh_result, 0);
1912}
1913
1914static int bget_one(handle_t *handle, struct buffer_head *bh) 1939static int bget_one(handle_t *handle, struct buffer_head *bh)
1915{ 1940{
1916 get_bh(bh); 1941 get_bh(bh);
@@ -1955,7 +1980,8 @@ static int __ext4_journalled_writepage(struct page *page,
1955 * references to buffers so we are safe */ 1980 * references to buffers so we are safe */
1956 unlock_page(page); 1981 unlock_page(page);
1957 1982
1958 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 1983 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
1984 ext4_writepage_trans_blocks(inode));
1959 if (IS_ERR(handle)) { 1985 if (IS_ERR(handle)) {
1960 ret = PTR_ERR(handle); 1986 ret = PTR_ERR(handle);
1961 goto out; 1987 goto out;
@@ -2035,11 +2061,12 @@ out:
2035static int ext4_writepage(struct page *page, 2061static int ext4_writepage(struct page *page,
2036 struct writeback_control *wbc) 2062 struct writeback_control *wbc)
2037{ 2063{
2038 int ret = 0, commit_write = 0; 2064 int ret = 0;
2039 loff_t size; 2065 loff_t size;
2040 unsigned int len; 2066 unsigned int len;
2041 struct buffer_head *page_bufs = NULL; 2067 struct buffer_head *page_bufs = NULL;
2042 struct inode *inode = page->mapping->host; 2068 struct inode *inode = page->mapping->host;
2069 struct ext4_io_submit io_submit;
2043 2070
2044 trace_ext4_writepage(page); 2071 trace_ext4_writepage(page);
2045 size = i_size_read(inode); 2072 size = i_size_read(inode);
@@ -2048,39 +2075,29 @@ static int ext4_writepage(struct page *page,
2048 else 2075 else
2049 len = PAGE_CACHE_SIZE; 2076 len = PAGE_CACHE_SIZE;
2050 2077
2078 page_bufs = page_buffers(page);
2051 /* 2079 /*
2052 * If the page does not have buffers (for whatever reason), 2080 * We cannot do block allocation or other extent handling in this
2053 * try to create them using __block_write_begin. If this 2081 * function. If there are buffers needing that, we have to redirty
2054 * fails, redirty the page and move on. 2082 * the page. But we may reach here when we do a journal commit via
2083 * journal_submit_inode_data_buffers() and in that case we must write
2084 * allocated buffers to achieve data=ordered mode guarantees.
2055 */ 2085 */
2056 if (!page_has_buffers(page)) { 2086 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2057 if (__block_write_begin(page, 0, len, 2087 ext4_bh_delay_or_unwritten)) {
2058 noalloc_get_block_write)) { 2088 redirty_page_for_writepage(wbc, page);
2059 redirty_page: 2089 if (current->flags & PF_MEMALLOC) {
2060 redirty_page_for_writepage(wbc, page); 2090 /*
2091 * For memory cleaning there's no point in writing only
2092 * some buffers. So just bail out. Warn if we came here
2093 * from direct reclaim.
2094 */
2095 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD))
2096 == PF_MEMALLOC);
2061 unlock_page(page); 2097 unlock_page(page);
2062 return 0; 2098 return 0;
2063 } 2099 }
2064 commit_write = 1;
2065 }
2066 page_bufs = page_buffers(page);
2067 if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2068 ext4_bh_delay_or_unwritten)) {
2069 /*
2070 * We don't want to do block allocation, so redirty
2071 * the page and return. We may reach here when we do
2072 * a journal commit via journal_submit_inode_data_buffers.
2073 * We can also reach here via shrink_page_list but it
2074 * should never be for direct reclaim so warn if that
2075 * happens
2076 */
2077 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
2078 PF_MEMALLOC);
2079 goto redirty_page;
2080 } 2100 }
2081 if (commit_write)
2082 /* now mark the buffer_heads as dirty and uptodate */
2083 block_commit_write(page, 0, len);
2084 2101
2085 if (PageChecked(page) && ext4_should_journal_data(inode)) 2102 if (PageChecked(page) && ext4_should_journal_data(inode))
2086 /* 2103 /*
@@ -2089,14 +2106,9 @@ static int ext4_writepage(struct page *page,
2089 */ 2106 */
2090 return __ext4_journalled_writepage(page, len); 2107 return __ext4_journalled_writepage(page, len);
2091 2108
2092 if (buffer_uninit(page_bufs)) { 2109 memset(&io_submit, 0, sizeof(io_submit));
2093 ext4_set_bh_endio(page_bufs, inode); 2110 ret = ext4_bio_write_page(&io_submit, page, len, wbc);
2094 ret = block_write_full_page_endio(page, noalloc_get_block_write, 2111 ext4_io_submit(&io_submit);
2095 wbc, ext4_end_io_buffer_write);
2096 } else
2097 ret = block_write_full_page(page, noalloc_get_block_write,
2098 wbc);
2099
2100 return ret; 2112 return ret;
2101} 2113}
2102 2114
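After this change ext4_writepage() assumes the page already has buffers and never allocates from the writeback path: if any buffer is delayed or unwritten the page is redirtied (and memory-cleaning callers bail out, warning if they came from direct reclaim), otherwise the whole page is handed to ext4_bio_write_page(). A userspace sketch of that walk-and-decide step; the types and helper below are illustrative only:

#include <stdio.h>
#include <stdbool.h>

struct fake_bh {
        bool delayed;           /* block not allocated yet */
        bool unwritten;         /* allocated but still unwritten extent */
};

/* Mirror of the "can we write this page without allocating?" check. */
static bool page_needs_allocation(const struct fake_bh *bhs, int nr)
{
        for (int i = 0; i < nr; i++)
                if (bhs[i].delayed || bhs[i].unwritten)
                        return true;
        return false;
}

int main(void)
{
        struct fake_bh page_bufs[4] = { {0, 0}, {0, 0}, {1, 0}, {0, 0} };

        if (page_needs_allocation(page_bufs, 4))
                printf("redirty page and let delalloc writeback handle it\n");
        else
                printf("submit page for I/O\n");
        return 0;
}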
@@ -2228,51 +2240,38 @@ static int write_cache_pages_da(handle_t *handle,
2228 logical = (sector_t) page->index << 2240 logical = (sector_t) page->index <<
2229 (PAGE_CACHE_SHIFT - inode->i_blkbits); 2241 (PAGE_CACHE_SHIFT - inode->i_blkbits);
2230 2242
2231 if (!page_has_buffers(page)) { 2243 /* Add all dirty buffers to mpd */
2232 mpage_add_bh_to_extent(mpd, logical, 2244 head = page_buffers(page);
2233 PAGE_CACHE_SIZE, 2245 bh = head;
2234 (1 << BH_Dirty) | (1 << BH_Uptodate)); 2246 do {
2235 if (mpd->io_done) 2247 BUG_ON(buffer_locked(bh));
2236 goto ret_extent_tail;
2237 } else {
2238 /* 2248 /*
2239 * Page with regular buffer heads, 2249 * We need to try to allocate unmapped blocks
2240 * just add all dirty ones 2250 * in the same page. Otherwise we won't make
2251 * progress with the page in ext4_writepage
2241 */ 2252 */
2242 head = page_buffers(page); 2253 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2243 bh = head; 2254 mpage_add_bh_to_extent(mpd, logical,
2244 do { 2255 bh->b_state);
2245 BUG_ON(buffer_locked(bh)); 2256 if (mpd->io_done)
2257 goto ret_extent_tail;
2258 } else if (buffer_dirty(bh) &&
2259 buffer_mapped(bh)) {
2246 /* 2260 /*
2247 * We need to try to allocate 2261 * mapped dirty buffer. We need to
2248 * unmapped blocks in the same page. 2262 * update the b_state because we look
2249 * Otherwise we won't make progress 2263 * at b_state in mpage_da_map_blocks.
2250 * with the page in ext4_writepage 2264 * We don't update b_size because if we
2265 * find an unmapped buffer_head later
2266 * we need to use the b_state flag of
2267 * that buffer_head.
2251 */ 2268 */
2252 if (ext4_bh_delay_or_unwritten(NULL, bh)) { 2269 if (mpd->b_size == 0)
2253 mpage_add_bh_to_extent(mpd, logical, 2270 mpd->b_state =
2254 bh->b_size, 2271 bh->b_state & BH_FLAGS;
2255 bh->b_state); 2272 }
2256 if (mpd->io_done) 2273 logical++;
2257 goto ret_extent_tail; 2274 } while ((bh = bh->b_this_page) != head);
2258 } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
2259 /*
2260 * mapped dirty buffer. We need
2261 * to update the b_state
2262 * because we look at b_state
2263 * in mpage_da_map_blocks. We
2264 * don't update b_size because
2265 * if we find an unmapped
2266 * buffer_head later we need to
2267 * use the b_state flag of that
2268 * buffer_head.
2269 */
2270 if (mpd->b_size == 0)
2271 mpd->b_state = bh->b_state & BH_FLAGS;
2272 }
2273 logical++;
2274 } while ((bh = bh->b_this_page) != head);
2275 }
2276 2275
2277 if (nr_to_write > 0) { 2276 if (nr_to_write > 0) {
2278 nr_to_write--; 2277 nr_to_write--;
@@ -2413,7 +2412,8 @@ retry:
2413 needed_blocks = ext4_da_writepages_trans_blocks(inode); 2412 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2414 2413
2415 /* start a new transaction*/ 2414 /* start a new transaction*/
2416 handle = ext4_journal_start(inode, needed_blocks); 2415 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2416 needed_blocks);
2417 if (IS_ERR(handle)) { 2417 if (IS_ERR(handle)) {
2418 ret = PTR_ERR(handle); 2418 ret = PTR_ERR(handle);
2419 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2419 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb)
2512 /* 2512 /*
2513 * Start pushing delalloc when 1/2 of free blocks are dirty. 2513 * Start pushing delalloc when 1/2 of free blocks are dirty.
2514 */ 2514 */
2515 if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && 2515 if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
2516 !writeback_in_progress(sb->s_bdi) && 2516 try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2517 down_read_trylock(&sb->s_umount)) {
2518 writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2519 up_read(&sb->s_umount);
2520 }
2521 2517
2522 if (2 * free_blocks < 3 * dirty_blocks || 2518 if (2 * free_blocks < 3 * dirty_blocks ||
2523 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { 2519 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
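ext4_nonda_switch() above keeps two thresholds: once dirty delalloc blocks exceed half of the free blocks it now simply calls try_to_writeback_inodes_sb(), which handles the s_umount trylock itself, and once free space falls near the watermark the write path switches to non-delayed allocation. A tiny sketch of the two ratio checks; the watermark constant below is invented, not the ext4 value:

#include <stdio.h>
#include <stdbool.h>

#define FREECLUSTERS_WATERMARK 1024     /* illustrative only */

static bool should_kick_writeback(long free_blocks, long dirty_blocks)
{
        return dirty_blocks && free_blocks < 2 * dirty_blocks;
}

static bool should_switch_to_nondelalloc(long free_blocks, long dirty_blocks)
{
        return 2 * free_blocks < 3 * dirty_blocks ||
               free_blocks < dirty_blocks + FREECLUSTERS_WATERMARK;
}

int main(void)
{
        long free_blocks = 3000, dirty_blocks = 1800;

        printf("kick writeback: %d\n",
               should_kick_writeback(free_blocks, dirty_blocks));
        printf("use non-delalloc path: %d\n",
               should_switch_to_nondelalloc(free_blocks, dirty_blocks));
        return 0;
}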
@@ -2555,42 +2551,52 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2555 pos, len, flags, 2551 pos, len, flags,
2556 pagep, fsdata); 2552 pagep, fsdata);
2557 if (ret < 0) 2553 if (ret < 0)
2558 goto out; 2554 return ret;
2559 if (ret == 1) { 2555 if (ret == 1)
2560 ret = 0; 2556 return 0;
2561 goto out;
2562 }
2563 } 2557 }
2564 2558
2565retry: 2559 /*
2560 * grab_cache_page_write_begin() can take a long time if the
2561 * system is thrashing due to memory pressure, or if the page
2562 * is being written back. So grab it first before we start
2563 * the transaction handle. This also allows us to allocate
2564 * the page (if needed) without using GFP_NOFS.
2565 */
2566retry_grab:
2567 page = grab_cache_page_write_begin(mapping, index, flags);
2568 if (!page)
2569 return -ENOMEM;
2570 unlock_page(page);
2571
2566 /* 2572 /*
2567 * With delayed allocation, we don't log the i_disksize update 2573 * With delayed allocation, we don't log the i_disksize update
2568 * if there is delayed block allocation. But we still need 2574 * if there is delayed block allocation. But we still need
2569 * to journalling the i_disksize update if writes to the end 2575 * to journalling the i_disksize update if writes to the end
2570 * of file which has an already mapped buffer. 2576 * of file which has an already mapped buffer.
2571 */ 2577 */
2572 handle = ext4_journal_start(inode, 1); 2578retry_journal:
2579 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
2573 if (IS_ERR(handle)) { 2580 if (IS_ERR(handle)) {
2574 ret = PTR_ERR(handle); 2581 page_cache_release(page);
2575 goto out; 2582 return PTR_ERR(handle);
2576 } 2583 }
2577 /* We cannot recurse into the filesystem as the transaction is already
2578 * started */
2579 flags |= AOP_FLAG_NOFS;
2580 2584
2581 page = grab_cache_page_write_begin(mapping, index, flags); 2585 lock_page(page);
2582 if (!page) { 2586 if (page->mapping != mapping) {
2587 /* The page got truncated from under us */
2588 unlock_page(page);
2589 page_cache_release(page);
2583 ext4_journal_stop(handle); 2590 ext4_journal_stop(handle);
2584 ret = -ENOMEM; 2591 goto retry_grab;
2585 goto out;
2586 } 2592 }
2587 *pagep = page; 2593 /* In case writeback began while the page was unlocked */
2594 wait_on_page_writeback(page);
2588 2595
2589 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2596 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2590 if (ret < 0) { 2597 if (ret < 0) {
2591 unlock_page(page); 2598 unlock_page(page);
2592 ext4_journal_stop(handle); 2599 ext4_journal_stop(handle);
2593 page_cache_release(page);
2594 /* 2600 /*
2595 * block_write_begin may have instantiated a few blocks 2601 * block_write_begin may have instantiated a few blocks
2596 * outside i_size. Trim these off again. Don't need 2602 * outside i_size. Trim these off again. Don't need
@@ -2598,11 +2604,16 @@ retry:
2598 */ 2604 */
2599 if (pos + len > inode->i_size) 2605 if (pos + len > inode->i_size)
2600 ext4_truncate_failed_write(inode); 2606 ext4_truncate_failed_write(inode);
2607
2608 if (ret == -ENOSPC &&
2609 ext4_should_retry_alloc(inode->i_sb, &retries))
2610 goto retry_journal;
2611
2612 page_cache_release(page);
2613 return ret;
2601 } 2614 }
2602 2615
2603 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2616 *pagep = page;
2604 goto retry;
2605out:
2606 return ret; 2617 return ret;
2607} 2618}
2608 2619
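The rewritten ext4_da_write_begin() above (like ext4_write_begin() earlier) grabs and immediately unlocks the page before starting the journal handle, then re-locks it, re-checks page->mapping, and jumps back to retry_grab if the page was truncated in the meantime. A hedged userspace analogue of that grab/re-validate/retry shape, using a generation counter in place of page->mapping; none of the names below are kernel APIs (build with -pthread):

#include <stdio.h>
#include <pthread.h>

struct fake_page {
        pthread_mutex_t lock;
        int mapping_gen;        /* bumped whenever the "page" is truncated */
};

static struct fake_page pg = { PTHREAD_MUTEX_INITIALIZER, 0 };
static pthread_mutex_t journal = PTHREAD_MUTEX_INITIALIZER;

static void write_begin(void)
{
        int gen;

retry_grab:
        pthread_mutex_lock(&pg.lock);
        gen = pg.mapping_gen;           /* remember which "mapping" we saw */
        pthread_mutex_unlock(&pg.lock);

        /* Start the "transaction" only after the page work that may block. */
        pthread_mutex_lock(&journal);

        pthread_mutex_lock(&pg.lock);
        if (pg.mapping_gen != gen) {
                /* The page got truncated from under us: drop both and retry. */
                pthread_mutex_unlock(&pg.lock);
                pthread_mutex_unlock(&journal);
                goto retry_grab;
        }
        printf("page is still valid, do the write\n");
        pthread_mutex_unlock(&pg.lock);
        pthread_mutex_unlock(&journal);
}

int main(void)
{
        write_begin();
        return 0;
}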
@@ -2858,36 +2869,10 @@ ext4_readpages(struct file *file, struct address_space *mapping,
2858 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 2869 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
2859} 2870}
2860 2871
2861static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
2862{
2863 struct buffer_head *head, *bh;
2864 unsigned int curr_off = 0;
2865
2866 if (!page_has_buffers(page))
2867 return;
2868 head = bh = page_buffers(page);
2869 do {
2870 if (offset <= curr_off && test_clear_buffer_uninit(bh)
2871 && bh->b_private) {
2872 ext4_free_io_end(bh->b_private);
2873 bh->b_private = NULL;
2874 bh->b_end_io = NULL;
2875 }
2876 curr_off = curr_off + bh->b_size;
2877 bh = bh->b_this_page;
2878 } while (bh != head);
2879}
2880
2881static void ext4_invalidatepage(struct page *page, unsigned long offset) 2872static void ext4_invalidatepage(struct page *page, unsigned long offset)
2882{ 2873{
2883 trace_ext4_invalidatepage(page, offset); 2874 trace_ext4_invalidatepage(page, offset);
2884 2875
2885 /*
2886 * free any io_end structure allocated for buffers to be discarded
2887 */
2888 if (ext4_should_dioread_nolock(page->mapping->host))
2889 ext4_invalidatepage_free_endio(page, offset);
2890
2891 /* No journalling happens on data buffers when this function is used */ 2876 /* No journalling happens on data buffers when this function is used */
2892 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); 2877 WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
2893 2878
@@ -2959,7 +2944,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2959 ssize_t size, void *private, int ret, 2944 ssize_t size, void *private, int ret,
2960 bool is_async) 2945 bool is_async)
2961{ 2946{
2962 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2947 struct inode *inode = file_inode(iocb->ki_filp);
2963 ext4_io_end_t *io_end = iocb->private; 2948 ext4_io_end_t *io_end = iocb->private;
2964 2949
2965 /* if not async direct IO or dio with 0 bytes write, just return */ 2950 /* if not async direct IO or dio with 0 bytes write, just return */
@@ -2977,9 +2962,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2977 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2962 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2978 ext4_free_io_end(io_end); 2963 ext4_free_io_end(io_end);
2979out: 2964out:
2965 inode_dio_done(inode);
2980 if (is_async) 2966 if (is_async)
2981 aio_complete(iocb, ret, 0); 2967 aio_complete(iocb, ret, 0);
2982 inode_dio_done(inode);
2983 return; 2968 return;
2984 } 2969 }
2985 2970
@@ -2993,65 +2978,6 @@ out:
2993 ext4_add_complete_io(io_end); 2978 ext4_add_complete_io(io_end);
2994} 2979}
2995 2980
2996static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2997{
2998 ext4_io_end_t *io_end = bh->b_private;
2999 struct inode *inode;
3000
3001 if (!test_clear_buffer_uninit(bh) || !io_end)
3002 goto out;
3003
3004 if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
3005 ext4_msg(io_end->inode->i_sb, KERN_INFO,
3006 "sb umounted, discard end_io request for inode %lu",
3007 io_end->inode->i_ino);
3008 ext4_free_io_end(io_end);
3009 goto out;
3010 }
3011
3012 /*
3013 * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
3014 * but being more careful is always safe for the future change.
3015 */
3016 inode = io_end->inode;
3017 ext4_set_io_unwritten_flag(inode, io_end);
3018 ext4_add_complete_io(io_end);
3019out:
3020 bh->b_private = NULL;
3021 bh->b_end_io = NULL;
3022 clear_buffer_uninit(bh);
3023 end_buffer_async_write(bh, uptodate);
3024}
3025
3026static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
3027{
3028 ext4_io_end_t *io_end;
3029 struct page *page = bh->b_page;
3030 loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
3031 size_t size = bh->b_size;
3032
3033retry:
3034 io_end = ext4_init_io_end(inode, GFP_ATOMIC);
3035 if (!io_end) {
3036 pr_warn_ratelimited("%s: allocation fail\n", __func__);
3037 schedule();
3038 goto retry;
3039 }
3040 io_end->offset = offset;
3041 io_end->size = size;
3042 /*
3043 * We need to hold a reference to the page to make sure it
3044 * doesn't get evicted before ext4_end_io_work() has a chance
3045 * to convert the extent from written to unwritten.
3046 */
3047 io_end->page = page;
3048 get_page(io_end->page);
3049
3050 bh->b_private = io_end;
3051 bh->b_end_io = ext4_end_io_buffer_write;
3052 return 0;
3053}
3054
3055/* 2981/*
3056 * For ext4 extent files, ext4 will do direct-io write to holes, 2982 * For ext4 extent files, ext4 will do direct-io write to holes,
3057 * preallocated extents, and those write extend the file, no need to 2983 * preallocated extents, and those write extend the file, no need to
@@ -3553,20 +3479,20 @@ int ext4_can_truncate(struct inode *inode)
3553 3479
3554int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 3480int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3555{ 3481{
3556 struct inode *inode = file->f_path.dentry->d_inode; 3482 struct inode *inode = file_inode(file);
3557 if (!S_ISREG(inode->i_mode)) 3483 if (!S_ISREG(inode->i_mode))
3558 return -EOPNOTSUPP; 3484 return -EOPNOTSUPP;
3559 3485
3560 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 3486 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3561 /* TODO: Add support for non extent hole punching */ 3487 return ext4_ind_punch_hole(file, offset, length);
3562 return -EOPNOTSUPP;
3563 }
3564 3488
3565 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { 3489 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3566 /* TODO: Add support for bigalloc file systems */ 3490 /* TODO: Add support for bigalloc file systems */
3567 return -EOPNOTSUPP; 3491 return -EOPNOTSUPP;
3568 } 3492 }
3569 3493
3494 trace_ext4_punch_hole(inode, offset, length);
3495
3570 return ext4_ext_punch_hole(file, offset, length); 3496 return ext4_ext_punch_hole(file, offset, length);
3571} 3497}
3572 3498
@@ -3660,11 +3586,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
3660 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); 3586 iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
3661 3587
3662 bh = sb_getblk(sb, block); 3588 bh = sb_getblk(sb, block);
3663 if (!bh) { 3589 if (unlikely(!bh))
3664 EXT4_ERROR_INODE_BLOCK(inode, block, 3590 return -ENOMEM;
3665 "unable to read itable block");
3666 return -EIO;
3667 }
3668 if (!buffer_uptodate(bh)) { 3591 if (!buffer_uptodate(bh)) {
3669 lock_buffer(bh); 3592 lock_buffer(bh);
3670 3593
@@ -3696,7 +3619,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
3696 3619
3697 /* Is the inode bitmap in cache? */ 3620 /* Is the inode bitmap in cache? */
3698 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); 3621 bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
3699 if (!bitmap_bh) 3622 if (unlikely(!bitmap_bh))
3700 goto make_io; 3623 goto make_io;
3701 3624
3702 /* 3625 /*
@@ -4404,8 +4327,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4404 4327
4405 /* (user+group)*(old+new) structure, inode write (sb, 4328 /* (user+group)*(old+new) structure, inode write (sb,
4406 * inode block, ? - but truncate inode update has it) */ 4329 * inode block, ? - but truncate inode update has it) */
4407 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ 4330 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4408 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); 4331 (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
4332 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
4409 if (IS_ERR(handle)) { 4333 if (IS_ERR(handle)) {
4410 error = PTR_ERR(handle); 4334 error = PTR_ERR(handle);
4411 goto err_out; 4335 goto err_out;
@@ -4440,7 +4364,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4440 (attr->ia_size < inode->i_size)) { 4364 (attr->ia_size < inode->i_size)) {
4441 handle_t *handle; 4365 handle_t *handle;
4442 4366
4443 handle = ext4_journal_start(inode, 3); 4367 handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
4444 if (IS_ERR(handle)) { 4368 if (IS_ERR(handle)) {
4445 error = PTR_ERR(handle); 4369 error = PTR_ERR(handle);
4446 goto err_out; 4370 goto err_out;
@@ -4460,7 +4384,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4460 attr->ia_size); 4384 attr->ia_size);
4461 if (error) { 4385 if (error) {
4462 /* Do as much error cleanup as possible */ 4386 /* Do as much error cleanup as possible */
4463 handle = ext4_journal_start(inode, 3); 4387 handle = ext4_journal_start(inode,
4388 EXT4_HT_INODE, 3);
4464 if (IS_ERR(handle)) { 4389 if (IS_ERR(handle)) {
4465 ext4_orphan_del(NULL, inode); 4390 ext4_orphan_del(NULL, inode);
4466 goto err_out; 4391 goto err_out;
@@ -4801,7 +4726,7 @@ void ext4_dirty_inode(struct inode *inode, int flags)
4801{ 4726{
4802 handle_t *handle; 4727 handle_t *handle;
4803 4728
4804 handle = ext4_journal_start(inode, 2); 4729 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
4805 if (IS_ERR(handle)) 4730 if (IS_ERR(handle))
4806 goto out; 4731 goto out;
4807 4732
@@ -4902,7 +4827,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4902 4827
4903 /* Finally we can mark the inode as dirty. */ 4828 /* Finally we can mark the inode as dirty. */
4904 4829
4905 handle = ext4_journal_start(inode, 1); 4830 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
4906 if (IS_ERR(handle)) 4831 if (IS_ERR(handle))
4907 return PTR_ERR(handle); 4832 return PTR_ERR(handle);
4908 4833
@@ -4926,7 +4851,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4926 unsigned long len; 4851 unsigned long len;
4927 int ret; 4852 int ret;
4928 struct file *file = vma->vm_file; 4853 struct file *file = vma->vm_file;
4929 struct inode *inode = file->f_path.dentry->d_inode; 4854 struct inode *inode = file_inode(file);
4930 struct address_space *mapping = inode->i_mapping; 4855 struct address_space *mapping = inode->i_mapping;
4931 handle_t *handle; 4856 handle_t *handle;
4932 get_block_t *get_block; 4857 get_block_t *get_block;
@@ -4968,7 +4893,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4968 0, len, NULL, 4893 0, len, NULL,
4969 ext4_bh_unmapped)) { 4894 ext4_bh_unmapped)) {
4970 /* Wait so that we don't change page under IO */ 4895 /* Wait so that we don't change page under IO */
4971 wait_on_page_writeback(page); 4896 wait_for_stable_page(page);
4972 ret = VM_FAULT_LOCKED; 4897 ret = VM_FAULT_LOCKED;
4973 goto out; 4898 goto out;
4974 } 4899 }
@@ -4980,7 +4905,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4980 else 4905 else
4981 get_block = ext4_get_block; 4906 get_block = ext4_get_block;
4982retry_alloc: 4907retry_alloc:
4983 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); 4908 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
4909 ext4_writepage_trans_blocks(inode));
4984 if (IS_ERR(handle)) { 4910 if (IS_ERR(handle)) {
4985 ret = VM_FAULT_SIGBUS; 4911 ret = VM_FAULT_SIGBUS;
4986 goto out; 4912 goto out;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5747f52f7c72..721f4d33e148 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -22,7 +22,7 @@
22 22
23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
24{ 24{
25 struct inode *inode = filp->f_dentry->d_inode; 25 struct inode *inode = file_inode(filp);
26 struct super_block *sb = inode->i_sb; 26 struct super_block *sb = inode->i_sb;
27 struct ext4_inode_info *ei = EXT4_I(inode); 27 struct ext4_inode_info *ei = EXT4_I(inode);
28 unsigned int flags; 28 unsigned int flags;
@@ -104,7 +104,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
104 } else if (oldflags & EXT4_EOFBLOCKS_FL) 104 } else if (oldflags & EXT4_EOFBLOCKS_FL)
105 ext4_truncate(inode); 105 ext4_truncate(inode);
106 106
107 handle = ext4_journal_start(inode, 1); 107 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
108 if (IS_ERR(handle)) { 108 if (IS_ERR(handle)) {
109 err = PTR_ERR(handle); 109 err = PTR_ERR(handle);
110 goto flags_out; 110 goto flags_out;
@@ -173,7 +173,7 @@ flags_out:
173 } 173 }
174 174
175 mutex_lock(&inode->i_mutex); 175 mutex_lock(&inode->i_mutex);
176 handle = ext4_journal_start(inode, 1); 176 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
177 if (IS_ERR(handle)) { 177 if (IS_ERR(handle)) {
178 err = PTR_ERR(handle); 178 err = PTR_ERR(handle);
179 goto unlock_out; 179 goto unlock_out;
@@ -313,6 +313,9 @@ mext_out:
313 if (err == 0) 313 if (err == 0)
314 err = err2; 314 err = err2;
315 mnt_drop_write_file(filp); 315 mnt_drop_write_file(filp);
316 if (!err && ext4_has_group_desc_csum(sb) &&
317 test_opt(sb, INIT_INODE_TABLE))
318 err = ext4_register_li_request(sb, input.group);
316group_add_out: 319group_add_out:
317 ext4_resize_end(sb); 320 ext4_resize_end(sb);
318 return err; 321 return err;
@@ -358,6 +361,7 @@ group_add_out:
358 ext4_fsblk_t n_blocks_count; 361 ext4_fsblk_t n_blocks_count;
359 struct super_block *sb = inode->i_sb; 362 struct super_block *sb = inode->i_sb;
360 int err = 0, err2 = 0; 363 int err = 0, err2 = 0;
364 ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
361 365
362 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 366 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
363 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 367 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
@@ -388,6 +392,11 @@ group_add_out:
388 if (err == 0) 392 if (err == 0)
389 err = err2; 393 err = err2;
390 mnt_drop_write_file(filp); 394 mnt_drop_write_file(filp);
395 if (!err && (o_group > EXT4_SB(sb)->s_groups_count) &&
396 ext4_has_group_desc_csum(sb) &&
397 test_opt(sb, INIT_INODE_TABLE))
398 err = ext4_register_li_request(sb, o_group);
399
391resizefs_out: 400resizefs_out:
392 ext4_resize_end(sb); 401 ext4_resize_end(sb);
393 return err; 402 return err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1bf6fe785c4f..6540ebe058e3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,11 +23,18 @@
23 23
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "mballoc.h" 25#include "mballoc.h"
26#include <linux/debugfs.h>
27#include <linux/log2.h> 26#include <linux/log2.h>
27#include <linux/module.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <trace/events/ext4.h> 29#include <trace/events/ext4.h>
30 30
31#ifdef CONFIG_EXT4_DEBUG
32ushort ext4_mballoc_debug __read_mostly;
33
34module_param_named(mballoc_debug, ext4_mballoc_debug, ushort, 0644);
35MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
36#endif
37
31/* 38/*
32 * MUSTDO: 39 * MUSTDO:
33 * - test ext4_ext_search_left() and ext4_ext_search_right() 40 * - test ext4_ext_search_left() and ext4_ext_search_right()
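In mballoc.c the debug knob becomes a module parameter above (the debugfs file it replaces is deleted further down), so it can be flipped through /sys/module or, with ext4 built in, on the kernel command line. A minimal self-contained module sketch of the same module_param_named() idiom; this is a demo module, not ext4 code:

#include <linux/module.h>
#include <linux/kernel.h>

static ushort demo_debug;       /* 0 = quiet, higher = more verbose */

module_param_named(debug, demo_debug, ushort, 0644);
MODULE_PARM_DESC(debug, "Debugging level for the demo module");

static int __init demo_init(void)
{
        if (demo_debug)
                pr_info("demo: loaded with debug level %u\n", demo_debug);
        return 0;
}

static void __exit demo_exit(void)
{
        pr_info("demo: unloaded\n");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Because of the 0644 permission, the value can also be read or changed at runtime through /sys/module/<module>/parameters/debug.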
@@ -1884,15 +1891,19 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1884 case 0: 1891 case 0:
1885 BUG_ON(ac->ac_2order == 0); 1892 BUG_ON(ac->ac_2order == 0);
1886 1893
1887 if (grp->bb_largest_free_order < ac->ac_2order)
1888 return 0;
1889
1890 /* Avoid using the first bg of a flexgroup for data files */ 1894 /* Avoid using the first bg of a flexgroup for data files */
1891 if ((ac->ac_flags & EXT4_MB_HINT_DATA) && 1895 if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
1892 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && 1896 (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
1893 ((group % flex_size) == 0)) 1897 ((group % flex_size) == 0))
1894 return 0; 1898 return 0;
1895 1899
1900 if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
1901 (free / fragments) >= ac->ac_g_ex.fe_len)
1902 return 1;
1903
1904 if (grp->bb_largest_free_order < ac->ac_2order)
1905 return 0;
1906
1896 return 1; 1907 return 1;
1897 case 1: 1908 case 1:
1898 if ((free / fragments) >= ac->ac_g_ex.fe_len) 1909 if ((free / fragments) >= ac->ac_g_ex.fe_len)
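The reordered cr == 0 checks above accept a group when the requested order is larger than the buddy data can describe or when the group's average free fragment already covers the goal, and only then require bb_largest_free_order to reach the request order. A hedged userspace sketch of that decision order, with simplified field names and an added divide-by-zero guard:

#include <stdio.h>
#include <stdbool.h>

struct group_info {
        int largest_free_order; /* log2 of the biggest buddy chunk */
        long free;              /* free blocks in the group */
        long fragments;         /* number of free fragments */
};

static bool good_group_cr0(const struct group_info *g, int req_order,
                           long goal_len, int blocksize_bits)
{
        /* Buddy data cannot describe orders this large; just try the group. */
        if (req_order > blocksize_bits + 1)
                return true;
        /* Average free fragment already big enough for the goal. */
        if (g->fragments && g->free / g->fragments >= goal_len)
                return true;
        return g->largest_free_order >= req_order;
}

int main(void)
{
        struct group_info g = { .largest_free_order = 6, .free = 4096,
                                .fragments = 16 };

        printf("good for order-8 request: %d\n",
               good_group_cr0(&g, 8, 256, 12));
        return 0;
}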
@@ -2007,7 +2018,7 @@ repeat:
2007 } 2018 }
2008 2019
2009 ac->ac_groups_scanned++; 2020 ac->ac_groups_scanned++;
2010 if (cr == 0) 2021 if (cr == 0 && ac->ac_2order < sb->s_blocksize_bits+2)
2011 ext4_mb_simple_scan_group(ac, &e4b); 2022 ext4_mb_simple_scan_group(ac, &e4b);
2012 else if (cr == 1 && sbi->s_stripe && 2023 else if (cr == 1 && sbi->s_stripe &&
2013 !(ac->ac_g_ex.fe_len % sbi->s_stripe)) 2024 !(ac->ac_g_ex.fe_len % sbi->s_stripe))
@@ -2656,40 +2667,6 @@ static void ext4_free_data_callback(struct super_block *sb,
2656 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2667 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2657} 2668}
2658 2669
2659#ifdef CONFIG_EXT4_DEBUG
2660u8 mb_enable_debug __read_mostly;
2661
2662static struct dentry *debugfs_dir;
2663static struct dentry *debugfs_debug;
2664
2665static void __init ext4_create_debugfs_entry(void)
2666{
2667 debugfs_dir = debugfs_create_dir("ext4", NULL);
2668 if (debugfs_dir)
2669 debugfs_debug = debugfs_create_u8("mballoc-debug",
2670 S_IRUGO | S_IWUSR,
2671 debugfs_dir,
2672 &mb_enable_debug);
2673}
2674
2675static void ext4_remove_debugfs_entry(void)
2676{
2677 debugfs_remove(debugfs_debug);
2678 debugfs_remove(debugfs_dir);
2679}
2680
2681#else
2682
2683static void __init ext4_create_debugfs_entry(void)
2684{
2685}
2686
2687static void ext4_remove_debugfs_entry(void)
2688{
2689}
2690
2691#endif
2692
2693int __init ext4_init_mballoc(void) 2670int __init ext4_init_mballoc(void)
2694{ 2671{
2695 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space, 2672 ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
@@ -2711,7 +2688,6 @@ int __init ext4_init_mballoc(void)
2711 kmem_cache_destroy(ext4_ac_cachep); 2688 kmem_cache_destroy(ext4_ac_cachep);
2712 return -ENOMEM; 2689 return -ENOMEM;
2713 } 2690 }
2714 ext4_create_debugfs_entry();
2715 return 0; 2691 return 0;
2716} 2692}
2717 2693
@@ -2726,7 +2702,6 @@ void ext4_exit_mballoc(void)
2726 kmem_cache_destroy(ext4_ac_cachep); 2702 kmem_cache_destroy(ext4_ac_cachep);
2727 kmem_cache_destroy(ext4_free_data_cachep); 2703 kmem_cache_destroy(ext4_free_data_cachep);
2728 ext4_groupinfo_destroy_slabs(); 2704 ext4_groupinfo_destroy_slabs();
2729 ext4_remove_debugfs_entry();
2730} 2705}
2731 2706
2732 2707
@@ -3872,7 +3847,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3872 struct super_block *sb = ac->ac_sb; 3847 struct super_block *sb = ac->ac_sb;
3873 ext4_group_t ngroups, i; 3848 ext4_group_t ngroups, i;
3874 3849
3875 if (!mb_enable_debug || 3850 if (!ext4_mballoc_debug ||
3876 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3851 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3877 return; 3852 return;
3878 3853
@@ -4005,8 +3980,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4005 len = ar->len; 3980 len = ar->len;
4006 3981
4007 /* just a dirty hack to filter too big requests */ 3982 /* just a dirty hack to filter too big requests */
4008 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10) 3983 if (len >= EXT4_CLUSTERS_PER_GROUP(sb))
4009 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10; 3984 len = EXT4_CLUSTERS_PER_GROUP(sb);
4010 3985
4011 /* start searching from the goal */ 3986 /* start searching from the goal */
4012 goal = ar->goal; 3987 goal = ar->goal;
@@ -4136,7 +4111,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4136 /* The max size of hash table is PREALLOC_TB_SIZE */ 4111 /* The max size of hash table is PREALLOC_TB_SIZE */
4137 order = PREALLOC_TB_SIZE - 1; 4112 order = PREALLOC_TB_SIZE - 1;
4138 /* Add the prealloc space to lg */ 4113 /* Add the prealloc space to lg */
4139 rcu_read_lock(); 4114 spin_lock(&lg->lg_prealloc_lock);
4140 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], 4115 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4141 pa_inode_list) { 4116 pa_inode_list) {
4142 spin_lock(&tmp_pa->pa_lock); 4117 spin_lock(&tmp_pa->pa_lock);
@@ -4160,12 +4135,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4160 if (!added) 4135 if (!added)
4161 list_add_tail_rcu(&pa->pa_inode_list, 4136 list_add_tail_rcu(&pa->pa_inode_list,
4162 &lg->lg_prealloc_list[order]); 4137 &lg->lg_prealloc_list[order]);
4163 rcu_read_unlock(); 4138 spin_unlock(&lg->lg_prealloc_lock);
4164 4139
4165 /* Now trim the list to be not more than 8 elements */ 4140 /* Now trim the list to be not more than 8 elements */
4166 if (lg_prealloc_count > 8) { 4141 if (lg_prealloc_count > 8) {
4167 ext4_mb_discard_lg_preallocations(sb, lg, 4142 ext4_mb_discard_lg_preallocations(sb, lg,
4168 order, lg_prealloc_count); 4143 order, lg_prealloc_count);
4169 return; 4144 return;
4170 } 4145 }
4171 return ; 4146 return ;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 3ccd889ba953..08481ee84cd5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -37,11 +37,11 @@
37/* 37/*
38 */ 38 */
39#ifdef CONFIG_EXT4_DEBUG 39#ifdef CONFIG_EXT4_DEBUG
40extern u8 mb_enable_debug; 40extern ushort ext4_mballoc_debug;
41 41
42#define mb_debug(n, fmt, a...) \ 42#define mb_debug(n, fmt, a...) \
43 do { \ 43 do { \
44 if ((n) <= mb_enable_debug) { \ 44 if ((n) <= ext4_mballoc_debug) { \
45 printk(KERN_DEBUG "(%s, %d): %s: ", \ 45 printk(KERN_DEBUG "(%s, %d): %s: ", \
46 __FILE__, __LINE__, __func__); \ 46 __FILE__, __LINE__, __func__); \
47 printk(fmt, ## a); \ 47 printk(fmt, ## a); \
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index db8226d595fa..480acf4a085f 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -456,11 +456,14 @@ int ext4_ext_migrate(struct inode *inode)
456 */ 456 */
457 return retval; 457 return retval;
458 458
459 handle = ext4_journal_start(inode, 459 /*
460 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 460 * Worst case we can touch the allocation bitmaps, a bgd
461 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 461 * block, and a block to link in the orphan list. We do need
462 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) 462 * need to worry about credits for modifying the quota inode.
463 + 1); 463 */
464 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
465 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
466
464 if (IS_ERR(handle)) { 467 if (IS_ERR(handle)) {
465 retval = PTR_ERR(handle); 468 retval = PTR_ERR(handle);
466 return retval; 469 return retval;
@@ -507,7 +510,7 @@ int ext4_ext_migrate(struct inode *inode)
507 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 510 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
508 up_read((&EXT4_I(inode)->i_data_sem)); 511 up_read((&EXT4_I(inode)->i_data_sem));
509 512
510 handle = ext4_journal_start(inode, 1); 513 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
511 if (IS_ERR(handle)) { 514 if (IS_ERR(handle)) {
512 /* 515 /*
513 * It is impossible to update on-disk structures without 516 * It is impossible to update on-disk structures without
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index fe7c63f4717e..f9b551561d2c 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -80,6 +80,8 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
80 * is not blocked in the elevator. */ 80 * is not blocked in the elevator. */
81 if (!*bh) 81 if (!*bh)
82 *bh = sb_getblk(sb, mmp_block); 82 *bh = sb_getblk(sb, mmp_block);
83 if (!*bh)
84 return -ENOMEM;
83 if (*bh) { 85 if (*bh) {
84 get_bh(*bh); 86 get_bh(*bh);
85 lock_buffer(*bh); 87 lock_buffer(*bh);
@@ -91,7 +93,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
91 *bh = NULL; 93 *bh = NULL;
92 } 94 }
93 } 95 }
94 if (!*bh) { 96 if (unlikely(!*bh)) {
95 ext4_warning(sb, "Error while reading MMP block %llu", 97 ext4_warning(sb, "Error while reading MMP block %llu",
96 mmp_block); 98 mmp_block);
97 return -EIO; 99 return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index d9cc5ee42f53..4e81d47aa8cb 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -681,6 +681,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
681 681
682 depth = ext_depth(donor_inode); 682 depth = ext_depth(donor_inode);
683 dext = donor_path[depth].p_ext; 683 dext = donor_path[depth].p_ext;
684 if (unlikely(!dext))
685 goto missing_donor_extent;
684 tmp_dext = *dext; 686 tmp_dext = *dext;
685 687
686 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 688 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
@@ -691,7 +693,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
691 /* Loop for the donor extents */ 693 /* Loop for the donor extents */
692 while (1) { 694 while (1) {
693 /* The extent for donor must be found. */ 695 /* The extent for donor must be found. */
694 if (!dext) { 696 if (unlikely(!dext)) {
697 missing_donor_extent:
695 EXT4_ERROR_INODE(donor_inode, 698 EXT4_ERROR_INODE(donor_inode,
696 "The extent for donor must be found"); 699 "The extent for donor must be found");
697 *err = -EIO; 700 *err = -EIO;
@@ -761,9 +764,6 @@ out:
761 kfree(donor_path); 764 kfree(donor_path);
762 } 765 }
763 766
764 ext4_ext_invalidate_cache(orig_inode);
765 ext4_ext_invalidate_cache(donor_inode);
766
767 return replaced_count; 767 return replaced_count;
768} 768}
769 769
@@ -900,7 +900,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
900 pgoff_t orig_page_offset, int data_offset_in_page, 900 pgoff_t orig_page_offset, int data_offset_in_page,
901 int block_len_in_page, int uninit, int *err) 901 int block_len_in_page, int uninit, int *err)
902{ 902{
903 struct inode *orig_inode = o_filp->f_dentry->d_inode; 903 struct inode *orig_inode = file_inode(o_filp);
904 struct page *pagep[2] = {NULL, NULL}; 904 struct page *pagep[2] = {NULL, NULL};
905 handle_t *handle; 905 handle_t *handle;
906 ext4_lblk_t orig_blk_offset; 906 ext4_lblk_t orig_blk_offset;
@@ -920,7 +920,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
920again: 920again:
921 *err = 0; 921 *err = 0;
922 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 922 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
923 handle = ext4_journal_start(orig_inode, jblocks); 923 handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);
924 if (IS_ERR(handle)) { 924 if (IS_ERR(handle)) {
925 *err = PTR_ERR(handle); 925 *err = PTR_ERR(handle);
926 return 0; 926 return 0;
@@ -1279,8 +1279,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1279 __u64 orig_start, __u64 donor_start, __u64 len, 1279 __u64 orig_start, __u64 donor_start, __u64 len,
1280 __u64 *moved_len) 1280 __u64 *moved_len)
1281{ 1281{
1282 struct inode *orig_inode = o_filp->f_dentry->d_inode; 1282 struct inode *orig_inode = file_inode(o_filp);
1283 struct inode *donor_inode = d_filp->f_dentry->d_inode; 1283 struct inode *donor_inode = file_inode(d_filp);
1284 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; 1284 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
1285 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy; 1285 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1286 ext4_lblk_t block_start = orig_start; 1286 ext4_lblk_t block_start = orig_start;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8990165346ee..3825d6aa8336 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -47,38 +47,111 @@
47#define NAMEI_RA_CHUNKS 2 47#define NAMEI_RA_CHUNKS 2
48#define NAMEI_RA_BLOCKS 4 48#define NAMEI_RA_BLOCKS 4
49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 49#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
50#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
51 50
52static struct buffer_head *ext4_append(handle_t *handle, 51static struct buffer_head *ext4_append(handle_t *handle,
53 struct inode *inode, 52 struct inode *inode,
54 ext4_lblk_t *block, int *err) 53 ext4_lblk_t *block)
55{ 54{
56 struct buffer_head *bh; 55 struct buffer_head *bh;
56 int err = 0;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && 58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >= 59 ((inode->i_size >> 10) >=
60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { 60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
61 *err = -ENOSPC; 61 return ERR_PTR(-ENOSPC);
62 return NULL;
63 }
64 62
65 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
66 64
67 bh = ext4_bread(handle, inode, *block, 1, err); 65 bh = ext4_bread(handle, inode, *block, 1, &err);
68 if (bh) { 66 if (!bh)
69 inode->i_size += inode->i_sb->s_blocksize; 67 return ERR_PTR(err);
70 EXT4_I(inode)->i_disksize = inode->i_size; 68 inode->i_size += inode->i_sb->s_blocksize;
71 *err = ext4_journal_get_write_access(handle, bh); 69 EXT4_I(inode)->i_disksize = inode->i_size;
72 if (*err) { 70 err = ext4_journal_get_write_access(handle, bh);
71 if (err) {
72 brelse(bh);
73 ext4_std_error(inode->i_sb, err);
74 return ERR_PTR(err);
75 }
76 return bh;
77}
78
79static int ext4_dx_csum_verify(struct inode *inode,
80 struct ext4_dir_entry *dirent);
81
82typedef enum {
83 EITHER, INDEX, DIRENT
84} dirblock_type_t;
85
86#define ext4_read_dirblock(inode, block, type) \
87 __ext4_read_dirblock((inode), (block), (type), __LINE__)
88
89static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
90 ext4_lblk_t block,
91 dirblock_type_t type,
92 unsigned int line)
93{
94 struct buffer_head *bh;
95 struct ext4_dir_entry *dirent;
96 int err = 0, is_dx_block = 0;
97
98 bh = ext4_bread(NULL, inode, block, 0, &err);
99 if (!bh) {
100 if (err == 0) {
101 ext4_error_inode(inode, __func__, line, block,
102 "Directory hole found");
103 return ERR_PTR(-EIO);
104 }
105 __ext4_warning(inode->i_sb, __func__, line,
106 "error reading directory block "
107 "(ino %lu, block %lu)", inode->i_ino,
108 (unsigned long) block);
109 return ERR_PTR(err);
110 }
111 dirent = (struct ext4_dir_entry *) bh->b_data;
112 /* Determine whether or not we have an index block */
113 if (is_dx(inode)) {
114 if (block == 0)
115 is_dx_block = 1;
116 else if (ext4_rec_len_from_disk(dirent->rec_len,
117 inode->i_sb->s_blocksize) ==
118 inode->i_sb->s_blocksize)
119 is_dx_block = 1;
120 }
121 if (!is_dx_block && type == INDEX) {
122 ext4_error_inode(inode, __func__, line, block,
123 "directory leaf block found instead of index block");
124 return ERR_PTR(-EIO);
125 }
126 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
127 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
128 buffer_verified(bh))
129 return bh;
130
131 /*
132 * An empty leaf block can get mistaken for a index block; for
133 * this reason, we can only check the index checksum when the
134 * caller is sure it should be an index block.
135 */
136 if (is_dx_block && type == INDEX) {
137 if (ext4_dx_csum_verify(inode, dirent))
138 set_buffer_verified(bh);
139 else {
140 ext4_error_inode(inode, __func__, line, block,
141 "Directory index failed checksum");
73 brelse(bh); 142 brelse(bh);
74 bh = NULL; 143 return ERR_PTR(-EIO);
75 } 144 }
76 } 145 }
77 if (!bh && !(*err)) { 146 if (!is_dx_block) {
78 *err = -EIO; 147 if (ext4_dirent_csum_verify(inode, dirent))
79 ext4_error(inode->i_sb, 148 set_buffer_verified(bh);
80 "Directory hole detected on inode %lu\n", 149 else {
81 inode->i_ino); 150 ext4_error_inode(inode, __func__, line, block,
151 "Directory block failed checksum");
152 brelse(bh);
153 return ERR_PTR(-EIO);
154 }
82 } 155 }
83 return bh; 156 return bh;
84} 157}
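Both ext4_append() and the new __ext4_read_dirblock() above report failure through ERR_PTR() instead of a separate int *err out-parameter, so callers only need an IS_ERR() check. A freestanding userspace re-implementation of that encoding, purely to show the calling convention; the kernel's real helpers live in include/linux/err.h:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>

/* Userspace stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR(). */
static inline void *ERR_PTR(long err)      { return (void *)(intptr_t)err; }
static inline long PTR_ERR(const void *p)  { return (long)(intptr_t)p; }
static inline int IS_ERR(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-4095;
}

/* A read helper that fails the way ext4_read_dirblock() does on a bad block. */
static void *read_block(int blocknr)
{
        if (blocknr < 0)
                return ERR_PTR(-EIO);
        return malloc(4096);
}

int main(void)
{
        void *buf = read_block(-1);

        if (IS_ERR(buf)) {
                printf("read_block failed: %ld\n", PTR_ERR(buf));
                return 1;
        }
        free(buf);
        return 0;
}

Encoding the error in the pointer keeps success and failure in one return value, which is why the callers in the hunk above collapse down to a single IS_ERR() test.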
@@ -604,9 +677,9 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
604 u32 hash; 677 u32 hash;
605 678
606 frame->bh = NULL; 679 frame->bh = NULL;
607 if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { 680 bh = ext4_read_dirblock(dir, 0, INDEX);
608 if (*err == 0) 681 if (IS_ERR(bh)) {
609 *err = ERR_BAD_DX_DIR; 682 *err = PTR_ERR(bh);
610 goto fail; 683 goto fail;
611 } 684 }
612 root = (struct dx_root *) bh->b_data; 685 root = (struct dx_root *) bh->b_data;
@@ -643,15 +716,6 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
643 goto fail; 716 goto fail;
644 } 717 }
645 718
646 if (!buffer_verified(bh) &&
647 !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
648 ext4_warning(dir->i_sb, "Root failed checksum");
649 brelse(bh);
650 *err = ERR_BAD_DX_DIR;
651 goto fail;
652 }
653 set_buffer_verified(bh);
654
655 entries = (struct dx_entry *) (((char *)&root->info) + 719 entries = (struct dx_entry *) (((char *)&root->info) +
656 root->info.info_length); 720 root->info.info_length);
657 721
@@ -709,22 +773,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
709 frame->entries = entries; 773 frame->entries = entries;
710 frame->at = at; 774 frame->at = at;
711 if (!indirect--) return frame; 775 if (!indirect--) return frame;
712 if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { 776 bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
713 if (!(*err)) 777 if (IS_ERR(bh)) {
714 *err = ERR_BAD_DX_DIR; 778 *err = PTR_ERR(bh);
715 goto fail2; 779 goto fail2;
716 } 780 }
717 at = entries = ((struct dx_node *) bh->b_data)->entries; 781 entries = ((struct dx_node *) bh->b_data)->entries;
718
719 if (!buffer_verified(bh) &&
720 !ext4_dx_csum_verify(dir,
721 (struct ext4_dir_entry *)bh->b_data)) {
722 ext4_warning(dir->i_sb, "Node failed checksum");
723 brelse(bh);
724 *err = ERR_BAD_DX_DIR;
725 goto fail;
726 }
727 set_buffer_verified(bh);
728 782
729 if (dx_get_limit(entries) != dx_node_limit (dir)) { 783 if (dx_get_limit(entries) != dx_node_limit (dir)) {
730 ext4_warning(dir->i_sb, 784 ext4_warning(dir->i_sb,
@@ -783,7 +837,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
783{ 837{
784 struct dx_frame *p; 838 struct dx_frame *p;
785 struct buffer_head *bh; 839 struct buffer_head *bh;
 786 int err, num_frames = 0;
 840 int num_frames = 0;
787 __u32 bhash; 841 __u32 bhash;
788 842
789 p = frame; 843 p = frame;
@@ -822,25 +876,9 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
822 * block so no check is necessary 876 * block so no check is necessary
823 */ 877 */
824 while (num_frames--) { 878 while (num_frames--) {
825 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 879 bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
826 0, &err))) { 880 if (IS_ERR(bh))
827 if (!err) { 881 return PTR_ERR(bh);
828 ext4_error(dir->i_sb,
829 "Directory hole detected on inode %lu\n",
830 dir->i_ino);
831 return -EIO;
832 }
833 return err; /* Failure */
834 }
835
836 if (!buffer_verified(bh) &&
837 !ext4_dx_csum_verify(dir,
838 (struct ext4_dir_entry *)bh->b_data)) {
839 ext4_warning(dir->i_sb, "Node failed checksum");
840 return -EIO;
841 }
842 set_buffer_verified(bh);
843
844 p++; 882 p++;
845 brelse(p->bh); 883 brelse(p->bh);
846 p->bh = bh; 884 p->bh = bh;
@@ -866,20 +904,9 @@ static int htree_dirblock_to_tree(struct file *dir_file,
866 904
867 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 905 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
868 (unsigned long)block)); 906 (unsigned long)block));
869 if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { 907 bh = ext4_read_dirblock(dir, block, DIRENT);
870 if (!err) { 908 if (IS_ERR(bh))
871 err = -EIO; 909 return PTR_ERR(bh);
872 ext4_error(dir->i_sb,
873 "Directory hole detected on inode %lu\n",
874 dir->i_ino);
875 }
876 return err;
877 }
878
879 if (!buffer_verified(bh) &&
880 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
881 return -EIO;
882 set_buffer_verified(bh);
883 910
884 de = (struct ext4_dir_entry_2 *) bh->b_data; 911 de = (struct ext4_dir_entry_2 *) bh->b_data;
885 top = (struct ext4_dir_entry_2 *) ((char *) de + 912 top = (struct ext4_dir_entry_2 *) ((char *) de +
@@ -937,7 +964,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
937 964
938 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 965 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
939 start_hash, start_minor_hash)); 966 start_hash, start_minor_hash));
 940 dir = dir_file->f_path.dentry->d_inode;
 967 dir = file_inode(dir_file);
941 if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { 968 if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
942 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; 969 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
943 if (hinfo.hash_version <= DX_HASH_TEA) 970 if (hinfo.hash_version <= DX_HASH_TEA)
@@ -1333,26 +1360,11 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1333 return NULL; 1360 return NULL;
1334 do { 1361 do {
1335 block = dx_get_block(frame->at); 1362 block = dx_get_block(frame->at);
1336 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { 1363 bh = ext4_read_dirblock(dir, block, DIRENT);
1337 if (!(*err)) { 1364 if (IS_ERR(bh)) {
1338 *err = -EIO; 1365 *err = PTR_ERR(bh);
1339 ext4_error(dir->i_sb,
1340 "Directory hole detected on inode %lu\n",
1341 dir->i_ino);
1342 }
1343 goto errout;
1344 }
1345
1346 if (!buffer_verified(bh) &&
1347 !ext4_dirent_csum_verify(dir,
1348 (struct ext4_dir_entry *)bh->b_data)) {
1349 EXT4_ERROR_INODE(dir, "checksumming directory "
1350 "block %lu", (unsigned long)block);
1351 brelse(bh);
1352 *err = -EIO;
1353 goto errout; 1366 goto errout;
1354 } 1367 }
1355 set_buffer_verified(bh);
1356 retval = search_dirblock(bh, dir, d_name, 1368 retval = search_dirblock(bh, dir, d_name,
1357 block << EXT4_BLOCK_SIZE_BITS(sb), 1369 block << EXT4_BLOCK_SIZE_BITS(sb),
1358 res_dir); 1370 res_dir);
@@ -1536,11 +1548,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1536 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1548 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1537 csum_size = sizeof(struct ext4_dir_entry_tail); 1549 csum_size = sizeof(struct ext4_dir_entry_tail);
1538 1550
 1539 bh2 = ext4_append (handle, dir, &newblock, &err);
 1540 if (!(bh2)) {
 1541 brelse(*bh);
 1542 *bh = NULL;
 1543 goto errout;
 1551 bh2 = ext4_append(handle, dir, &newblock);
 1552 if (IS_ERR(bh2)) {
 1553 brelse(*bh);
 1554 *bh = NULL;
 1555 *error = PTR_ERR(bh2);
 1556 return NULL;
1544 } 1557 }
1545 1558
1546 BUFFER_TRACE(*bh, "get_write_access"); 1559 BUFFER_TRACE(*bh, "get_write_access");
@@ -1621,7 +1634,6 @@ journal_error:
1621 brelse(bh2); 1634 brelse(bh2);
1622 *bh = NULL; 1635 *bh = NULL;
1623 ext4_std_error(dir->i_sb, err); 1636 ext4_std_error(dir->i_sb, err);
1624errout:
1625 *error = err; 1637 *error = err;
1626 return NULL; 1638 return NULL;
1627} 1639}
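
do_split() above, and make_indexed_dir(), ext4_add_entry() and ext4_dx_add_entry() below, now take the new block from ext4_append() as either a buffer_head or an ERR_PTR() value instead of NULL plus an errno written through a pointer argument. The standalone snippet below illustrates the encoding trick itself; it is not kernel code, just a user-space rendering of what ERR_PTR()/IS_ERR()/PTR_ERR() do, under the usual assumption that the top 4095 values of the address space never hold a valid pointer.

#include <stdio.h>

#define MAX_ERRNO 4095

/* Encode a negative errno inside a pointer, as the kernel's ERR_PTR() does. */
static void *err_ptr(long error) { return (void *)error; }
static long ptr_err(const void *ptr) { return (long)ptr; }
static int is_err(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *get_block(int fail)
{
	static int block = 42;
	return fail ? err_ptr(-28 /* ENOSPC */) : &block;
}

int main(void)
{
	void *p = get_block(1);

	if (is_err(p))
		printf("allocation failed: errno %ld\n", -ptr_err(p));
	else
		printf("got block %d\n", *(int *)p);
	return 0;
}
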
@@ -1699,7 +1711,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1699 const char *name = dentry->d_name.name; 1711 const char *name = dentry->d_name.name;
1700 int namelen = dentry->d_name.len; 1712 int namelen = dentry->d_name.len;
1701 unsigned int blocksize = dir->i_sb->s_blocksize; 1713 unsigned int blocksize = dir->i_sb->s_blocksize;
1702 unsigned short reclen;
1703 int csum_size = 0; 1714 int csum_size = 0;
1704 int err; 1715 int err;
1705 1716
@@ -1707,7 +1718,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1707 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 1718 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1708 csum_size = sizeof(struct ext4_dir_entry_tail); 1719 csum_size = sizeof(struct ext4_dir_entry_tail);
1709 1720
1710 reclen = EXT4_DIR_REC_LEN(namelen);
1711 if (!de) { 1721 if (!de) {
1712 err = ext4_find_dest_de(dir, inode, 1722 err = ext4_find_dest_de(dir, inode,
1713 bh, bh->b_data, blocksize - csum_size, 1723 bh, bh->b_data, blocksize - csum_size,
@@ -1798,10 +1808,10 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1798 len = ((char *) root) + (blocksize - csum_size) - (char *) de; 1808 len = ((char *) root) + (blocksize - csum_size) - (char *) de;
1799 1809
1800 /* Allocate new block for the 0th block's dirents */ 1810 /* Allocate new block for the 0th block's dirents */
1801 bh2 = ext4_append(handle, dir, &block, &retval); 1811 bh2 = ext4_append(handle, dir, &block);
1802 if (!(bh2)) { 1812 if (IS_ERR(bh2)) {
1803 brelse(bh); 1813 brelse(bh);
1804 return retval; 1814 return PTR_ERR(bh2);
1805 } 1815 }
1806 ext4_set_inode_flag(dir, EXT4_INODE_INDEX); 1816 ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
1807 data1 = bh2->b_data; 1817 data1 = bh2->b_data;
@@ -1918,20 +1928,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1918 } 1928 }
1919 blocks = dir->i_size >> sb->s_blocksize_bits; 1929 blocks = dir->i_size >> sb->s_blocksize_bits;
1920 for (block = 0; block < blocks; block++) { 1930 for (block = 0; block < blocks; block++) {
 1921 if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
 1922 if (!retval) {
 1923 retval = -EIO;
 1924 ext4_error(inode->i_sb,
 1925 "Directory hole detected on inode %lu\n",
 1926 inode->i_ino);
 1927 }
 1928 return retval;
 1929 }
 1930 if (!buffer_verified(bh) &&
 1931 !ext4_dirent_csum_verify(dir,
 1932 (struct ext4_dir_entry *)bh->b_data))
 1933 return -EIO;
 1934 set_buffer_verified(bh);
 1931 bh = ext4_read_dirblock(dir, block, DIRENT);
 1932 if (IS_ERR(bh))
 1933 return PTR_ERR(bh);
 1934
1935 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1935 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1936 if (retval != -ENOSPC) { 1936 if (retval != -ENOSPC) {
1937 brelse(bh); 1937 brelse(bh);
@@ -1943,9 +1943,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1943 return make_indexed_dir(handle, dentry, inode, bh); 1943 return make_indexed_dir(handle, dentry, inode, bh);
1944 brelse(bh); 1944 brelse(bh);
1945 } 1945 }
 1946 bh = ext4_append(handle, dir, &block, &retval);
 1947 if (!bh)
 1948 return retval;
 1946 bh = ext4_append(handle, dir, &block);
 1947 if (IS_ERR(bh))
 1948 return PTR_ERR(bh);
1949 de = (struct ext4_dir_entry_2 *) bh->b_data; 1949 de = (struct ext4_dir_entry_2 *) bh->b_data;
1950 de->inode = 0; 1950 de->inode = 0;
1951 de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); 1951 de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
@@ -1982,22 +1982,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1982 return err; 1982 return err;
1983 entries = frame->entries; 1983 entries = frame->entries;
1984 at = frame->at; 1984 at = frame->at;
 1985
 1986 if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
 1987 if (!err) {
 1988 err = -EIO;
 1989 ext4_error(dir->i_sb,
 1990 "Directory hole detected on inode %lu\n",
 1991 dir->i_ino);
 1992 }
 1985 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
 1986 if (IS_ERR(bh)) {
 1987 err = PTR_ERR(bh);
 1988 bh = NULL;
1993 goto cleanup; 1989 goto cleanup;
1994 } 1990 }
1995 1991
1996 if (!buffer_verified(bh) &&
1997 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
1998 goto journal_error;
1999 set_buffer_verified(bh);
2000
2001 BUFFER_TRACE(bh, "get_write_access"); 1992 BUFFER_TRACE(bh, "get_write_access");
2002 err = ext4_journal_get_write_access(handle, bh); 1993 err = ext4_journal_get_write_access(handle, bh);
2003 if (err) 1994 if (err)
@@ -2025,9 +2016,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2025 err = -ENOSPC; 2016 err = -ENOSPC;
2026 goto cleanup; 2017 goto cleanup;
2027 } 2018 }
 2028 bh2 = ext4_append (handle, dir, &newblock, &err);
 2029 if (!(bh2))
 2030 goto cleanup;
 2019 bh2 = ext4_append(handle, dir, &newblock);
 2020 if (IS_ERR(bh2)) {
 2021 err = PTR_ERR(bh2);
 2022 goto cleanup;
 2023 }
2031 node2 = (struct dx_node *)(bh2->b_data); 2024 node2 = (struct dx_node *)(bh2->b_data);
2032 entries2 = node2->entries; 2025 entries2 = node2->entries;
2033 memset(&node2->fake, 0, sizeof(struct fake_dirent)); 2026 memset(&node2->fake, 0, sizeof(struct fake_dirent));
@@ -2106,8 +2099,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2106journal_error: 2099journal_error:
2107 ext4_std_error(dir->i_sb, err); 2100 ext4_std_error(dir->i_sb, err);
2108cleanup: 2101cleanup:
2109 if (bh) 2102 brelse(bh);
2110 brelse(bh);
2111 dx_release(frames); 2103 dx_release(frames);
2112 return err; 2104 return err;
2113} 2105}
@@ -2254,29 +2246,28 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2254{ 2246{
2255 handle_t *handle; 2247 handle_t *handle;
2256 struct inode *inode; 2248 struct inode *inode;
 2257 int err, retries = 0;
 2249 int err, credits, retries = 0;
2258 2250
2259 dquot_initialize(dir); 2251 dquot_initialize(dir);
2260 2252
2253 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2254 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2255 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2261retry: 2256retry:
2262 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2263 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2258 NULL, EXT4_HT_DIR, credits);
2264 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2259 handle = ext4_journal_current_handle();
2265 if (IS_ERR(handle))
2266 return PTR_ERR(handle);
2267
2268 if (IS_DIRSYNC(dir))
2269 ext4_handle_sync(handle);
2270
2271 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2272 err = PTR_ERR(inode); 2260 err = PTR_ERR(inode);
2273 if (!IS_ERR(inode)) { 2261 if (!IS_ERR(inode)) {
2274 inode->i_op = &ext4_file_inode_operations; 2262 inode->i_op = &ext4_file_inode_operations;
2275 inode->i_fop = &ext4_file_operations; 2263 inode->i_fop = &ext4_file_operations;
2276 ext4_set_aops(inode); 2264 ext4_set_aops(inode);
2277 err = ext4_add_nondir(handle, dentry, inode); 2265 err = ext4_add_nondir(handle, dentry, inode);
2266 if (!err && IS_DIRSYNC(dir))
2267 ext4_handle_sync(handle);
2278 } 2268 }
2279 ext4_journal_stop(handle); 2269 if (handle)
2270 ext4_journal_stop(handle);
2280 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2271 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2281 goto retry; 2272 goto retry;
2282 return err; 2273 return err;
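
The creation paths are restructured so that the inode allocator starts the transaction: the credit estimate is computed first, ext4_new_inode_start_handle() allocates the inode and opens an EXT4_HT_DIR handle in one step, the handle is fetched back with ext4_journal_current_handle(), and the DIRSYNC sync now happens only after the directory entry has actually been added. A condensed sketch of that shape (quota initialization, i_op/i_fop setup and other details omitted; example_create() is illustrative, not the literal function):

/* Condensed sketch of the new allocation pattern used by ext4_create(). */
static int example_create(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	handle_t *handle;
	struct inode *inode;
	int err, credits, retries = 0;

	credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
		  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
		  EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
retry:
	/* The allocator starts the handle itself ... */
	inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
					    NULL, EXT4_HT_DIR, credits);
	handle = ext4_journal_current_handle();	/* ... and we fetch it back. */
	err = PTR_ERR(inode);
	if (!IS_ERR(inode)) {
		err = ext4_add_nondir(handle, dentry, inode);
		if (!err && IS_DIRSYNC(dir))
			ext4_handle_sync(handle);	/* sync only on success */
	}
	if (handle)
		ext4_journal_stop(handle);
	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
		goto retry;
	return err;
}
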
@@ -2287,31 +2278,30 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2287{ 2278{
2288 handle_t *handle; 2279 handle_t *handle;
2289 struct inode *inode; 2280 struct inode *inode;
 2290 int err, retries = 0;
 2281 int err, credits, retries = 0;
2291 2282
2292 if (!new_valid_dev(rdev)) 2283 if (!new_valid_dev(rdev))
2293 return -EINVAL; 2284 return -EINVAL;
2294 2285
2295 dquot_initialize(dir); 2286 dquot_initialize(dir);
2296 2287
2288 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2289 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2290 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2297retry: 2291retry:
2298 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2292 inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
2299 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2293 NULL, EXT4_HT_DIR, credits);
2300 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2294 handle = ext4_journal_current_handle();
2301 if (IS_ERR(handle))
2302 return PTR_ERR(handle);
2303
2304 if (IS_DIRSYNC(dir))
2305 ext4_handle_sync(handle);
2306
2307 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2308 err = PTR_ERR(inode); 2295 err = PTR_ERR(inode);
2309 if (!IS_ERR(inode)) { 2296 if (!IS_ERR(inode)) {
2310 init_special_inode(inode, inode->i_mode, rdev); 2297 init_special_inode(inode, inode->i_mode, rdev);
2311 inode->i_op = &ext4_special_inode_operations; 2298 inode->i_op = &ext4_special_inode_operations;
2312 err = ext4_add_nondir(handle, dentry, inode); 2299 err = ext4_add_nondir(handle, dentry, inode);
2300 if (!err && IS_DIRSYNC(dir))
2301 ext4_handle_sync(handle);
2313 } 2302 }
2314 ext4_journal_stop(handle); 2303 if (handle)
2304 ext4_journal_stop(handle);
2315 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2305 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2316 goto retry; 2306 goto retry;
2317 return err; 2307 return err;
@@ -2351,6 +2341,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2351 struct buffer_head *dir_block = NULL; 2341 struct buffer_head *dir_block = NULL;
2352 struct ext4_dir_entry_2 *de; 2342 struct ext4_dir_entry_2 *de;
2353 struct ext4_dir_entry_tail *t; 2343 struct ext4_dir_entry_tail *t;
2344 ext4_lblk_t block = 0;
2354 unsigned int blocksize = dir->i_sb->s_blocksize; 2345 unsigned int blocksize = dir->i_sb->s_blocksize;
2355 int csum_size = 0; 2346 int csum_size = 0;
2356 int err; 2347 int err;
@@ -2367,17 +2358,10 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2367 goto out; 2358 goto out;
2368 } 2359 }
2369 2360
 2370 inode->i_size = EXT4_I(inode)->i_disksize = blocksize;
 2371 dir_block = ext4_bread(handle, inode, 0, 1, &err);
 2372 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
 2373 if (!err) {
 2374 err = -EIO;
 2375 ext4_error(inode->i_sb,
 2376 "Directory hole detected on inode %lu\n",
 2377 inode->i_ino);
 2378 }
 2379 goto out;
 2380 }
 2361 inode->i_size = 0;
 2362 dir_block = ext4_append(handle, inode, &block);
 2363 if (IS_ERR(dir_block))
 2364 return PTR_ERR(dir_block);
2381 BUFFER_TRACE(dir_block, "get_write_access"); 2365 BUFFER_TRACE(dir_block, "get_write_access");
2382 err = ext4_journal_get_write_access(handle, dir_block); 2366 err = ext4_journal_get_write_access(handle, dir_block);
2383 if (err) 2367 if (err)
@@ -2404,25 +2388,21 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2404{ 2388{
2405 handle_t *handle; 2389 handle_t *handle;
2406 struct inode *inode; 2390 struct inode *inode;
 2407 int err, retries = 0;
 2391 int err, credits, retries = 0;
2408 2392
2409 if (EXT4_DIR_LINK_MAX(dir)) 2393 if (EXT4_DIR_LINK_MAX(dir))
2410 return -EMLINK; 2394 return -EMLINK;
2411 2395
2412 dquot_initialize(dir); 2396 dquot_initialize(dir);
2413 2397
2398 credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2399 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2400 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2414retry: 2401retry:
2415 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2402 inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
2416 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 2403 &dentry->d_name,
2417 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); 2404 0, NULL, EXT4_HT_DIR, credits);
2418 if (IS_ERR(handle)) 2405 handle = ext4_journal_current_handle();
2419 return PTR_ERR(handle);
2420
2421 if (IS_DIRSYNC(dir))
2422 ext4_handle_sync(handle);
2423
2424 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
2425 &dentry->d_name, 0, NULL);
2426 err = PTR_ERR(inode); 2406 err = PTR_ERR(inode);
2427 if (IS_ERR(inode)) 2407 if (IS_ERR(inode))
2428 goto out_stop; 2408 goto out_stop;
@@ -2450,8 +2430,12 @@ out_clear_inode:
2450 goto out_clear_inode; 2430 goto out_clear_inode;
2451 unlock_new_inode(inode); 2431 unlock_new_inode(inode);
2452 d_instantiate(dentry, inode); 2432 d_instantiate(dentry, inode);
2433 if (IS_DIRSYNC(dir))
2434 ext4_handle_sync(handle);
2435
2453out_stop: 2436out_stop:
2454 ext4_journal_stop(handle); 2437 if (handle)
2438 ext4_journal_stop(handle);
2455 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2439 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2456 goto retry; 2440 goto retry;
2457 return err; 2441 return err;
@@ -2477,25 +2461,14 @@ static int empty_dir(struct inode *inode)
2477 } 2461 }
2478 2462
2479 sb = inode->i_sb; 2463 sb = inode->i_sb;
2480 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 2464 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
2481 !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { 2465 EXT4_ERROR_INODE(inode, "invalid size");
2482 if (err)
2483 EXT4_ERROR_INODE(inode,
2484 "error %d reading directory lblock 0", err);
2485 else
2486 ext4_warning(inode->i_sb,
2487 "bad directory (dir #%lu) - no data block",
2488 inode->i_ino);
2489 return 1; 2466 return 1;
2490 } 2467 }
2491 if (!buffer_verified(bh) && 2468 bh = ext4_read_dirblock(inode, 0, EITHER);
2492 !ext4_dirent_csum_verify(inode, 2469 if (IS_ERR(bh))
2493 (struct ext4_dir_entry *)bh->b_data)) { 2470 return 1;
2494 EXT4_ERROR_INODE(inode, "checksum error reading directory " 2471
2495 "lblock 0");
2496 return -EIO;
2497 }
2498 set_buffer_verified(bh);
2499 de = (struct ext4_dir_entry_2 *) bh->b_data; 2472 de = (struct ext4_dir_entry_2 *) bh->b_data;
2500 de1 = ext4_next_entry(de, sb->s_blocksize); 2473 de1 = ext4_next_entry(de, sb->s_blocksize);
2501 if (le32_to_cpu(de->inode) != inode->i_ino || 2474 if (le32_to_cpu(de->inode) != inode->i_ino ||
@@ -2518,28 +2491,9 @@ static int empty_dir(struct inode *inode)
2518 err = 0; 2491 err = 0;
2519 brelse(bh); 2492 brelse(bh);
2520 lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); 2493 lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2521 bh = ext4_bread(NULL, inode, lblock, 0, &err); 2494 bh = ext4_read_dirblock(inode, lblock, EITHER);
2522 if (!bh) { 2495 if (IS_ERR(bh))
2523 if (err) 2496 return 1;
2524 EXT4_ERROR_INODE(inode,
2525 "error %d reading directory "
2526 "lblock %u", err, lblock);
2527 else
2528 ext4_warning(inode->i_sb,
2529 "bad directory (dir #%lu) - no data block",
2530 inode->i_ino);
2531
2532 offset += sb->s_blocksize;
2533 continue;
2534 }
2535 if (!buffer_verified(bh) &&
2536 !ext4_dirent_csum_verify(inode,
2537 (struct ext4_dir_entry *)bh->b_data)) {
2538 EXT4_ERROR_INODE(inode, "checksum error "
2539 "reading directory lblock 0");
2540 return -EIO;
2541 }
2542 set_buffer_verified(bh);
2543 de = (struct ext4_dir_entry_2 *) bh->b_data; 2497 de = (struct ext4_dir_entry_2 *) bh->b_data;
2544 } 2498 }
2545 if (ext4_check_dir_entry(inode, NULL, de, bh, 2499 if (ext4_check_dir_entry(inode, NULL, de, bh,
@@ -2718,25 +2672,18 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2718 struct inode *inode; 2672 struct inode *inode;
2719 struct buffer_head *bh; 2673 struct buffer_head *bh;
2720 struct ext4_dir_entry_2 *de; 2674 struct ext4_dir_entry_2 *de;
 2721 handle_t *handle;
 2675 handle_t *handle = NULL;
2722 2676
2723 /* Initialize quotas before so that eventual writes go in 2677 /* Initialize quotas before so that eventual writes go in
2724 * separate transaction */ 2678 * separate transaction */
2725 dquot_initialize(dir); 2679 dquot_initialize(dir);
2726 dquot_initialize(dentry->d_inode); 2680 dquot_initialize(dentry->d_inode);
2727 2681
2728 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2729 if (IS_ERR(handle))
2730 return PTR_ERR(handle);
2731
2732 retval = -ENOENT; 2682 retval = -ENOENT;
2733 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2683 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2734 if (!bh) 2684 if (!bh)
2735 goto end_rmdir; 2685 goto end_rmdir;
2736 2686
2737 if (IS_DIRSYNC(dir))
2738 ext4_handle_sync(handle);
2739
2740 inode = dentry->d_inode; 2687 inode = dentry->d_inode;
2741 2688
2742 retval = -EIO; 2689 retval = -EIO;
@@ -2747,6 +2694,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2747 if (!empty_dir(inode)) 2694 if (!empty_dir(inode))
2748 goto end_rmdir; 2695 goto end_rmdir;
2749 2696
2697 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2698 EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2699 if (IS_ERR(handle)) {
2700 retval = PTR_ERR(handle);
2701 handle = NULL;
2702 goto end_rmdir;
2703 }
2704
2705 if (IS_DIRSYNC(dir))
2706 ext4_handle_sync(handle);
2707
2750 retval = ext4_delete_entry(handle, dir, de, bh); 2708 retval = ext4_delete_entry(handle, dir, de, bh);
2751 if (retval) 2709 if (retval)
2752 goto end_rmdir; 2710 goto end_rmdir;
@@ -2768,8 +2726,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2768 ext4_mark_inode_dirty(handle, dir); 2726 ext4_mark_inode_dirty(handle, dir);
2769 2727
2770end_rmdir: 2728end_rmdir:
2771 ext4_journal_stop(handle);
2772 brelse(bh); 2729 brelse(bh);
2730 if (handle)
2731 ext4_journal_stop(handle);
2773 return retval; 2732 return retval;
2774} 2733}
2775 2734
@@ -2779,7 +2738,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2779 struct inode *inode; 2738 struct inode *inode;
2780 struct buffer_head *bh; 2739 struct buffer_head *bh;
2781 struct ext4_dir_entry_2 *de; 2740 struct ext4_dir_entry_2 *de;
 2782 handle_t *handle;
 2741 handle_t *handle = NULL;
2783 2742
2784 trace_ext4_unlink_enter(dir, dentry); 2743 trace_ext4_unlink_enter(dir, dentry);
2785 /* Initialize quotas before so that eventual writes go 2744 /* Initialize quotas before so that eventual writes go
@@ -2787,13 +2746,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2787 dquot_initialize(dir); 2746 dquot_initialize(dir);
2788 dquot_initialize(dentry->d_inode); 2747 dquot_initialize(dentry->d_inode);
2789 2748
2790 handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2791 if (IS_ERR(handle))
2792 return PTR_ERR(handle);
2793
2794 if (IS_DIRSYNC(dir))
2795 ext4_handle_sync(handle);
2796
2797 retval = -ENOENT; 2749 retval = -ENOENT;
2798 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2750 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2799 if (!bh) 2751 if (!bh)
@@ -2805,6 +2757,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2805 if (le32_to_cpu(de->inode) != inode->i_ino) 2757 if (le32_to_cpu(de->inode) != inode->i_ino)
2806 goto end_unlink; 2758 goto end_unlink;
2807 2759
2760 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2761 EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
2762 if (IS_ERR(handle)) {
2763 retval = PTR_ERR(handle);
2764 handle = NULL;
2765 goto end_unlink;
2766 }
2767
2768 if (IS_DIRSYNC(dir))
2769 ext4_handle_sync(handle);
2770
2808 if (!inode->i_nlink) { 2771 if (!inode->i_nlink) {
2809 ext4_warning(inode->i_sb, 2772 ext4_warning(inode->i_sb,
2810 "Deleting nonexistent file (%lu), %d", 2773 "Deleting nonexistent file (%lu), %d",
@@ -2825,8 +2788,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2825 retval = 0; 2788 retval = 0;
2826 2789
2827end_unlink: 2790end_unlink:
2828 ext4_journal_stop(handle);
2829 brelse(bh); 2791 brelse(bh);
2792 if (handle)
2793 ext4_journal_stop(handle);
2830 trace_ext4_unlink_exit(dentry, retval); 2794 trace_ext4_unlink_exit(dentry, retval);
2831 return retval; 2795 return retval;
2832} 2796}
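
Both ext4_rmdir() and ext4_unlink() now defer ext4_journal_start() until the directory entry has been found and the sanity checks have passed, so the common failure paths never open, and then immediately stop, a journal handle. The sketch below shows the resulting shape with the bookkeeping abbreviated; example_unlink() and the out label are illustrative.

/* Sketch: start the handle only once we know the directory will change. */
static int example_unlink(struct inode *dir, struct dentry *dentry)
{
	handle_t *handle = NULL;
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de;
	int retval = -ENOENT;

	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
	if (!bh)
		goto out;			/* nothing journalled yet */

	handle = ext4_journal_start(dir, EXT4_HT_DIR,
				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
	if (IS_ERR(handle)) {
		retval = PTR_ERR(handle);
		handle = NULL;			/* cleanup path must skip it */
		goto out;
	}
	if (IS_DIRSYNC(dir))
		ext4_handle_sync(handle);

	retval = ext4_delete_entry(handle, dir, de, bh);
	/* ... link count and timestamp updates omitted ... */
out:
	brelse(bh);
	if (handle)
		ext4_journal_stop(handle);
	return retval;
}
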
@@ -2866,15 +2830,10 @@ static int ext4_symlink(struct inode *dir,
2866 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); 2830 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2867 } 2831 }
2868retry: 2832retry:
2869 handle = ext4_journal_start(dir, credits); 2833 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2870 if (IS_ERR(handle)) 2834 &dentry->d_name, 0, NULL,
2871 return PTR_ERR(handle); 2835 EXT4_HT_DIR, credits);
2872 2836 handle = ext4_journal_current_handle();
2873 if (IS_DIRSYNC(dir))
2874 ext4_handle_sync(handle);
2875
2876 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2877 &dentry->d_name, 0, NULL);
2878 err = PTR_ERR(inode); 2837 err = PTR_ERR(inode);
2879 if (IS_ERR(inode)) 2838 if (IS_ERR(inode))
2880 goto out_stop; 2839 goto out_stop;
@@ -2904,7 +2863,7 @@ retry:
2904 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS 2863 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2905 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified 2864 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2906 */ 2865 */
2907 handle = ext4_journal_start(dir, 2866 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2908 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2867 EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2909 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); 2868 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2910 if (IS_ERR(handle)) { 2869 if (IS_ERR(handle)) {
@@ -2927,8 +2886,12 @@ retry:
2927 } 2886 }
2928 EXT4_I(inode)->i_disksize = inode->i_size; 2887 EXT4_I(inode)->i_disksize = inode->i_size;
2929 err = ext4_add_nondir(handle, dentry, inode); 2888 err = ext4_add_nondir(handle, dentry, inode);
2889 if (!err && IS_DIRSYNC(dir))
2890 ext4_handle_sync(handle);
2891
2930out_stop: 2892out_stop:
2931 ext4_journal_stop(handle); 2893 if (handle)
2894 ext4_journal_stop(handle);
2932 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 2895 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2933 goto retry; 2896 goto retry;
2934 return err; 2897 return err;
@@ -2951,8 +2914,9 @@ static int ext4_link(struct dentry *old_dentry,
2951 dquot_initialize(dir); 2914 dquot_initialize(dir);
2952 2915
2953retry: 2916retry:
2954 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2917 handle = ext4_journal_start(dir, EXT4_HT_DIR,
2955 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2918 (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2919 EXT4_INDEX_EXTRA_TRANS_BLOCKS));
2956 if (IS_ERR(handle)) 2920 if (IS_ERR(handle))
2957 return PTR_ERR(handle); 2921 return PTR_ERR(handle);
2958 2922
@@ -2992,13 +2956,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
2992 struct buffer_head *bh; 2956 struct buffer_head *bh;
2993 2957
2994 if (!ext4_has_inline_data(inode)) { 2958 if (!ext4_has_inline_data(inode)) {
2995 if (!(bh = ext4_bread(handle, inode, 0, 0, retval))) { 2959 bh = ext4_read_dirblock(inode, 0, EITHER);
2996 if (!*retval) { 2960 if (IS_ERR(bh)) {
2997 *retval = -EIO; 2961 *retval = PTR_ERR(bh);
2998 ext4_error(inode->i_sb,
2999 "Directory hole detected on inode %lu\n",
3000 inode->i_ino);
3001 }
3002 return NULL; 2962 return NULL;
3003 } 2963 }
3004 *parent_de = ext4_next_entry( 2964 *parent_de = ext4_next_entry(
@@ -3035,9 +2995,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3035 * in separate transaction */ 2995 * in separate transaction */
3036 if (new_dentry->d_inode) 2996 if (new_dentry->d_inode)
3037 dquot_initialize(new_dentry->d_inode); 2997 dquot_initialize(new_dentry->d_inode);
3038 handle = ext4_journal_start(old_dir, 2 * 2998 handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
3039 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + 2999 (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
3040 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 3000 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3041 if (IS_ERR(handle)) 3001 if (IS_ERR(handle))
3042 return PTR_ERR(handle); 3002 return PTR_ERR(handle);
3043 3003
@@ -3077,11 +3037,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3077 &inlined); 3037 &inlined);
3078 if (!dir_bh) 3038 if (!dir_bh)
3079 goto end_rename; 3039 goto end_rename;
3080 if (!inlined && !buffer_verified(dir_bh) &&
3081 !ext4_dirent_csum_verify(old_inode,
3082 (struct ext4_dir_entry *)dir_bh->b_data))
3083 goto end_rename;
3084 set_buffer_verified(dir_bh);
3085 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino) 3040 if (le32_to_cpu(parent_de->inode) != old_dir->i_ino)
3086 goto end_rename; 3041 goto end_rename;
3087 retval = -EMLINK; 3042 retval = -EMLINK;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 0016fbca2a40..809b31003ecc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -23,6 +23,7 @@
23#include <linux/workqueue.h> 23#include <linux/workqueue.h>
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/mm.h>
26 27
27#include "ext4_jbd2.h" 28#include "ext4_jbd2.h"
28#include "xattr.h" 29#include "xattr.h"
@@ -73,8 +74,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
73 BUG_ON(!list_empty(&io->list)); 74 BUG_ON(!list_empty(&io->list));
74 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 75 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
75 76
76 if (io->page)
77 put_page(io->page);
78 for (i = 0; i < io->num_io_pages; i++) 77 for (i = 0; i < io->num_io_pages; i++)
79 put_io_page(io->pages[i]); 78 put_io_page(io->pages[i]);
80 io->num_io_pages = 0; 79 io->num_io_pages = 0;
@@ -103,14 +102,13 @@ static int ext4_end_io(ext4_io_end_t *io)
103 "(inode %lu, offset %llu, size %zd, error %d)", 102 "(inode %lu, offset %llu, size %zd, error %d)",
104 inode->i_ino, offset, size, ret); 103 inode->i_ino, offset, size, ret);
105 } 104 }
106 if (io->iocb)
107 aio_complete(io->iocb, io->result, 0);
108
109 if (io->flag & EXT4_IO_END_DIRECT)
110 inode_dio_done(inode);
111 /* Wake up anyone waiting on unwritten extent conversion */ 105 /* Wake up anyone waiting on unwritten extent conversion */
112 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 106 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
113 wake_up_all(ext4_ioend_wq(inode)); 107 wake_up_all(ext4_ioend_wq(inode));
108 if (io->flag & EXT4_IO_END_DIRECT)
109 inode_dio_done(inode);
110 if (io->iocb)
111 aio_complete(io->iocb, io->result, 0);
114 return ret; 112 return ret;
115} 113}
116 114
@@ -119,7 +117,6 @@ static void dump_completed_IO(struct inode *inode)
119#ifdef EXT4FS_DEBUG 117#ifdef EXT4FS_DEBUG
120 struct list_head *cur, *before, *after; 118 struct list_head *cur, *before, *after;
121 ext4_io_end_t *io, *io0, *io1; 119 ext4_io_end_t *io, *io0, *io1;
122 unsigned long flags;
123 120
124 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { 121 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
125 ext4_debug("inode %lu completed_io list is empty\n", 122 ext4_debug("inode %lu completed_io list is empty\n",
@@ -152,26 +149,20 @@ void ext4_add_complete_io(ext4_io_end_t *io_end)
152 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; 149 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
153 150
154 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 151 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
155 if (list_empty(&ei->i_completed_io_list)) { 152 if (list_empty(&ei->i_completed_io_list))
156 io_end->flag |= EXT4_IO_END_QUEUED; 153 queue_work(wq, &ei->i_unwritten_work);
157 queue_work(wq, &io_end->work);
158 }
159 list_add_tail(&io_end->list, &ei->i_completed_io_list); 154 list_add_tail(&io_end->list, &ei->i_completed_io_list);
160 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
161} 156}
162 157
163static int ext4_do_flush_completed_IO(struct inode *inode, 158static int ext4_do_flush_completed_IO(struct inode *inode)
164 ext4_io_end_t *work_io)
165{ 159{
166 ext4_io_end_t *io; 160 ext4_io_end_t *io;
167 struct list_head unwritten, complete, to_free; 161 struct list_head unwritten;
168 unsigned long flags; 162 unsigned long flags;
169 struct ext4_inode_info *ei = EXT4_I(inode); 163 struct ext4_inode_info *ei = EXT4_I(inode);
170 int err, ret = 0; 164 int err, ret = 0;
171 165
172 INIT_LIST_HEAD(&complete);
173 INIT_LIST_HEAD(&to_free);
174
175 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 166 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
176 dump_completed_IO(inode); 167 dump_completed_IO(inode);
177 list_replace_init(&ei->i_completed_io_list, &unwritten); 168 list_replace_init(&ei->i_completed_io_list, &unwritten);
@@ -185,32 +176,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
185 err = ext4_end_io(io); 176 err = ext4_end_io(io);
186 if (unlikely(!ret && err)) 177 if (unlikely(!ret && err))
187 ret = err; 178 ret = err;
188
189 list_add_tail(&io->list, &complete);
190 }
191 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
192 while (!list_empty(&complete)) {
193 io = list_entry(complete.next, ext4_io_end_t, list);
194 io->flag &= ~EXT4_IO_END_UNWRITTEN; 179 io->flag &= ~EXT4_IO_END_UNWRITTEN;
195 /* end_io context can not be destroyed now because it still
196 * used by queued worker. Worker thread will destroy it later */
197 if (io->flag & EXT4_IO_END_QUEUED)
198 list_del_init(&io->list);
199 else
200 list_move(&io->list, &to_free);
201 }
202 /* If we are called from worker context, it is time to clear queued
203 * flag, and destroy it's end_io if it was converted already */
204 if (work_io) {
205 work_io->flag &= ~EXT4_IO_END_QUEUED;
206 if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
207 list_add_tail(&work_io->list, &to_free);
208 }
209 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
210
211 while (!list_empty(&to_free)) {
212 io = list_entry(to_free.next, ext4_io_end_t, list);
213 list_del_init(&io->list);
214 ext4_free_io_end(io); 180 ext4_free_io_end(io);
215 } 181 }
216 return ret; 182 return ret;
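
Completed-I/O processing moves from one work item per io_end, with the QUEUED-flag juggling deleted above, to a single per-inode work item: ext4_add_complete_io() queues ei->i_unwritten_work only when the list goes from empty to non-empty, and the work function simply drains the list. A schematic of the drain, with the i_completed_io_lock handling abbreviated (example_flush_completed_io() is illustrative):

/* Schematic of the per-inode flush; locking around the splice abbreviated. */
static int example_flush_completed_io(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_io_end_t *io;
	LIST_HEAD(unwritten);
	int err, ret = 0;

	/* Done under ei->i_completed_io_lock in the real code. */
	list_replace_init(&ei->i_completed_io_list, &unwritten);

	while (!list_empty(&unwritten)) {
		io = list_entry(unwritten.next, ext4_io_end_t, list);
		list_del_init(&io->list);

		err = ext4_end_io(io);		/* convert unwritten extents */
		if (unlikely(!ret && err))
			ret = err;
		io->flag &= ~EXT4_IO_END_UNWRITTEN;
		ext4_free_io_end(io);		/* no queued worker owns it now */
	}
	return ret;
}
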
@@ -219,10 +185,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
219/* 185/*
220 * work on completed aio dio IO, to convert unwritten extents to extents 186 * work on completed aio dio IO, to convert unwritten extents to extents
221 */ 187 */
222static void ext4_end_io_work(struct work_struct *work) 188void ext4_end_io_work(struct work_struct *work)
223{ 189{
224 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 190 struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
225 ext4_do_flush_completed_IO(io->inode, io); 191 i_unwritten_work);
192 ext4_do_flush_completed_IO(&ei->vfs_inode);
226} 193}
227 194
228int ext4_flush_unwritten_io(struct inode *inode) 195int ext4_flush_unwritten_io(struct inode *inode)
@@ -230,7 +197,7 @@ int ext4_flush_unwritten_io(struct inode *inode)
230 int ret; 197 int ret;
231 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && 198 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
232 !(inode->i_state & I_FREEING)); 199 !(inode->i_state & I_FREEING));
233 ret = ext4_do_flush_completed_IO(inode, NULL); 200 ret = ext4_do_flush_completed_IO(inode);
234 ext4_unwritten_wait(inode); 201 ext4_unwritten_wait(inode);
235 return ret; 202 return ret;
236} 203}
@@ -241,7 +208,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
241 if (io) { 208 if (io) {
242 atomic_inc(&EXT4_I(inode)->i_ioend_count); 209 atomic_inc(&EXT4_I(inode)->i_ioend_count);
243 io->inode = inode; 210 io->inode = inode;
244 INIT_WORK(&io->work, ext4_end_io_work);
245 INIT_LIST_HEAD(&io->list); 211 INIT_LIST_HEAD(&io->list);
246 } 212 }
247 return io; 213 return io;
@@ -382,14 +348,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
382 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 348 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
383 } 349 }
384 350
385 if (!buffer_mapped(bh) || buffer_delay(bh)) {
386 if (!buffer_mapped(bh))
387 clear_buffer_dirty(bh);
388 if (io->io_bio)
389 ext4_io_submit(io);
390 return 0;
391 }
392
393 if (io->io_bio && bh->b_blocknr != io->io_next_block) { 351 if (io->io_bio && bh->b_blocknr != io->io_next_block) {
394submit_and_retry: 352submit_and_retry:
395 ext4_io_submit(io); 353 ext4_io_submit(io);
@@ -436,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
436 394
437 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); 395 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
438 if (!io_page) { 396 if (!io_page) {
439 set_page_dirty(page); 397 redirty_page_for_writepage(wbc, page);
440 unlock_page(page); 398 unlock_page(page);
441 return -ENOMEM; 399 return -ENOMEM;
442 } 400 }
@@ -468,7 +426,15 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
468 set_buffer_uptodate(bh); 426 set_buffer_uptodate(bh);
469 continue; 427 continue;
470 } 428 }
471 clear_buffer_dirty(bh); 429 if (!buffer_dirty(bh) || buffer_delay(bh) ||
430 !buffer_mapped(bh) || buffer_unwritten(bh)) {
431 /* A hole? We can safely clear the dirty bit */
432 if (!buffer_mapped(bh))
433 clear_buffer_dirty(bh);
434 if (io->io_bio)
435 ext4_io_submit(io);
436 continue;
437 }
472 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 438 ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
473 if (ret) { 439 if (ret) {
474 /* 440 /*
@@ -476,9 +442,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
476 * we can do but mark the page as dirty, and 442 * we can do but mark the page as dirty, and
477 * better luck next time. 443 * better luck next time.
478 */ 444 */
479 set_page_dirty(page); 445 redirty_page_for_writepage(wbc, page);
480 break; 446 break;
481 } 447 }
448 clear_buffer_dirty(bh);
482 } 449 }
483 unlock_page(page); 450 unlock_page(page);
484 /* 451 /*
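
ext4_bio_write_page() now decides per buffer whether to submit it: clean, delayed, unmapped or unwritten buffers are skipped (flushing any bio built so far, so the submitted bio stays block-contiguous), the dirty bit is cleared only after io_submit_add_bh() has accepted the buffer, and failures go through redirty_page_for_writepage() so writeback accounting stays consistent. The rough shape of the loop, with the surrounding page and variable setup omitted:

	/* Rough shape of the new per-buffer loop; declarations omitted. */
	bh = head = page_buffers(page);
	do {
		if (!buffer_dirty(bh) || buffer_delay(bh) ||
		    !buffer_mapped(bh) || buffer_unwritten(bh)) {
			if (!buffer_mapped(bh))
				clear_buffer_dirty(bh);	/* a hole: safe to drop */
			if (io->io_bio)
				ext4_io_submit(io);	/* keep the bio contiguous */
			continue;
		}
		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
		if (ret) {
			redirty_page_for_writepage(wbc, page);	/* retry later */
			break;
		}
		clear_buffer_dirty(bh);		/* only once the bh is queued */
	} while ((bh = bh->b_this_page) != head);
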
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d99387b89edd..c7f4d7584669 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -333,8 +333,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
333 int err; 333 int err;
334 334
335 bh = sb_getblk(sb, blk); 335 bh = sb_getblk(sb, blk);
336 if (!bh) 336 if (unlikely(!bh))
337 return ERR_PTR(-EIO); 337 return ERR_PTR(-ENOMEM);
338 if ((err = ext4_journal_get_write_access(handle, bh))) { 338 if ((err = ext4_journal_get_write_access(handle, bh))) {
339 brelse(bh); 339 brelse(bh);
340 bh = ERR_PTR(err); 340 bh = ERR_PTR(err);
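
sb_getblk() returns NULL only when it cannot allocate the buffer head, so the resize paths (bclean() above and the call sites below) now report that case as -ENOMEM instead of -EIO. The pattern, as a small sketch (example_get_meta_block() is illustrative):

/* Sketch of the allocation-failure handling now used in resize.c. */
static struct buffer_head *example_get_meta_block(struct super_block *sb,
						  ext4_fsblk_t blk)
{
	struct buffer_head *bh = sb_getblk(sb, blk);

	if (unlikely(!bh))		/* allocation failure, not an I/O error */
		return ERR_PTR(-ENOMEM);
	return bh;
}
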
@@ -410,8 +410,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
410 return err; 410 return err;
411 411
412 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); 412 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
413 if (!bh) 413 if (unlikely(!bh))
414 return -EIO; 414 return -ENOMEM;
415 415
416 err = ext4_journal_get_write_access(handle, bh); 416 err = ext4_journal_get_write_access(handle, bh);
417 if (err) 417 if (err)
@@ -466,7 +466,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); 466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
467 467
468 /* This transaction may be extended/restarted along the way */ 468 /* This transaction may be extended/restarted along the way */
469 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 469 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
470 if (IS_ERR(handle)) 470 if (IS_ERR(handle))
471 return PTR_ERR(handle); 471 return PTR_ERR(handle);
472 472
@@ -500,8 +500,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
500 goto out; 500 goto out;
501 501
502 gdb = sb_getblk(sb, block); 502 gdb = sb_getblk(sb, block);
503 if (!gdb) { 503 if (unlikely(!gdb)) {
504 err = -EIO; 504 err = -ENOMEM;
505 goto out; 505 goto out;
506 } 506 }
507 507
@@ -1031,7 +1031,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1031 handle_t *handle; 1031 handle_t *handle;
1032 int err = 0, err2; 1032 int err = 0, err2;
1033 1033
1034 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 1034 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA);
1035 if (IS_ERR(handle)) { 1035 if (IS_ERR(handle)) {
1036 group = 1; 1036 group = 1;
1037 err = PTR_ERR(handle); 1037 err = PTR_ERR(handle);
@@ -1064,8 +1064,8 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1064 ext4_bg_has_super(sb, group)); 1064 ext4_bg_has_super(sb, group));
1065 1065
1066 bh = sb_getblk(sb, backup_block); 1066 bh = sb_getblk(sb, backup_block);
1067 if (!bh) { 1067 if (unlikely(!bh)) {
1068 err = -EIO; 1068 err = -ENOMEM;
1069 break; 1069 break;
1070 } 1070 }
1071 ext4_debug("update metadata backup %llu(+%llu)\n", 1071 ext4_debug("update metadata backup %llu(+%llu)\n",
@@ -1168,7 +1168,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1168static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) 1168static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
1169{ 1169{
1170 struct buffer_head *bh = sb_getblk(sb, block); 1170 struct buffer_head *bh = sb_getblk(sb, block);
1171 if (!bh) 1171 if (unlikely(!bh))
1172 return NULL; 1172 return NULL;
1173 if (!bh_uptodate_or_lock(bh)) { 1173 if (!bh_uptodate_or_lock(bh)) {
1174 if (bh_submit_read(bh) < 0) { 1174 if (bh_submit_read(bh) < 0) {
@@ -1412,7 +1412,7 @@ static int ext4_flex_group_add(struct super_block *sb,
1412 * modify each of the reserved GDT dindirect blocks. 1412 * modify each of the reserved GDT dindirect blocks.
1413 */ 1413 */
1414 credit = flex_gd->count * 4 + reserved_gdb; 1414 credit = flex_gd->count * 4 + reserved_gdb;
1415 handle = ext4_journal_start_sb(sb, credit); 1415 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit);
1416 if (IS_ERR(handle)) { 1416 if (IS_ERR(handle)) {
1417 err = PTR_ERR(handle); 1417 err = PTR_ERR(handle);
1418 goto exit; 1418 goto exit;
@@ -1506,10 +1506,12 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
1506 group_data[i].blocks_count = blocks_per_group; 1506 group_data[i].blocks_count = blocks_per_group;
1507 overhead = ext4_group_overhead_blocks(sb, group + i); 1507 overhead = ext4_group_overhead_blocks(sb, group + i);
1508 group_data[i].free_blocks_count = blocks_per_group - overhead; 1508 group_data[i].free_blocks_count = blocks_per_group - overhead;
1509 if (ext4_has_group_desc_csum(sb)) 1509 if (ext4_has_group_desc_csum(sb)) {
1510 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1510 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1511 EXT4_BG_INODE_UNINIT; 1511 EXT4_BG_INODE_UNINIT;
1512 else 1512 if (!test_opt(sb, INIT_INODE_TABLE))
1513 flex_gd->bg_flags[i] |= EXT4_BG_INODE_ZEROED;
1514 } else
1513 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; 1515 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1514 } 1516 }
1515 1517
@@ -1594,7 +1596,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1594 1596
1595 err = ext4_alloc_flex_bg_array(sb, input->group + 1); 1597 err = ext4_alloc_flex_bg_array(sb, input->group + 1);
1596 if (err) 1598 if (err)
1597 return err; 1599 goto out;
1598 1600
1599 err = ext4_mb_alloc_groupinfo(sb, input->group + 1); 1601 err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1600 if (err) 1602 if (err)
@@ -1622,7 +1624,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
1622 /* We will update the superblock, one block bitmap, and 1624 /* We will update the superblock, one block bitmap, and
1623 * one group descriptor via ext4_group_add_blocks(). 1625 * one group descriptor via ext4_group_add_blocks().
1624 */ 1626 */
1625 handle = ext4_journal_start_sb(sb, 3); 1627 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, 3);
1626 if (IS_ERR(handle)) { 1628 if (IS_ERR(handle)) {
1627 err = PTR_ERR(handle); 1629 err = PTR_ERR(handle);
1628 ext4_warning(sb, "error %d on journal start", err); 1630 ext4_warning(sb, "error %d on journal start", err);
@@ -1786,7 +1788,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1786 credits += 3; /* block bitmap, bg descriptor, resize inode */ 1788 credits += 3; /* block bitmap, bg descriptor, resize inode */
1787 } 1789 }
1788 1790
1789 handle = ext4_journal_start_sb(sb, credits); 1791 handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credits);
1790 if (IS_ERR(handle)) 1792 if (IS_ERR(handle))
1791 return PTR_ERR(handle); 1793 return PTR_ERR(handle);
1792 1794
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3d4fb81bacd5..620cf5615ba2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -69,8 +69,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
69static void ext4_clear_journal_err(struct super_block *sb, 69static void ext4_clear_journal_err(struct super_block *sb,
70 struct ext4_super_block *es); 70 struct ext4_super_block *es);
71static int ext4_sync_fs(struct super_block *sb, int wait); 71static int ext4_sync_fs(struct super_block *sb, int wait);
72static const char *ext4_decode_error(struct super_block *sb, int errno,
73 char nbuf[16]);
74static int ext4_remount(struct super_block *sb, int *flags, char *data); 72static int ext4_remount(struct super_block *sb, int *flags, char *data);
75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 73static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
76static int ext4_unfreeze(struct super_block *sb); 74static int ext4_unfreeze(struct super_block *sb);
@@ -296,107 +294,6 @@ void ext4_itable_unused_set(struct super_block *sb,
296} 294}
297 295
298 296
299/* Just increment the non-pointer handle value */
300static handle_t *ext4_get_nojournal(void)
301{
302 handle_t *handle = current->journal_info;
303 unsigned long ref_cnt = (unsigned long)handle;
304
305 BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT);
306
307 ref_cnt++;
308 handle = (handle_t *)ref_cnt;
309
310 current->journal_info = handle;
311 return handle;
312}
313
314
315/* Decrement the non-pointer handle value */
316static void ext4_put_nojournal(handle_t *handle)
317{
318 unsigned long ref_cnt = (unsigned long)handle;
319
320 BUG_ON(ref_cnt == 0);
321
322 ref_cnt--;
323 handle = (handle_t *)ref_cnt;
324
325 current->journal_info = handle;
326}
327
328/*
329 * Wrappers for jbd2_journal_start/end.
330 */
331handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
332{
333 journal_t *journal;
334
335 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
336 if (sb->s_flags & MS_RDONLY)
337 return ERR_PTR(-EROFS);
338
339 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
340 journal = EXT4_SB(sb)->s_journal;
341 if (!journal)
342 return ext4_get_nojournal();
343 /*
344 * Special case here: if the journal has aborted behind our
345 * backs (eg. EIO in the commit thread), then we still need to
346 * take the FS itself readonly cleanly.
347 */
348 if (is_journal_aborted(journal)) {
349 ext4_abort(sb, "Detected aborted journal");
350 return ERR_PTR(-EROFS);
351 }
352 return jbd2_journal_start(journal, nblocks);
353}
354
355int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
356{
357 struct super_block *sb;
358 int err;
359 int rc;
360
361 if (!ext4_handle_valid(handle)) {
362 ext4_put_nojournal(handle);
363 return 0;
364 }
365 sb = handle->h_transaction->t_journal->j_private;
366 err = handle->h_err;
367 rc = jbd2_journal_stop(handle);
368
369 if (!err)
370 err = rc;
371 if (err)
372 __ext4_std_error(sb, where, line, err);
373 return err;
374}
375
376void ext4_journal_abort_handle(const char *caller, unsigned int line,
377 const char *err_fn, struct buffer_head *bh,
378 handle_t *handle, int err)
379{
380 char nbuf[16];
381 const char *errstr = ext4_decode_error(NULL, err, nbuf);
382
383 BUG_ON(!ext4_handle_valid(handle));
384
385 if (bh)
386 BUFFER_TRACE(bh, "abort");
387
388 if (!handle->h_err)
389 handle->h_err = err;
390
391 if (is_handle_aborted(handle))
392 return;
393
394 printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
395 caller, line, errstr, err_fn);
396
397 jbd2_journal_abort_handle(handle);
398}
399
400static void __save_error_info(struct super_block *sb, const char *func, 297static void __save_error_info(struct super_block *sb, const char *func,
401 unsigned int line) 298 unsigned int line)
402{ 299{
@@ -553,7 +450,7 @@ void ext4_error_file(struct file *file, const char *function,
553 va_list args; 450 va_list args;
554 struct va_format vaf; 451 struct va_format vaf;
555 struct ext4_super_block *es; 452 struct ext4_super_block *es;
 556 struct inode *inode = file->f_dentry->d_inode;
 453 struct inode *inode = file_inode(file);
557 char pathname[80], *path; 454 char pathname[80], *path;
558 455
559 es = EXT4_SB(inode->i_sb)->s_es; 456 es = EXT4_SB(inode->i_sb)->s_es;
@@ -582,8 +479,8 @@ void ext4_error_file(struct file *file, const char *function,
582 ext4_handle_error(inode->i_sb); 479 ext4_handle_error(inode->i_sb);
583} 480}
584 481
585static const char *ext4_decode_error(struct super_block *sb, int errno, 482const char *ext4_decode_error(struct super_block *sb, int errno,
586 char nbuf[16]) 483 char nbuf[16])
587{ 484{
588 char *errstr = NULL; 485 char *errstr = NULL;
589 486
@@ -858,6 +755,7 @@ static void ext4_put_super(struct super_block *sb)
858 ext4_abort(sb, "Couldn't clean up the journal"); 755 ext4_abort(sb, "Couldn't clean up the journal");
859 } 756 }
860 757
758 ext4_es_unregister_shrinker(sb);
861 del_timer(&sbi->s_err_report); 759 del_timer(&sbi->s_err_report);
862 ext4_release_system_zone(sb); 760 ext4_release_system_zone(sb);
863 ext4_mb_release(sb); 761 ext4_mb_release(sb);
@@ -939,11 +837,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
939 return NULL; 837 return NULL;
940 838
941 ei->vfs_inode.i_version = 1; 839 ei->vfs_inode.i_version = 1;
942 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
943 INIT_LIST_HEAD(&ei->i_prealloc_list); 840 INIT_LIST_HEAD(&ei->i_prealloc_list);
944 spin_lock_init(&ei->i_prealloc_lock); 841 spin_lock_init(&ei->i_prealloc_lock);
945 ext4_es_init_tree(&ei->i_es_tree); 842 ext4_es_init_tree(&ei->i_es_tree);
946 rwlock_init(&ei->i_es_lock); 843 rwlock_init(&ei->i_es_lock);
844 INIT_LIST_HEAD(&ei->i_es_lru);
845 ei->i_es_lru_nr = 0;
947 ei->i_reserved_data_blocks = 0; 846 ei->i_reserved_data_blocks = 0;
948 ei->i_reserved_meta_blocks = 0; 847 ei->i_reserved_meta_blocks = 0;
949 ei->i_allocated_meta_blocks = 0; 848 ei->i_allocated_meta_blocks = 0;
@@ -960,6 +859,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
960 ei->i_datasync_tid = 0; 859 ei->i_datasync_tid = 0;
961 atomic_set(&ei->i_ioend_count, 0); 860 atomic_set(&ei->i_ioend_count, 0);
962 atomic_set(&ei->i_unwritten, 0); 861 atomic_set(&ei->i_unwritten, 0);
862 INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work);
963 863
964 return &ei->vfs_inode; 864 return &ei->vfs_inode;
965} 865}
@@ -1031,6 +931,7 @@ void ext4_clear_inode(struct inode *inode)
1031 dquot_drop(inode); 931 dquot_drop(inode);
1032 ext4_discard_preallocations(inode); 932 ext4_discard_preallocations(inode);
1033 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 933 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
934 ext4_es_lru_del(inode);
1034 if (EXT4_I(inode)->jinode) { 935 if (EXT4_I(inode)->jinode) {
1035 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), 936 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1036 EXT4_I(inode)->jinode); 937 EXT4_I(inode)->jinode);
@@ -1280,8 +1181,8 @@ static const match_table_t tokens = {
1280 {Opt_stripe, "stripe=%u"}, 1181 {Opt_stripe, "stripe=%u"},
1281 {Opt_delalloc, "delalloc"}, 1182 {Opt_delalloc, "delalloc"},
1282 {Opt_nodelalloc, "nodelalloc"}, 1183 {Opt_nodelalloc, "nodelalloc"},
1283 {Opt_mblk_io_submit, "mblk_io_submit"}, 1184 {Opt_removed, "mblk_io_submit"},
1284 {Opt_nomblk_io_submit, "nomblk_io_submit"}, 1185 {Opt_removed, "nomblk_io_submit"},
1285 {Opt_block_validity, "block_validity"}, 1186 {Opt_block_validity, "block_validity"},
1286 {Opt_noblock_validity, "noblock_validity"}, 1187 {Opt_noblock_validity, "noblock_validity"},
1287 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1188 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1337,6 +1238,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1337{ 1238{
1338 struct ext4_sb_info *sbi = EXT4_SB(sb); 1239 struct ext4_sb_info *sbi = EXT4_SB(sb);
1339 char *qname; 1240 char *qname;
1241 int ret = -1;
1340 1242
1341 if (sb_any_quota_loaded(sb) && 1243 if (sb_any_quota_loaded(sb) &&
1342 !sbi->s_qf_names[qtype]) { 1244 !sbi->s_qf_names[qtype]) {
@@ -1351,23 +1253,26 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
1351 "Not enough memory for storing quotafile name"); 1253 "Not enough memory for storing quotafile name");
1352 return -1; 1254 return -1;
1353 } 1255 }
1354 if (sbi->s_qf_names[qtype] && 1256 if (sbi->s_qf_names[qtype]) {
1355 strcmp(sbi->s_qf_names[qtype], qname)) { 1257 if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
1356 ext4_msg(sb, KERN_ERR, 1258 ret = 1;
1357 "%s quota file already specified", QTYPE2NAME(qtype)); 1259 else
1358 kfree(qname); 1260 ext4_msg(sb, KERN_ERR,
1359 return -1; 1261 "%s quota file already specified",
1262 QTYPE2NAME(qtype));
1263 goto errout;
1360 } 1264 }
1361 sbi->s_qf_names[qtype] = qname; 1265 if (strchr(qname, '/')) {
1362 if (strchr(sbi->s_qf_names[qtype], '/')) {
1363 ext4_msg(sb, KERN_ERR, 1266 ext4_msg(sb, KERN_ERR,
1364 "quotafile must be on filesystem root"); 1267 "quotafile must be on filesystem root");
1365 kfree(sbi->s_qf_names[qtype]); 1268 goto errout;
1366 sbi->s_qf_names[qtype] = NULL;
1367 return -1;
1368 } 1269 }
1270 sbi->s_qf_names[qtype] = qname;
1369 set_opt(sb, QUOTA); 1271 set_opt(sb, QUOTA);
1370 return 1; 1272 return 1;
1273errout:
1274 kfree(qname);
1275 return ret;
1371} 1276}
1372 1277
1373static int clear_qf_name(struct super_block *sb, int qtype) 1278static int clear_qf_name(struct super_block *sb, int qtype)
@@ -1381,10 +1286,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1381 " when quota turned on"); 1286 " when quota turned on");
1382 return -1; 1287 return -1;
1383 } 1288 }
1384 /* 1289 kfree(sbi->s_qf_names[qtype]);
1385 * The space will be released later when all options are confirmed
1386 * to be correct
1387 */
1388 sbi->s_qf_names[qtype] = NULL; 1290 sbi->s_qf_names[qtype] = NULL;
1389 return 1; 1291 return 1;
1390} 1292}
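
set_qf_name() above is reworked so that every exit which has already duplicated the name funnels through one errout label: naming the same quota file twice is now accepted (returns 1), a conflicting name or one containing '/' frees the copy and fails, and sbi->s_qf_names[qtype] is assigned only after validation. The control flow, reduced to a skeleton (example_set_qf_name() is illustrative and assumes qname was already kstrdup()'d by the caller, as in the real code):

/* Skeleton of the validation flow; error messages omitted. */
static int example_set_qf_name(struct ext4_sb_info *sbi, int qtype, char *qname)
{
	int ret = -1;

	if (sbi->s_qf_names[qtype]) {
		if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
			ret = 1;	/* same name twice is not an error */
		goto errout;		/* a different name is */
	}
	if (strchr(qname, '/'))
		goto errout;		/* quota file must sit on the fs root */

	sbi->s_qf_names[qtype] = qname;	/* keep the copy only after validation */
	return 1;
errout:
	kfree(qname);
	return ret;
}
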
@@ -1404,6 +1306,9 @@ static int clear_qf_name(struct super_block *sb, int qtype)
1404#define MOPT_QFMT MOPT_NOSUPPORT 1306#define MOPT_QFMT MOPT_NOSUPPORT
1405#endif 1307#endif
1406#define MOPT_DATAJ 0x0080 1308#define MOPT_DATAJ 0x0080
1309#define MOPT_NO_EXT2 0x0100
1310#define MOPT_NO_EXT3 0x0200
1311#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
1407 1312
1408static const struct mount_opts { 1313static const struct mount_opts {
1409 int token; 1314 int token;
@@ -1414,25 +1319,31 @@ static const struct mount_opts {
1414 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, 1319 {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1415 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, 1320 {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1416 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, 1321 {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1417 {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
1418 {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
1419 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, 1322 {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1420 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, 1323 {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1421 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET}, 1324 {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1422 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR}, 1325 MOPT_EXT4_ONLY | MOPT_SET},
1326 {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1327 MOPT_EXT4_ONLY | MOPT_CLEAR},
1423 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, 1328 {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1424 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, 1329 {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1425 {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT}, 1330 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1426 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT}, 1331 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1427 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET}, 1332 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1333 MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
1334 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1335 MOPT_EXT4_ONLY | MOPT_SET},
1428 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1336 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1429 EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET}, 1337 EXT4_MOUNT_JOURNAL_CHECKSUM),
1430 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET}, 1338 MOPT_EXT4_ONLY | MOPT_SET},
1339 {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1431 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, 1340 {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
1432 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, 1341 {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
1433 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, 1342 {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
1434 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET}, 1343 {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
1435 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR}, 1344 MOPT_NO_EXT2 | MOPT_SET},
1345 {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
1346 MOPT_NO_EXT2 | MOPT_CLEAR},
1436 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, 1347 {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1437 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, 1348 {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1438 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, 1349 {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1444,9 +1355,14 @@ static const struct mount_opts {
1444 {Opt_inode_readahead_blks, 0, MOPT_GTE0}, 1355 {Opt_inode_readahead_blks, 0, MOPT_GTE0},
1445 {Opt_init_itable, 0, MOPT_GTE0}, 1356 {Opt_init_itable, 0, MOPT_GTE0},
1446 {Opt_stripe, 0, MOPT_GTE0}, 1357 {Opt_stripe, 0, MOPT_GTE0},
1447 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ}, 1358 {Opt_resuid, 0, MOPT_GTE0},
1448 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ}, 1359 {Opt_resgid, 0, MOPT_GTE0},
1449 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ}, 1360 {Opt_journal_dev, 0, MOPT_GTE0},
1361 {Opt_journal_ioprio, 0, MOPT_GTE0},
1362 {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1363 {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
1364 {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
1365 MOPT_NO_EXT2 | MOPT_DATAJ},
1450 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, 1366 {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1451 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, 1367 {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
1452#ifdef CONFIG_EXT4_FS_POSIX_ACL 1368#ifdef CONFIG_EXT4_FS_POSIX_ACL
@@ -1496,8 +1412,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1496 else if (token == Opt_offgrpjquota) 1412 else if (token == Opt_offgrpjquota)
1497 return clear_qf_name(sb, GRPQUOTA); 1413 return clear_qf_name(sb, GRPQUOTA);
1498#endif 1414#endif
1499 if (args->from && match_int(args, &arg))
1500 return -1;
1501 switch (token) { 1415 switch (token) {
1502 case Opt_noacl: 1416 case Opt_noacl:
1503 case Opt_nouser_xattr: 1417 case Opt_nouser_xattr:
@@ -1506,138 +1420,149 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1506 case Opt_sb: 1420 case Opt_sb:
1507 return 1; /* handled by get_sb_block() */ 1421 return 1; /* handled by get_sb_block() */
1508 case Opt_removed: 1422 case Opt_removed:
1509 ext4_msg(sb, KERN_WARNING, 1423 ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
1510 "Ignoring removed %s option", opt); 1424 return 1;
1425 case Opt_abort:
1426 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1427 return 1;
1428 case Opt_i_version:
1429 sb->s_flags |= MS_I_VERSION;
1511 return 1; 1430 return 1;
1512 case Opt_resuid: 1431 }
1432
1433 for (m = ext4_mount_opts; m->token != Opt_err; m++)
1434 if (token == m->token)
1435 break;
1436
1437 if (m->token == Opt_err) {
1438 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
1439 "or missing value", opt);
1440 return -1;
1441 }
1442
1443 if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
1444 ext4_msg(sb, KERN_ERR,
1445 "Mount option \"%s\" incompatible with ext2", opt);
1446 return -1;
1447 }
1448 if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
1449 ext4_msg(sb, KERN_ERR,
1450 "Mount option \"%s\" incompatible with ext3", opt);
1451 return -1;
1452 }
1453
1454 if (args->from && match_int(args, &arg))
1455 return -1;
1456 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1457 return -1;
1458 if (m->flags & MOPT_EXPLICIT)
1459 set_opt2(sb, EXPLICIT_DELALLOC);
1460 if (m->flags & MOPT_CLEAR_ERR)
1461 clear_opt(sb, ERRORS_MASK);
1462 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1463 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1464 "options when quota turned on");
1465 return -1;
1466 }
1467
1468 if (m->flags & MOPT_NOSUPPORT) {
1469 ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
1470 } else if (token == Opt_commit) {
1471 if (arg == 0)
1472 arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
1473 sbi->s_commit_interval = HZ * arg;
1474 } else if (token == Opt_max_batch_time) {
1475 if (arg == 0)
1476 arg = EXT4_DEF_MAX_BATCH_TIME;
1477 sbi->s_max_batch_time = arg;
1478 } else if (token == Opt_min_batch_time) {
1479 sbi->s_min_batch_time = arg;
1480 } else if (token == Opt_inode_readahead_blks) {
1481 if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
1482 ext4_msg(sb, KERN_ERR,
1483 "EXT4-fs: inode_readahead_blks must be "
1484 "0 or a power of 2 smaller than 2^31");
1485 return -1;
1486 }
1487 sbi->s_inode_readahead_blks = arg;
1488 } else if (token == Opt_init_itable) {
1489 set_opt(sb, INIT_INODE_TABLE);
1490 if (!args->from)
1491 arg = EXT4_DEF_LI_WAIT_MULT;
1492 sbi->s_li_wait_mult = arg;
1493 } else if (token == Opt_max_dir_size_kb) {
1494 sbi->s_max_dir_size_kb = arg;
1495 } else if (token == Opt_stripe) {
1496 sbi->s_stripe = arg;
1497 } else if (token == Opt_resuid) {
1513 uid = make_kuid(current_user_ns(), arg); 1498 uid = make_kuid(current_user_ns(), arg);
1514 if (!uid_valid(uid)) { 1499 if (!uid_valid(uid)) {
1515 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); 1500 ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
1516 return -1; 1501 return -1;
1517 } 1502 }
1518 sbi->s_resuid = uid; 1503 sbi->s_resuid = uid;
1519 return 1; 1504 } else if (token == Opt_resgid) {
1520 case Opt_resgid:
1521 gid = make_kgid(current_user_ns(), arg); 1505 gid = make_kgid(current_user_ns(), arg);
1522 if (!gid_valid(gid)) { 1506 if (!gid_valid(gid)) {
1523 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); 1507 ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
1524 return -1; 1508 return -1;
1525 } 1509 }
1526 sbi->s_resgid = gid; 1510 sbi->s_resgid = gid;
1527 return 1; 1511 } else if (token == Opt_journal_dev) {
1528 case Opt_abort:
1529 sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
1530 return 1;
1531 case Opt_i_version:
1532 sb->s_flags |= MS_I_VERSION;
1533 return 1;
1534 case Opt_journal_dev:
1535 if (is_remount) { 1512 if (is_remount) {
1536 ext4_msg(sb, KERN_ERR, 1513 ext4_msg(sb, KERN_ERR,
1537 "Cannot specify journal on remount"); 1514 "Cannot specify journal on remount");
1538 return -1; 1515 return -1;
1539 } 1516 }
1540 *journal_devnum = arg; 1517 *journal_devnum = arg;
1541 return 1; 1518 } else if (token == Opt_journal_ioprio) {
1542 case Opt_journal_ioprio: 1519 if (arg > 7) {
1543 if (arg < 0 || arg > 7) 1520 ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
1544 return -1; 1521 " (must be 0-7)");
1545 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1546 return 1;
1547 }
1548
1549 for (m = ext4_mount_opts; m->token != Opt_err; m++) {
1550 if (token != m->token)
1551 continue;
1552 if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
1553 return -1;
1554 if (m->flags & MOPT_EXPLICIT)
1555 set_opt2(sb, EXPLICIT_DELALLOC);
1556 if (m->flags & MOPT_CLEAR_ERR)
1557 clear_opt(sb, ERRORS_MASK);
1558 if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
1559 ext4_msg(sb, KERN_ERR, "Cannot change quota "
1560 "options when quota turned on");
1561 return -1; 1522 return -1;
1562 } 1523 }
1563 1524 *journal_ioprio =
1564 if (m->flags & MOPT_NOSUPPORT) { 1525 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1565 ext4_msg(sb, KERN_ERR, "%s option not supported", opt); 1526 } else if (m->flags & MOPT_DATAJ) {
1566 } else if (token == Opt_commit) { 1527 if (is_remount) {
1567 if (arg == 0) 1528 if (!sbi->s_journal)
1568 arg = JBD2_DEFAULT_MAX_COMMIT_AGE; 1529 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1569 sbi->s_commit_interval = HZ * arg; 1530 else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
1570 } else if (token == Opt_max_batch_time) {
1571 if (arg == 0)
1572 arg = EXT4_DEF_MAX_BATCH_TIME;
1573 sbi->s_max_batch_time = arg;
1574 } else if (token == Opt_min_batch_time) {
1575 sbi->s_min_batch_time = arg;
1576 } else if (token == Opt_inode_readahead_blks) {
1577 if (arg > (1 << 30))
1578 return -1;
1579 if (arg && !is_power_of_2(arg)) {
1580 ext4_msg(sb, KERN_ERR, 1531 ext4_msg(sb, KERN_ERR,
1581 "EXT4-fs: inode_readahead_blks"
1582 " must be a power of 2");
1583 return -1;
1584 }
1585 sbi->s_inode_readahead_blks = arg;
1586 } else if (token == Opt_init_itable) {
1587 set_opt(sb, INIT_INODE_TABLE);
1588 if (!args->from)
1589 arg = EXT4_DEF_LI_WAIT_MULT;
1590 sbi->s_li_wait_mult = arg;
1591 } else if (token == Opt_max_dir_size_kb) {
1592 sbi->s_max_dir_size_kb = arg;
1593 } else if (token == Opt_stripe) {
1594 sbi->s_stripe = arg;
1595 } else if (m->flags & MOPT_DATAJ) {
1596 if (is_remount) {
1597 if (!sbi->s_journal)
1598 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1599 else if (test_opt(sb, DATA_FLAGS) !=
1600 m->mount_opt) {
1601 ext4_msg(sb, KERN_ERR,
1602 "Cannot change data mode on remount"); 1532 "Cannot change data mode on remount");
1603 return -1;
1604 }
1605 } else {
1606 clear_opt(sb, DATA_FLAGS);
1607 sbi->s_mount_opt |= m->mount_opt;
1608 }
1609#ifdef CONFIG_QUOTA
1610 } else if (m->flags & MOPT_QFMT) {
1611 if (sb_any_quota_loaded(sb) &&
1612 sbi->s_jquota_fmt != m->mount_opt) {
1613 ext4_msg(sb, KERN_ERR, "Cannot "
1614 "change journaled quota options "
1615 "when quota turned on");
1616 return -1; 1533 return -1;
1617 } 1534 }
1618 sbi->s_jquota_fmt = m->mount_opt;
1619#endif
1620 } else { 1535 } else {
1621 if (!args->from) 1536 clear_opt(sb, DATA_FLAGS);
1622 arg = 1; 1537 sbi->s_mount_opt |= m->mount_opt;
1623 if (m->flags & MOPT_CLEAR)
1624 arg = !arg;
1625 else if (unlikely(!(m->flags & MOPT_SET))) {
1626 ext4_msg(sb, KERN_WARNING,
1627 "buggy handling of option %s", opt);
1628 WARN_ON(1);
1629 return -1;
1630 }
1631 if (arg != 0)
1632 sbi->s_mount_opt |= m->mount_opt;
1633 else
1634 sbi->s_mount_opt &= ~m->mount_opt;
1635 } 1538 }
1636 return 1; 1539#ifdef CONFIG_QUOTA
1540 } else if (m->flags & MOPT_QFMT) {
1541 if (sb_any_quota_loaded(sb) &&
1542 sbi->s_jquota_fmt != m->mount_opt) {
1543 ext4_msg(sb, KERN_ERR, "Cannot change journaled "
1544 "quota options when quota turned on");
1545 return -1;
1546 }
1547 sbi->s_jquota_fmt = m->mount_opt;
1548#endif
1549 } else {
1550 if (!args->from)
1551 arg = 1;
1552 if (m->flags & MOPT_CLEAR)
1553 arg = !arg;
1554 else if (unlikely(!(m->flags & MOPT_SET))) {
1555 ext4_msg(sb, KERN_WARNING,
1556 "buggy handling of option %s", opt);
1557 WARN_ON(1);
1558 return -1;
1559 }
1560 if (arg != 0)
1561 sbi->s_mount_opt |= m->mount_opt;
1562 else
1563 sbi->s_mount_opt &= ~m->mount_opt;
1637 } 1564 }
1638 ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " 1565 return 1;
1639 "or missing value", opt);
1640 return -1;
1641} 1566}
1642 1567
1643static int parse_options(char *options, struct super_block *sb, 1568static int parse_options(char *options, struct super_block *sb,
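
The rewritten handle_mount_opt() above funnels almost every token through the ext4_mount_opts[] table: unknown tokens fall out of the lookup, MOPT_NO_EXT2/MOPT_NO_EXT3 reject options that make no sense when the filesystem is mounted in ext2/ext3 compatibility mode, and MOPT_SET/MOPT_CLEAR flip the corresponding mount flag. A self-contained user-space sketch of that table-driven handling follows; the MOPT_* names echo the diff, everything else is illustrative rather than the kernel code.

#include <stdio.h>
#include <string.h>

#define MOPT_SET	0x01
#define MOPT_CLEAR	0x02
#define MOPT_NO_EXT2	0x04

struct mount_opt {
	const char *name;
	unsigned int bit;	/* mount flag controlled by this option */
	unsigned int flags;	/* MOPT_* behaviour flags */
};

static const struct mount_opt mount_opts[] = {
	{ "discard",	0x01, MOPT_SET },
	{ "nodiscard",	0x01, MOPT_CLEAR },
	{ "noload",	0x02, MOPT_NO_EXT2 | MOPT_SET },
	{ NULL, 0, 0 }
};

static int handle_opt(const char *opt, int is_ext2, unsigned int *mount_flags)
{
	const struct mount_opt *m;

	for (m = mount_opts; m->name; m++)
		if (strcmp(opt, m->name) == 0)
			break;
	if (!m->name) {
		fprintf(stderr, "Unrecognized mount option \"%s\"\n", opt);
		return -1;
	}
	if ((m->flags & MOPT_NO_EXT2) && is_ext2) {
		fprintf(stderr, "Mount option \"%s\" incompatible with ext2\n", opt);
		return -1;
	}
	if (m->flags & MOPT_CLEAR)
		*mount_flags &= ~m->bit;
	else
		*mount_flags |= m->bit;
	return 1;
}

int main(void)
{
	unsigned int flags = 0;

	handle_opt("discard", 0, &flags);	/* sets bit 0x01 */
	handle_opt("noload", 1, &flags);	/* rejected in ext2 mode */
	printf("flags=%#x\n", flags);
	return 0;
}
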
@@ -2776,7 +2701,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2776 break; 2701 break;
2777 } 2702 }
2778 2703
2779 if (group == ngroups) 2704 if (group >= ngroups)
2780 ret = 1; 2705 ret = 1;
2781 2706
2782 if (!ret) { 2707 if (!ret) {
@@ -3016,33 +2941,34 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3016 return elr; 2941 return elr;
3017} 2942}
3018 2943
3019static int ext4_register_li_request(struct super_block *sb, 2944int ext4_register_li_request(struct super_block *sb,
3020 ext4_group_t first_not_zeroed) 2945 ext4_group_t first_not_zeroed)
3021{ 2946{
3022 struct ext4_sb_info *sbi = EXT4_SB(sb); 2947 struct ext4_sb_info *sbi = EXT4_SB(sb);
3023 struct ext4_li_request *elr; 2948 struct ext4_li_request *elr = NULL;
3024 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; 2949 ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3025 int ret = 0; 2950 int ret = 0;
3026 2951
2952 mutex_lock(&ext4_li_mtx);
3027 if (sbi->s_li_request != NULL) { 2953 if (sbi->s_li_request != NULL) {
3028 /* 2954 /*
3029 * Reset timeout so it can be computed again, because 2955 * Reset timeout so it can be computed again, because
3030 * s_li_wait_mult might have changed. 2956 * s_li_wait_mult might have changed.
3031 */ 2957 */
3032 sbi->s_li_request->lr_timeout = 0; 2958 sbi->s_li_request->lr_timeout = 0;
3033 return 0; 2959 goto out;
3034 } 2960 }
3035 2961
3036 if (first_not_zeroed == ngroups || 2962 if (first_not_zeroed == ngroups ||
3037 (sb->s_flags & MS_RDONLY) || 2963 (sb->s_flags & MS_RDONLY) ||
3038 !test_opt(sb, INIT_INODE_TABLE)) 2964 !test_opt(sb, INIT_INODE_TABLE))
3039 return 0; 2965 goto out;
3040 2966
3041 elr = ext4_li_request_new(sb, first_not_zeroed); 2967 elr = ext4_li_request_new(sb, first_not_zeroed);
3042 if (!elr) 2968 if (!elr) {
3043 return -ENOMEM; 2969 ret = -ENOMEM;
3044 2970 goto out;
3045 mutex_lock(&ext4_li_mtx); 2971 }
3046 2972
3047 if (NULL == ext4_li_info) { 2973 if (NULL == ext4_li_info) {
3048 ret = ext4_li_info_new(); 2974 ret = ext4_li_info_new();
@@ -3379,7 +3305,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3379#ifdef CONFIG_EXT4_FS_POSIX_ACL 3305#ifdef CONFIG_EXT4_FS_POSIX_ACL
3380 set_opt(sb, POSIX_ACL); 3306 set_opt(sb, POSIX_ACL);
3381#endif 3307#endif
3382 set_opt(sb, MBLK_IO_SUBMIT);
3383 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3308 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3384 set_opt(sb, JOURNAL_DATA); 3309 set_opt(sb, JOURNAL_DATA);
3385 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3310 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3772,6 +3697,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3772 sbi->s_max_writeback_mb_bump = 128; 3697 sbi->s_max_writeback_mb_bump = 128;
3773 sbi->s_extent_max_zeroout_kb = 32; 3698 sbi->s_extent_max_zeroout_kb = 32;
3774 3699
3700 /* Register extent status tree shrinker */
3701 ext4_es_register_shrinker(sb);
3702
3775 /* 3703 /*
3776 * set up enough so that it can read an inode 3704 * set up enough so that it can read an inode
3777 */ 3705 */
@@ -4008,7 +3936,7 @@ no_journal:
4008 !(sb->s_flags & MS_RDONLY)) { 3936 !(sb->s_flags & MS_RDONLY)) {
4009 err = ext4_enable_quotas(sb); 3937 err = ext4_enable_quotas(sb);
4010 if (err) 3938 if (err)
4011 goto failed_mount7; 3939 goto failed_mount8;
4012 } 3940 }
4013#endif /* CONFIG_QUOTA */ 3941#endif /* CONFIG_QUOTA */
4014 3942
@@ -4035,6 +3963,10 @@ cantfind_ext4:
4035 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3963 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4036 goto failed_mount; 3964 goto failed_mount;
4037 3965
3966#ifdef CONFIG_QUOTA
3967failed_mount8:
3968 kobject_del(&sbi->s_kobj);
3969#endif
4038failed_mount7: 3970failed_mount7:
4039 ext4_unregister_li_request(sb); 3971 ext4_unregister_li_request(sb);
4040failed_mount6: 3972failed_mount6:
@@ -4476,16 +4408,12 @@ static void ext4_clear_journal_err(struct super_block *sb,
4476int ext4_force_commit(struct super_block *sb) 4408int ext4_force_commit(struct super_block *sb)
4477{ 4409{
4478 journal_t *journal; 4410 journal_t *journal;
4479 int ret = 0;
4480 4411
4481 if (sb->s_flags & MS_RDONLY) 4412 if (sb->s_flags & MS_RDONLY)
4482 return 0; 4413 return 0;
4483 4414
4484 journal = EXT4_SB(sb)->s_journal; 4415 journal = EXT4_SB(sb)->s_journal;
4485 if (journal) 4416 return ext4_journal_force_commit(journal);
4486 ret = ext4_journal_force_commit(journal);
4487
4488 return ret;
4489} 4417}
4490 4418
4491static int ext4_sync_fs(struct super_block *sb, int wait) 4419static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -4588,7 +4516,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4588 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 4516 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
4589 int err = 0; 4517 int err = 0;
4590#ifdef CONFIG_QUOTA 4518#ifdef CONFIG_QUOTA
4591 int i; 4519 int i, j;
4592#endif 4520#endif
4593 char *orig_data = kstrdup(data, GFP_KERNEL); 4521 char *orig_data = kstrdup(data, GFP_KERNEL);
4594 4522
@@ -4604,7 +4532,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4604#ifdef CONFIG_QUOTA 4532#ifdef CONFIG_QUOTA
4605 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4533 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4606 for (i = 0; i < MAXQUOTAS; i++) 4534 for (i = 0; i < MAXQUOTAS; i++)
4607 old_opts.s_qf_names[i] = sbi->s_qf_names[i]; 4535 if (sbi->s_qf_names[i]) {
4536 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4537 GFP_KERNEL);
4538 if (!old_opts.s_qf_names[i]) {
4539 for (j = 0; j < i; j++)
4540 kfree(old_opts.s_qf_names[j]);
4541 return -ENOMEM;
4542 }
4543 } else
4544 old_opts.s_qf_names[i] = NULL;
4608#endif 4545#endif
4609 if (sbi->s_journal && sbi->s_journal->j_task->io_context) 4546 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
4610 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; 4547 journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
@@ -4737,9 +4674,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4737#ifdef CONFIG_QUOTA 4674#ifdef CONFIG_QUOTA
4738 /* Release old quota file names */ 4675 /* Release old quota file names */
4739 for (i = 0; i < MAXQUOTAS; i++) 4676 for (i = 0; i < MAXQUOTAS; i++)
4740 if (old_opts.s_qf_names[i] && 4677 kfree(old_opts.s_qf_names[i]);
4741 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4742 kfree(old_opts.s_qf_names[i]);
4743 if (enable_quota) { 4678 if (enable_quota) {
4744 if (sb_any_quota_suspended(sb)) 4679 if (sb_any_quota_suspended(sb))
4745 dquot_resume(sb, -1); 4680 dquot_resume(sb, -1);
@@ -4768,9 +4703,7 @@ restore_opts:
4768#ifdef CONFIG_QUOTA 4703#ifdef CONFIG_QUOTA
4769 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 4704 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4770 for (i = 0; i < MAXQUOTAS; i++) { 4705 for (i = 0; i < MAXQUOTAS; i++) {
4771 if (sbi->s_qf_names[i] && 4706 kfree(sbi->s_qf_names[i]);
4772 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
4773 kfree(sbi->s_qf_names[i]);
4774 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4707 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4775 } 4708 }
4776#endif 4709#endif
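
The two hunks above change the remount path so that the old quota file names are deep-copied with kstrdup() before the new options are parsed; on success the copies are simply freed, and on failure they are put back verbatim, which removes the earlier pointer-comparison bookkeeping. A minimal user-space sketch of that snapshot-and-restore pattern (MAXQUOTAS and the field names are borrowed from the diff for readability, the rest is illustrative):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define MAXQUOTAS 2

struct sb_info {
	char *s_qf_names[MAXQUOTAS];
};

/* parse_ok stands in for "the new mount options were accepted" */
static int remount(struct sb_info *sbi, int parse_ok)
{
	char *old_names[MAXQUOTAS];
	int i, j;

	/* snapshot the current names, unwinding if an allocation fails */
	for (i = 0; i < MAXQUOTAS; i++) {
		old_names[i] = NULL;
		if (!sbi->s_qf_names[i])
			continue;
		old_names[i] = strdup(sbi->s_qf_names[i]);
		if (!old_names[i]) {
			for (j = 0; j < i; j++)
				free(old_names[j]);
			return -ENOMEM;
		}
	}

	/* ... option parsing may replace sbi->s_qf_names[] here ... */

	if (parse_ok) {
		/* new names stay; the snapshot is no longer needed */
		for (i = 0; i < MAXQUOTAS; i++)
			free(old_names[i]);
		return 0;
	}

	/* failure: drop whatever parsing installed and restore the snapshot */
	for (i = 0; i < MAXQUOTAS; i++) {
		free(sbi->s_qf_names[i]);
		sbi->s_qf_names[i] = old_names[i];
	}
	return -EINVAL;
}

int main(void)
{
	struct sb_info sbi = { { strdup("aquota.user"), NULL } };

	return remount(&sbi, 0) == -EINVAL ? 0 : 1;	/* simulate a failed remount */
}
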
@@ -4835,7 +4768,7 @@ static int ext4_write_dquot(struct dquot *dquot)
4835 struct inode *inode; 4768 struct inode *inode;
4836 4769
4837 inode = dquot_to_inode(dquot); 4770 inode = dquot_to_inode(dquot);
4838 handle = ext4_journal_start(inode, 4771 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
4839 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 4772 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
4840 if (IS_ERR(handle)) 4773 if (IS_ERR(handle))
4841 return PTR_ERR(handle); 4774 return PTR_ERR(handle);
@@ -4851,7 +4784,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)
4851 int ret, err; 4784 int ret, err;
4852 handle_t *handle; 4785 handle_t *handle;
4853 4786
4854 handle = ext4_journal_start(dquot_to_inode(dquot), 4787 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4855 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 4788 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
4856 if (IS_ERR(handle)) 4789 if (IS_ERR(handle))
4857 return PTR_ERR(handle); 4790 return PTR_ERR(handle);
@@ -4867,7 +4800,7 @@ static int ext4_release_dquot(struct dquot *dquot)
4867 int ret, err; 4800 int ret, err;
4868 handle_t *handle; 4801 handle_t *handle;
4869 4802
4870 handle = ext4_journal_start(dquot_to_inode(dquot), 4803 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
4871 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 4804 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
4872 if (IS_ERR(handle)) { 4805 if (IS_ERR(handle)) {
4873 /* Release dquot anyway to avoid endless cycle in dqput() */ 4806 /* Release dquot anyway to avoid endless cycle in dqput() */
@@ -4899,7 +4832,7 @@ static int ext4_write_info(struct super_block *sb, int type)
4899 handle_t *handle; 4832 handle_t *handle;
4900 4833
4901 /* Data block + inode block */ 4834 /* Data block + inode block */
4902 handle = ext4_journal_start(sb->s_root->d_inode, 2); 4835 handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
4903 if (IS_ERR(handle)) 4836 if (IS_ERR(handle))
4904 return PTR_ERR(handle); 4837 return PTR_ERR(handle);
4905 ret = dquot_commit_info(sb, type); 4838 ret = dquot_commit_info(sb, type);
@@ -5005,9 +4938,9 @@ static int ext4_enable_quotas(struct super_block *sb)
5005 DQUOT_USAGE_ENABLED); 4938 DQUOT_USAGE_ENABLED);
5006 if (err) { 4939 if (err) {
5007 ext4_warning(sb, 4940 ext4_warning(sb,
5008 "Failed to enable quota (type=%d) " 4941 "Failed to enable quota tracking "
5009 "tracking. Please run e2fsck to fix.", 4942 "(type=%d, err=%d). Please run "
5010 type); 4943 "e2fsck to fix.", type, err);
5011 return err; 4944 return err;
5012 } 4945 }
5013 } 4946 }
@@ -5045,7 +4978,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
5045 4978
5046 /* Update modification times of quota files when userspace can 4979 /* Update modification times of quota files when userspace can
5047 * start looking at them */ 4980 * start looking at them */
5048 handle = ext4_journal_start(inode, 1); 4981 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
5049 if (IS_ERR(handle)) 4982 if (IS_ERR(handle))
5050 goto out; 4983 goto out;
5051 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 4984 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a91ebc2b66f..3a120b277240 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -549,7 +549,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
549 error = ext4_handle_dirty_xattr_block(handle, inode, bh); 549 error = ext4_handle_dirty_xattr_block(handle, inode, bh);
550 if (IS_SYNC(inode)) 550 if (IS_SYNC(inode))
551 ext4_handle_sync(handle); 551 ext4_handle_sync(handle);
552 dquot_free_block(inode, 1); 552 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
553 ea_bdebug(bh, "refcount now=%d; releasing", 553 ea_bdebug(bh, "refcount now=%d; releasing",
554 le32_to_cpu(BHDR(bh)->h_refcount)); 554 le32_to_cpu(BHDR(bh)->h_refcount));
555 } 555 }
@@ -832,7 +832,8 @@ inserted:
832 else { 832 else {
833 /* The old block is released after updating 833 /* The old block is released after updating
834 the inode. */ 834 the inode. */
835 error = dquot_alloc_block(inode, 1); 835 error = dquot_alloc_block(inode,
836 EXT4_C2B(EXT4_SB(sb), 1));
836 if (error) 837 if (error)
837 goto cleanup; 838 goto cleanup;
838 error = ext4_journal_get_write_access(handle, 839 error = ext4_journal_get_write_access(handle,
@@ -886,17 +887,18 @@ inserted:
886 (unsigned long long)block); 887 (unsigned long long)block);
887 888
888 new_bh = sb_getblk(sb, block); 889 new_bh = sb_getblk(sb, block);
889 if (!new_bh) { 890 if (unlikely(!new_bh)) {
891 error = -ENOMEM;
890getblk_failed: 892getblk_failed:
891 ext4_free_blocks(handle, inode, NULL, block, 1, 893 ext4_free_blocks(handle, inode, NULL, block, 1,
892 EXT4_FREE_BLOCKS_METADATA); 894 EXT4_FREE_BLOCKS_METADATA);
893 error = -EIO;
894 goto cleanup; 895 goto cleanup;
895 } 896 }
896 lock_buffer(new_bh); 897 lock_buffer(new_bh);
897 error = ext4_journal_get_create_access(handle, new_bh); 898 error = ext4_journal_get_create_access(handle, new_bh);
898 if (error) { 899 if (error) {
899 unlock_buffer(new_bh); 900 unlock_buffer(new_bh);
901 error = -EIO;
900 goto getblk_failed; 902 goto getblk_failed;
901 } 903 }
902 memcpy(new_bh->b_data, s->base, new_bh->b_size); 904 memcpy(new_bh->b_data, s->base, new_bh->b_size);
@@ -928,7 +930,7 @@ cleanup:
928 return error; 930 return error;
929 931
930cleanup_dquot: 932cleanup_dquot:
931 dquot_free_block(inode, 1); 933 dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
932 goto cleanup; 934 goto cleanup;
933 935
934bad_block: 936bad_block:
@@ -1164,17 +1166,10 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1164{ 1166{
1165 handle_t *handle; 1167 handle_t *handle;
1166 int error, retries = 0; 1168 int error, retries = 0;
1167 int credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); 1169 int credits = ext4_jbd2_credits_xattr(inode);
1168 1170
1169retry: 1171retry:
1170 /* 1172 handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1171 * In case of inline data, we may push out the data to a block,
1172 * So reserve the journal space first.
1173 */
1174 if (ext4_has_inline_data(inode))
1175 credits += ext4_writepage_trans_blocks(inode) + 1;
1176
1177 handle = ext4_journal_start(inode, credits);
1178 if (IS_ERR(handle)) { 1173 if (IS_ERR(handle)) {
1179 error = PTR_ERR(handle); 1174 error = PTR_ERR(handle);
1180 } else { 1175 } else {
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 69eda787a96a..aa25deb5c6cd 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -125,74 +125,6 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
125 struct ext4_xattr_info *i, 125 struct ext4_xattr_info *i,
126 struct ext4_xattr_ibody_find *is); 126 struct ext4_xattr_ibody_find *is);
127 127
128extern int ext4_has_inline_data(struct inode *inode);
129extern int ext4_get_inline_size(struct inode *inode);
130extern int ext4_get_max_inline_size(struct inode *inode);
131extern int ext4_find_inline_data_nolock(struct inode *inode);
132extern void ext4_write_inline_data(struct inode *inode,
133 struct ext4_iloc *iloc,
134 void *buffer, loff_t pos,
135 unsigned int len);
136extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
137 unsigned int len);
138extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
139 unsigned int len);
140extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
141
142extern int ext4_readpage_inline(struct inode *inode, struct page *page);
143extern int ext4_try_to_write_inline_data(struct address_space *mapping,
144 struct inode *inode,
145 loff_t pos, unsigned len,
146 unsigned flags,
147 struct page **pagep);
148extern int ext4_write_inline_data_end(struct inode *inode,
149 loff_t pos, unsigned len,
150 unsigned copied,
151 struct page *page);
152extern struct buffer_head *
153ext4_journalled_write_inline_data(struct inode *inode,
154 unsigned len,
155 struct page *page);
156extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
157 struct inode *inode,
158 loff_t pos, unsigned len,
159 unsigned flags,
160 struct page **pagep,
161 void **fsdata);
162extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
163 unsigned len, unsigned copied,
164 struct page *page);
165extern int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
166 struct inode *inode);
167extern int ext4_try_create_inline_dir(handle_t *handle,
168 struct inode *parent,
169 struct inode *inode);
170extern int ext4_read_inline_dir(struct file *filp,
171 void *dirent, filldir_t filldir,
172 int *has_inline_data);
173extern struct buffer_head *ext4_find_inline_entry(struct inode *dir,
174 const struct qstr *d_name,
175 struct ext4_dir_entry_2 **res_dir,
176 int *has_inline_data);
177extern int ext4_delete_inline_entry(handle_t *handle,
178 struct inode *dir,
179 struct ext4_dir_entry_2 *de_del,
180 struct buffer_head *bh,
181 int *has_inline_data);
182extern int empty_inline_dir(struct inode *dir, int *has_inline_data);
183extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
184 struct ext4_dir_entry_2 **parent_de,
185 int *retval);
186extern int ext4_inline_data_fiemap(struct inode *inode,
187 struct fiemap_extent_info *fieinfo,
188 int *has_inline);
189extern int ext4_try_to_evict_inline_data(handle_t *handle,
190 struct inode *inode,
191 int needed);
192extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
193
194extern int ext4_convert_inline_data(struct inode *inode);
195
196#ifdef CONFIG_EXT4_FS_SECURITY 128#ifdef CONFIG_EXT4_FS_SECURITY
197extern int ext4_init_security(handle_t *handle, struct inode *inode, 129extern int ext4_init_security(handle_t *handle, struct inode *inode,
198 struct inode *dir, const struct qstr *qstr); 130 struct inode *dir, const struct qstr *qstr);
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index e95b94945d5f..137af4255da6 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -191,15 +191,14 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
191 retval = f2fs_getxattr(inode, name_index, "", value, retval); 191 retval = f2fs_getxattr(inode, name_index, "", value, retval);
192 } 192 }
193 193
194 if (retval < 0) { 194 if (retval > 0)
195 if (retval == -ENODATA)
196 acl = NULL;
197 else
198 acl = ERR_PTR(retval);
199 } else {
200 acl = f2fs_acl_from_disk(value, retval); 195 acl = f2fs_acl_from_disk(value, retval);
201 } 196 else if (retval == -ENODATA)
197 acl = NULL;
198 else
199 acl = ERR_PTR(retval);
202 kfree(value); 200 kfree(value);
201
203 if (!IS_ERR(acl)) 202 if (!IS_ERR(acl))
204 set_cached_acl(inode, type, acl); 203 set_cached_acl(inode, type, acl);
205 204
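
The f2fs_get_acl() change above flattens the nested error handling into a three-way triage on the value returned by f2fs_getxattr(): a positive length is parsed into an ACL, -ENODATA simply means no ACL is stored, and any other negative value is propagated as an error. A compact user-space analogue of that triage (the parse step and all names are invented for the sketch):

#include <errno.h>
#include <stdio.h>

/* stand-in parser: any positive size yields a "valid" ACL */
static const char *acl_from_disk(long size)
{
	return size > 0 ? "acl" : NULL;
}

static const char *load_acl(long retval, int *err)
{
	*err = 0;
	if (retval > 0)
		return acl_from_disk(retval);	/* stored ACL: parse it */
	if (retval == -ENODATA)
		return NULL;			/* nothing stored: not an error */
	*err = (int)retval;			/* genuine failure */
	return NULL;
}

int main(void)
{
	int err;

	printf("%s\n", load_acl(16, &err) ? "parsed" : "none");
	load_acl(-ENODATA, &err);
	printf("no acl, err=%d\n", err);
	load_acl(-EIO, &err);
	printf("hard error, err=%d\n", err);
	return 0;
}
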
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6ef36c37e2be..2b6fc131e2ce 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,22 +72,22 @@ static int f2fs_write_meta_page(struct page *page,
72{ 72{
73 struct inode *inode = page->mapping->host; 73 struct inode *inode = page->mapping->host;
74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
75 int err;
76 75
77 wait_on_page_writeback(page); 76 /* Should not write any meta pages, if any IO error was occurred */
78 77 if (wbc->for_reclaim ||
79 err = write_meta_page(sbi, page, wbc); 78 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
80 if (err) { 79 dec_page_count(sbi, F2FS_DIRTY_META);
81 wbc->pages_skipped++; 80 wbc->pages_skipped++;
82 set_page_dirty(page); 81 set_page_dirty(page);
82 return AOP_WRITEPAGE_ACTIVATE;
83 } 83 }
84 84
85 dec_page_count(sbi, F2FS_DIRTY_META); 85 wait_on_page_writeback(page);
86 86
87 /* In this case, we should not unlock this page */ 87 write_meta_page(sbi, page);
88 if (err != AOP_WRITEPAGE_ACTIVATE) 88 dec_page_count(sbi, F2FS_DIRTY_META);
89 unlock_page(page); 89 unlock_page(page);
90 return err; 90 return 0;
91} 91}
92 92
93static int f2fs_write_meta_pages(struct address_space *mapping, 93static int f2fs_write_meta_pages(struct address_space *mapping,
@@ -138,7 +138,10 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
138 BUG_ON(page->mapping != mapping); 138 BUG_ON(page->mapping != mapping);
139 BUG_ON(!PageDirty(page)); 139 BUG_ON(!PageDirty(page));
140 clear_page_dirty_for_io(page); 140 clear_page_dirty_for_io(page);
141 f2fs_write_meta_page(page, &wbc); 141 if (f2fs_write_meta_page(page, &wbc)) {
142 unlock_page(page);
143 break;
144 }
142 if (nwritten++ >= nr_to_write) 145 if (nwritten++ >= nr_to_write)
143 break; 146 break;
144 } 147 }
@@ -161,7 +164,6 @@ static int f2fs_set_meta_page_dirty(struct page *page)
161 if (!PageDirty(page)) { 164 if (!PageDirty(page)) {
162 __set_page_dirty_nobuffers(page); 165 __set_page_dirty_nobuffers(page);
163 inc_page_count(sbi, F2FS_DIRTY_META); 166 inc_page_count(sbi, F2FS_DIRTY_META);
164 F2FS_SET_SB_DIRT(sbi);
165 return 1; 167 return 1;
166 } 168 }
167 return 0; 169 return 0;
@@ -214,22 +216,13 @@ retry:
214 goto retry; 216 goto retry;
215 } 217 }
216 new->ino = ino; 218 new->ino = ino;
217 INIT_LIST_HEAD(&new->list);
218 219
219 /* add new_oentry into list which is sorted by inode number */ 220 /* add new_oentry into list which is sorted by inode number */
220 if (orphan) { 221 if (orphan)
221 struct orphan_inode_entry *prev; 222 list_add(&new->list, this->prev);
222 223 else
223 /* get previous entry */
224 prev = list_entry(orphan->list.prev, typeof(*prev), list);
225 if (&prev->list != head)
226 /* insert new orphan inode entry */
227 list_add(&new->list, &prev->list);
228 else
229 list_add(&new->list, head);
230 } else {
231 list_add_tail(&new->list, head); 224 list_add_tail(&new->list, head);
232 } 225
233 sbi->n_orphans++; 226 sbi->n_orphans++;
234out: 227out:
235 mutex_unlock(&sbi->orphan_inode_mutex); 228 mutex_unlock(&sbi->orphan_inode_mutex);
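
The hunk above simplifies the orphan-inode registration: the list is still kept sorted by inode number, but the new entry is now linked directly in front of the entry found by the lookup loop (list_add(&new->list, this->prev)), or appended at the tail when no such entry exists, instead of re-deriving the predecessor by hand. A self-contained sketch of that sorted insertion using a plain singly linked list (struct and function names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct orphan {
	unsigned long ino;
	struct orphan *next;
};

/* insert ino so the list stays sorted in ascending order */
static void orphan_add(struct orphan **head, unsigned long ino)
{
	struct orphan **pos = head;
	struct orphan *new = malloc(sizeof(*new));

	if (!new)
		return;
	new->ino = ino;

	/* stop at the first entry that is not smaller than the new one */
	while (*pos && (*pos)->ino < ino)
		pos = &(*pos)->next;
	new->next = *pos;
	*pos = new;
}

int main(void)
{
	struct orphan *head = NULL, *p;

	orphan_add(&head, 7);
	orphan_add(&head, 3);
	orphan_add(&head, 5);
	for (p = head; p; p = p->next)
		printf("%lu ", p->ino);	/* prints: 3 5 7 */
	printf("\n");
	return 0;
}
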
@@ -546,7 +539,7 @@ retry:
546/* 539/*
547 * Freeze all the FS-operations for checkpoint. 540 * Freeze all the FS-operations for checkpoint.
548 */ 541 */
549void block_operations(struct f2fs_sb_info *sbi) 542static void block_operations(struct f2fs_sb_info *sbi)
550{ 543{
551 int t; 544 int t;
552 struct writeback_control wbc = { 545 struct writeback_control wbc = {
@@ -718,27 +711,24 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
718 sbi->alloc_valid_block_count = 0; 711 sbi->alloc_valid_block_count = 0;
719 712
720 /* Here, we only have one bio having CP pack */ 713 /* Here, we only have one bio having CP pack */
721 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) 714 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
722 sbi->sb->s_flags |= MS_RDONLY;
723 else
724 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
725 715
726 clear_prefree_segments(sbi); 716 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
727 F2FS_RESET_SB_DIRT(sbi); 717 clear_prefree_segments(sbi);
718 F2FS_RESET_SB_DIRT(sbi);
719 }
728} 720}
729 721
730/* 722/*
731 * We guarantee that this checkpoint procedure should not fail. 723 * We guarantee that this checkpoint procedure should not fail.
732 */ 724 */
733void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount) 725void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
734{ 726{
735 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 727 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
736 unsigned long long ckpt_ver; 728 unsigned long long ckpt_ver;
737 729
738 if (!blocked) { 730 mutex_lock(&sbi->cp_mutex);
739 mutex_lock(&sbi->cp_mutex); 731 block_operations(sbi);
740 block_operations(sbi);
741 }
742 732
743 f2fs_submit_bio(sbi, DATA, true); 733 f2fs_submit_bio(sbi, DATA, true);
744 f2fs_submit_bio(sbi, NODE, true); 734 f2fs_submit_bio(sbi, NODE, true);
@@ -772,7 +762,7 @@ void init_orphan_info(struct f2fs_sb_info *sbi)
772 sbi->n_orphans = 0; 762 sbi->n_orphans = 0;
773} 763}
774 764
775int create_checkpoint_caches(void) 765int __init create_checkpoint_caches(void)
776{ 766{
777 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 767 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
778 sizeof(struct orphan_inode_entry), NULL); 768 sizeof(struct orphan_inode_entry), NULL);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 655aeabc1dd4..7bd22a201125 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -16,6 +16,7 @@
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/bio.h> 18#include <linux/bio.h>
19#include <linux/prefetch.h>
19 20
20#include "f2fs.h" 21#include "f2fs.h"
21#include "node.h" 22#include "node.h"
@@ -546,6 +547,15 @@ redirty_out:
546 547
547#define MAX_DESIRED_PAGES_WP 4096 548#define MAX_DESIRED_PAGES_WP 4096
548 549
550static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
551 void *data)
552{
553 struct address_space *mapping = data;
554 int ret = mapping->a_ops->writepage(page, wbc);
555 mapping_set_error(mapping, ret);
556 return ret;
557}
558
549static int f2fs_write_data_pages(struct address_space *mapping, 559static int f2fs_write_data_pages(struct address_space *mapping,
550 struct writeback_control *wbc) 560 struct writeback_control *wbc)
551{ 561{
@@ -562,7 +572,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
562 572
563 if (!S_ISDIR(inode->i_mode)) 573 if (!S_ISDIR(inode->i_mode))
564 mutex_lock(&sbi->writepages); 574 mutex_lock(&sbi->writepages);
565 ret = generic_writepages(mapping, wbc); 575 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
566 if (!S_ISDIR(inode->i_mode)) 576 if (!S_ISDIR(inode->i_mode))
567 mutex_unlock(&sbi->writepages); 577 mutex_unlock(&sbi->writepages);
568 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL)); 578 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
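
The two hunks above replace generic_writepages() with write_cache_pages() plus a private __f2fs_writepage() callback, whose only extra job is to latch each page's write result on the mapping through mapping_set_error(). A user-space sketch of that "wrap the worker and remember the first error" pattern (every name below is illustrative, not the kernel API):

#include <errno.h>
#include <stdio.h>

struct mapping {
	int error;		/* first error seen, 0 if none */
};

static void mapping_set_error(struct mapping *m, int err)
{
	if (err && !m->error)
		m->error = err;
}

typedef int (*writepage_t)(int page, void *data);

static int write_pages(const int *pages, int npages,
		       writepage_t writepage, void *data)
{
	int i, ret = 0;

	for (i = 0; i < npages; i++) {
		int err = writepage(pages[i], data);

		if (err && !ret)
			ret = err;	/* keep writing, remember first error */
	}
	return ret;
}

static int do_writepage(int page, void *data)
{
	struct mapping *m = data;
	int err = (page == 2) ? -EIO : 0;	/* pretend page 2 fails */

	mapping_set_error(m, err);
	return err;
}

int main(void)
{
	int pages[] = { 1, 2, 3 };
	struct mapping m = { 0 };

	write_pages(pages, 3, do_writepage, &m);
	printf("mapping error: %d\n", m.error);
	return 0;
}
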
@@ -688,6 +698,11 @@ static int f2fs_set_data_page_dirty(struct page *page)
688 return 0; 698 return 0;
689} 699}
690 700
701static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
702{
703 return generic_block_bmap(mapping, block, get_data_block_ro);
704}
705
691const struct address_space_operations f2fs_dblock_aops = { 706const struct address_space_operations f2fs_dblock_aops = {
692 .readpage = f2fs_read_data_page, 707 .readpage = f2fs_read_data_page,
693 .readpages = f2fs_read_data_pages, 708 .readpages = f2fs_read_data_pages,
@@ -699,4 +714,5 @@ const struct address_space_operations f2fs_dblock_aops = {
699 .invalidatepage = f2fs_invalidate_data_page, 714 .invalidatepage = f2fs_invalidate_data_page,
700 .releasepage = f2fs_release_data_page, 715 .releasepage = f2fs_release_data_page,
701 .direct_IO = f2fs_direct_IO, 716 .direct_IO = f2fs_direct_IO,
717 .bmap = f2fs_bmap,
702}; 718};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0e0380a588ad..025b9e2f935d 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -26,6 +26,7 @@
26 26
27static LIST_HEAD(f2fs_stat_list); 27static LIST_HEAD(f2fs_stat_list);
28static struct dentry *debugfs_root; 28static struct dentry *debugfs_root;
29static DEFINE_MUTEX(f2fs_stat_mutex);
29 30
30static void update_general_status(struct f2fs_sb_info *sbi) 31static void update_general_status(struct f2fs_sb_info *sbi)
31{ 32{
@@ -180,18 +181,16 @@ static int stat_show(struct seq_file *s, void *v)
180 int i = 0; 181 int i = 0;
181 int j; 182 int j;
182 183
184 mutex_lock(&f2fs_stat_mutex);
183 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { 185 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
186 char devname[BDEVNAME_SIZE];
184 187
185 mutex_lock(&si->stat_lock);
186 if (!si->sbi) {
187 mutex_unlock(&si->stat_lock);
188 continue;
189 }
190 update_general_status(si->sbi); 188 update_general_status(si->sbi);
191 189
192 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++); 190 seq_printf(s, "\n=====[ partition info(%s). #%d ]=====\n",
193 seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ", 191 bdevname(si->sbi->sb->s_bdev, devname), i++);
194 si->nat_area_segs, si->sit_area_segs); 192 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
193 si->sit_area_segs, si->nat_area_segs);
195 seq_printf(s, "[SSA: %d] [MAIN: %d", 194 seq_printf(s, "[SSA: %d] [MAIN: %d",
196 si->ssa_area_segs, si->main_area_segs); 195 si->ssa_area_segs, si->main_area_segs);
197 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", 196 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
@@ -286,8 +285,8 @@ static int stat_show(struct seq_file *s, void *v)
286 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", 285 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
287 (si->base_mem + si->cache_mem) >> 10, 286 (si->base_mem + si->cache_mem) >> 10,
288 si->base_mem >> 10, si->cache_mem >> 10); 287 si->base_mem >> 10, si->cache_mem >> 10);
289 mutex_unlock(&si->stat_lock);
290 } 288 }
289 mutex_unlock(&f2fs_stat_mutex);
291 return 0; 290 return 0;
292} 291}
293 292
@@ -303,7 +302,7 @@ static const struct file_operations stat_fops = {
303 .release = single_release, 302 .release = single_release,
304}; 303};
305 304
306static int init_stats(struct f2fs_sb_info *sbi) 305int f2fs_build_stats(struct f2fs_sb_info *sbi)
307{ 306{
308 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); 307 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
309 struct f2fs_stat_info *si; 308 struct f2fs_stat_info *si;
@@ -313,9 +312,6 @@ static int init_stats(struct f2fs_sb_info *sbi)
313 return -ENOMEM; 312 return -ENOMEM;
314 313
315 si = sbi->stat_info; 314 si = sbi->stat_info;
316 mutex_init(&si->stat_lock);
317 list_add_tail(&si->stat_list, &f2fs_stat_list);
318
319 si->all_area_segs = le32_to_cpu(raw_super->segment_count); 315 si->all_area_segs = le32_to_cpu(raw_super->segment_count);
320 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); 316 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
321 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); 317 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
@@ -325,21 +321,11 @@ static int init_stats(struct f2fs_sb_info *sbi)
325 si->main_area_zones = si->main_area_sections / 321 si->main_area_zones = si->main_area_sections /
326 le32_to_cpu(raw_super->secs_per_zone); 322 le32_to_cpu(raw_super->secs_per_zone);
327 si->sbi = sbi; 323 si->sbi = sbi;
328 return 0;
329}
330 324
331int f2fs_build_stats(struct f2fs_sb_info *sbi) 325 mutex_lock(&f2fs_stat_mutex);
332{ 326 list_add_tail(&si->stat_list, &f2fs_stat_list);
333 int retval; 327 mutex_unlock(&f2fs_stat_mutex);
334
335 retval = init_stats(sbi);
336 if (retval)
337 return retval;
338
339 if (!debugfs_root)
340 debugfs_root = debugfs_create_dir("f2fs", NULL);
341 328
342 debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
343 return 0; 329 return 0;
344} 330}
345 331
@@ -347,14 +333,22 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
347{ 333{
348 struct f2fs_stat_info *si = sbi->stat_info; 334 struct f2fs_stat_info *si = sbi->stat_info;
349 335
336 mutex_lock(&f2fs_stat_mutex);
350 list_del(&si->stat_list); 337 list_del(&si->stat_list);
351 mutex_lock(&si->stat_lock); 338 mutex_unlock(&f2fs_stat_mutex);
352 si->sbi = NULL; 339
353 mutex_unlock(&si->stat_lock);
354 kfree(sbi->stat_info); 340 kfree(sbi->stat_info);
355} 341}
356 342
357void destroy_root_stats(void) 343void __init f2fs_create_root_stats(void)
344{
345 debugfs_root = debugfs_create_dir("f2fs", NULL);
346 if (debugfs_root)
347 debugfs_create_file("status", S_IRUGO, debugfs_root,
348 NULL, &stat_fops);
349}
350
351void f2fs_destroy_root_stats(void)
358{ 352{
359 debugfs_remove_recursive(debugfs_root); 353 debugfs_remove_recursive(debugfs_root);
360 debugfs_root = NULL; 354 debugfs_root = NULL;
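
The debug.c changes above drop the per-entry si->stat_lock and instead serialize every addition, removal and traversal of the shared f2fs_stat_list behind one global f2fs_stat_mutex, so stat_show() can walk the list without entries disappearing underneath it. A pthread sketch of the same single-lock-around-a-shared-list pattern (list layout and names are illustrative):

#include <pthread.h>
#include <stdio.h>

struct stat_info {
	int value;
	struct stat_info *next;
};

static struct stat_info *stat_list;
static pthread_mutex_t stat_mutex = PTHREAD_MUTEX_INITIALIZER;

static void stat_add(struct stat_info *si)
{
	pthread_mutex_lock(&stat_mutex);
	si->next = stat_list;
	stat_list = si;
	pthread_mutex_unlock(&stat_mutex);
}

static void stat_del(struct stat_info *si)
{
	struct stat_info **pos;

	pthread_mutex_lock(&stat_mutex);
	for (pos = &stat_list; *pos; pos = &(*pos)->next)
		if (*pos == si) {
			*pos = si->next;
			break;
		}
	pthread_mutex_unlock(&stat_mutex);
}

static long stat_sum(void)
{
	struct stat_info *si;
	long sum = 0;

	pthread_mutex_lock(&stat_mutex);
	for (si = stat_list; si; si = si->next)
		sum += si->value;	/* entries cannot vanish mid-walk */
	pthread_mutex_unlock(&stat_mutex);
	return sum;
}

int main(void)
{
	struct stat_info a = { 1, NULL }, b = { 2, NULL };

	stat_add(&a);
	stat_add(&b);
	printf("sum=%ld\n", stat_sum());
	stat_del(&a);
	printf("sum=%ld\n", stat_sum());
	return 0;
}
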
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b4e24f32b54e..a1f38443ecee 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -11,6 +11,7 @@
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include "f2fs.h" 13#include "f2fs.h"
14#include "node.h"
14#include "acl.h" 15#include "acl.h"
15 16
16static unsigned long dir_blocks(struct inode *inode) 17static unsigned long dir_blocks(struct inode *inode)
@@ -74,7 +75,7 @@ static unsigned long dir_block_index(unsigned int level, unsigned int idx)
74 return bidx; 75 return bidx;
75} 76}
76 77
77static bool early_match_name(const char *name, int namelen, 78static bool early_match_name(const char *name, size_t namelen,
78 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 79 f2fs_hash_t namehash, struct f2fs_dir_entry *de)
79{ 80{
80 if (le16_to_cpu(de->name_len) != namelen) 81 if (le16_to_cpu(de->name_len) != namelen)
@@ -87,7 +88,7 @@ static bool early_match_name(const char *name, int namelen,
87} 88}
88 89
89static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 90static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
90 const char *name, int namelen, int *max_slots, 91 const char *name, size_t namelen, int *max_slots,
91 f2fs_hash_t namehash, struct page **res_page) 92 f2fs_hash_t namehash, struct page **res_page)
92{ 93{
93 struct f2fs_dir_entry *de; 94 struct f2fs_dir_entry *de;
@@ -126,7 +127,7 @@ found:
126} 127}
127 128
128static struct f2fs_dir_entry *find_in_level(struct inode *dir, 129static struct f2fs_dir_entry *find_in_level(struct inode *dir,
129 unsigned int level, const char *name, int namelen, 130 unsigned int level, const char *name, size_t namelen,
130 f2fs_hash_t namehash, struct page **res_page) 131 f2fs_hash_t namehash, struct page **res_page)
131{ 132{
132 int s = GET_DENTRY_SLOTS(namelen); 133 int s = GET_DENTRY_SLOTS(namelen);
@@ -181,7 +182,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
181 struct qstr *child, struct page **res_page) 182 struct qstr *child, struct page **res_page)
182{ 183{
183 const char *name = child->name; 184 const char *name = child->name;
184 int namelen = child->len; 185 size_t namelen = child->len;
185 unsigned long npages = dir_blocks(dir); 186 unsigned long npages = dir_blocks(dir);
186 struct f2fs_dir_entry *de = NULL; 187 struct f2fs_dir_entry *de = NULL;
187 f2fs_hash_t name_hash; 188 f2fs_hash_t name_hash;
@@ -264,7 +265,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
264 mutex_unlock_op(sbi, DENTRY_OPS); 265 mutex_unlock_op(sbi, DENTRY_OPS);
265} 266}
266 267
267void init_dent_inode(struct dentry *dentry, struct page *ipage) 268void init_dent_inode(const struct qstr *name, struct page *ipage)
268{ 269{
269 struct f2fs_node *rn; 270 struct f2fs_node *rn;
270 271
@@ -273,20 +274,19 @@ void init_dent_inode(struct dentry *dentry, struct page *ipage)
273 274
274 wait_on_page_writeback(ipage); 275 wait_on_page_writeback(ipage);
275 276
276 /* copy dentry info. to this inode page */ 277 /* copy name info. to this inode page */
277 rn = (struct f2fs_node *)page_address(ipage); 278 rn = (struct f2fs_node *)page_address(ipage);
278 rn->i.i_namelen = cpu_to_le32(dentry->d_name.len); 279 rn->i.i_namelen = cpu_to_le32(name->len);
279 memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len); 280 memcpy(rn->i.i_name, name->name, name->len);
280 set_page_dirty(ipage); 281 set_page_dirty(ipage);
281} 282}
282 283
283static int init_inode_metadata(struct inode *inode, struct dentry *dentry) 284static int init_inode_metadata(struct inode *inode,
285 struct inode *dir, const struct qstr *name)
284{ 286{
285 struct inode *dir = dentry->d_parent->d_inode;
286
287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
288 int err; 288 int err;
289 err = new_inode_page(inode, dentry); 289 err = new_inode_page(inode, name);
290 if (err) 290 if (err)
291 return err; 291 return err;
292 292
@@ -308,7 +308,8 @@ static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); 308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
309 if (IS_ERR(ipage)) 309 if (IS_ERR(ipage))
310 return PTR_ERR(ipage); 310 return PTR_ERR(ipage);
311 init_dent_inode(dentry, ipage); 311 set_cold_node(inode, ipage);
312 init_dent_inode(name, ipage);
312 f2fs_put_page(ipage, 1); 313 f2fs_put_page(ipage, 1);
313 } 314 }
314 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 315 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
@@ -369,7 +370,7 @@ next:
369 goto next; 370 goto next;
370} 371}
371 372
372int f2fs_add_link(struct dentry *dentry, struct inode *inode) 373int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode)
373{ 374{
374 unsigned int bit_pos; 375 unsigned int bit_pos;
375 unsigned int level; 376 unsigned int level;
@@ -378,17 +379,15 @@ int f2fs_add_link(struct dentry *dentry, struct inode *inode)
378 f2fs_hash_t dentry_hash; 379 f2fs_hash_t dentry_hash;
379 struct f2fs_dir_entry *de; 380 struct f2fs_dir_entry *de;
380 unsigned int nbucket, nblock; 381 unsigned int nbucket, nblock;
381 struct inode *dir = dentry->d_parent->d_inode;
382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); 382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
383 const char *name = dentry->d_name.name; 383 size_t namelen = name->len;
384 int namelen = dentry->d_name.len;
385 struct page *dentry_page = NULL; 384 struct page *dentry_page = NULL;
386 struct f2fs_dentry_block *dentry_blk = NULL; 385 struct f2fs_dentry_block *dentry_blk = NULL;
387 int slots = GET_DENTRY_SLOTS(namelen); 386 int slots = GET_DENTRY_SLOTS(namelen);
388 int err = 0; 387 int err = 0;
389 int i; 388 int i;
390 389
391 dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len); 390 dentry_hash = f2fs_dentry_hash(name->name, name->len);
392 level = 0; 391 level = 0;
393 current_depth = F2FS_I(dir)->i_current_depth; 392 current_depth = F2FS_I(dir)->i_current_depth;
394 if (F2FS_I(dir)->chash == dentry_hash) { 393 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -431,7 +430,7 @@ start:
431 ++level; 430 ++level;
432 goto start; 431 goto start;
433add_dentry: 432add_dentry:
434 err = init_inode_metadata(inode, dentry); 433 err = init_inode_metadata(inode, dir, name);
435 if (err) 434 if (err)
436 goto fail; 435 goto fail;
437 436
@@ -440,7 +439,7 @@ add_dentry:
440 de = &dentry_blk->dentry[bit_pos]; 439 de = &dentry_blk->dentry[bit_pos];
441 de->hash_code = dentry_hash; 440 de->hash_code = dentry_hash;
442 de->name_len = cpu_to_le16(namelen); 441 de->name_len = cpu_to_le16(namelen);
443 memcpy(dentry_blk->filename[bit_pos], name, namelen); 442 memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
444 de->ino = cpu_to_le32(inode->i_ino); 443 de->ino = cpu_to_le32(inode->i_ino);
445 set_de_type(de, inode); 444 set_de_type(de, inode);
446 for (i = 0; i < slots; i++) 445 for (i = 0; i < slots; i++)
@@ -501,7 +500,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
501 } 500 }
502 501
503 if (inode) { 502 if (inode) {
504 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 503 inode->i_ctime = CURRENT_TIME;
505 drop_nlink(inode); 504 drop_nlink(inode);
506 if (S_ISDIR(inode->i_mode)) { 505 if (S_ISDIR(inode->i_mode)) {
507 drop_nlink(inode); 506 drop_nlink(inode);
@@ -540,13 +539,13 @@ int f2fs_make_empty(struct inode *inode, struct inode *parent)
540 539
541 de = &dentry_blk->dentry[0]; 540 de = &dentry_blk->dentry[0];
542 de->name_len = cpu_to_le16(1); 541 de->name_len = cpu_to_le16(1);
543 de->hash_code = 0; 542 de->hash_code = f2fs_dentry_hash(".", 1);
544 de->ino = cpu_to_le32(inode->i_ino); 543 de->ino = cpu_to_le32(inode->i_ino);
545 memcpy(dentry_blk->filename[0], ".", 1); 544 memcpy(dentry_blk->filename[0], ".", 1);
546 set_de_type(de, inode); 545 set_de_type(de, inode);
547 546
548 de = &dentry_blk->dentry[1]; 547 de = &dentry_blk->dentry[1];
549 de->hash_code = 0; 548 de->hash_code = f2fs_dentry_hash("..", 2);
550 de->name_len = cpu_to_le16(2); 549 de->name_len = cpu_to_le16(2);
551 de->ino = cpu_to_le32(parent->i_ino); 550 de->ino = cpu_to_le32(parent->i_ino);
552 memcpy(dentry_blk->filename[1], "..", 2); 551 memcpy(dentry_blk->filename[1], "..", 2);
@@ -601,7 +600,7 @@ bool f2fs_empty_dir(struct inode *dir)
601static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) 600static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
602{ 601{
603 unsigned long pos = file->f_pos; 602 unsigned long pos = file->f_pos;
604 struct inode *inode = file->f_dentry->d_inode; 603 struct inode *inode = file_inode(file);
605 unsigned long npages = dir_blocks(inode); 604 unsigned long npages = dir_blocks(inode);
606 unsigned char *types = NULL; 605 unsigned char *types = NULL;
607 unsigned int bit_pos = 0, start_bit_pos = 0; 606 unsigned int bit_pos = 0, start_bit_pos = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a18d63db2fb6..cc2213afdcc7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -104,6 +104,20 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
104} 104}
105 105
106/* 106/*
107 * ioctl commands
108 */
109#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
110#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
111
112#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
113/*
114 * ioctl commands in 32 bit emulation
115 */
116#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
117#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
118#endif
119
120/*
107 * For INODE and NODE manager 121 * For INODE and NODE manager
108 */ 122 */
109#define XATTR_NODE_OFFSET (-1) /* 123#define XATTR_NODE_OFFSET (-1) /*
@@ -141,7 +155,7 @@ struct f2fs_inode_info {
141 155
142 /* Use below internally in f2fs*/ 156 /* Use below internally in f2fs*/
143 unsigned long flags; /* use to pass per-file flags */ 157 unsigned long flags; /* use to pass per-file flags */
144 unsigned long long data_version;/* lastes version of data for fsync */ 158 unsigned long long data_version;/* latest version of data for fsync */
145 atomic_t dirty_dents; /* # of dirty dentry pages */ 159 atomic_t dirty_dents; /* # of dirty dentry pages */
146 f2fs_hash_t chash; /* hash value of given file name */ 160 f2fs_hash_t chash; /* hash value of given file name */
147 unsigned int clevel; /* maximum level of given file name */ 161 unsigned int clevel; /* maximum level of given file name */
@@ -211,11 +225,11 @@ struct dnode_of_data {
211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, 225static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
212 struct page *ipage, struct page *npage, nid_t nid) 226 struct page *ipage, struct page *npage, nid_t nid)
213{ 227{
228 memset(dn, 0, sizeof(*dn));
214 dn->inode = inode; 229 dn->inode = inode;
215 dn->inode_page = ipage; 230 dn->inode_page = ipage;
216 dn->node_page = npage; 231 dn->node_page = npage;
217 dn->nid = nid; 232 dn->nid = nid;
218 dn->inode_page_locked = 0;
219} 233}
220 234
221/* 235/*
@@ -573,6 +587,14 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
573 return atomic_read(&sbi->nr_pages[count_type]); 587 return atomic_read(&sbi->nr_pages[count_type]);
574} 588}
575 589
590static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
591{
592 unsigned int pages_per_sec = sbi->segs_per_sec *
593 (1 << sbi->log_blocks_per_seg);
594 return ((get_pages(sbi, block_type) + pages_per_sec - 1)
595 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
596}
597
576static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 598static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
577{ 599{
578 block_t ret; 600 block_t ret;
@@ -842,12 +864,12 @@ void f2fs_truncate(struct inode *);
842int f2fs_setattr(struct dentry *, struct iattr *); 864int f2fs_setattr(struct dentry *, struct iattr *);
843int truncate_hole(struct inode *, pgoff_t, pgoff_t); 865int truncate_hole(struct inode *, pgoff_t, pgoff_t);
844long f2fs_ioctl(struct file *, unsigned int, unsigned long); 866long f2fs_ioctl(struct file *, unsigned int, unsigned long);
867long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
845 868
846/* 869/*
847 * inode.c 870 * inode.c
848 */ 871 */
849void f2fs_set_inode_flags(struct inode *); 872void f2fs_set_inode_flags(struct inode *);
850struct inode *f2fs_iget_nowait(struct super_block *, unsigned long);
851struct inode *f2fs_iget(struct super_block *, unsigned long); 873struct inode *f2fs_iget(struct super_block *, unsigned long);
852void update_inode(struct inode *, struct page *); 874void update_inode(struct inode *, struct page *);
853int f2fs_write_inode(struct inode *, struct writeback_control *); 875int f2fs_write_inode(struct inode *, struct writeback_control *);
@@ -867,21 +889,29 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
867ino_t f2fs_inode_by_name(struct inode *, struct qstr *); 889ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
868void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 890void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
869 struct page *, struct inode *); 891 struct page *, struct inode *);
870void init_dent_inode(struct dentry *, struct page *); 892void init_dent_inode(const struct qstr *, struct page *);
871int f2fs_add_link(struct dentry *, struct inode *); 893int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
872void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 894void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
873int f2fs_make_empty(struct inode *, struct inode *); 895int f2fs_make_empty(struct inode *, struct inode *);
874bool f2fs_empty_dir(struct inode *); 896bool f2fs_empty_dir(struct inode *);
875 897
898static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
899{
900 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
901 inode);
902}
903
876/* 904/*
877 * super.c 905 * super.c
878 */ 906 */
879int f2fs_sync_fs(struct super_block *, int); 907int f2fs_sync_fs(struct super_block *, int);
908extern __printf(3, 4)
909void f2fs_msg(struct super_block *, const char *, const char *, ...);
880 910
881/* 911/*
882 * hash.c 912 * hash.c
883 */ 913 */
884f2fs_hash_t f2fs_dentry_hash(const char *, int); 914f2fs_hash_t f2fs_dentry_hash(const char *, size_t);
885 915
886/* 916/*
887 * node.c 917 * node.c
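The f2fs_msg() declaration above carries a __printf(3, 4) annotation, which the kernel defines as GCC's format attribute, so every call site gets its format string checked against its arguments at compile time. A small stand-in to show the effect (demo_msg and its output format are illustrative, not the f2fs implementation):

    #include <stdarg.h>
    #include <stdio.h>

    /* the kernel's __printf() macro expands to this attribute */
    #define __printf(a, b) __attribute__((format(printf, a, b)))

    struct super_block;     /* opaque here; only the prototype shape matters */

    static __printf(3, 4)
    void demo_msg(struct super_block *sb, const char *level, const char *fmt, ...)
    {
        va_list args;

        (void)sb;
        va_start(args, fmt);
        fprintf(stderr, "F2FS-fs (%s): ", level);
        vfprintf(stderr, fmt, args);
        fputc('\n', stderr);
        va_end(args);
    }

    int main(void)
    {
        demo_msg(NULL, "KERN_ERR", "invalid blocksize %u", 4096u);
        /*
         * demo_msg(NULL, "KERN_ERR", "invalid blocksize %u", "oops");
         * would now trigger a -Wformat warning thanks to __printf(3, 4).
         */
        return 0;
    }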
@@ -894,7 +924,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
894int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); 924int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
895int truncate_inode_blocks(struct inode *, pgoff_t); 925int truncate_inode_blocks(struct inode *, pgoff_t);
896int remove_inode_page(struct inode *); 926int remove_inode_page(struct inode *);
897int new_inode_page(struct inode *, struct dentry *); 927int new_inode_page(struct inode *, const struct qstr *);
898struct page *new_node_page(struct dnode_of_data *, unsigned int); 928struct page *new_node_page(struct dnode_of_data *, unsigned int);
899void ra_node_page(struct f2fs_sb_info *, nid_t); 929void ra_node_page(struct f2fs_sb_info *, nid_t);
900struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 930struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -912,7 +942,7 @@ int restore_node_summary(struct f2fs_sb_info *, unsigned int,
912void flush_nat_entries(struct f2fs_sb_info *); 942void flush_nat_entries(struct f2fs_sb_info *);
913int build_node_manager(struct f2fs_sb_info *); 943int build_node_manager(struct f2fs_sb_info *);
914void destroy_node_manager(struct f2fs_sb_info *); 944void destroy_node_manager(struct f2fs_sb_info *);
915int create_node_manager_caches(void); 945int __init create_node_manager_caches(void);
916void destroy_node_manager_caches(void); 946void destroy_node_manager_caches(void);
917 947
918/* 948/*
@@ -927,8 +957,7 @@ void allocate_new_segments(struct f2fs_sb_info *);
927struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 957struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
928struct bio *f2fs_bio_alloc(struct block_device *, int); 958struct bio *f2fs_bio_alloc(struct block_device *, int);
929void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); 959void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
930int write_meta_page(struct f2fs_sb_info *, struct page *, 960void write_meta_page(struct f2fs_sb_info *, struct page *);
931 struct writeback_control *);
932void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, 961void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
933 block_t, block_t *); 962 block_t, block_t *);
934void write_data_page(struct inode *, struct page *, struct dnode_of_data*, 963void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
@@ -961,10 +990,9 @@ int get_valid_checkpoint(struct f2fs_sb_info *);
961void set_dirty_dir_page(struct inode *, struct page *); 990void set_dirty_dir_page(struct inode *, struct page *);
962void remove_dirty_dir_inode(struct inode *); 991void remove_dirty_dir_inode(struct inode *);
963void sync_dirty_dir_inodes(struct f2fs_sb_info *); 992void sync_dirty_dir_inodes(struct f2fs_sb_info *);
964void block_operations(struct f2fs_sb_info *); 993void write_checkpoint(struct f2fs_sb_info *, bool);
965void write_checkpoint(struct f2fs_sb_info *, bool, bool);
966void init_orphan_info(struct f2fs_sb_info *); 994void init_orphan_info(struct f2fs_sb_info *);
967int create_checkpoint_caches(void); 995int __init create_checkpoint_caches(void);
968void destroy_checkpoint_caches(void); 996void destroy_checkpoint_caches(void);
969 997
970/* 998/*
@@ -984,9 +1012,9 @@ int do_write_data_page(struct page *);
984int start_gc_thread(struct f2fs_sb_info *); 1012int start_gc_thread(struct f2fs_sb_info *);
985void stop_gc_thread(struct f2fs_sb_info *); 1013void stop_gc_thread(struct f2fs_sb_info *);
986block_t start_bidx_of_node(unsigned int); 1014block_t start_bidx_of_node(unsigned int);
987int f2fs_gc(struct f2fs_sb_info *, int); 1015int f2fs_gc(struct f2fs_sb_info *);
988void build_gc_manager(struct f2fs_sb_info *); 1016void build_gc_manager(struct f2fs_sb_info *);
989int create_gc_caches(void); 1017int __init create_gc_caches(void);
990void destroy_gc_caches(void); 1018void destroy_gc_caches(void);
991 1019
992/* 1020/*
@@ -1058,7 +1086,8 @@ struct f2fs_stat_info {
1058 1086
1059int f2fs_build_stats(struct f2fs_sb_info *); 1087int f2fs_build_stats(struct f2fs_sb_info *);
1060void f2fs_destroy_stats(struct f2fs_sb_info *); 1088void f2fs_destroy_stats(struct f2fs_sb_info *);
1061void destroy_root_stats(void); 1089void __init f2fs_create_root_stats(void);
1090void f2fs_destroy_root_stats(void);
1062#else 1091#else
1063#define stat_inc_call_count(si) 1092#define stat_inc_call_count(si)
1064#define stat_inc_seg_count(si, type) 1093#define stat_inc_seg_count(si, type)
@@ -1068,7 +1097,8 @@ void destroy_root_stats(void);
1068 1097
1069static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1098static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1070static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1099static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
1071static inline void destroy_root_stats(void) { } 1100static inline void __init f2fs_create_root_stats(void) { }
1101static inline void f2fs_destroy_root_stats(void) { }
1072#endif 1102#endif
1073 1103
1074extern const struct file_operations f2fs_dir_operations; 1104extern const struct file_operations f2fs_dir_operations;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f9e085dfb1f0..b7a053d4c6d3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -15,6 +15,7 @@
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/falloc.h> 16#include <linux/falloc.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/compat.h>
18#include <linux/uaccess.h> 19#include <linux/uaccess.h>
19#include <linux/mount.h> 20#include <linux/mount.h>
20 21
@@ -96,8 +97,9 @@ out:
96} 97}
97 98
98static const struct vm_operations_struct f2fs_file_vm_ops = { 99static const struct vm_operations_struct f2fs_file_vm_ops = {
99 .fault = filemap_fault, 100 .fault = filemap_fault,
100 .page_mkwrite = f2fs_vm_page_mkwrite, 101 .page_mkwrite = f2fs_vm_page_mkwrite,
102 .remap_pages = generic_file_remap_pages,
101}; 103};
102 104
103static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode) 105static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
@@ -137,6 +139,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
137 if (ret) 139 if (ret)
138 return ret; 140 return ret;
139 141
142 /* guarantee free sections for fsync */
143 f2fs_balance_fs(sbi);
144
140 mutex_lock(&inode->i_mutex); 145 mutex_lock(&inode->i_mutex);
141 146
142 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 147 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
@@ -153,22 +158,24 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
153 158
154 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) 159 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
155 need_cp = true; 160 need_cp = true;
156 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP)) 161 else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
157 need_cp = true; 162 need_cp = true;
158 if (!space_for_roll_forward(sbi)) 163 else if (!space_for_roll_forward(sbi))
159 need_cp = true; 164 need_cp = true;
160 if (need_to_sync_dir(sbi, inode)) 165 else if (need_to_sync_dir(sbi, inode))
161 need_cp = true; 166 need_cp = true;
162 167
163 f2fs_write_inode(inode, NULL);
164
165 if (need_cp) { 168 if (need_cp) {
166 /* all the dirty node pages should be flushed for POR */ 169 /* all the dirty node pages should be flushed for POR */
167 ret = f2fs_sync_fs(inode->i_sb, 1); 170 ret = f2fs_sync_fs(inode->i_sb, 1);
168 clear_inode_flag(F2FS_I(inode), FI_NEED_CP); 171 clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
169 } else { 172 } else {
170 while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0) 173 /* if there is no written node page, write its inode page */
171 f2fs_write_inode(inode, NULL); 174 while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
175 ret = f2fs_write_inode(inode, NULL);
176 if (ret)
177 goto out;
178 }
172 filemap_fdatawait_range(sbi->node_inode->i_mapping, 179 filemap_fdatawait_range(sbi->node_inode->i_mapping,
173 0, LONG_MAX); 180 0, LONG_MAX);
174 } 181 }
@@ -292,8 +299,6 @@ void f2fs_truncate(struct inode *inode)
292 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 299 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
293 mark_inode_dirty(inode); 300 mark_inode_dirty(inode);
294 } 301 }
295
296 f2fs_balance_fs(F2FS_SB(inode->i_sb));
297} 302}
298 303
299static int f2fs_getattr(struct vfsmount *mnt, 304static int f2fs_getattr(struct vfsmount *mnt,
@@ -350,6 +355,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
350 attr->ia_size != i_size_read(inode)) { 355 attr->ia_size != i_size_read(inode)) {
351 truncate_setsize(inode, attr->ia_size); 356 truncate_setsize(inode, attr->ia_size);
352 f2fs_truncate(inode); 357 f2fs_truncate(inode);
358 f2fs_balance_fs(F2FS_SB(inode->i_sb));
353 } 359 }
354 360
355 __setattr_copy(inode, attr); 361 __setattr_copy(inode, attr);
@@ -381,12 +387,17 @@ const struct inode_operations f2fs_file_inode_operations = {
381static void fill_zero(struct inode *inode, pgoff_t index, 387static void fill_zero(struct inode *inode, pgoff_t index,
382 loff_t start, loff_t len) 388 loff_t start, loff_t len)
383{ 389{
390 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
384 struct page *page; 391 struct page *page;
385 392
386 if (!len) 393 if (!len)
387 return; 394 return;
388 395
396 f2fs_balance_fs(sbi);
397
398 mutex_lock_op(sbi, DATA_NEW);
389 page = get_new_data_page(inode, index, false); 399 page = get_new_data_page(inode, index, false);
400 mutex_unlock_op(sbi, DATA_NEW);
390 401
391 if (!IS_ERR(page)) { 402 if (!IS_ERR(page)) {
392 wait_on_page_writeback(page); 403 wait_on_page_writeback(page);
@@ -405,6 +416,8 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
405 struct dnode_of_data dn; 416 struct dnode_of_data dn;
406 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 417 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
407 418
419 f2fs_balance_fs(sbi);
420
408 mutex_lock_op(sbi, DATA_TRUNC); 421 mutex_lock_op(sbi, DATA_TRUNC);
409 set_new_dnode(&dn, inode, NULL, NULL, 0); 422 set_new_dnode(&dn, inode, NULL, NULL, 0);
410 err = get_dnode_of_data(&dn, index, RDONLY_NODE); 423 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
@@ -532,7 +545,6 @@ static long f2fs_fallocate(struct file *file, int mode,
532 loff_t offset, loff_t len) 545 loff_t offset, loff_t len)
533{ 546{
534 struct inode *inode = file->f_path.dentry->d_inode; 547 struct inode *inode = file->f_path.dentry->d_inode;
535 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
536 long ret; 548 long ret;
537 549
538 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 550 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -543,7 +555,10 @@ static long f2fs_fallocate(struct file *file, int mode,
543 else 555 else
544 ret = expand_inode_data(inode, offset, len, mode); 556 ret = expand_inode_data(inode, offset, len, mode);
545 557
546 f2fs_balance_fs(sbi); 558 if (!ret) {
559 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
560 mark_inode_dirty(inode);
561 }
547 return ret; 562 return ret;
548} 563}
549 564
@@ -620,6 +635,23 @@ out:
620 } 635 }
621} 636}
622 637
638#ifdef CONFIG_COMPAT
639long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
640{
641 switch (cmd) {
642 case F2FS_IOC32_GETFLAGS:
643 cmd = F2FS_IOC_GETFLAGS;
644 break;
645 case F2FS_IOC32_SETFLAGS:
646 cmd = F2FS_IOC_SETFLAGS;
647 break;
648 default:
649 return -ENOIOCTLCMD;
650 }
651 return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
652}
653#endif
654
623const struct file_operations f2fs_file_operations = { 655const struct file_operations f2fs_file_operations = {
624 .llseek = generic_file_llseek, 656 .llseek = generic_file_llseek,
625 .read = do_sync_read, 657 .read = do_sync_read,
@@ -631,6 +663,9 @@ const struct file_operations f2fs_file_operations = {
631 .fsync = f2fs_sync_file, 663 .fsync = f2fs_sync_file,
632 .fallocate = f2fs_fallocate, 664 .fallocate = f2fs_fallocate,
633 .unlocked_ioctl = f2fs_ioctl, 665 .unlocked_ioctl = f2fs_ioctl,
666#ifdef CONFIG_COMPAT
667 .compat_ioctl = f2fs_compat_ioctl,
668#endif
634 .splice_read = generic_file_splice_read, 669 .splice_read = generic_file_splice_read,
635 .splice_write = generic_file_splice_write, 670 .splice_write = generic_file_splice_write,
636}; 671};
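The new f2fs_compat_ioctl() above follows the usual compat pattern: translate the 32-bit ioctl command to its native counterpart, convert the user pointer with compat_ptr(), and reject anything else with -ENOIOCTLCMD. A self-contained sketch of that shape, with made-up command values and a stub in place of the real handler:

    #include <stdio.h>

    #define ENOIOCTLCMD 515                 /* kernel-internal "not handled" code */

    #define NATIVE_GETFLAGS 0x01
    #define NATIVE_SETFLAGS 0x02
    #define COMPAT_GETFLAGS 0x81            /* what a 32-bit caller would pass */
    #define COMPAT_SETFLAGS 0x82

    static long native_ioctl(unsigned int cmd, unsigned long arg)
    {
        printf("native ioctl: cmd=%#x arg=%#lx\n", cmd, arg);
        return 0;
    }

    static long compat_ioctl(unsigned int cmd, unsigned long arg)
    {
        switch (cmd) {
        case COMPAT_GETFLAGS:
            cmd = NATIVE_GETFLAGS;
            break;
        case COMPAT_SETFLAGS:
            cmd = NATIVE_SETFLAGS;
            break;
        default:
            /* unknown command: let the caller report an error */
            return -ENOIOCTLCMD;
        }
        /* in the kernel, arg would also go through compat_ptr() here */
        return native_ioctl(cmd, arg);
    }

    int main(void)
    {
        compat_ioctl(COMPAT_GETFLAGS, 0x1000);
        return compat_ioctl(0xff, 0) == -ENOIOCTLCMD ? 0 : 1;
    }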
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 644aa3808273..94b8a0c48453 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -44,10 +44,10 @@ static int gc_thread_func(void *data)
44 if (kthread_should_stop()) 44 if (kthread_should_stop())
45 break; 45 break;
46 46
47 f2fs_balance_fs(sbi); 47 if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
48 48 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
49 if (!test_opt(sbi, BG_GC))
50 continue; 49 continue;
50 }
51 51
52 /* 52 /*
53 * [GC triggering condition] 53 * [GC triggering condition]
@@ -78,7 +78,8 @@ static int gc_thread_func(void *data)
78 78
79 sbi->bg_gc++; 79 sbi->bg_gc++;
80 80
81 if (f2fs_gc(sbi, 1) == GC_NONE) 81 /* if return value is not zero, no victim was selected */
82 if (f2fs_gc(sbi))
82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME; 83 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) 84 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
84 wait_ms = GC_THREAD_MAX_SLEEP_TIME; 85 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
@@ -90,7 +91,10 @@ static int gc_thread_func(void *data)
90int start_gc_thread(struct f2fs_sb_info *sbi) 91int start_gc_thread(struct f2fs_sb_info *sbi)
91{ 92{
92 struct f2fs_gc_kthread *gc_th; 93 struct f2fs_gc_kthread *gc_th;
94 dev_t dev = sbi->sb->s_bdev->bd_dev;
93 95
96 if (!test_opt(sbi, BG_GC))
97 return 0;
94 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 98 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
95 if (!gc_th) 99 if (!gc_th)
96 return -ENOMEM; 100 return -ENOMEM;
@@ -98,9 +102,10 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
98 sbi->gc_thread = gc_th; 102 sbi->gc_thread = gc_th;
99 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); 103 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
100 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, 104 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
101 GC_THREAD_NAME); 105 "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
102 if (IS_ERR(gc_th->f2fs_gc_task)) { 106 if (IS_ERR(gc_th->f2fs_gc_task)) {
103 kfree(gc_th); 107 kfree(gc_th);
108 sbi->gc_thread = NULL;
104 return -ENOMEM; 109 return -ENOMEM;
105 } 110 }
106 return 0; 111 return 0;
@@ -141,6 +146,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
141static unsigned int get_max_cost(struct f2fs_sb_info *sbi, 146static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
142 struct victim_sel_policy *p) 147 struct victim_sel_policy *p)
143{ 148{
149 /* SSR allocates in a segment unit */
150 if (p->alloc_mode == SSR)
151 return 1 << sbi->log_blocks_per_seg;
144 if (p->gc_mode == GC_GREEDY) 152 if (p->gc_mode == GC_GREEDY)
145 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit; 153 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
146 else if (p->gc_mode == GC_CB) 154 else if (p->gc_mode == GC_CB)
@@ -356,7 +364,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
356 sentry = get_seg_entry(sbi, segno); 364 sentry = get_seg_entry(sbi, segno);
357 ret = f2fs_test_bit(offset, sentry->cur_valid_map); 365 ret = f2fs_test_bit(offset, sentry->cur_valid_map);
358 mutex_unlock(&sit_i->sentry_lock); 366 mutex_unlock(&sit_i->sentry_lock);
359 return ret ? GC_OK : GC_NEXT; 367 return ret;
360} 368}
361 369
362/* 370/*
@@ -364,7 +372,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
364 * On validity, copy that node with cold status, otherwise (invalid node) 372 * On validity, copy that node with cold status, otherwise (invalid node)
365 * ignore that. 373 * ignore that.
366 */ 374 */
367static int gc_node_segment(struct f2fs_sb_info *sbi, 375static void gc_node_segment(struct f2fs_sb_info *sbi,
368 struct f2fs_summary *sum, unsigned int segno, int gc_type) 376 struct f2fs_summary *sum, unsigned int segno, int gc_type)
369{ 377{
370 bool initial = true; 378 bool initial = true;
@@ -376,23 +384,12 @@ next_step:
376 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 384 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
377 nid_t nid = le32_to_cpu(entry->nid); 385 nid_t nid = le32_to_cpu(entry->nid);
378 struct page *node_page; 386 struct page *node_page;
379 int err;
380 387
381 /* 388 /* stop BG_GC if there are not enough free sections. */
382 * It makes sure that free segments are able to write 389 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
383 * all the dirty node pages before CP after this CP. 390 return;
384 * So let's check the space of dirty node pages.
385 */
386 if (should_do_checkpoint(sbi)) {
387 mutex_lock(&sbi->cp_mutex);
388 block_operations(sbi);
389 return GC_BLOCKED;
390 }
391 391
392 err = check_valid_map(sbi, segno, off); 392 if (check_valid_map(sbi, segno, off) == 0)
393 if (err == GC_ERROR)
394 return err;
395 else if (err == GC_NEXT)
396 continue; 393 continue;
397 394
398 if (initial) { 395 if (initial) {
@@ -422,36 +419,33 @@ next_step:
422 }; 419 };
423 sync_node_pages(sbi, 0, &wbc); 420 sync_node_pages(sbi, 0, &wbc);
424 } 421 }
425 return GC_DONE;
426} 422}
427 423
428/* 424/*
429 * Calculate start block index that this node page contains 425 * Calculate start block index indicating the given node offset.
426 * Be careful: the caller must pass a node offset that refers to a direct
427 * node block. Passing an offset that points to any other type of node
428 * block, such as an indirect or double indirect node block, is a bug in
429 * the caller.
430 */ 430 */
431block_t start_bidx_of_node(unsigned int node_ofs) 431block_t start_bidx_of_node(unsigned int node_ofs)
432{ 432{
433 block_t start_bidx; 433 unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
434 unsigned int bidx, indirect_blks; 434 unsigned int bidx;
435 int dec;
436 435
437 indirect_blks = 2 * NIDS_PER_BLOCK + 4; 436 if (node_ofs == 0)
437 return 0;
438 438
439 start_bidx = 1; 439 if (node_ofs <= 2) {
440 if (node_ofs == 0) {
441 start_bidx = 0;
442 } else if (node_ofs <= 2) {
443 bidx = node_ofs - 1; 440 bidx = node_ofs - 1;
444 } else if (node_ofs <= indirect_blks) { 441 } else if (node_ofs <= indirect_blks) {
445 dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1); 442 int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
446 bidx = node_ofs - 2 - dec; 443 bidx = node_ofs - 2 - dec;
447 } else { 444 } else {
448 dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); 445 int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
449 bidx = node_ofs - 5 - dec; 446 bidx = node_ofs - 5 - dec;
450 } 447 }
451 448 return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
452 if (start_bidx)
453 start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
454 return start_bidx;
455} 449}
456 450
457static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 451static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
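The rewritten start_bidx_of_node() above drops the start_bidx bookkeeping variable and returns early for the inode (offset 0). A standalone sketch of the arithmetic; the three constants are placeholders for the layout values defined in f2fs_fs.h, and the dec term accounts for indirect node blocks, which carry no data addresses of their own:

    #include <stdio.h>

    #define NIDS_PER_BLOCK  1018U   /* placeholder */
    #define ADDRS_PER_BLOCK 1018U   /* placeholder */
    #define ADDRS_PER_INODE  923U   /* placeholder */

    /* only valid for offsets of the inode or of direct node blocks */
    static unsigned int start_bidx_of_node(unsigned int node_ofs)
    {
        unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
        unsigned int bidx;

        if (node_ofs == 0)
            return 0;

        if (node_ofs <= 2) {
            bidx = node_ofs - 1;
        } else if (node_ofs <= indirect_blks) {
            unsigned int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
            bidx = node_ofs - 2 - dec;
        } else {
            unsigned int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
            bidx = node_ofs - 5 - dec;
        }
        return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
    }

    int main(void)
    {
        unsigned int ofs[] = { 0, 1, 2, 4, 5, NIDS_PER_BLOCK + 3 };

        for (unsigned int i = 0; i < sizeof(ofs) / sizeof(ofs[0]); i++)
            printf("node_ofs %u -> start_bidx %u\n",
                   ofs[i], start_bidx_of_node(ofs[i]));
        return 0;
    }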
@@ -467,13 +461,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
467 461
468 node_page = get_node_page(sbi, nid); 462 node_page = get_node_page(sbi, nid);
469 if (IS_ERR(node_page)) 463 if (IS_ERR(node_page))
470 return GC_NEXT; 464 return 0;
471 465
472 get_node_info(sbi, nid, dni); 466 get_node_info(sbi, nid, dni);
473 467
474 if (sum->version != dni->version) { 468 if (sum->version != dni->version) {
475 f2fs_put_page(node_page, 1); 469 f2fs_put_page(node_page, 1);
476 return GC_NEXT; 470 return 0;
477 } 471 }
478 472
479 *nofs = ofs_of_node(node_page); 473 *nofs = ofs_of_node(node_page);
@@ -481,8 +475,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
481 f2fs_put_page(node_page, 1); 475 f2fs_put_page(node_page, 1);
482 476
483 if (source_blkaddr != blkaddr) 477 if (source_blkaddr != blkaddr)
484 return GC_NEXT; 478 return 0;
485 return GC_OK; 479 return 1;
486} 480}
487 481
488static void move_data_page(struct inode *inode, struct page *page, int gc_type) 482static void move_data_page(struct inode *inode, struct page *page, int gc_type)
@@ -523,13 +517,13 @@ out:
523 * If the parent node is not valid or the data block address is different, 517 * If the parent node is not valid or the data block address is different,
524 * the victim data block is ignored. 518 * the victim data block is ignored.
525 */ 519 */
526static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 520static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
527 struct list_head *ilist, unsigned int segno, int gc_type) 521 struct list_head *ilist, unsigned int segno, int gc_type)
528{ 522{
529 struct super_block *sb = sbi->sb; 523 struct super_block *sb = sbi->sb;
530 struct f2fs_summary *entry; 524 struct f2fs_summary *entry;
531 block_t start_addr; 525 block_t start_addr;
532 int err, off; 526 int off;
533 int phase = 0; 527 int phase = 0;
534 528
535 start_addr = START_BLOCK(sbi, segno); 529 start_addr = START_BLOCK(sbi, segno);
@@ -543,22 +537,11 @@ next_step:
543 unsigned int ofs_in_node, nofs; 537 unsigned int ofs_in_node, nofs;
544 block_t start_bidx; 538 block_t start_bidx;
545 539
546 /* 540 /* stop BG_GC if there are not enough free sections. */
547 * It makes sure that free segments are able to write 541 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
548 * all the dirty node pages before CP after this CP. 542 return;
549 * So let's check the space of dirty node pages.
550 */
551 if (should_do_checkpoint(sbi)) {
552 mutex_lock(&sbi->cp_mutex);
553 block_operations(sbi);
554 err = GC_BLOCKED;
555 goto stop;
556 }
557 543
558 err = check_valid_map(sbi, segno, off); 544 if (check_valid_map(sbi, segno, off) == 0)
559 if (err == GC_ERROR)
560 goto stop;
561 else if (err == GC_NEXT)
562 continue; 545 continue;
563 546
564 if (phase == 0) { 547 if (phase == 0) {
@@ -567,10 +550,7 @@ next_step:
567 } 550 }
568 551
569 /* Get an inode by ino with checking validity */ 552 /* Get an inode by ino with checking validity */
570 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); 553 if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0)
571 if (err == GC_ERROR)
572 goto stop;
573 else if (err == GC_NEXT)
574 continue; 554 continue;
575 555
576 if (phase == 1) { 556 if (phase == 1) {
@@ -582,7 +562,7 @@ next_step:
582 ofs_in_node = le16_to_cpu(entry->ofs_in_node); 562 ofs_in_node = le16_to_cpu(entry->ofs_in_node);
583 563
584 if (phase == 2) { 564 if (phase == 2) {
585 inode = f2fs_iget_nowait(sb, dni.ino); 565 inode = f2fs_iget(sb, dni.ino);
586 if (IS_ERR(inode)) 566 if (IS_ERR(inode))
587 continue; 567 continue;
588 568
@@ -610,11 +590,9 @@ next_iput:
610 } 590 }
611 if (++phase < 4) 591 if (++phase < 4)
612 goto next_step; 592 goto next_step;
613 err = GC_DONE; 593
614stop:
615 if (gc_type == FG_GC) 594 if (gc_type == FG_GC)
616 f2fs_submit_bio(sbi, DATA, true); 595 f2fs_submit_bio(sbi, DATA, true);
617 return err;
618} 596}
619 597
620static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, 598static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -628,17 +606,16 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
628 return ret; 606 return ret;
629} 607}
630 608
631static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 609static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
632 struct list_head *ilist, int gc_type) 610 struct list_head *ilist, int gc_type)
633{ 611{
634 struct page *sum_page; 612 struct page *sum_page;
635 struct f2fs_summary_block *sum; 613 struct f2fs_summary_block *sum;
636 int ret = GC_DONE;
637 614
638 /* read segment summary of victim */ 615 /* read segment summary of victim */
639 sum_page = get_sum_page(sbi, segno); 616 sum_page = get_sum_page(sbi, segno);
640 if (IS_ERR(sum_page)) 617 if (IS_ERR(sum_page))
641 return GC_ERROR; 618 return;
642 619
643 /* 620 /*
644 * CP needs to lock sum_page. In this time, we don't need 621 * CP needs to lock sum_page. In this time, we don't need
@@ -650,76 +627,55 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
650 627
651 switch (GET_SUM_TYPE((&sum->footer))) { 628 switch (GET_SUM_TYPE((&sum->footer))) {
652 case SUM_TYPE_NODE: 629 case SUM_TYPE_NODE:
653 ret = gc_node_segment(sbi, sum->entries, segno, gc_type); 630 gc_node_segment(sbi, sum->entries, segno, gc_type);
654 break; 631 break;
655 case SUM_TYPE_DATA: 632 case SUM_TYPE_DATA:
656 ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); 633 gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
657 break; 634 break;
658 } 635 }
659 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 636 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
660 stat_inc_call_count(sbi->stat_info); 637 stat_inc_call_count(sbi->stat_info);
661 638
662 f2fs_put_page(sum_page, 0); 639 f2fs_put_page(sum_page, 0);
663 return ret;
664} 640}
665 641
666int f2fs_gc(struct f2fs_sb_info *sbi, int nGC) 642int f2fs_gc(struct f2fs_sb_info *sbi)
667{ 643{
668 unsigned int segno;
669 int old_free_secs, cur_free_secs;
670 int gc_status, nfree;
671 struct list_head ilist; 644 struct list_head ilist;
645 unsigned int segno, i;
672 int gc_type = BG_GC; 646 int gc_type = BG_GC;
647 int nfree = 0;
648 int ret = -1;
673 649
674 INIT_LIST_HEAD(&ilist); 650 INIT_LIST_HEAD(&ilist);
675gc_more: 651gc_more:
676 nfree = 0; 652 if (!(sbi->sb->s_flags & MS_ACTIVE))
677 gc_status = GC_NONE; 653 goto stop;
678 654
679 if (has_not_enough_free_secs(sbi)) 655 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree))
680 old_free_secs = reserved_sections(sbi); 656 gc_type = FG_GC;
681 else
682 old_free_secs = free_sections(sbi);
683 657
684 while (sbi->sb->s_flags & MS_ACTIVE) { 658 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
685 int i; 659 goto stop;
686 if (has_not_enough_free_secs(sbi)) 660 ret = 0;
687 gc_type = FG_GC;
688 661
689 cur_free_secs = free_sections(sbi) + nfree; 662 for (i = 0; i < sbi->segs_per_sec; i++)
663 do_garbage_collect(sbi, segno + i, &ilist, gc_type);
690 664
691 /* We got free space successfully. */ 665 if (gc_type == FG_GC &&
692 if (nGC < cur_free_secs - old_free_secs) 666 get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
693 break; 667 nfree++;
694 668
695 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) 669 if (has_not_enough_free_secs(sbi, nfree))
696 break; 670 goto gc_more;
697 671
698 for (i = 0; i < sbi->segs_per_sec; i++) { 672 if (gc_type == FG_GC)
699 /* 673 write_checkpoint(sbi, false);
700 * do_garbage_collect will give us three gc_status:
701 * GC_ERROR, GC_DONE, and GC_BLOCKED.
702 * If GC is finished uncleanly, we have to return
703 * the victim to dirty segment list.
704 */
705 gc_status = do_garbage_collect(sbi, segno + i,
706 &ilist, gc_type);
707 if (gc_status != GC_DONE)
708 goto stop;
709 nfree++;
710 }
711 }
712stop: 674stop:
713 if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
714 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
715 if (nfree)
716 goto gc_more;
717 }
718 mutex_unlock(&sbi->gc_mutex); 675 mutex_unlock(&sbi->gc_mutex);
719 676
720 put_gc_inode(&ilist); 677 put_gc_inode(&ilist);
721 BUG_ON(!list_empty(&ilist)); 678 return ret;
722 return gc_status;
723} 679}
724 680
725void build_gc_manager(struct f2fs_sb_info *sbi) 681void build_gc_manager(struct f2fs_sb_info *sbi)
@@ -727,7 +683,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
727 DIRTY_I(sbi)->v_ops = &default_v_ops; 683 DIRTY_I(sbi)->v_ops = &default_v_ops;
728} 684}
729 685
730int create_gc_caches(void) 686int __init create_gc_caches(void)
731{ 687{
732 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", 688 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
733 sizeof(struct inode_entry), NULL); 689 sizeof(struct inode_entry), NULL);
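The rewritten f2fs_gc() above no longer tracks free-section deltas or the old GC_* status codes; it escalates from background to foreground GC when free sections run short, counts the sections it has emptied, loops until the shortage is resolved, and writes a checkpoint for foreground GC. A toy model of that control flow with victim selection and collection stubbed out (one segment per section assumed; every name here is a stand-in):

    #include <stdbool.h>
    #include <stdio.h>

    enum { BG_GC, FG_GC };

    static int free_secs = 1;               /* pretend we are almost full */
    static const int reserved_secs = 3;

    static bool has_not_enough_free_secs(int freed)
    {
        return free_secs + freed < reserved_secs;
    }

    static bool get_victim(unsigned int *segno)
    {
        static unsigned int next;

        *segno = next++;                    /* always finds a victim here */
        return true;
    }

    static bool do_garbage_collect(unsigned int segno, int gc_type)
    {
        printf("GC %s: segment %u\n", gc_type == FG_GC ? "FG" : "BG", segno);
        return gc_type == FG_GC;            /* foreground GC empties the section */
    }

    int main(void)
    {
        int gc_type = BG_GC, nfree = 0;
        unsigned int segno;

    gc_more:
        if (gc_type == BG_GC && has_not_enough_free_secs(nfree))
            gc_type = FG_GC;
        if (!get_victim(&segno))
            goto stop;
        if (do_garbage_collect(segno, gc_type))
            nfree++;
        if (has_not_enough_free_secs(nfree))
            goto gc_more;
        if (gc_type == FG_GC)
            printf("write_checkpoint()\n");
    stop:
        printf("freed %d section(s)\n", nfree);
        return 0;
    }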
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b026d9354ccd..30b2db003acd 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -8,7 +8,6 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11#define GC_THREAD_NAME "f2fs_gc_task"
12#define GC_THREAD_MIN_WB_PAGES 1 /* 11#define GC_THREAD_MIN_WB_PAGES 1 /*
13 * a threshold to determine 12 * a threshold to determine
14 * whether IO subsystem is idle 13 * whether IO subsystem is idle
@@ -23,15 +22,6 @@
23/* Search max. number of dirty segments to select a victim segment */ 22/* Search max. number of dirty segments to select a victim segment */
24#define MAX_VICTIM_SEARCH 20 23#define MAX_VICTIM_SEARCH 20
25 24
26enum {
27 GC_NONE = 0,
28 GC_ERROR,
29 GC_OK,
30 GC_NEXT,
31 GC_BLOCKED,
32 GC_DONE,
33};
34
35struct f2fs_gc_kthread { 25struct f2fs_gc_kthread {
36 struct task_struct *f2fs_gc_task; 26 struct task_struct *f2fs_gc_task;
37 wait_queue_head_t gc_wait_queue_head; 27 wait_queue_head_t gc_wait_queue_head;
@@ -104,14 +94,3 @@ static inline int is_idle(struct f2fs_sb_info *sbi)
104 struct request_list *rl = &q->root_rl; 94 struct request_list *rl = &q->root_rl;
105 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); 95 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
106} 96}
107
108static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
109{
110 unsigned int pages_per_sec = sbi->segs_per_sec *
111 (1 << sbi->log_blocks_per_seg);
112 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
113 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
114 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
115 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
116 return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
117}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index a60f04200f8b..6eb8d269b53b 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -42,7 +42,7 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[])
42 buf[1] += b1; 42 buf[1] += b1;
43} 43}
44 44
45static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num) 45static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
46{ 46{
47 unsigned pad, val; 47 unsigned pad, val;
48 int i; 48 int i;
@@ -69,13 +69,17 @@ static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, int len) 72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len)
73{ 73{
74 __u32 hash, minor_hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 78
79 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0'))
81 return 0;
82
79 /* Initialize the default seed for the hash checksum functions */ 83 /* Initialize the default seed for the hash checksum functions */
80 buf[0] = 0x67452301; 84 buf[0] = 0x67452301;
81 buf[1] = 0xefcdab89; 85 buf[1] = 0xefcdab89;
@@ -83,15 +87,15 @@ f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
83 buf[3] = 0x10325476; 87 buf[3] = 0x10325476;
84 88
85 p = name; 89 p = name;
86 while (len > 0) { 90 while (1) {
87 str2hashbuf(p, len, in, 4); 91 str2hashbuf(p, len, in, 4);
88 TEA_transform(buf, in); 92 TEA_transform(buf, in);
89 len -= 16;
90 p += 16; 93 p += 16;
94 if (len <= 16)
95 break;
96 len -= 16;
91 } 97 }
92 hash = buf[0]; 98 hash = buf[0];
93 minor_hash = buf[1];
94
95 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT); 99 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
96 return f2fs_hash; 100 return f2fs_hash;
97} 101}
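The reworked f2fs_dentry_hash() loop above always folds in at least one 16-byte chunk and stops after the final, possibly short, chunk; "." and ".." now short-circuit to hash 0. A sketch of that chunking with the TEA transform replaced by a trivial stand-in mixer (the padding and mixing below do not match the real str2hashbuf()/TEA_transform()):

    #include <stdio.h>
    #include <string.h>

    static unsigned int mix(unsigned int h, const char *p, size_t n)
    {
        /* stand-in for str2hashbuf() + TEA_transform() */
        for (size_t i = 0; i < 16; i++)
            h = h * 31 + (i < n ? (unsigned char)p[i] : 0);
        return h;
    }

    static unsigned int toy_dentry_hash(const char *name, size_t len)
    {
        unsigned int hash = 0x67452301;
        const char *p = name;

        if (len <= 2 && name[0] == '.' &&
            (name[1] == '.' || name[1] == '\0'))
            return 0;

        while (1) {
            hash = mix(hash, p, len > 16 ? 16 : len);
            p += 16;
            if (len <= 16)
                break;
            len -= 16;
        }
        return hash;
    }

    int main(void)
    {
        const char *names[] = { ".", "..", "a", "a_name_longer_than_sixteen_bytes" };

        for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++)
            printf("%-35s -> %#x\n", names[i],
                   toy_dentry_hash(names[i], strlen(names[i])));
        return 0;
    }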
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index df5fb381ebf1..ddae412d30c8 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -16,11 +16,6 @@
16#include "f2fs.h" 16#include "f2fs.h"
17#include "node.h" 17#include "node.h"
18 18
19struct f2fs_iget_args {
20 u64 ino;
21 int on_free;
22};
23
24void f2fs_set_inode_flags(struct inode *inode) 19void f2fs_set_inode_flags(struct inode *inode)
25{ 20{
26 unsigned int flags = F2FS_I(inode)->i_flags; 21 unsigned int flags = F2FS_I(inode)->i_flags;
@@ -40,34 +35,6 @@ void f2fs_set_inode_flags(struct inode *inode)
40 inode->i_flags |= S_DIRSYNC; 35 inode->i_flags |= S_DIRSYNC;
41} 36}
42 37
43static int f2fs_iget_test(struct inode *inode, void *data)
44{
45 struct f2fs_iget_args *args = data;
46
47 if (inode->i_ino != args->ino)
48 return 0;
49 if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
50 args->on_free = 1;
51 return 0;
52 }
53 return 1;
54}
55
56struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
57{
58 struct f2fs_iget_args args = {
59 .ino = ino,
60 .on_free = 0
61 };
62 struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);
63
64 if (inode)
65 return inode;
66 if (!args.on_free)
67 return f2fs_iget(sb, ino);
68 return ERR_PTR(-ENOENT);
69}
70
71static int do_read_inode(struct inode *inode) 38static int do_read_inode(struct inode *inode)
72{ 39{
73 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 40 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
@@ -100,6 +67,10 @@ static int do_read_inode(struct inode *inode)
100 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); 67 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
101 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); 68 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
102 inode->i_generation = le32_to_cpu(ri->i_generation); 69 inode->i_generation = le32_to_cpu(ri->i_generation);
70 if (ri->i_addr[0])
71 inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0]));
72 else
73 inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1]));
103 74
104 fi->i_current_depth = le32_to_cpu(ri->i_current_depth); 75 fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
105 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); 76 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
@@ -203,6 +174,21 @@ void update_inode(struct inode *inode, struct page *node_page)
203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); 174 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); 175 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
205 ri->i_generation = cpu_to_le32(inode->i_generation); 176 ri->i_generation = cpu_to_le32(inode->i_generation);
177
178 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
179 if (old_valid_dev(inode->i_rdev)) {
180 ri->i_addr[0] =
181 cpu_to_le32(old_encode_dev(inode->i_rdev));
182 ri->i_addr[1] = 0;
183 } else {
184 ri->i_addr[0] = 0;
185 ri->i_addr[1] =
186 cpu_to_le32(new_encode_dev(inode->i_rdev));
187 ri->i_addr[2] = 0;
188 }
189 }
190
191 set_cold_node(inode, node_page);
206 set_page_dirty(node_page); 192 set_page_dirty(node_page);
207} 193}
208 194
@@ -216,6 +202,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
216 inode->i_ino == F2FS_META_INO(sbi)) 202 inode->i_ino == F2FS_META_INO(sbi))
217 return 0; 203 return 0;
218 204
205 if (wbc)
206 f2fs_balance_fs(sbi);
207
219 node_page = get_node_page(sbi, inode->i_ino); 208 node_page = get_node_page(sbi, inode->i_ino);
220 if (IS_ERR(node_page)) 209 if (IS_ERR(node_page))
221 return PTR_ERR(node_page); 210 return PTR_ERR(node_page);
@@ -256,6 +245,7 @@ void f2fs_evict_inode(struct inode *inode)
256 if (inode->i_nlink || is_bad_inode(inode)) 245 if (inode->i_nlink || is_bad_inode(inode))
257 goto no_delete; 246 goto no_delete;
258 247
248 sb_start_intwrite(inode->i_sb);
259 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); 249 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
260 i_size_write(inode, 0); 250 i_size_write(inode, 0);
261 251
@@ -263,6 +253,7 @@ void f2fs_evict_inode(struct inode *inode)
263 f2fs_truncate(inode); 253 f2fs_truncate(inode);
264 254
265 remove_inode_page(inode); 255 remove_inode_page(inode);
256 sb_end_intwrite(inode->i_sb);
266no_delete: 257no_delete:
267 clear_inode(inode); 258 clear_inode(inode);
268} 259}
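The i_addr[0]/i_addr[1] handling added to do_read_inode() and update_inode() above mirrors the kernel's kdev_t helpers: device numbers that fit the historical 16-bit "major<<8 | minor" form go into slot 0 via old_encode_dev(), larger ones use new_encode_dev() in slot 1. A userspace replica of those helpers for illustration (believed to match include/linux/kdev_t.h of that era):

    #include <stdio.h>

    typedef unsigned int kdev;          /* stand-in for the kernel's dev_t */

    #define MINORBITS  20
    #define MINORMASK  ((1U << MINORBITS) - 1)
    #define MAJOR(dev) ((unsigned int)((dev) >> MINORBITS))
    #define MINOR(dev) ((unsigned int)((dev) & MINORMASK))
    #define MKDEV(ma, mi) (((ma) << MINORBITS) | (mi))

    static int old_valid_dev(kdev dev)
    {
        return MAJOR(dev) < 256 && MINOR(dev) < 256;
    }

    static unsigned int old_encode_dev(kdev dev)
    {
        return (MAJOR(dev) << 8) | MINOR(dev);
    }

    static unsigned int new_encode_dev(kdev dev)
    {
        unsigned int major = MAJOR(dev), minor = MINOR(dev);

        return (minor & 0xff) | (major << 8) | ((minor & ~0xffU) << 12);
    }

    int main(void)
    {
        kdev small = MKDEV(8, 1);       /* e.g. /dev/sda1 */
        kdev large = MKDEV(259, 70000); /* needs the new encoding */

        printf("small: old_valid=%d old=%#x\n",
               old_valid_dev(small), old_encode_dev(small));
        printf("large: old_valid=%d new=%#x\n",
               old_valid_dev(large), new_encode_dev(large));
        return 0;
    }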
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 89b7675dc377..1a49b881bac0 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -77,8 +77,8 @@ fail:
77 77
78static int is_multimedia_file(const unsigned char *s, const char *sub) 78static int is_multimedia_file(const unsigned char *s, const char *sub)
79{ 79{
80 int slen = strlen(s); 80 size_t slen = strlen(s);
81 int sublen = strlen(sub); 81 size_t sublen = strlen(sub);
82 int ret; 82 int ret;
83 83
84 if (sublen > slen) 84 if (sublen > slen)
@@ -123,6 +123,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
123 nid_t ino = 0; 123 nid_t ino = 0;
124 int err; 124 int err;
125 125
126 f2fs_balance_fs(sbi);
127
126 inode = f2fs_new_inode(dir, mode); 128 inode = f2fs_new_inode(dir, mode);
127 if (IS_ERR(inode)) 129 if (IS_ERR(inode))
128 return PTR_ERR(inode); 130 return PTR_ERR(inode);
@@ -144,8 +146,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
144 if (!sbi->por_doing) 146 if (!sbi->por_doing)
145 d_instantiate(dentry, inode); 147 d_instantiate(dentry, inode);
146 unlock_new_inode(inode); 148 unlock_new_inode(inode);
147
148 f2fs_balance_fs(sbi);
149 return 0; 149 return 0;
150out: 150out:
151 clear_nlink(inode); 151 clear_nlink(inode);
@@ -163,6 +163,8 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
163 struct f2fs_sb_info *sbi = F2FS_SB(sb); 163 struct f2fs_sb_info *sbi = F2FS_SB(sb);
164 int err; 164 int err;
165 165
166 f2fs_balance_fs(sbi);
167
166 inode->i_ctime = CURRENT_TIME; 168 inode->i_ctime = CURRENT_TIME;
167 atomic_inc(&inode->i_count); 169 atomic_inc(&inode->i_count);
168 170
@@ -172,8 +174,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
172 goto out; 174 goto out;
173 175
174 d_instantiate(dentry, inode); 176 d_instantiate(dentry, inode);
175
176 f2fs_balance_fs(sbi);
177 return 0; 177 return 0;
178out: 178out:
179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -223,6 +223,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
223 struct page *page; 223 struct page *page;
224 int err = -ENOENT; 224 int err = -ENOENT;
225 225
226 f2fs_balance_fs(sbi);
227
226 de = f2fs_find_entry(dir, &dentry->d_name, &page); 228 de = f2fs_find_entry(dir, &dentry->d_name, &page);
227 if (!de) 229 if (!de)
228 goto fail; 230 goto fail;
@@ -238,7 +240,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
238 240
239 /* In order to evict this inode, we set it dirty */ 241 /* In order to evict this inode, we set it dirty */
240 mark_inode_dirty(inode); 242 mark_inode_dirty(inode);
241 f2fs_balance_fs(sbi);
242fail: 243fail:
243 return err; 244 return err;
244} 245}
@@ -249,9 +250,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
249 struct super_block *sb = dir->i_sb; 250 struct super_block *sb = dir->i_sb;
250 struct f2fs_sb_info *sbi = F2FS_SB(sb); 251 struct f2fs_sb_info *sbi = F2FS_SB(sb);
251 struct inode *inode; 252 struct inode *inode;
252 unsigned symlen = strlen(symname) + 1; 253 size_t symlen = strlen(symname) + 1;
253 int err; 254 int err;
254 255
256 f2fs_balance_fs(sbi);
257
255 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO); 258 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
256 if (IS_ERR(inode)) 259 if (IS_ERR(inode))
257 return PTR_ERR(inode); 260 return PTR_ERR(inode);
@@ -268,9 +271,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
268 271
269 d_instantiate(dentry, inode); 272 d_instantiate(dentry, inode);
270 unlock_new_inode(inode); 273 unlock_new_inode(inode);
271
272 f2fs_balance_fs(sbi);
273
274 return err; 274 return err;
275out: 275out:
276 clear_nlink(inode); 276 clear_nlink(inode);
@@ -286,6 +286,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
286 struct inode *inode; 286 struct inode *inode;
287 int err; 287 int err;
288 288
289 f2fs_balance_fs(sbi);
290
289 inode = f2fs_new_inode(dir, S_IFDIR | mode); 291 inode = f2fs_new_inode(dir, S_IFDIR | mode);
290 if (IS_ERR(inode)) 292 if (IS_ERR(inode))
291 return PTR_ERR(inode); 293 return PTR_ERR(inode);
@@ -305,7 +307,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
305 d_instantiate(dentry, inode); 307 d_instantiate(dentry, inode);
306 unlock_new_inode(inode); 308 unlock_new_inode(inode);
307 309
308 f2fs_balance_fs(sbi);
309 return 0; 310 return 0;
310 311
311out_fail: 312out_fail:
@@ -336,6 +337,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
336 if (!new_valid_dev(rdev)) 337 if (!new_valid_dev(rdev))
337 return -EINVAL; 338 return -EINVAL;
338 339
340 f2fs_balance_fs(sbi);
341
339 inode = f2fs_new_inode(dir, mode); 342 inode = f2fs_new_inode(dir, mode);
340 if (IS_ERR(inode)) 343 if (IS_ERR(inode))
341 return PTR_ERR(inode); 344 return PTR_ERR(inode);
@@ -350,9 +353,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
350 alloc_nid_done(sbi, inode->i_ino); 353 alloc_nid_done(sbi, inode->i_ino);
351 d_instantiate(dentry, inode); 354 d_instantiate(dentry, inode);
352 unlock_new_inode(inode); 355 unlock_new_inode(inode);
353
354 f2fs_balance_fs(sbi);
355
356 return 0; 356 return 0;
357out: 357out:
358 clear_nlink(inode); 358 clear_nlink(inode);
@@ -376,6 +376,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
376 struct f2fs_dir_entry *new_entry; 376 struct f2fs_dir_entry *new_entry;
377 int err = -ENOENT; 377 int err = -ENOENT;
378 378
379 f2fs_balance_fs(sbi);
380
379 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); 381 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
380 if (!old_entry) 382 if (!old_entry)
381 goto out; 383 goto out;
@@ -441,8 +443,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
441 } 443 }
442 444
443 mutex_unlock_op(sbi, RENAME); 445 mutex_unlock_op(sbi, RENAME);
444
445 f2fs_balance_fs(sbi);
446 return 0; 446 return 0;
447 447
448out_dir: 448out_dir:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 19870361497e..e275218904ed 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -104,7 +104,7 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
104 f2fs_put_page(page, 1); 104 f2fs_put_page(page, 1);
105 continue; 105 continue;
106 } 106 }
107 page_cache_release(page); 107 f2fs_put_page(page, 0);
108 } 108 }
109} 109}
110 110
@@ -484,12 +484,14 @@ static void truncate_node(struct dnode_of_data *dn)
484 struct node_info ni; 484 struct node_info ni;
485 485
486 get_node_info(sbi, dn->nid, &ni); 486 get_node_info(sbi, dn->nid, &ni);
487 if (dn->inode->i_blocks == 0) {
488 BUG_ON(ni.blk_addr != NULL_ADDR);
489 goto invalidate;
490 }
487 BUG_ON(ni.blk_addr == NULL_ADDR); 491 BUG_ON(ni.blk_addr == NULL_ADDR);
488 492
489 if (ni.blk_addr != NULL_ADDR)
490 invalidate_blocks(sbi, ni.blk_addr);
491
492 /* Deallocate node address */ 493 /* Deallocate node address */
494 invalidate_blocks(sbi, ni.blk_addr);
493 dec_valid_node_count(sbi, dn->inode, 1); 495 dec_valid_node_count(sbi, dn->inode, 1);
494 set_node_addr(sbi, &ni, NULL_ADDR); 496 set_node_addr(sbi, &ni, NULL_ADDR);
495 497
@@ -499,7 +501,7 @@ static void truncate_node(struct dnode_of_data *dn)
499 } else { 501 } else {
500 sync_inode_page(dn); 502 sync_inode_page(dn);
501 } 503 }
502 504invalidate:
503 clear_node_page_dirty(dn->node_page); 505 clear_node_page_dirty(dn->node_page);
504 F2FS_SET_SB_DIRT(sbi); 506 F2FS_SET_SB_DIRT(sbi);
505 507
@@ -658,7 +660,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
658 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 660 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
659 int err = 0, cont = 1; 661 int err = 0, cont = 1;
660 int level, offset[4], noffset[4]; 662 int level, offset[4], noffset[4];
661 unsigned int nofs; 663 unsigned int nofs = 0;
662 struct f2fs_node *rn; 664 struct f2fs_node *rn;
663 struct dnode_of_data dn; 665 struct dnode_of_data dn;
664 struct page *page; 666 struct page *page;
@@ -768,25 +770,17 @@ int remove_inode_page(struct inode *inode)
768 dn.inode_page_locked = 1; 770 dn.inode_page_locked = 1;
769 truncate_node(&dn); 771 truncate_node(&dn);
770 } 772 }
771 if (inode->i_blocks == 1) {
772 /* inernally call f2fs_put_page() */
773 set_new_dnode(&dn, inode, page, page, ino);
774 truncate_node(&dn);
775 } else if (inode->i_blocks == 0) {
776 struct node_info ni;
777 get_node_info(sbi, inode->i_ino, &ni);
778 773
779 /* called after f2fs_new_inode() is failed */ 774 /* i_blocks may be 0 if f2fs_new_inode() failed */
780 BUG_ON(ni.blk_addr != NULL_ADDR); 775 BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1);
781 f2fs_put_page(page, 1); 776 set_new_dnode(&dn, inode, page, page, ino);
782 } else { 777 truncate_node(&dn);
783 BUG(); 778
784 }
785 mutex_unlock_op(sbi, NODE_TRUNC); 779 mutex_unlock_op(sbi, NODE_TRUNC);
786 return 0; 780 return 0;
787} 781}
788 782
789int new_inode_page(struct inode *inode, struct dentry *dentry) 783int new_inode_page(struct inode *inode, const struct qstr *name)
790{ 784{
791 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 785 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
792 struct page *page; 786 struct page *page;
@@ -796,7 +790,7 @@ int new_inode_page(struct inode *inode, struct dentry *dentry)
796 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 790 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
797 mutex_lock_op(sbi, NODE_NEW); 791 mutex_lock_op(sbi, NODE_NEW);
798 page = new_node_page(&dn, 0); 792 page = new_node_page(&dn, 0);
799 init_dent_inode(dentry, page); 793 init_dent_inode(name, page);
800 mutex_unlock_op(sbi, NODE_NEW); 794 mutex_unlock_op(sbi, NODE_NEW);
801 if (IS_ERR(page)) 795 if (IS_ERR(page))
802 return PTR_ERR(page); 796 return PTR_ERR(page);
@@ -834,17 +828,18 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
834 goto fail; 828 goto fail;
835 } 829 }
836 set_node_addr(sbi, &new_ni, NEW_ADDR); 830 set_node_addr(sbi, &new_ni, NEW_ADDR);
831 set_cold_node(dn->inode, page);
837 832
838 dn->node_page = page; 833 dn->node_page = page;
839 sync_inode_page(dn); 834 sync_inode_page(dn);
840 set_page_dirty(page); 835 set_page_dirty(page);
841 set_cold_node(dn->inode, page);
842 if (ofs == 0) 836 if (ofs == 0)
843 inc_valid_inode_count(sbi); 837 inc_valid_inode_count(sbi);
844 838
845 return page; 839 return page;
846 840
847fail: 841fail:
842 clear_node_page_dirty(page);
848 f2fs_put_page(page, 1); 843 f2fs_put_page(page, 1);
849 return ERR_PTR(err); 844 return ERR_PTR(err);
850} 845}
@@ -879,15 +874,11 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
879 return; 874 return;
880 875
881 if (read_node_page(apage, READA)) 876 if (read_node_page(apage, READA))
882 goto unlock_out; 877 unlock_page(apage);
883
884 page_cache_release(apage);
885 return;
886 878
887unlock_out:
888 unlock_page(apage);
889release_out: 879release_out:
890 page_cache_release(apage); 880 f2fs_put_page(apage, 0);
881 return;
891} 882}
892 883
893struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 884struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
@@ -1093,7 +1084,6 @@ static int f2fs_write_node_page(struct page *page,
1093{ 1084{
1094 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 1085 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1095 nid_t nid; 1086 nid_t nid;
1096 unsigned int nofs;
1097 block_t new_addr; 1087 block_t new_addr;
1098 struct node_info ni; 1088 struct node_info ni;
1099 1089
@@ -1110,7 +1100,6 @@ static int f2fs_write_node_page(struct page *page,
1110 1100
1111 /* get old block addr of this node page */ 1101 /* get old block addr of this node page */
1112 nid = nid_of_node(page); 1102 nid = nid_of_node(page);
1113 nofs = ofs_of_node(page);
1114 BUG_ON(page->index != nid); 1103 BUG_ON(page->index != nid);
1115 1104
1116 get_node_info(sbi, nid, &ni); 1105 get_node_info(sbi, nid, &ni);
@@ -1131,6 +1120,12 @@ static int f2fs_write_node_page(struct page *page,
1131 return 0; 1120 return 0;
1132} 1121}
1133 1122
1123/*
1124 * It is very important to gather dirty pages and write them at once, so that
1125 * we can submit a big bio without interfering with other data writes.
1126 * By default, 512 pages (2MB), i.e. one segment, is a reasonable batch size.
1127 */
1128#define COLLECT_DIRTY_NODES 512
1134static int f2fs_write_node_pages(struct address_space *mapping, 1129static int f2fs_write_node_pages(struct address_space *mapping,
1135 struct writeback_control *wbc) 1130 struct writeback_control *wbc)
1136{ 1131{
@@ -1138,17 +1133,16 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1138 struct block_device *bdev = sbi->sb->s_bdev; 1133 struct block_device *bdev = sbi->sb->s_bdev;
1139 long nr_to_write = wbc->nr_to_write; 1134 long nr_to_write = wbc->nr_to_write;
1140 1135
1141 if (wbc->for_kupdate) 1136 /* First check balancing cached NAT entries */
1142 return 0;
1143
1144 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1145 return 0;
1146
1147 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { 1137 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1148 write_checkpoint(sbi, false, false); 1138 write_checkpoint(sbi, false);
1149 return 0; 1139 return 0;
1150 } 1140 }
1151 1141
1142 /* collect a number of dirty node pages and write them out together */
1143 if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES)
1144 return 0;
1145
1152 /* if mounting is failed, skip writing node pages */ 1146 /* if mounting is failed, skip writing node pages */
1153 wbc->nr_to_write = bio_get_nr_vecs(bdev); 1147 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1154 sync_node_pages(sbi, 0, wbc); 1148 sync_node_pages(sbi, 0, wbc);
@@ -1571,7 +1565,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1571 nid_t nid; 1565 nid_t nid;
1572 struct f2fs_nat_entry raw_ne; 1566 struct f2fs_nat_entry raw_ne;
1573 int offset = -1; 1567 int offset = -1;
1574 block_t old_blkaddr, new_blkaddr; 1568 block_t new_blkaddr;
1575 1569
1576 ne = list_entry(cur, struct nat_entry, list); 1570 ne = list_entry(cur, struct nat_entry, list);
1577 nid = nat_get_nid(ne); 1571 nid = nat_get_nid(ne);
@@ -1585,7 +1579,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1585 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1579 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1586 if (offset >= 0) { 1580 if (offset >= 0) {
1587 raw_ne = nat_in_journal(sum, offset); 1581 raw_ne = nat_in_journal(sum, offset);
1588 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1589 goto flush_now; 1582 goto flush_now;
1590 } 1583 }
1591to_nat_page: 1584to_nat_page:
@@ -1607,7 +1600,6 @@ to_nat_page:
1607 1600
1608 BUG_ON(!nat_blk); 1601 BUG_ON(!nat_blk);
1609 raw_ne = nat_blk->entries[nid - start_nid]; 1602 raw_ne = nat_blk->entries[nid - start_nid];
1610 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1611flush_now: 1603flush_now:
1612 new_blkaddr = nat_get_blkaddr(ne); 1604 new_blkaddr = nat_get_blkaddr(ne);
1613 1605
@@ -1741,7 +1733,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
1741 kfree(nm_i); 1733 kfree(nm_i);
1742} 1734}
1743 1735
1744int create_node_manager_caches(void) 1736int __init create_node_manager_caches(void)
1745{ 1737{
1746 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 1738 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1747 sizeof(struct nat_entry), NULL); 1739 sizeof(struct nat_entry), NULL);
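A quick sanity check on the COLLECT_DIRTY_NODES threshold introduced in f2fs_write_node_pages() above: with 4 KiB blocks and 512 blocks per segment, the usual f2fs geometry, the batch is exactly one segment's worth of node pages.

    #include <stdio.h>

    int main(void)
    {
        const unsigned int block_size = 4096;           /* bytes */
        const unsigned int collect_dirty_nodes = 512;   /* pages per batch */

        printf("batch = %u pages = %u KiB = %u MiB\n",
               collect_dirty_nodes,
               collect_dirty_nodes * block_size / 1024,
               collect_dirty_nodes * block_size / (1024 * 1024));
        return 0;
    }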
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index b07e9b6ef376..b235215ac138 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -42,7 +42,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
42{ 42{
43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); 43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
44 struct f2fs_inode *raw_inode = &(raw_node->i); 44 struct f2fs_inode *raw_inode = &(raw_node->i);
45 struct dentry dent, parent; 45 struct qstr name;
46 struct f2fs_dir_entry *de; 46 struct f2fs_dir_entry *de;
47 struct page *page; 47 struct page *page;
48 struct inode *dir; 48 struct inode *dir;
@@ -57,17 +57,15 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
57 goto out; 57 goto out;
58 } 58 }
59 59
60 parent.d_inode = dir; 60 name.len = le32_to_cpu(raw_inode->i_namelen);
61 dent.d_parent = &parent; 61 name.name = raw_inode->i_name;
62 dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
63 dent.d_name.name = raw_inode->i_name;
64 62
65 de = f2fs_find_entry(dir, &dent.d_name, &page); 63 de = f2fs_find_entry(dir, &name, &page);
66 if (de) { 64 if (de) {
67 kunmap(page); 65 kunmap(page);
68 f2fs_put_page(page, 0); 66 f2fs_put_page(page, 0);
69 } else { 67 } else {
70 f2fs_add_link(&dent, inode); 68 err = __f2fs_add_link(dir, &name, inode);
71 } 69 }
72 iput(dir); 70 iput(dir);
73out: 71out:
@@ -144,14 +142,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
144 goto out; 142 goto out;
145 } 143 }
146 144
147 INIT_LIST_HEAD(&entry->list);
148 list_add_tail(&entry->list, head);
149
150 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 145 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
151 if (IS_ERR(entry->inode)) { 146 if (IS_ERR(entry->inode)) {
152 err = PTR_ERR(entry->inode); 147 err = PTR_ERR(entry->inode);
148 kmem_cache_free(fsync_entry_slab, entry);
153 goto out; 149 goto out;
154 } 150 }
151
152 list_add_tail(&entry->list, head);
155 entry->blkaddr = blkaddr; 153 entry->blkaddr = blkaddr;
156 } 154 }
157 if (IS_INODE(page)) { 155 if (IS_INODE(page)) {
@@ -173,10 +171,9 @@ out:
173static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, 171static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
174 struct list_head *head) 172 struct list_head *head)
175{ 173{
176 struct list_head *this; 174 struct fsync_inode_entry *entry, *tmp;
177 struct fsync_inode_entry *entry; 175
178 list_for_each(this, head) { 176 list_for_each_entry_safe(entry, tmp, head, list) {
179 entry = list_entry(this, struct fsync_inode_entry, list);
180 iput(entry->inode); 177 iput(entry->inode);
181 list_del(&entry->list); 178 list_del(&entry->list);
182 kmem_cache_free(fsync_entry_slab, entry); 179 kmem_cache_free(fsync_entry_slab, entry);
@@ -227,7 +224,10 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
227 f2fs_put_page(node_page, 1); 224 f2fs_put_page(node_page, 1);
228 225
229 /* Deallocate previous index in the node page */ 226 /* Deallocate previous index in the node page */
230 inode = f2fs_iget_nowait(sbi->sb, ino); 227 inode = f2fs_iget(sbi->sb, ino);
228 if (IS_ERR(inode))
229 return;
230
231 truncate_hole(inode, bidx, bidx + 1); 231 truncate_hole(inode, bidx, bidx + 1);
232 iput(inode); 232 iput(inode);
233} 233}
@@ -371,5 +371,5 @@ void recover_fsync_data(struct f2fs_sb_info *sbi)
371out: 371out:
372 destroy_fsync_dnodes(sbi, &inode_list); 372 destroy_fsync_dnodes(sbi, &inode_list);
373 kmem_cache_destroy(fsync_entry_slab); 373 kmem_cache_destroy(fsync_entry_slab);
374 write_checkpoint(sbi, false, false); 374 write_checkpoint(sbi, false);
375} 375}
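destroy_fsync_dnodes() above now uses list_for_each_entry_safe(), which remembers the next element before the body frees the current one, and find_fsync_dnodes() only adds an entry to the list once f2fs_iget() has succeeded. A userspace analogue of the safe-iteration part with a hand-rolled list (the names are illustrative, not kernel API):

    #include <stdio.h>
    #include <stdlib.h>

    struct entry {
        int ino;
        struct entry *next;
    };

    static void destroy_all(struct entry **head)
    {
        struct entry *cur = *head, *tmp;

        /* "safe" iteration: grab ->next before freeing the current node */
        while (cur) {
            tmp = cur->next;
            printf("iput + free entry for ino %d\n", cur->ino);
            free(cur);
            cur = tmp;
        }
        *head = NULL;
    }

    int main(void)
    {
        struct entry *head = NULL;

        for (int i = 3; i >= 1; i--) {
            struct entry *e = malloc(sizeof(*e));

            if (!e)
                break;
            e->ino = i;
            e->next = head;
            head = e;
        }
        destroy_all(&head);
        return 0;
    }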
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1b26e4ea1016..777f17e496e6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -12,57 +12,26 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/bio.h> 13#include <linux/bio.h>
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h>
15#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "segment.h" 19#include "segment.h"
19#include "node.h" 20#include "node.h"
20 21
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24 sbi->segs_per_sec;
25 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30 if (sbi->por_doing)
31 return 0;
32
33 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34 reserved_sections(sbi)))
35 return 1;
36 return 0;
37}
38
39/* 22/*
40 * This function balances dirty node and dentry pages. 23 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection. 24 * In addition, it controls garbage collection.
42 */ 25 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi) 26void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{ 27{
45 struct writeback_control wbc = {
46 .sync_mode = WB_SYNC_ALL,
47 .nr_to_write = LONG_MAX,
48 .for_reclaim = 0,
49 };
50
51 if (sbi->por_doing)
52 return;
53
54 /* 28 /*
 55 * We should do checkpoint when there are so many dirty node pages 29 * We should do GC, or end up with a checkpoint, if there are too many
 56 * with enough free segments. After then, we should do GC. 30 * dirty dir/node pages without enough free segments.
57 */ 31 */
58 if (need_to_flush(sbi)) { 32 if (has_not_enough_free_secs(sbi, 0)) {
59 sync_dirty_dir_inodes(sbi);
60 sync_node_pages(sbi, 0, &wbc);
61 }
62
63 if (has_not_enough_free_secs(sbi)) {
64 mutex_lock(&sbi->gc_mutex); 33 mutex_lock(&sbi->gc_mutex);
65 f2fs_gc(sbi, 1); 34 f2fs_gc(sbi);
66 } 35 }
67} 36}
68 37
@@ -339,7 +308,7 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
339 * If there is not enough reserved sections, 308 * If there is not enough reserved sections,
340 * we should not reuse prefree segments. 309 * we should not reuse prefree segments.
341 */ 310 */
342 if (has_not_enough_free_secs(sbi)) 311 if (has_not_enough_free_secs(sbi, 0))
343 return NULL_SEGNO; 312 return NULL_SEGNO;
344 313
345 /* 314 /*
@@ -567,6 +536,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
567 } 536 }
568} 537}
569 538
539static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
540{
541 struct curseg_info *curseg = CURSEG_I(sbi, type);
542 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
543
544 if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
545 return v_ops->get_victim(sbi,
546 &(curseg)->next_segno, BG_GC, type, SSR);
547
548 /* For data segments, let's do SSR more intensively */
549 for (; type >= CURSEG_HOT_DATA; type--)
550 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
551 BG_GC, type, SSR))
552 return 1;
553 return 0;
554}
555
570/* 556/*
571 * flush out current segment and replace it with new segment 557 * flush out current segment and replace it with new segment
572 * This function should be returned with success, otherwise BUG 558 * This function should be returned with success, otherwise BUG
@@ -631,7 +617,7 @@ static void f2fs_end_io_write(struct bio *bio, int err)
631 if (page->mapping) 617 if (page->mapping)
632 set_bit(AS_EIO, &page->mapping->flags); 618 set_bit(AS_EIO, &page->mapping->flags);
633 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); 619 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
634 set_page_dirty(page); 620 p->sbi->sb->s_flags |= MS_RDONLY;
635 } 621 }
636 end_page_writeback(page); 622 end_page_writeback(page);
637 dec_page_count(p->sbi, F2FS_WRITEBACK); 623 dec_page_count(p->sbi, F2FS_WRITEBACK);
@@ -791,11 +777,10 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
791 return __get_segment_type_2(page, p_type); 777 return __get_segment_type_2(page, p_type);
792 case 4: 778 case 4:
793 return __get_segment_type_4(page, p_type); 779 return __get_segment_type_4(page, p_type);
794 case 6:
795 return __get_segment_type_6(page, p_type);
796 default:
797 BUG();
798 } 780 }
781 /* NR_CURSEG_TYPE(6) logs by default */
782 BUG_ON(sbi->active_logs != NR_CURSEG_TYPE);
783 return __get_segment_type_6(page, p_type);
799} 784}
800 785
801static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, 786static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -848,15 +833,10 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
848 mutex_unlock(&curseg->curseg_mutex); 833 mutex_unlock(&curseg->curseg_mutex);
849} 834}
850 835
851int write_meta_page(struct f2fs_sb_info *sbi, struct page *page, 836void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
852 struct writeback_control *wbc)
853{ 837{
854 if (wbc->for_reclaim)
855 return AOP_WRITEPAGE_ACTIVATE;
856
857 set_page_writeback(page); 838 set_page_writeback(page);
858 submit_write_page(sbi, page, page->index, META); 839 submit_write_page(sbi, page, page->index, META);
859 return 0;
860} 840}
861 841
862void write_node_page(struct f2fs_sb_info *sbi, struct page *page, 842void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
@@ -1608,7 +1588,6 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1608 1588
1609 for (i = 0; i < NR_DIRTY_TYPE; i++) { 1589 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1610 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); 1590 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1611 dirty_i->nr_dirty[i] = 0;
1612 if (!dirty_i->dirty_segmap[i]) 1591 if (!dirty_i->dirty_segmap[i])
1613 return -ENOMEM; 1592 return -ENOMEM;
1614 } 1593 }
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0948405af6f5..552dadbb2327 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -450,16 +450,16 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi)
450 return (free_sections(sbi) < overprovision_sections(sbi)); 450 return (free_sections(sbi) < overprovision_sections(sbi));
451} 451}
452 452
453static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) 453static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
454{ 454{
455 struct curseg_info *curseg = CURSEG_I(sbi, type); 455 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
456 return DIRTY_I(sbi)->v_ops->get_victim(sbi, 456 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
457 &(curseg)->next_segno, BG_GC, type, SSR);
458}
459 457
460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) 458 if (sbi->por_doing)
461{ 459 return false;
462 return free_sections(sbi) <= reserved_sections(sbi); 460
461 return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
462 reserved_sections(sbi)));
463} 463}
464 464
465static inline int utilization(struct f2fs_sb_info *sbi) 465static inline int utilization(struct f2fs_sb_info *sbi)
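For illustration only (not part of the patch): the reworked has_not_enough_free_secs() above folds the old need_to_flush() estimate into a single check — cleaning is needed when the free sections, plus any about to be freed, cannot cover the dirty node sections, twice the dirty dentry sections, and the reserved sections. A stand-alone sketch of that arithmetic, with made-up sample values, might look like:

    #include <stdbool.h>
    #include <stdio.h>

    /* illustrative model only; names and numbers are hypothetical */
    static bool not_enough_free_secs(int free_secs, int freed,
                                     int node_secs, int dent_secs,
                                     int reserved_secs)
    {
            /* same inequality as the patched helper */
            return (free_secs + freed) <=
                   (node_secs + 2 * dent_secs + reserved_secs);
    }

    int main(void)
    {
            /* 10 free sections, 0 freed, 3 dirty-node, 2 dirty-dentry, 5 reserved */
            printf("needs cleaning: %d\n",
                   not_enough_free_secs(10, 0, 3, 2, 5));
            return 0;
    }

With these sample numbers 10 <= 3 + 4 + 5 holds, so f2fs_balance_fs() would take gc_mutex and run f2fs_gc().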
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 13867322cf5a..8c117649a035 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -53,6 +53,18 @@ static match_table_t f2fs_tokens = {
53 {Opt_err, NULL}, 53 {Opt_err, NULL},
54}; 54};
55 55
56void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
57{
58 struct va_format vaf;
59 va_list args;
60
61 va_start(args, fmt);
62 vaf.fmt = fmt;
63 vaf.va = &args;
64 printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
65 va_end(args);
66}
67
56static void init_once(void *foo) 68static void init_once(void *foo)
57{ 69{
58 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; 70 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
@@ -100,7 +112,7 @@ static void f2fs_put_super(struct super_block *sb)
100 f2fs_destroy_stats(sbi); 112 f2fs_destroy_stats(sbi);
101 stop_gc_thread(sbi); 113 stop_gc_thread(sbi);
102 114
103 write_checkpoint(sbi, false, true); 115 write_checkpoint(sbi, true);
104 116
105 iput(sbi->node_inode); 117 iput(sbi->node_inode);
106 iput(sbi->meta_inode); 118 iput(sbi->meta_inode);
@@ -119,15 +131,32 @@ static void f2fs_put_super(struct super_block *sb)
119int f2fs_sync_fs(struct super_block *sb, int sync) 131int f2fs_sync_fs(struct super_block *sb, int sync)
120{ 132{
121 struct f2fs_sb_info *sbi = F2FS_SB(sb); 133 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 int ret = 0;
123 134
124 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) 135 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
125 return 0; 136 return 0;
126 137
127 if (sync) 138 if (sync)
128 write_checkpoint(sbi, false, false); 139 write_checkpoint(sbi, false);
140 else
141 f2fs_balance_fs(sbi);
129 142
130 return ret; 143 return 0;
144}
145
146static int f2fs_freeze(struct super_block *sb)
147{
148 int err;
149
150 if (sb->s_flags & MS_RDONLY)
151 return 0;
152
153 err = f2fs_sync_fs(sb, 1);
154 return err;
155}
156
157static int f2fs_unfreeze(struct super_block *sb)
158{
159 return 0;
131} 160}
132 161
133static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) 162static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -148,8 +177,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
148 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; 177 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
149 buf->f_bavail = user_block_count - valid_user_blocks(sbi); 178 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
150 179
151 buf->f_files = valid_inode_count(sbi); 180 buf->f_files = sbi->total_node_count;
152 buf->f_ffree = sbi->total_node_count - valid_node_count(sbi); 181 buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi);
153 182
154 buf->f_namelen = F2FS_MAX_NAME_LEN; 183 buf->f_namelen = F2FS_MAX_NAME_LEN;
155 buf->f_fsid.val[0] = (u32)id; 184 buf->f_fsid.val[0] = (u32)id;
@@ -185,7 +214,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
185 seq_puts(seq, ",noacl"); 214 seq_puts(seq, ",noacl");
186#endif 215#endif
187 if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) 216 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
188 seq_puts(seq, ",disable_ext_indentify"); 217 seq_puts(seq, ",disable_ext_identify");
189 218
190 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 219 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
191 220
@@ -200,6 +229,8 @@ static struct super_operations f2fs_sops = {
200 .evict_inode = f2fs_evict_inode, 229 .evict_inode = f2fs_evict_inode,
201 .put_super = f2fs_put_super, 230 .put_super = f2fs_put_super,
202 .sync_fs = f2fs_sync_fs, 231 .sync_fs = f2fs_sync_fs,
232 .freeze_fs = f2fs_freeze,
233 .unfreeze_fs = f2fs_unfreeze,
203 .statfs = f2fs_statfs, 234 .statfs = f2fs_statfs,
204}; 235};
205 236
@@ -248,7 +279,8 @@ static const struct export_operations f2fs_export_ops = {
248 .get_parent = f2fs_get_parent, 279 .get_parent = f2fs_get_parent,
249}; 280};
250 281
251static int parse_options(struct f2fs_sb_info *sbi, char *options) 282static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi,
283 char *options)
252{ 284{
253 substring_t args[MAX_OPT_ARGS]; 285 substring_t args[MAX_OPT_ARGS];
254 char *p; 286 char *p;
@@ -287,7 +319,8 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
287 break; 319 break;
288#else 320#else
289 case Opt_nouser_xattr: 321 case Opt_nouser_xattr:
290 pr_info("nouser_xattr options not supported\n"); 322 f2fs_msg(sb, KERN_INFO,
323 "nouser_xattr options not supported");
291 break; 324 break;
292#endif 325#endif
293#ifdef CONFIG_F2FS_FS_POSIX_ACL 326#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -296,13 +329,13 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
296 break; 329 break;
297#else 330#else
298 case Opt_noacl: 331 case Opt_noacl:
299 pr_info("noacl options not supported\n"); 332 f2fs_msg(sb, KERN_INFO, "noacl options not supported");
300 break; 333 break;
301#endif 334#endif
302 case Opt_active_logs: 335 case Opt_active_logs:
303 if (args->from && match_int(args, &arg)) 336 if (args->from && match_int(args, &arg))
304 return -EINVAL; 337 return -EINVAL;
305 if (arg != 2 && arg != 4 && arg != 6) 338 if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
306 return -EINVAL; 339 return -EINVAL;
307 sbi->active_logs = arg; 340 sbi->active_logs = arg;
308 break; 341 break;
@@ -310,8 +343,9 @@ static int parse_options(struct f2fs_sb_info *sbi, char *options)
310 set_opt(sbi, DISABLE_EXT_IDENTIFY); 343 set_opt(sbi, DISABLE_EXT_IDENTIFY);
311 break; 344 break;
312 default: 345 default:
313 pr_err("Unrecognized mount option \"%s\" or missing value\n", 346 f2fs_msg(sb, KERN_ERR,
314 p); 347 "Unrecognized mount option \"%s\" or missing value",
348 p);
315 return -EINVAL; 349 return -EINVAL;
316 } 350 }
317 } 351 }
@@ -338,30 +372,53 @@ static loff_t max_file_size(unsigned bits)
338 return result; 372 return result;
339} 373}
340 374
341static int sanity_check_raw_super(struct f2fs_super_block *raw_super) 375static int sanity_check_raw_super(struct super_block *sb,
376 struct f2fs_super_block *raw_super)
342{ 377{
343 unsigned int blocksize; 378 unsigned int blocksize;
344 379
345 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) 380 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
381 f2fs_msg(sb, KERN_INFO,
382 "Magic Mismatch, valid(0x%x) - read(0x%x)",
383 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
346 return 1; 384 return 1;
385 }
386
387 /* Currently, support only 4KB page cache size */
388 if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) {
389 f2fs_msg(sb, KERN_INFO,
390 "Invalid page_cache_size (%lu), supports only 4KB\n",
391 PAGE_CACHE_SIZE);
392 return 1;
393 }
347 394
348 /* Currently, support only 4KB block size */ 395 /* Currently, support only 4KB block size */
349 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); 396 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
350 if (blocksize != PAGE_CACHE_SIZE) 397 if (blocksize != F2FS_BLKSIZE) {
398 f2fs_msg(sb, KERN_INFO,
399 "Invalid blocksize (%u), supports only 4KB\n",
400 blocksize);
351 return 1; 401 return 1;
402 }
403
352 if (le32_to_cpu(raw_super->log_sectorsize) != 404 if (le32_to_cpu(raw_super->log_sectorsize) !=
353 F2FS_LOG_SECTOR_SIZE) 405 F2FS_LOG_SECTOR_SIZE) {
406 f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize");
354 return 1; 407 return 1;
408 }
355 if (le32_to_cpu(raw_super->log_sectors_per_block) != 409 if (le32_to_cpu(raw_super->log_sectors_per_block) !=
356 F2FS_LOG_SECTORS_PER_BLOCK) 410 F2FS_LOG_SECTORS_PER_BLOCK) {
411 f2fs_msg(sb, KERN_INFO, "Invalid log sectors per block");
357 return 1; 412 return 1;
413 }
358 return 0; 414 return 0;
359} 415}
360 416
361static int sanity_check_ckpt(struct f2fs_super_block *raw_super, 417static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
362 struct f2fs_checkpoint *ckpt)
363{ 418{
364 unsigned int total, fsmeta; 419 unsigned int total, fsmeta;
420 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
421 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
365 422
366 total = le32_to_cpu(raw_super->segment_count); 423 total = le32_to_cpu(raw_super->segment_count);
367 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); 424 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -372,6 +429,11 @@ static int sanity_check_ckpt(struct f2fs_super_block *raw_super,
372 429
373 if (fsmeta >= total) 430 if (fsmeta >= total)
374 return 1; 431 return 1;
432
433 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
434 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
435 return 1;
436 }
375 return 0; 437 return 0;
376} 438}
377 439
@@ -400,6 +462,32 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
400 atomic_set(&sbi->nr_pages[i], 0); 462 atomic_set(&sbi->nr_pages[i], 0);
401} 463}
402 464
465static int validate_superblock(struct super_block *sb,
466 struct f2fs_super_block **raw_super,
467 struct buffer_head **raw_super_buf, sector_t block)
468{
469 const char *super = (block == 0 ? "first" : "second");
470
471 /* read f2fs raw super block */
472 *raw_super_buf = sb_bread(sb, block);
473 if (!*raw_super_buf) {
474 f2fs_msg(sb, KERN_ERR, "unable to read %s superblock",
475 super);
476 return 1;
477 }
478
479 *raw_super = (struct f2fs_super_block *)
480 ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET);
481
482 /* sanity checking of raw super */
483 if (!sanity_check_raw_super(sb, *raw_super))
484 return 0;
485
486 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem "
487 "in %s superblock", super);
488 return 1;
489}
490
403static int f2fs_fill_super(struct super_block *sb, void *data, int silent) 491static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
404{ 492{
405 struct f2fs_sb_info *sbi; 493 struct f2fs_sb_info *sbi;
@@ -414,19 +502,17 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
414 if (!sbi) 502 if (!sbi)
415 return -ENOMEM; 503 return -ENOMEM;
416 504
417 /* set a temporary block size */ 505 /* set a block size */
418 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) 506 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) {
419 goto free_sbi; 507 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
420
421 /* read f2fs raw super block */
422 raw_super_buf = sb_bread(sb, 0);
423 if (!raw_super_buf) {
424 err = -EIO;
425 goto free_sbi; 508 goto free_sbi;
426 } 509 }
427 raw_super = (struct f2fs_super_block *)
428 ((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET);
429 510
511 if (validate_superblock(sb, &raw_super, &raw_super_buf, 0)) {
512 brelse(raw_super_buf);
513 if (validate_superblock(sb, &raw_super, &raw_super_buf, 1))
514 goto free_sb_buf;
515 }
430 /* init some FS parameters */ 516 /* init some FS parameters */
431 sbi->active_logs = NR_CURSEG_TYPE; 517 sbi->active_logs = NR_CURSEG_TYPE;
432 518
@@ -439,11 +525,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
439 set_opt(sbi, POSIX_ACL); 525 set_opt(sbi, POSIX_ACL);
440#endif 526#endif
441 /* parse mount options */ 527 /* parse mount options */
442 if (parse_options(sbi, (char *)data)) 528 if (parse_options(sb, sbi, (char *)data))
443 goto free_sb_buf;
444
445 /* sanity checking of raw super */
446 if (sanity_check_raw_super(raw_super))
447 goto free_sb_buf; 529 goto free_sb_buf;
448 530
449 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 531 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
@@ -478,18 +560,23 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
478 /* get an inode for meta space */ 560 /* get an inode for meta space */
479 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 561 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
480 if (IS_ERR(sbi->meta_inode)) { 562 if (IS_ERR(sbi->meta_inode)) {
563 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
481 err = PTR_ERR(sbi->meta_inode); 564 err = PTR_ERR(sbi->meta_inode);
482 goto free_sb_buf; 565 goto free_sb_buf;
483 } 566 }
484 567
485 err = get_valid_checkpoint(sbi); 568 err = get_valid_checkpoint(sbi);
486 if (err) 569 if (err) {
570 f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
487 goto free_meta_inode; 571 goto free_meta_inode;
572 }
488 573
489 /* sanity checking of checkpoint */ 574 /* sanity checking of checkpoint */
490 err = -EINVAL; 575 err = -EINVAL;
491 if (sanity_check_ckpt(raw_super, sbi->ckpt)) 576 if (sanity_check_ckpt(sbi)) {
577 f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
492 goto free_cp; 578 goto free_cp;
579 }
493 580
494 sbi->total_valid_node_count = 581 sbi->total_valid_node_count =
495 le32_to_cpu(sbi->ckpt->valid_node_count); 582 le32_to_cpu(sbi->ckpt->valid_node_count);
@@ -503,38 +590,41 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
503 INIT_LIST_HEAD(&sbi->dir_inode_list); 590 INIT_LIST_HEAD(&sbi->dir_inode_list);
504 spin_lock_init(&sbi->dir_inode_lock); 591 spin_lock_init(&sbi->dir_inode_lock);
505 592
506 /* init super block */
507 if (!sb_set_blocksize(sb, sbi->blocksize))
508 goto free_cp;
509
510 init_orphan_info(sbi); 593 init_orphan_info(sbi);
511 594
512 /* setup f2fs internal modules */ 595 /* setup f2fs internal modules */
513 err = build_segment_manager(sbi); 596 err = build_segment_manager(sbi);
514 if (err) 597 if (err) {
598 f2fs_msg(sb, KERN_ERR,
599 "Failed to initialize F2FS segment manager");
515 goto free_sm; 600 goto free_sm;
601 }
516 err = build_node_manager(sbi); 602 err = build_node_manager(sbi);
517 if (err) 603 if (err) {
604 f2fs_msg(sb, KERN_ERR,
605 "Failed to initialize F2FS node manager");
518 goto free_nm; 606 goto free_nm;
607 }
519 608
520 build_gc_manager(sbi); 609 build_gc_manager(sbi);
521 610
522 /* get an inode for node space */ 611 /* get an inode for node space */
523 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); 612 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
524 if (IS_ERR(sbi->node_inode)) { 613 if (IS_ERR(sbi->node_inode)) {
614 f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
525 err = PTR_ERR(sbi->node_inode); 615 err = PTR_ERR(sbi->node_inode);
526 goto free_nm; 616 goto free_nm;
527 } 617 }
528 618
 529 /* if there are any orphan nodes, free them */ 619 /* if there are any orphan nodes, free them */
530 err = -EINVAL; 620 err = -EINVAL;
531 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && 621 if (recover_orphan_inodes(sbi))
532 recover_orphan_inodes(sbi))
533 goto free_node_inode; 622 goto free_node_inode;
534 623
535 /* read root inode and dentry */ 624 /* read root inode and dentry */
536 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); 625 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
537 if (IS_ERR(root)) { 626 if (IS_ERR(root)) {
627 f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
538 err = PTR_ERR(root); 628 err = PTR_ERR(root);
539 goto free_node_inode; 629 goto free_node_inode;
540 } 630 }
@@ -548,8 +638,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
548 } 638 }
549 639
550 /* recover fsynced data */ 640 /* recover fsynced data */
551 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) && 641 if (!test_opt(sbi, DISABLE_ROLL_FORWARD))
552 !test_opt(sbi, DISABLE_ROLL_FORWARD))
553 recover_fsync_data(sbi); 642 recover_fsync_data(sbi);
554 643
555 /* After POR, we can run background GC thread */ 644 /* After POR, we can run background GC thread */
@@ -599,7 +688,7 @@ static struct file_system_type f2fs_fs_type = {
599 .fs_flags = FS_REQUIRES_DEV, 688 .fs_flags = FS_REQUIRES_DEV,
600}; 689};
601 690
602static int init_inodecache(void) 691static int __init init_inodecache(void)
603{ 692{
604 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 693 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
605 sizeof(struct f2fs_inode_info), NULL); 694 sizeof(struct f2fs_inode_info), NULL);
@@ -634,14 +723,17 @@ static int __init init_f2fs_fs(void)
634 err = create_checkpoint_caches(); 723 err = create_checkpoint_caches();
635 if (err) 724 if (err)
636 goto fail; 725 goto fail;
637 return register_filesystem(&f2fs_fs_type); 726 err = register_filesystem(&f2fs_fs_type);
727 if (err)
728 goto fail;
729 f2fs_create_root_stats();
638fail: 730fail:
639 return err; 731 return err;
640} 732}
641 733
642static void __exit exit_f2fs_fs(void) 734static void __exit exit_f2fs_fs(void)
643{ 735{
644 destroy_root_stats(); 736 f2fs_destroy_root_stats();
645 unregister_filesystem(&f2fs_fs_type); 737 unregister_filesystem(&f2fs_fs_type);
646 destroy_checkpoint_caches(); 738 destroy_checkpoint_caches();
647 destroy_gc_caches(); 739 destroy_gc_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7d52e8dc0c59..8038c0496504 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -208,7 +208,7 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
208 struct page *page; 208 struct page *page;
209 void *base_addr; 209 void *base_addr;
210 int error = 0, found = 0; 210 int error = 0, found = 0;
211 int value_len, name_len; 211 size_t value_len, name_len;
212 212
213 if (name == NULL) 213 if (name == NULL)
214 return -EINVAL; 214 return -EINVAL;
@@ -304,7 +304,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
304 struct f2fs_xattr_entry *here, *last; 304 struct f2fs_xattr_entry *here, *last;
305 struct page *page; 305 struct page *page;
306 void *base_addr; 306 void *base_addr;
307 int error, found, free, name_len, newsize; 307 int error, found, free, newsize;
308 size_t name_len;
308 char *pval; 309 char *pval;
309 310
310 if (name == NULL) 311 if (name == NULL)
@@ -317,6 +318,8 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
317 if (name_len > 255 || value_len > MAX_VALUE_LEN) 318 if (name_len > 255 || value_len > MAX_VALUE_LEN)
318 return -ERANGE; 319 return -ERANGE;
319 320
321 f2fs_balance_fs(sbi);
322
320 mutex_lock_op(sbi, NODE_NEW); 323 mutex_lock_op(sbi, NODE_NEW);
321 if (!fi->i_xattr_nid) { 324 if (!fi->i_xattr_nid) {
322 /* Allocate new attribute block */ 325 /* Allocate new attribute block */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 58bf744dbf39..165012ef363a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -698,7 +698,7 @@ out:
698 698
699static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) 699static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir)
700{ 700{
701 struct inode *inode = filp->f_path.dentry->d_inode; 701 struct inode *inode = file_inode(filp);
702 return __fat_readdir(inode, filp, dirent, filldir, 0, 0); 702 return __fat_readdir(inode, filp, dirent, filldir, 0, 0);
703} 703}
704 704
@@ -779,7 +779,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
779static long fat_dir_ioctl(struct file *filp, unsigned int cmd, 779static long fat_dir_ioctl(struct file *filp, unsigned int cmd,
780 unsigned long arg) 780 unsigned long arg)
781{ 781{
782 struct inode *inode = filp->f_path.dentry->d_inode; 782 struct inode *inode = file_inode(filp);
783 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg; 783 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
784 int short_only, both; 784 int short_only, both;
785 785
@@ -819,7 +819,7 @@ FAT_IOCTL_FILLDIR_FUNC(fat_compat_ioctl_filldir, compat_dirent)
819static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, 819static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
820 unsigned long arg) 820 unsigned long arg)
821{ 821{
822 struct inode *inode = filp->f_path.dentry->d_inode; 822 struct inode *inode = file_inode(filp);
823 struct compat_dirent __user *d1 = compat_ptr(arg); 823 struct compat_dirent __user *d1 = compat_ptr(arg);
824 int short_only, both; 824 int short_only, both;
825 825
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 12701a567752..e9cc3f0d58e2 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -95,6 +95,8 @@ struct msdos_sb_info {
95 95
96 spinlock_t dir_hash_lock; 96 spinlock_t dir_hash_lock;
97 struct hlist_head dir_hashtable[FAT_HASH_SIZE]; 97 struct hlist_head dir_hashtable[FAT_HASH_SIZE];
98
99 unsigned int dirty; /* fs state before mount */
98}; 100};
99 101
100#define FAT_CACHE_VALID 0 /* special case for valid cache */ 102#define FAT_CACHE_VALID 0 /* special case for valid cache */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a62e0ecbe2db..3978f8ca1823 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -32,7 +32,7 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
32 32
33static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr) 33static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
34{ 34{
35 struct inode *inode = file->f_path.dentry->d_inode; 35 struct inode *inode = file_inode(file);
36 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 36 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
37 int is_dir = S_ISDIR(inode->i_mode); 37 int is_dir = S_ISDIR(inode->i_mode);
38 u32 attr, oldattr; 38 u32 attr, oldattr;
@@ -116,7 +116,7 @@ out:
116 116
117long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 117long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
118{ 118{
119 struct inode *inode = filp->f_path.dentry->d_inode; 119 struct inode *inode = file_inode(filp);
120 u32 __user *user_attr = (u32 __user *)arg; 120 u32 __user *user_attr = (u32 __user *)arg;
121 121
122 switch (cmd) { 122 switch (cmd) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index f8f491677a4a..acf6e479b443 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -341,12 +341,11 @@ struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
341{ 341{
342 struct msdos_sb_info *sbi = MSDOS_SB(sb); 342 struct msdos_sb_info *sbi = MSDOS_SB(sb);
343 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); 343 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
344 struct hlist_node *_p;
345 struct msdos_inode_info *i; 344 struct msdos_inode_info *i;
346 struct inode *inode = NULL; 345 struct inode *inode = NULL;
347 346
348 spin_lock(&sbi->inode_hash_lock); 347 spin_lock(&sbi->inode_hash_lock);
349 hlist_for_each_entry(i, _p, head, i_fat_hash) { 348 hlist_for_each_entry(i, head, i_fat_hash) {
350 BUG_ON(i->vfs_inode.i_sb != sb); 349 BUG_ON(i->vfs_inode.i_sb != sb);
351 if (i->i_pos != i_pos) 350 if (i->i_pos != i_pos)
352 continue; 351 continue;
@@ -488,10 +487,59 @@ static void fat_evict_inode(struct inode *inode)
488 fat_detach(inode); 487 fat_detach(inode);
489} 488}
490 489
490static void fat_set_state(struct super_block *sb,
491 unsigned int set, unsigned int force)
492{
493 struct buffer_head *bh;
494 struct fat_boot_sector *b;
495 struct msdos_sb_info *sbi = sb->s_fs_info;
496
 497 /* do not change anything if mounted read-only */
498 if ((sb->s_flags & MS_RDONLY) && !force)
499 return;
500
501 /* do not change state if fs was dirty */
502 if (sbi->dirty) {
503 /* warn only on set (mount). */
504 if (set)
505 fat_msg(sb, KERN_WARNING, "Volume was not properly "
506 "unmounted. Some data may be corrupt. "
507 "Please run fsck.");
508 return;
509 }
510
511 bh = sb_bread(sb, 0);
512 if (bh == NULL) {
513 fat_msg(sb, KERN_ERR, "unable to read boot sector "
514 "to mark fs as dirty");
515 return;
516 }
517
518 b = (struct fat_boot_sector *) bh->b_data;
519
520 if (sbi->fat_bits == 32) {
521 if (set)
522 b->fat32.state |= FAT_STATE_DIRTY;
523 else
524 b->fat32.state &= ~FAT_STATE_DIRTY;
525 } else /* fat 16 and 12 */ {
526 if (set)
527 b->fat16.state |= FAT_STATE_DIRTY;
528 else
529 b->fat16.state &= ~FAT_STATE_DIRTY;
530 }
531
532 mark_buffer_dirty(bh);
533 sync_dirty_buffer(bh);
534 brelse(bh);
535}
536
491static void fat_put_super(struct super_block *sb) 537static void fat_put_super(struct super_block *sb)
492{ 538{
493 struct msdos_sb_info *sbi = MSDOS_SB(sb); 539 struct msdos_sb_info *sbi = MSDOS_SB(sb);
494 540
541 fat_set_state(sb, 0, 0);
542
495 iput(sbi->fsinfo_inode); 543 iput(sbi->fsinfo_inode);
496 iput(sbi->fat_inode); 544 iput(sbi->fat_inode);
497 545
@@ -566,8 +614,18 @@ static void __exit fat_destroy_inodecache(void)
566 614
567static int fat_remount(struct super_block *sb, int *flags, char *data) 615static int fat_remount(struct super_block *sb, int *flags, char *data)
568{ 616{
617 int new_rdonly;
569 struct msdos_sb_info *sbi = MSDOS_SB(sb); 618 struct msdos_sb_info *sbi = MSDOS_SB(sb);
570 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); 619 *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME);
620
621 /* make sure we update state on remount. */
622 new_rdonly = *flags & MS_RDONLY;
623 if (new_rdonly != (sb->s_flags & MS_RDONLY)) {
624 if (new_rdonly)
625 fat_set_state(sb, 0, 0);
626 else
627 fat_set_state(sb, 1, 1);
628 }
571 return 0; 629 return 0;
572} 630}
573 631
@@ -1298,17 +1356,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1298 sbi->prev_free = FAT_START_ENT; 1356 sbi->prev_free = FAT_START_ENT;
1299 sb->s_maxbytes = 0xffffffff; 1357 sb->s_maxbytes = 0xffffffff;
1300 1358
1301 if (!sbi->fat_length && b->fat32_length) { 1359 if (!sbi->fat_length && b->fat32.length) {
1302 struct fat_boot_fsinfo *fsinfo; 1360 struct fat_boot_fsinfo *fsinfo;
1303 struct buffer_head *fsinfo_bh; 1361 struct buffer_head *fsinfo_bh;
1304 1362
1305 /* Must be FAT32 */ 1363 /* Must be FAT32 */
1306 sbi->fat_bits = 32; 1364 sbi->fat_bits = 32;
1307 sbi->fat_length = le32_to_cpu(b->fat32_length); 1365 sbi->fat_length = le32_to_cpu(b->fat32.length);
1308 sbi->root_cluster = le32_to_cpu(b->root_cluster); 1366 sbi->root_cluster = le32_to_cpu(b->fat32.root_cluster);
1309 1367
1310 /* MC - if info_sector is 0, don't multiply by 0 */ 1368 /* MC - if info_sector is 0, don't multiply by 0 */
1311 sbi->fsinfo_sector = le16_to_cpu(b->info_sector); 1369 sbi->fsinfo_sector = le16_to_cpu(b->fat32.info_sector);
1312 if (sbi->fsinfo_sector == 0) 1370 if (sbi->fsinfo_sector == 0)
1313 sbi->fsinfo_sector = 1; 1371 sbi->fsinfo_sector = 1;
1314 1372
@@ -1362,6 +1420,12 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1362 if (sbi->fat_bits != 32) 1420 if (sbi->fat_bits != 32)
1363 sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; 1421 sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
1364 1422
1423 /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
1424 if (sbi->fat_bits == 32)
1425 sbi->dirty = b->fat32.state & FAT_STATE_DIRTY;
1426 else /* fat 16 or 12 */
1427 sbi->dirty = b->fat16.state & FAT_STATE_DIRTY;
1428
1365 /* check that FAT table does not overflow */ 1429 /* check that FAT table does not overflow */
1366 fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; 1430 fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
1367 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1431 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
@@ -1456,6 +1520,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1456 "the device does not support discard"); 1520 "the device does not support discard");
1457 } 1521 }
1458 1522
1523 fat_set_state(sb, 1, 0);
1459 return 0; 1524 return 0;
1460 1525
1461out_invalid: 1526out_invalid:
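Taken together, the fat_set_state() hunks above implement the usual dirty-bit protocol: the state byte in the boot sector (fat32.state, or fat16.state for FAT12/16, tested against FAT_STATE_DIRTY) is set while the volume is mounted read-write and cleared again on a clean unmount or a remount to read-only, so only a crash leaves it set for the next mount to warn about. A rough schematic of the call sequence, reconstructed from the hunks above rather than quoted from the patch, is:

    /* mount (fat_fill_super): remember the on-disk state, then mark dirty */
    sbi->dirty = b->fat32.state & FAT_STATE_DIRTY;   /* fat16.state for FAT12/16 */
    fat_set_state(sb, 1, 0);

    /* clean unmount (fat_put_super) or remount,ro: clear the flag */
    fat_set_state(sb, 0, 0);

    /* remount,rw: force the flag back on even though the sb is still read-only */
    fat_set_state(sb, 1, 1);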
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index ef4b5faba87b..499c10438ca2 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -21,13 +21,12 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart)
21{ 21{
22 struct msdos_sb_info *sbi = MSDOS_SB(sb); 22 struct msdos_sb_info *sbi = MSDOS_SB(sb);
23 struct hlist_head *head; 23 struct hlist_head *head;
24 struct hlist_node *_p;
25 struct msdos_inode_info *i; 24 struct msdos_inode_info *i;
26 struct inode *inode = NULL; 25 struct inode *inode = NULL;
27 26
28 head = sbi->dir_hashtable + fat_dir_hash(i_logstart); 27 head = sbi->dir_hashtable + fat_dir_hash(i_logstart);
29 spin_lock(&sbi->dir_hash_lock); 28 spin_lock(&sbi->dir_hash_lock);
30 hlist_for_each_entry(i, _p, head, i_dir_hash) { 29 hlist_for_each_entry(i, head, i_dir_hash) {
31 BUG_ON(i->vfs_inode.i_sb != sb); 30 BUG_ON(i->vfs_inode.i_sb != sb);
32 if (i->i_logstart != i_logstart) 31 if (i->i_logstart != i_logstart)
33 continue; 32 continue;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 71a600a19f06..6599222536eb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -30,7 +30,7 @@
30 30
31static int setfl(int fd, struct file * filp, unsigned long arg) 31static int setfl(int fd, struct file * filp, unsigned long arg)
32{ 32{
33 struct inode * inode = filp->f_path.dentry->d_inode; 33 struct inode * inode = file_inode(filp);
34 int error = 0; 34 int error = 0;
35 35
36 /* 36 /*
diff --git a/fs/file.c b/fs/file.c
index 15cb8618e95d..3906d9577a18 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -490,7 +490,7 @@ void exit_files(struct task_struct *tsk)
490 } 490 }
491} 491}
492 492
493static void __devinit fdtable_defer_list_init(int cpu) 493static void fdtable_defer_list_init(int cpu)
494{ 494{
495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
496 spin_lock_init(&fddef->lock); 496 spin_lock_init(&fddef->lock);
@@ -516,7 +516,7 @@ struct files_struct init_files = {
516 .close_on_exec = init_files.close_on_exec_init, 516 .close_on_exec = init_files.close_on_exec_init,
517 .open_fds = init_files.open_fds_init, 517 .open_fds = init_files.open_fds_init,
518 }, 518 },
519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
520}; 520};
521 521
522/* 522/*
diff --git a/fs/file_table.c b/fs/file_table.c
index de9e9653d611..aa07d3684a2e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -94,8 +94,8 @@ int proc_nr_files(ctl_table *table, int write,
94#endif 94#endif
95 95
96/* Find an unused file structure and return a pointer to it. 96/* Find an unused file structure and return a pointer to it.
 97 * Returns NULL, if there are no more free file structures or 97 * Returns an error pointer if some error happened, e.g. we are over the file
 98 * we run out of memory. 98 * structures limit, run out of memory or the operation is not permitted.
99 * 99 *
100 * Be very careful using this. You are responsible for 100 * Be very careful using this. You are responsible for
101 * getting write access to any mount that you might assign 101 * getting write access to any mount that you might assign
@@ -107,7 +107,8 @@ struct file *get_empty_filp(void)
107{ 107{
108 const struct cred *cred = current_cred(); 108 const struct cred *cred = current_cred();
109 static long old_max; 109 static long old_max;
110 struct file * f; 110 struct file *f;
111 int error;
111 112
112 /* 113 /*
113 * Privileged users can go above max_files 114 * Privileged users can go above max_files
@@ -122,13 +123,16 @@ struct file *get_empty_filp(void)
122 } 123 }
123 124
124 f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); 125 f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
125 if (f == NULL) 126 if (unlikely(!f))
126 goto fail; 127 return ERR_PTR(-ENOMEM);
127 128
128 percpu_counter_inc(&nr_files); 129 percpu_counter_inc(&nr_files);
129 f->f_cred = get_cred(cred); 130 f->f_cred = get_cred(cred);
130 if (security_file_alloc(f)) 131 error = security_file_alloc(f);
131 goto fail_sec; 132 if (unlikely(error)) {
133 file_free(f);
134 return ERR_PTR(error);
135 }
132 136
133 INIT_LIST_HEAD(&f->f_u.fu_list); 137 INIT_LIST_HEAD(&f->f_u.fu_list);
134 atomic_long_set(&f->f_count, 1); 138 atomic_long_set(&f->f_count, 1);
@@ -144,12 +148,7 @@ over:
144 pr_info("VFS: file-max limit %lu reached\n", get_max_files()); 148 pr_info("VFS: file-max limit %lu reached\n", get_max_files());
145 old_max = get_nr_files(); 149 old_max = get_nr_files();
146 } 150 }
147 goto fail; 151 return ERR_PTR(-ENFILE);
148
149fail_sec:
150 file_free(f);
151fail:
152 return NULL;
153} 152}
154 153
155/** 154/**
@@ -173,8 +172,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
173 struct file *file; 172 struct file *file;
174 173
175 file = get_empty_filp(); 174 file = get_empty_filp();
176 if (!file) 175 if (IS_ERR(file))
177 return NULL; 176 return file;
178 177
179 file->f_path = *path; 178 file->f_path = *path;
180 file->f_mapping = path->dentry->d_inode->i_mapping; 179 file->f_mapping = path->dentry->d_inode->i_mapping;
@@ -447,7 +446,7 @@ void mark_files_ro(struct super_block *sb)
447 446
448 lg_global_lock(&files_lglock); 447 lg_global_lock(&files_lglock);
449 do_file_list_for_each_entry(sb, f) { 448 do_file_list_for_each_entry(sb, f) {
450 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 449 if (!S_ISREG(file_inode(f)->i_mode))
451 continue; 450 continue;
452 if (!file_count(f)) 451 if (!file_count(f))
453 continue; 452 continue;
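Because get_empty_filp() now encodes its failure reason in an ERR_PTR() instead of returning NULL, a caller is expected to propagate that error rather than assume -ENFILE. A minimal sketch of the caller pattern (surrounding code is hypothetical):

    struct file *f;

    f = get_empty_filp();
    if (IS_ERR(f))
            return PTR_ERR(f);      /* -ENFILE, -ENOMEM or a security error */
    /* ... set up and use f ... */

alloc_file() in the hunk above follows the same pattern, simply returning the ERR_PTR to its own caller.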
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index bd447e88f208..664b07a53870 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -237,7 +237,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
237static int 237static int
238vxfs_readdir(struct file *fp, void *retp, filldir_t filler) 238vxfs_readdir(struct file *fp, void *retp, filldir_t filler)
239{ 239{
240 struct inode *ip = fp->f_path.dentry->d_inode; 240 struct inode *ip = file_inode(fp);
241 struct super_block *sbp = ip->i_sb; 241 struct super_block *sbp = ip->i_sb;
242 u_long bsize = sbp->s_blocksize; 242 u_long bsize = sbp->s_blocksize;
243 u_long page, npages, block, pblocks, nblocks, offset; 243 u_long page, npages, block, pblocks, nblocks, offset;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 310972b72a66..21f46fb3a101 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -318,8 +318,14 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
318 318
319static int write_inode(struct inode *inode, struct writeback_control *wbc) 319static int write_inode(struct inode *inode, struct writeback_control *wbc)
320{ 320{
321 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 321 int ret;
322 return inode->i_sb->s_op->write_inode(inode, wbc); 322
323 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
324 trace_writeback_write_inode_start(inode, wbc);
325 ret = inode->i_sb->s_op->write_inode(inode, wbc);
326 trace_writeback_write_inode(inode, wbc);
327 return ret;
328 }
323 return 0; 329 return 0;
324} 330}
325 331
@@ -450,6 +456,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
450 456
451 WARN_ON(!(inode->i_state & I_SYNC)); 457 WARN_ON(!(inode->i_state & I_SYNC));
452 458
459 trace_writeback_single_inode_start(inode, wbc, nr_to_write);
460
453 ret = do_writepages(mapping, wbc); 461 ret = do_writepages(mapping, wbc);
454 462
455 /* 463 /*
@@ -1150,8 +1158,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1150 * dirty the inode itself 1158 * dirty the inode itself
1151 */ 1159 */
1152 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 1160 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
1161 trace_writeback_dirty_inode_start(inode, flags);
1162
1153 if (sb->s_op->dirty_inode) 1163 if (sb->s_op->dirty_inode)
1154 sb->s_op->dirty_inode(inode, flags); 1164 sb->s_op->dirty_inode(inode, flags);
1165
1166 trace_writeback_dirty_inode(inode, flags);
1155 } 1167 }
1156 1168
1157 /* 1169 /*
@@ -1332,47 +1344,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
1332EXPORT_SYMBOL(writeback_inodes_sb); 1344EXPORT_SYMBOL(writeback_inodes_sb);
1333 1345
1334/** 1346/**
1335 * writeback_inodes_sb_if_idle - start writeback if none underway 1347 * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
1336 * @sb: the superblock 1348 * @sb: the superblock
1337 * @reason: reason why some writeback work was initiated 1349 * @nr: the number of pages to write
1350 * @reason: the reason of writeback
1338 * 1351 *
1339 * Invoke writeback_inodes_sb if no writeback is currently underway. 1352 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
1340 * Returns 1 if writeback was started, 0 if not. 1353 * Returns 1 if writeback was started, 0 if not.
1341 */ 1354 */
1342int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) 1355int try_to_writeback_inodes_sb_nr(struct super_block *sb,
1356 unsigned long nr,
1357 enum wb_reason reason)
1343{ 1358{
1344 if (!writeback_in_progress(sb->s_bdi)) { 1359 if (writeback_in_progress(sb->s_bdi))
1345 down_read(&sb->s_umount);
1346 writeback_inodes_sb(sb, reason);
1347 up_read(&sb->s_umount);
1348 return 1; 1360 return 1;
1349 } else 1361
1362 if (!down_read_trylock(&sb->s_umount))
1350 return 0; 1363 return 0;
1364
1365 writeback_inodes_sb_nr(sb, nr, reason);
1366 up_read(&sb->s_umount);
1367 return 1;
1351} 1368}
1352EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1369EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);
1353 1370
1354/** 1371/**
1355 * writeback_inodes_sb_nr_if_idle - start writeback if none underway 1372 * try_to_writeback_inodes_sb - try to start writeback if none underway
1356 * @sb: the superblock 1373 * @sb: the superblock
1357 * @nr: the number of pages to write
1358 * @reason: reason why some writeback work was initiated 1374 * @reason: reason why some writeback work was initiated
1359 * 1375 *
 1360 * Invoke writeback_inodes_sb if no writeback is currently underway. 1376 * Implemented via try_to_writeback_inodes_sb_nr()
1361 * Returns 1 if writeback was started, 0 if not. 1377 * Returns 1 if writeback was started, 0 if not.
1362 */ 1378 */
1363int writeback_inodes_sb_nr_if_idle(struct super_block *sb, 1379int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
1364 unsigned long nr,
1365 enum wb_reason reason)
1366{ 1380{
1367 if (!writeback_in_progress(sb->s_bdi)) { 1381 return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
1368 down_read(&sb->s_umount);
1369 writeback_inodes_sb_nr(sb, nr, reason);
1370 up_read(&sb->s_umount);
1371 return 1;
1372 } else
1373 return 0;
1374} 1382}
1375EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); 1383EXPORT_SYMBOL(try_to_writeback_inodes_sb);
1376 1384
1377/** 1385/**
1378 * sync_inodes_sb - sync sb inode pages 1386 * sync_inodes_sb - sync sb inode pages
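The renamed helpers differ from the old *_if_idle variants mainly in taking s_umount with a trylock, so a caller can no longer block on it; the return value is 1 when writeback is already in progress or was just started, and 0 only when the lock could not be taken. A filesystem that wants to nudge writeback when it runs low on space might call it roughly as follows (the WB_REASON value is only an example, not mandated by the patch):

    int started;

    started = try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
    if (!started) {
            /* s_umount was contended; nothing was queued, so fall back
             * (e.g. retry later or force a transaction commit) */
    }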
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 8dcb114758e3..e2cba1f60c21 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -237,13 +237,12 @@ static int fscache_alloc_object(struct fscache_cache *cache,
237 struct fscache_cookie *cookie) 237 struct fscache_cookie *cookie)
238{ 238{
239 struct fscache_object *object; 239 struct fscache_object *object;
240 struct hlist_node *_n;
241 int ret; 240 int ret;
242 241
243 _enter("%p,%p{%s}", cache, cookie, cookie->def->name); 242 _enter("%p,%p{%s}", cache, cookie, cookie->def->name);
244 243
245 spin_lock(&cookie->lock); 244 spin_lock(&cookie->lock);
246 hlist_for_each_entry(object, _n, &cookie->backing_objects, 245 hlist_for_each_entry(object, &cookie->backing_objects,
247 cookie_link) { 246 cookie_link) {
248 if (object->cache == cache) 247 if (object->cache == cache)
249 goto object_already_extant; 248 goto object_already_extant;
@@ -311,7 +310,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
311{ 310{
312 struct fscache_object *p; 311 struct fscache_object *p;
313 struct fscache_cache *cache = object->cache; 312 struct fscache_cache *cache = object->cache;
314 struct hlist_node *_n;
315 int ret; 313 int ret;
316 314
317 _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); 315 _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
@@ -321,7 +319,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
321 /* there may be multiple initial creations of this object, but we only 319 /* there may be multiple initial creations of this object, but we only
322 * want one */ 320 * want one */
323 ret = -EEXIST; 321 ret = -EEXIST;
324 hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { 322 hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) {
325 if (p->cache == object->cache) { 323 if (p->cache == object->cache) {
326 if (p->state >= FSCACHE_OBJECT_DYING) 324 if (p->state >= FSCACHE_OBJECT_DYING)
327 ret = -ENOBUFS; 325 ret = -ENOBUFS;
@@ -331,7 +329,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
331 329
332 /* pin the parent object */ 330 /* pin the parent object */
333 spin_lock_nested(&cookie->parent->lock, 1); 331 spin_lock_nested(&cookie->parent->lock, 1);
334 hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, 332 hlist_for_each_entry(p, &cookie->parent->backing_objects,
335 cookie_link) { 333 cookie_link) {
336 if (p->cache == object->cache) { 334 if (p->cache == object->cache) {
337 if (p->state >= FSCACHE_OBJECT_DYING) { 335 if (p->state >= FSCACHE_OBJECT_DYING) {
@@ -435,7 +433,6 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate);
435void __fscache_update_cookie(struct fscache_cookie *cookie) 433void __fscache_update_cookie(struct fscache_cookie *cookie)
436{ 434{
437 struct fscache_object *object; 435 struct fscache_object *object;
438 struct hlist_node *_p;
439 436
440 fscache_stat(&fscache_n_updates); 437 fscache_stat(&fscache_n_updates);
441 438
@@ -452,7 +449,7 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
452 spin_lock(&cookie->lock); 449 spin_lock(&cookie->lock);
453 450
454 /* update the index entry on disk in each cache backing this cookie */ 451 /* update the index entry on disk in each cache backing this cookie */
455 hlist_for_each_entry(object, _p, 452 hlist_for_each_entry(object,
456 &cookie->backing_objects, cookie_link) { 453 &cookie->backing_objects, cookie_link) {
457 fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); 454 fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
458 } 455 }
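These fscache hunks are part of the tree-wide change that drops the separate struct hlist_node cursor from hlist_for_each_entry(): the iterator now takes just the typed cursor, the list head, and the member name. A minimal sketch of the new calling convention, with hypothetical types:

    #include <linux/list.h>

    struct item {
            int key;
            struct hlist_node link;
    };

    static struct item *find_item(struct hlist_head *head, int key)
    {
            struct item *it;

            /* no struct hlist_node *_n cursor any more */
            hlist_for_each_entry(it, head, link)
                    if (it->key == key)
                            return it;
            return NULL;
    }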
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 0cf160a94eda..1b2f6c2c3aaf 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -4,12 +4,24 @@ config FUSE_FS
4 With FUSE it is possible to implement a fully functional filesystem 4 With FUSE it is possible to implement a fully functional filesystem
5 in a userspace program. 5 in a userspace program.
6 6
7 There's also companion library: libfuse. This library along with 7 There's also a companion library: libfuse2. This library is available
8 utilities is available from the FUSE homepage: 8 from the FUSE homepage:
9 <http://fuse.sourceforge.net/> 9 <http://fuse.sourceforge.net/>
10 although chances are your distribution already has that library
11 installed if you've installed the "fuse" package itself.
10 12
11 See <file:Documentation/filesystems/fuse.txt> for more information. 13 See <file:Documentation/filesystems/fuse.txt> for more information.
12 See <file:Documentation/Changes> for needed library/utility version. 14 See <file:Documentation/Changes> for needed library/utility version.
13 15
14 If you want to develop a userspace FS, or if you want to use 16 If you want to develop a userspace FS, or if you want to use
15 a filesystem based on FUSE, answer Y or M. 17 a filesystem based on FUSE, answer Y or M.
18
19config CUSE
20 tristate "Character device in Userspace support"
21 depends on FUSE_FS
22 help
23 This FUSE extension allows character devices to be
24 implemented in userspace.
25
26 If you want to develop or use a userspace character device
27 based on CUSE, answer Y or M.
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 75a20c092dd4..b7978b9f75ef 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -23,7 +23,7 @@ static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
23{ 23{
24 struct fuse_conn *fc; 24 struct fuse_conn *fc;
25 mutex_lock(&fuse_mutex); 25 mutex_lock(&fuse_mutex);
26 fc = file->f_path.dentry->d_inode->i_private; 26 fc = file_inode(file)->i_private;
27 if (fc) 27 if (fc)
28 fc = fuse_conn_get(fc); 28 fc = fuse_conn_get(fc);
29 mutex_unlock(&fuse_mutex); 29 mutex_unlock(&fuse_mutex);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index ee8d55042298..6f96a8def147 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -45,7 +45,6 @@
45#include <linux/miscdevice.h> 45#include <linux/miscdevice.h>
46#include <linux/mutex.h> 46#include <linux/mutex.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/spinlock.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
51 50
@@ -63,7 +62,7 @@ struct cuse_conn {
63 bool unrestricted_ioctl; 62 bool unrestricted_ioctl;
64}; 63};
65 64
66static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */ 65static DEFINE_MUTEX(cuse_lock); /* protects registration */
67static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN]; 66static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
68static struct class *cuse_class; 67static struct class *cuse_class;
69 68
@@ -92,19 +91,22 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
92 loff_t *ppos) 91 loff_t *ppos)
93{ 92{
94 loff_t pos = 0; 93 loff_t pos = 0;
94 struct iovec iov = { .iov_base = buf, .iov_len = count };
95 95
96 return fuse_direct_io(file, buf, count, &pos, 0); 96 return fuse_direct_io(file, &iov, 1, count, &pos, 0);
97} 97}
98 98
99static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write(struct file *file, const char __user *buf,
100 size_t count, loff_t *ppos) 100 size_t count, loff_t *ppos)
101{ 101{
102 loff_t pos = 0; 102 loff_t pos = 0;
103 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
104
103 /* 105 /*
104 * No locking or generic_write_checks(), the server is 106 * No locking or generic_write_checks(), the server is
105 * responsible for locking and sanity checks. 107 * responsible for locking and sanity checks.
106 */ 108 */
107 return fuse_direct_io(file, buf, count, &pos, 1); 109 return fuse_direct_io(file, &iov, 1, count, &pos, 1);
108} 110}
109 111
110static int cuse_open(struct inode *inode, struct file *file) 112static int cuse_open(struct inode *inode, struct file *file)
@@ -114,14 +116,14 @@ static int cuse_open(struct inode *inode, struct file *file)
114 int rc; 116 int rc;
115 117
116 /* look up and get the connection */ 118 /* look up and get the connection */
117 spin_lock(&cuse_lock); 119 mutex_lock(&cuse_lock);
118 list_for_each_entry(pos, cuse_conntbl_head(devt), list) 120 list_for_each_entry(pos, cuse_conntbl_head(devt), list)
119 if (pos->dev->devt == devt) { 121 if (pos->dev->devt == devt) {
120 fuse_conn_get(&pos->fc); 122 fuse_conn_get(&pos->fc);
121 cc = pos; 123 cc = pos;
122 break; 124 break;
123 } 125 }
124 spin_unlock(&cuse_lock); 126 mutex_unlock(&cuse_lock);
125 127
126 /* dead? */ 128 /* dead? */
127 if (!cc) 129 if (!cc)
@@ -267,7 +269,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
267static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo) 269static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
268{ 270{
269 char *end = p + len; 271 char *end = p + len;
270 char *key, *val; 272 char *uninitialized_var(key), *uninitialized_var(val);
271 int rc; 273 int rc;
272 274
273 while (true) { 275 while (true) {
@@ -305,14 +307,14 @@ static void cuse_gendev_release(struct device *dev)
305 */ 307 */
306static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 308static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
307{ 309{
308 struct cuse_conn *cc = fc_to_cc(fc); 310 struct cuse_conn *cc = fc_to_cc(fc), *pos;
309 struct cuse_init_out *arg = req->out.args[0].value; 311 struct cuse_init_out *arg = req->out.args[0].value;
310 struct page *page = req->pages[0]; 312 struct page *page = req->pages[0];
311 struct cuse_devinfo devinfo = { }; 313 struct cuse_devinfo devinfo = { };
312 struct device *dev; 314 struct device *dev;
313 struct cdev *cdev; 315 struct cdev *cdev;
314 dev_t devt; 316 dev_t devt;
315 int rc; 317 int rc, i;
316 318
317 if (req->out.h.error || 319 if (req->out.h.error ||
318 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) { 320 arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
@@ -356,15 +358,24 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
356 dev_set_drvdata(dev, cc); 358 dev_set_drvdata(dev, cc);
357 dev_set_name(dev, "%s", devinfo.name); 359 dev_set_name(dev, "%s", devinfo.name);
358 360
361 mutex_lock(&cuse_lock);
362
363 /* make sure the device-name is unique */
364 for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
365 list_for_each_entry(pos, &cuse_conntbl[i], list)
366 if (!strcmp(dev_name(pos->dev), dev_name(dev)))
367 goto err_unlock;
368 }
369
359 rc = device_add(dev); 370 rc = device_add(dev);
360 if (rc) 371 if (rc)
361 goto err_device; 372 goto err_unlock;
362 373
363 /* register cdev */ 374 /* register cdev */
364 rc = -ENOMEM; 375 rc = -ENOMEM;
365 cdev = cdev_alloc(); 376 cdev = cdev_alloc();
366 if (!cdev) 377 if (!cdev)
367 goto err_device; 378 goto err_unlock;
368 379
369 cdev->owner = THIS_MODULE; 380 cdev->owner = THIS_MODULE;
370 cdev->ops = &cuse_frontend_fops; 381 cdev->ops = &cuse_frontend_fops;
@@ -377,9 +388,8 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
377 cc->cdev = cdev; 388 cc->cdev = cdev;
378 389
379 /* make the device available */ 390 /* make the device available */
380 spin_lock(&cuse_lock);
381 list_add(&cc->list, cuse_conntbl_head(devt)); 391 list_add(&cc->list, cuse_conntbl_head(devt));
382 spin_unlock(&cuse_lock); 392 mutex_unlock(&cuse_lock);
383 393
384 /* announce device availability */ 394 /* announce device availability */
385 dev_set_uevent_suppress(dev, 0); 395 dev_set_uevent_suppress(dev, 0);
@@ -391,7 +401,8 @@ out:
391 401
392err_cdev: 402err_cdev:
393 cdev_del(cdev); 403 cdev_del(cdev);
394err_device: 404err_unlock:
405 mutex_unlock(&cuse_lock);
395 put_device(dev); 406 put_device(dev);
396err_region: 407err_region:
397 unregister_chrdev_region(devt, 1); 408 unregister_chrdev_region(devt, 1);
@@ -411,7 +422,7 @@ static int cuse_send_init(struct cuse_conn *cc)
411 422
412 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 423 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
413 424
414 req = fuse_get_req(fc); 425 req = fuse_get_req(fc, 1);
415 if (IS_ERR(req)) { 426 if (IS_ERR(req)) {
416 rc = PTR_ERR(req); 427 rc = PTR_ERR(req);
417 goto err; 428 goto err;
@@ -441,6 +452,7 @@ static int cuse_send_init(struct cuse_conn *cc)
441 req->out.argvar = 1; 452 req->out.argvar = 1;
442 req->out.argpages = 1; 453 req->out.argpages = 1;
443 req->pages[0] = page; 454 req->pages[0] = page;
455 req->page_descs[0].length = req->out.args[1].size;
444 req->num_pages = 1; 456 req->num_pages = 1;
445 req->end = cuse_process_init_reply; 457 req->end = cuse_process_init_reply;
446 fuse_request_send_background(fc, req); 458 fuse_request_send_background(fc, req);
@@ -520,9 +532,9 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
520 int rc; 532 int rc;
521 533
522 /* remove from the conntbl, no more access from this point on */ 534 /* remove from the conntbl, no more access from this point on */
523 spin_lock(&cuse_lock); 535 mutex_lock(&cuse_lock);
524 list_del_init(&cc->list); 536 list_del_init(&cc->list);
525 spin_unlock(&cuse_lock); 537 mutex_unlock(&cuse_lock);
526 538
527 /* remove device */ 539 /* remove device */
528 if (cc->dev) 540 if (cc->dev)
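The cuse.c changes above do two things: cuse_lock becomes a mutex so it can be held across device_add(), which may sleep, and the init-reply path now rejects a second CUSE device with the same name before registering it. A minimal sketch of that duplicate-name scan, assuming it lives in fs/fuse/cuse.c next to the existing cuse_conntbl[CUSE_CONNTBL_LEN] hash of connection lists and that each cuse_conn keeps its struct device in cc->dev (the helper name is illustrative only):

/* Sketch only: the uniqueness check factored out of
 * cuse_process_init_reply(); caller must hold cuse_lock. */
static bool cuse_name_is_unique(const char *name)
{
	struct cuse_conn *pos;
	int i;

	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
		list_for_each_entry(pos, &cuse_conntbl[i], list)
			if (!strcmp(dev_name(pos->dev), name))
				return false;	/* name already registered */
	return true;
}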
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c16335315e5d..e9bdec0b16d9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,34 +34,67 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
34 return file->private_data; 34 return file->private_data;
35} 35}
36 36
37static void fuse_request_init(struct fuse_req *req) 37static void fuse_request_init(struct fuse_req *req, struct page **pages,
38 struct fuse_page_desc *page_descs,
39 unsigned npages)
38{ 40{
39 memset(req, 0, sizeof(*req)); 41 memset(req, 0, sizeof(*req));
42 memset(pages, 0, sizeof(*pages) * npages);
43 memset(page_descs, 0, sizeof(*page_descs) * npages);
40 INIT_LIST_HEAD(&req->list); 44 INIT_LIST_HEAD(&req->list);
41 INIT_LIST_HEAD(&req->intr_entry); 45 INIT_LIST_HEAD(&req->intr_entry);
42 init_waitqueue_head(&req->waitq); 46 init_waitqueue_head(&req->waitq);
43 atomic_set(&req->count, 1); 47 atomic_set(&req->count, 1);
48 req->pages = pages;
49 req->page_descs = page_descs;
50 req->max_pages = npages;
44} 51}
45 52
46struct fuse_req *fuse_request_alloc(void) 53static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
47{ 54{
48 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); 55 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
49 if (req) 56 if (req) {
50 fuse_request_init(req); 57 struct page **pages;
58 struct fuse_page_desc *page_descs;
59
60 if (npages <= FUSE_REQ_INLINE_PAGES) {
61 pages = req->inline_pages;
62 page_descs = req->inline_page_descs;
63 } else {
64 pages = kmalloc(sizeof(struct page *) * npages, flags);
65 page_descs = kmalloc(sizeof(struct fuse_page_desc) *
66 npages, flags);
67 }
68
69 if (!pages || !page_descs) {
70 kfree(pages);
71 kfree(page_descs);
72 kmem_cache_free(fuse_req_cachep, req);
73 return NULL;
74 }
75
76 fuse_request_init(req, pages, page_descs, npages);
77 }
51 return req; 78 return req;
52} 79}
80
81struct fuse_req *fuse_request_alloc(unsigned npages)
82{
83 return __fuse_request_alloc(npages, GFP_KERNEL);
84}
53EXPORT_SYMBOL_GPL(fuse_request_alloc); 85EXPORT_SYMBOL_GPL(fuse_request_alloc);
54 86
55struct fuse_req *fuse_request_alloc_nofs(void) 87struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
56{ 88{
57 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS); 89 return __fuse_request_alloc(npages, GFP_NOFS);
58 if (req)
59 fuse_request_init(req);
60 return req;
61} 90}
62 91
63void fuse_request_free(struct fuse_req *req) 92void fuse_request_free(struct fuse_req *req)
64{ 93{
94 if (req->pages != req->inline_pages) {
95 kfree(req->pages);
96 kfree(req->page_descs);
97 }
65 kmem_cache_free(fuse_req_cachep, req); 98 kmem_cache_free(fuse_req_cachep, req);
66} 99}
67 100
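The allocator rework above sizes the page vector per request: fuse_request_init() is handed the pages and page-descriptor arrays plus their capacity, __fuse_request_alloc() picks either the arrays embedded in struct fuse_req (when npages <= FUSE_REQ_INLINE_PAGES) or two kmalloc()ed ones, and fuse_request_free() releases only the out-of-line case. A usage sketch built solely from the functions shown in this hunk:

/* Sketch: a request that will carry four pages.  Four exceeds
 * FUSE_REQ_INLINE_PAGES (1), so both vectors come from kmalloc(). */
struct fuse_req *req = fuse_request_alloc(4);

if (!req)
	return -ENOMEM;			/* allocation may fail */
/* ... fill req->pages[0..3] and req->page_descs[0..3] ... */
fuse_request_free(req);			/* also frees the out-of-line vectors */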
@@ -97,7 +130,7 @@ static void fuse_req_init_context(struct fuse_req *req)
97 req->in.h.pid = current->pid; 130 req->in.h.pid = current->pid;
98} 131}
99 132
100struct fuse_req *fuse_get_req(struct fuse_conn *fc) 133struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
101{ 134{
102 struct fuse_req *req; 135 struct fuse_req *req;
103 sigset_t oldset; 136 sigset_t oldset;
@@ -116,7 +149,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
116 if (!fc->connected) 149 if (!fc->connected)
117 goto out; 150 goto out;
118 151
119 req = fuse_request_alloc(); 152 req = fuse_request_alloc(npages);
120 err = -ENOMEM; 153 err = -ENOMEM;
121 if (!req) 154 if (!req)
122 goto out; 155 goto out;
@@ -165,7 +198,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
165 struct fuse_file *ff = file->private_data; 198 struct fuse_file *ff = file->private_data;
166 199
167 spin_lock(&fc->lock); 200 spin_lock(&fc->lock);
168 fuse_request_init(req); 201 fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
169 BUG_ON(ff->reserved_req); 202 BUG_ON(ff->reserved_req);
170 ff->reserved_req = req; 203 ff->reserved_req = req;
171 wake_up_all(&fc->reserved_req_waitq); 204 wake_up_all(&fc->reserved_req_waitq);
@@ -186,13 +219,14 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
186 * filesystem should not have its own file open. If deadlock is 219
187 * intentional, it can still be broken by "aborting" the filesystem. 220 * intentional, it can still be broken by "aborting" the filesystem.
188 */ 221 */
189struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) 222struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
223 struct file *file)
190{ 224{
191 struct fuse_req *req; 225 struct fuse_req *req;
192 226
193 atomic_inc(&fc->num_waiting); 227 atomic_inc(&fc->num_waiting);
194 wait_event(fc->blocked_waitq, !fc->blocked); 228 wait_event(fc->blocked_waitq, !fc->blocked);
195 req = fuse_request_alloc(); 229 req = fuse_request_alloc(0);
196 if (!req) 230 if (!req)
197 req = get_reserved_req(fc, file); 231 req = get_reserved_req(fc, file);
198 232
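fuse_get_req() now takes the number of page slots the caller intends to use, and fuse_get_req_nopages()/fuse_get_req_nofail_nopages() cover the page-less cases. The single-page pattern that cuse_send_init(), fuse_readpage() and fuse_readdir() switch to elsewhere in this series looks like this (a sketch of the call sequence, not a new API):

req = fuse_get_req(fc, 1);		/* reserve one page slot */
if (IS_ERR(req))
	return PTR_ERR(req);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE;	/* per-page length must now be set */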
@@ -406,9 +440,8 @@ __acquires(fc->lock)
406 } 440 }
407} 441}
408 442
409void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) 443static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
410{ 444{
411 req->isreply = 1;
412 spin_lock(&fc->lock); 445 spin_lock(&fc->lock);
413 if (!fc->connected) 446 if (!fc->connected)
414 req->out.h.error = -ENOTCONN; 447 req->out.h.error = -ENOTCONN;
@@ -425,6 +458,12 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
425 } 458 }
426 spin_unlock(&fc->lock); 459 spin_unlock(&fc->lock);
427} 460}
461
462void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
463{
464 req->isreply = 1;
465 __fuse_request_send(fc, req);
466}
428EXPORT_SYMBOL_GPL(fuse_request_send); 467EXPORT_SYMBOL_GPL(fuse_request_send);
429 468
430static void fuse_request_send_nowait_locked(struct fuse_conn *fc, 469static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
@@ -491,6 +530,27 @@ void fuse_request_send_background_locked(struct fuse_conn *fc,
491 fuse_request_send_nowait_locked(fc, req); 530 fuse_request_send_nowait_locked(fc, req);
492} 531}
493 532
533void fuse_force_forget(struct file *file, u64 nodeid)
534{
535 struct inode *inode = file->f_path.dentry->d_inode;
536 struct fuse_conn *fc = get_fuse_conn(inode);
537 struct fuse_req *req;
538 struct fuse_forget_in inarg;
539
540 memset(&inarg, 0, sizeof(inarg));
541 inarg.nlookup = 1;
542 req = fuse_get_req_nofail_nopages(fc, file);
543 req->in.h.opcode = FUSE_FORGET;
544 req->in.h.nodeid = nodeid;
545 req->in.numargs = 1;
546 req->in.args[0].size = sizeof(inarg);
547 req->in.args[0].value = &inarg;
548 req->isreply = 0;
549 __fuse_request_send(fc, req);
550 /* ignore errors */
551 fuse_put_request(fc, req);
552}
553
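fuse_force_forget() builds a FUSE_FORGET with nlookup = 1 and pushes it through __fuse_request_send() with isreply = 0, so no reply is awaited and any error is ignored. Its one caller in this patch is the readdirplus parser, which uses it to drop the lookup reference taken for an entry it could not link:

/* From parse_dirplusfile() later in this patch: undo the implicit
 * lookup when linking a READDIRPLUS entry fails. */
ret = fuse_direntplus_link(file, direntplus, attr_version);
if (ret)
	fuse_force_forget(file, direntplus->entry_out.nodeid);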
494/* 554/*
495 * Lock the request. Up to the next unlock_request() there mustn't be 555 * Lock the request. Up to the next unlock_request() there mustn't be
496 * anything that could cause a page-fault. If the request was already 556 * anything that could cause a page-fault. If the request was already
@@ -692,8 +752,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
692 struct page *oldpage = *pagep; 752 struct page *oldpage = *pagep;
693 struct page *newpage; 753 struct page *newpage;
694 struct pipe_buffer *buf = cs->pipebufs; 754 struct pipe_buffer *buf = cs->pipebufs;
695 struct address_space *mapping;
696 pgoff_t index;
697 755
698 unlock_request(cs->fc, cs->req); 756 unlock_request(cs->fc, cs->req);
699 fuse_copy_finish(cs); 757 fuse_copy_finish(cs);
@@ -724,9 +782,6 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
724 if (fuse_check_page(newpage) != 0) 782 if (fuse_check_page(newpage) != 0)
725 goto out_fallback_unlock; 783 goto out_fallback_unlock;
726 784
727 mapping = oldpage->mapping;
728 index = oldpage->index;
729
730 /* 785 /*
731 * This is a new and locked page, it shouldn't be mapped or 786 * This is a new and locked page, it shouldn't be mapped or
732 * have any special flags on it 787 * have any special flags on it
@@ -855,11 +910,11 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
855{ 910{
856 unsigned i; 911 unsigned i;
857 struct fuse_req *req = cs->req; 912 struct fuse_req *req = cs->req;
858 unsigned offset = req->page_offset;
859 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
860 913
861 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { 914 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
862 int err; 915 int err;
916 unsigned offset = req->page_descs[i].offset;
917 unsigned count = min(nbytes, req->page_descs[i].length);
863 918
864 err = fuse_copy_page(cs, &req->pages[i], offset, count, 919 err = fuse_copy_page(cs, &req->pages[i], offset, count,
865 zeroing); 920 zeroing);
@@ -867,8 +922,6 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
867 return err; 922 return err;
868 923
869 nbytes -= count; 924 nbytes -= count;
870 count = min(nbytes, (unsigned) PAGE_SIZE);
871 offset = 0;
872 } 925 }
873 return 0; 926 return 0;
874} 927}
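With the single req->page_offset gone, fuse_copy_pages() reads an (offset, length) pair per page from req->page_descs[]. A sketch of how a producer fills those descriptors for a payload of len bytes starting off bytes into the first page; npages, len and off are placeholders, and descriptors past the first keep the zero offset left by fuse_request_init():

unsigned int i, rem = len;

req->page_descs[0].offset = off;
for (i = 0; i < npages; i++) {
	unsigned int space = PAGE_SIZE - (i == 0 ? off : 0);

	req->page_descs[i].length = min_t(unsigned int, rem, space);
	rem -= req->page_descs[i].length;
}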
@@ -1541,29 +1594,34 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1541 unsigned int num; 1594 unsigned int num;
1542 unsigned int offset; 1595 unsigned int offset;
1543 size_t total_len = 0; 1596 size_t total_len = 0;
1597 int num_pages;
1598
1599 offset = outarg->offset & ~PAGE_CACHE_MASK;
1600 file_size = i_size_read(inode);
1601
1602 num = outarg->size;
1603 if (outarg->offset > file_size)
1604 num = 0;
1605 else if (outarg->offset + num > file_size)
1606 num = file_size - outarg->offset;
1544 1607
1545 req = fuse_get_req(fc); 1608 num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609 num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611 req = fuse_get_req(fc, num_pages);
1546 if (IS_ERR(req)) 1612 if (IS_ERR(req))
1547 return PTR_ERR(req); 1613 return PTR_ERR(req);
1548 1614
1549 offset = outarg->offset & ~PAGE_CACHE_MASK;
1550
1551 req->in.h.opcode = FUSE_NOTIFY_REPLY; 1615 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1552 req->in.h.nodeid = outarg->nodeid; 1616 req->in.h.nodeid = outarg->nodeid;
1553 req->in.numargs = 2; 1617 req->in.numargs = 2;
1554 req->in.argpages = 1; 1618 req->in.argpages = 1;
1555 req->page_offset = offset; 1619 req->page_descs[0].offset = offset;
1556 req->end = fuse_retrieve_end; 1620 req->end = fuse_retrieve_end;
1557 1621
1558 index = outarg->offset >> PAGE_CACHE_SHIFT; 1622 index = outarg->offset >> PAGE_CACHE_SHIFT;
1559 file_size = i_size_read(inode);
1560 num = outarg->size;
1561 if (outarg->offset > file_size)
1562 num = 0;
1563 else if (outarg->offset + num > file_size)
1564 num = file_size - outarg->offset;
1565 1623
1566 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) { 1624 while (num && req->num_pages < num_pages) {
1567 struct page *page; 1625 struct page *page;
1568 unsigned int this_num; 1626 unsigned int this_num;
1569 1627
@@ -1573,6 +1631,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1573 1631
1574 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); 1632 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1575 req->pages[req->num_pages] = page; 1633 req->pages[req->num_pages] = page;
1634 req->page_descs[req->num_pages].length = this_num;
1576 req->num_pages++; 1635 req->num_pages++;
1577 1636
1578 offset = 0; 1637 offset = 0;
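fuse_retrieve() now clamps the requested size against i_size before allocating and derives the request size from the byte range itself: the page count is the number of pages touched by [offset, offset + num), capped at FUSE_MAX_PAGES_PER_REQ. A worked instance of that rounding, assuming 4 KiB pages:

/* num = 6000 bytes starting offset = 3000 into the first page:
 *   num + offset + PAGE_SIZE - 1 = 6000 + 3000 + 4095 = 13095
 *   13095 >> PAGE_SHIFT (12)     = 3   -> the range spans pages 0..2
 */
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);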
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b7c09f9eb40c..ff15522481d4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -14,6 +14,29 @@
14#include <linux/namei.h> 14#include <linux/namei.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16 16
17static bool fuse_use_readdirplus(struct inode *dir, struct file *filp)
18{
19 struct fuse_conn *fc = get_fuse_conn(dir);
20 struct fuse_inode *fi = get_fuse_inode(dir);
21
22 if (!fc->do_readdirplus)
23 return false;
24 if (!fc->readdirplus_auto)
25 return true;
26 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
27 return true;
28 if (filp->f_pos == 0)
29 return true;
30 return false;
31}
32
33static void fuse_advise_use_readdirplus(struct inode *dir)
34{
35 struct fuse_inode *fi = get_fuse_inode(dir);
36
37 set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
38}
39
17#if BITS_PER_LONG >= 64 40#if BITS_PER_LONG >= 64
18static inline void fuse_dentry_settime(struct dentry *entry, u64 time) 41static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
19{ 42{
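fuse_use_readdirplus() encodes the policy: never without server support (do_readdirplus), always when readdirplus_auto is off, otherwise only for the first read of a directory (f_pos == 0) or when a prior lookup/revalidate flagged the directory via fuse_advise_use_readdirplus(). The flag is consumed exactly once because the reader uses test_and_clear_bit(); a sketch of that pairing on the directory inode's new state word:

/* lookup/revalidate path: hint that a readdir is likely to follow */
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);

/* readdir path: consume the hint atomically, at most once */
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
	return true;			/* use READDIRPLUS this time */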
@@ -178,7 +201,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
178 return -ECHILD; 201 return -ECHILD;
179 202
180 fc = get_fuse_conn(inode); 203 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 204 req = fuse_get_req_nopages(fc);
182 if (IS_ERR(req)) 205 if (IS_ERR(req))
183 return 0; 206 return 0;
184 207
@@ -219,6 +242,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
219 attr_version); 242 attr_version);
220 fuse_change_entry_timeout(entry, &outarg); 243 fuse_change_entry_timeout(entry, &outarg);
221 } 244 }
245 fuse_advise_use_readdirplus(inode);
222 return 1; 246 return 1;
223} 247}
224 248
@@ -271,7 +295,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
271 if (name->len > FUSE_NAME_MAX) 295 if (name->len > FUSE_NAME_MAX)
272 goto out; 296 goto out;
273 297
274 req = fuse_get_req(fc); 298 req = fuse_get_req_nopages(fc);
275 err = PTR_ERR(req); 299 err = PTR_ERR(req);
276 if (IS_ERR(req)) 300 if (IS_ERR(req))
277 goto out; 301 goto out;
@@ -355,6 +379,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
355 else 379 else
356 fuse_invalidate_entry_cache(entry); 380 fuse_invalidate_entry_cache(entry);
357 381
382 fuse_advise_use_readdirplus(dir);
358 return newent; 383 return newent;
359 384
360 out_iput: 385 out_iput:
@@ -391,7 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
391 if (!forget) 416 if (!forget)
392 goto out_err; 417 goto out_err;
393 418
394 req = fuse_get_req(fc); 419 req = fuse_get_req_nopages(fc);
395 err = PTR_ERR(req); 420 err = PTR_ERR(req);
396 if (IS_ERR(req)) 421 if (IS_ERR(req))
397 goto out_put_forget_req; 422 goto out_put_forget_req;
@@ -592,7 +617,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
592{ 617{
593 struct fuse_mknod_in inarg; 618 struct fuse_mknod_in inarg;
594 struct fuse_conn *fc = get_fuse_conn(dir); 619 struct fuse_conn *fc = get_fuse_conn(dir);
595 struct fuse_req *req = fuse_get_req(fc); 620 struct fuse_req *req = fuse_get_req_nopages(fc);
596 if (IS_ERR(req)) 621 if (IS_ERR(req))
597 return PTR_ERR(req); 622 return PTR_ERR(req);
598 623
@@ -623,7 +648,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
623{ 648{
624 struct fuse_mkdir_in inarg; 649 struct fuse_mkdir_in inarg;
625 struct fuse_conn *fc = get_fuse_conn(dir); 650 struct fuse_conn *fc = get_fuse_conn(dir);
626 struct fuse_req *req = fuse_get_req(fc); 651 struct fuse_req *req = fuse_get_req_nopages(fc);
627 if (IS_ERR(req)) 652 if (IS_ERR(req))
628 return PTR_ERR(req); 653 return PTR_ERR(req);
629 654
@@ -647,7 +672,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
647{ 672{
648 struct fuse_conn *fc = get_fuse_conn(dir); 673 struct fuse_conn *fc = get_fuse_conn(dir);
649 unsigned len = strlen(link) + 1; 674 unsigned len = strlen(link) + 1;
650 struct fuse_req *req = fuse_get_req(fc); 675 struct fuse_req *req = fuse_get_req_nopages(fc);
651 if (IS_ERR(req)) 676 if (IS_ERR(req))
652 return PTR_ERR(req); 677 return PTR_ERR(req);
653 678
@@ -664,7 +689,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
664{ 689{
665 int err; 690 int err;
666 struct fuse_conn *fc = get_fuse_conn(dir); 691 struct fuse_conn *fc = get_fuse_conn(dir);
667 struct fuse_req *req = fuse_get_req(fc); 692 struct fuse_req *req = fuse_get_req_nopages(fc);
668 if (IS_ERR(req)) 693 if (IS_ERR(req))
669 return PTR_ERR(req); 694 return PTR_ERR(req);
670 695
@@ -682,7 +707,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
682 707
683 spin_lock(&fc->lock); 708 spin_lock(&fc->lock);
684 fi->attr_version = ++fc->attr_version; 709 fi->attr_version = ++fc->attr_version;
685 drop_nlink(inode); 710 /*
711 * If i_nlink == 0 then unlink doesn't make sense, yet this can
712 * happen if userspace filesystem is careless. It would be
713 * difficult to enforce correct nlink usage so just ignore this
714 * condition here
715 */
716 if (inode->i_nlink > 0)
717 drop_nlink(inode);
686 spin_unlock(&fc->lock); 718 spin_unlock(&fc->lock);
687 fuse_invalidate_attr(inode); 719 fuse_invalidate_attr(inode);
688 fuse_invalidate_attr(dir); 720 fuse_invalidate_attr(dir);
@@ -696,7 +728,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
696{ 728{
697 int err; 729 int err;
698 struct fuse_conn *fc = get_fuse_conn(dir); 730 struct fuse_conn *fc = get_fuse_conn(dir);
699 struct fuse_req *req = fuse_get_req(fc); 731 struct fuse_req *req = fuse_get_req_nopages(fc);
700 if (IS_ERR(req)) 732 if (IS_ERR(req))
701 return PTR_ERR(req); 733 return PTR_ERR(req);
702 734
@@ -723,7 +755,7 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
723 int err; 755 int err;
724 struct fuse_rename_in inarg; 756 struct fuse_rename_in inarg;
725 struct fuse_conn *fc = get_fuse_conn(olddir); 757 struct fuse_conn *fc = get_fuse_conn(olddir);
726 struct fuse_req *req = fuse_get_req(fc); 758 struct fuse_req *req = fuse_get_req_nopages(fc);
727 759
728 if (IS_ERR(req)) 760 if (IS_ERR(req))
729 return PTR_ERR(req); 761 return PTR_ERR(req);
@@ -776,7 +808,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
776 struct fuse_link_in inarg; 808 struct fuse_link_in inarg;
777 struct inode *inode = entry->d_inode; 809 struct inode *inode = entry->d_inode;
778 struct fuse_conn *fc = get_fuse_conn(inode); 810 struct fuse_conn *fc = get_fuse_conn(inode);
779 struct fuse_req *req = fuse_get_req(fc); 811 struct fuse_req *req = fuse_get_req_nopages(fc);
780 if (IS_ERR(req)) 812 if (IS_ERR(req))
781 return PTR_ERR(req); 813 return PTR_ERR(req);
782 814
@@ -848,7 +880,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
848 struct fuse_req *req; 880 struct fuse_req *req;
849 u64 attr_version; 881 u64 attr_version;
850 882
851 req = fuse_get_req(fc); 883 req = fuse_get_req_nopages(fc);
852 if (IS_ERR(req)) 884 if (IS_ERR(req))
853 return PTR_ERR(req); 885 return PTR_ERR(req);
854 886
@@ -985,7 +1017,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
985 1017
986/* 1018/*
987 * Calling into a user-controlled filesystem gives the filesystem 1019 * Calling into a user-controlled filesystem gives the filesystem
988 * daemon ptrace-like capabilities over the requester process. This 1020 * daemon ptrace-like capabilities over the current process. This
989 * means, that the filesystem daemon is able to record the exact 1021 * means, that the filesystem daemon is able to record the exact
990 * filesystem operations performed, and can also control the behavior 1022 * filesystem operations performed, and can also control the behavior
991 * of the requester process in otherwise impossible ways. For example 1023 * of the requester process in otherwise impossible ways. For example
@@ -996,27 +1028,23 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
996 * for which the owner of the mount has ptrace privilege. This 1028 * for which the owner of the mount has ptrace privilege. This
997 * excludes processes started by other users, suid or sgid processes. 1029 * excludes processes started by other users, suid or sgid processes.
998 */ 1030 */
999int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) 1031int fuse_allow_current_process(struct fuse_conn *fc)
1000{ 1032{
1001 const struct cred *cred; 1033 const struct cred *cred;
1002 int ret;
1003 1034
1004 if (fc->flags & FUSE_ALLOW_OTHER) 1035 if (fc->flags & FUSE_ALLOW_OTHER)
1005 return 1; 1036 return 1;
1006 1037
1007 rcu_read_lock(); 1038 cred = current_cred();
1008 ret = 0;
1009 cred = __task_cred(task);
1010 if (uid_eq(cred->euid, fc->user_id) && 1039 if (uid_eq(cred->euid, fc->user_id) &&
1011 uid_eq(cred->suid, fc->user_id) && 1040 uid_eq(cred->suid, fc->user_id) &&
1012 uid_eq(cred->uid, fc->user_id) && 1041 uid_eq(cred->uid, fc->user_id) &&
1013 gid_eq(cred->egid, fc->group_id) && 1042 gid_eq(cred->egid, fc->group_id) &&
1014 gid_eq(cred->sgid, fc->group_id) && 1043 gid_eq(cred->sgid, fc->group_id) &&
1015 gid_eq(cred->gid, fc->group_id)) 1044 gid_eq(cred->gid, fc->group_id))
1016 ret = 1; 1045 return 1;
1017 rcu_read_unlock();
1018 1046
1019 return ret; 1047 return 0;
1020} 1048}
1021 1049
1022static int fuse_access(struct inode *inode, int mask) 1050static int fuse_access(struct inode *inode, int mask)
@@ -1029,7 +1057,7 @@ static int fuse_access(struct inode *inode, int mask)
1029 if (fc->no_access) 1057 if (fc->no_access)
1030 return 0; 1058 return 0;
1031 1059
1032 req = fuse_get_req(fc); 1060 req = fuse_get_req_nopages(fc);
1033 if (IS_ERR(req)) 1061 if (IS_ERR(req))
1034 return PTR_ERR(req); 1062 return PTR_ERR(req);
1035 1063
@@ -1077,7 +1105,7 @@ static int fuse_permission(struct inode *inode, int mask)
1077 bool refreshed = false; 1105 bool refreshed = false;
1078 int err = 0; 1106 int err = 0;
1079 1107
1080 if (!fuse_allow_task(fc, current)) 1108 if (!fuse_allow_current_process(fc))
1081 return -EACCES; 1109 return -EACCES;
1082 1110
1083 /* 1111 /*
@@ -1155,19 +1183,157 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
1155 return 0; 1183 return 0;
1156} 1184}
1157 1185
1158static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) 1186static int fuse_direntplus_link(struct file *file,
1187 struct fuse_direntplus *direntplus,
1188 u64 attr_version)
1159{ 1189{
1160 int err; 1190 int err;
1191 struct fuse_entry_out *o = &direntplus->entry_out;
1192 struct fuse_dirent *dirent = &direntplus->dirent;
1193 struct dentry *parent = file->f_path.dentry;
1194 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
1195 struct dentry *dentry;
1196 struct dentry *alias;
1197 struct inode *dir = parent->d_inode;
1198 struct fuse_conn *fc;
1199 struct inode *inode;
1200
1201 if (!o->nodeid) {
1202 /*
1203 * Unlike in the case of fuse_lookup, zero nodeid does not mean
1204 * ENOENT. Instead, it only means the userspace filesystem did
1205 * not want to return attributes/handle for this entry.
1206 *
1207 * So do nothing.
1208 */
1209 return 0;
1210 }
1211
1212 if (name.name[0] == '.') {
1213 /*
1214 * We could potentially refresh the attributes of the directory
1215 * and its parent?
1216 */
1217 if (name.len == 1)
1218 return 0;
1219 if (name.name[1] == '.' && name.len == 2)
1220 return 0;
1221 }
1222 fc = get_fuse_conn(dir);
1223
1224 name.hash = full_name_hash(name.name, name.len);
1225 dentry = d_lookup(parent, &name);
1226 if (dentry && dentry->d_inode) {
1227 inode = dentry->d_inode;
1228 if (get_node_id(inode) == o->nodeid) {
1229 struct fuse_inode *fi;
1230 fi = get_fuse_inode(inode);
1231 spin_lock(&fc->lock);
1232 fi->nlookup++;
1233 spin_unlock(&fc->lock);
1234
1235 /*
1236 * The other branch to 'found' comes via fuse_iget()
1237 * which bumps nlookup inside
1238 */
1239 goto found;
1240 }
1241 err = d_invalidate(dentry);
1242 if (err)
1243 goto out;
1244 dput(dentry);
1245 dentry = NULL;
1246 }
1247
1248 dentry = d_alloc(parent, &name);
1249 err = -ENOMEM;
1250 if (!dentry)
1251 goto out;
1252
1253 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
1254 &o->attr, entry_attr_timeout(o), attr_version);
1255 if (!inode)
1256 goto out;
1257
1258 alias = d_materialise_unique(dentry, inode);
1259 err = PTR_ERR(alias);
1260 if (IS_ERR(alias))
1261 goto out;
1262 if (alias) {
1263 dput(dentry);
1264 dentry = alias;
1265 }
1266
1267found:
1268 fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1269 attr_version);
1270
1271 fuse_change_entry_timeout(dentry, o);
1272
1273 err = 0;
1274out:
1275 if (dentry)
1276 dput(dentry);
1277 return err;
1278}
1279
1280static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
1281 void *dstbuf, filldir_t filldir, u64 attr_version)
1282{
1283 struct fuse_direntplus *direntplus;
1284 struct fuse_dirent *dirent;
1285 size_t reclen;
1286 int over = 0;
1287 int ret;
1288
1289 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
1290 direntplus = (struct fuse_direntplus *) buf;
1291 dirent = &direntplus->dirent;
1292 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
1293
1294 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
1295 return -EIO;
1296 if (reclen > nbytes)
1297 break;
1298
1299 if (!over) {
1300 /* We fill entries into dstbuf only as much as
1301 it can hold. But we still continue iterating
1302 over remaining entries to link them. If not,
1303 we need to send a FORGET for each of those
1304 which we did not link.
1305 */
1306 over = filldir(dstbuf, dirent->name, dirent->namelen,
1307 file->f_pos, dirent->ino,
1308 dirent->type);
1309 file->f_pos = dirent->off;
1310 }
1311
1312 buf += reclen;
1313 nbytes -= reclen;
1314
1315 ret = fuse_direntplus_link(file, direntplus, attr_version);
1316 if (ret)
1317 fuse_force_forget(file, direntplus->entry_out.nodeid);
1318 }
1319
1320 return 0;
1321}
1322
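parse_dirplusfile() walks a buffer of fuse_direntplus records: each one is a fuse_entry_out (nodeid, attributes, timeouts) immediately followed by a fuse_dirent whose name is stored inline and padded to 8-byte alignment, which is what FUSE_NAME_OFFSET_DIRENTPLUS and FUSE_DIRENTPLUS_SIZE() express. An approximate picture of the layout; the authoritative definitions live in include/uapi/linux/fuse.h, not in this diff:

struct fuse_direntplus {
	struct fuse_entry_out entry_out;	/* nodeid, attrs, entry/attr timeouts */
	struct fuse_dirent dirent;		/* ino, off, namelen, type, name[] */
};

/* record length: everything up to dirent.name, plus namelen bytes of
 * name, rounded up to the dirent alignment (8 bytes) */
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);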
1323static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1324{
1325 int plus, err;
1161 size_t nbytes; 1326 size_t nbytes;
1162 struct page *page; 1327 struct page *page;
1163 struct inode *inode = file->f_path.dentry->d_inode; 1328 struct inode *inode = file_inode(file);
1164 struct fuse_conn *fc = get_fuse_conn(inode); 1329 struct fuse_conn *fc = get_fuse_conn(inode);
1165 struct fuse_req *req; 1330 struct fuse_req *req;
1331 u64 attr_version = 0;
1166 1332
1167 if (is_bad_inode(inode)) 1333 if (is_bad_inode(inode))
1168 return -EIO; 1334 return -EIO;
1169 1335
1170 req = fuse_get_req(fc); 1336 req = fuse_get_req(fc, 1);
1171 if (IS_ERR(req)) 1337 if (IS_ERR(req))
1172 return PTR_ERR(req); 1338 return PTR_ERR(req);
1173 1339
@@ -1176,17 +1342,34 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
1176 fuse_put_request(fc, req); 1342 fuse_put_request(fc, req);
1177 return -ENOMEM; 1343 return -ENOMEM;
1178 } 1344 }
1345
1346 plus = fuse_use_readdirplus(inode, file);
1179 req->out.argpages = 1; 1347 req->out.argpages = 1;
1180 req->num_pages = 1; 1348 req->num_pages = 1;
1181 req->pages[0] = page; 1349 req->pages[0] = page;
1182 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); 1350 req->page_descs[0].length = PAGE_SIZE;
1351 if (plus) {
1352 attr_version = fuse_get_attr_version(fc);
1353 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1354 FUSE_READDIRPLUS);
1355 } else {
1356 fuse_read_fill(req, file, file->f_pos, PAGE_SIZE,
1357 FUSE_READDIR);
1358 }
1183 fuse_request_send(fc, req); 1359 fuse_request_send(fc, req);
1184 nbytes = req->out.args[0].size; 1360 nbytes = req->out.args[0].size;
1185 err = req->out.h.error; 1361 err = req->out.h.error;
1186 fuse_put_request(fc, req); 1362 fuse_put_request(fc, req);
1187 if (!err) 1363 if (!err) {
1188 err = parse_dirfile(page_address(page), nbytes, file, dstbuf, 1364 if (plus) {
1189 filldir); 1365 err = parse_dirplusfile(page_address(page), nbytes,
1366 file, dstbuf, filldir,
1367 attr_version);
1368 } else {
1369 err = parse_dirfile(page_address(page), nbytes, file,
1370 dstbuf, filldir);
1371 }
1372 }
1190 1373
1191 __free_page(page); 1374 __free_page(page);
1192 fuse_invalidate_attr(inode); /* atime changed */ 1375 fuse_invalidate_attr(inode); /* atime changed */
@@ -1197,7 +1380,7 @@ static char *read_link(struct dentry *dentry)
1197{ 1380{
1198 struct inode *inode = dentry->d_inode; 1381 struct inode *inode = dentry->d_inode;
1199 struct fuse_conn *fc = get_fuse_conn(inode); 1382 struct fuse_conn *fc = get_fuse_conn(inode);
1200 struct fuse_req *req = fuse_get_req(fc); 1383 struct fuse_req *req = fuse_get_req_nopages(fc);
1201 char *link; 1384 char *link;
1202 1385
1203 if (IS_ERR(req)) 1386 if (IS_ERR(req))
@@ -1391,7 +1574,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1391 loff_t oldsize; 1574 loff_t oldsize;
1392 int err; 1575 int err;
1393 1576
1394 if (!fuse_allow_task(fc, current)) 1577 if (!fuse_allow_current_process(fc))
1395 return -EACCES; 1578 return -EACCES;
1396 1579
1397 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 1580 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
@@ -1410,7 +1593,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1410 if (attr->ia_valid & ATTR_SIZE) 1593 if (attr->ia_valid & ATTR_SIZE)
1411 is_truncate = true; 1594 is_truncate = true;
1412 1595
1413 req = fuse_get_req(fc); 1596 req = fuse_get_req_nopages(fc);
1414 if (IS_ERR(req)) 1597 if (IS_ERR(req))
1415 return PTR_ERR(req); 1598 return PTR_ERR(req);
1416 1599
@@ -1500,7 +1683,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
1500 struct inode *inode = entry->d_inode; 1683 struct inode *inode = entry->d_inode;
1501 struct fuse_conn *fc = get_fuse_conn(inode); 1684 struct fuse_conn *fc = get_fuse_conn(inode);
1502 1685
1503 if (!fuse_allow_task(fc, current)) 1686 if (!fuse_allow_current_process(fc))
1504 return -EACCES; 1687 return -EACCES;
1505 1688
1506 return fuse_update_attributes(inode, stat, NULL, NULL); 1689 return fuse_update_attributes(inode, stat, NULL, NULL);
@@ -1518,7 +1701,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1518 if (fc->no_setxattr) 1701 if (fc->no_setxattr)
1519 return -EOPNOTSUPP; 1702 return -EOPNOTSUPP;
1520 1703
1521 req = fuse_get_req(fc); 1704 req = fuse_get_req_nopages(fc);
1522 if (IS_ERR(req)) 1705 if (IS_ERR(req))
1523 return PTR_ERR(req); 1706 return PTR_ERR(req);
1524 1707
@@ -1557,7 +1740,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
1557 if (fc->no_getxattr) 1740 if (fc->no_getxattr)
1558 return -EOPNOTSUPP; 1741 return -EOPNOTSUPP;
1559 1742
1560 req = fuse_get_req(fc); 1743 req = fuse_get_req_nopages(fc);
1561 if (IS_ERR(req)) 1744 if (IS_ERR(req))
1562 return PTR_ERR(req); 1745 return PTR_ERR(req);
1563 1746
@@ -1603,13 +1786,13 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
1603 struct fuse_getxattr_out outarg; 1786 struct fuse_getxattr_out outarg;
1604 ssize_t ret; 1787 ssize_t ret;
1605 1788
1606 if (!fuse_allow_task(fc, current)) 1789 if (!fuse_allow_current_process(fc))
1607 return -EACCES; 1790 return -EACCES;
1608 1791
1609 if (fc->no_listxattr) 1792 if (fc->no_listxattr)
1610 return -EOPNOTSUPP; 1793 return -EOPNOTSUPP;
1611 1794
1612 req = fuse_get_req(fc); 1795 req = fuse_get_req_nopages(fc);
1613 if (IS_ERR(req)) 1796 if (IS_ERR(req))
1614 return PTR_ERR(req); 1797 return PTR_ERR(req);
1615 1798
@@ -1654,7 +1837,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1654 if (fc->no_removexattr) 1837 if (fc->no_removexattr)
1655 return -EOPNOTSUPP; 1838 return -EOPNOTSUPP;
1656 1839
1657 req = fuse_get_req(fc); 1840 req = fuse_get_req_nopages(fc);
1658 if (IS_ERR(req)) 1841 if (IS_ERR(req))
1659 return PTR_ERR(req); 1842 return PTR_ERR(req);
1660 1843
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e21d4d8f87e3..c8071768b950 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -25,7 +25,7 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
25 struct fuse_req *req; 25 struct fuse_req *req;
26 int err; 26 int err;
27 27
28 req = fuse_get_req(fc); 28 req = fuse_get_req_nopages(fc);
29 if (IS_ERR(req)) 29 if (IS_ERR(req))
30 return PTR_ERR(req); 30 return PTR_ERR(req);
31 31
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
57 return NULL; 57 return NULL;
58 58
59 ff->fc = fc; 59 ff->fc = fc;
60 ff->reserved_req = fuse_request_alloc(); 60 ff->reserved_req = fuse_request_alloc(0);
61 if (unlikely(!ff->reserved_req)) { 61 if (unlikely(!ff->reserved_req)) {
62 kfree(ff); 62 kfree(ff);
63 return NULL; 63 return NULL;
@@ -368,7 +368,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
368 if (fc->no_flush) 368 if (fc->no_flush)
369 return 0; 369 return 0;
370 370
371 req = fuse_get_req_nofail(fc, file); 371 req = fuse_get_req_nofail_nopages(fc, file);
372 memset(&inarg, 0, sizeof(inarg)); 372 memset(&inarg, 0, sizeof(inarg));
373 inarg.fh = ff->fh; 373 inarg.fh = ff->fh;
374 inarg.lock_owner = fuse_lock_owner_id(fc, id); 374 inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -436,7 +436,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
436 436
437 fuse_sync_writes(inode); 437 fuse_sync_writes(inode);
438 438
439 req = fuse_get_req(fc); 439 req = fuse_get_req_nopages(fc);
440 if (IS_ERR(req)) { 440 if (IS_ERR(req)) {
441 err = PTR_ERR(req); 441 err = PTR_ERR(req);
442 goto out; 442 goto out;
@@ -544,7 +544,7 @@ static int fuse_readpage(struct file *file, struct page *page)
544 */ 544 */
545 fuse_wait_on_page_writeback(inode, page->index); 545 fuse_wait_on_page_writeback(inode, page->index);
546 546
547 req = fuse_get_req(fc); 547 req = fuse_get_req(fc, 1);
548 err = PTR_ERR(req); 548 err = PTR_ERR(req);
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 goto out; 550 goto out;
@@ -555,6 +555,7 @@ static int fuse_readpage(struct file *file, struct page *page)
555 req->out.argpages = 1; 555 req->out.argpages = 1;
556 req->num_pages = 1; 556 req->num_pages = 1;
557 req->pages[0] = page; 557 req->pages[0] = page;
558 req->page_descs[0].length = count;
558 num_read = fuse_send_read(req, file, pos, count, NULL); 559 num_read = fuse_send_read(req, file, pos, count, NULL);
559 err = req->out.h.error; 560 err = req->out.h.error;
560 fuse_put_request(fc, req); 561 fuse_put_request(fc, req);
@@ -641,6 +642,7 @@ struct fuse_fill_data {
641 struct fuse_req *req; 642 struct fuse_req *req;
642 struct file *file; 643 struct file *file;
643 struct inode *inode; 644 struct inode *inode;
645 unsigned nr_pages;
644}; 646};
645 647
646static int fuse_readpages_fill(void *_data, struct page *page) 648static int fuse_readpages_fill(void *_data, struct page *page)
@@ -656,16 +658,26 @@ static int fuse_readpages_fill(void *_data, struct page *page)
656 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 658 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
657 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 659 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
658 req->pages[req->num_pages - 1]->index + 1 != page->index)) { 660 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
661 int nr_alloc = min_t(unsigned, data->nr_pages,
662 FUSE_MAX_PAGES_PER_REQ);
659 fuse_send_readpages(req, data->file); 663 fuse_send_readpages(req, data->file);
660 data->req = req = fuse_get_req(fc); 664 data->req = req = fuse_get_req(fc, nr_alloc);
661 if (IS_ERR(req)) { 665 if (IS_ERR(req)) {
662 unlock_page(page); 666 unlock_page(page);
663 return PTR_ERR(req); 667 return PTR_ERR(req);
664 } 668 }
665 } 669 }
670
671 if (WARN_ON(req->num_pages >= req->max_pages)) {
672 fuse_put_request(fc, req);
673 return -EIO;
674 }
675
666 page_cache_get(page); 676 page_cache_get(page);
667 req->pages[req->num_pages] = page; 677 req->pages[req->num_pages] = page;
678 req->page_descs[req->num_pages].length = PAGE_SIZE;
668 req->num_pages++; 679 req->num_pages++;
680 data->nr_pages--;
669 return 0; 681 return 0;
670} 682}
671 683
@@ -676,6 +688,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
676 struct fuse_conn *fc = get_fuse_conn(inode); 688 struct fuse_conn *fc = get_fuse_conn(inode);
677 struct fuse_fill_data data; 689 struct fuse_fill_data data;
678 int err; 690 int err;
691 int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
679 692
680 err = -EIO; 693 err = -EIO;
681 if (is_bad_inode(inode)) 694 if (is_bad_inode(inode))
@@ -683,7 +696,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
683 696
684 data.file = file; 697 data.file = file;
685 data.inode = inode; 698 data.inode = inode;
686 data.req = fuse_get_req(fc); 699 data.req = fuse_get_req(fc, nr_alloc);
700 data.nr_pages = nr_pages;
687 err = PTR_ERR(data.req); 701 err = PTR_ERR(data.req);
688 if (IS_ERR(data.req)) 702 if (IS_ERR(data.req))
689 goto out; 703 goto out;
@@ -786,7 +800,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
786 800
787 res = fuse_send_write(req, file, pos, count, NULL); 801 res = fuse_send_write(req, file, pos, count, NULL);
788 802
789 offset = req->page_offset; 803 offset = req->page_descs[0].offset;
790 count = res; 804 count = res;
791 for (i = 0; i < req->num_pages; i++) { 805 for (i = 0; i < req->num_pages; i++) {
792 struct page *page = req->pages[i]; 806 struct page *page = req->pages[i];
@@ -817,7 +831,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
817 int err; 831 int err;
818 832
819 req->in.argpages = 1; 833 req->in.argpages = 1;
820 req->page_offset = offset; 834 req->page_descs[0].offset = offset;
821 835
822 do { 836 do {
823 size_t tmp; 837 size_t tmp;
@@ -857,6 +871,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
857 871
858 err = 0; 872 err = 0;
859 req->pages[req->num_pages] = page; 873 req->pages[req->num_pages] = page;
874 req->page_descs[req->num_pages].length = tmp;
860 req->num_pages++; 875 req->num_pages++;
861 876
862 iov_iter_advance(ii, tmp); 877 iov_iter_advance(ii, tmp);
@@ -869,11 +884,19 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
869 if (!fc->big_writes) 884 if (!fc->big_writes)
870 break; 885 break;
871 } while (iov_iter_count(ii) && count < fc->max_write && 886 } while (iov_iter_count(ii) && count < fc->max_write &&
872 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); 887 req->num_pages < req->max_pages && offset == 0);
873 888
874 return count > 0 ? count : err; 889 return count > 0 ? count : err;
875} 890}
876 891
892static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
893{
894 return min_t(unsigned,
895 ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
896 (pos >> PAGE_CACHE_SHIFT) + 1,
897 FUSE_MAX_PAGES_PER_REQ);
898}
899
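fuse_wr_pages() converts the byte range of a buffered write into the number of page-cache pages it touches, capped at FUSE_MAX_PAGES_PER_REQ, so fuse_perform_write() can size each request exactly. A worked example of the expression with 4 KiB pages:

/* pos = 5000, len = 10000, PAGE_CACHE_SHIFT = 12:
 *   last page index  = (5000 + 10000 - 1) >> 12 = 14999 >> 12 = 3
 *   first page index =  5000 >> 12              = 1
 *   pages touched    =  3 - 1 + 1               = 3   (pages 1..3)
 */
nr_pages = fuse_wr_pages(5000, 10000);		/* -> 3, below the cap */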
877static ssize_t fuse_perform_write(struct file *file, 900static ssize_t fuse_perform_write(struct file *file,
878 struct address_space *mapping, 901 struct address_space *mapping,
879 struct iov_iter *ii, loff_t pos) 902 struct iov_iter *ii, loff_t pos)
@@ -889,8 +912,9 @@ static ssize_t fuse_perform_write(struct file *file,
889 do { 912 do {
890 struct fuse_req *req; 913 struct fuse_req *req;
891 ssize_t count; 914 ssize_t count;
915 unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
892 916
893 req = fuse_get_req(fc); 917 req = fuse_get_req(fc, nr_pages);
894 if (IS_ERR(req)) { 918 if (IS_ERR(req)) {
895 err = PTR_ERR(req); 919 err = PTR_ERR(req);
896 break; 920 break;
@@ -1023,47 +1047,110 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
1023 } 1047 }
1024} 1048}
1025 1049
1026static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, 1050static inline void fuse_page_descs_length_init(struct fuse_req *req,
1051 unsigned index, unsigned nr_pages)
1052{
1053 int i;
1054
1055 for (i = index; i < index + nr_pages; i++)
1056 req->page_descs[i].length = PAGE_SIZE -
1057 req->page_descs[i].offset;
1058}
1059
1060static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
1061{
1062 return (unsigned long)ii->iov->iov_base + ii->iov_offset;
1063}
1064
1065static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
1066 size_t max_size)
1067{
1068 return min(iov_iter_single_seg_count(ii), max_size);
1069}
1070
1071static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
1027 size_t *nbytesp, int write) 1072 size_t *nbytesp, int write)
1028{ 1073{
1029 size_t nbytes = *nbytesp; 1074 size_t nbytes = 0; /* # bytes already packed in req */
1030 unsigned long user_addr = (unsigned long) buf;
1031 unsigned offset = user_addr & ~PAGE_MASK;
1032 int npages;
1033 1075
1034 /* Special case for kernel I/O: can copy directly into the buffer */ 1076 /* Special case for kernel I/O: can copy directly into the buffer */
1035 if (segment_eq(get_fs(), KERNEL_DS)) { 1077 if (segment_eq(get_fs(), KERNEL_DS)) {
1078 unsigned long user_addr = fuse_get_user_addr(ii);
1079 size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
1080
1036 if (write) 1081 if (write)
1037 req->in.args[1].value = (void *) user_addr; 1082 req->in.args[1].value = (void *) user_addr;
1038 else 1083 else
1039 req->out.args[0].value = (void *) user_addr; 1084 req->out.args[0].value = (void *) user_addr;
1040 1085
1086 iov_iter_advance(ii, frag_size);
1087 *nbytesp = frag_size;
1041 return 0; 1088 return 0;
1042 } 1089 }
1043 1090
1044 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 1091 while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
1045 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 1092 unsigned npages;
1046 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); 1093 unsigned long user_addr = fuse_get_user_addr(ii);
1047 npages = get_user_pages_fast(user_addr, npages, !write, req->pages); 1094 unsigned offset = user_addr & ~PAGE_MASK;
1048 if (npages < 0) 1095 size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes);
1049 return npages; 1096 int ret;
1097
1098 unsigned n = req->max_pages - req->num_pages;
1099 frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT);
1100
1101 npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1102 npages = clamp(npages, 1U, n);
1103
1104 ret = get_user_pages_fast(user_addr, npages, !write,
1105 &req->pages[req->num_pages]);
1106 if (ret < 0)
1107 return ret;
1050 1108
1051 req->num_pages = npages; 1109 npages = ret;
1052 req->page_offset = offset; 1110 frag_size = min_t(size_t, frag_size,
1111 (npages << PAGE_SHIFT) - offset);
1112 iov_iter_advance(ii, frag_size);
1113
1114 req->page_descs[req->num_pages].offset = offset;
1115 fuse_page_descs_length_init(req, req->num_pages, npages);
1116
1117 req->num_pages += npages;
1118 req->page_descs[req->num_pages - 1].length -=
1119 (npages << PAGE_SHIFT) - offset - frag_size;
1120
1121 nbytes += frag_size;
1122 }
1053 1123
1054 if (write) 1124 if (write)
1055 req->in.argpages = 1; 1125 req->in.argpages = 1;
1056 else 1126 else
1057 req->out.argpages = 1; 1127 req->out.argpages = 1;
1058 1128
1059 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 1129 *nbytesp = nbytes;
1060 *nbytesp = min(*nbytesp, nbytes);
1061 1130
1062 return 0; 1131 return 0;
1063} 1132}
1064 1133
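fuse_get_user_pages() now consumes an iov_iter: for each fragment it pins at most the remaining page slots with get_user_pages_fast(), records an offset for the first new page, gives every new page a full-to-end-of-page length via fuse_page_descs_length_init(), and finally trims the last descriptor so the byte total matches the fragment. A worked sketch for one fragment that starts 100 bytes into a page and is 5000 bytes long (4 KiB pages); first is shorthand here for req->num_pages before the fragment was added:

/* offset = 100, frag_size = 5000:
 *   npages = (5000 + 100 + 4095) >> 12 = 2
 *   after init:  page 0 length = 4096 - 100 = 3996, page 1 length = 4096
 *   trim       = (2 << 12) - 100 - 5000 = 3092
 *   final      : page 0 = 3996, page 1 = 4096 - 3092 = 1004, sum = 5000
 */
req->page_descs[first].offset = offset;
fuse_page_descs_length_init(req, first, npages);
req->page_descs[first + npages - 1].length -=
		(npages << PAGE_SHIFT) - offset - frag_size;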
1065ssize_t fuse_direct_io(struct file *file, const char __user *buf, 1134static inline int fuse_iter_npages(const struct iov_iter *ii_p)
1066 size_t count, loff_t *ppos, int write) 1135{
1136 struct iov_iter ii = *ii_p;
1137 int npages = 0;
1138
1139 while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
1140 unsigned long user_addr = fuse_get_user_addr(&ii);
1141 unsigned offset = user_addr & ~PAGE_MASK;
1142 size_t frag_size = iov_iter_single_seg_count(&ii);
1143
1144 npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1145 iov_iter_advance(&ii, frag_size);
1146 }
1147
1148 return min(npages, FUSE_MAX_PAGES_PER_REQ);
1149}
1150
1151ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
1152 unsigned long nr_segs, size_t count, loff_t *ppos,
1153 int write)
1067{ 1154{
1068 struct fuse_file *ff = file->private_data; 1155 struct fuse_file *ff = file->private_data;
1069 struct fuse_conn *fc = ff->fc; 1156 struct fuse_conn *fc = ff->fc;
@@ -1071,8 +1158,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1071 loff_t pos = *ppos; 1158 loff_t pos = *ppos;
1072 ssize_t res = 0; 1159 ssize_t res = 0;
1073 struct fuse_req *req; 1160 struct fuse_req *req;
1161 struct iov_iter ii;
1162
1163 iov_iter_init(&ii, iov, nr_segs, count, 0);
1074 1164
1075 req = fuse_get_req(fc); 1165 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1076 if (IS_ERR(req)) 1166 if (IS_ERR(req))
1077 return PTR_ERR(req); 1167 return PTR_ERR(req);
1078 1168
@@ -1080,7 +1170,7 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1080 size_t nres; 1170 size_t nres;
1081 fl_owner_t owner = current->files; 1171 fl_owner_t owner = current->files;
1082 size_t nbytes = min(count, nmax); 1172 size_t nbytes = min(count, nmax);
1083 int err = fuse_get_user_pages(req, buf, &nbytes, write); 1173 int err = fuse_get_user_pages(req, &ii, &nbytes, write);
1084 if (err) { 1174 if (err) {
1085 res = err; 1175 res = err;
1086 break; 1176 break;
@@ -1103,12 +1193,11 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1103 count -= nres; 1193 count -= nres;
1104 res += nres; 1194 res += nres;
1105 pos += nres; 1195 pos += nres;
1106 buf += nres;
1107 if (nres != nbytes) 1196 if (nres != nbytes)
1108 break; 1197 break;
1109 if (count) { 1198 if (count) {
1110 fuse_put_request(fc, req); 1199 fuse_put_request(fc, req);
1111 req = fuse_get_req(fc); 1200 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1112 if (IS_ERR(req)) 1201 if (IS_ERR(req))
1113 break; 1202 break;
1114 } 1203 }
@@ -1122,8 +1211,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1122} 1211}
1123EXPORT_SYMBOL_GPL(fuse_direct_io); 1212EXPORT_SYMBOL_GPL(fuse_direct_io);
1124 1213
1125static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1214static ssize_t __fuse_direct_read(struct file *file, const struct iovec *iov,
1126 size_t count, loff_t *ppos) 1215 unsigned long nr_segs, loff_t *ppos)
1127{ 1216{
1128 ssize_t res; 1217 ssize_t res;
1129 struct inode *inode = file->f_path.dentry->d_inode; 1218 struct inode *inode = file->f_path.dentry->d_inode;
@@ -1131,22 +1220,31 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1131 if (is_bad_inode(inode)) 1220 if (is_bad_inode(inode))
1132 return -EIO; 1221 return -EIO;
1133 1222
1134 res = fuse_direct_io(file, buf, count, ppos, 0); 1223 res = fuse_direct_io(file, iov, nr_segs, iov_length(iov, nr_segs),
1224 ppos, 0);
1135 1225
1136 fuse_invalidate_attr(inode); 1226 fuse_invalidate_attr(inode);
1137 1227
1138 return res; 1228 return res;
1139} 1229}
1140 1230
1141static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, 1231static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1142 size_t count, loff_t *ppos) 1232 size_t count, loff_t *ppos)
1233{
1234 struct iovec iov = { .iov_base = buf, .iov_len = count };
1235 return __fuse_direct_read(file, &iov, 1, ppos);
1236}
1237
1238static ssize_t __fuse_direct_write(struct file *file, const struct iovec *iov,
1239 unsigned long nr_segs, loff_t *ppos)
1143{ 1240{
1144 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file->f_path.dentry->d_inode;
1242 size_t count = iov_length(iov, nr_segs);
1145 ssize_t res; 1243 ssize_t res;
1146 1244
1147 res = generic_write_checks(file, ppos, &count, 0); 1245 res = generic_write_checks(file, ppos, &count, 0);
1148 if (!res) { 1246 if (!res) {
1149 res = fuse_direct_io(file, buf, count, ppos, 1); 1247 res = fuse_direct_io(file, iov, nr_segs, count, ppos, 1);
1150 if (res > 0) 1248 if (res > 0)
1151 fuse_write_update_size(inode, *ppos); 1249 fuse_write_update_size(inode, *ppos);
1152 } 1250 }
@@ -1159,6 +1257,7 @@ static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1159static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1257static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1160 size_t count, loff_t *ppos) 1258 size_t count, loff_t *ppos)
1161{ 1259{
1260 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
1162 struct inode *inode = file->f_path.dentry->d_inode; 1261 struct inode *inode = file->f_path.dentry->d_inode;
1163 ssize_t res; 1262 ssize_t res;
1164 1263
@@ -1167,7 +1266,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1167 1266
1168 /* Don't allow parallel writes to the same file */ 1267 /* Don't allow parallel writes to the same file */
1169 mutex_lock(&inode->i_mutex); 1268 mutex_lock(&inode->i_mutex);
1170 res = __fuse_direct_write(file, buf, count, ppos); 1269 res = __fuse_direct_write(file, &iov, 1, ppos);
1171 mutex_unlock(&inode->i_mutex); 1270 mutex_unlock(&inode->i_mutex);
1172 1271
1173 return res; 1272 return res;
@@ -1272,7 +1371,7 @@ static int fuse_writepage_locked(struct page *page)
1272 1371
1273 set_page_writeback(page); 1372 set_page_writeback(page);
1274 1373
1275 req = fuse_request_alloc_nofs(); 1374 req = fuse_request_alloc_nofs(1);
1276 if (!req) 1375 if (!req)
1277 goto err; 1376 goto err;
1278 1377
@@ -1293,7 +1392,8 @@ static int fuse_writepage_locked(struct page *page)
1293 req->in.argpages = 1; 1392 req->in.argpages = 1;
1294 req->num_pages = 1; 1393 req->num_pages = 1;
1295 req->pages[0] = tmp_page; 1394 req->pages[0] = tmp_page;
1296 req->page_offset = 0; 1395 req->page_descs[0].offset = 0;
1396 req->page_descs[0].length = PAGE_SIZE;
1297 req->end = fuse_writepage_end; 1397 req->end = fuse_writepage_end;
1298 req->inode = inode; 1398 req->inode = inode;
1299 1399
@@ -1471,7 +1571,7 @@ static int fuse_getlk(struct file *file, struct file_lock *fl)
1471 struct fuse_lk_out outarg; 1571 struct fuse_lk_out outarg;
1472 int err; 1572 int err;
1473 1573
1474 req = fuse_get_req(fc); 1574 req = fuse_get_req_nopages(fc);
1475 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1476 return PTR_ERR(req); 1576 return PTR_ERR(req);
1477 1577
@@ -1506,7 +1606,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1506 if (fl->fl_flags & FL_CLOSE) 1606 if (fl->fl_flags & FL_CLOSE)
1507 return 0; 1607 return 0;
1508 1608
1509 req = fuse_get_req(fc); 1609 req = fuse_get_req_nopages(fc);
1510 if (IS_ERR(req)) 1610 if (IS_ERR(req))
1511 return PTR_ERR(req); 1611 return PTR_ERR(req);
1512 1612
@@ -1575,7 +1675,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1575 if (!inode->i_sb->s_bdev || fc->no_bmap) 1675 if (!inode->i_sb->s_bdev || fc->no_bmap)
1576 return 0; 1676 return 0;
1577 1677
1578 req = fuse_get_req(fc); 1678 req = fuse_get_req_nopages(fc);
1579 if (IS_ERR(req)) 1679 if (IS_ERR(req))
1580 return 0; 1680 return 0;
1581 1681
@@ -1873,7 +1973,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1873 num_pages++; 1973 num_pages++;
1874 } 1974 }
1875 1975
1876 req = fuse_get_req(fc); 1976 req = fuse_get_req(fc, num_pages);
1877 if (IS_ERR(req)) { 1977 if (IS_ERR(req)) {
1878 err = PTR_ERR(req); 1978 err = PTR_ERR(req);
1879 req = NULL; 1979 req = NULL;
@@ -1881,6 +1981,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1881 } 1981 }
1882 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages); 1982 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1883 req->num_pages = num_pages; 1983 req->num_pages = num_pages;
1984 fuse_page_descs_length_init(req, 0, req->num_pages);
1884 1985
1885 /* okay, let's send it to the client */ 1986 /* okay, let's send it to the client */
1886 req->in.h.opcode = FUSE_IOCTL; 1987 req->in.h.opcode = FUSE_IOCTL;
@@ -1981,7 +2082,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
1981 struct inode *inode = file->f_dentry->d_inode; 2082 struct inode *inode = file->f_dentry->d_inode;
1982 struct fuse_conn *fc = get_fuse_conn(inode); 2083 struct fuse_conn *fc = get_fuse_conn(inode);
1983 2084
1984 if (!fuse_allow_task(fc, current)) 2085 if (!fuse_allow_current_process(fc))
1985 return -EACCES; 2086 return -EACCES;
1986 2087
1987 if (is_bad_inode(inode)) 2088 if (is_bad_inode(inode))
@@ -2066,6 +2167,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2066 return DEFAULT_POLLMASK; 2167 return DEFAULT_POLLMASK;
2067 2168
2068 poll_wait(file, &ff->poll_wait, wait); 2169 poll_wait(file, &ff->poll_wait, wait);
2170 inarg.events = (__u32)poll_requested_events(wait);
2069 2171
2070 /* 2172 /*
2071 * Ask for notification iff there's someone waiting for it. 2173 * Ask for notification iff there's someone waiting for it.
@@ -2076,7 +2178,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
2076 fuse_register_polled_file(fc, ff); 2178 fuse_register_polled_file(fc, ff);
2077 } 2179 }
2078 2180
2079 req = fuse_get_req(fc); 2181 req = fuse_get_req_nopages(fc);
2080 if (IS_ERR(req)) 2182 if (IS_ERR(req))
2081 return POLLERR; 2183 return POLLERR;
2082 2184
@@ -2126,41 +2228,6 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2126 return 0; 2228 return 0;
2127} 2229}
2128 2230
2129static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2130 unsigned long nr_segs, loff_t *ppos, int rw)
2131{
2132 const struct iovec *vector = iov;
2133 ssize_t ret = 0;
2134
2135 while (nr_segs > 0) {
2136 void __user *base;
2137 size_t len;
2138 ssize_t nr;
2139
2140 base = vector->iov_base;
2141 len = vector->iov_len;
2142 vector++;
2143 nr_segs--;
2144
2145 if (rw == WRITE)
2146 nr = __fuse_direct_write(filp, base, len, ppos);
2147 else
2148 nr = fuse_direct_read(filp, base, len, ppos);
2149
2150 if (nr < 0) {
2151 if (!ret)
2152 ret = nr;
2153 break;
2154 }
2155 ret += nr;
2156 if (nr != len)
2157 break;
2158 }
2159
2160 return ret;
2161}
2162
2163
2164static ssize_t 2231static ssize_t
2165fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2232fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2166 loff_t offset, unsigned long nr_segs) 2233 loff_t offset, unsigned long nr_segs)
@@ -2172,13 +2239,16 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2172 file = iocb->ki_filp; 2239 file = iocb->ki_filp;
2173 pos = offset; 2240 pos = offset;
2174 2241
2175 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); 2242 if (rw == WRITE)
2243 ret = __fuse_direct_write(file, iov, nr_segs, &pos);
2244 else
2245 ret = __fuse_direct_read(file, iov, nr_segs, &pos);
2176 2246
2177 return ret; 2247 return ret;
2178} 2248}
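With fuse_direct_io() accepting a full iovec, fuse_direct_IO() no longer loops segment by segment (fuse_loop_dio() is deleted); the whole vector reaches the request-building code, which can pack several segments into one FUSE request. The plain read/write file operations keep working by wrapping their buffer in a one-element iovec, as the wrappers earlier in this file now do:

/* Single-buffer entry point sharing the vectored path (see
 * fuse_direct_read()/fuse_direct_write() above). */
struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };

return __fuse_direct_read(file, &iov, 1, ppos);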
2179 2249
2180long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 2250static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2181 loff_t length) 2251 loff_t length)
2182{ 2252{
2183 struct fuse_file *ff = file->private_data; 2253 struct fuse_file *ff = file->private_data;
2184 struct fuse_conn *fc = ff->fc; 2254 struct fuse_conn *fc = ff->fc;
@@ -2194,7 +2264,7 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2194 if (fc->no_fallocate) 2264 if (fc->no_fallocate)
2195 return -EOPNOTSUPP; 2265 return -EOPNOTSUPP;
2196 2266
2197 req = fuse_get_req(fc); 2267 req = fuse_get_req_nopages(fc);
2198 if (IS_ERR(req)) 2268 if (IS_ERR(req))
2199 return PTR_ERR(req); 2269 return PTR_ERR(req);
2200 2270
@@ -2213,7 +2283,6 @@ long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2213 2283
2214 return err; 2284 return err;
2215} 2285}
2216EXPORT_SYMBOL_GPL(fuse_file_fallocate);
2217 2286
2218static const struct file_operations fuse_file_operations = { 2287static const struct file_operations fuse_file_operations = {
2219 .llseek = fuse_file_llseek, 2288 .llseek = fuse_file_llseek,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e105a53fc72d..6aeba864f070 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,9 @@
44 doing the mount will be allowed to access the filesystem */ 44 doing the mount will be allowed to access the filesystem */
45#define FUSE_ALLOW_OTHER (1 << 1) 45#define FUSE_ALLOW_OTHER (1 << 1)
46 46
47/** Number of page pointers embedded in fuse_req */
48#define FUSE_REQ_INLINE_PAGES 1
49
47/** List of active connections */ 50/** List of active connections */
48extern struct list_head fuse_conn_list; 51extern struct list_head fuse_conn_list;
49 52
@@ -103,6 +106,15 @@ struct fuse_inode {
103 106
104	/** List of writepage requests (pending or sent) */ 107
105 struct list_head writepages; 108 struct list_head writepages;
109
110 /** Miscellaneous bits describing inode state */
111 unsigned long state;
112};
113
114/** FUSE inode state bits */
115enum {
116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS,
106}; 118};
107 119
108struct fuse_conn; 120struct fuse_conn;
@@ -200,6 +212,12 @@ struct fuse_out {
200 struct fuse_arg args[3]; 212 struct fuse_arg args[3];
201}; 213};
202 214
215/** FUSE page descriptor */
216struct fuse_page_desc {
217 unsigned int length;
218 unsigned int offset;
219};
220
203/** The request state */ 221/** The request state */
204enum fuse_req_state { 222enum fuse_req_state {
205 FUSE_REQ_INIT = 0, 223 FUSE_REQ_INIT = 0,
@@ -291,14 +309,23 @@ struct fuse_req {
291 } misc; 309 } misc;
292 310
293 /** page vector */ 311 /** page vector */
294 struct page *pages[FUSE_MAX_PAGES_PER_REQ]; 312 struct page **pages;
313
314 /** page-descriptor vector */
315 struct fuse_page_desc *page_descs;
316
317 /** size of the 'pages' array */
318 unsigned max_pages;
319
320 /** inline page vector */
321 struct page *inline_pages[FUSE_REQ_INLINE_PAGES];
322
323 /** inline page-descriptor vector */
324 struct fuse_page_desc inline_page_descs[FUSE_REQ_INLINE_PAGES];
295 325
296 /** number of pages in vector */ 326 /** number of pages in vector */
297 unsigned num_pages; 327 unsigned num_pages;
298 328
299 /** offset of data on first page */
300 unsigned page_offset;
301
302 /** File used in the request (or NULL) */ 329 /** File used in the request (or NULL) */
303 struct fuse_file *ff; 330 struct fuse_file *ff;
304 331
@@ -487,6 +514,12 @@ struct fuse_conn {
487 /** Use enhanced/automatic page cache invalidation. */ 514 /** Use enhanced/automatic page cache invalidation. */
488 unsigned auto_inval_data:1; 515 unsigned auto_inval_data:1;
489 516
517 /** Does the filesystem support readdirplus? */
518 unsigned do_readdirplus:1;
519
520 /** Does the filesystem want adaptive readdirplus? */
521 unsigned readdirplus_auto:1;
522
490 /** The number of requests waiting for completion */ 523 /** The number of requests waiting for completion */
491 atomic_t num_waiting; 524 atomic_t num_waiting;
492 525
@@ -578,6 +611,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
578 611
579struct fuse_forget_link *fuse_alloc_forget(void); 612struct fuse_forget_link *fuse_alloc_forget(void);
580 613
614/* Used by READDIRPLUS */
615void fuse_force_forget(struct file *file, u64 nodeid);
616
581/** 617/**
582 * Initialize READ or READDIR request 618 * Initialize READ or READDIR request
583 */ 619 */
@@ -658,9 +694,9 @@ void fuse_ctl_cleanup(void);
658/** 694/**
659 * Allocate a request 695 * Allocate a request
660 */ 696 */
661struct fuse_req *fuse_request_alloc(void); 697struct fuse_req *fuse_request_alloc(unsigned npages);
662 698
663struct fuse_req *fuse_request_alloc_nofs(void); 699struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
664 700
665/** 701/**
666 * Free a request 702 * Free a request
@@ -668,14 +704,25 @@ struct fuse_req *fuse_request_alloc_nofs(void);
668void fuse_request_free(struct fuse_req *req); 704void fuse_request_free(struct fuse_req *req);
669 705
670/** 706/**
671 * Get a request, may fail with -ENOMEM 707 * Get a request, may fail with -ENOMEM,
708 * caller should specify # elements in req->pages[] explicitly
672 */ 709 */
673struct fuse_req *fuse_get_req(struct fuse_conn *fc); 710struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages);
711
712/**
713 * Get a request, may fail with -ENOMEM,
 714 * useful for callers that don't use req->pages[]
715 */
716static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
717{
718 return fuse_get_req(fc, 0);
719}
674 720
675/** 721/**
676 * Gets a request for a file operation, always succeeds 722 * Gets a request for a file operation, always succeeds
677 */ 723 */
678struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); 724struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
725 struct file *file);
679 726
680/** 727/**
681 * Decrement reference count of a request. If count goes to zero free 728 * Decrement reference count of a request. If count goes to zero free
@@ -739,9 +786,9 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc);
739int fuse_valid_type(int m); 786int fuse_valid_type(int m);
740 787
741/** 788/**
742 * Is task allowed to perform filesystem operation? 789 * Is current process allowed to perform filesystem operation?
743 */ 790 */
744int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); 791int fuse_allow_current_process(struct fuse_conn *fc);
745 792
746u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); 793u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
747 794
@@ -776,8 +823,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
776 823
777int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 824int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
778 bool isdir); 825 bool isdir);
779ssize_t fuse_direct_io(struct file *file, const char __user *buf, 826ssize_t fuse_direct_io(struct file *file, const struct iovec *iov,
780 size_t count, loff_t *ppos, int write); 827 unsigned long nr_segs, size_t count, loff_t *ppos,
828 int write);
781long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 829long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
782 unsigned int flags); 830 unsigned int flags);
783long fuse_ioctl_common(struct file *file, unsigned int cmd, 831long fuse_ioctl_common(struct file *file, unsigned int cmd,
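
Request allocation now takes the page count up front, and struct fuse_req carries a one-entry inline page vector (FUSE_REQ_INLINE_PAGES) plus per-page fuse_page_desc entries in place of the fixed FUSE_MAX_PAGES_PER_REQ array. A minimal usage sketch under these prototypes; fuse_put_request() as the release helper is an assumption, since only its "decrement reference count" doc comment appears in this hunk:

/* Sketch only: allocate a request sized for npages, or a pageless one. */
static int example_send(struct fuse_conn *fc, unsigned npages)
{
        struct fuse_req *req;

        req = npages ? fuse_get_req(fc, npages) : fuse_get_req_nopages(fc);
        if (IS_ERR(req))
                return PTR_ERR(req);

        /* ... fill req->pages[0..npages-1] and req->page_descs[] ... */

        fuse_put_request(fc, req);      /* assumed release counterpart */
        return 0;
}
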
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 73ca6b72beaf..df00993ed108 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -92,6 +92,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
92 fi->attr_version = 0; 92 fi->attr_version = 0;
93 fi->writectr = 0; 93 fi->writectr = 0;
94 fi->orig_ino = 0; 94 fi->orig_ino = 0;
95 fi->state = 0;
95 INIT_LIST_HEAD(&fi->write_files); 96 INIT_LIST_HEAD(&fi->write_files);
96 INIT_LIST_HEAD(&fi->queued_writes); 97 INIT_LIST_HEAD(&fi->queued_writes);
97 INIT_LIST_HEAD(&fi->writepages); 98 INIT_LIST_HEAD(&fi->writepages);
@@ -408,12 +409,12 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
408 struct fuse_statfs_out outarg; 409 struct fuse_statfs_out outarg;
409 int err; 410 int err;
410 411
411 if (!fuse_allow_task(fc, current)) { 412 if (!fuse_allow_current_process(fc)) {
412 buf->f_type = FUSE_SUPER_MAGIC; 413 buf->f_type = FUSE_SUPER_MAGIC;
413 return 0; 414 return 0;
414 } 415 }
415 416
416 req = fuse_get_req(fc); 417 req = fuse_get_req_nopages(fc);
417 if (IS_ERR(req)) 418 if (IS_ERR(req))
418 return PTR_ERR(req); 419 return PTR_ERR(req);
419 420
@@ -678,7 +679,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
678 679
679 if (*max_len < len) { 680 if (*max_len < len) {
680 *max_len = len; 681 *max_len = len;
681 return 255; 682 return FILEID_INVALID;
682 } 683 }
683 684
684 nodeid = get_fuse_inode(inode)->nodeid; 685 nodeid = get_fuse_inode(inode)->nodeid;
@@ -863,6 +864,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
863 fc->dont_mask = 1; 864 fc->dont_mask = 1;
864 if (arg->flags & FUSE_AUTO_INVAL_DATA) 865 if (arg->flags & FUSE_AUTO_INVAL_DATA)
865 fc->auto_inval_data = 1; 866 fc->auto_inval_data = 1;
867 if (arg->flags & FUSE_DO_READDIRPLUS)
868 fc->do_readdirplus = 1;
869 if (arg->flags & FUSE_READDIRPLUS_AUTO)
870 fc->readdirplus_auto = 1;
866 } else { 871 } else {
867 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 872 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
868 fc->no_lock = 1; 873 fc->no_lock = 1;
@@ -889,7 +894,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
889 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 894 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
890 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 895 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
891 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 896 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
892 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA; 897 FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
898 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
893 req->in.h.opcode = FUSE_INIT; 899 req->in.h.opcode = FUSE_INIT;
894 req->in.numargs = 1; 900 req->in.numargs = 1;
895 req->in.args[0].size = sizeof(*arg); 901 req->in.args[0].size = sizeof(*arg);
@@ -1034,12 +1040,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1034 /* only now - we want root dentry with NULL ->d_op */ 1040 /* only now - we want root dentry with NULL ->d_op */
1035 sb->s_d_op = &fuse_dentry_operations; 1041 sb->s_d_op = &fuse_dentry_operations;
1036 1042
1037 init_req = fuse_request_alloc(); 1043 init_req = fuse_request_alloc(0);
1038 if (!init_req) 1044 if (!init_req)
1039 goto err_put_root; 1045 goto err_put_root;
1040 1046
1041 if (is_bdev) { 1047 if (is_bdev) {
1042 fc->destroy_req = fuse_request_alloc(); 1048 fc->destroy_req = fuse_request_alloc(0);
1043 if (!fc->destroy_req) 1049 if (!fc->destroy_req)
1044 goto err_free_init_req; 1050 goto err_free_init_req;
1045 } 1051 }
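
process_init_reply() now latches two capability bits, do_readdirplus and readdirplus_auto, and fuse_send_init() advertises the matching FUSE_DO_READDIRPLUS and FUSE_READDIRPLUS_AUTO flags. A sketch of how a readdir caller could combine them with the FUSE_I_ADVISE_RDPLUS state bit added to struct fuse_inode; the real decision helper lives in fs/fuse/dir.c, outside this excerpt:

/* Sketch only: choose READDIRPLUS based on connection and inode hints. */
static bool use_readdirplus(struct fuse_conn *fc, struct fuse_inode *fi)
{
        if (!fc->do_readdirplus)
                return false;           /* server never offered it */
        if (!fc->readdirplus_auto)
                return true;            /* always-on mode */
        /* adaptive mode: only when a recent lookup advised it */
        return test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
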
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index f850020ad906..f69ac0af5496 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -237,7 +237,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
237 return -EINVAL; 237 return -EINVAL;
238 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 238 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
239 return value ? -EACCES : 0; 239 return value ? -EACCES : 0;
240 if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) 240 if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_FOWNER))
241 return -EPERM; 241 return -EPERM;
242 if (S_ISLNK(inode->i_mode)) 242 if (S_ISLNK(inode->i_mode))
243 return -EOPNOTSUPP; 243 return -EOPNOTSUPP;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 30de4f2a2ea9..24f414f0ce61 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -51,7 +51,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
51 continue; 51 continue;
52 if (gfs2_is_jdata(ip)) 52 if (gfs2_is_jdata(ip))
53 set_buffer_uptodate(bh); 53 set_buffer_uptodate(bh);
54 gfs2_trans_add_bh(ip->i_gl, bh, 0); 54 gfs2_trans_add_data(ip->i_gl, bh);
55 } 55 }
56} 56}
57 57
@@ -230,16 +230,14 @@ out_ignore:
230} 230}
231 231
232/** 232/**
233 * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk 233 * gfs2_writepages - Write a bunch of dirty pages back to disk
234 * @mapping: The mapping to write 234 * @mapping: The mapping to write
235 * @wbc: Write-back control 235 * @wbc: Write-back control
236 * 236 *
237 * For the data=writeback case we can already ignore buffer heads 237 * Used for both ordered and writeback modes.
238 * and write whole extents at once. This is a big reduction in the
239 * number of I/O requests we send and the bmap calls we make in this case.
240 */ 238 */
241static int gfs2_writeback_writepages(struct address_space *mapping, 239static int gfs2_writepages(struct address_space *mapping,
242 struct writeback_control *wbc) 240 struct writeback_control *wbc)
243{ 241{
244 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); 242 return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
245} 243}
@@ -852,7 +850,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
852 goto failed; 850 goto failed;
853 } 851 }
854 852
855 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 853 gfs2_trans_add_meta(ip->i_gl, dibh);
856 854
857 if (gfs2_is_stuffed(ip)) 855 if (gfs2_is_stuffed(ip))
858 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); 856 return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
@@ -1102,7 +1100,7 @@ cannot_release:
1102 1100
1103static const struct address_space_operations gfs2_writeback_aops = { 1101static const struct address_space_operations gfs2_writeback_aops = {
1104 .writepage = gfs2_writeback_writepage, 1102 .writepage = gfs2_writeback_writepage,
1105 .writepages = gfs2_writeback_writepages, 1103 .writepages = gfs2_writepages,
1106 .readpage = gfs2_readpage, 1104 .readpage = gfs2_readpage,
1107 .readpages = gfs2_readpages, 1105 .readpages = gfs2_readpages,
1108 .write_begin = gfs2_write_begin, 1106 .write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1118 1116
1119static const struct address_space_operations gfs2_ordered_aops = { 1117static const struct address_space_operations gfs2_ordered_aops = {
1120 .writepage = gfs2_ordered_writepage, 1118 .writepage = gfs2_ordered_writepage,
1119 .writepages = gfs2_writepages,
1121 .readpage = gfs2_readpage, 1120 .readpage = gfs2_readpage,
1122 .readpages = gfs2_readpages, 1121 .readpages = gfs2_readpages,
1123 .write_begin = gfs2_write_begin, 1122 .write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a68e91bcef3d..5e83657f046e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
22#include "meta_io.h" 22#include "meta_io.h"
23#include "quota.h" 23#include "quota.h"
24#include "rgrp.h" 24#include "rgrp.h"
25#include "log.h"
25#include "super.h" 26#include "super.h"
26#include "trans.h" 27#include "trans.h"
27#include "dir.h" 28#include "dir.h"
@@ -93,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
93 if (!gfs2_is_jdata(ip)) 94 if (!gfs2_is_jdata(ip))
94 mark_buffer_dirty(bh); 95 mark_buffer_dirty(bh);
95 if (!gfs2_is_writeback(ip)) 96 if (!gfs2_is_writeback(ip))
96 gfs2_trans_add_bh(ip->i_gl, bh, 0); 97 gfs2_trans_add_data(ip->i_gl, bh);
97 98
98 if (release) { 99 if (release) {
99 unlock_page(page); 100 unlock_page(page);
@@ -153,7 +154,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
153 154
154 /* Set up the pointer to the new block */ 155 /* Set up the pointer to the new block */
155 156
156 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 157 gfs2_trans_add_meta(ip->i_gl, dibh);
157 di = (struct gfs2_dinode *)dibh->b_data; 158 di = (struct gfs2_dinode *)dibh->b_data;
158 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 159 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
159 160
@@ -405,7 +406,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp,
405 BUG_ON(i < 1); 406 BUG_ON(i < 1);
406 BUG_ON(mp->mp_bh[i] != NULL); 407 BUG_ON(mp->mp_bh[i] != NULL);
407 mp->mp_bh[i] = gfs2_meta_new(gl, bn); 408 mp->mp_bh[i] = gfs2_meta_new(gl, bn);
408 gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); 409 gfs2_trans_add_meta(gl, mp->mp_bh[i]);
409 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); 410 gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
410 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); 411 gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
411 ptr += offset; 412 ptr += offset;
@@ -468,7 +469,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
468 BUG_ON(sheight < 1); 469 BUG_ON(sheight < 1);
469 BUG_ON(dibh == NULL); 470 BUG_ON(dibh == NULL);
470 471
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 472 gfs2_trans_add_meta(ip->i_gl, dibh);
472 473
473 if (height == sheight) { 474 if (height == sheight) {
474 struct buffer_head *bh; 475 struct buffer_head *bh;
@@ -544,7 +545,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
544 /* Branching from existing tree */ 545 /* Branching from existing tree */
545 case ALLOC_GROW_DEPTH: 546 case ALLOC_GROW_DEPTH:
546 if (i > 1 && i < height) 547 if (i > 1 && i < height)
547 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); 548 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
548 for (; i < height && n > 0; i++, n--) 549 for (; i < height && n > 0; i++, n--)
549 gfs2_indirect_init(mp, ip->i_gl, i, 550 gfs2_indirect_init(mp, ip->i_gl, i,
550 mp->mp_list[i-1], bn++); 551 mp->mp_list[i-1], bn++);
@@ -556,7 +557,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
556 case ALLOC_DATA: 557 case ALLOC_DATA:
557 BUG_ON(n > dblks); 558 BUG_ON(n > dblks);
558 BUG_ON(mp->mp_bh[end_of_metadata] == NULL); 559 BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
559 gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); 560 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
560 dblks = n; 561 dblks = n;
561 ptr = metapointer(end_of_metadata, mp); 562 ptr = metapointer(end_of_metadata, mp);
562 dblock = bn; 563 dblock = bn;
@@ -796,8 +797,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
796 797
797 down_write(&ip->i_rw_mutex); 798 down_write(&ip->i_rw_mutex);
798 799
799 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 800 gfs2_trans_add_meta(ip->i_gl, dibh);
800 gfs2_trans_add_bh(ip->i_gl, bh, 1); 801 gfs2_trans_add_meta(ip->i_gl, bh);
801 802
802 bstart = 0; 803 bstart = 0;
803 blen = 0; 804 blen = 0;
@@ -981,7 +982,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
981 } 982 }
982 983
983 if (!gfs2_is_writeback(ip)) 984 if (!gfs2_is_writeback(ip))
984 gfs2_trans_add_bh(ip->i_gl, bh, 0); 985 gfs2_trans_add_data(ip->i_gl, bh);
985 986
986 zero_user(page, offset, length); 987 zero_user(page, offset, length);
987 mark_buffer_dirty(bh); 988 mark_buffer_dirty(bh);
@@ -1046,7 +1047,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
1046 if (error) 1047 if (error)
1047 goto out; 1048 goto out;
1048 1049
1049 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1050 gfs2_trans_add_meta(ip->i_gl, dibh);
1050 1051
1051 if (gfs2_is_stuffed(ip)) { 1052 if (gfs2_is_stuffed(ip)) {
1052 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); 1053 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
@@ -1098,7 +1099,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1098 if (error) 1099 if (error)
1099 return error; 1100 return error;
1100 1101
1101 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1102 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1102 if (error) 1103 if (error)
1103 return error; 1104 return error;
1104 1105
@@ -1137,11 +1138,12 @@ static int trunc_end(struct gfs2_inode *ip)
1137 ip->i_height = 0; 1138 ip->i_height = 0;
1138 ip->i_goal = ip->i_no_addr; 1139 ip->i_goal = ip->i_no_addr;
1139 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 1140 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1141 gfs2_ordered_del_inode(ip);
1140 } 1142 }
1141 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1143 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1142 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; 1144 ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1143 1145
1144 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1146 gfs2_trans_add_meta(ip->i_gl, dibh);
1145 gfs2_dinode_out(ip, dibh->b_data); 1147 gfs2_dinode_out(ip, dibh->b_data);
1146 brelse(dibh); 1148 brelse(dibh);
1147 1149
@@ -1246,7 +1248,7 @@ static int do_grow(struct inode *inode, u64 size)
1246 1248
1247 i_size_write(inode, size); 1249 i_size_write(inode, size);
1248 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1250 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1249 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1251 gfs2_trans_add_meta(ip->i_gl, dibh);
1250 gfs2_dinode_out(ip, dibh->b_data); 1252 gfs2_dinode_out(ip, dibh->b_data);
1251 brelse(dibh); 1253 brelse(dibh);
1252 1254
@@ -1286,6 +1288,10 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1286 1288
1287 inode_dio_wait(inode); 1289 inode_dio_wait(inode);
1288 1290
1291 ret = gfs2_rs_alloc(GFS2_I(inode));
1292 if (ret)
1293 return ret;
1294
1289 oldsize = inode->i_size; 1295 oldsize = inode->i_size;
1290 if (newsize >= oldsize) 1296 if (newsize >= oldsize)
1291 return do_grow(inode, newsize); 1297 return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 9a35670fdc38..c3e82bd23179 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -93,7 +93,7 @@ int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
93 struct buffer_head *bh; 93 struct buffer_head *bh;
94 94
95 bh = gfs2_meta_new(ip->i_gl, block); 95 bh = gfs2_meta_new(ip->i_gl, block);
96 gfs2_trans_add_bh(ip->i_gl, bh, 1); 96 gfs2_trans_add_meta(ip->i_gl, bh);
97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); 97 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 98 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99 *bhp = bh; 99 *bhp = bh;
@@ -127,7 +127,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
127 if (error) 127 if (error)
128 return error; 128 return error;
129 129
130 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 130 gfs2_trans_add_meta(ip->i_gl, dibh);
131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); 131 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132 if (ip->i_inode.i_size < offset + size) 132 if (ip->i_inode.i_size < offset + size)
133 i_size_write(&ip->i_inode, offset + size); 133 i_size_write(&ip->i_inode, offset + size);
@@ -209,7 +209,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
209 if (error) 209 if (error)
210 goto fail; 210 goto fail;
211 211
212 gfs2_trans_add_bh(ip->i_gl, bh, 1); 212 gfs2_trans_add_meta(ip->i_gl, bh);
213 memcpy(bh->b_data + o, buf, amount); 213 memcpy(bh->b_data + o, buf, amount);
214 brelse(bh); 214 brelse(bh);
215 215
@@ -231,7 +231,7 @@ out:
231 i_size_write(&ip->i_inode, offset + copied); 231 i_size_write(&ip->i_inode, offset + copied);
232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 232 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
233 233
234 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 234 gfs2_trans_add_meta(ip->i_gl, dibh);
235 gfs2_dinode_out(ip, dibh->b_data); 235 gfs2_dinode_out(ip, dibh->b_data);
236 brelse(dibh); 236 brelse(dibh);
237 237
@@ -647,7 +647,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
647 return; 647 return;
648 } 648 }
649 649
650 gfs2_trans_add_bh(dip->i_gl, bh, 1); 650 gfs2_trans_add_meta(dip->i_gl, bh);
651 651
652 /* If there is no prev entry, this is the first entry in the block. 652 /* If there is no prev entry, this is the first entry in the block.
653 The de_rec_len is already as big as it needs to be. Just zero 653 The de_rec_len is already as big as it needs to be. Just zero
@@ -690,7 +690,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); 690 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
691 totlen = be16_to_cpu(dent->de_rec_len); 691 totlen = be16_to_cpu(dent->de_rec_len);
692 BUG_ON(offset + name->len > totlen); 692 BUG_ON(offset + name->len > totlen);
693 gfs2_trans_add_bh(ip->i_gl, bh, 1); 693 gfs2_trans_add_meta(ip->i_gl, bh);
694 ndent = (struct gfs2_dirent *)((char *)dent + offset); 694 ndent = (struct gfs2_dirent *)((char *)dent + offset);
695 dent->de_rec_len = cpu_to_be16(offset); 695 dent->de_rec_len = cpu_to_be16(offset);
696 gfs2_qstr2dirent(name, totlen - offset, ndent); 696 gfs2_qstr2dirent(name, totlen - offset, ndent);
@@ -831,7 +831,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
831 return NULL; 831 return NULL;
832 832
833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); 833 gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
834 gfs2_trans_add_bh(ip->i_gl, bh, 1); 834 gfs2_trans_add_meta(ip->i_gl, bh);
835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); 835 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
836 leaf = (struct gfs2_leaf *)bh->b_data; 836 leaf = (struct gfs2_leaf *)bh->b_data;
837 leaf->lf_depth = cpu_to_be16(depth); 837 leaf->lf_depth = cpu_to_be16(depth);
@@ -916,7 +916,7 @@ static int dir_make_exhash(struct inode *inode)
916 /* We're done with the new leaf block, now setup the new 916 /* We're done with the new leaf block, now setup the new
917 hash table. */ 917 hash table. */
918 918
919 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 919 gfs2_trans_add_meta(dip->i_gl, dibh);
920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 920 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
921 921
922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); 922 lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
976 return 1; /* can't split */ 976 return 1; /* can't split */
977 } 977 }
978 978
979 gfs2_trans_add_bh(dip->i_gl, obh, 1); 979 gfs2_trans_add_meta(dip->i_gl, obh);
980 980
981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); 981 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
982 if (!nleaf) { 982 if (!nleaf) {
@@ -1069,7 +1069,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1069 1069
1070 error = gfs2_meta_inode_buffer(dip, &dibh); 1070 error = gfs2_meta_inode_buffer(dip, &dibh);
1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { 1071 if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1072 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1072 gfs2_trans_add_meta(dip->i_gl, dibh);
1073 gfs2_add_inode_blocks(&dip->i_inode, 1); 1073 gfs2_add_inode_blocks(&dip->i_inode, 1);
1074 gfs2_dinode_out(dip, dibh->b_data); 1074 gfs2_dinode_out(dip, dibh->b_data);
1075 brelse(dibh); 1075 brelse(dibh);
@@ -1622,7 +1622,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1622 return error; 1622 return error;
1623 } while(1); 1623 } while(1);
1624 1624
1625 gfs2_trans_add_bh(ip->i_gl, obh, 1); 1625 gfs2_trans_add_meta(ip->i_gl, obh);
1626 1626
1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); 1627 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1628 if (!leaf) { 1628 if (!leaf) {
@@ -1636,7 +1636,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1636 error = gfs2_meta_inode_buffer(ip, &bh); 1636 error = gfs2_meta_inode_buffer(ip, &bh);
1637 if (error) 1637 if (error)
1638 return error; 1638 return error;
1639 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1639 gfs2_trans_add_meta(ip->i_gl, bh);
1640 gfs2_add_inode_blocks(&ip->i_inode, 1); 1640 gfs2_add_inode_blocks(&ip->i_inode, 1);
1641 gfs2_dinode_out(ip, bh->b_data); 1641 gfs2_dinode_out(ip, bh->b_data);
1642 brelse(bh); 1642 brelse(bh);
@@ -1795,7 +1795,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1795 if (IS_ERR(dent)) 1795 if (IS_ERR(dent))
1796 return PTR_ERR(dent); 1796 return PTR_ERR(dent);
1797 1797
1798 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1798 gfs2_trans_add_meta(dip->i_gl, bh);
1799 gfs2_inum_out(nip, dent); 1799 gfs2_inum_out(nip, dent);
1800 dent->de_type = cpu_to_be16(new_type); 1800 dent->de_type = cpu_to_be16(new_type);
1801 1801
@@ -1804,7 +1804,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1804 error = gfs2_meta_inode_buffer(dip, &bh); 1804 error = gfs2_meta_inode_buffer(dip, &bh);
1805 if (error) 1805 if (error)
1806 return error; 1806 return error;
1807 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1807 gfs2_trans_add_meta(dip->i_gl, bh);
1808 } 1808 }
1809 1809
1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1810 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
@@ -1849,7 +1849,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1849 if (!ht) 1849 if (!ht)
1850 return -ENOMEM; 1850 return -ENOMEM;
1851 1851
1852 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1852 error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1853 if (error) 1853 if (error)
1854 goto out; 1854 goto out;
1855 1855
@@ -1917,7 +1917,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1917 if (error) 1917 if (error)
1918 goto out_end_trans; 1918 goto out_end_trans;
1919 1919
1920 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1920 gfs2_trans_add_meta(dip->i_gl, dibh);
1921 /* On the last dealloc, make this a regular file in case we crash. 1921 /* On the last dealloc, make this a regular file in case we crash.
1922 (We don't want to free these blocks a second time.) */ 1922 (We don't want to free these blocks a second time.) */
1923 if (last_dealloc) 1923 if (last_dealloc)
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 4767774a5f3e..9973df4ff565 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -37,10 +37,10 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len,
37 37
38 if (parent && (*len < GFS2_LARGE_FH_SIZE)) { 38 if (parent && (*len < GFS2_LARGE_FH_SIZE)) {
39 *len = GFS2_LARGE_FH_SIZE; 39 *len = GFS2_LARGE_FH_SIZE;
40 return 255; 40 return FILEID_INVALID;
41 } else if (*len < GFS2_SMALL_FH_SIZE) { 41 } else if (*len < GFS2_SMALL_FH_SIZE) {
42 *len = GFS2_SMALL_FH_SIZE; 42 *len = GFS2_SMALL_FH_SIZE;
43 return 255; 43 return FILEID_INVALID;
44 } 44 }
45 45
46 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 46 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
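
Both the fuse and gfs2 ->encode_fh() hunks replace the literal 255 with FILEID_INVALID (the same 0xff value, spelled symbolically). A minimal sketch of that contract; the handle length and returned fileid type are placeholders:

/* Sketch only: report the required length and FILEID_INVALID when the
 * caller's buffer is too small, otherwise fill the handle. */
static int example_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
                             struct inode *parent)
{
        int len = 3;                    /* placeholder size, 32-bit words */

        if (*max_len < len) {
                *max_len = len;
                return FILEID_INVALID;
        }
        /* ... fill fh[0..len-1] from the inode ... */
        *max_len = len;
        return 1;                       /* placeholder fileid type */
}
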
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 991ab2d484dd..019f45e45097 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -157,7 +157,7 @@ static const u32 gfs2_to_fsflags[32] = {
157 157
158static int gfs2_get_flags(struct file *filp, u32 __user *ptr) 158static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
159{ 159{
160 struct inode *inode = filp->f_path.dentry->d_inode; 160 struct inode *inode = file_inode(filp);
161 struct gfs2_inode *ip = GFS2_I(inode); 161 struct gfs2_inode *ip = GFS2_I(inode);
162 struct gfs2_holder gh; 162 struct gfs2_holder gh;
163 int error; 163 int error;
@@ -217,7 +217,7 @@ void gfs2_set_inode_flags(struct inode *inode)
217 */ 217 */
218static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) 218static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
219{ 219{
220 struct inode *inode = filp->f_path.dentry->d_inode; 220 struct inode *inode = file_inode(filp);
221 struct gfs2_inode *ip = GFS2_I(inode); 221 struct gfs2_inode *ip = GFS2_I(inode);
222 struct gfs2_sbd *sdp = GFS2_SB(inode); 222 struct gfs2_sbd *sdp = GFS2_SB(inode);
223 struct buffer_head *bh; 223 struct buffer_head *bh;
@@ -276,7 +276,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
276 error = gfs2_meta_inode_buffer(ip, &bh); 276 error = gfs2_meta_inode_buffer(ip, &bh);
277 if (error) 277 if (error)
278 goto out_trans_end; 278 goto out_trans_end;
279 gfs2_trans_add_bh(ip->i_gl, bh, 1); 279 gfs2_trans_add_meta(ip->i_gl, bh);
280 ip->i_diskflags = new_flags; 280 ip->i_diskflags = new_flags;
281 gfs2_dinode_out(ip, bh->b_data); 281 gfs2_dinode_out(ip, bh->b_data);
282 brelse(bh); 282 brelse(bh);
@@ -293,7 +293,7 @@ out_drop_write:
293 293
294static int gfs2_set_flags(struct file *filp, u32 __user *ptr) 294static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
295{ 295{
296 struct inode *inode = filp->f_path.dentry->d_inode; 296 struct inode *inode = file_inode(filp);
297 u32 fsflags, gfsflags; 297 u32 fsflags, gfsflags;
298 298
299 if (get_user(fsflags, ptr)) 299 if (get_user(fsflags, ptr))
@@ -336,7 +336,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
336 336
337static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) 337static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
338{ 338{
339 struct inode *inode = filep->f_dentry->d_inode; 339 struct inode *inode = file_inode(filep);
340 struct gfs2_sbd *sdp = GFS2_SB(inode); 340 struct gfs2_sbd *sdp = GFS2_SB(inode);
341 struct gfs2_inode *ip = GFS2_I(inode); 341 struct gfs2_inode *ip = GFS2_I(inode);
342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; 342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
@@ -386,7 +386,7 @@ static int gfs2_allocate_page_backing(struct page *page)
386static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 386static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
387{ 387{
388 struct page *page = vmf->page; 388 struct page *page = vmf->page;
389 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 389 struct inode *inode = file_inode(vma->vm_file);
390 struct gfs2_inode *ip = GFS2_I(inode); 390 struct gfs2_inode *ip = GFS2_I(inode);
391 struct gfs2_sbd *sdp = GFS2_SB(inode); 391 struct gfs2_sbd *sdp = GFS2_SB(inode);
392 unsigned long last_index; 392 unsigned long last_index;
@@ -483,7 +483,7 @@ out:
483 gfs2_holder_uninit(&gh); 483 gfs2_holder_uninit(&gh);
484 if (ret == 0) { 484 if (ret == 0) {
485 set_page_dirty(page); 485 set_page_dirty(page);
486 wait_on_page_writeback(page); 486 wait_for_stable_page(page);
487 } 487 }
488 sb_end_pagefault(inode->i_sb); 488 sb_end_pagefault(inode->i_sb);
489 return block_page_mkwrite_return(ret); 489 return block_page_mkwrite_return(ret);
@@ -673,8 +673,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
673{ 673{
674 struct file *file = iocb->ki_filp; 674 struct file *file = iocb->ki_filp;
675 size_t writesize = iov_length(iov, nr_segs); 675 size_t writesize = iov_length(iov, nr_segs);
676 struct dentry *dentry = file->f_dentry; 676 struct gfs2_inode *ip = GFS2_I(file_inode(file));
677 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
678 int ret; 677 int ret;
679 678
680 ret = gfs2_rs_alloc(ip); 679 ret = gfs2_rs_alloc(ip);
@@ -709,7 +708,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
709 if (unlikely(error)) 708 if (unlikely(error))
710 return error; 709 return error;
711 710
712 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 711 gfs2_trans_add_meta(ip->i_gl, dibh);
713 712
714 if (gfs2_is_stuffed(ip)) { 713 if (gfs2_is_stuffed(ip)) {
715 error = gfs2_unstuff_dinode(ip, NULL); 714 error = gfs2_unstuff_dinode(ip, NULL);
@@ -772,7 +771,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
772static long gfs2_fallocate(struct file *file, int mode, loff_t offset, 771static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
773 loff_t len) 772 loff_t len)
774{ 773{
775 struct inode *inode = file->f_path.dentry->d_inode; 774 struct inode *inode = file_inode(file);
776 struct gfs2_sbd *sdp = GFS2_SB(inode); 775 struct gfs2_sbd *sdp = GFS2_SB(inode);
777 struct gfs2_inode *ip = GFS2_I(inode); 776 struct gfs2_inode *ip = GFS2_I(inode);
778 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 777 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
@@ -938,7 +937,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
938{ 937{
939 struct gfs2_file *fp = file->private_data; 938 struct gfs2_file *fp = file->private_data;
940 struct gfs2_holder *fl_gh = &fp->f_fl_gh; 939 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
941 struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); 940 struct gfs2_inode *ip = GFS2_I(file_inode(file));
942 struct gfs2_glock *gl; 941 struct gfs2_glock *gl;
943 unsigned int state; 942 unsigned int state;
944 int flags; 943 int flags;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 992c5c0cb504..cf3515546739 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -30,6 +30,7 @@
30#include <linux/rculist_bl.h> 30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h> 31#include <linux/bit_spinlock.h>
32#include <linux/percpu.h> 32#include <linux/percpu.h>
33#include <linux/list_sort.h>
33 34
34#include "gfs2.h" 35#include "gfs2.h"
35#include "incore.h" 36#include "incore.h"
@@ -1376,56 +1377,105 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1376 gfs2_glock_put(gl); 1377 gfs2_glock_put(gl);
1377} 1378}
1378 1379
1380static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1381{
1382 struct gfs2_glock *gla, *glb;
1379 1383
1380static int gfs2_shrink_glock_memory(struct shrinker *shrink, 1384 gla = list_entry(a, struct gfs2_glock, gl_lru);
1381 struct shrink_control *sc) 1385 glb = list_entry(b, struct gfs2_glock, gl_lru);
1386
1387 if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1388 return 1;
1389 if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1390 return -1;
1391
1392 return 0;
1393}
1394
1395/**
1396 * gfs2_dispose_glock_lru - Demote a list of glocks
1397 * @list: The list to dispose of
1398 *
1399 * Disposing of glocks may involve disk accesses, so that here we sort
1400 * the glocks by number (i.e. disk location of the inodes) so that if
1401 * there are any such accesses, they'll be sent in order (mostly).
1402 *
1403 * Must be called under the lru_lock, but may drop and retake this
1404 * lock. While the lru_lock is dropped, entries may vanish from the
1405 * list, but no new entries will appear on the list (since it is
1406 * private)
1407 */
1408
1409static void gfs2_dispose_glock_lru(struct list_head *list)
1410__releases(&lru_lock)
1411__acquires(&lru_lock)
1382{ 1412{
1383 struct gfs2_glock *gl; 1413 struct gfs2_glock *gl;
1384 int may_demote;
1385 int nr_skipped = 0;
1386 int nr = sc->nr_to_scan;
1387 gfp_t gfp_mask = sc->gfp_mask;
1388 LIST_HEAD(skipped);
1389 1414
1390 if (nr == 0) 1415 list_sort(NULL, list, glock_cmp);
1391 goto out;
1392 1416
1393 if (!(gfp_mask & __GFP_FS)) 1417 while(!list_empty(list)) {
1394 return -1; 1418 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1419 list_del_init(&gl->gl_lru);
1420 clear_bit(GLF_LRU, &gl->gl_flags);
1421 gfs2_glock_hold(gl);
1422 spin_unlock(&lru_lock);
1423 spin_lock(&gl->gl_spin);
1424 if (demote_ok(gl))
1425 handle_callback(gl, LM_ST_UNLOCKED, 0);
1426 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1427 smp_mb__after_clear_bit();
1428 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1429 gfs2_glock_put_nolock(gl);
1430 spin_unlock(&gl->gl_spin);
1431 spin_lock(&lru_lock);
1432 }
1433}
1434
1435/**
1436 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1437 * @nr: The number of entries to scan
1438 *
1439 * This function selects the entries on the LRU which are able to
1440 * be demoted, and then kicks off the process by calling
1441 * gfs2_dispose_glock_lru() above.
1442 */
1443
1444static void gfs2_scan_glock_lru(int nr)
1445{
1446 struct gfs2_glock *gl;
1447 LIST_HEAD(skipped);
1448 LIST_HEAD(dispose);
1395 1449
1396 spin_lock(&lru_lock); 1450 spin_lock(&lru_lock);
1397 while(nr && !list_empty(&lru_list)) { 1451 while(nr && !list_empty(&lru_list)) {
1398 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1452 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1399 list_del_init(&gl->gl_lru);
1400 clear_bit(GLF_LRU, &gl->gl_flags);
1401 atomic_dec(&lru_count);
1402 1453
1403 /* Test for being demotable */ 1454 /* Test for being demotable */
1404 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1455 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1405 gfs2_glock_hold(gl); 1456 list_move(&gl->gl_lru, &dispose);
1406 spin_unlock(&lru_lock); 1457 atomic_dec(&lru_count);
1407 spin_lock(&gl->gl_spin); 1458 nr--;
1408 may_demote = demote_ok(gl);
1409 if (may_demote) {
1410 handle_callback(gl, LM_ST_UNLOCKED, 0);
1411 nr--;
1412 }
1413 clear_bit(GLF_LOCK, &gl->gl_flags);
1414 smp_mb__after_clear_bit();
1415 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1416 gfs2_glock_put_nolock(gl);
1417 spin_unlock(&gl->gl_spin);
1418 spin_lock(&lru_lock);
1419 continue; 1459 continue;
1420 } 1460 }
1421 nr_skipped++; 1461
1422 list_add(&gl->gl_lru, &skipped); 1462 list_move(&gl->gl_lru, &skipped);
1423 set_bit(GLF_LRU, &gl->gl_flags);
1424 } 1463 }
1425 list_splice(&skipped, &lru_list); 1464 list_splice(&skipped, &lru_list);
1426 atomic_add(nr_skipped, &lru_count); 1465 if (!list_empty(&dispose))
1466 gfs2_dispose_glock_lru(&dispose);
1427 spin_unlock(&lru_lock); 1467 spin_unlock(&lru_lock);
1428out: 1468}
1469
1470static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1471 struct shrink_control *sc)
1472{
1473 if (sc->nr_to_scan) {
1474 if (!(sc->gfp_mask & __GFP_FS))
1475 return -1;
1476 gfs2_scan_glock_lru(sc->nr_to_scan);
1477 }
1478
1429 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; 1479 return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
1430} 1480}
1431 1481
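
The reworked shrinker splits LRU handling in two: gfs2_scan_glock_lru() moves demotable glocks onto a private list, and gfs2_dispose_glock_lru() sorts that list by disk location before issuing demotions. A sketch of the underlying list_sort() idiom; struct item and its sector key are hypothetical stand-ins for the glock case:

/* Sketch only: sort a private list with a three-way comparator, as
 * gfs2_dispose_glock_lru() does with glock_cmp(). */
#include <linux/list_sort.h>

struct item {
        u64 sector;
        struct list_head list;
};

static int item_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct item *ia = list_entry(a, struct item, list);
        struct item *ib = list_entry(b, struct item, list);

        if (ia->sector > ib->sector)
                return 1;
        if (ia->sector < ib->sector)
                return -1;
        return 0;
}

static void sort_pending(struct list_head *pending)
{
        list_sort(NULL, pending, item_cmp);     /* ascending disk order */
}
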
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78d4184ffc7d..444b6503ebc4 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -322,8 +322,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
322 break; 322 break;
323 }; 323 };
324 324
325 ip->i_inode.i_uid = be32_to_cpu(str->di_uid); 325 i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
326 ip->i_inode.i_gid = be32_to_cpu(str->di_gid); 326 i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); 327 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); 328 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); 329 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c373a24fedd9..156e42ec84ea 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -52,7 +52,6 @@ struct gfs2_log_header_host {
52 */ 52 */
53 53
54struct gfs2_log_operations { 54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
56 void (*lo_before_commit) (struct gfs2_sbd *sdp); 55 void (*lo_before_commit) (struct gfs2_sbd *sdp);
57 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); 56 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
58 void (*lo_before_scan) (struct gfs2_jdesc *jd, 57 void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -341,6 +340,7 @@ enum {
341 GIF_QD_LOCKED = 1, 340 GIF_QD_LOCKED = 1,
342 GIF_ALLOC_FAILED = 2, 341 GIF_ALLOC_FAILED = 2,
343 GIF_SW_PAGED = 3, 342 GIF_SW_PAGED = 3,
343 GIF_ORDERED = 4,
344}; 344};
345 345
346struct gfs2_inode { 346struct gfs2_inode {
@@ -357,6 +357,7 @@ struct gfs2_inode {
357 struct gfs2_rgrpd *i_rgd; 357 struct gfs2_rgrpd *i_rgd;
358 u64 i_goal; /* goal block for allocations */ 358 u64 i_goal; /* goal block for allocations */
359 struct rw_semaphore i_rw_mutex; 359 struct rw_semaphore i_rw_mutex;
360 struct list_head i_ordered;
360 struct list_head i_trunc_list; 361 struct list_head i_trunc_list;
361 __be64 *i_hash_cache; 362 __be64 *i_hash_cache;
362 u32 i_entries; 363 u32 i_entries;
@@ -391,7 +392,6 @@ struct gfs2_revoke_replay {
391}; 392};
392 393
393enum { 394enum {
394 QDF_USER = 0,
395 QDF_CHANGE = 1, 395 QDF_CHANGE = 1,
396 QDF_LOCKED = 2, 396 QDF_LOCKED = 2,
397 QDF_REFRESH = 3, 397 QDF_REFRESH = 3,
@@ -403,7 +403,7 @@ struct gfs2_quota_data {
403 403
404 atomic_t qd_count; 404 atomic_t qd_count;
405 405
406 u32 qd_id; 406 struct kqid qd_id;
407 unsigned long qd_flags; /* QDF_... */ 407 unsigned long qd_flags; /* QDF_... */
408 408
409 s64 qd_change; 409 s64 qd_change;
@@ -641,6 +641,7 @@ struct gfs2_sbd {
641 wait_queue_head_t sd_glock_wait; 641 wait_queue_head_t sd_glock_wait;
642 atomic_t sd_glock_disposal; 642 atomic_t sd_glock_disposal;
643 struct completion sd_locking_init; 643 struct completion sd_locking_init;
644 struct completion sd_wdack;
644 struct delayed_work sd_control_work; 645 struct delayed_work sd_control_work;
645 646
646 /* Inode Stuff */ 647 /* Inode Stuff */
@@ -723,6 +724,7 @@ struct gfs2_sbd {
723 struct list_head sd_log_le_revoke; 724 struct list_head sd_log_le_revoke;
724 struct list_head sd_log_le_databuf; 725 struct list_head sd_log_le_databuf;
725 struct list_head sd_log_le_ordered; 726 struct list_head sd_log_le_ordered;
727 spinlock_t sd_ordered_lock;
726 728
727 atomic_t sd_log_thresh1; 729 atomic_t sd_log_thresh1;
728 atomic_t sd_log_thresh2; 730 atomic_t sd_log_thresh2;
@@ -758,10 +760,7 @@ struct gfs2_sbd {
758 unsigned int sd_replayed_blocks; 760 unsigned int sd_replayed_blocks;
759 761
760 /* For quiescing the filesystem */ 762 /* For quiescing the filesystem */
761
762 struct gfs2_holder sd_freeze_gh; 763 struct gfs2_holder sd_freeze_gh;
763 struct mutex sd_freeze_lock;
764 unsigned int sd_freeze_count;
765 764
766 char sd_fsname[GFS2_FSNAME_LEN]; 765 char sd_fsname[GFS2_FSNAME_LEN];
767 char sd_table_name[GFS2_FSNAME_LEN]; 766 char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2b6f5698ef18..cc00bd1d1f87 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -368,10 +368,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip,
368 struct inode *inode) 368 struct inode *inode)
369{ 369{
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
371 (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { 371 (dip->i_inode.i_mode & S_ISUID) &&
372 !uid_eq(dip->i_inode.i_uid, GLOBAL_ROOT_UID)) {
372 if (S_ISDIR(inode->i_mode)) 373 if (S_ISDIR(inode->i_mode))
373 inode->i_mode |= S_ISUID; 374 inode->i_mode |= S_ISUID;
374 else if (dip->i_inode.i_uid != current_fsuid()) 375 else if (!uid_eq(dip->i_inode.i_uid, current_fsuid()))
375 inode->i_mode &= ~07111; 376 inode->i_mode &= ~07111;
376 inode->i_uid = dip->i_inode.i_uid; 377 inode->i_uid = dip->i_inode.i_uid;
377 } else 378 } else
@@ -447,7 +448,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
447 struct timespec tv = CURRENT_TIME; 448 struct timespec tv = CURRENT_TIME;
448 449
449 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); 450 dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
450 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 451 gfs2_trans_add_meta(ip->i_gl, dibh);
451 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); 452 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
452 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); 453 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
453 di = (struct gfs2_dinode *)dibh->b_data; 454 di = (struct gfs2_dinode *)dibh->b_data;
@@ -455,8 +456,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
455 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 456 di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
456 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 457 di->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
457 di->di_mode = cpu_to_be32(ip->i_inode.i_mode); 458 di->di_mode = cpu_to_be32(ip->i_inode.i_mode);
458 di->di_uid = cpu_to_be32(ip->i_inode.i_uid); 459 di->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
459 di->di_gid = cpu_to_be32(ip->i_inode.i_gid); 460 di->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
460 di->di_nlink = 0; 461 di->di_nlink = 0;
461 di->di_size = cpu_to_be64(ip->i_inode.i_size); 462 di->di_size = cpu_to_be64(ip->i_inode.i_size);
462 di->di_blocks = cpu_to_be64(1); 463 di->di_blocks = cpu_to_be64(1);
@@ -548,7 +549,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
548 if (error) 549 if (error)
549 return error; 550 return error;
550 551
551 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 552 error = gfs2_quota_lock(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
552 if (error) 553 if (error)
553 goto fail; 554 goto fail;
554 555
@@ -584,7 +585,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
584 if (error) 585 if (error)
585 goto fail_end_trans; 586 goto fail_end_trans;
586 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); 587 set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
587 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 588 gfs2_trans_add_meta(ip->i_gl, dibh);
588 gfs2_dinode_out(ip, dibh->b_data); 589 gfs2_dinode_out(ip, dibh->b_data);
589 brelse(dibh); 590 brelse(dibh);
590 return 0; 591 return 0;
@@ -931,7 +932,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
931 if (error) 932 if (error)
932 goto out_brelse; 933 goto out_brelse;
933 934
934 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 935 gfs2_trans_add_meta(ip->i_gl, dibh);
935 inc_nlink(&ip->i_inode); 936 inc_nlink(&ip->i_inode);
936 ip->i_inode.i_ctime = CURRENT_TIME; 937 ip->i_inode.i_ctime = CURRENT_TIME;
937 ihold(inode); 938 ihold(inode);
@@ -978,8 +979,8 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 return -EPERM; 979 return -EPERM;
979 980
980 if ((dip->i_inode.i_mode & S_ISVTX) && 981 if ((dip->i_inode.i_mode & S_ISVTX) &&
981 dip->i_inode.i_uid != current_fsuid() && 982 !uid_eq(dip->i_inode.i_uid, current_fsuid()) &&
982 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) 983 !uid_eq(ip->i_inode.i_uid, current_fsuid()) && !capable(CAP_FOWNER))
983 return -EPERM; 984 return -EPERM;
984 985
985 if (IS_APPEND(&dip->i_inode)) 986 if (IS_APPEND(&dip->i_inode))
@@ -1412,7 +1413,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1412 if (error) 1413 if (error)
1413 goto out_end_trans; 1414 goto out_end_trans;
1414 ip->i_inode.i_ctime = CURRENT_TIME; 1415 ip->i_inode.i_ctime = CURRENT_TIME;
1415 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1416 gfs2_trans_add_meta(ip->i_gl, dibh);
1416 gfs2_dinode_out(ip, dibh->b_data); 1417 gfs2_dinode_out(ip, dibh->b_data);
1417 brelse(dibh); 1418 brelse(dibh);
1418 } 1419 }
@@ -1580,7 +1581,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1580{ 1581{
1581 struct gfs2_inode *ip = GFS2_I(inode); 1582 struct gfs2_inode *ip = GFS2_I(inode);
1582 struct gfs2_sbd *sdp = GFS2_SB(inode); 1583 struct gfs2_sbd *sdp = GFS2_SB(inode);
1583 u32 ouid, ogid, nuid, ngid; 1584 kuid_t ouid, nuid;
1585 kgid_t ogid, ngid;
1584 int error; 1586 int error;
1585 1587
1586 ouid = inode->i_uid; 1588 ouid = inode->i_uid;
@@ -1588,16 +1590,17 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1588 nuid = attr->ia_uid; 1590 nuid = attr->ia_uid;
1589 ngid = attr->ia_gid; 1591 ngid = attr->ia_gid;
1590 1592
1591 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) 1593 if (!(attr->ia_valid & ATTR_UID) || uid_eq(ouid, nuid))
1592 ouid = nuid = NO_QUOTA_CHANGE; 1594 ouid = nuid = NO_UID_QUOTA_CHANGE;
1593 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1595 if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
1594 ogid = ngid = NO_QUOTA_CHANGE; 1596 ogid = ngid = NO_GID_QUOTA_CHANGE;
1595 1597
1596 error = gfs2_quota_lock(ip, nuid, ngid); 1598 error = gfs2_quota_lock(ip, nuid, ngid);
1597 if (error) 1599 if (error)
1598 return error; 1600 return error;
1599 1601
1600 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1602 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1603 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1601 error = gfs2_quota_check(ip, nuid, ngid); 1604 error = gfs2_quota_check(ip, nuid, ngid);
1602 if (error) 1605 if (error)
1603 goto out_gunlock_q; 1606 goto out_gunlock_q;
@@ -1611,7 +1614,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1611 if (error) 1614 if (error)
1612 goto out_end_trans; 1615 goto out_end_trans;
1613 1616
1614 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1617 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1618 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1615 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1619 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1616 gfs2_quota_change(ip, -blocks, ouid, ogid); 1620 gfs2_quota_change(ip, -blocks, ouid, ogid);
1617 gfs2_quota_change(ip, blocks, nuid, ngid); 1621 gfs2_quota_change(ip, blocks, nuid, ngid);
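
The gfs2 hunks above adopt the kuid_t/kgid_t idiom: raw integers only at the on-disk boundary, uid_eq()/gid_eq() and i_uid_read()/i_gid_read() everywhere else. A short sketch of both sides; the helper names are hypothetical:

/* Sketch only: namespace-aware ownership check and on-disk conversion. */
static bool may_chattr(const struct inode *inode)
{
        return uid_eq(current_fsuid(), inode->i_uid) || capable(CAP_FOWNER);
}

static void dinode_uid_out(struct gfs2_dinode *di, const struct inode *inode)
{
        di->di_uid = cpu_to_be32(i_uid_read(inode));
        di->di_gid = cpu_to_be32(i_gid_read(inode));
}
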
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 8dad6b093716..9802de0f85e6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -241,6 +241,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
241 241
242static void gfs2_reverse_hex(char *c, u64 value) 242static void gfs2_reverse_hex(char *c, u64 value)
243{ 243{
244 *c = '0';
244 while (value) { 245 while (value) {
245 *c-- = hex_asc[value & 0x0f]; 246 *c-- = hex_asc[value & 0x0f];
246 value >>= 4; 247 value >>= 4;
@@ -280,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
280{ 281{
281 struct gfs2_sbd *sdp = gl->gl_sbd; 282 struct gfs2_sbd *sdp = gl->gl_sbd;
282 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 283 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
284 int lvb_needs_unlock = 0;
283 int error; 285 int error;
284 286
285 if (gl->gl_lksb.sb_lkid == 0) { 287 if (gl->gl_lksb.sb_lkid == 0) {
@@ -293,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
293 gfs2_update_request_times(gl); 295 gfs2_update_request_times(gl);
294 296
295 /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 297 /* don't want to skip dlm_unlock writing the lvb when lock is ex */
298
299 if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
300 lvb_needs_unlock = 1;
301
296 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 302 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
297 gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 303 !lvb_needs_unlock) {
298 gfs2_glock_free(gl); 304 gfs2_glock_free(gl);
299 return; 305 return;
300 } 306 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9c81c1..9a2ca8be7647 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
482 } 482 }
483} 483}
484 484
485static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) 485static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
486{ 486{
487 struct gfs2_bufdata *bda, *bdb; 487 struct gfs2_inode *ipa, *ipb;
488 488
489 bda = list_entry(a, struct gfs2_bufdata, bd_list); 489 ipa = list_entry(a, struct gfs2_inode, i_ordered);
490 bdb = list_entry(b, struct gfs2_bufdata, bd_list); 490 ipb = list_entry(b, struct gfs2_inode, i_ordered);
491 491
492 if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) 492 if (ipa->i_no_addr < ipb->i_no_addr)
493 return -1; 493 return -1;
494 if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) 494 if (ipa->i_no_addr > ipb->i_no_addr)
495 return 1; 495 return 1;
496 return 0; 496 return 0;
497} 497}
498 498
499static void gfs2_ordered_write(struct gfs2_sbd *sdp) 499static void gfs2_ordered_write(struct gfs2_sbd *sdp)
500{ 500{
501 struct gfs2_bufdata *bd; 501 struct gfs2_inode *ip;
502 struct buffer_head *bh;
503 LIST_HEAD(written); 502 LIST_HEAD(written);
504 503
505 gfs2_log_lock(sdp); 504 spin_lock(&sdp->sd_ordered_lock);
506 list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); 505 list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
507 while (!list_empty(&sdp->sd_log_le_ordered)) { 506 while (!list_empty(&sdp->sd_log_le_ordered)) {
508 bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); 507 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
509 list_move(&bd->bd_list, &written); 508 list_move(&ip->i_ordered, &written);
510 bh = bd->bd_bh; 509 if (ip->i_inode.i_mapping->nrpages == 0)
511 if (!buffer_dirty(bh))
512 continue; 510 continue;
513 get_bh(bh); 511 spin_unlock(&sdp->sd_ordered_lock);
514 gfs2_log_unlock(sdp); 512 filemap_fdatawrite(ip->i_inode.i_mapping);
515 lock_buffer(bh); 513 spin_lock(&sdp->sd_ordered_lock);
516 if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
517 bh->b_end_io = end_buffer_write_sync;
518 submit_bh(WRITE_SYNC, bh);
519 } else {
520 unlock_buffer(bh);
521 brelse(bh);
522 }
523 gfs2_log_lock(sdp);
524 } 514 }
525 list_splice(&written, &sdp->sd_log_le_ordered); 515 list_splice(&written, &sdp->sd_log_le_ordered);
526 gfs2_log_unlock(sdp); 516 spin_unlock(&sdp->sd_ordered_lock);
527} 517}
528 518
529static void gfs2_ordered_wait(struct gfs2_sbd *sdp) 519static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
530{ 520{
531 struct gfs2_bufdata *bd; 521 struct gfs2_inode *ip;
532 struct buffer_head *bh;
533 522
534 gfs2_log_lock(sdp); 523 spin_lock(&sdp->sd_ordered_lock);
535 while (!list_empty(&sdp->sd_log_le_ordered)) { 524 while (!list_empty(&sdp->sd_log_le_ordered)) {
536 bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); 525 ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
537 bh = bd->bd_bh; 526 list_del(&ip->i_ordered);
538 if (buffer_locked(bh)) { 527 WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
539 get_bh(bh); 528 if (ip->i_inode.i_mapping->nrpages == 0)
540 gfs2_log_unlock(sdp);
541 wait_on_buffer(bh);
542 brelse(bh);
543 gfs2_log_lock(sdp);
544 continue; 529 continue;
545 } 530 spin_unlock(&sdp->sd_ordered_lock);
546 list_del_init(&bd->bd_list); 531 filemap_fdatawait(ip->i_inode.i_mapping);
532 spin_lock(&sdp->sd_ordered_lock);
547 } 533 }
548 gfs2_log_unlock(sdp); 534 spin_unlock(&sdp->sd_ordered_lock);
535}
536
537void gfs2_ordered_del_inode(struct gfs2_inode *ip)
538{
539 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
540
541 spin_lock(&sdp->sd_ordered_lock);
542 if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
543 list_del(&ip->i_ordered);
544 spin_unlock(&sdp->sd_ordered_lock);
549} 545}
550 546
551/** 547/**
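
The log.c hunk above moves ordered writeback from per-buffer tracking to per-inode tracking: sd_log_le_ordered now holds inodes, ip_cmp() sorts them by disk address (i_no_addr) via list_sort(), and the new sd_ordered_lock is dropped around the blocking filemap_fdatawrite()/filemap_fdatawait() calls. The sketch below is a userspace analogue of that sort-then-flush walk, using simplified stand-in types (ordered_inode, dirty_pages) rather than GFS2 structures.

/* Userspace analogue of the ordered-write walk above: sort by disk
 * address, then drop the lock around the blocking flush so other
 * threads can still queue entries while I/O is in flight.
 * All names here are illustrative, not GFS2 symbols. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ordered_inode {
	unsigned long long no_addr;  /* disk address, the sort key */
	int dirty_pages;             /* stands in for mapping->nrpages */
};

static struct ordered_inode *ordered[8];
static size_t nordered;
static pthread_mutex_t ordered_lock = PTHREAD_MUTEX_INITIALIZER;

/* Same shape as ip_cmp() above: order by address so writeback is
 * roughly sequential on disk. */
static int cmp_addr(const void *a, const void *b)
{
	const struct ordered_inode *ia = *(struct ordered_inode *const *)a;
	const struct ordered_inode *ib = *(struct ordered_inode *const *)b;

	if (ia->no_addr < ib->no_addr) return -1;
	if (ia->no_addr > ib->no_addr) return 1;
	return 0;
}

static void ordered_write(void)
{
	size_t i;

	pthread_mutex_lock(&ordered_lock);
	qsort(ordered, nordered, sizeof(ordered[0]), cmp_addr);
	for (i = 0; i < nordered; i++) {
		if (ordered[i]->dirty_pages == 0)
			continue;
		/* Drop the lock around the blocking flush, as the GFS2
		 * code drops sd_ordered_lock around filemap_fdatawrite(). */
		pthread_mutex_unlock(&ordered_lock);
		printf("writeback inode at block %llu\n", ordered[i]->no_addr);
		ordered[i]->dirty_pages = 0;
		pthread_mutex_lock(&ordered_lock);
	}
	pthread_mutex_unlock(&ordered_lock);
}

int main(void)
{
	static struct ordered_inode a = { 42, 1 }, b = { 7, 1 };

	ordered[nordered++] = &a;
	ordered[nordered++] = &b;
	ordered_write();   /* flushes block 7, then block 42 */
	return 0;
}
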
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 3fd5215ea25f..3566f35915e0 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -48,6 +48,18 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
48 sdp->sd_log_head = sdp->sd_log_tail = value; 48 sdp->sd_log_head = sdp->sd_log_tail = value;
49} 49}
50 50
51static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
52{
53 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
54
55 if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
56 spin_lock(&sdp->sd_ordered_lock);
57 if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
58 list_add(&ip->i_ordered, &sdp->sd_log_le_ordered);
59 spin_unlock(&sdp->sd_ordered_lock);
60 }
61}
62extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
51extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, 63extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
52 unsigned int ssize); 64 unsigned int ssize);
53 65
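
gfs2_ordered_add_inode() above uses a double-checked pattern: an unlocked test_bit(GIF_ORDERED) fast path, then sd_ordered_lock plus test_and_set_bit() so the list_add() happens at most once per inode. Below is a minimal userspace analogue with C11 atomics; all names are illustrative, not kernel symbols.

/* Double-checked "add once" pattern, as in gfs2_ordered_add_inode():
 * cheap unlocked test first, then lock + atomic exchange so the list
 * insertion happens exactly once.  Userspace sketch, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct tracked {
	atomic_bool queued;        /* plays the role of GIF_ORDERED */
	struct tracked *next;
};

static struct tracked *queue_head;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void add_once(struct tracked *t)
{
	/* Fast path: already queued, skip the lock entirely. */
	if (atomic_load(&t->queued))
		return;

	pthread_mutex_lock(&queue_lock);
	/* Re-check under the lock: only the thread that flips the flag
	 * from false to true performs the insertion. */
	if (!atomic_exchange(&t->queued, true)) {
		t->next = queue_head;
		queue_head = t;
	}
	pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
	static struct tracked a;

	add_once(&a);
	add_once(&a);              /* second call is a no-op */
	printf("queued once: %d\n", queue_head == &a && a.next == NULL);
	return 0;
}
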
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9ceccb1595a3..a5055977a214 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -37,7 +37,7 @@
37 * 37 *
38 * The log lock must be held when calling this function 38 * The log lock must be held when calling this function
39 */ 39 */
40static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) 40void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
41{ 41{
42 struct gfs2_bufdata *bd; 42 struct gfs2_bufdata *bd;
43 43
@@ -388,32 +388,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
388 return page; 388 return page;
389} 389}
390 390
391static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
392{
393 struct gfs2_meta_header *mh;
394 struct gfs2_trans *tr;
395
396 tr = current->journal_info;
397 tr->tr_touched = 1;
398 if (!list_empty(&bd->bd_list))
399 return;
400 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
401 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
402 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
403 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
404 printk(KERN_ERR
405 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
406 (unsigned long long)bd->bd_bh->b_blocknr);
407 BUG();
408 }
409 gfs2_pin(sdp, bd->bd_bh);
410 mh->__pad0 = cpu_to_be64(0);
411 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
412 sdp->sd_log_num_buf++;
413 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
414 tr->tr_num_buf_new++;
415}
416
417static void gfs2_check_magic(struct buffer_head *bh) 391static void gfs2_check_magic(struct buffer_head *bh)
418{ 392{
419 void *kaddr; 393 void *kaddr;
@@ -600,20 +574,6 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
600 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 574 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
601} 575}
602 576
603static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
604{
605 struct gfs2_glock *gl = bd->bd_gl;
606 struct gfs2_trans *tr;
607
608 tr = current->journal_info;
609 tr->tr_touched = 1;
610 tr->tr_num_revoke++;
611 sdp->sd_log_num_revoke++;
612 atomic_inc(&gl->gl_revokes);
613 set_bit(GLF_LFLUSH, &gl->gl_flags);
614 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
615}
616
617static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 577static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
618{ 578{
619 struct gfs2_meta_header *mh; 579 struct gfs2_meta_header *mh;
@@ -749,44 +709,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
749} 709}
750 710
751/** 711/**
752 * databuf_lo_add - Add a databuf to the transaction.
753 *
754 * This is used in two distinct cases:
755 * i) In ordered write mode
756 * We put the data buffer on a list so that we can ensure that its
757 * synced to disk at the right time
758 * ii) In journaled data mode
759 * We need to journal the data block in the same way as metadata in
760 * the functions above. The difference is that here we have a tag
761 * which is two __be64's being the block number (as per meta data)
762 * and a flag which says whether the data block needs escaping or
763 * not. This means we need a new log entry for each 251 or so data
764 * blocks, which isn't an enormous overhead but twice as much as
765 * for normal metadata blocks.
766 */
767static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
768{
769 struct gfs2_trans *tr = current->journal_info;
770 struct address_space *mapping = bd->bd_bh->b_page->mapping;
771 struct gfs2_inode *ip = GFS2_I(mapping->host);
772
773 if (tr)
774 tr->tr_touched = 1;
775 if (!list_empty(&bd->bd_list))
776 return;
777 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
778 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
779 if (gfs2_is_jdata(ip)) {
780 gfs2_pin(sdp, bd->bd_bh);
781 tr->tr_num_databuf_new++;
782 sdp->sd_log_num_databuf++;
783 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
784 } else {
785 list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
786 }
787}
788
789/**
790 * databuf_lo_before_commit - Scan the data buffers, writing as we go 712 * databuf_lo_before_commit - Scan the data buffers, writing as we go
791 * 713 *
792 */ 714 */
@@ -885,7 +807,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
885 807
886 808
887const struct gfs2_log_operations gfs2_buf_lops = { 809const struct gfs2_log_operations gfs2_buf_lops = {
888 .lo_add = buf_lo_add,
889 .lo_before_commit = buf_lo_before_commit, 810 .lo_before_commit = buf_lo_before_commit,
890 .lo_after_commit = buf_lo_after_commit, 811 .lo_after_commit = buf_lo_after_commit,
891 .lo_before_scan = buf_lo_before_scan, 812 .lo_before_scan = buf_lo_before_scan,
@@ -895,7 +816,6 @@ const struct gfs2_log_operations gfs2_buf_lops = {
895}; 816};
896 817
897const struct gfs2_log_operations gfs2_revoke_lops = { 818const struct gfs2_log_operations gfs2_revoke_lops = {
898 .lo_add = revoke_lo_add,
899 .lo_before_commit = revoke_lo_before_commit, 819 .lo_before_commit = revoke_lo_before_commit,
900 .lo_after_commit = revoke_lo_after_commit, 820 .lo_after_commit = revoke_lo_after_commit,
901 .lo_before_scan = revoke_lo_before_scan, 821 .lo_before_scan = revoke_lo_before_scan,
@@ -909,7 +829,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
909}; 829};
910 830
911const struct gfs2_log_operations gfs2_databuf_lops = { 831const struct gfs2_log_operations gfs2_databuf_lops = {
912 .lo_add = databuf_lo_add,
913 .lo_before_commit = databuf_lo_before_commit, 832 .lo_before_commit = databuf_lo_before_commit,
914 .lo_after_commit = databuf_lo_after_commit, 833 .lo_after_commit = databuf_lo_after_commit,
915 .lo_scan_elements = databuf_lo_scan_elements, 834 .lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 954a330585f4..ba77b7da8325 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -29,6 +29,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
29extern const struct gfs2_log_operations *gfs2_log_ops[]; 29extern const struct gfs2_log_operations *gfs2_log_ops[];
30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 30extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); 31extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
32extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
32 33
33static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 34static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
34{ 35{
@@ -46,19 +47,6 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
46 return limit; 47 return limit;
47} 48}
48 49
49static inline void lops_init_le(struct gfs2_bufdata *bd,
50 const struct gfs2_log_operations *lops)
51{
52 INIT_LIST_HEAD(&bd->bd_list);
53 bd->bd_ops = lops;
54}
55
56static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
57{
58 if (bd->bd_ops->lo_add)
59 bd->bd_ops->lo_add(sdp, bd);
60}
61
62static inline void lops_before_commit(struct gfs2_sbd *sdp) 50static inline void lops_before_commit(struct gfs2_sbd *sdp)
63{ 51{
64 int x; 52 int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 22255d96b27e..b059bbb5059e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -271,41 +271,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
271 return 0; 271 return 0;
272} 272}
273 273
274/**
275 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
276 * @gl: the glock the buffer belongs to
277 * @bh: The buffer to be attached to
278 * @meta: Flag to indicate whether its metadata or not
279 */
280
281void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
282 int meta)
283{
284 struct gfs2_bufdata *bd;
285
286 if (meta)
287 lock_page(bh->b_page);
288
289 if (bh->b_private) {
290 if (meta)
291 unlock_page(bh->b_page);
292 return;
293 }
294
295 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
296 bd->bd_bh = bh;
297 bd->bd_gl = gl;
298
299 if (meta)
300 lops_init_le(bd, &gfs2_buf_lops);
301 else
302 lops_init_le(bd, &gfs2_databuf_lops);
303 bh->b_private = bd;
304
305 if (meta)
306 unlock_page(bh->b_page);
307}
308
309void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 274void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta)
310{ 275{
311 struct address_space *mapping = bh->b_page->mapping; 276 struct address_space *mapping = bh->b_page->mapping;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index c30973b07a7c..0d4c843b6f8e 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,9 +56,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); 56int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); 57struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
58 58
59void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
60 int meta);
61
62void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 59void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
63 int meta); 60 int meta);
64 61
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0e3554edb8f2..1b612be4b873 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -81,6 +81,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
81 init_waitqueue_head(&sdp->sd_glock_wait); 81 init_waitqueue_head(&sdp->sd_glock_wait);
82 atomic_set(&sdp->sd_glock_disposal, 0); 82 atomic_set(&sdp->sd_glock_disposal, 0);
83 init_completion(&sdp->sd_locking_init); 83 init_completion(&sdp->sd_locking_init);
84 init_completion(&sdp->sd_wdack);
84 spin_lock_init(&sdp->sd_statfs_spin); 85 spin_lock_init(&sdp->sd_statfs_spin);
85 86
86 spin_lock_init(&sdp->sd_rindex_spin); 87 spin_lock_init(&sdp->sd_rindex_spin);
@@ -102,6 +103,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
102 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 103 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
103 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 104 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
104 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 105 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
106 spin_lock_init(&sdp->sd_ordered_lock);
105 107
106 init_waitqueue_head(&sdp->sd_log_waitq); 108 init_waitqueue_head(&sdp->sd_log_waitq);
107 init_waitqueue_head(&sdp->sd_logd_waitq); 109 init_waitqueue_head(&sdp->sd_logd_waitq);
@@ -115,8 +117,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
115 117
116 INIT_LIST_HEAD(&sdp->sd_revoke_list); 118 INIT_LIST_HEAD(&sdp->sd_revoke_list);
117 119
118 mutex_init(&sdp->sd_freeze_lock);
119
120 return sdp; 120 return sdp;
121} 121}
122 122
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index ae55e248c3b7..c7c840e916f8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -65,13 +65,10 @@
65#include "inode.h" 65#include "inode.h"
66#include "util.h" 66#include "util.h"
67 67
68#define QUOTA_USER 1
69#define QUOTA_GROUP 0
70
71struct gfs2_quota_change_host { 68struct gfs2_quota_change_host {
72 u64 qc_change; 69 u64 qc_change;
73 u32 qc_flags; /* GFS2_QCF_... */ 70 u32 qc_flags; /* GFS2_QCF_... */
74 u32 qc_id; 71 struct kqid qc_id;
75}; 72};
76 73
77static LIST_HEAD(qd_lru_list); 74static LIST_HEAD(qd_lru_list);
@@ -120,17 +117,24 @@ out:
120 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; 117 return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
121} 118}
122 119
120static u64 qd2index(struct gfs2_quota_data *qd)
121{
122 struct kqid qid = qd->qd_id;
 123 return (2 * (u64)from_kqid(&init_user_ns, qid)) +
 124 ((qid.type == USRQUOTA) ? 0 : 1);
125}
126
123static u64 qd2offset(struct gfs2_quota_data *qd) 127static u64 qd2offset(struct gfs2_quota_data *qd)
124{ 128{
125 u64 offset; 129 u64 offset;
126 130
127 offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags); 131 offset = qd2index(qd);
128 offset *= sizeof(struct gfs2_quota); 132 offset *= sizeof(struct gfs2_quota);
129 133
130 return offset; 134 return offset;
131} 135}
132 136
133static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, 137static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
134 struct gfs2_quota_data **qdp) 138 struct gfs2_quota_data **qdp)
135{ 139{
136 struct gfs2_quota_data *qd; 140 struct gfs2_quota_data *qd;
@@ -141,13 +145,11 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
141 return -ENOMEM; 145 return -ENOMEM;
142 146
143 atomic_set(&qd->qd_count, 1); 147 atomic_set(&qd->qd_count, 1);
144 qd->qd_id = id; 148 qd->qd_id = qid;
145 if (user)
146 set_bit(QDF_USER, &qd->qd_flags);
147 qd->qd_slot = -1; 149 qd->qd_slot = -1;
148 INIT_LIST_HEAD(&qd->qd_reclaim); 150 INIT_LIST_HEAD(&qd->qd_reclaim);
149 151
150 error = gfs2_glock_get(sdp, 2 * (u64)id + !user, 152 error = gfs2_glock_get(sdp, qd2index(qd),
151 &gfs2_quota_glops, CREATE, &qd->qd_gl); 153 &gfs2_quota_glops, CREATE, &qd->qd_gl);
152 if (error) 154 if (error)
153 goto fail; 155 goto fail;
@@ -161,7 +163,7 @@ fail:
161 return error; 163 return error;
162} 164}
163 165
164static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, 166static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
165 struct gfs2_quota_data **qdp) 167 struct gfs2_quota_data **qdp)
166{ 168{
167 struct gfs2_quota_data *qd = NULL, *new_qd = NULL; 169 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
@@ -173,8 +175,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
173 found = 0; 175 found = 0;
174 spin_lock(&qd_lru_lock); 176 spin_lock(&qd_lru_lock);
175 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { 177 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
176 if (qd->qd_id == id && 178 if (qid_eq(qd->qd_id, qid)) {
177 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
178 if (!atomic_read(&qd->qd_count) && 179 if (!atomic_read(&qd->qd_count) &&
179 !list_empty(&qd->qd_reclaim)) { 180 !list_empty(&qd->qd_reclaim)) {
180 /* Remove it from reclaim list */ 181 /* Remove it from reclaim list */
@@ -208,7 +209,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
208 return 0; 209 return 0;
209 } 210 }
210 211
211 error = qd_alloc(sdp, user, id, &new_qd); 212 error = qd_alloc(sdp, qid, &new_qd);
212 if (error) 213 if (error)
213 return error; 214 return error;
214 } 215 }
@@ -458,12 +459,12 @@ static void qd_unlock(struct gfs2_quota_data *qd)
458 qd_put(qd); 459 qd_put(qd);
459} 460}
460 461
461static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, 462static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
462 struct gfs2_quota_data **qdp) 463 struct gfs2_quota_data **qdp)
463{ 464{
464 int error; 465 int error;
465 466
466 error = qd_get(sdp, user, id, qdp); 467 error = qd_get(sdp, qid, qdp);
467 if (error) 468 if (error)
468 return error; 469 return error;
469 470
@@ -491,7 +492,7 @@ static void qdsb_put(struct gfs2_quota_data *qd)
491 qd_put(qd); 492 qd_put(qd);
492} 493}
493 494
494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 495int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
495{ 496{
496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 497 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
497 struct gfs2_quota_data **qd; 498 struct gfs2_quota_data **qd;
@@ -512,28 +513,30 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
512 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 513 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
513 return 0; 514 return 0;
514 515
515 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); 516 error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
516 if (error) 517 if (error)
517 goto out; 518 goto out;
518 ip->i_res->rs_qa_qd_num++; 519 ip->i_res->rs_qa_qd_num++;
519 qd++; 520 qd++;
520 521
521 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); 522 error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
522 if (error) 523 if (error)
523 goto out; 524 goto out;
524 ip->i_res->rs_qa_qd_num++; 525 ip->i_res->rs_qa_qd_num++;
525 qd++; 526 qd++;
526 527
527 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 528 if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) &&
528 error = qdsb_get(sdp, QUOTA_USER, uid, qd); 529 !uid_eq(uid, ip->i_inode.i_uid)) {
530 error = qdsb_get(sdp, make_kqid_uid(uid), qd);
529 if (error) 531 if (error)
530 goto out; 532 goto out;
531 ip->i_res->rs_qa_qd_num++; 533 ip->i_res->rs_qa_qd_num++;
532 qd++; 534 qd++;
533 } 535 }
534 536
535 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { 537 if (!gid_eq(gid, NO_GID_QUOTA_CHANGE) &&
536 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); 538 !gid_eq(gid, ip->i_inode.i_gid)) {
539 error = qdsb_get(sdp, make_kqid_gid(gid), qd);
537 if (error) 540 if (error)
538 goto out; 541 goto out;
539 ip->i_res->rs_qa_qd_num++; 542 ip->i_res->rs_qa_qd_num++;
@@ -567,18 +570,10 @@ static int sort_qd(const void *a, const void *b)
567 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a; 570 const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
568 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b; 571 const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;
569 572
570 if (!test_bit(QDF_USER, &qd_a->qd_flags) != 573 if (qid_lt(qd_a->qd_id, qd_b->qd_id))
571 !test_bit(QDF_USER, &qd_b->qd_flags)) {
572 if (test_bit(QDF_USER, &qd_a->qd_flags))
573 return -1;
574 else
575 return 1;
576 }
577 if (qd_a->qd_id < qd_b->qd_id)
578 return -1; 574 return -1;
579 if (qd_a->qd_id > qd_b->qd_id) 575 if (qid_lt(qd_b->qd_id, qd_a->qd_id))
580 return 1; 576 return 1;
581
582 return 0; 577 return 0;
583} 578}
584 579
@@ -590,14 +585,14 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
590 s64 x; 585 s64 x;
591 586
592 mutex_lock(&sdp->sd_quota_mutex); 587 mutex_lock(&sdp->sd_quota_mutex);
593 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1); 588 gfs2_trans_add_meta(ip->i_gl, qd->qd_bh);
594 589
595 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { 590 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
596 qc->qc_change = 0; 591 qc->qc_change = 0;
597 qc->qc_flags = 0; 592 qc->qc_flags = 0;
598 if (test_bit(QDF_USER, &qd->qd_flags)) 593 if (qd->qd_id.type == USRQUOTA)
599 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); 594 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
600 qc->qc_id = cpu_to_be32(qd->qd_id); 595 qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id));
601 } 596 }
602 597
603 x = be64_to_cpu(qc->qc_change) + change; 598 x = be64_to_cpu(qc->qc_change) + change;
@@ -726,7 +721,7 @@ get_a_page:
726 goto unlock_out; 721 goto unlock_out;
727 } 722 }
728 723
729 gfs2_trans_add_bh(ip->i_gl, bh, 0); 724 gfs2_trans_add_meta(ip->i_gl, bh);
730 725
731 kaddr = kmap_atomic(page); 726 kaddr = kmap_atomic(page);
732 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) 727 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
@@ -925,7 +920,7 @@ fail:
925 return error; 920 return error;
926} 921}
927 922
928int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 923int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
929{ 924{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 925 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_quota_data *qd; 926 struct gfs2_quota_data *qd;
@@ -1040,13 +1035,13 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1040 1035
1041 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", 1036 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
1042 sdp->sd_fsname, type, 1037 sdp->sd_fsname, type,
1043 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", 1038 (qd->qd_id.type == USRQUOTA) ? "user" : "group",
1044 qd->qd_id); 1039 from_kqid(&init_user_ns, qd->qd_id));
1045 1040
1046 return 0; 1041 return 0;
1047} 1042}
1048 1043
1049int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 1044int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1050{ 1045{
1051 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1046 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1052 struct gfs2_quota_data *qd; 1047 struct gfs2_quota_data *qd;
@@ -1063,8 +1058,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1063 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 1058 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1064 qd = ip->i_res->rs_qa_qd[x]; 1059 qd = ip->i_res->rs_qa_qd[x];
1065 1060
1066 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1061 if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
1067 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) 1062 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1068 continue; 1063 continue;
1069 1064
1070 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1065 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
@@ -1074,10 +1069,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1074 1069
1075 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1070 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
1076 print_message(qd, "exceeded"); 1071 print_message(qd, "exceeded");
1077 quota_send_warning(make_kqid(&init_user_ns, 1072 quota_send_warning(qd->qd_id,
1078 test_bit(QDF_USER, &qd->qd_flags) ?
1079 USRQUOTA : GRPQUOTA,
1080 qd->qd_id),
1081 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1073 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
1082 1074
1083 error = -EDQUOT; 1075 error = -EDQUOT;
@@ -1087,10 +1079,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1087 time_after_eq(jiffies, qd->qd_last_warn + 1079 time_after_eq(jiffies, qd->qd_last_warn +
1088 gfs2_tune_get(sdp, 1080 gfs2_tune_get(sdp,
1089 gt_quota_warn_period) * HZ)) { 1081 gt_quota_warn_period) * HZ)) {
1090 quota_send_warning(make_kqid(&init_user_ns, 1082 quota_send_warning(qd->qd_id,
1091 test_bit(QDF_USER, &qd->qd_flags) ?
1092 USRQUOTA : GRPQUOTA,
1093 qd->qd_id),
1094 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1083 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1095 error = print_message(qd, "warning"); 1084 error = print_message(qd, "warning");
1096 qd->qd_last_warn = jiffies; 1085 qd->qd_last_warn = jiffies;
@@ -1101,7 +1090,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1101} 1090}
1102 1091
1103void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1092void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1104 u32 uid, u32 gid) 1093 kuid_t uid, kgid_t gid)
1105{ 1094{
1106 struct gfs2_quota_data *qd; 1095 struct gfs2_quota_data *qd;
1107 unsigned int x; 1096 unsigned int x;
@@ -1114,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1114 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 1103 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1115 qd = ip->i_res->rs_qa_qd[x]; 1104 qd = ip->i_res->rs_qa_qd[x];
1116 1105
1117 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1106 if (qid_eq(qd->qd_id, make_kqid_uid(uid)) ||
1118 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1107 qid_eq(qd->qd_id, make_kqid_gid(gid))) {
1119 do_qc(qd, change); 1108 do_qc(qd, change);
1120 } 1109 }
1121 } 1110 }
@@ -1170,13 +1159,13 @@ static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
1170 return gfs2_quota_sync(sb, type); 1159 return gfs2_quota_sync(sb, type);
1171} 1160}
1172 1161
1173int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) 1162int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid)
1174{ 1163{
1175 struct gfs2_quota_data *qd; 1164 struct gfs2_quota_data *qd;
1176 struct gfs2_holder q_gh; 1165 struct gfs2_holder q_gh;
1177 int error; 1166 int error;
1178 1167
1179 error = qd_get(sdp, user, id, &qd); 1168 error = qd_get(sdp, qid, &qd);
1180 if (error) 1169 if (error)
1181 return error; 1170 return error;
1182 1171
@@ -1194,7 +1183,9 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *
1194 1183
1195 qc->qc_change = be64_to_cpu(str->qc_change); 1184 qc->qc_change = be64_to_cpu(str->qc_change);
1196 qc->qc_flags = be32_to_cpu(str->qc_flags); 1185 qc->qc_flags = be32_to_cpu(str->qc_flags);
1197 qc->qc_id = be32_to_cpu(str->qc_id); 1186 qc->qc_id = make_kqid(&init_user_ns,
1187 (qc->qc_flags & GFS2_QCF_USER)?USRQUOTA:GRPQUOTA,
1188 be32_to_cpu(str->qc_id));
1198} 1189}
1199 1190
1200int gfs2_quota_init(struct gfs2_sbd *sdp) 1191int gfs2_quota_init(struct gfs2_sbd *sdp)
@@ -1257,8 +1248,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
1257 if (!qc.qc_change) 1248 if (!qc.qc_change)
1258 continue; 1249 continue;
1259 1250
1260 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER), 1251 error = qd_alloc(sdp, qc.qc_id, &qd);
1261 qc.qc_id, &qd);
1262 if (error) { 1252 if (error) {
1263 brelse(bh); 1253 brelse(bh);
1264 goto fail; 1254 goto fail;
@@ -1485,21 +1475,17 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1485 struct gfs2_quota_data *qd; 1475 struct gfs2_quota_data *qd;
1486 struct gfs2_holder q_gh; 1476 struct gfs2_holder q_gh;
1487 int error; 1477 int error;
1488 int type;
1489 1478
1490 memset(fdq, 0, sizeof(struct fs_disk_quota)); 1479 memset(fdq, 0, sizeof(struct fs_disk_quota));
1491 1480
1492 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1481 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1493 return -ESRCH; /* Crazy XFS error code */ 1482 return -ESRCH; /* Crazy XFS error code */
1494 1483
1495 if (qid.type == USRQUOTA) 1484 if ((qid.type != USRQUOTA) &&
1496 type = QUOTA_USER; 1485 (qid.type != GRPQUOTA))
1497 else if (qid.type == GRPQUOTA)
1498 type = QUOTA_GROUP;
1499 else
1500 return -EINVAL; 1486 return -EINVAL;
1501 1487
1502 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); 1488 error = qd_get(sdp, qid, &qd);
1503 if (error) 1489 if (error)
1504 return error; 1490 return error;
1505 error = do_glock(qd, FORCE, &q_gh); 1491 error = do_glock(qd, FORCE, &q_gh);
@@ -1508,8 +1494,8 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1508 1494
1509 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 1495 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
1510 fdq->d_version = FS_DQUOT_VERSION; 1496 fdq->d_version = FS_DQUOT_VERSION;
1511 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1497 fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1512 fdq->d_id = from_kqid(&init_user_ns, qid); 1498 fdq->d_id = from_kqid_munged(current_user_ns(), qid);
1513 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; 1499 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1514 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; 1500 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1515 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; 1501 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
@@ -1535,32 +1521,18 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1535 int alloc_required; 1521 int alloc_required;
1536 loff_t offset; 1522 loff_t offset;
1537 int error; 1523 int error;
1538 int type;
1539 1524
1540 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1525 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1541 return -ESRCH; /* Crazy XFS error code */ 1526 return -ESRCH; /* Crazy XFS error code */
1542 1527
1543 switch(qid.type) { 1528 if ((qid.type != USRQUOTA) &&
1544 case USRQUOTA: 1529 (qid.type != GRPQUOTA))
1545 type = QUOTA_USER;
1546 if (fdq->d_flags != FS_USER_QUOTA)
1547 return -EINVAL;
1548 break;
1549 case GRPQUOTA:
1550 type = QUOTA_GROUP;
1551 if (fdq->d_flags != FS_GROUP_QUOTA)
1552 return -EINVAL;
1553 break;
1554 default:
1555 return -EINVAL; 1530 return -EINVAL;
1556 }
1557 1531
1558 if (fdq->d_fieldmask & ~GFS2_FIELDMASK) 1532 if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
1559 return -EINVAL; 1533 return -EINVAL;
1560 if (fdq->d_id != from_kqid(&init_user_ns, qid))
1561 return -EINVAL;
1562 1534
1563 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); 1535 error = qd_get(sdp, qid, &qd);
1564 if (error) 1536 if (error)
1565 return error; 1537 return error;
1566 1538
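
The quota.c hunks above convert the quota code from a (user flag, u32 id) pair to struct kqid: qd_alloc()/qd_get() take a kqid, lookups use qid_eq()/qid_lt(), and qd2index() interleaves user and group records (user ids at even indices, group ids at odd ones) before qd2offset() scales by the record size. Note that the conditional in qd2index() needs its own parentheses, since '+' binds tighter than '?:'. Below is a userspace sketch of that index arithmetic with a toy qid type standing in for the kernel API.

/* Simplified model of qd2index()/qd2offset(): interleave user and group
 * quota entries in one file, user ids on even indices, group ids on odd.
 * "qid" here is a toy struct, not the kernel's struct kqid. */
#include <stdint.h>
#include <stdio.h>

enum qtype { QUSER = 0, QGROUP = 1 };

struct qid {
	enum qtype type;
	uint32_t id;
};

#define QUOTA_RECORD_SIZE 64u   /* stands in for sizeof(struct gfs2_quota) */

static uint64_t qid_to_index(struct qid q)
{
	/* Parentheses around the conditional matter: '+' binds tighter
	 * than '?:', so leaving them out breaks the arithmetic. */
	return 2 * (uint64_t)q.id + ((q.type == QUSER) ? 0 : 1);
}

static uint64_t qid_to_offset(struct qid q)
{
	return qid_to_index(q) * QUOTA_RECORD_SIZE;
}

int main(void)
{
	struct qid u1000 = { QUSER, 1000 }, g1000 = { QGROUP, 1000 };

	printf("uid 1000 -> index %llu, offset %llu\n",
	       (unsigned long long)qid_to_index(u1000),
	       (unsigned long long)qid_to_offset(u1000));
	printf("gid 1000 -> index %llu, offset %llu\n",
	       (unsigned long long)qid_to_index(g1000),
	       (unsigned long long)qid_to_offset(g1000));
	return 0;
}
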
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index f25d98b87904..4f5e6e44ed83 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -14,20 +14,21 @@ struct gfs2_inode;
14struct gfs2_sbd; 14struct gfs2_sbd;
15struct shrink_control; 15struct shrink_control;
16 16
17#define NO_QUOTA_CHANGE ((u32)-1) 17#define NO_UID_QUOTA_CHANGE INVALID_UID
18#define NO_GID_QUOTA_CHANGE INVALID_GID
18 19
19extern int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid); 20extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
20extern void gfs2_quota_unhold(struct gfs2_inode *ip); 21extern void gfs2_quota_unhold(struct gfs2_inode *ip);
21 22
22extern int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid); 23extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
23extern void gfs2_quota_unlock(struct gfs2_inode *ip); 24extern void gfs2_quota_unlock(struct gfs2_inode *ip);
24 25
25extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); 26extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
26extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 27extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
27 u32 uid, u32 gid); 28 kuid_t uid, kgid_t gid);
28 29
29extern int gfs2_quota_sync(struct super_block *sb, int type); 30extern int gfs2_quota_sync(struct super_block *sb, int type);
30extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 31extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid);
31 32
32extern int gfs2_quota_init(struct gfs2_sbd *sdp); 33extern int gfs2_quota_init(struct gfs2_sbd *sdp);
33extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); 34extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
@@ -41,7 +42,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
41 int ret; 42 int ret;
42 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 43 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
43 return 0; 44 return 0;
44 ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 45 ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
45 if (ret) 46 if (ret)
46 return ret; 47 return ret;
47 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 48 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
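
With the kuid_t/kgid_t prototypes above, callers pass NO_UID_QUOTA_CHANGE/NO_GID_QUOTA_CHANGE (INVALID_UID/INVALID_GID) instead of the old (u32)-1 when no extra id should be charged. The snippet below is a hedged sketch of that sentinel check, mirroring the uid_eq() tests in gfs2_quota_hold(); plain uid_t stands in for the kernel's kuid_t machinery.

/* Sentinel "no change" id, mirroring the NO_UID_QUOTA_CHANGE checks
 * above.  uid_t and -1 stand in for kuid_t and INVALID_UID. */
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

#define NO_UID_CHANGE ((uid_t)-1)

static bool charge_extra_uid(uid_t new_uid, uid_t current_uid)
{
	/* Charge an extra quota entry only when a real, different uid is
	 * being assigned (e.g. chown), as gfs2_quota_hold() does. */
	return new_uid != NO_UID_CHANGE && new_uid != current_uid;
}

int main(void)
{
	printf("%d %d %d\n",
	       charge_extra_uid(NO_UID_CHANGE, 1000), /* 0: no change */
	       charge_extra_uid(1000, 1000),          /* 0: same owner */
	       charge_extra_uid(2000, 1000));         /* 1: chown */
	return 0;
}
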
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 37ee061d899e..d1f51fd73f86 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -350,10 +350,14 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
350 BUG_ON(len < chunk_size); 350 BUG_ON(len < chunk_size);
351 len -= chunk_size; 351 len -= chunk_size;
352 block = gfs2_rbm_to_block(&rbm); 352 block = gfs2_rbm_to_block(&rbm);
353 gfs2_rbm_from_block(&rbm, block + chunk_size); 353 if (gfs2_rbm_from_block(&rbm, block + chunk_size)) {
354 n_unaligned = 3; 354 n_unaligned = 0;
355 if (ptr)
356 break; 355 break;
356 }
357 if (ptr) {
358 n_unaligned = 3;
359 break;
360 }
357 n_unaligned = len & 3; 361 n_unaligned = len & 3;
358 } 362 }
359 363
@@ -557,22 +561,20 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
557 */ 561 */
558int gfs2_rs_alloc(struct gfs2_inode *ip) 562int gfs2_rs_alloc(struct gfs2_inode *ip)
559{ 563{
560 struct gfs2_blkreserv *res; 564 int error = 0;
561 565
566 down_write(&ip->i_rw_mutex);
562 if (ip->i_res) 567 if (ip->i_res)
563 return 0; 568 goto out;
564
565 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
566 if (!res)
567 return -ENOMEM;
568 569
569 RB_CLEAR_NODE(&res->rs_node); 570 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
571 if (!ip->i_res) {
572 error = -ENOMEM;
573 goto out;
574 }
570 575
571 down_write(&ip->i_rw_mutex); 576 RB_CLEAR_NODE(&ip->i_res->rs_node);
572 if (ip->i_res) 577out:
573 kmem_cache_free(gfs2_rsrv_cachep, res);
574 else
575 ip->i_res = res;
576 up_write(&ip->i_rw_mutex); 578 up_write(&ip->i_rw_mutex);
 577 return 0; 579 return error;
578} 580}
@@ -1255,7 +1257,7 @@ fail:
1255 1257
1256int gfs2_fitrim(struct file *filp, void __user *argp) 1258int gfs2_fitrim(struct file *filp, void __user *argp)
1257{ 1259{
1258 struct inode *inode = filp->f_dentry->d_inode; 1260 struct inode *inode = file_inode(filp);
1259 struct gfs2_sbd *sdp = GFS2_SB(inode); 1261 struct gfs2_sbd *sdp = GFS2_SB(inode);
1260 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); 1262 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
1261 struct buffer_head *bh; 1263 struct buffer_head *bh;
@@ -1321,7 +1323,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
1321 if (ret == 0) { 1323 if (ret == 0) {
1322 bh = rgd->rd_bits[0].bi_bh; 1324 bh = rgd->rd_bits[0].bi_bh;
1323 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1325 rgd->rd_flags |= GFS2_RGF_TRIMMED;
1324 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1326 gfs2_trans_add_meta(rgd->rd_gl, bh);
1325 gfs2_rgrp_out(rgd, bh->b_data); 1327 gfs2_rgrp_out(rgd, bh->b_data);
1326 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); 1328 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
1327 gfs2_trans_end(sdp); 1329 gfs2_trans_end(sdp);
@@ -1424,6 +1426,9 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1424 rs->rs_free = extlen; 1426 rs->rs_free = extlen;
1425 rs->rs_inum = ip->i_no_addr; 1427 rs->rs_inum = ip->i_no_addr;
1426 rs_insert(ip); 1428 rs_insert(ip);
1429 } else {
1430 if (goal == rgd->rd_last_alloc + rgd->rd_data0)
1431 rgd->rd_last_alloc = 0;
1427 } 1432 }
1428} 1433}
1429 1434
@@ -1963,14 +1968,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1963 1968
1964 *n = 1; 1969 *n = 1;
1965 block = gfs2_rbm_to_block(rbm); 1970 block = gfs2_rbm_to_block(rbm);
1966 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); 1971 gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
1967 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 1972 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1968 block++; 1973 block++;
1969 while (*n < elen) { 1974 while (*n < elen) {
1970 ret = gfs2_rbm_from_block(&pos, block); 1975 ret = gfs2_rbm_from_block(&pos, block);
1971 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) 1976 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1972 break; 1977 break;
1973 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); 1978 gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
1974 gfs2_setbit(&pos, true, GFS2_BLKST_USED); 1979 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1975 (*n)++; 1980 (*n)++;
1976 block++; 1981 block++;
@@ -2009,7 +2014,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
2009 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, 2014 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
2010 rbm.bi->bi_len); 2015 rbm.bi->bi_len);
2011 } 2016 }
2012 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); 2017 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
2013 gfs2_setbit(&rbm, false, new_state); 2018 gfs2_setbit(&rbm, false, new_state);
2014 } 2019 }
2015 2020
@@ -2152,7 +2157,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2152 if (error == 0) { 2157 if (error == 0) {
2153 struct gfs2_dinode *di = 2158 struct gfs2_dinode *di =
2154 (struct gfs2_dinode *)dibh->b_data; 2159 (struct gfs2_dinode *)dibh->b_data;
2155 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 2160 gfs2_trans_add_meta(ip->i_gl, dibh);
2156 di->di_goal_meta = di->di_goal_data = 2161 di->di_goal_meta = di->di_goal_data =
2157 cpu_to_be64(ip->i_goal); 2162 cpu_to_be64(ip->i_goal);
2158 brelse(dibh); 2163 brelse(dibh);
@@ -2171,7 +2176,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2171 *generation = rbm.rgd->rd_igeneration++; 2176 *generation = rbm.rgd->rd_igeneration++;
2172 } 2177 }
2173 2178
2174 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); 2179 gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
2175 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); 2180 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2176 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); 2181 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2177 2182
@@ -2218,7 +2223,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
2218 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); 2223 trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
2219 rgd->rd_free += blen; 2224 rgd->rd_free += blen;
2220 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2225 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
2221 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2226 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2222 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2227 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2223 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2228 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2224 2229
@@ -2255,7 +2260,7 @@ void gfs2_unlink_di(struct inode *inode)
2255 if (!rgd) 2260 if (!rgd)
2256 return; 2261 return;
2257 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2262 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
2258 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2263 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2259 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2264 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2260 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2265 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2261 update_rgrp_lvb_unlinked(rgd, 1); 2266 update_rgrp_lvb_unlinked(rgd, 1);
@@ -2276,7 +2281,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
2276 rgd->rd_dinodes--; 2281 rgd->rd_dinodes--;
2277 rgd->rd_free++; 2282 rgd->rd_free++;
2278 2283
2279 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2284 gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
2280 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2285 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2281 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2286 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2282 update_rgrp_lvb_unlinked(rgd, -1); 2287 update_rgrp_lvb_unlinked(rgd, -1);
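
Besides switching every gfs2_trans_add_bh(..., 1) call to gfs2_trans_add_meta(), the rgrp.c hunks rework gfs2_rs_alloc() so the NULL check and the assignment of ip->i_res both happen with i_rw_mutex held for write, closing the window where two racing callers could each allocate a reservation. Below is a userspace sketch of that check-and-publish-under-one-write-lock shape, with stand-in types.

/* Lazy, race-free allocation of a per-inode reservation, in the spirit
 * of the reworked gfs2_rs_alloc(): the existence check and the publish
 * step share one write lock.  Userspace stand-ins only. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct reservation { long free_blocks; };

struct toy_inode {
	pthread_rwlock_t rw;
	struct reservation *res;     /* NULL until first use */
};

static int rs_alloc(struct toy_inode *ip)
{
	int error = 0;

	pthread_rwlock_wrlock(&ip->rw);
	if (ip->res)
		goto out;                  /* someone else already did it */

	ip->res = calloc(1, sizeof(*ip->res));
	if (!ip->res)
		error = -1;                /* -ENOMEM in the kernel version */
out:
	pthread_rwlock_unlock(&ip->rw);
	return error;
}

int main(void)
{
	struct toy_inode ip = { PTHREAD_RWLOCK_INITIALIZER, NULL };

	printf("%d %d\n", rs_alloc(&ip), rs_alloc(&ip)); /* 0 0, one allocation */
	free(ip.res);
	return 0;
}
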
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d6488674d916..cab77b8ba84f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -500,7 +500,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
500 if (error) 500 if (error)
501 return; 501 return;
502 502
503 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 503 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
504 504
505 spin_lock(&sdp->sd_statfs_spin); 505 spin_lock(&sdp->sd_statfs_spin);
506 l_sc->sc_total += total; 506 l_sc->sc_total += total;
@@ -528,7 +528,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 528 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 529 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
530 530
531 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 531 gfs2_trans_add_meta(l_ip->i_gl, l_bh);
532 532
533 spin_lock(&sdp->sd_statfs_spin); 533 spin_lock(&sdp->sd_statfs_spin);
534 m_sc->sc_total += l_sc->sc_total; 534 m_sc->sc_total += l_sc->sc_total;
@@ -539,7 +539,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
539 0, sizeof(struct gfs2_statfs_change)); 539 0, sizeof(struct gfs2_statfs_change));
540 spin_unlock(&sdp->sd_statfs_spin); 540 spin_unlock(&sdp->sd_statfs_spin);
541 541
542 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); 542 gfs2_trans_add_meta(m_ip->i_gl, m_bh);
543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 543 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
544} 544}
545 545
@@ -663,54 +663,6 @@ out:
663 return error; 663 return error;
664} 664}
665 665
666/**
667 * gfs2_freeze_fs - freezes the file system
668 * @sdp: the file system
669 *
670 * This function flushes data and meta data for all machines by
671 * acquiring the transaction log exclusively. All journals are
672 * ensured to be in a clean state as well.
673 *
674 * Returns: errno
675 */
676
677int gfs2_freeze_fs(struct gfs2_sbd *sdp)
678{
679 int error = 0;
680
681 mutex_lock(&sdp->sd_freeze_lock);
682
683 if (!sdp->sd_freeze_count++) {
684 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
685 if (error)
686 sdp->sd_freeze_count--;
687 }
688
689 mutex_unlock(&sdp->sd_freeze_lock);
690
691 return error;
692}
693
694/**
695 * gfs2_unfreeze_fs - unfreezes the file system
696 * @sdp: the file system
697 *
698 * This function allows the file system to proceed by unlocking
699 * the exclusively held transaction lock. Other GFS2 nodes are
700 * now free to acquire the lock shared and go on with their lives.
701 *
702 */
703
704void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
705{
706 mutex_lock(&sdp->sd_freeze_lock);
707
708 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
709 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
710
711 mutex_unlock(&sdp->sd_freeze_lock);
712}
713
714void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 666void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
715{ 667{
716 struct gfs2_dinode *str = buf; 668 struct gfs2_dinode *str = buf;
@@ -721,8 +673,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
721 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 673 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
722 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 674 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
723 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 675 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
724 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 676 str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
725 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 677 str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
726 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 678 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
727 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 679 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
728 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 680 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
@@ -824,7 +776,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
824 776
825 ret = gfs2_meta_inode_buffer(ip, &bh); 777 ret = gfs2_meta_inode_buffer(ip, &bh);
826 if (ret == 0) { 778 if (ret == 0) {
827 gfs2_trans_add_bh(ip->i_gl, bh, 1); 779 gfs2_trans_add_meta(ip->i_gl, bh);
828 gfs2_dinode_out(ip, bh->b_data); 780 gfs2_dinode_out(ip, bh->b_data);
829 brelse(bh); 781 brelse(bh);
830 } 782 }
@@ -888,13 +840,6 @@ static void gfs2_put_super(struct super_block *sb)
888 int error; 840 int error;
889 struct gfs2_jdesc *jd; 841 struct gfs2_jdesc *jd;
890 842
891 /* Unfreeze the filesystem, if we need to */
892
893 mutex_lock(&sdp->sd_freeze_lock);
894 if (sdp->sd_freeze_count)
895 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
896 mutex_unlock(&sdp->sd_freeze_lock);
897
898 /* No more recovery requests */ 843 /* No more recovery requests */
899 set_bit(SDF_NORECOVERY, &sdp->sd_flags); 844 set_bit(SDF_NORECOVERY, &sdp->sd_flags);
900 smp_mb(); 845 smp_mb();
@@ -985,7 +930,7 @@ static int gfs2_freeze(struct super_block *sb)
985 return -EINVAL; 930 return -EINVAL;
986 931
987 for (;;) { 932 for (;;) {
988 error = gfs2_freeze_fs(sdp); 933 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
989 if (!error) 934 if (!error)
990 break; 935 break;
991 936
@@ -1013,7 +958,9 @@ static int gfs2_freeze(struct super_block *sb)
1013 958
1014static int gfs2_unfreeze(struct super_block *sb) 959static int gfs2_unfreeze(struct super_block *sb)
1015{ 960{
1016 gfs2_unfreeze_fs(sb->s_fs_info); 961 struct gfs2_sbd *sdp = sb->s_fs_info;
962
963 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
1017 return 0; 964 return 0;
1018} 965}
1019 966
@@ -1429,7 +1376,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1429 if (error) 1376 if (error)
1430 return error; 1377 return error;
1431 1378
1432 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1379 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1433 if (error) 1380 if (error)
1434 return error; 1381 return error;
1435 1382
@@ -1577,6 +1524,7 @@ out:
1577 /* Case 3 starts here */ 1524 /* Case 3 starts here */
1578 truncate_inode_pages(&inode->i_data, 0); 1525 truncate_inode_pages(&inode->i_data, 0);
1579 gfs2_rs_delete(ip); 1526 gfs2_rs_delete(ip);
1527 gfs2_ordered_del_inode(ip);
1580 clear_inode(inode); 1528 clear_inode(inode);
1581 gfs2_dir_hash_inval(ip); 1529 gfs2_dir_hash_inval(ip);
1582 ip->i_gl->gl_object = NULL; 1530 ip->i_gl->gl_object = NULL;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index a0464680af0b..90e3322ffa10 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct super_block *sb, int type); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
51
52extern struct file_system_type gfs2_fs_type; 49extern struct file_system_type gfs2_fs_type;
53extern struct file_system_type gfs2meta_fs_type; 50extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 51extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8056b7b7238e..aa5c48044966 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -91,39 +91,37 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
91 91
92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) 92static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
93{ 93{
94 unsigned int count; 94 struct super_block *sb = sdp->sd_vfs;
95 95 int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1;
96 mutex_lock(&sdp->sd_freeze_lock);
97 count = sdp->sd_freeze_count;
98 mutex_unlock(&sdp->sd_freeze_lock);
99 96
100 return snprintf(buf, PAGE_SIZE, "%u\n", count); 97 return snprintf(buf, PAGE_SIZE, "%u\n", frozen);
101} 98}
102 99
103static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 100static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
104{ 101{
105 ssize_t ret = len; 102 int error;
106 int error = 0;
107 int n = simple_strtol(buf, NULL, 0); 103 int n = simple_strtol(buf, NULL, 0);
108 104
109 if (!capable(CAP_SYS_ADMIN)) 105 if (!capable(CAP_SYS_ADMIN))
110 return -EACCES; 106 return -EPERM;
111 107
112 switch (n) { 108 switch (n) {
113 case 0: 109 case 0:
114 gfs2_unfreeze_fs(sdp); 110 error = thaw_super(sdp->sd_vfs);
115 break; 111 break;
116 case 1: 112 case 1:
117 error = gfs2_freeze_fs(sdp); 113 error = freeze_super(sdp->sd_vfs);
118 break; 114 break;
119 default: 115 default:
120 ret = -EINVAL; 116 return -EINVAL;
121 } 117 }
122 118
123 if (error) 119 if (error) {
124 fs_warn(sdp, "freeze %d error %d", n, error); 120 fs_warn(sdp, "freeze %d error %d", n, error);
121 return error;
122 }
125 123
126 return ret; 124 return len;
127} 125}
128 126
129static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) 127static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
@@ -135,7 +133,7 @@ static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
135static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 133static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
136{ 134{
137 if (!capable(CAP_SYS_ADMIN)) 135 if (!capable(CAP_SYS_ADMIN))
138 return -EACCES; 136 return -EPERM;
139 137
140 if (simple_strtol(buf, NULL, 0) != 1) 138 if (simple_strtol(buf, NULL, 0) != 1)
141 return -EINVAL; 139 return -EINVAL;
@@ -150,7 +148,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
150 size_t len) 148 size_t len)
151{ 149{
152 if (!capable(CAP_SYS_ADMIN)) 150 if (!capable(CAP_SYS_ADMIN))
153 return -EACCES; 151 return -EPERM;
154 152
155 if (simple_strtol(buf, NULL, 0) != 1) 153 if (simple_strtol(buf, NULL, 0) != 1)
156 return -EINVAL; 154 return -EINVAL;
@@ -163,7 +161,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
163 size_t len) 161 size_t len)
164{ 162{
165 if (!capable(CAP_SYS_ADMIN)) 163 if (!capable(CAP_SYS_ADMIN))
166 return -EACCES; 164 return -EPERM;
167 165
168 if (simple_strtol(buf, NULL, 0) != 1) 166 if (simple_strtol(buf, NULL, 0) != 1)
169 return -EINVAL; 167 return -EINVAL;
@@ -175,30 +173,40 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
175static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, 173static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
176 size_t len) 174 size_t len)
177{ 175{
176 struct kqid qid;
178 int error; 177 int error;
179 u32 id; 178 u32 id;
180 179
181 if (!capable(CAP_SYS_ADMIN)) 180 if (!capable(CAP_SYS_ADMIN))
182 return -EACCES; 181 return -EPERM;
183 182
184 id = simple_strtoul(buf, NULL, 0); 183 id = simple_strtoul(buf, NULL, 0);
185 184
186 error = gfs2_quota_refresh(sdp, 1, id); 185 qid = make_kqid(current_user_ns(), USRQUOTA, id);
186 if (!qid_valid(qid))
187 return -EINVAL;
188
189 error = gfs2_quota_refresh(sdp, qid);
187 return error ? error : len; 190 return error ? error : len;
188} 191}
189 192
190static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, 193static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
191 size_t len) 194 size_t len)
192{ 195{
196 struct kqid qid;
193 int error; 197 int error;
194 u32 id; 198 u32 id;
195 199
196 if (!capable(CAP_SYS_ADMIN)) 200 if (!capable(CAP_SYS_ADMIN))
197 return -EACCES; 201 return -EPERM;
198 202
199 id = simple_strtoul(buf, NULL, 0); 203 id = simple_strtoul(buf, NULL, 0);
200 204
201 error = gfs2_quota_refresh(sdp, 0, id); 205 qid = make_kqid(current_user_ns(), GRPQUOTA, id);
206 if (!qid_valid(qid))
207 return -EINVAL;
208
209 error = gfs2_quota_refresh(sdp, qid);
202 return error ? error : len; 210 return error ? error : len;
203} 211}
204 212
@@ -213,7 +221,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
213 int rv; 221 int rv;
214 222
215 if (!capable(CAP_SYS_ADMIN)) 223 if (!capable(CAP_SYS_ADMIN))
216 return -EACCES; 224 return -EPERM;
217 225
218 rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum, 226 rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum,
219 mode); 227 mode);
@@ -332,6 +340,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
332 return ret; 340 return ret;
333} 341}
334 342
343static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
344{
345 int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
346
347 return sprintf(buf, "%d\n", val);
348}
349
350static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
351{
352 ssize_t ret = len;
353 int val;
354
355 val = simple_strtol(buf, NULL, 0);
356
357 if ((val == 1) &&
358 !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
359 complete(&sdp->sd_wdack);
360 else
361 ret = -EINVAL;
362 return ret;
363}
364
335static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 365static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
336{ 366{
337 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 367 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -463,7 +493,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
463 493
464GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 494GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
465GDLM_ATTR(block, 0644, block_show, block_store); 495GDLM_ATTR(block, 0644, block_show, block_store);
466GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 496GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
467GDLM_ATTR(jid, 0644, jid_show, jid_store); 497GDLM_ATTR(jid, 0644, jid_show, jid_store);
468GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); 498GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
469GDLM_ATTR(first_done, 0444, first_done_show, NULL); 499GDLM_ATTR(first_done, 0444, first_done_show, NULL);
@@ -502,7 +532,7 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
502 unsigned int x, y; 532 unsigned int x, y;
503 533
504 if (!capable(CAP_SYS_ADMIN)) 534 if (!capable(CAP_SYS_ADMIN))
505 return -EACCES; 535 return -EPERM;
506 536
507 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y) 537 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
508 return -EINVAL; 538 return -EINVAL;
@@ -521,7 +551,7 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
521 unsigned int x; 551 unsigned int x;
522 552
523 if (!capable(CAP_SYS_ADMIN)) 553 if (!capable(CAP_SYS_ADMIN))
524 return -EACCES; 554 return -EPERM;
525 555
526 x = simple_strtoul(buf, NULL, 0); 556 x = simple_strtoul(buf, NULL, 0);
527 557
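
The sys.c changes route the freeze file through the VFS (freeze_super()/thaw_super()) now that gfs2_freeze_fs()/gfs2_unfreeze_fs() are gone, change the permission failure from -EACCES to -EPERM, and validate quota-refresh ids with make_kqid()/qid_valid() before calling gfs2_quota_refresh(). Below is a small userspace sketch of the store-handler shape shown in freeze_store(): parse a small integer, dispatch, and return either the consumed length or a negative errno. The callbacks are fakes.

/* Shape of a sysfs-style "store" handler as in freeze_store() above.
 * Userspace sketch with fake freeze/thaw callbacks. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int do_freeze(void) { puts("freeze"); return 0; }
static int do_thaw(void)   { puts("thaw");   return 0; }

static long freeze_store(const char *buf, size_t len, int is_admin)
{
	int error;
	long n = strtol(buf, NULL, 0);

	if (!is_admin)
		return -EPERM;           /* was -EACCES before this change */

	switch (n) {
	case 0:
		error = do_thaw();
		break;
	case 1:
		error = do_freeze();
		break;
	default:
		return -EINVAL;
	}
	return error ? error : (long)len;
}

int main(void)
{
	printf("%ld\n", freeze_store("1\n", 2, 1));  /* 2: bytes consumed */
	printf("%ld\n", freeze_store("7\n", 2, 1));  /* negative: -EINVAL */
	return 0;
}
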
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 413627072f36..88162fae27a5 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -18,6 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "glock.h" 20#include "glock.h"
21#include "inode.h"
21#include "log.h" 22#include "log.h"
22#include "lops.h" 23#include "lops.h"
23#include "meta_io.h" 24#include "meta_io.h"
@@ -142,44 +143,143 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
142 sb_end_intwrite(sdp->sd_vfs); 143 sb_end_intwrite(sdp->sd_vfs);
143} 144}
144 145
146static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
147 struct buffer_head *bh,
148 const struct gfs2_log_operations *lops)
149{
150 struct gfs2_bufdata *bd;
151
152 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
153 bd->bd_bh = bh;
154 bd->bd_gl = gl;
155 bd->bd_ops = lops;
156 INIT_LIST_HEAD(&bd->bd_list);
157 bh->b_private = bd;
158 return bd;
159}
160
145/** 161/**
146 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction 162 * gfs2_trans_add_data - Add a databuf to the transaction.
147 * @gl: the glock the buffer belongs to 163 * @gl: The inode glock associated with the buffer
148 * @bh: The buffer to add 164 * @bh: The buffer to add
149 * @meta: True in the case of adding metadata
150 * 165 *
166 * This is used in two distinct cases:
167 * i) In ordered write mode
 168 * We put the data buffer on a list so that we can ensure that it's
169 * synced to disk at the right time
170 * ii) In journaled data mode
171 * We need to journal the data block in the same way as metadata in
172 * the functions above. The difference is that here we have a tag
173 * which is two __be64's being the block number (as per meta data)
174 * and a flag which says whether the data block needs escaping or
175 * not. This means we need a new log entry for each 251 or so data
176 * blocks, which isn't an enormous overhead but twice as much as
177 * for normal metadata blocks.
151 */ 178 */
179void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
180{
181 struct gfs2_trans *tr = current->journal_info;
182 struct gfs2_sbd *sdp = gl->gl_sbd;
183 struct address_space *mapping = bh->b_page->mapping;
184 struct gfs2_inode *ip = GFS2_I(mapping->host);
185 struct gfs2_bufdata *bd;
152 186
153void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) 187 if (!gfs2_is_jdata(ip)) {
188 gfs2_ordered_add_inode(ip);
189 return;
190 }
191
192 lock_buffer(bh);
193 gfs2_log_lock(sdp);
194 bd = bh->b_private;
195 if (bd == NULL) {
196 gfs2_log_unlock(sdp);
197 unlock_buffer(bh);
198 if (bh->b_private == NULL)
199 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
200 lock_buffer(bh);
201 gfs2_log_lock(sdp);
202 }
203 gfs2_assert(sdp, bd->bd_gl == gl);
204 tr->tr_touched = 1;
205 if (list_empty(&bd->bd_list)) {
206 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
207 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
208 gfs2_pin(sdp, bd->bd_bh);
209 tr->tr_num_databuf_new++;
210 sdp->sd_log_num_databuf++;
211 list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
212 }
213 gfs2_log_unlock(sdp);
214 unlock_buffer(bh);
215}
216
217static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
154{ 218{
219 struct gfs2_meta_header *mh;
220 struct gfs2_trans *tr;
221
222 tr = current->journal_info;
223 tr->tr_touched = 1;
224 if (!list_empty(&bd->bd_list))
225 return;
226 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
227 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
228 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
229 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
230 printk(KERN_ERR
231 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
232 (unsigned long long)bd->bd_bh->b_blocknr);
233 BUG();
234 }
235 gfs2_pin(sdp, bd->bd_bh);
236 mh->__pad0 = cpu_to_be64(0);
237 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
238 sdp->sd_log_num_buf++;
239 list_add(&bd->bd_list, &sdp->sd_log_le_buf);
240 tr->tr_num_buf_new++;
241}
242
243void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
244{
245
155 struct gfs2_sbd *sdp = gl->gl_sbd; 246 struct gfs2_sbd *sdp = gl->gl_sbd;
156 struct gfs2_bufdata *bd; 247 struct gfs2_bufdata *bd;
157 248
158 lock_buffer(bh); 249 lock_buffer(bh);
159 gfs2_log_lock(sdp); 250 gfs2_log_lock(sdp);
160 bd = bh->b_private; 251 bd = bh->b_private;
161 if (bd) 252 if (bd == NULL) {
162 gfs2_assert(sdp, bd->bd_gl == gl);
163 else {
164 gfs2_log_unlock(sdp); 253 gfs2_log_unlock(sdp);
165 unlock_buffer(bh); 254 unlock_buffer(bh);
166 gfs2_attach_bufdata(gl, bh, meta); 255 lock_page(bh->b_page);
167 bd = bh->b_private; 256 if (bh->b_private == NULL)
257 bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops);
258 unlock_page(bh->b_page);
168 lock_buffer(bh); 259 lock_buffer(bh);
169 gfs2_log_lock(sdp); 260 gfs2_log_lock(sdp);
170 } 261 }
171 lops_add(sdp, bd); 262 gfs2_assert(sdp, bd->bd_gl == gl);
263 meta_lo_add(sdp, bd);
172 gfs2_log_unlock(sdp); 264 gfs2_log_unlock(sdp);
173 unlock_buffer(bh); 265 unlock_buffer(bh);
174} 266}
175 267
176void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) 268void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
177{ 269{
270 struct gfs2_glock *gl = bd->bd_gl;
271 struct gfs2_trans *tr = current->journal_info;
272
178 BUG_ON(!list_empty(&bd->bd_list)); 273 BUG_ON(!list_empty(&bd->bd_list));
179 BUG_ON(!list_empty(&bd->bd_ail_st_list)); 274 BUG_ON(!list_empty(&bd->bd_ail_st_list));
180 BUG_ON(!list_empty(&bd->bd_ail_gl_list)); 275 BUG_ON(!list_empty(&bd->bd_ail_gl_list));
181 lops_init_le(bd, &gfs2_revoke_lops); 276 bd->bd_ops = &gfs2_revoke_lops;
182 lops_add(sdp, bd); 277 tr->tr_touched = 1;
278 tr->tr_num_revoke++;
279 sdp->sd_log_num_revoke++;
280 atomic_inc(&gl->gl_revokes);
281 set_bit(GLF_LFLUSH, &gl->gl_flags);
282 list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
183} 283}
184 284
185void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) 285void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
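The trans.c hunk above retires gfs2_trans_add_bh(gl, bh, meta), which picked its behaviour from a boolean flag, in favour of two explicit entry points that share the new gfs2_alloc_bufdata() helper. As a rough user-space sketch of that refactoring pattern (all types and names below are invented stand-ins, not the kernel API), the split looks like this:

#include <stdio.h>
#include <stdlib.h>

struct buf { int blocknr; void *priv; };
struct ops { const char *name; };

static const struct ops data_ops = { "data" };
static const struct ops meta_ops = { "meta" };

/* Shared helper, analogous to gfs2_alloc_bufdata(): attach tracking
 * state to the buffer exactly once.  (The kernel helper allocates with
 * __GFP_NOFAIL and so cannot fail; here we just bail out.) */
static void alloc_bufdata(struct buf *b, const struct ops *o)
{
        const struct ops **bd = malloc(sizeof(*bd));

        if (!bd)
                exit(1);
        *bd = o;
        b->priv = bd;
}

/* Two explicit entry points instead of one function with a flag. */
static void trans_add_data(struct buf *b)
{
        if (!b->priv)
                alloc_bufdata(b, &data_ops);
        printf("block %d logged as %s\n", b->blocknr,
               (*(const struct ops **)b->priv)->name);
}

static void trans_add_meta(struct buf *b)
{
        if (!b->priv)
                alloc_bufdata(b, &meta_ops);
        printf("block %d logged as %s\n", b->blocknr,
               (*(const struct ops **)b->priv)->name);
}

int main(void)
{
        struct buf d = { 10, NULL }, m = { 11, NULL };

        trans_add_data(&d);     /* previously trans_add_bh(gl, bh, 0) */
        trans_add_meta(&m);     /* previously trans_add_bh(gl, bh, 1) */
        free(d.priv);
        free(m.priv);
        return 0;
}

Callers elsewhere in the series change mechanically, as the fs/gfs2/xattr.c hunks further down show: gfs2_trans_add_bh(ip->i_gl, bh, 1) becomes gfs2_trans_add_meta(ip->i_gl, bh).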
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index bf2ae9aeee7a..1e6e7da25a17 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -39,7 +39,8 @@ extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
39 unsigned int revokes); 39 unsigned int revokes);
40 40
41extern void gfs2_trans_end(struct gfs2_sbd *sdp); 41extern void gfs2_trans_end(struct gfs2_sbd *sdp);
42extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); 42extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
43extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); 44extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
44extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); 45extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
45 46
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f00d7c5744f6..6402fb69d71b 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -54,6 +54,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
54 54
55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 55 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
56 56
57 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
58 wait_for_completion(&sdp->sd_wdack);
59
57 if (lm->lm_unmount) { 60 if (lm->lm_unmount) {
58 fs_err(sdp, "telling LM to unmount\n"); 61 fs_err(sdp, "telling LM to unmount\n");
59 lm->lm_unmount(sdp); 62 lm->lm_unmount(sdp);
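The util.c hunk makes a withdrawing node block on the sd_wdack completion, but only when the lock protocol is lock_dlm, presumably so that the dlm control infrastructure can acknowledge the withdraw before the lock module is told to unmount. A loose user-space analogue of that wait-for-acknowledgement pattern, with a pthread condition variable standing in for the kernel completion (all names here are invented for illustration):

#include <pthread.h>
#include <stdio.h>

/* A minimal "completion": a flag protected by a mutex and a condvar. */
struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
};

static void complete(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

static struct completion wdack = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

static void *control_side(void *arg)
{
        (void)arg;
        puts("control side: acknowledging withdraw");
        complete(&wdack);               /* stand-in for the real ack path */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, control_side, NULL);
        puts("fs: withdrawing, waiting for ack");
        wait_for_completion(&wdack);    /* analogue of the new sd_wdack wait */
        puts("fs: ack received, telling LM to unmount");
        pthread_join(t, NULL);
        return 0;
}

Build with cc -pthread; in the kernel the acknowledgement comes from userspace rather than from a sibling thread.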
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 76c144b3c9bb..ecd37f30ab91 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -270,7 +270,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
270 if (error) 270 if (error)
271 goto out_gunlock; 271 goto out_gunlock;
272 272
273 gfs2_trans_add_bh(ip->i_gl, bh, 1); 273 gfs2_trans_add_meta(ip->i_gl, bh);
274 274
275 dataptrs = GFS2_EA2DATAPTRS(ea); 275 dataptrs = GFS2_EA2DATAPTRS(ea);
276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { 276 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
@@ -309,7 +309,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
309 error = gfs2_meta_inode_buffer(ip, &dibh); 309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) { 310 if (!error) {
311 ip->i_inode.i_ctime = CURRENT_TIME; 311 ip->i_inode.i_ctime = CURRENT_TIME;
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 312 gfs2_trans_add_meta(ip->i_gl, dibh);
313 gfs2_dinode_out(ip, dibh->b_data); 313 gfs2_dinode_out(ip, dibh->b_data);
314 brelse(dibh); 314 brelse(dibh);
315 } 315 }
@@ -331,7 +331,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
331 if (error) 331 if (error)
332 return error; 332 return error;
333 333
334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 334 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
335 if (error) 335 if (error)
336 goto out_alloc; 336 goto out_alloc;
337 337
@@ -509,7 +509,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
509 } 509 }
510 510
511 if (din) { 511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1); 512 gfs2_trans_add_meta(ip->i_gl, bh[x]);
513 memcpy(pos, din, cp_size); 513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize; 514 din += sdp->sd_jbsize;
515 } 515 }
@@ -629,7 +629,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
629 return error; 629 return error;
630 gfs2_trans_add_unrevoke(sdp, block, 1); 630 gfs2_trans_add_unrevoke(sdp, block, 1);
631 *bhp = gfs2_meta_new(ip->i_gl, block); 631 *bhp = gfs2_meta_new(ip->i_gl, block);
632 gfs2_trans_add_bh(ip->i_gl, *bhp, 1); 632 gfs2_trans_add_meta(ip->i_gl, *bhp);
633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); 633 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); 634 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
635 635
@@ -691,7 +691,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
691 return error; 691 return error;
692 gfs2_trans_add_unrevoke(sdp, block, 1); 692 gfs2_trans_add_unrevoke(sdp, block, 1);
693 bh = gfs2_meta_new(ip->i_gl, block); 693 bh = gfs2_meta_new(ip->i_gl, block);
694 gfs2_trans_add_bh(ip->i_gl, bh, 1); 694 gfs2_trans_add_meta(ip->i_gl, bh);
695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); 695 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
696 696
697 gfs2_add_inode_blocks(&ip->i_inode, 1); 697 gfs2_add_inode_blocks(&ip->i_inode, 1);
@@ -751,7 +751,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
751 error = gfs2_meta_inode_buffer(ip, &dibh); 751 error = gfs2_meta_inode_buffer(ip, &dibh);
752 if (!error) { 752 if (!error) {
753 ip->i_inode.i_ctime = CURRENT_TIME; 753 ip->i_inode.i_ctime = CURRENT_TIME;
754 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 754 gfs2_trans_add_meta(ip->i_gl, dibh);
755 gfs2_dinode_out(ip, dibh->b_data); 755 gfs2_dinode_out(ip, dibh->b_data);
756 brelse(dibh); 756 brelse(dibh);
757 } 757 }
@@ -834,7 +834,7 @@ static void ea_set_remove_stuffed(struct gfs2_inode *ip,
834 struct gfs2_ea_header *prev = el->el_prev; 834 struct gfs2_ea_header *prev = el->el_prev;
835 u32 len; 835 u32 len;
836 836
837 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 837 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
838 838
839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) { 839 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
840 ea->ea_type = GFS2_EATYPE_UNUSED; 840 ea->ea_type = GFS2_EATYPE_UNUSED;
@@ -872,7 +872,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
872 if (error) 872 if (error)
873 return error; 873 return error;
874 874
875 gfs2_trans_add_bh(ip->i_gl, bh, 1); 875 gfs2_trans_add_meta(ip->i_gl, bh);
876 876
877 if (es->ea_split) 877 if (es->ea_split)
878 ea = ea_split_ea(ea); 878 ea = ea_split_ea(ea);
@@ -886,7 +886,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
886 if (error) 886 if (error)
887 goto out; 887 goto out;
888 ip->i_inode.i_ctime = CURRENT_TIME; 888 ip->i_inode.i_ctime = CURRENT_TIME;
889 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 889 gfs2_trans_add_meta(ip->i_gl, dibh);
890 gfs2_dinode_out(ip, dibh->b_data); 890 gfs2_dinode_out(ip, dibh->b_data);
891 brelse(dibh); 891 brelse(dibh);
892out: 892out:
@@ -901,7 +901,7 @@ static int ea_set_simple_alloc(struct gfs2_inode *ip,
901 struct gfs2_ea_header *ea = es->es_ea; 901 struct gfs2_ea_header *ea = es->es_ea;
902 int error; 902 int error;
903 903
904 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1); 904 gfs2_trans_add_meta(ip->i_gl, es->es_bh);
905 905
906 if (es->ea_split) 906 if (es->ea_split)
907 ea = ea_split_ea(ea); 907 ea = ea_split_ea(ea);
@@ -997,7 +997,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
997 goto out; 997 goto out;
998 } 998 }
999 999
1000 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1000 gfs2_trans_add_meta(ip->i_gl, indbh);
1001 } else { 1001 } else {
1002 u64 blk; 1002 u64 blk;
1003 unsigned int n = 1; 1003 unsigned int n = 1;
@@ -1006,7 +1006,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1006 return error; 1006 return error;
1007 gfs2_trans_add_unrevoke(sdp, blk, 1); 1007 gfs2_trans_add_unrevoke(sdp, blk, 1);
1008 indbh = gfs2_meta_new(ip->i_gl, blk); 1008 indbh = gfs2_meta_new(ip->i_gl, blk);
1009 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1009 gfs2_trans_add_meta(ip->i_gl, indbh);
1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 1010 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1011 gfs2_buffer_clear_tail(indbh, mh_size); 1011 gfs2_buffer_clear_tail(indbh, mh_size);
1012 1012
@@ -1092,7 +1092,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1092 if (error) 1092 if (error)
1093 return error; 1093 return error;
1094 1094
1095 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1095 gfs2_trans_add_meta(ip->i_gl, el->el_bh);
1096 1096
1097 if (prev) { 1097 if (prev) {
1098 u32 len; 1098 u32 len;
@@ -1109,7 +1109,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1109 error = gfs2_meta_inode_buffer(ip, &dibh); 1109 error = gfs2_meta_inode_buffer(ip, &dibh);
1110 if (!error) { 1110 if (!error) {
1111 ip->i_inode.i_ctime = CURRENT_TIME; 1111 ip->i_inode.i_ctime = CURRENT_TIME;
1112 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1112 gfs2_trans_add_meta(ip->i_gl, dibh);
1113 gfs2_dinode_out(ip, dibh->b_data); 1113 gfs2_dinode_out(ip, dibh->b_data);
1114 brelse(dibh); 1114 brelse(dibh);
1115 } 1115 }
@@ -1265,7 +1265,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1265 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0); 1266 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1267 if (error == 0) { 1267 if (error == 0) {
1268 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1268 gfs2_trans_add_meta(ip->i_gl, el.el_bh);
1269 memcpy(GFS2_EA2DATA(el.el_ea), data, 1269 memcpy(GFS2_EA2DATA(el.el_ea), data,
1270 GFS2_EA_DATA_LEN(el.el_ea)); 1270 GFS2_EA_DATA_LEN(el.el_ea));
1271 } 1271 }
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352 if (error) 1352 if (error)
1353 goto out_gunlock; 1353 goto out_gunlock;
1354 1354
1355 gfs2_trans_add_bh(ip->i_gl, indbh, 1); 1355 gfs2_trans_add_meta(ip->i_gl, indbh);
1356 1356
1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); 1357 eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1358 bstart = 0; 1358 bstart = 0;
@@ -1384,7 +1384,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
1384 1384
1385 error = gfs2_meta_inode_buffer(ip, &dibh); 1385 error = gfs2_meta_inode_buffer(ip, &dibh);
1386 if (!error) { 1386 if (!error) {
1387 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1387 gfs2_trans_add_meta(ip->i_gl, dibh);
1388 gfs2_dinode_out(ip, dibh->b_data); 1388 gfs2_dinode_out(ip, dibh->b_data);
1389 brelse(dibh); 1389 brelse(dibh);
1390 } 1390 }
@@ -1434,7 +1434,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1434 1434
1435 error = gfs2_meta_inode_buffer(ip, &dibh); 1435 error = gfs2_meta_inode_buffer(ip, &dibh);
1436 if (!error) { 1436 if (!error) {
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1437 gfs2_trans_add_meta(ip->i_gl, dibh);
1438 gfs2_dinode_out(ip, dibh->b_data); 1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh); 1439 brelse(dibh);
1440 } 1440 }
@@ -1461,7 +1461,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1461 if (error) 1461 if (error)
1462 return error; 1462 return error;
1463 1463
1464 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1464 error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1465 if (error) 1465 if (error)
1466 return error; 1466 return error;
1467 1467
diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig
index b77c5bc20f8a..998e3a6decf3 100644
--- a/fs/hfs/Kconfig
+++ b/fs/hfs/Kconfig
@@ -1,6 +1,6 @@
1config HFS_FS 1config HFS_FS
2 tristate "Apple Macintosh file system support (EXPERIMENTAL)" 2 tristate "Apple Macintosh file system support"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK
4 select NLS 4 select NLS
5 help 5 help
6 If you say Y here, you will be able to mount Macintosh-formatted 6 If you say Y here, you will be able to mount Macintosh-formatted
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 422dde2ec0a1..5f7f1abd5f6d 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -51,7 +51,7 @@ done:
51 */ 51 */
52static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 52static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
53{ 53{
54 struct inode *inode = filp->f_path.dentry->d_inode; 54 struct inode *inode = file_inode(filp);
55 struct super_block *sb = inode->i_sb; 55 struct super_block *sb = inode->i_sb;
56 int len, err; 56 int len, err;
57 char strbuf[HFS_MAX_NAMELEN]; 57 char strbuf[HFS_MAX_NAMELEN];
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d47f11658c17..3031dfdd2358 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -128,7 +128,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
128{ 128{
129 struct file *file = iocb->ki_filp; 129 struct file *file = iocb->ki_filp;
130 struct address_space *mapping = file->f_mapping; 130 struct address_space *mapping = file->f_mapping;
131 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 131 struct inode *inode = file_inode(file)->i_mapping->host;
132 ssize_t ret; 132 ssize_t ret;
133 133
134 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 134 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
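Both hfs hunks above swap open-coded filp->f_path.dentry->d_inode chains for the file_inode() helper. The value is readability and a single place to change how the inode is reached; a tiny standalone illustration with simplified structures (the real VFS helper may read a cached pointer instead of walking the chain):

#include <stdio.h>

struct inode  { unsigned long i_ino; };
struct dentry { struct inode *d_inode; };
struct path   { struct dentry *dentry; };
struct file   { struct path f_path; };

/* One accessor instead of repeating the dereference chain at every call site. */
static inline struct inode *file_inode(const struct file *f)
{
        return f->f_path.dentry->d_inode;
}

int main(void)
{
        struct inode i = { 42 };
        struct dentry d = { &i };
        struct file f = { { &d } };

        printf("ino=%lu\n", file_inode(&f)->i_ino);
        return 0;
}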
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile
index 3cc0df730156..09d278bb7b91 100644
--- a/fs/hfsplus/Makefile
+++ b/fs/hfsplus/Makefile
@@ -5,5 +5,5 @@
5obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o 5obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o
6 6
7hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \ 7hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
8 bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o 8 bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
9 9 attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
new file mode 100644
index 000000000000..8d691f124714
--- /dev/null
+++ b/fs/hfsplus/attributes.c
@@ -0,0 +1,399 @@
1/*
2 * linux/fs/hfsplus/attributes.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handling of records in attributes tree
7 */
8
9#include "hfsplus_fs.h"
10#include "hfsplus_raw.h"
11
12static struct kmem_cache *hfsplus_attr_tree_cachep;
13
14int hfsplus_create_attr_tree_cache(void)
15{
16 if (hfsplus_attr_tree_cachep)
17 return -EEXIST;
18
19 hfsplus_attr_tree_cachep =
20 kmem_cache_create("hfsplus_attr_cache",
21 sizeof(hfsplus_attr_entry), 0,
22 SLAB_HWCACHE_ALIGN, NULL);
23 if (!hfsplus_attr_tree_cachep)
24 return -ENOMEM;
25
26 return 0;
27}
28
29void hfsplus_destroy_attr_tree_cache(void)
30{
31 kmem_cache_destroy(hfsplus_attr_tree_cachep);
32}
33
34int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *k1,
35 const hfsplus_btree_key *k2)
36{
37 __be32 k1_cnid, k2_cnid;
38
39 k1_cnid = k1->attr.cnid;
40 k2_cnid = k2->attr.cnid;
41 if (k1_cnid != k2_cnid)
42 return be32_to_cpu(k1_cnid) < be32_to_cpu(k2_cnid) ? -1 : 1;
43
44 return hfsplus_strcmp(
45 (const struct hfsplus_unistr *)&k1->attr.key_name,
46 (const struct hfsplus_unistr *)&k2->attr.key_name);
47}
48
49int hfsplus_attr_build_key(struct super_block *sb, hfsplus_btree_key *key,
50 u32 cnid, const char *name)
51{
52 int len;
53
54 memset(key, 0, sizeof(struct hfsplus_attr_key));
55 key->attr.cnid = cpu_to_be32(cnid);
56 if (name) {
57 len = strlen(name);
58 if (len > HFSPLUS_ATTR_MAX_STRLEN) {
59 printk(KERN_ERR "hfs: invalid xattr name's length\n");
60 return -EINVAL;
61 }
62 hfsplus_asc2uni(sb,
63 (struct hfsplus_unistr *)&key->attr.key_name,
64 HFSPLUS_ATTR_MAX_STRLEN, name, len);
65 len = be16_to_cpu(key->attr.key_name.length);
66 } else {
67 key->attr.key_name.length = 0;
68 len = 0;
69 }
70
71 /* The length of the key, as stored in key_len field, does not include
72 * the size of the key_len field itself.
73 * So, offsetof(hfsplus_attr_key, key_name) is a trick because
74 * it takes into consideration key_len field (__be16) of
75 * hfsplus_attr_key structure instead of length field (__be16) of
76 * hfsplus_attr_unistr structure.
77 */
78 key->key_len =
79 cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
80 2 * len);
81
82 return 0;
83}
84
85void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
86 u32 cnid,
87 struct hfsplus_attr_unistr *name)
88{
89 int ustrlen;
90
91 memset(key, 0, sizeof(struct hfsplus_attr_key));
92 ustrlen = be16_to_cpu(name->length);
93 key->attr.cnid = cpu_to_be32(cnid);
94 key->attr.key_name.length = cpu_to_be16(ustrlen);
95 ustrlen *= 2;
96 memcpy(key->attr.key_name.unicode, name->unicode, ustrlen);
97
98 /* The length of the key, as stored in key_len field, does not include
99 * the size of the key_len field itself.
100 * So, offsetof(hfsplus_attr_key, key_name) is a trick because
101 * it takes into consideration key_len field (__be16) of
102 * hfsplus_attr_key structure instead of length field (__be16) of
103 * hfsplus_attr_unistr structure.
104 */
105 key->key_len =
106 cpu_to_be16(offsetof(struct hfsplus_attr_key, key_name) +
107 ustrlen);
108}
109
110hfsplus_attr_entry *hfsplus_alloc_attr_entry(void)
111{
112 return kmem_cache_alloc(hfsplus_attr_tree_cachep, GFP_KERNEL);
113}
114
115void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry)
116{
117 if (entry)
118 kmem_cache_free(hfsplus_attr_tree_cachep, entry);
119}
120
121#define HFSPLUS_INVALID_ATTR_RECORD -1
122
123static int hfsplus_attr_build_record(hfsplus_attr_entry *entry, int record_type,
124 u32 cnid, const void *value, size_t size)
125{
126 if (record_type == HFSPLUS_ATTR_FORK_DATA) {
127 /*
128 * Mac OS X supports only inline data attributes.
129 * Do nothing
130 */
131 memset(entry, 0, sizeof(*entry));
132 return sizeof(struct hfsplus_attr_fork_data);
133 } else if (record_type == HFSPLUS_ATTR_EXTENTS) {
134 /*
135 * Mac OS X supports only inline data attributes.
136 * Do nothing.
137 */
138 memset(entry, 0, sizeof(*entry));
139 return sizeof(struct hfsplus_attr_extents);
140 } else if (record_type == HFSPLUS_ATTR_INLINE_DATA) {
141 u16 len;
142
143 memset(entry, 0, sizeof(struct hfsplus_attr_inline_data));
144 entry->inline_data.record_type = cpu_to_be32(record_type);
145 if (size <= HFSPLUS_MAX_INLINE_DATA_SIZE)
146 len = size;
147 else
148 return HFSPLUS_INVALID_ATTR_RECORD;
149 entry->inline_data.length = cpu_to_be16(len);
150 memcpy(entry->inline_data.raw_bytes, value, len);
151 /*
152 * Align len on two-byte boundary.
 153 * A pad byte is added when len is odd.
154 */
155 len = round_up(len, 2);
156 return offsetof(struct hfsplus_attr_inline_data, raw_bytes) +
157 len;
158 } else /* invalid input */
159 memset(entry, 0, sizeof(*entry));
160
161 return HFSPLUS_INVALID_ATTR_RECORD;
162}
163
164int hfsplus_find_attr(struct super_block *sb, u32 cnid,
165 const char *name, struct hfs_find_data *fd)
166{
167 int err = 0;
168
169 dprint(DBG_ATTR_MOD, "find_attr: %s,%d\n", name ? name : NULL, cnid);
170
171 if (!HFSPLUS_SB(sb)->attr_tree) {
172 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
173 return -EINVAL;
174 }
175
176 if (name) {
177 err = hfsplus_attr_build_key(sb, fd->search_key, cnid, name);
178 if (err)
179 goto failed_find_attr;
180 err = hfs_brec_find(fd, hfs_find_rec_by_key);
181 if (err)
182 goto failed_find_attr;
183 } else {
184 err = hfsplus_attr_build_key(sb, fd->search_key, cnid, NULL);
185 if (err)
186 goto failed_find_attr;
187 err = hfs_brec_find(fd, hfs_find_1st_rec_by_cnid);
188 if (err)
189 goto failed_find_attr;
190 }
191
192failed_find_attr:
193 return err;
194}
195
196int hfsplus_attr_exists(struct inode *inode, const char *name)
197{
198 int err = 0;
199 struct super_block *sb = inode->i_sb;
200 struct hfs_find_data fd;
201
202 if (!HFSPLUS_SB(sb)->attr_tree)
203 return 0;
204
205 err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
206 if (err)
207 return 0;
208
209 err = hfsplus_find_attr(sb, inode->i_ino, name, &fd);
210 if (err)
211 goto attr_not_found;
212
213 hfs_find_exit(&fd);
214 return 1;
215
216attr_not_found:
217 hfs_find_exit(&fd);
218 return 0;
219}
220
221int hfsplus_create_attr(struct inode *inode,
222 const char *name,
223 const void *value, size_t size)
224{
225 struct super_block *sb = inode->i_sb;
226 struct hfs_find_data fd;
227 hfsplus_attr_entry *entry_ptr;
228 int entry_size;
229 int err;
230
231 dprint(DBG_ATTR_MOD, "create_attr: %s,%ld\n",
232 name ? name : NULL, inode->i_ino);
233
234 if (!HFSPLUS_SB(sb)->attr_tree) {
235 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
236 return -EINVAL;
237 }
238
239 entry_ptr = hfsplus_alloc_attr_entry();
240 if (!entry_ptr)
241 return -ENOMEM;
242
243 err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
244 if (err)
245 goto failed_init_create_attr;
246
247 if (name) {
248 err = hfsplus_attr_build_key(sb, fd.search_key,
249 inode->i_ino, name);
250 if (err)
251 goto failed_create_attr;
252 } else {
253 err = -EINVAL;
254 goto failed_create_attr;
255 }
256
257 /* Mac OS X supports only inline data attributes. */
258 entry_size = hfsplus_attr_build_record(entry_ptr,
259 HFSPLUS_ATTR_INLINE_DATA,
260 inode->i_ino,
261 value, size);
262 if (entry_size == HFSPLUS_INVALID_ATTR_RECORD) {
263 err = -EINVAL;
264 goto failed_create_attr;
265 }
266
267 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
268 if (err != -ENOENT) {
269 if (!err)
270 err = -EEXIST;
271 goto failed_create_attr;
272 }
273
274 err = hfs_brec_insert(&fd, entry_ptr, entry_size);
275 if (err)
276 goto failed_create_attr;
277
278 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY);
279
280failed_create_attr:
281 hfs_find_exit(&fd);
282
283failed_init_create_attr:
284 hfsplus_destroy_attr_entry(entry_ptr);
285 return err;
286}
287
288static int __hfsplus_delete_attr(struct inode *inode, u32 cnid,
289 struct hfs_find_data *fd)
290{
291 int err = 0;
292 __be32 found_cnid, record_type;
293
294 hfs_bnode_read(fd->bnode, &found_cnid,
295 fd->keyoffset +
296 offsetof(struct hfsplus_attr_key, cnid),
297 sizeof(__be32));
298 if (cnid != be32_to_cpu(found_cnid))
299 return -ENOENT;
300
301 hfs_bnode_read(fd->bnode, &record_type,
302 fd->entryoffset, sizeof(record_type));
303
304 switch (be32_to_cpu(record_type)) {
305 case HFSPLUS_ATTR_INLINE_DATA:
306 /* All is OK. Do nothing. */
307 break;
308 case HFSPLUS_ATTR_FORK_DATA:
309 case HFSPLUS_ATTR_EXTENTS:
 310 printk(KERN_ERR "hfs: only inline data xattrs are supported\n");
311 return -EOPNOTSUPP;
312 default:
313 printk(KERN_ERR "hfs: invalid extended attribute record\n");
314 return -ENOENT;
315 }
316
317 err = hfs_brec_remove(fd);
318 if (err)
319 return err;
320
321 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ATTR_DIRTY);
322 return err;
323}
324
325int hfsplus_delete_attr(struct inode *inode, const char *name)
326{
327 int err = 0;
328 struct super_block *sb = inode->i_sb;
329 struct hfs_find_data fd;
330
331 dprint(DBG_ATTR_MOD, "delete_attr: %s,%ld\n",
332 name ? name : NULL, inode->i_ino);
333
334 if (!HFSPLUS_SB(sb)->attr_tree) {
335 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
336 return -EINVAL;
337 }
338
339 err = hfs_find_init(HFSPLUS_SB(sb)->attr_tree, &fd);
340 if (err)
341 return err;
342
343 if (name) {
344 err = hfsplus_attr_build_key(sb, fd.search_key,
345 inode->i_ino, name);
346 if (err)
347 goto out;
348 } else {
349 printk(KERN_ERR "hfs: invalid extended attribute name\n");
350 err = -EINVAL;
351 goto out;
352 }
353
354 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
355 if (err)
356 goto out;
357
358 err = __hfsplus_delete_attr(inode, inode->i_ino, &fd);
359 if (err)
360 goto out;
361
362out:
363 hfs_find_exit(&fd);
364 return err;
365}
366
367int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid)
368{
369 int err = 0;
370 struct hfs_find_data fd;
371
372 dprint(DBG_ATTR_MOD, "delete_all_attrs: %d\n", cnid);
373
374 if (!HFSPLUS_SB(dir->i_sb)->attr_tree) {
375 printk(KERN_ERR "hfs: attributes file doesn't exist\n");
376 return -EINVAL;
377 }
378
379 err = hfs_find_init(HFSPLUS_SB(dir->i_sb)->attr_tree, &fd);
380 if (err)
381 return err;
382
383 for (;;) {
384 err = hfsplus_find_attr(dir->i_sb, cnid, NULL, &fd);
385 if (err) {
386 if (err != -ENOENT)
387 printk(KERN_ERR "hfs: xattr search failed.\n");
388 goto end_delete_all;
389 }
390
391 err = __hfsplus_delete_attr(dir, cnid, &fd);
392 if (err)
393 goto end_delete_all;
394 }
395
396end_delete_all:
397 hfs_find_exit(&fd);
398 return err;
399}
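hfsplus_attr_build_record() above sizes an inline-data record as the offset of the raw bytes plus the value length rounded up to a two-byte boundary, and refuses values larger than the inline limit. A standalone sketch of just that size calculation (the structure and the limit below are simplified stand-ins, not the real on-disk layout):

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdint.h>

#define MAX_INLINE_DATA_SIZE 3802       /* illustrative limit only */
#define INVALID_ATTR_RECORD  -1

/* Simplified analogue of struct hfsplus_attr_inline_data. */
struct inline_data {
        uint32_t record_type;
        uint16_t length;
        uint8_t  raw_bytes[MAX_INLINE_DATA_SIZE];
};

#define round_up(x, y) ((((x) + (y) - 1) / (y)) * (y))

static int build_inline_record(struct inline_data *rec,
                               const void *value, size_t size)
{
        uint16_t len;

        if (size > MAX_INLINE_DATA_SIZE)
                return INVALID_ATTR_RECORD;
        len = (uint16_t)size;

        memset(rec, 0, sizeof(*rec));
        rec->length = len;
        memcpy(rec->raw_bytes, value, len);

        /* Odd-length values get one pad byte so records stay 2-byte aligned. */
        len = round_up(len, 2);
        return (int)(offsetof(struct inline_data, raw_bytes) + len);
}

int main(void)
{
        struct inline_data rec;

        printf("record size for a 3-byte value: %d\n",
               build_inline_record(&rec, "abc", 3));
        return 0;
}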
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index 5d799c13205f..d73c98d1ee99 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
24 fd->key = ptr + tree->max_key_len + 2; 24 fd->key = ptr + tree->max_key_len + 2;
25 dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", 25 dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n",
26 tree->cnid, __builtin_return_address(0)); 26 tree->cnid, __builtin_return_address(0));
27 mutex_lock(&tree->tree_lock); 27 switch (tree->cnid) {
28 case HFSPLUS_CAT_CNID:
29 mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
30 break;
31 case HFSPLUS_EXT_CNID:
32 mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
33 break;
34 case HFSPLUS_ATTR_CNID:
35 mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
36 break;
37 default:
38 BUG();
39 }
28 return 0; 40 return 0;
29} 41}
30 42
@@ -38,15 +50,73 @@ void hfs_find_exit(struct hfs_find_data *fd)
38 fd->tree = NULL; 50 fd->tree = NULL;
39} 51}
40 52
41/* Find the record in bnode that best matches key (not greater than...)*/ 53int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode,
42int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) 54 struct hfs_find_data *fd,
55 int *begin,
56 int *end,
57 int *cur_rec)
58{
59 __be32 cur_cnid, search_cnid;
60
61 if (bnode->tree->cnid == HFSPLUS_EXT_CNID) {
62 cur_cnid = fd->key->ext.cnid;
63 search_cnid = fd->search_key->ext.cnid;
64 } else if (bnode->tree->cnid == HFSPLUS_CAT_CNID) {
65 cur_cnid = fd->key->cat.parent;
66 search_cnid = fd->search_key->cat.parent;
67 } else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) {
68 cur_cnid = fd->key->attr.cnid;
69 search_cnid = fd->search_key->attr.cnid;
70 } else
71 BUG();
72
73 if (cur_cnid == search_cnid) {
74 (*end) = (*cur_rec);
75 if ((*begin) == (*end))
76 return 1;
77 } else {
78 if (be32_to_cpu(cur_cnid) < be32_to_cpu(search_cnid))
79 (*begin) = (*cur_rec) + 1;
80 else
81 (*end) = (*cur_rec) - 1;
82 }
83
84 return 0;
85}
86
87int hfs_find_rec_by_key(struct hfs_bnode *bnode,
88 struct hfs_find_data *fd,
89 int *begin,
90 int *end,
91 int *cur_rec)
43{ 92{
44 int cmpval; 93 int cmpval;
94
95 cmpval = bnode->tree->keycmp(fd->key, fd->search_key);
96 if (!cmpval) {
97 (*end) = (*cur_rec);
98 return 1;
99 }
100 if (cmpval < 0)
101 (*begin) = (*cur_rec) + 1;
102 else
103 *(end) = (*cur_rec) - 1;
104
105 return 0;
106}
107
108/* Find the record in bnode that best matches key (not greater than...)*/
109int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd,
110 search_strategy_t rec_found)
111{
45 u16 off, len, keylen; 112 u16 off, len, keylen;
46 int rec; 113 int rec;
47 int b, e; 114 int b, e;
48 int res; 115 int res;
49 116
117 if (!rec_found)
118 BUG();
119
50 b = 0; 120 b = 0;
51 e = bnode->num_recs - 1; 121 e = bnode->num_recs - 1;
52 res = -ENOENT; 122 res = -ENOENT;
@@ -59,17 +129,12 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
59 goto fail; 129 goto fail;
60 } 130 }
61 hfs_bnode_read(bnode, fd->key, off, keylen); 131 hfs_bnode_read(bnode, fd->key, off, keylen);
62 cmpval = bnode->tree->keycmp(fd->key, fd->search_key); 132 if (rec_found(bnode, fd, &b, &e, &rec)) {
63 if (!cmpval) {
64 e = rec;
65 res = 0; 133 res = 0;
66 goto done; 134 goto done;
67 } 135 }
68 if (cmpval < 0)
69 b = rec + 1;
70 else
71 e = rec - 1;
72 } while (b <= e); 136 } while (b <= e);
137
73 if (rec != e && e >= 0) { 138 if (rec != e && e >= 0) {
74 len = hfs_brec_lenoff(bnode, e, &off); 139 len = hfs_brec_lenoff(bnode, e, &off);
75 keylen = hfs_brec_keylen(bnode, e); 140 keylen = hfs_brec_keylen(bnode, e);
@@ -79,19 +144,21 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd)
79 } 144 }
80 hfs_bnode_read(bnode, fd->key, off, keylen); 145 hfs_bnode_read(bnode, fd->key, off, keylen);
81 } 146 }
147
82done: 148done:
83 fd->record = e; 149 fd->record = e;
84 fd->keyoffset = off; 150 fd->keyoffset = off;
85 fd->keylength = keylen; 151 fd->keylength = keylen;
86 fd->entryoffset = off + keylen; 152 fd->entryoffset = off + keylen;
87 fd->entrylength = len - keylen; 153 fd->entrylength = len - keylen;
154
88fail: 155fail:
89 return res; 156 return res;
90} 157}
91 158
92/* Traverse a B*Tree from the root to a leaf finding best fit to key */ 159/* Traverse a B*Tree from the root to a leaf finding best fit to key */
93/* Return allocated copy of node found, set recnum to best record */ 160/* Return allocated copy of node found, set recnum to best record */
94int hfs_brec_find(struct hfs_find_data *fd) 161int hfs_brec_find(struct hfs_find_data *fd, search_strategy_t do_key_compare)
95{ 162{
96 struct hfs_btree *tree; 163 struct hfs_btree *tree;
97 struct hfs_bnode *bnode; 164 struct hfs_bnode *bnode;
@@ -122,7 +189,7 @@ int hfs_brec_find(struct hfs_find_data *fd)
122 goto invalid; 189 goto invalid;
123 bnode->parent = parent; 190 bnode->parent = parent;
124 191
125 res = __hfs_brec_find(bnode, fd); 192 res = __hfs_brec_find(bnode, fd, do_key_compare);
126 if (!height) 193 if (!height)
127 break; 194 break;
128 if (fd->record < 0) 195 if (fd->record < 0)
@@ -149,7 +216,7 @@ int hfs_brec_read(struct hfs_find_data *fd, void *rec, int rec_len)
149{ 216{
150 int res; 217 int res;
151 218
152 res = hfs_brec_find(fd); 219 res = hfs_brec_find(fd, hfs_find_rec_by_key);
153 if (res) 220 if (res)
154 return res; 221 return res;
155 if (fd->entrylength > rec_len) 222 if (fd->entrylength > rec_len)
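The bfind.c change above splits the comparison step out of __hfs_brec_find() into a search_strategy_t callback: the strategy returns 1 when the current record satisfies the search and otherwise narrows the begin/end bounds, so the same bisection loop can serve both exact key lookups and find-first-record-by-cnid scans. A self-contained sketch of that callback-driven search over a plain sorted array (not the b-tree code itself):

#include <stdio.h>

/* The callback contract mirrors search_strategy_t: return 1 when found,
 * otherwise adjust *begin or *end around *cur and return 0. */
typedef int (*search_strategy_t)(const int *recs, int key,
                                 int *begin, int *end, int *cur);

static int find_rec_by_key(const int *recs, int key,
                           int *begin, int *end, int *cur)
{
        if (recs[*cur] == key) {
                *end = *cur;
                return 1;
        }
        if (recs[*cur] < key)
                *begin = *cur + 1;
        else
                *end = *cur - 1;
        return 0;
}

/* Analogue of __hfs_brec_find(): the loop owns the bisection,
 * the strategy decides what "found" means. */
static int brec_find(const int *recs, int nrecs, int key,
                     search_strategy_t strategy)
{
        int b = 0, e = nrecs - 1, rec = 0;

        do {
                rec = (e + b) / 2;
                if (strategy(recs, key, &b, &e, &rec))
                        return e;       /* index of the matching record */
        } while (b <= e);
        return -1;                      /* -ENOENT in the kernel version */
}

int main(void)
{
        int recs[] = { 2, 3, 5, 8, 13, 21 };

        printf("found 8 at index %d\n", brec_find(recs, 6, 8, find_rec_by_key));
        printf("lookup of 7 returns %d\n", brec_find(recs, 6, 7, find_rec_by_key));
        return 0;
}

A second strategy such as hfs_find_1st_rec_by_cnid can then reuse the same loop by defining "found" differently, which is exactly what the attribute-tree lookups above rely on.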
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 1c42cc5b899f..f31ac6f404f1 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -62,7 +62,8 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
62 62
63 tree = node->tree; 63 tree = node->tree;
64 if (node->type == HFS_NODE_LEAF || 64 if (node->type == HFS_NODE_LEAF ||
65 tree->attributes & HFS_TREE_VARIDXKEYS) 65 tree->attributes & HFS_TREE_VARIDXKEYS ||
66 node->tree->cnid == HFSPLUS_ATTR_CNID)
66 key_len = hfs_bnode_read_u16(node, off) + 2; 67 key_len = hfs_bnode_read_u16(node, off) + 2;
67 else 68 else
68 key_len = tree->max_key_len + 2; 69 key_len = tree->max_key_len + 2;
@@ -314,7 +315,8 @@ void hfs_bnode_dump(struct hfs_bnode *node)
314 if (i && node->type == HFS_NODE_INDEX) { 315 if (i && node->type == HFS_NODE_INDEX) {
315 int tmp; 316 int tmp;
316 317
317 if (node->tree->attributes & HFS_TREE_VARIDXKEYS) 318 if (node->tree->attributes & HFS_TREE_VARIDXKEYS ||
319 node->tree->cnid == HFSPLUS_ATTR_CNID)
318 tmp = hfs_bnode_read_u16(node, key_off) + 2; 320 tmp = hfs_bnode_read_u16(node, key_off) + 2;
319 else 321 else
320 tmp = node->tree->max_key_len + 2; 322 tmp = node->tree->max_key_len + 2;
@@ -646,6 +648,8 @@ void hfs_bnode_put(struct hfs_bnode *node)
646 if (test_bit(HFS_BNODE_DELETED, &node->flags)) { 648 if (test_bit(HFS_BNODE_DELETED, &node->flags)) {
647 hfs_bnode_unhash(node); 649 hfs_bnode_unhash(node);
648 spin_unlock(&tree->hash_lock); 650 spin_unlock(&tree->hash_lock);
651 hfs_bnode_clear(node, 0,
652 PAGE_CACHE_SIZE * tree->pages_per_bnode);
649 hfs_bmap_free(node); 653 hfs_bmap_free(node);
650 hfs_bnode_free(node); 654 hfs_bnode_free(node);
651 return; 655 return;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2a734cfccc92..298d4e45604b 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -36,7 +36,8 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
36 return 0; 36 return 0;
37 37
38 if ((node->type == HFS_NODE_INDEX) && 38 if ((node->type == HFS_NODE_INDEX) &&
39 !(node->tree->attributes & HFS_TREE_VARIDXKEYS)) { 39 !(node->tree->attributes & HFS_TREE_VARIDXKEYS) &&
40 (node->tree->cnid != HFSPLUS_ATTR_CNID)) {
40 retval = node->tree->max_key_len + 2; 41 retval = node->tree->max_key_len + 2;
41 } else { 42 } else {
42 recoff = hfs_bnode_read_u16(node, 43 recoff = hfs_bnode_read_u16(node,
@@ -151,12 +152,13 @@ skip:
151 152
152 /* get index key */ 153 /* get index key */
153 hfs_bnode_read_key(new_node, fd->search_key, 14); 154 hfs_bnode_read_key(new_node, fd->search_key, 14);
154 __hfs_brec_find(fd->bnode, fd); 155 __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key);
155 156
156 hfs_bnode_put(new_node); 157 hfs_bnode_put(new_node);
157 new_node = NULL; 158 new_node = NULL;
158 159
159 if (tree->attributes & HFS_TREE_VARIDXKEYS) 160 if ((tree->attributes & HFS_TREE_VARIDXKEYS) ||
161 (tree->cnid == HFSPLUS_ATTR_CNID))
160 key_len = be16_to_cpu(fd->search_key->key_len) + 2; 162 key_len = be16_to_cpu(fd->search_key->key_len) + 2;
161 else { 163 else {
162 fd->search_key->key_len = 164 fd->search_key->key_len =
@@ -201,7 +203,7 @@ again:
201 hfs_bnode_put(node); 203 hfs_bnode_put(node);
202 node = fd->bnode = parent; 204 node = fd->bnode = parent;
203 205
204 __hfs_brec_find(node, fd); 206 __hfs_brec_find(node, fd, hfs_find_rec_by_key);
205 goto again; 207 goto again;
206 } 208 }
207 hfs_bnode_write_u16(node, 209 hfs_bnode_write_u16(node,
@@ -367,12 +369,13 @@ again:
367 parent = hfs_bnode_find(tree, node->parent); 369 parent = hfs_bnode_find(tree, node->parent);
368 if (IS_ERR(parent)) 370 if (IS_ERR(parent))
369 return PTR_ERR(parent); 371 return PTR_ERR(parent);
370 __hfs_brec_find(parent, fd); 372 __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
371 hfs_bnode_dump(parent); 373 hfs_bnode_dump(parent);
372 rec = fd->record; 374 rec = fd->record;
373 375
374 /* size difference between old and new key */ 376 /* size difference between old and new key */
375 if (tree->attributes & HFS_TREE_VARIDXKEYS) 377 if ((tree->attributes & HFS_TREE_VARIDXKEYS) ||
378 (tree->cnid == HFSPLUS_ATTR_CNID))
376 newkeylen = hfs_bnode_read_u16(node, 14) + 2; 379 newkeylen = hfs_bnode_read_u16(node, 14) + 2;
377 else 380 else
378 fd->keylength = newkeylen = tree->max_key_len + 2; 381 fd->keylength = newkeylen = tree->max_key_len + 2;
@@ -427,7 +430,7 @@ skip:
427 hfs_bnode_read_key(new_node, fd->search_key, 14); 430 hfs_bnode_read_key(new_node, fd->search_key, 14);
428 cnid = cpu_to_be32(new_node->this); 431 cnid = cpu_to_be32(new_node->this);
429 432
430 __hfs_brec_find(fd->bnode, fd); 433 __hfs_brec_find(fd->bnode, fd, hfs_find_rec_by_key);
431 hfs_brec_insert(fd, &cnid, sizeof(cnid)); 434 hfs_brec_insert(fd, &cnid, sizeof(cnid));
432 hfs_bnode_put(fd->bnode); 435 hfs_bnode_put(fd->bnode);
433 hfs_bnode_put(new_node); 436 hfs_bnode_put(new_node);
@@ -495,13 +498,15 @@ static int hfs_btree_inc_height(struct hfs_btree *tree)
495 /* insert old root idx into new root */ 498 /* insert old root idx into new root */
496 node->parent = tree->root; 499 node->parent = tree->root;
497 if (node->type == HFS_NODE_LEAF || 500 if (node->type == HFS_NODE_LEAF ||
498 tree->attributes & HFS_TREE_VARIDXKEYS) 501 tree->attributes & HFS_TREE_VARIDXKEYS ||
502 tree->cnid == HFSPLUS_ATTR_CNID)
499 key_size = hfs_bnode_read_u16(node, 14) + 2; 503 key_size = hfs_bnode_read_u16(node, 14) + 2;
500 else 504 else
501 key_size = tree->max_key_len + 2; 505 key_size = tree->max_key_len + 2;
502 hfs_bnode_copy(new_node, 14, node, 14, key_size); 506 hfs_bnode_copy(new_node, 14, node, 14, key_size);
503 507
504 if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { 508 if (!(tree->attributes & HFS_TREE_VARIDXKEYS) &&
509 (tree->cnid != HFSPLUS_ATTR_CNID)) {
505 key_size = tree->max_key_len + 2; 510 key_size = tree->max_key_len + 2;
506 hfs_bnode_write_u16(new_node, 14, tree->max_key_len); 511 hfs_bnode_write_u16(new_node, 14, tree->max_key_len);
507 } 512 }
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 685d07d0ed18..efb689c21a95 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -98,6 +98,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
98 set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 98 set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
99 } 99 }
100 break; 100 break;
101 case HFSPLUS_ATTR_CNID:
102 if (tree->max_key_len != HFSPLUS_ATTR_KEYLEN - sizeof(u16)) {
103 printk(KERN_ERR "hfs: invalid attributes max_key_len %d\n",
104 tree->max_key_len);
105 goto fail_page;
106 }
107 tree->keycmp = hfsplus_attr_bin_cmp_key;
108 break;
101 default: 109 default:
102 printk(KERN_ERR "hfs: unknown B*Tree requested\n"); 110 printk(KERN_ERR "hfs: unknown B*Tree requested\n");
103 goto fail_page; 111 goto fail_page;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 798d9c4c5e71..840d71edd193 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -45,7 +45,8 @@ void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
45 45
46 key->cat.parent = cpu_to_be32(parent); 46 key->cat.parent = cpu_to_be32(parent);
47 if (str) { 47 if (str) {
48 hfsplus_asc2uni(sb, &key->cat.name, str->name, str->len); 48 hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
49 str->name, str->len);
49 len = be16_to_cpu(key->cat.name.length); 50 len = be16_to_cpu(key->cat.name.length);
50 } else { 51 } else {
51 key->cat.name.length = 0; 52 key->cat.name.length = 0;
@@ -167,7 +168,8 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
167 entry->type = cpu_to_be16(type); 168 entry->type = cpu_to_be16(type);
168 entry->thread.reserved = 0; 169 entry->thread.reserved = 0;
169 entry->thread.parentID = cpu_to_be32(parentid); 170 entry->thread.parentID = cpu_to_be32(parentid);
170 hfsplus_asc2uni(sb, &entry->thread.nodeName, str->name, str->len); 171 hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
172 str->name, str->len);
171 return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; 173 return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
172} 174}
173 175
@@ -198,7 +200,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
198 hfsplus_cat_build_key_uni(fd->search_key, 200 hfsplus_cat_build_key_uni(fd->search_key,
199 be32_to_cpu(tmp.thread.parentID), 201 be32_to_cpu(tmp.thread.parentID),
200 &tmp.thread.nodeName); 202 &tmp.thread.nodeName);
201 return hfs_brec_find(fd); 203 return hfs_brec_find(fd, hfs_find_rec_by_key);
202} 204}
203 205
204int hfsplus_create_cat(u32 cnid, struct inode *dir, 206int hfsplus_create_cat(u32 cnid, struct inode *dir,
@@ -221,7 +223,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
221 S_ISDIR(inode->i_mode) ? 223 S_ISDIR(inode->i_mode) ?
222 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, 224 HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
223 dir->i_ino, str); 225 dir->i_ino, str);
224 err = hfs_brec_find(&fd); 226 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
225 if (err != -ENOENT) { 227 if (err != -ENOENT) {
226 if (!err) 228 if (!err)
227 err = -EEXIST; 229 err = -EEXIST;
@@ -233,7 +235,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
233 235
234 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 236 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
235 entry_size = hfsplus_cat_build_record(&entry, cnid, inode); 237 entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
236 err = hfs_brec_find(&fd); 238 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
237 if (err != -ENOENT) { 239 if (err != -ENOENT) {
238 /* panic? */ 240 /* panic? */
239 if (!err) 241 if (!err)
@@ -253,7 +255,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
253 255
254err1: 256err1:
255 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 257 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
256 if (!hfs_brec_find(&fd)) 258 if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
257 hfs_brec_remove(&fd); 259 hfs_brec_remove(&fd);
258err2: 260err2:
259 hfs_find_exit(&fd); 261 hfs_find_exit(&fd);
@@ -279,7 +281,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
279 int len; 281 int len;
280 282
281 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 283 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
282 err = hfs_brec_find(&fd); 284 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
283 if (err) 285 if (err)
284 goto out; 286 goto out;
285 287
@@ -296,7 +298,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
296 } else 298 } else
297 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 299 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
298 300
299 err = hfs_brec_find(&fd); 301 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
300 if (err) 302 if (err)
301 goto out; 303 goto out;
302 304
@@ -326,7 +328,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
326 goto out; 328 goto out;
327 329
328 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 330 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
329 err = hfs_brec_find(&fd); 331 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
330 if (err) 332 if (err)
331 goto out; 333 goto out;
332 334
@@ -337,6 +339,12 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
337 dir->i_size--; 339 dir->i_size--;
338 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 340 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
339 hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); 341 hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
342
343 if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) {
344 if (HFSPLUS_SB(sb)->attr_tree)
345 hfsplus_delete_all_attrs(dir, cnid);
346 }
347
340out: 348out:
341 hfs_find_exit(&fd); 349 hfs_find_exit(&fd);
342 350
@@ -363,7 +371,7 @@ int hfsplus_rename_cat(u32 cnid,
363 371
364 /* find the old dir entry and read the data */ 372 /* find the old dir entry and read the data */
365 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 373 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
366 err = hfs_brec_find(&src_fd); 374 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
367 if (err) 375 if (err)
368 goto out; 376 goto out;
369 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { 377 if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
@@ -376,7 +384,7 @@ int hfsplus_rename_cat(u32 cnid,
376 384
377 /* create new dir entry with the data from the old entry */ 385 /* create new dir entry with the data from the old entry */
378 hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); 386 hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
379 err = hfs_brec_find(&dst_fd); 387 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
380 if (err != -ENOENT) { 388 if (err != -ENOENT) {
381 if (!err) 389 if (!err)
382 err = -EEXIST; 390 err = -EEXIST;
@@ -391,7 +399,7 @@ int hfsplus_rename_cat(u32 cnid,
391 399
392 /* finally remove the old entry */ 400 /* finally remove the old entry */
393 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); 401 hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
394 err = hfs_brec_find(&src_fd); 402 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
395 if (err) 403 if (err)
396 goto out; 404 goto out;
397 err = hfs_brec_remove(&src_fd); 405 err = hfs_brec_remove(&src_fd);
@@ -402,7 +410,7 @@ int hfsplus_rename_cat(u32 cnid,
402 410
403 /* remove old thread entry */ 411 /* remove old thread entry */
404 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); 412 hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
405 err = hfs_brec_find(&src_fd); 413 err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
406 if (err) 414 if (err)
407 goto out; 415 goto out;
408 type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset); 416 type = hfs_bnode_read_u16(src_fd.bnode, src_fd.entryoffset);
@@ -414,7 +422,7 @@ int hfsplus_rename_cat(u32 cnid,
414 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); 422 hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
415 entry_size = hfsplus_fill_cat_thread(sb, &entry, type, 423 entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
416 dst_dir->i_ino, dst_name); 424 dst_dir->i_ino, dst_name);
417 err = hfs_brec_find(&dst_fd); 425 err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
418 if (err != -ENOENT) { 426 if (err != -ENOENT) {
419 if (!err) 427 if (!err)
420 err = -EEXIST; 428 err = -EEXIST;
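With the extra length argument, hfsplus_asc2uni() callers now state how much room the destination Unicode string has: the catalog paths above pass HFSPLUS_MAX_STRLEN while the attribute-tree code passes the shorter HFSPLUS_ATTR_MAX_STRLEN. A minimal standalone version of that explicit-capacity pattern (the conversion below is a naive ASCII widening, not the real one):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define MAX_STRLEN      255     /* catalog-style limit */
#define ATTR_MAX_STRLEN 127     /* attribute-style limit */

/* Destination "unistr": length-prefixed array of 16-bit units. */
struct unistr {
        uint16_t length;
        uint16_t unicode[MAX_STRLEN];
};

/* The capacity is an argument instead of being assumed by the callee. */
static int asc2uni(struct unistr *dst, int max_units, const char *src, int len)
{
        int i, n = len;

        if (n > max_units)
                n = max_units;  /* or fail outright, the caller's choice */
        for (i = 0; i < n; i++)
                dst->unicode[i] = (uint8_t)src[i];
        dst->length = (uint16_t)n;
        return n < len ? -1 : 0;
}

int main(void)
{
        struct unistr u;
        const char *name = "com.example.a-rather-long-extended-attribute-name";

        /* Catalog-style call: plenty of room. */
        printf("catalog copy: %d\n",
               asc2uni(&u, MAX_STRLEN, name, (int)strlen(name)));
        /* Attribute-style call with a tiny capacity to show truncation. */
        printf("attr copy:    %d\n",
               asc2uni(&u, 8, name, (int)strlen(name)));
        return 0;
}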
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 6b9f921ef2fa..031c24e50521 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -15,6 +15,7 @@
15 15
16#include "hfsplus_fs.h" 16#include "hfsplus_fs.h"
17#include "hfsplus_raw.h" 17#include "hfsplus_raw.h"
18#include "xattr.h"
18 19
19static inline void hfsplus_instantiate(struct dentry *dentry, 20static inline void hfsplus_instantiate(struct dentry *dentry,
20 struct inode *inode, u32 cnid) 21 struct inode *inode, u32 cnid)
@@ -122,7 +123,7 @@ fail:
122 123
123static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) 124static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
124{ 125{
125 struct inode *inode = filp->f_path.dentry->d_inode; 126 struct inode *inode = file_inode(filp);
126 struct super_block *sb = inode->i_sb; 127 struct super_block *sb = inode->i_sb;
127 int len, err; 128 int len, err;
128 char strbuf[HFSPLUS_MAX_STRLEN + 1]; 129 char strbuf[HFSPLUS_MAX_STRLEN + 1];
@@ -138,7 +139,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
138 if (err) 139 if (err)
139 return err; 140 return err;
140 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); 141 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
141 err = hfs_brec_find(&fd); 142 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
142 if (err) 143 if (err)
143 goto out; 144 goto out;
144 145
@@ -421,6 +422,15 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
421 if (res) 422 if (res)
422 goto out_err; 423 goto out_err;
423 424
425 res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
426 if (res == -EOPNOTSUPP)
427 res = 0; /* Operation is not supported. */
428 else if (res) {
429 /* Try to delete anyway without error analysis. */
430 hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
431 goto out_err;
432 }
433
424 hfsplus_instantiate(dentry, inode, inode->i_ino); 434 hfsplus_instantiate(dentry, inode, inode->i_ino);
425 mark_inode_dirty(inode); 435 mark_inode_dirty(inode);
426 goto out; 436 goto out;
@@ -450,15 +460,26 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
450 init_special_inode(inode, mode, rdev); 460 init_special_inode(inode, mode, rdev);
451 461
452 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); 462 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
453 if (res) { 463 if (res)
454 clear_nlink(inode); 464 goto failed_mknod;
455 hfsplus_delete_inode(inode); 465
456 iput(inode); 466 res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
457 goto out; 467 if (res == -EOPNOTSUPP)
468 res = 0; /* Operation is not supported. */
469 else if (res) {
470 /* Try to delete anyway without error analysis. */
471 hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name);
472 goto failed_mknod;
458 } 473 }
459 474
460 hfsplus_instantiate(dentry, inode, inode->i_ino); 475 hfsplus_instantiate(dentry, inode, inode->i_ino);
461 mark_inode_dirty(inode); 476 mark_inode_dirty(inode);
477 goto out;
478
479failed_mknod:
480 clear_nlink(inode);
481 hfsplus_delete_inode(inode);
482 iput(inode);
462out: 483out:
463 mutex_unlock(&sbi->vh_mutex); 484 mutex_unlock(&sbi->vh_mutex);
464 return res; 485 return res;
@@ -499,15 +520,19 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
499} 520}
500 521
501const struct inode_operations hfsplus_dir_inode_operations = { 522const struct inode_operations hfsplus_dir_inode_operations = {
502 .lookup = hfsplus_lookup, 523 .lookup = hfsplus_lookup,
503 .create = hfsplus_create, 524 .create = hfsplus_create,
504 .link = hfsplus_link, 525 .link = hfsplus_link,
505 .unlink = hfsplus_unlink, 526 .unlink = hfsplus_unlink,
506 .mkdir = hfsplus_mkdir, 527 .mkdir = hfsplus_mkdir,
507 .rmdir = hfsplus_rmdir, 528 .rmdir = hfsplus_rmdir,
508 .symlink = hfsplus_symlink, 529 .symlink = hfsplus_symlink,
509 .mknod = hfsplus_mknod, 530 .mknod = hfsplus_mknod,
510 .rename = hfsplus_rename, 531 .rename = hfsplus_rename,
532 .setxattr = generic_setxattr,
533 .getxattr = generic_getxattr,
534 .listxattr = hfsplus_listxattr,
535 .removexattr = hfsplus_removexattr,
511}; 536};
512 537
513const struct file_operations hfsplus_dir_operations = { 538const struct file_operations hfsplus_dir_operations = {
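The hfsplus_mknod() rework above routes both failure points, catalog creation and security-xattr initialisation, through one failed_mknod label that tears down the half-created inode. The same goto-cleanup idiom in a runnable sketch (resources and failure injection are made up for illustration):

#include <stdio.h>
#include <stdlib.h>

static int create_record(int fail)  { return fail ? -1 : 0; }
static int init_security(int fail)  { return fail ? -1 : 0; }

/* Mirrors the shape of hfsplus_mknod(): any failure after the object
 * exists falls through a single label that undoes the work. */
static int mknod_like(int fail_create, int fail_security)
{
        void *inode = malloc(64);
        int res;

        if (!inode)
                return -1;

        res = create_record(fail_create);
        if (res)
                goto failed;

        res = init_security(fail_security);
        if (res)
                goto failed;

        printf("created ok\n");
        free(inode);            /* instantiated: hand off / drop our reference */
        return 0;

failed:
        printf("failure, tearing down partially created object\n");
        free(inode);            /* clear_nlink + delete_inode + iput analogue */
        return res;
}

int main(void)
{
        mknod_like(0, 0);
        mknod_like(0, 1);
        return 0;
}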
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index eba76eab6d62..a94f0f779d5e 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -95,7 +95,7 @@ static void __hfsplus_ext_write_extent(struct inode *inode,
95 HFSPLUS_IS_RSRC(inode) ? 95 HFSPLUS_IS_RSRC(inode) ?
96 HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); 96 HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA);
97 97
98 res = hfs_brec_find(fd); 98 res = hfs_brec_find(fd, hfs_find_rec_by_key);
99 if (hip->extent_state & HFSPLUS_EXT_NEW) { 99 if (hip->extent_state & HFSPLUS_EXT_NEW) {
100 if (res != -ENOENT) 100 if (res != -ENOENT)
101 return; 101 return;
@@ -154,7 +154,7 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
154 154
155 hfsplus_ext_build_key(fd->search_key, cnid, block, type); 155 hfsplus_ext_build_key(fd->search_key, cnid, block, type);
156 fd->key->ext.cnid = 0; 156 fd->key->ext.cnid = 0;
157 res = hfs_brec_find(fd); 157 res = hfs_brec_find(fd, hfs_find_rec_by_key);
158 if (res && res != -ENOENT) 158 if (res && res != -ENOENT)
159 return res; 159 return res;
160 if (fd->key->ext.cnid != fd->search_key->ext.cnid || 160 if (fd->key->ext.cnid != fd->search_key->ext.cnid ||
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index a6da86b1b4c1..05b11f36024c 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -23,6 +23,7 @@
23#define DBG_SUPER 0x00000010 23#define DBG_SUPER 0x00000010
24#define DBG_EXTENT 0x00000020 24#define DBG_EXTENT 0x00000020
25#define DBG_BITMAP 0x00000040 25#define DBG_BITMAP 0x00000040
26#define DBG_ATTR_MOD 0x00000080
26 27
27#if 0 28#if 0
28#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD) 29#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
@@ -46,6 +47,13 @@ typedef int (*btree_keycmp)(const hfsplus_btree_key *,
46 47
47#define NODE_HASH_SIZE 256 48#define NODE_HASH_SIZE 256
48 49
50/* B-tree mutex nested subclasses */
51enum hfsplus_btree_mutex_classes {
52 CATALOG_BTREE_MUTEX,
53 EXTENTS_BTREE_MUTEX,
54 ATTR_BTREE_MUTEX,
55};
56
49/* An HFS+ BTree held in memory */ 57/* An HFS+ BTree held in memory */
50struct hfs_btree { 58struct hfs_btree {
51 struct super_block *sb; 59 struct super_block *sb;
@@ -223,6 +231,7 @@ struct hfsplus_inode_info {
223#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */ 231#define HFSPLUS_I_CAT_DIRTY 1 /* has changes in the catalog tree */
224#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */ 232#define HFSPLUS_I_EXT_DIRTY 2 /* has changes in the extent tree */
225#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */ 233#define HFSPLUS_I_ALLOC_DIRTY 3 /* has changes in the allocation file */
234#define HFSPLUS_I_ATTR_DIRTY 4 /* has changes in the attributes tree */
226 235
227#define HFSPLUS_IS_RSRC(inode) \ 236#define HFSPLUS_IS_RSRC(inode) \
228 test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags) 237 test_bit(HFSPLUS_I_RSRC, &HFSPLUS_I(inode)->flags)
@@ -302,7 +311,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
302#define hfs_brec_remove hfsplus_brec_remove 311#define hfs_brec_remove hfsplus_brec_remove
303#define hfs_find_init hfsplus_find_init 312#define hfs_find_init hfsplus_find_init
304#define hfs_find_exit hfsplus_find_exit 313#define hfs_find_exit hfsplus_find_exit
305#define __hfs_brec_find __hplusfs_brec_find 314#define __hfs_brec_find __hfsplus_brec_find
306#define hfs_brec_find hfsplus_brec_find 315#define hfs_brec_find hfsplus_brec_find
307#define hfs_brec_read hfsplus_brec_read 316#define hfs_brec_read hfsplus_brec_read
308#define hfs_brec_goto hfsplus_brec_goto 317#define hfs_brec_goto hfsplus_brec_goto
@@ -324,10 +333,33 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
324 */ 333 */
325#define HFSPLUS_IOC_BLESS _IO('h', 0x80) 334#define HFSPLUS_IOC_BLESS _IO('h', 0x80)
326 335
336typedef int (*search_strategy_t)(struct hfs_bnode *,
337 struct hfs_find_data *,
338 int *, int *, int *);
339
327/* 340/*
328 * Functions in any *.c used in other files 341 * Functions in any *.c used in other files
329 */ 342 */
330 343
344/* attributes.c */
345int hfsplus_create_attr_tree_cache(void);
346void hfsplus_destroy_attr_tree_cache(void);
347hfsplus_attr_entry *hfsplus_alloc_attr_entry(void);
348void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p);
349int hfsplus_attr_bin_cmp_key(const hfsplus_btree_key *,
350 const hfsplus_btree_key *);
351int hfsplus_attr_build_key(struct super_block *, hfsplus_btree_key *,
352 u32, const char *);
353void hfsplus_attr_build_key_uni(hfsplus_btree_key *key,
354 u32 cnid,
355 struct hfsplus_attr_unistr *name);
356int hfsplus_find_attr(struct super_block *, u32,
357 const char *, struct hfs_find_data *);
358int hfsplus_attr_exists(struct inode *inode, const char *name);
359int hfsplus_create_attr(struct inode *, const char *, const void *, size_t);
360int hfsplus_delete_attr(struct inode *, const char *);
361int hfsplus_delete_all_attrs(struct inode *dir, u32 cnid);
362
331/* bitmap.c */ 363/* bitmap.c */
332int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); 364int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *);
333int hfsplus_block_free(struct super_block *, u32, u32); 365int hfsplus_block_free(struct super_block *, u32, u32);
@@ -369,8 +401,15 @@ int hfs_brec_remove(struct hfs_find_data *);
369/* bfind.c */ 401/* bfind.c */
370int hfs_find_init(struct hfs_btree *, struct hfs_find_data *); 402int hfs_find_init(struct hfs_btree *, struct hfs_find_data *);
371void hfs_find_exit(struct hfs_find_data *); 403void hfs_find_exit(struct hfs_find_data *);
372int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *); 404int hfs_find_1st_rec_by_cnid(struct hfs_bnode *,
373int hfs_brec_find(struct hfs_find_data *); 405 struct hfs_find_data *,
406 int *, int *, int *);
407int hfs_find_rec_by_key(struct hfs_bnode *,
408 struct hfs_find_data *,
409 int *, int *, int *);
410int __hfs_brec_find(struct hfs_bnode *, struct hfs_find_data *,
411 search_strategy_t);
412int hfs_brec_find(struct hfs_find_data *, search_strategy_t);
374int hfs_brec_read(struct hfs_find_data *, void *, int); 413int hfs_brec_read(struct hfs_find_data *, void *, int);
375int hfs_brec_goto(struct hfs_find_data *, int); 414int hfs_brec_goto(struct hfs_find_data *, int);
376 415
@@ -417,11 +456,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
417 456
418/* ioctl.c */ 457/* ioctl.c */
419long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 458long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
420int hfsplus_setxattr(struct dentry *dentry, const char *name,
421 const void *value, size_t size, int flags);
422ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
423 void *value, size_t size);
424ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
425 459
426/* options.c */ 460/* options.c */
427int hfsplus_parse_options(char *, struct hfsplus_sb_info *); 461int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
@@ -446,7 +480,7 @@ int hfsplus_strcmp(const struct hfsplus_unistr *,
446int hfsplus_uni2asc(struct super_block *, 480int hfsplus_uni2asc(struct super_block *,
447 const struct hfsplus_unistr *, char *, int *); 481 const struct hfsplus_unistr *, char *, int *);
448int hfsplus_asc2uni(struct super_block *, 482int hfsplus_asc2uni(struct super_block *,
449 struct hfsplus_unistr *, const char *, int); 483 struct hfsplus_unistr *, int, const char *, int);
450int hfsplus_hash_dentry(const struct dentry *dentry, 484int hfsplus_hash_dentry(const struct dentry *dentry,
451 const struct inode *inode, struct qstr *str); 485 const struct inode *inode, struct qstr *str);
452int hfsplus_compare_dentry(const struct dentry *parent, 486int hfsplus_compare_dentry(const struct dentry *parent,
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 921967e5abb1..452ede01b036 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -52,13 +52,23 @@
52typedef __be32 hfsplus_cnid; 52typedef __be32 hfsplus_cnid;
53typedef __be16 hfsplus_unichr; 53typedef __be16 hfsplus_unichr;
54 54
55#define HFSPLUS_MAX_STRLEN 255
56#define HFSPLUS_ATTR_MAX_STRLEN 127
57
55/* A "string" as used in filenames, etc. */ 58/* A "string" as used in filenames, etc. */
56struct hfsplus_unistr { 59struct hfsplus_unistr {
57 __be16 length; 60 __be16 length;
58 hfsplus_unichr unicode[255]; 61 hfsplus_unichr unicode[HFSPLUS_MAX_STRLEN];
59} __packed; 62} __packed;
60 63
61#define HFSPLUS_MAX_STRLEN 255 64/*
65 * A "string" is used in attributes file
66 * for name of extended attribute
67 */
68struct hfsplus_attr_unistr {
69 __be16 length;
70 hfsplus_unichr unicode[HFSPLUS_ATTR_MAX_STRLEN];
71} __packed;
62 72
63/* POSIX permissions */ 73/* POSIX permissions */
64struct hfsplus_perm { 74struct hfsplus_perm {
@@ -291,6 +301,8 @@ struct hfsplus_cat_file {
291/* File attribute bits */ 301/* File attribute bits */
292#define HFSPLUS_FILE_LOCKED 0x0001 302#define HFSPLUS_FILE_LOCKED 0x0001
293#define HFSPLUS_FILE_THREAD_EXISTS 0x0002 303#define HFSPLUS_FILE_THREAD_EXISTS 0x0002
304#define HFSPLUS_XATTR_EXISTS 0x0004
305#define HFSPLUS_ACL_EXISTS 0x0008
294 306
295/* HFS+ catalog thread (part of a cat_entry) */ 307/* HFS+ catalog thread (part of a cat_entry) */
296struct hfsplus_cat_thread { 308struct hfsplus_cat_thread {
@@ -327,11 +339,63 @@ struct hfsplus_ext_key {
327 339
328#define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) 340#define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key)
329 341
342#define HFSPLUS_XATTR_FINDER_INFO_NAME "com.apple.FinderInfo"
343#define HFSPLUS_XATTR_ACL_NAME "com.apple.system.Security"
344
345#define HFSPLUS_ATTR_INLINE_DATA 0x10
346#define HFSPLUS_ATTR_FORK_DATA 0x20
347#define HFSPLUS_ATTR_EXTENTS 0x30
348
349/* HFS+ attributes tree key */
350struct hfsplus_attr_key {
351 __be16 key_len;
352 __be16 pad;
353 hfsplus_cnid cnid;
354 __be32 start_block;
355 struct hfsplus_attr_unistr key_name;
356} __packed;
357
358#define HFSPLUS_ATTR_KEYLEN sizeof(struct hfsplus_attr_key)
359
360/* HFS+ fork data attribute */
361struct hfsplus_attr_fork_data {
362 __be32 record_type;
363 __be32 reserved;
364 struct hfsplus_fork_raw the_fork;
365} __packed;
366
367/* HFS+ extension attribute */
368struct hfsplus_attr_extents {
369 __be32 record_type;
370 __be32 reserved;
371 struct hfsplus_extent extents;
372} __packed;
373
374#define HFSPLUS_MAX_INLINE_DATA_SIZE 3802
375
376/* HFS+ attribute inline data */
377struct hfsplus_attr_inline_data {
378 __be32 record_type;
379 __be32 reserved1;
380 u8 reserved2[6];
381 __be16 length;
382 u8 raw_bytes[HFSPLUS_MAX_INLINE_DATA_SIZE];
383} __packed;
384
385/* A data record in the attributes tree */
386typedef union {
387 __be32 record_type;
388 struct hfsplus_attr_fork_data fork_data;
389 struct hfsplus_attr_extents extents;
390 struct hfsplus_attr_inline_data inline_data;
391} __packed hfsplus_attr_entry;
392
330/* HFS+ generic BTree key */ 393/* HFS+ generic BTree key */
331typedef union { 394typedef union {
332 __be16 key_len; 395 __be16 key_len;
333 struct hfsplus_cat_key cat; 396 struct hfsplus_cat_key cat;
334 struct hfsplus_ext_key ext; 397 struct hfsplus_ext_key ext;
398 struct hfsplus_attr_key attr;
335} __packed hfsplus_btree_key; 399} __packed hfsplus_btree_key;
336 400
337#endif 401#endif
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 799b336b59f9..160ccc9cdb4b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -17,6 +17,7 @@
17 17
18#include "hfsplus_fs.h" 18#include "hfsplus_fs.h"
19#include "hfsplus_raw.h" 19#include "hfsplus_raw.h"
20#include "xattr.h"
20 21
21static int hfsplus_readpage(struct file *file, struct page *page) 22static int hfsplus_readpage(struct file *file, struct page *page)
22{ 23{
@@ -124,7 +125,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
124{ 125{
125 struct file *file = iocb->ki_filp; 126 struct file *file = iocb->ki_filp;
126 struct address_space *mapping = file->f_mapping; 127 struct address_space *mapping = file->f_mapping;
127 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 128 struct inode *inode = file_inode(file)->i_mapping->host;
128 ssize_t ret; 129 ssize_t ret;
129 130
130 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 131 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
@@ -348,6 +349,18 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
348 error = error2; 349 error = error2;
349 } 350 }
350 351
352 if (test_and_clear_bit(HFSPLUS_I_ATTR_DIRTY, &hip->flags)) {
353 if (sbi->attr_tree) {
354 error2 =
355 filemap_write_and_wait(
356 sbi->attr_tree->inode->i_mapping);
357 if (!error)
358 error = error2;
359 } else {
360 printk(KERN_ERR "hfs: sync non-existent attributes tree\n");
361 }
362 }
363
351 if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) { 364 if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) {
352 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); 365 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
353 if (!error) 366 if (!error)
@@ -365,9 +378,10 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
365static const struct inode_operations hfsplus_file_inode_operations = { 378static const struct inode_operations hfsplus_file_inode_operations = {
366 .lookup = hfsplus_file_lookup, 379 .lookup = hfsplus_file_lookup,
367 .setattr = hfsplus_setattr, 380 .setattr = hfsplus_setattr,
368 .setxattr = hfsplus_setxattr, 381 .setxattr = generic_setxattr,
369 .getxattr = hfsplus_getxattr, 382 .getxattr = generic_getxattr,
370 .listxattr = hfsplus_listxattr, 383 .listxattr = hfsplus_listxattr,
384 .removexattr = hfsplus_removexattr,
371}; 385};
372 386
373static const struct file_operations hfsplus_file_operations = { 387static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 09addc8615fa..d3ff5cc317d7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -16,7 +16,6 @@
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/mount.h> 17#include <linux/mount.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/xattr.h>
20#include <asm/uaccess.h> 19#include <asm/uaccess.h>
21#include "hfsplus_fs.h" 20#include "hfsplus_fs.h"
22 21
@@ -59,7 +58,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
59 58
60static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) 59static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
61{ 60{
62 struct inode *inode = file->f_path.dentry->d_inode; 61 struct inode *inode = file_inode(file);
63 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 62 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
64 unsigned int flags = 0; 63 unsigned int flags = 0;
65 64
@@ -75,7 +74,7 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
75 74
76static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) 75static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
77{ 76{
78 struct inode *inode = file->f_path.dentry->d_inode; 77 struct inode *inode = file_inode(file);
79 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 78 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
80 unsigned int flags; 79 unsigned int flags;
81 int err = 0; 80 int err = 0;
@@ -151,110 +150,3 @@ long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
151 return -ENOTTY; 150 return -ENOTTY;
152 } 151 }
153} 152}
154
155int hfsplus_setxattr(struct dentry *dentry, const char *name,
156 const void *value, size_t size, int flags)
157{
158 struct inode *inode = dentry->d_inode;
159 struct hfs_find_data fd;
160 hfsplus_cat_entry entry;
161 struct hfsplus_cat_file *file;
162 int res;
163
164 if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
165 return -EOPNOTSUPP;
166
167 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
168 if (res)
169 return res;
170 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
171 if (res)
172 goto out;
173 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
174 sizeof(struct hfsplus_cat_file));
175 file = &entry.file;
176
177 if (!strcmp(name, "hfs.type")) {
178 if (size == 4)
179 memcpy(&file->user_info.fdType, value, 4);
180 else
181 res = -ERANGE;
182 } else if (!strcmp(name, "hfs.creator")) {
183 if (size == 4)
184 memcpy(&file->user_info.fdCreator, value, 4);
185 else
186 res = -ERANGE;
187 } else
188 res = -EOPNOTSUPP;
189 if (!res) {
190 hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
191 sizeof(struct hfsplus_cat_file));
192 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
193 }
194out:
195 hfs_find_exit(&fd);
196 return res;
197}
198
199ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
200 void *value, size_t size)
201{
202 struct inode *inode = dentry->d_inode;
203 struct hfs_find_data fd;
204 hfsplus_cat_entry entry;
205 struct hfsplus_cat_file *file;
206 ssize_t res = 0;
207
208 if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
209 return -EOPNOTSUPP;
210
211 if (size) {
212 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
213 if (res)
214 return res;
215 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
216 if (res)
217 goto out;
218 hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
219 sizeof(struct hfsplus_cat_file));
220 }
221 file = &entry.file;
222
223 if (!strcmp(name, "hfs.type")) {
224 if (size >= 4) {
225 memcpy(value, &file->user_info.fdType, 4);
226 res = 4;
227 } else
228 res = size ? -ERANGE : 4;
229 } else if (!strcmp(name, "hfs.creator")) {
230 if (size >= 4) {
231 memcpy(value, &file->user_info.fdCreator, 4);
232 res = 4;
233 } else
234 res = size ? -ERANGE : 4;
235 } else
236 res = -EOPNOTSUPP;
237out:
238 if (size)
239 hfs_find_exit(&fd);
240 return res;
241}
242
243#define HFSPLUS_ATTRLIST_SIZE (sizeof("hfs.creator")+sizeof("hfs.type"))
244
245ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
246{
247 struct inode *inode = dentry->d_inode;
248
249 if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode))
250 return -EOPNOTSUPP;
251
252 if (!buffer || !size)
253 return HFSPLUS_ATTRLIST_SIZE;
254 if (size < HFSPLUS_ATTRLIST_SIZE)
255 return -ERANGE;
256 strcpy(buffer, "hfs.type");
257 strcpy(buffer + sizeof("hfs.type"), "hfs.creator");
258
259 return HFSPLUS_ATTRLIST_SIZE;
260}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 796198d26553..974c26f96fae 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -20,6 +20,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb);
20static void hfsplus_destroy_inode(struct inode *inode); 20static void hfsplus_destroy_inode(struct inode *inode);
21 21
22#include "hfsplus_fs.h" 22#include "hfsplus_fs.h"
23#include "xattr.h"
23 24
24static int hfsplus_system_read_inode(struct inode *inode) 25static int hfsplus_system_read_inode(struct inode *inode)
25{ 26{
@@ -118,6 +119,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
118 case HFSPLUS_ATTR_CNID: 119 case HFSPLUS_ATTR_CNID:
119 fork = &vhdr->attr_file; 120 fork = &vhdr->attr_file;
120 tree = sbi->attr_tree; 121 tree = sbi->attr_tree;
122 break;
121 default: 123 default:
122 return -EIO; 124 return -EIO;
123 } 125 }
@@ -191,6 +193,12 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
191 error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); 193 error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping);
192 if (!error) 194 if (!error)
193 error = error2; 195 error = error2;
196 if (sbi->attr_tree) {
197 error2 =
198 filemap_write_and_wait(sbi->attr_tree->inode->i_mapping);
199 if (!error)
200 error = error2;
201 }
194 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); 202 error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping);
195 if (!error) 203 if (!error)
196 error = error2; 204 error = error2;
@@ -281,6 +289,7 @@ static void hfsplus_put_super(struct super_block *sb)
281 hfsplus_sync_fs(sb, 1); 289 hfsplus_sync_fs(sb, 1);
282 } 290 }
283 291
292 hfs_btree_close(sbi->attr_tree);
284 hfs_btree_close(sbi->cat_tree); 293 hfs_btree_close(sbi->cat_tree);
285 hfs_btree_close(sbi->ext_tree); 294 hfs_btree_close(sbi->ext_tree);
286 iput(sbi->alloc_file); 295 iput(sbi->alloc_file);
@@ -477,12 +486,20 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
477 printk(KERN_ERR "hfs: failed to load catalog file\n"); 486 printk(KERN_ERR "hfs: failed to load catalog file\n");
478 goto out_close_ext_tree; 487 goto out_close_ext_tree;
479 } 488 }
489 if (vhdr->attr_file.total_blocks != 0) {
490 sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID);
491 if (!sbi->attr_tree) {
492 printk(KERN_ERR "hfs: failed to load attributes file\n");
493 goto out_close_cat_tree;
494 }
495 }
496 sb->s_xattr = hfsplus_xattr_handlers;
480 497
481 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); 498 inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID);
482 if (IS_ERR(inode)) { 499 if (IS_ERR(inode)) {
483 printk(KERN_ERR "hfs: failed to load allocation file\n"); 500 printk(KERN_ERR "hfs: failed to load allocation file\n");
484 err = PTR_ERR(inode); 501 err = PTR_ERR(inode);
485 goto out_close_cat_tree; 502 goto out_close_attr_tree;
486 } 503 }
487 sbi->alloc_file = inode; 504 sbi->alloc_file = inode;
488 505
@@ -542,10 +559,27 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
542 } 559 }
543 err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root, 560 err = hfsplus_create_cat(sbi->hidden_dir->i_ino, root,
544 &str, sbi->hidden_dir); 561 &str, sbi->hidden_dir);
545 mutex_unlock(&sbi->vh_mutex); 562 if (err) {
546 if (err) 563 mutex_unlock(&sbi->vh_mutex);
564 goto out_put_hidden_dir;
565 }
566
567 err = hfsplus_init_inode_security(sbi->hidden_dir,
568 root, &str);
569 if (err == -EOPNOTSUPP)
570 err = 0; /* Operation is not supported. */
571 else if (err) {
572 /*
573 * Try to delete the entry anyway,
574 * ignoring any further error.
575 */
576 hfsplus_delete_cat(sbi->hidden_dir->i_ino,
577 root, &str);
578 mutex_unlock(&sbi->vh_mutex);
547 goto out_put_hidden_dir; 579 goto out_put_hidden_dir;
580 }
548 581
582 mutex_unlock(&sbi->vh_mutex);
549 hfsplus_mark_inode_dirty(sbi->hidden_dir, 583 hfsplus_mark_inode_dirty(sbi->hidden_dir,
550 HFSPLUS_I_CAT_DIRTY); 584 HFSPLUS_I_CAT_DIRTY);
551 } 585 }
@@ -562,6 +596,8 @@ out_put_root:
562 sb->s_root = NULL; 596 sb->s_root = NULL;
563out_put_alloc_file: 597out_put_alloc_file:
564 iput(sbi->alloc_file); 598 iput(sbi->alloc_file);
599out_close_attr_tree:
600 hfs_btree_close(sbi->attr_tree);
565out_close_cat_tree: 601out_close_cat_tree:
566 hfs_btree_close(sbi->cat_tree); 602 hfs_btree_close(sbi->cat_tree);
567out_close_ext_tree: 603out_close_ext_tree:
@@ -635,9 +671,20 @@ static int __init init_hfsplus_fs(void)
635 hfsplus_init_once); 671 hfsplus_init_once);
636 if (!hfsplus_inode_cachep) 672 if (!hfsplus_inode_cachep)
637 return -ENOMEM; 673 return -ENOMEM;
674 err = hfsplus_create_attr_tree_cache();
675 if (err)
676 goto destroy_inode_cache;
638 err = register_filesystem(&hfsplus_fs_type); 677 err = register_filesystem(&hfsplus_fs_type);
639 if (err) 678 if (err)
640 kmem_cache_destroy(hfsplus_inode_cachep); 679 goto destroy_attr_tree_cache;
680 return 0;
681
682destroy_attr_tree_cache:
683 hfsplus_destroy_attr_tree_cache();
684
685destroy_inode_cache:
686 kmem_cache_destroy(hfsplus_inode_cachep);
687
641 return err; 688 return err;
642} 689}
643 690
@@ -650,6 +697,7 @@ static void __exit exit_hfsplus_fs(void)
650 * destroy cache. 697 * destroy cache.
651 */ 698 */
652 rcu_barrier(); 699 rcu_barrier();
700 hfsplus_destroy_attr_tree_cache();
653 kmem_cache_destroy(hfsplus_inode_cachep); 701 kmem_cache_destroy(hfsplus_inode_cachep);
654} 702}
655 703
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a32998f29f0b..2c2e47dcfdd8 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -295,7 +295,8 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size)
295 return hfsplus_decompose_table + (off / 4); 295 return hfsplus_decompose_table + (off / 4);
296} 296}
297 297
298int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, 298int hfsplus_asc2uni(struct super_block *sb,
299 struct hfsplus_unistr *ustr, int max_unistr_len,
299 const char *astr, int len) 300 const char *astr, int len)
300{ 301{
301 int size, dsize, decompose; 302 int size, dsize, decompose;
@@ -303,7 +304,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
303 wchar_t c; 304 wchar_t c;
304 305
305 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 306 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
306 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 307 while (outlen < max_unistr_len && len > 0) {
307 size = asc2unichar(sb, astr, len, &c); 308 size = asc2unichar(sb, astr, len, &c);
308 309
309 if (decompose) 310 if (decompose)
@@ -311,7 +312,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
311 else 312 else
312 dstr = NULL; 313 dstr = NULL;
313 if (dstr) { 314 if (dstr) {
314 if (outlen + dsize > HFSPLUS_MAX_STRLEN) 315 if (outlen + dsize > max_unistr_len)
315 break; 316 break;
316 do { 317 do {
317 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 318 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
new file mode 100644
index 000000000000..e8a4b0815c61
--- /dev/null
+++ b/fs/hfsplus/xattr.c
@@ -0,0 +1,709 @@
1/*
2 * linux/fs/hfsplus/xattr.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Logic of processing extended attributes
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11
12const struct xattr_handler *hfsplus_xattr_handlers[] = {
13 &hfsplus_xattr_osx_handler,
14 &hfsplus_xattr_user_handler,
15 &hfsplus_xattr_trusted_handler,
16 &hfsplus_xattr_security_handler,
17 NULL
18};
19
20static int strcmp_xattr_finder_info(const char *name)
21{
22 if (name) {
23 return strncmp(name, HFSPLUS_XATTR_FINDER_INFO_NAME,
24 sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME));
25 }
26 return -1;
27}
28
29static int strcmp_xattr_acl(const char *name)
30{
31 if (name) {
32 return strncmp(name, HFSPLUS_XATTR_ACL_NAME,
33 sizeof(HFSPLUS_XATTR_ACL_NAME));
34 }
35 return -1;
36}
37
38static inline int is_known_namespace(const char *name)
39{
40 if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
41 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
42 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
43 strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
44 return false;
45
46 return true;
47}
48
49static int can_set_xattr(struct inode *inode, const char *name,
50 const void *value, size_t value_len)
51{
52 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
53 return -EOPNOTSUPP; /* TODO: implement ACL support */
54
55 if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) {
56 /*
57 * This makes sure that we aren't trying to set an
58 * attribute in a different namespace by prefixing it
59 * with "osx."
60 */
61 if (is_known_namespace(name + XATTR_MAC_OSX_PREFIX_LEN))
62 return -EOPNOTSUPP;
63
64 return 0;
65 }
66
67 /*
68 * Don't allow setting an attribute in an unknown namespace.
69 */
70 if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
71 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
72 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
73 return -EOPNOTSUPP;
74
75 return 0;
76}
77
78int __hfsplus_setxattr(struct inode *inode, const char *name,
79 const void *value, size_t size, int flags)
80{
81 int err = 0;
82 struct hfs_find_data cat_fd;
83 hfsplus_cat_entry entry;
84 u16 cat_entry_flags, cat_entry_type;
85 u16 folder_finderinfo_len = sizeof(struct DInfo) +
86 sizeof(struct DXInfo);
87 u16 file_finderinfo_len = sizeof(struct FInfo) +
88 sizeof(struct FXInfo);
89
90 if ((!S_ISREG(inode->i_mode) &&
91 !S_ISDIR(inode->i_mode)) ||
92 HFSPLUS_IS_RSRC(inode))
93 return -EOPNOTSUPP;
94
95 err = can_set_xattr(inode, name, value, size);
96 if (err)
97 return err;
98
99 if (strncmp(name, XATTR_MAC_OSX_PREFIX,
100 XATTR_MAC_OSX_PREFIX_LEN) == 0)
101 name += XATTR_MAC_OSX_PREFIX_LEN;
102
103 if (value == NULL) {
104 value = "";
105 size = 0;
106 }
107
108 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd);
109 if (err) {
110 printk(KERN_ERR "hfs: can't init xattr find struct\n");
111 return err;
112 }
113
114 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd);
115 if (err) {
116 printk(KERN_ERR "hfs: catalog searching failed\n");
117 goto end_setxattr;
118 }
119
120 if (!strcmp_xattr_finder_info(name)) {
121 if (flags & XATTR_CREATE) {
122 printk(KERN_ERR "hfs: xattr already exists\n");
123 err = -EOPNOTSUPP;
124 goto end_setxattr;
125 }
126 hfs_bnode_read(cat_fd.bnode, &entry, cat_fd.entryoffset,
127 sizeof(hfsplus_cat_entry));
128 if (be16_to_cpu(entry.type) == HFSPLUS_FOLDER) {
129 if (size == folder_finderinfo_len) {
130 memcpy(&entry.folder.user_info, value,
131 folder_finderinfo_len);
132 hfs_bnode_write(cat_fd.bnode, &entry,
133 cat_fd.entryoffset,
134 sizeof(struct hfsplus_cat_folder));
135 hfsplus_mark_inode_dirty(inode,
136 HFSPLUS_I_CAT_DIRTY);
137 } else {
138 err = -ERANGE;
139 goto end_setxattr;
140 }
141 } else if (be16_to_cpu(entry.type) == HFSPLUS_FILE) {
142 if (size == file_finderinfo_len) {
143 memcpy(&entry.file.user_info, value,
144 file_finderinfo_len);
145 hfs_bnode_write(cat_fd.bnode, &entry,
146 cat_fd.entryoffset,
147 sizeof(struct hfsplus_cat_file));
148 hfsplus_mark_inode_dirty(inode,
149 HFSPLUS_I_CAT_DIRTY);
150 } else {
151 err = -ERANGE;
152 goto end_setxattr;
153 }
154 } else {
155 err = -EOPNOTSUPP;
156 goto end_setxattr;
157 }
158 goto end_setxattr;
159 }
160
161 if (!HFSPLUS_SB(inode->i_sb)->attr_tree) {
162 err = -EOPNOTSUPP;
163 goto end_setxattr;
164 }
165
166 if (hfsplus_attr_exists(inode, name)) {
167 if (flags & XATTR_CREATE) {
168 printk(KERN_ERR "hfs: xattr already exists\n");
169 err = -EOPNOTSUPP;
170 goto end_setxattr;
171 }
172 err = hfsplus_delete_attr(inode, name);
173 if (err)
174 goto end_setxattr;
175 err = hfsplus_create_attr(inode, name, value, size);
176 if (err)
177 goto end_setxattr;
178 } else {
179 if (flags & XATTR_REPLACE) {
180 printk(KERN_ERR "hfs: cannot replace xattr\n");
181 err = -EOPNOTSUPP;
182 goto end_setxattr;
183 }
184 err = hfsplus_create_attr(inode, name, value, size);
185 if (err)
186 goto end_setxattr;
187 }
188
189 cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset);
190 if (cat_entry_type == HFSPLUS_FOLDER) {
191 cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode,
192 cat_fd.entryoffset +
193 offsetof(struct hfsplus_cat_folder, flags));
194 cat_entry_flags |= HFSPLUS_XATTR_EXISTS;
195 if (!strcmp_xattr_acl(name))
196 cat_entry_flags |= HFSPLUS_ACL_EXISTS;
197 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
198 offsetof(struct hfsplus_cat_folder, flags),
199 cat_entry_flags);
200 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
201 } else if (cat_entry_type == HFSPLUS_FILE) {
202 cat_entry_flags = hfs_bnode_read_u16(cat_fd.bnode,
203 cat_fd.entryoffset +
204 offsetof(struct hfsplus_cat_file, flags));
205 cat_entry_flags |= HFSPLUS_XATTR_EXISTS;
206 if (!strcmp_xattr_acl(name))
207 cat_entry_flags |= HFSPLUS_ACL_EXISTS;
208 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
209 offsetof(struct hfsplus_cat_file, flags),
210 cat_entry_flags);
211 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
212 } else {
213 printk(KERN_ERR "hfs: invalid catalog entry type\n");
214 err = -EIO;
215 goto end_setxattr;
216 }
217
218end_setxattr:
219 hfs_find_exit(&cat_fd);
220 return err;
221}
222
223static inline int is_osx_xattr(const char *xattr_name)
224{
225 return !is_known_namespace(xattr_name);
226}
227
228static int name_len(const char *xattr_name, int xattr_name_len)
229{
230 int len = xattr_name_len + 1;
231
232 if (is_osx_xattr(xattr_name))
233 len += XATTR_MAC_OSX_PREFIX_LEN;
234
235 return len;
236}
237
238static int copy_name(char *buffer, const char *xattr_name, int name_len)
239{
240 int len = name_len;
241 int offset = 0;
242
243 if (is_osx_xattr(xattr_name)) {
244 strncpy(buffer, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN);
245 offset += XATTR_MAC_OSX_PREFIX_LEN;
246 len += XATTR_MAC_OSX_PREFIX_LEN;
247 }
248
249 strncpy(buffer + offset, xattr_name, name_len);
250 memset(buffer + offset + name_len, 0, 1);
251 len += 1;
252
253 return len;
254}
255
256static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry,
257 void *value, size_t size)
258{
259 ssize_t res = 0;
260 struct inode *inode = dentry->d_inode;
261 struct hfs_find_data fd;
262 u16 entry_type;
263 u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo);
264 u16 file_rec_len = sizeof(struct FInfo) + sizeof(struct FXInfo);
265 u16 record_len = max(folder_rec_len, file_rec_len);
266 u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
267 u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)];
268
269 if (size >= record_len) {
270 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
271 if (res) {
272 printk(KERN_ERR "hfs: can't init xattr find struct\n");
273 return res;
274 }
275 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
276 if (res)
277 goto end_getxattr_finder_info;
278 entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset);
279
280 if (entry_type == HFSPLUS_FOLDER) {
281 hfs_bnode_read(fd.bnode, folder_finder_info,
282 fd.entryoffset +
283 offsetof(struct hfsplus_cat_folder, user_info),
284 folder_rec_len);
285 memcpy(value, folder_finder_info, folder_rec_len);
286 res = folder_rec_len;
287 } else if (entry_type == HFSPLUS_FILE) {
288 hfs_bnode_read(fd.bnode, file_finder_info,
289 fd.entryoffset +
290 offsetof(struct hfsplus_cat_file, user_info),
291 file_rec_len);
292 memcpy(value, file_finder_info, file_rec_len);
293 res = file_rec_len;
294 } else {
295 res = -EOPNOTSUPP;
296 goto end_getxattr_finder_info;
297 }
298 } else
299 res = size ? -ERANGE : record_len;
300
301end_getxattr_finder_info:
302 if (size >= record_len)
303 hfs_find_exit(&fd);
304 return res;
305}
306
307ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
308 void *value, size_t size)
309{
310 struct inode *inode = dentry->d_inode;
311 struct hfs_find_data fd;
312 hfsplus_attr_entry *entry;
313 __be32 xattr_record_type;
314 u32 record_type;
315 u16 record_length = 0;
316 ssize_t res = 0;
317
318 if ((!S_ISREG(inode->i_mode) &&
319 !S_ISDIR(inode->i_mode)) ||
320 HFSPLUS_IS_RSRC(inode))
321 return -EOPNOTSUPP;
322
323 if (strncmp(name, XATTR_MAC_OSX_PREFIX,
324 XATTR_MAC_OSX_PREFIX_LEN) == 0) {
325 /* skip "osx." prefix */
326 name += XATTR_MAC_OSX_PREFIX_LEN;
327 /*
328 * Don't allow retrieving properly prefixed attributes
329 * by prepending them with "osx."
330 */
331 if (is_known_namespace(name))
332 return -EOPNOTSUPP;
333 }
334
335 if (!strcmp_xattr_finder_info(name))
336 return hfsplus_getxattr_finder_info(dentry, value, size);
337
338 if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
339 return -EOPNOTSUPP;
340
341 entry = hfsplus_alloc_attr_entry();
342 if (!entry) {
343 printk(KERN_ERR "hfs: can't allocate xattr entry\n");
344 return -ENOMEM;
345 }
346
347 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd);
348 if (res) {
349 printk(KERN_ERR "hfs: can't init xattr find struct\n");
350 goto failed_getxattr_init;
351 }
352
353 res = hfsplus_find_attr(inode->i_sb, inode->i_ino, name, &fd);
354 if (res) {
355 if (res == -ENOENT)
356 res = -ENODATA;
357 else
358 printk(KERN_ERR "hfs: xattr searching failed\n");
359 goto out;
360 }
361
362 hfs_bnode_read(fd.bnode, &xattr_record_type,
363 fd.entryoffset, sizeof(xattr_record_type));
364 record_type = be32_to_cpu(xattr_record_type);
365 if (record_type == HFSPLUS_ATTR_INLINE_DATA) {
366 record_length = hfs_bnode_read_u16(fd.bnode,
367 fd.entryoffset +
368 offsetof(struct hfsplus_attr_inline_data,
369 length));
370 if (record_length > HFSPLUS_MAX_INLINE_DATA_SIZE) {
371 printk(KERN_ERR "hfs: invalid xattr record size\n");
372 res = -EIO;
373 goto out;
374 }
375 } else if (record_type == HFSPLUS_ATTR_FORK_DATA ||
376 record_type == HFSPLUS_ATTR_EXTENTS) {
377 printk(KERN_ERR "hfs: only inline data xattr are supported\n");
378 res = -EOPNOTSUPP;
379 goto out;
380 } else {
381 printk(KERN_ERR "hfs: invalid xattr record\n");
382 res = -EIO;
383 goto out;
384 }
385
386 if (size) {
387 hfs_bnode_read(fd.bnode, entry, fd.entryoffset,
388 offsetof(struct hfsplus_attr_inline_data,
389 raw_bytes) + record_length);
390 }
391
392 if (size >= record_length) {
393 memcpy(value, entry->inline_data.raw_bytes, record_length);
394 res = record_length;
395 } else
396 res = size ? -ERANGE : record_length;
397
398out:
399 hfs_find_exit(&fd);
400
401failed_getxattr_init:
402 hfsplus_destroy_attr_entry(entry);
403 return res;
404}
405
406static inline int can_list(const char *xattr_name)
407{
408 if (!xattr_name)
409 return 0;
410
411 return strncmp(xattr_name, XATTR_TRUSTED_PREFIX,
412 XATTR_TRUSTED_PREFIX_LEN) ||
413 capable(CAP_SYS_ADMIN);
414}
415
416static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry,
417 char *buffer, size_t size)
418{
419 ssize_t res = 0;
420 struct inode *inode = dentry->d_inode;
421 struct hfs_find_data fd;
422 u16 entry_type;
423 u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
424 u8 file_finder_info[sizeof(struct FInfo) + sizeof(struct FXInfo)];
425 unsigned long len, found_bit;
426 int xattr_name_len, symbols_count;
427
428 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
429 if (res) {
430 printk(KERN_ERR "hfs: can't init xattr find struct\n");
431 return res;
432 }
433
434 res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
435 if (res)
436 goto end_listxattr_finder_info;
437
438 entry_type = hfs_bnode_read_u16(fd.bnode, fd.entryoffset);
439 if (entry_type == HFSPLUS_FOLDER) {
440 len = sizeof(struct DInfo) + sizeof(struct DXInfo);
441 hfs_bnode_read(fd.bnode, folder_finder_info,
442 fd.entryoffset +
443 offsetof(struct hfsplus_cat_folder, user_info),
444 len);
445 found_bit = find_first_bit((void *)folder_finder_info, len*8);
446 } else if (entry_type == HFSPLUS_FILE) {
447 len = sizeof(struct FInfo) + sizeof(struct FXInfo);
448 hfs_bnode_read(fd.bnode, file_finder_info,
449 fd.entryoffset +
450 offsetof(struct hfsplus_cat_file, user_info),
451 len);
452 found_bit = find_first_bit((void *)file_finder_info, len*8);
453 } else {
454 res = -EOPNOTSUPP;
455 goto end_listxattr_finder_info;
456 }
457
458 if (found_bit >= (len*8))
459 res = 0;
460 else {
461 symbols_count = sizeof(HFSPLUS_XATTR_FINDER_INFO_NAME) - 1;
462 xattr_name_len =
463 name_len(HFSPLUS_XATTR_FINDER_INFO_NAME, symbols_count);
464 if (!buffer || !size) {
465 if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME))
466 res = xattr_name_len;
467 } else if (can_list(HFSPLUS_XATTR_FINDER_INFO_NAME)) {
468 if (size < xattr_name_len)
469 res = -ERANGE;
470 else {
471 res = copy_name(buffer,
472 HFSPLUS_XATTR_FINDER_INFO_NAME,
473 symbols_count);
474 }
475 }
476 }
477
478end_listxattr_finder_info:
479 hfs_find_exit(&fd);
480
481 return res;
482}
483
484ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
485{
486 ssize_t err;
487 ssize_t res = 0;
488 struct inode *inode = dentry->d_inode;
489 struct hfs_find_data fd;
490 u16 key_len = 0;
491 struct hfsplus_attr_key attr_key;
492 char strbuf[HFSPLUS_ATTR_MAX_STRLEN +
493 XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
494 int xattr_name_len;
495
496 if ((!S_ISREG(inode->i_mode) &&
497 !S_ISDIR(inode->i_mode)) ||
498 HFSPLUS_IS_RSRC(inode))
499 return -EOPNOTSUPP;
500
501 res = hfsplus_listxattr_finder_info(dentry, buffer, size);
502 if (res < 0)
503 return res;
504 else if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
505 return (res == 0) ? -EOPNOTSUPP : res;
506
507 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->attr_tree, &fd);
508 if (err) {
509 printk(KERN_ERR "hfs: can't init xattr find struct\n");
510 return err;
511 }
512
513 err = hfsplus_find_attr(inode->i_sb, inode->i_ino, NULL, &fd);
514 if (err) {
515 if (err == -ENOENT) {
516 if (res == 0)
517 res = -ENODATA;
518 goto end_listxattr;
519 } else {
520 res = err;
521 goto end_listxattr;
522 }
523 }
524
525 for (;;) {
526 key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset);
527 if (key_len == 0 || key_len > fd.tree->max_key_len) {
528 printk(KERN_ERR "hfs: invalid xattr key length: %d\n",
529 key_len);
530 res = -EIO;
531 goto end_listxattr;
532 }
533
534 hfs_bnode_read(fd.bnode, &attr_key,
535 fd.keyoffset, key_len + sizeof(key_len));
536
537 if (be32_to_cpu(attr_key.cnid) != inode->i_ino)
538 goto end_listxattr;
539
540 xattr_name_len = HFSPLUS_ATTR_MAX_STRLEN;
541 if (hfsplus_uni2asc(inode->i_sb,
542 (const struct hfsplus_unistr *)&fd.key->attr.key_name,
543 strbuf, &xattr_name_len)) {
544 printk(KERN_ERR "hfs: unicode conversion failed\n");
545 res = -EIO;
546 goto end_listxattr;
547 }
548
549 if (!buffer || !size) {
550 if (can_list(strbuf))
551 res += name_len(strbuf, xattr_name_len);
552 } else if (can_list(strbuf)) {
553 if (size < (res + name_len(strbuf, xattr_name_len))) {
554 res = -ERANGE;
555 goto end_listxattr;
556 } else
557 res += copy_name(buffer + res,
558 strbuf, xattr_name_len);
559 }
560
561 if (hfs_brec_goto(&fd, 1))
562 goto end_listxattr;
563 }
564
565end_listxattr:
566 hfs_find_exit(&fd);
567 return res;
568}
569
570int hfsplus_removexattr(struct dentry *dentry, const char *name)
571{
572 int err = 0;
573 struct inode *inode = dentry->d_inode;
574 struct hfs_find_data cat_fd;
575 u16 flags;
576 u16 cat_entry_type;
577 int is_xattr_acl_deleted = 0;
578 int is_all_xattrs_deleted = 0;
579
580 if ((!S_ISREG(inode->i_mode) &&
581 !S_ISDIR(inode->i_mode)) ||
582 HFSPLUS_IS_RSRC(inode))
583 return -EOPNOTSUPP;
584
585 if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
586 return -EOPNOTSUPP;
587
588 err = can_set_xattr(inode, name, NULL, 0);
589 if (err)
590 return err;
591
592 if (strncmp(name, XATTR_MAC_OSX_PREFIX,
593 XATTR_MAC_OSX_PREFIX_LEN) == 0)
594 name += XATTR_MAC_OSX_PREFIX_LEN;
595
596 if (!strcmp_xattr_finder_info(name))
597 return -EOPNOTSUPP;
598
599 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &cat_fd);
600 if (err) {
601 printk(KERN_ERR "hfs: can't init xattr find struct\n");
602 return err;
603 }
604
605 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &cat_fd);
606 if (err) {
607 printk(KERN_ERR "hfs: catalog searching failed\n");
608 goto end_removexattr;
609 }
610
611 err = hfsplus_delete_attr(inode, name);
612 if (err)
613 goto end_removexattr;
614
615 is_xattr_acl_deleted = !strcmp_xattr_acl(name);
616 is_all_xattrs_deleted = !hfsplus_attr_exists(inode, NULL);
617
618 if (!is_xattr_acl_deleted && !is_all_xattrs_deleted)
619 goto end_removexattr;
620
621 cat_entry_type = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset);
622
623 if (cat_entry_type == HFSPLUS_FOLDER) {
624 flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset +
625 offsetof(struct hfsplus_cat_folder, flags));
626 if (is_xattr_acl_deleted)
627 flags &= ~HFSPLUS_ACL_EXISTS;
628 if (is_all_xattrs_deleted)
629 flags &= ~HFSPLUS_XATTR_EXISTS;
630 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
631 offsetof(struct hfsplus_cat_folder, flags),
632 flags);
633 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
634 } else if (cat_entry_type == HFSPLUS_FILE) {
635 flags = hfs_bnode_read_u16(cat_fd.bnode, cat_fd.entryoffset +
636 offsetof(struct hfsplus_cat_file, flags));
637 if (is_xattr_acl_deleted)
638 flags &= ~HFSPLUS_ACL_EXISTS;
639 if (is_all_xattrs_deleted)
640 flags &= ~HFSPLUS_XATTR_EXISTS;
641 hfs_bnode_write_u16(cat_fd.bnode, cat_fd.entryoffset +
642 offsetof(struct hfsplus_cat_file, flags),
643 flags);
644 hfsplus_mark_inode_dirty(inode, HFSPLUS_I_CAT_DIRTY);
645 } else {
646 printk(KERN_ERR "hfs: invalid catalog entry type\n");
647 err = -EIO;
648 goto end_removexattr;
649 }
650
651end_removexattr:
652 hfs_find_exit(&cat_fd);
653 return err;
654}
655
656static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
657 void *buffer, size_t size, int type)
658{
659 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
660 XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
661 size_t len = strlen(name);
662
663 if (!strcmp(name, ""))
664 return -EINVAL;
665
666 if (len > HFSPLUS_ATTR_MAX_STRLEN)
667 return -EOPNOTSUPP;
668
669 strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
670 strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
671
672 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
673}
674
675static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
676 const void *buffer, size_t size, int flags, int type)
677{
678 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN +
679 XATTR_MAC_OSX_PREFIX_LEN + 1] = {0};
680 size_t len = strlen(name);
681
682 if (!strcmp(name, ""))
683 return -EINVAL;
684
685 if (len > HFSPLUS_ATTR_MAX_STRLEN)
686 return -EOPNOTSUPP;
687
688 strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
689 strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
690
691 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
692}
693
694static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
695 size_t list_size, const char *name, size_t name_len, int type)
696{
697 /*
698 * This method is never called:
699 * hfsplus_listxattr() is used instead of generic_listxattr().
700 */
701 return -EOPNOTSUPP;
702}
703
704const struct xattr_handler hfsplus_xattr_osx_handler = {
705 .prefix = XATTR_MAC_OSX_PREFIX,
706 .list = hfsplus_osx_listxattr,
707 .get = hfsplus_osx_getxattr,
708 .set = hfsplus_osx_setxattr,
709};
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
new file mode 100644
index 000000000000..847b695b984d
--- /dev/null
+++ b/fs/hfsplus/xattr.h
@@ -0,0 +1,60 @@
1/*
2 * linux/fs/hfsplus/xattr.h
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Logic of processing extended attributes
7 */
8
9#ifndef _LINUX_HFSPLUS_XATTR_H
10#define _LINUX_HFSPLUS_XATTR_H
11
12#include <linux/xattr.h>
13
14extern const struct xattr_handler hfsplus_xattr_osx_handler;
15extern const struct xattr_handler hfsplus_xattr_user_handler;
16extern const struct xattr_handler hfsplus_xattr_trusted_handler;
17/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/
18/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/
19extern const struct xattr_handler hfsplus_xattr_security_handler;
20
21extern const struct xattr_handler *hfsplus_xattr_handlers[];
22
23int __hfsplus_setxattr(struct inode *inode, const char *name,
24 const void *value, size_t size, int flags);
25
26static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
27 const void *value, size_t size, int flags)
28{
29 return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
30}
31
32ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
33 void *value, size_t size);
34
35ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
36
37int hfsplus_removexattr(struct dentry *dentry, const char *name);
38
39int hfsplus_init_security(struct inode *inode, struct inode *dir,
40 const struct qstr *qstr);
41
42static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir)
43{
44 /*TODO: implement*/
45 return 0;
46}
47
48static inline int hfsplus_init_inode_security(struct inode *inode,
49 struct inode *dir,
50 const struct qstr *qstr)
51{
52 int err;
53
54 err = hfsplus_init_acl(inode, dir);
55 if (!err)
56 err = hfsplus_init_security(inode, dir, qstr);
57 return err;
58}
59
60#endif
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
new file mode 100644
index 000000000000..83b842f113c5
--- /dev/null
+++ b/fs/hfsplus/xattr_security.c
@@ -0,0 +1,104 @@
1/*
2 * linux/fs/hfsplus/xattr_security.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for storing security labels as extended attributes.
7 */
8
9#include <linux/security.h>
10#include "hfsplus_fs.h"
11#include "xattr.h"
12
13static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
14 void *buffer, size_t size, int type)
15{
16 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
17 size_t len = strlen(name);
18
19 if (!strcmp(name, ""))
20 return -EINVAL;
21
22 if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
23 return -EOPNOTSUPP;
24
25 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
26 strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
27
28 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
29}
30
31static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
32 const void *buffer, size_t size, int flags, int type)
33{
34 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
35 size_t len = strlen(name);
36
37 if (!strcmp(name, ""))
38 return -EINVAL;
39
40 if (len + XATTR_SECURITY_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
41 return -EOPNOTSUPP;
42
43 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
44 strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
45
46 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
47}
48
49static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
50 size_t list_size, const char *name, size_t name_len, int type)
51{
52 /*
53 * This method is never called:
54 * hfsplus_listxattr() is used instead of generic_listxattr().
55 */
56 return -EOPNOTSUPP;
57}
58
59static int hfsplus_initxattrs(struct inode *inode,
60 const struct xattr *xattr_array,
61 void *fs_info)
62{
63 const struct xattr *xattr;
64 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
65 size_t xattr_name_len;
66 int err = 0;
67
68 for (xattr = xattr_array; xattr->name != NULL; xattr++) {
69 xattr_name_len = strlen(xattr->name);
70
71 if (xattr_name_len == 0)
72 continue;
73
74 if (xattr_name_len + XATTR_SECURITY_PREFIX_LEN >
75 HFSPLUS_ATTR_MAX_STRLEN)
76 return -EOPNOTSUPP;
77
78 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
79 strcpy(xattr_name +
80 XATTR_SECURITY_PREFIX_LEN, xattr->name);
81 memset(xattr_name +
82 XATTR_SECURITY_PREFIX_LEN + xattr_name_len, 0, 1);
83
84 err = __hfsplus_setxattr(inode, xattr_name,
85 xattr->value, xattr->value_len, 0);
86 if (err)
87 break;
88 }
89 return err;
90}
91
92int hfsplus_init_security(struct inode *inode, struct inode *dir,
93 const struct qstr *qstr)
94{
95 return security_inode_init_security(inode, dir, qstr,
96 &hfsplus_initxattrs, NULL);
97}
98
99const struct xattr_handler hfsplus_xattr_security_handler = {
100 .prefix = XATTR_SECURITY_PREFIX,
101 .list = hfsplus_security_listxattr,
102 .get = hfsplus_security_getxattr,
103 .set = hfsplus_security_setxattr,
104};
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
new file mode 100644
index 000000000000..426cee277542
--- /dev/null
+++ b/fs/hfsplus/xattr_trusted.c
@@ -0,0 +1,63 @@
1/*
2 * linux/fs/hfsplus/xattr_trusted.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for trusted extended attributes.
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11
12static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
13 void *buffer, size_t size, int type)
14{
15 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
16 size_t len = strlen(name);
17
18 if (!strcmp(name, ""))
19 return -EINVAL;
20
21 if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
22 return -EOPNOTSUPP;
23
24 strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
25 strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
26
27 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
28}
29
30static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
31 const void *buffer, size_t size, int flags, int type)
32{
33 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
34 size_t len = strlen(name);
35
36 if (!strcmp(name, ""))
37 return -EINVAL;
38
39 if (len + XATTR_TRUSTED_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
40 return -EOPNOTSUPP;
41
42 strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
43 strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
44
45 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
46}
47
48static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
49 size_t list_size, const char *name, size_t name_len, int type)
50{
51 /*
52 * This method is never called:
53 * hfsplus_listxattr() is used instead of generic_listxattr().
54 */
55 return -EOPNOTSUPP;
56}
57
58const struct xattr_handler hfsplus_xattr_trusted_handler = {
59 .prefix = XATTR_TRUSTED_PREFIX,
60 .list = hfsplus_trusted_listxattr,
61 .get = hfsplus_trusted_getxattr,
62 .set = hfsplus_trusted_setxattr,
63};
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
new file mode 100644
index 000000000000..e34016561ae0
--- /dev/null
+++ b/fs/hfsplus/xattr_user.c
@@ -0,0 +1,63 @@
1/*
2 * linux/fs/hfsplus/xattr_user.c
3 *
4 * Vyacheslav Dubeyko <slava@dubeyko.com>
5 *
6 * Handler for user extended attributes.
7 */
8
9#include "hfsplus_fs.h"
10#include "xattr.h"
11
12static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
13 void *buffer, size_t size, int type)
14{
15 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
16 size_t len = strlen(name);
17
18 if (!strcmp(name, ""))
19 return -EINVAL;
20
21 if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
22 return -EOPNOTSUPP;
23
24 strcpy(xattr_name, XATTR_USER_PREFIX);
25 strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
26
27 return hfsplus_getxattr(dentry, xattr_name, buffer, size);
28}
29
30static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
31 const void *buffer, size_t size, int flags, int type)
32{
33 char xattr_name[HFSPLUS_ATTR_MAX_STRLEN + 1] = {0};
34 size_t len = strlen(name);
35
36 if (!strcmp(name, ""))
37 return -EINVAL;
38
39 if (len + XATTR_USER_PREFIX_LEN > HFSPLUS_ATTR_MAX_STRLEN)
40 return -EOPNOTSUPP;
41
42 strcpy(xattr_name, XATTR_USER_PREFIX);
43 strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
44
45 return hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
46}
47
48static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
49 size_t list_size, const char *name, size_t name_len, int type)
50{
51 /*
52 * This method is never called:
53 * hfsplus_listxattr() is used instead of generic_listxattr().
54 */
55 return -EOPNOTSUPP;
56}
57
58const struct xattr_handler hfsplus_xattr_user_handler = {
59 .prefix = XATTR_USER_PREFIX,
60 .list = hfsplus_user_listxattr,
61 .get = hfsplus_user_getxattr,
62 .set = hfsplus_user_setxattr,
63};
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 457addc5c91f..fbabb906066f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -30,7 +30,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
30 return list_entry(inode, struct hostfs_inode_info, vfs_inode); 30 return list_entry(inode, struct hostfs_inode_info, vfs_inode);
31} 31}
32 32
33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) 33#define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
34 34
35static int hostfs_d_delete(const struct dentry *dentry) 35static int hostfs_d_delete(const struct dentry *dentry)
36{ 36{
@@ -861,14 +861,6 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
861} 861}
862 862
863static const struct inode_operations hostfs_iops = { 863static const struct inode_operations hostfs_iops = {
864 .create = hostfs_create,
865 .link = hostfs_link,
866 .unlink = hostfs_unlink,
867 .symlink = hostfs_symlink,
868 .mkdir = hostfs_mkdir,
869 .rmdir = hostfs_rmdir,
870 .mknod = hostfs_mknod,
871 .rename = hostfs_rename,
872 .permission = hostfs_permission, 864 .permission = hostfs_permission,
873 .setattr = hostfs_setattr, 865 .setattr = hostfs_setattr,
874}; 866};
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 78e12b2e0ea2..546f6d39713a 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -25,7 +25,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
25 loff_t new_off = off + (whence == 1 ? filp->f_pos : 0); 25 loff_t new_off = off + (whence == 1 ? filp->f_pos : 0);
26 loff_t pos; 26 loff_t pos;
27 struct quad_buffer_head qbh; 27 struct quad_buffer_head qbh;
28 struct inode *i = filp->f_path.dentry->d_inode; 28 struct inode *i = file_inode(filp);
29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
30 struct super_block *s = i->i_sb; 30 struct super_block *s = i->i_sb;
31 31
@@ -57,7 +57,7 @@ fail:
57 57
58static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 58static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
59{ 59{
60 struct inode *inode = filp->f_path.dentry->d_inode; 60 struct inode *inode = file_inode(filp);
61 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); 61 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
62 struct quad_buffer_head qbh; 62 struct quad_buffer_head qbh;
63 struct hpfs_dirent *de; 63 struct hpfs_dirent *de;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fbfe2df5624b..9f9dbeceeee7 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -152,7 +152,7 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
152 retval = do_sync_write(file, buf, count, ppos); 152 retval = do_sync_write(file, buf, count, ppos);
153 if (retval > 0) { 153 if (retval > 0) {
154 hpfs_lock(file->f_path.dentry->d_sb); 154 hpfs_lock(file->f_path.dentry->d_sb);
155 hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1; 155 hpfs_i(file_inode(file))->i_dirty = 1;
156 hpfs_unlock(file->f_path.dentry->d_sb); 156 hpfs_unlock(file->f_path.dentry->d_sb);
157 } 157 }
158 return retval; 158 return retval;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 5dc06c837105..9edeeb0ea97e 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -147,7 +147,7 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) { 147 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 148 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 149 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 structures", i->i_ino);
151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { 151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
152 __le32 ea; 152 __le32 ea;
153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { 153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 43b315f2002b..74f55703be49 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -180,7 +180,7 @@ static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
180 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 180 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
181 ssize_t n; 181 ssize_t n;
182 182
183 read = file->f_path.dentry->d_inode->i_fop->read; 183 read = file_inode(file)->i_fop->read;
184 184
185 if (!is_user) 185 if (!is_user)
186 set_fs(KERNEL_DS); 186 set_fs(KERNEL_DS);
@@ -288,7 +288,7 @@ static ssize_t hppfs_write(struct file *file, const char __user *buf,
288 struct file *proc_file = data->proc_file; 288 struct file *proc_file = data->proc_file;
289 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 289 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
290 290
291 write = proc_file->f_path.dentry->d_inode->i_fop->write; 291 write = file_inode(proc_file)->i_fop->write;
292 return (*write)(proc_file, buf, len, ppos); 292 return (*write)(proc_file, buf, len, ppos);
293} 293}
294 294
@@ -513,7 +513,7 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
513 loff_t (*llseek)(struct file *, loff_t, int); 513 loff_t (*llseek)(struct file *, loff_t, int);
514 loff_t ret; 514 loff_t ret;
515 515
516 llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; 516 llseek = file_inode(proc_file)->i_fop->llseek;
517 if (llseek != NULL) { 517 if (llseek != NULL) {
518 ret = (*llseek)(proc_file, off, where); 518 ret = (*llseek)(proc_file, off, where);
519 if (ret < 0) 519 if (ret < 0)
@@ -561,7 +561,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
561 }); 561 });
562 int err; 562 int err;
563 563
564 readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; 564 readdir = file_inode(proc_file)->i_fop->readdir;
565 565
566 proc_file->f_pos = file->f_pos; 566 proc_file->f_pos = file->f_pos;
567 err = (*readdir)(proc_file, &dirent, hppfs_filldir); 567 err = (*readdir)(proc_file, &dirent, hppfs_filldir);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 78bde32ea951..7f94e0cbc69c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -97,7 +97,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
97 97
98static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 98static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
99{ 99{
100 struct inode *inode = file->f_path.dentry->d_inode; 100 struct inode *inode = file_inode(file);
101 loff_t len, vma_len; 101 loff_t len, vma_len;
102 int ret; 102 int ret;
103 struct hstate *h = hstate_file(file); 103 struct hstate *h = hstate_file(file);
@@ -918,16 +918,25 @@ static int get_hstate_idx(int page_size_log)
918 return h - hstates; 918 return h - hstates;
919} 919}
920 920
921static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
922{
923 return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
924 dentry->d_name.name);
925}
926
927static struct dentry_operations anon_ops = {
928 .d_dname = hugetlb_dname
929};
930
921struct file *hugetlb_file_setup(const char *name, unsigned long addr, 931struct file *hugetlb_file_setup(const char *name, unsigned long addr,
922 size_t size, vm_flags_t acctflag, 932 size_t size, vm_flags_t acctflag,
923 struct user_struct **user, 933 struct user_struct **user,
924 int creat_flags, int page_size_log) 934 int creat_flags, int page_size_log)
925{ 935{
926 int error = -ENOMEM; 936 struct file *file = ERR_PTR(-ENOMEM);
927 struct file *file;
928 struct inode *inode; 937 struct inode *inode;
929 struct path path; 938 struct path path;
930 struct dentry *root; 939 struct super_block *sb;
931 struct qstr quick_string; 940 struct qstr quick_string;
932 struct hstate *hstate; 941 struct hstate *hstate;
933 unsigned long num_pages; 942 unsigned long num_pages;
@@ -955,17 +964,18 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
955 } 964 }
956 } 965 }
957 966
958 root = hugetlbfs_vfsmount[hstate_idx]->mnt_root; 967 sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb;
959 quick_string.name = name; 968 quick_string.name = name;
960 quick_string.len = strlen(quick_string.name); 969 quick_string.len = strlen(quick_string.name);
961 quick_string.hash = 0; 970 quick_string.hash = 0;
962 path.dentry = d_alloc(root, &quick_string); 971 path.dentry = d_alloc_pseudo(sb, &quick_string);
963 if (!path.dentry) 972 if (!path.dentry)
964 goto out_shm_unlock; 973 goto out_shm_unlock;
965 974
975 d_set_d_op(path.dentry, &anon_ops);
966 path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); 976 path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]);
967 error = -ENOSPC; 977 file = ERR_PTR(-ENOSPC);
968 inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); 978 inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
969 if (!inode) 979 if (!inode)
970 goto out_dentry; 980 goto out_dentry;
971 981
@@ -973,7 +983,7 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
973 size += addr & ~huge_page_mask(hstate); 983 size += addr & ~huge_page_mask(hstate);
974 num_pages = ALIGN(size, huge_page_size(hstate)) >> 984 num_pages = ALIGN(size, huge_page_size(hstate)) >>
975 huge_page_shift(hstate); 985 huge_page_shift(hstate);
976 error = -ENOMEM; 986 file = ERR_PTR(-ENOMEM);
977 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) 987 if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
978 goto out_inode; 988 goto out_inode;
979 989
@@ -981,10 +991,9 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
981 inode->i_size = size; 991 inode->i_size = size;
982 clear_nlink(inode); 992 clear_nlink(inode);
983 993
984 error = -ENFILE;
985 file = alloc_file(&path, FMODE_WRITE | FMODE_READ, 994 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
986 &hugetlbfs_file_operations); 995 &hugetlbfs_file_operations);
987 if (!file) 996 if (IS_ERR(file))
988 goto out_dentry; /* inode is already attached */ 997 goto out_dentry; /* inode is already attached */
989 998
990 return file; 999 return file;
@@ -998,7 +1007,7 @@ out_shm_unlock:
998 user_shm_unlock(size, *user); 1007 user_shm_unlock(size, *user);
999 *user = NULL; 1008 *user = NULL;
1000 } 1009 }
1001 return ERR_PTR(error); 1010 return file;
1002} 1011}
1003 1012
1004static int __init init_hugetlbfs_fs(void) 1013static int __init init_hugetlbfs_fs(void)
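The hugetlbfs hunk above also switches hugetlb_file_setup() from keeping a separate "int error" that is converted with ERR_PTR() at the very end, to carrying the error inside the file pointer itself and checking alloc_file() with IS_ERR(). A minimal userspace sketch of that error-carrying convention follows; ERR_PTR/IS_ERR/PTR_ERR are reimplemented here only for illustration and setup_object() is a made-up stand-in for the real function:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)      { return (void *)error; }
static inline long  PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int   IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Stand-in for hugetlb_file_setup(): the pending error travels in the
 * returned pointer, so no separate 'int error' local needs to be kept
 * in sync with it. */
static void *setup_object(int simulate_failure)
{
	void *obj = ERR_PTR(-ENOMEM);

	if (simulate_failure)
		return obj;
	return "a real object";
}

int main(void)
{
	void *obj = setup_object(1);

	if (IS_ERR(obj))
		printf("setup failed: %ld\n", PTR_ERR(obj));
	return 0;
}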
diff --git a/fs/inode.c b/fs/inode.c
index 14084b72b259..f5f7c06c36fb 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -798,11 +798,10 @@ static struct inode *find_inode(struct super_block *sb,
798 int (*test)(struct inode *, void *), 798 int (*test)(struct inode *, void *),
799 void *data) 799 void *data)
800{ 800{
801 struct hlist_node *node;
802 struct inode *inode = NULL; 801 struct inode *inode = NULL;
803 802
804repeat: 803repeat:
805 hlist_for_each_entry(inode, node, head, i_hash) { 804 hlist_for_each_entry(inode, head, i_hash) {
806 spin_lock(&inode->i_lock); 805 spin_lock(&inode->i_lock);
807 if (inode->i_sb != sb) { 806 if (inode->i_sb != sb) {
808 spin_unlock(&inode->i_lock); 807 spin_unlock(&inode->i_lock);
@@ -830,11 +829,10 @@ repeat:
830static struct inode *find_inode_fast(struct super_block *sb, 829static struct inode *find_inode_fast(struct super_block *sb,
831 struct hlist_head *head, unsigned long ino) 830 struct hlist_head *head, unsigned long ino)
832{ 831{
833 struct hlist_node *node;
834 struct inode *inode = NULL; 832 struct inode *inode = NULL;
835 833
836repeat: 834repeat:
837 hlist_for_each_entry(inode, node, head, i_hash) { 835 hlist_for_each_entry(inode, head, i_hash) {
838 spin_lock(&inode->i_lock); 836 spin_lock(&inode->i_lock);
839 if (inode->i_ino != ino) { 837 if (inode->i_ino != ino) {
840 spin_unlock(&inode->i_lock); 838 spin_unlock(&inode->i_lock);
@@ -1132,11 +1130,10 @@ EXPORT_SYMBOL(iget_locked);
1132static int test_inode_iunique(struct super_block *sb, unsigned long ino) 1130static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1133{ 1131{
1134 struct hlist_head *b = inode_hashtable + hash(sb, ino); 1132 struct hlist_head *b = inode_hashtable + hash(sb, ino);
1135 struct hlist_node *node;
1136 struct inode *inode; 1133 struct inode *inode;
1137 1134
1138 spin_lock(&inode_hash_lock); 1135 spin_lock(&inode_hash_lock);
1139 hlist_for_each_entry(inode, node, b, i_hash) { 1136 hlist_for_each_entry(inode, b, i_hash) {
1140 if (inode->i_ino == ino && inode->i_sb == sb) { 1137 if (inode->i_ino == ino && inode->i_sb == sb) {
1141 spin_unlock(&inode_hash_lock); 1138 spin_unlock(&inode_hash_lock);
1142 return 0; 1139 return 0;
@@ -1291,10 +1288,9 @@ int insert_inode_locked(struct inode *inode)
1291 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1288 struct hlist_head *head = inode_hashtable + hash(sb, ino);
1292 1289
1293 while (1) { 1290 while (1) {
1294 struct hlist_node *node;
1295 struct inode *old = NULL; 1291 struct inode *old = NULL;
1296 spin_lock(&inode_hash_lock); 1292 spin_lock(&inode_hash_lock);
1297 hlist_for_each_entry(old, node, head, i_hash) { 1293 hlist_for_each_entry(old, head, i_hash) {
1298 if (old->i_ino != ino) 1294 if (old->i_ino != ino)
1299 continue; 1295 continue;
1300 if (old->i_sb != sb) 1296 if (old->i_sb != sb)
@@ -1306,7 +1302,7 @@ int insert_inode_locked(struct inode *inode)
1306 } 1302 }
1307 break; 1303 break;
1308 } 1304 }
1309 if (likely(!node)) { 1305 if (likely(!old)) {
1310 spin_lock(&inode->i_lock); 1306 spin_lock(&inode->i_lock);
1311 inode->i_state |= I_NEW; 1307 inode->i_state |= I_NEW;
1312 hlist_add_head(&inode->i_hash, head); 1308 hlist_add_head(&inode->i_hash, head);
@@ -1334,11 +1330,10 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1334 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1330 struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1335 1331
1336 while (1) { 1332 while (1) {
1337 struct hlist_node *node;
1338 struct inode *old = NULL; 1333 struct inode *old = NULL;
1339 1334
1340 spin_lock(&inode_hash_lock); 1335 spin_lock(&inode_hash_lock);
1341 hlist_for_each_entry(old, node, head, i_hash) { 1336 hlist_for_each_entry(old, head, i_hash) {
1342 if (old->i_sb != sb) 1337 if (old->i_sb != sb)
1343 continue; 1338 continue;
1344 if (!test(old, data)) 1339 if (!test(old, data))
@@ -1350,7 +1345,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1350 } 1345 }
1351 break; 1346 break;
1352 } 1347 }
1353 if (likely(!node)) { 1348 if (likely(!old)) {
1354 spin_lock(&inode->i_lock); 1349 spin_lock(&inode->i_lock);
1355 inode->i_state |= I_NEW; 1350 inode->i_state |= I_NEW;
1356 hlist_add_head(&inode->i_hash, head); 1351 hlist_add_head(&inode->i_hash, head);
@@ -1655,7 +1650,7 @@ EXPORT_SYMBOL(file_remove_suid);
1655 1650
1656int file_update_time(struct file *file) 1651int file_update_time(struct file *file)
1657{ 1652{
1658 struct inode *inode = file->f_path.dentry->d_inode; 1653 struct inode *inode = file_inode(file);
1659 struct timespec now; 1654 struct timespec now;
1660 int sync_it = 0; 1655 int sync_it = 0;
1661 int ret; 1656 int ret;
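The fs/inode.c hunks above all follow the same shape: the separate struct hlist_node cursor is dropped, the loop iterates over the typed entries directly, and the "nothing found" test moves from 'if (likely(!node))' to 'if (likely(!old))'. That works because the typed cursor is left NULL when the loop runs to completion. A simplified, runnable illustration — using a plain singly linked list and an invented for_each_entry macro rather than the kernel's hlist machinery — is:

#include <stdio.h>
#include <stddef.h>

struct entry { unsigned long ino; struct entry *next; };

/* Illustrative macro: the cursor is the typed entry itself and ends up
 * NULL when the loop terminates without a break. */
#define for_each_entry(pos, head) \
	for ((pos) = (head); (pos) != NULL; (pos) = (pos)->next)

int main(void)
{
	struct entry c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct entry *found = NULL;

	for_each_entry(found, &a)
		if (found->ino == 42)
			break;

	if (!found)	/* mirrors the 'if (likely(!old))' checks in the patch */
		printf("no matching entry\n");
	return 0;
}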
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 3bdad6d1f268..fd507fb460f8 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -175,7 +175,7 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
175 struct fiemap fiemap; 175 struct fiemap fiemap;
176 struct fiemap __user *ufiemap = (struct fiemap __user *) arg; 176 struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
177 struct fiemap_extent_info fieinfo = { 0, }; 177 struct fiemap_extent_info fieinfo = { 0, };
178 struct inode *inode = filp->f_path.dentry->d_inode; 178 struct inode *inode = file_inode(filp);
179 struct super_block *sb = inode->i_sb; 179 struct super_block *sb = inode->i_sb;
180 u64 len; 180 u64 len;
181 int error; 181 int error;
@@ -424,7 +424,7 @@ EXPORT_SYMBOL(generic_block_fiemap);
424 */ 424 */
425int ioctl_preallocate(struct file *filp, void __user *argp) 425int ioctl_preallocate(struct file *filp, void __user *argp)
426{ 426{
427 struct inode *inode = filp->f_path.dentry->d_inode; 427 struct inode *inode = file_inode(filp);
428 struct space_resv sr; 428 struct space_resv sr;
429 429
430 if (copy_from_user(&sr, argp, sizeof(sr))) 430 if (copy_from_user(&sr, argp, sizeof(sr)))
@@ -449,7 +449,7 @@ int ioctl_preallocate(struct file *filp, void __user *argp)
449static int file_ioctl(struct file *filp, unsigned int cmd, 449static int file_ioctl(struct file *filp, unsigned int cmd,
450 unsigned long arg) 450 unsigned long arg)
451{ 451{
452 struct inode *inode = filp->f_path.dentry->d_inode; 452 struct inode *inode = file_inode(filp);
453 int __user *p = (int __user *)arg; 453 int __user *p = (int __user *)arg;
454 454
455 switch (cmd) { 455 switch (cmd) {
@@ -512,7 +512,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
512 512
513static int ioctl_fsfreeze(struct file *filp) 513static int ioctl_fsfreeze(struct file *filp)
514{ 514{
515 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; 515 struct super_block *sb = file_inode(filp)->i_sb;
516 516
517 if (!capable(CAP_SYS_ADMIN)) 517 if (!capable(CAP_SYS_ADMIN))
518 return -EPERM; 518 return -EPERM;
@@ -527,7 +527,7 @@ static int ioctl_fsfreeze(struct file *filp)
527 527
528static int ioctl_fsthaw(struct file *filp) 528static int ioctl_fsthaw(struct file *filp)
529{ 529{
530 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; 530 struct super_block *sb = file_inode(filp)->i_sb;
531 531
532 if (!capable(CAP_SYS_ADMIN)) 532 if (!capable(CAP_SYS_ADMIN))
533 return -EPERM; 533 return -EPERM;
@@ -548,7 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
548{ 548{
549 int error = 0; 549 int error = 0;
550 int __user *argp = (int __user *)arg; 550 int __user *argp = (int __user *)arg;
551 struct inode *inode = filp->f_path.dentry->d_inode; 551 struct inode *inode = file_inode(filp);
552 552
553 switch (cmd) { 553 switch (cmd) {
554 case FIOCLEX: 554 case FIOCLEX:
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 0b3fa7974fa8..592e5115a561 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -296,7 +296,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
296 */ 296 */
297static int zisofs_readpage(struct file *file, struct page *page) 297static int zisofs_readpage(struct file *file, struct page *page)
298{ 298{
299 struct inode *inode = file->f_path.dentry->d_inode; 299 struct inode *inode = file_inode(file);
300 struct address_space *mapping = inode->i_mapping; 300 struct address_space *mapping = inode->i_mapping;
301 int err; 301 int err;
302 int i, pcount, full_page; 302 int i, pcount, full_page;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index f20437c068a0..a7d5c3c3d4e6 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -253,7 +253,7 @@ static int isofs_readdir(struct file *filp,
253 int result; 253 int result;
254 char *tmpname; 254 char *tmpname;
255 struct iso_directory_record *tmpde; 255 struct iso_directory_record *tmpde;
256 struct inode *inode = filp->f_path.dentry->d_inode; 256 struct inode *inode = file_inode(filp);
257 257
258 tmpname = (char *)__get_free_page(GFP_KERNEL); 258 tmpname = (char *)__get_free_page(GFP_KERNEL);
259 if (tmpname == NULL) 259 if (tmpname == NULL)
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 2b4f2358eadb..12088d8de3fa 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -125,10 +125,10 @@ isofs_export_encode_fh(struct inode *inode,
125 */ 125 */
126 if (parent && (len < 5)) { 126 if (parent && (len < 5)) {
127 *max_len = 5; 127 *max_len = 5;
128 return 255; 128 return FILEID_INVALID;
129 } else if (len < 3) { 129 } else if (len < 3) {
130 *max_len = 3; 130 *max_len = 3;
131 return 255; 131 return FILEID_INVALID;
132 } 132 }
133 133
134 len = 3; 134 len = 3;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index a2862339323b..81cc7eaff863 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -446,7 +446,8 @@ int __log_start_commit(journal_t *journal, tid_t target)
446 * currently running transaction (if it exists). Otherwise, 446 * currently running transaction (if it exists). Otherwise,
447 * the target tid must be an old one. 447 * the target tid must be an old one.
448 */ 448 */
449 if (journal->j_running_transaction && 449 if (journal->j_commit_request != target &&
450 journal->j_running_transaction &&
450 journal->j_running_transaction->t_tid == target) { 451 journal->j_running_transaction->t_tid == target) {
451 /* 452 /*
452 * We want a new commit: OK, mark the request and wakeup the 453 * We want a new commit: OK, mark the request and wakeup the
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3091d42992f0..750c70148eff 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -435,7 +435,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
435 435
436 trace_jbd2_commit_locking(journal, commit_transaction); 436 trace_jbd2_commit_locking(journal, commit_transaction);
437 stats.run.rs_wait = commit_transaction->t_max_wait; 437 stats.run.rs_wait = commit_transaction->t_max_wait;
438 stats.run.rs_request_delay = 0;
438 stats.run.rs_locked = jiffies; 439 stats.run.rs_locked = jiffies;
440 if (commit_transaction->t_requested)
441 stats.run.rs_request_delay =
442 jbd2_time_diff(commit_transaction->t_requested,
443 stats.run.rs_locked);
439 stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start, 444 stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
440 stats.run.rs_locked); 445 stats.run.rs_locked);
441 446
@@ -1116,7 +1121,10 @@ restart_loop:
1116 */ 1121 */
1117 spin_lock(&journal->j_history_lock); 1122 spin_lock(&journal->j_history_lock);
1118 journal->j_stats.ts_tid++; 1123 journal->j_stats.ts_tid++;
1124 if (commit_transaction->t_requested)
1125 journal->j_stats.ts_requested++;
1119 journal->j_stats.run.rs_wait += stats.run.rs_wait; 1126 journal->j_stats.run.rs_wait += stats.run.rs_wait;
1127 journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
1120 journal->j_stats.run.rs_running += stats.run.rs_running; 1128 journal->j_stats.run.rs_running += stats.run.rs_running;
1121 journal->j_stats.run.rs_locked += stats.run.rs_locked; 1129 journal->j_stats.run.rs_locked += stats.run.rs_locked;
1122 journal->j_stats.run.rs_flushing += stats.run.rs_flushing; 1130 journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
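The jbd2/commit.c hunk records a per-transaction request delay only when the transaction was explicitly requested (t_requested set) and counts such transactions separately, so that the averaged figure printed later can be guarded against dividing by zero. A small userspace sketch of that bookkeeping, with invented names standing in for the jbd2 fields:

#include <stdio.h>

struct commit_stats {
	unsigned long ts_tid;           /* transactions committed           */
	unsigned long ts_requested;     /* ... that had an explicit request */
	unsigned long rs_request_delay; /* summed request->lock delay       */
};

static void account_commit(struct commit_stats *s, unsigned long requested_at,
			   unsigned long locked_at)
{
	s->ts_tid++;
	if (requested_at) {		/* mirrors 'if (t_requested)' above */
		s->ts_requested++;
		s->rs_request_delay += locked_at - requested_at;
	}
}

int main(void)
{
	struct commit_stats s = { 0, 0, 0 };

	account_commit(&s, 100, 130);	/* explicitly requested commit      */
	account_commit(&s, 0, 250);	/* timer-driven commit, no request  */

	printf("average request delay: %lu ticks\n",
	       s.ts_requested ? s.rs_request_delay / s.ts_requested : 0);
	return 0;
}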
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index dbf41f9452db..ed10991ab006 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -35,7 +35,6 @@
35#include <linux/kthread.h> 35#include <linux/kthread.h>
36#include <linux/poison.h> 36#include <linux/poison.h>
37#include <linux/proc_fs.h> 37#include <linux/proc_fs.h>
38#include <linux/debugfs.h>
39#include <linux/seq_file.h> 38#include <linux/seq_file.h>
40#include <linux/math64.h> 39#include <linux/math64.h>
41#include <linux/hash.h> 40#include <linux/hash.h>
@@ -51,6 +50,14 @@
51#include <asm/uaccess.h> 50#include <asm/uaccess.h>
52#include <asm/page.h> 51#include <asm/page.h>
53 52
53#ifdef CONFIG_JBD2_DEBUG
54ushort jbd2_journal_enable_debug __read_mostly;
55EXPORT_SYMBOL(jbd2_journal_enable_debug);
56
57module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
58MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
59#endif
60
54EXPORT_SYMBOL(jbd2_journal_extend); 61EXPORT_SYMBOL(jbd2_journal_extend);
55EXPORT_SYMBOL(jbd2_journal_stop); 62EXPORT_SYMBOL(jbd2_journal_stop);
56EXPORT_SYMBOL(jbd2_journal_lock_updates); 63EXPORT_SYMBOL(jbd2_journal_lock_updates);
@@ -513,6 +520,10 @@ int __jbd2_log_space_left(journal_t *journal)
513 */ 520 */
514int __jbd2_log_start_commit(journal_t *journal, tid_t target) 521int __jbd2_log_start_commit(journal_t *journal, tid_t target)
515{ 522{
523 /* Return if the txn has already requested to be committed */
524 if (journal->j_commit_request == target)
525 return 0;
526
516 /* 527 /*
517 * The only transaction we can possibly wait upon is the 528 * The only transaction we can possibly wait upon is the
518 * currently running transaction (if it exists). Otherwise, 529 * currently running transaction (if it exists). Otherwise,
@@ -529,6 +540,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
529 jbd_debug(1, "JBD2: requesting commit %d/%d\n", 540 jbd_debug(1, "JBD2: requesting commit %d/%d\n",
530 journal->j_commit_request, 541 journal->j_commit_request,
531 journal->j_commit_sequence); 542 journal->j_commit_sequence);
543 journal->j_running_transaction->t_requested = jiffies;
532 wake_up(&journal->j_wait_commit); 544 wake_up(&journal->j_wait_commit);
533 return 1; 545 return 1;
534 } else if (!tid_geq(journal->j_commit_request, target)) 546 } else if (!tid_geq(journal->j_commit_request, target))
@@ -894,13 +906,18 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v)
894 906
895 if (v != SEQ_START_TOKEN) 907 if (v != SEQ_START_TOKEN)
896 return 0; 908 return 0;
897 seq_printf(seq, "%lu transaction, each up to %u blocks\n", 909 seq_printf(seq, "%lu transactions (%lu requested), "
898 s->stats->ts_tid, 910 "each up to %u blocks\n",
899 s->journal->j_max_transaction_buffers); 911 s->stats->ts_tid, s->stats->ts_requested,
912 s->journal->j_max_transaction_buffers);
900 if (s->stats->ts_tid == 0) 913 if (s->stats->ts_tid == 0)
901 return 0; 914 return 0;
902 seq_printf(seq, "average: \n %ums waiting for transaction\n", 915 seq_printf(seq, "average: \n %ums waiting for transaction\n",
903 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); 916 jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
917 seq_printf(seq, " %ums request delay\n",
918 (s->stats->ts_requested == 0) ? 0 :
919 jiffies_to_msecs(s->stats->run.rs_request_delay /
920 s->stats->ts_requested));
904 seq_printf(seq, " %ums running transaction\n", 921 seq_printf(seq, " %ums running transaction\n",
905 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); 922 jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
906 seq_printf(seq, " %ums transaction was being locked\n", 923 seq_printf(seq, " %ums transaction was being locked\n",
@@ -2485,45 +2502,6 @@ restart:
2485 spin_unlock(&journal->j_list_lock); 2502 spin_unlock(&journal->j_list_lock);
2486} 2503}
2487 2504
2488/*
2489 * debugfs tunables
2490 */
2491#ifdef CONFIG_JBD2_DEBUG
2492u8 jbd2_journal_enable_debug __read_mostly;
2493EXPORT_SYMBOL(jbd2_journal_enable_debug);
2494
2495#define JBD2_DEBUG_NAME "jbd2-debug"
2496
2497static struct dentry *jbd2_debugfs_dir;
2498static struct dentry *jbd2_debug;
2499
2500static void __init jbd2_create_debugfs_entry(void)
2501{
2502 jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
2503 if (jbd2_debugfs_dir)
2504 jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
2505 S_IRUGO | S_IWUSR,
2506 jbd2_debugfs_dir,
2507 &jbd2_journal_enable_debug);
2508}
2509
2510static void __exit jbd2_remove_debugfs_entry(void)
2511{
2512 debugfs_remove(jbd2_debug);
2513 debugfs_remove(jbd2_debugfs_dir);
2514}
2515
2516#else
2517
2518static void __init jbd2_create_debugfs_entry(void)
2519{
2520}
2521
2522static void __exit jbd2_remove_debugfs_entry(void)
2523{
2524}
2525
2526#endif
2527 2505
2528#ifdef CONFIG_PROC_FS 2506#ifdef CONFIG_PROC_FS
2529 2507
@@ -2609,7 +2587,6 @@ static int __init journal_init(void)
2609 2587
2610 ret = journal_init_caches(); 2588 ret = journal_init_caches();
2611 if (ret == 0) { 2589 if (ret == 0) {
2612 jbd2_create_debugfs_entry();
2613 jbd2_create_jbd_stats_proc_entry(); 2590 jbd2_create_jbd_stats_proc_entry();
2614 } else { 2591 } else {
2615 jbd2_journal_destroy_caches(); 2592 jbd2_journal_destroy_caches();
@@ -2624,7 +2601,6 @@ static void __exit journal_exit(void)
2624 if (n) 2601 if (n)
2625 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); 2602 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
2626#endif 2603#endif
2627 jbd2_remove_debugfs_entry();
2628 jbd2_remove_jbd_stats_proc_entry(); 2604 jbd2_remove_jbd_stats_proc_entry();
2629 jbd2_journal_destroy_caches(); 2605 jbd2_journal_destroy_caches();
2630} 2606}
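The jbd2/journal.c changes above drop the debugfs tunable and expose the debugging level as a module parameter instead. As a generic, hedged illustration of that style — this is a stand-alone skeleton with made-up names, not the jbd2 code — a module parameter whose externally visible name differs from the variable looks like:

#include <linux/module.h>
#include <linux/moduleparam.h>

static ushort demo_debug_level;

/* Typically surfaces under /sys/module/<module>/parameters/demo_debug and
 * can be set at load time, e.g. "modprobe <module> demo_debug=2". */
module_param_named(demo_debug, demo_debug_level, ushort, 0644);
MODULE_PARM_DESC(demo_debug, "Debugging level (0 = off)");

static int __init demo_init(void)
{
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Compared with the removed debugfs file, this needs no create/remove hooks in the init and exit paths, which is exactly what the journal_init()/journal_exit() hunks delete.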
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index df9f29760efa..b7e2385c6e92 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,8 @@
30#include <linux/bug.h> 30#include <linux/bug.h>
31#include <linux/module.h> 31#include <linux/module.h>
32 32
33#include <trace/events/jbd2.h>
34
33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 35static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
34static void __jbd2_journal_unfile_buffer(struct journal_head *jh); 36static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
35 37
@@ -100,6 +102,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
100 journal->j_running_transaction = transaction; 102 journal->j_running_transaction = transaction;
101 transaction->t_max_wait = 0; 103 transaction->t_max_wait = 0;
102 transaction->t_start = jiffies; 104 transaction->t_start = jiffies;
105 transaction->t_requested = 0;
103 106
104 return transaction; 107 return transaction;
105} 108}
@@ -306,6 +309,8 @@ repeat:
306 */ 309 */
307 update_t_max_wait(transaction, ts); 310 update_t_max_wait(transaction, ts);
308 handle->h_transaction = transaction; 311 handle->h_transaction = transaction;
312 handle->h_requested_credits = nblocks;
313 handle->h_start_jiffies = jiffies;
309 atomic_inc(&transaction->t_updates); 314 atomic_inc(&transaction->t_updates);
310 atomic_inc(&transaction->t_handle_count); 315 atomic_inc(&transaction->t_handle_count);
311 jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", 316 jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
@@ -352,7 +357,8 @@ static handle_t *new_handle(int nblocks)
352 * Return a pointer to a newly allocated handle, or an ERR_PTR() value 357 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
353 * on failure. 358 * on failure.
354 */ 359 */
355handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask) 360handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
361 unsigned int type, unsigned int line_no)
356{ 362{
357 handle_t *handle = journal_current_handle(); 363 handle_t *handle = journal_current_handle();
358 int err; 364 int err;
@@ -378,6 +384,11 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
378 current->journal_info = NULL; 384 current->journal_info = NULL;
379 handle = ERR_PTR(err); 385 handle = ERR_PTR(err);
380 } 386 }
387 handle->h_type = type;
388 handle->h_line_no = line_no;
389 trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
390 handle->h_transaction->t_tid, type,
391 line_no, nblocks);
381 return handle; 392 return handle;
382} 393}
383EXPORT_SYMBOL(jbd2__journal_start); 394EXPORT_SYMBOL(jbd2__journal_start);
@@ -385,7 +396,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
385 396
386handle_t *jbd2_journal_start(journal_t *journal, int nblocks) 397handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
387{ 398{
388 return jbd2__journal_start(journal, nblocks, GFP_NOFS); 399 return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0);
389} 400}
390EXPORT_SYMBOL(jbd2_journal_start); 401EXPORT_SYMBOL(jbd2_journal_start);
391 402
@@ -447,7 +458,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
447 goto unlock; 458 goto unlock;
448 } 459 }
449 460
461 trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
462 handle->h_transaction->t_tid,
463 handle->h_type, handle->h_line_no,
464 handle->h_buffer_credits,
465 nblocks);
466
450 handle->h_buffer_credits += nblocks; 467 handle->h_buffer_credits += nblocks;
468 handle->h_requested_credits += nblocks;
451 atomic_add(nblocks, &transaction->t_outstanding_credits); 469 atomic_add(nblocks, &transaction->t_outstanding_credits);
452 result = 0; 470 result = 0;
453 471
@@ -1376,6 +1394,13 @@ int jbd2_journal_stop(handle_t *handle)
1376 } 1394 }
1377 1395
1378 jbd_debug(4, "Handle %p going down\n", handle); 1396 jbd_debug(4, "Handle %p going down\n", handle);
1397 trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
1398 handle->h_transaction->t_tid,
1399 handle->h_type, handle->h_line_no,
1400 jiffies - handle->h_start_jiffies,
1401 handle->h_sync, handle->h_requested_credits,
1402 (handle->h_requested_credits -
1403 handle->h_buffer_credits));
1379 1404
1380 /* 1405 /*
1381 * Implement synchronous transaction batching. If the handle 1406 * Implement synchronous transaction batching. If the handle
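The transaction.c hunk extends jbd2__journal_start() with caller-identification arguments (type and line number) that feed the new tracepoints, while jbd2_journal_start() stays as a thin wrapper passing zeros so existing callers keep working. A minimal userspace sketch of that "annotated entry point plus compatibility wrapper" pattern, with invented names and a printf standing in for the tracepoint:

#include <stdio.h>

struct handle {
	unsigned int type;	/* which subsystem started the handle */
	unsigned int line_no;	/* source line of the caller          */
	int          credits;
};

/* Extended entry point: callers that care pass identification that a
 * tracepoint could record. */
static struct handle *journal_start_annotated(int nblocks, unsigned int type,
					       unsigned int line_no)
{
	static struct handle h;

	h.type = type;
	h.line_no = line_no;
	h.credits = nblocks;
	printf("handle: %d credits, type %u, line %u\n", nblocks, type, line_no);
	return &h;
}

/* Thin compatibility wrapper, mirroring jbd2_journal_start() above:
 * legacy callers keep working and simply show up as type 0, line 0. */
static struct handle *journal_start(int nblocks)
{
	return journal_start_annotated(nblocks, 0, 0);
}

int main(void)
{
	journal_start(8);
	return 0;
}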
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig
index 6ae169cd8faa..d8bb6c411e96 100644
--- a/fs/jffs2/Kconfig
+++ b/fs/jffs2/Kconfig
@@ -50,8 +50,8 @@ config JFFS2_FS_WBUF_VERIFY
50 write-buffer, and check for errors. 50 write-buffer, and check for errors.
51 51
52config JFFS2_SUMMARY 52config JFFS2_SUMMARY
53 bool "JFFS2 summary support (EXPERIMENTAL)" 53 bool "JFFS2 summary support"
54 depends on JFFS2_FS && EXPERIMENTAL 54 depends on JFFS2_FS
55 default n 55 default n
56 help 56 help
57 This feature makes it possible to use summary information 57 This feature makes it possible to use summary information
@@ -63,8 +63,8 @@ config JFFS2_SUMMARY
63 If unsure, say 'N'. 63 If unsure, say 'N'.
64 64
65config JFFS2_FS_XATTR 65config JFFS2_FS_XATTR
66 bool "JFFS2 XATTR support (EXPERIMENTAL)" 66 bool "JFFS2 XATTR support"
67 depends on JFFS2_FS && EXPERIMENTAL 67 depends on JFFS2_FS
68 default n 68 default n
69 help 69 help
70 Extended attributes are name:value pairs associated with inodes by 70 Extended attributes are name:value pairs associated with inodes by
@@ -173,7 +173,7 @@ config JFFS2_CMODE_PRIORITY
173 successful one. 173 successful one.
174 174
175config JFFS2_CMODE_SIZE 175config JFFS2_CMODE_SIZE
176 bool "size (EXPERIMENTAL)" 176 bool "size"
177 help 177 help
178 Tries all compressors and chooses the one which has the smallest 178 Tries all compressors and chooses the one which has the smallest
179 result. 179 result.
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ad7774d32095..acd46a4160cb 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -117,12 +117,12 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
117static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) 117static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
118{ 118{
119 struct jffs2_inode_info *f; 119 struct jffs2_inode_info *f;
120 struct inode *inode = filp->f_path.dentry->d_inode; 120 struct inode *inode = file_inode(filp);
121 struct jffs2_full_dirent *fd; 121 struct jffs2_full_dirent *fd;
122 unsigned long offset, curofs; 122 unsigned long offset, curofs;
123 123
124 jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", 124 jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n",
125 filp->f_path.dentry->d_inode->i_ino); 125 file_inode(filp)->i_ino);
126 126
127 f = JFFS2_INODE_INFO(inode); 127 f = JFFS2_INODE_INFO(inode);
128 128
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index bc555ff417e9..93a1232894f6 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -58,7 +58,7 @@ static long jfs_map_ext2(unsigned long flags, int from)
58 58
59long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 59long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
60{ 60{
61 struct inode *inode = filp->f_dentry->d_inode; 61 struct inode *inode = file_inode(filp);
62 struct jfs_inode_info *jfs_inode = JFS_IP(inode); 62 struct jfs_inode_info *jfs_inode = JFS_IP(inode);
63 unsigned int flags; 63 unsigned int flags;
64 64
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 9197a1b0d02d..0ddbeceafc62 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3004,7 +3004,7 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
3004 */ 3004 */
3005int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 3005int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
3006{ 3006{
3007 struct inode *ip = filp->f_path.dentry->d_inode; 3007 struct inode *ip = file_inode(filp);
3008 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 3008 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
3009 int rc = 0; 3009 int rc = 0;
3010 loff_t dtpos; /* legacy OS/2 style position */ 3010 loff_t dtpos; /* legacy OS/2 style position */
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1a543be09c79..060ba638becb 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -154,7 +154,7 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
154 /* 154 /*
155 * If we really return the number of allocated & free inodes, some 155 * If we really return the number of allocated & free inodes, some
156 * applications will fail because they won't see enough free inodes. 156 * applications will fail because they won't see enough free inodes.
157 * We'll try to calculate some guess as to how may inodes we can 157 * We'll try to calculate some guess as to how many inodes we can
158 * really allocate 158 * really allocate
159 * 159 *
160 * buf->f_files = atomic_read(&imap->im_numinos); 160 * buf->f_files = atomic_read(&imap->im_numinos);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 6cd673d34fb9..0796c45d0d4d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -178,7 +178,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
178 continue; 178 continue;
179 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) 179 if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
180 continue; 180 continue;
181 if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) 181 if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0)
182 continue; 182 continue;
183 /* Alright, we found a lock. Set the return status 183 /* Alright, we found a lock. Set the return status
184 * and wake up the caller 184 * and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b43114c4332a..7e529c3c45c0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -127,7 +127,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
127 struct nlm_lock *lock = &argp->lock; 127 struct nlm_lock *lock = &argp->lock;
128 128
129 nlmclnt_next_cookie(&argp->cookie); 129 nlmclnt_next_cookie(&argp->cookie);
130 memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh)); 130 memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
131 lock->caller = utsname()->nodename; 131 lock->caller = utsname()->nodename;
132 lock->oh.data = req->a_owner; 132 lock->oh.data = req->a_owner;
133 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", 133 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
@@ -550,6 +550,9 @@ again:
550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); 550 status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
551 if (status < 0) 551 if (status < 0)
552 break; 552 break;
553 /* Resend the blocking lock request after a server reboot */
554 if (resp->status == nlm_lck_denied_grace_period)
555 continue;
553 if (resp->status != nlm_lck_blocked) 556 if (resp->status != nlm_lck_blocked)
554 break; 557 break;
555 } 558 }
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 764c4d2ed804..969d589c848d 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -33,15 +33,15 @@
33static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH]; 33static struct hlist_head nlm_server_hosts[NLM_HOST_NRHASH];
34static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH]; 34static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
35 35
36#define for_each_host(host, pos, chain, table) \ 36#define for_each_host(host, chain, table) \
37 for ((chain) = (table); \ 37 for ((chain) = (table); \
38 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ 38 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
39 hlist_for_each_entry((host), (pos), (chain), h_hash) 39 hlist_for_each_entry((host), (chain), h_hash)
40 40
41#define for_each_host_safe(host, pos, next, chain, table) \ 41#define for_each_host_safe(host, next, chain, table) \
42 for ((chain) = (table); \ 42 for ((chain) = (table); \
43 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \ 43 (chain) < (table) + NLM_HOST_NRHASH; ++(chain)) \
44 hlist_for_each_entry_safe((host), (pos), (next), \ 44 hlist_for_each_entry_safe((host), (next), \
45 (chain), h_hash) 45 (chain), h_hash)
46 46
47static unsigned long nrhosts; 47static unsigned long nrhosts;
@@ -226,7 +226,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
226 .net = net, 226 .net = net,
227 }; 227 };
228 struct hlist_head *chain; 228 struct hlist_head *chain;
229 struct hlist_node *pos;
230 struct nlm_host *host; 229 struct nlm_host *host;
231 struct nsm_handle *nsm = NULL; 230 struct nsm_handle *nsm = NULL;
232 struct lockd_net *ln = net_generic(net, lockd_net_id); 231 struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -238,7 +237,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
238 mutex_lock(&nlm_host_mutex); 237 mutex_lock(&nlm_host_mutex);
239 238
240 chain = &nlm_client_hosts[nlm_hash_address(sap)]; 239 chain = &nlm_client_hosts[nlm_hash_address(sap)];
241 hlist_for_each_entry(host, pos, chain, h_hash) { 240 hlist_for_each_entry(host, chain, h_hash) {
242 if (host->net != net) 241 if (host->net != net)
243 continue; 242 continue;
244 if (!rpc_cmp_addr(nlm_addr(host), sap)) 243 if (!rpc_cmp_addr(nlm_addr(host), sap))
@@ -323,7 +322,6 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
323 const size_t hostname_len) 322 const size_t hostname_len)
324{ 323{
325 struct hlist_head *chain; 324 struct hlist_head *chain;
326 struct hlist_node *pos;
327 struct nlm_host *host = NULL; 325 struct nlm_host *host = NULL;
328 struct nsm_handle *nsm = NULL; 326 struct nsm_handle *nsm = NULL;
329 struct sockaddr *src_sap = svc_daddr(rqstp); 327 struct sockaddr *src_sap = svc_daddr(rqstp);
@@ -351,7 +349,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
351 nlm_gc_hosts(net); 349 nlm_gc_hosts(net);
352 350
353 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; 351 chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
354 hlist_for_each_entry(host, pos, chain, h_hash) { 352 hlist_for_each_entry(host, chain, h_hash) {
355 if (host->net != net) 353 if (host->net != net)
356 continue; 354 continue;
357 if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) 355 if (!rpc_cmp_addr(nlm_addr(host), ni.sap))
@@ -516,10 +514,9 @@ static struct nlm_host *next_host_state(struct hlist_head *cache,
516{ 514{
517 struct nlm_host *host; 515 struct nlm_host *host;
518 struct hlist_head *chain; 516 struct hlist_head *chain;
519 struct hlist_node *pos;
520 517
521 mutex_lock(&nlm_host_mutex); 518 mutex_lock(&nlm_host_mutex);
522 for_each_host(host, pos, chain, cache) { 519 for_each_host(host, chain, cache) {
523 if (host->h_nsmhandle == nsm 520 if (host->h_nsmhandle == nsm
524 && host->h_nsmstate != info->state) { 521 && host->h_nsmstate != info->state) {
525 host->h_nsmstate = info->state; 522 host->h_nsmstate = info->state;
@@ -571,7 +568,6 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
571static void nlm_complain_hosts(struct net *net) 568static void nlm_complain_hosts(struct net *net)
572{ 569{
573 struct hlist_head *chain; 570 struct hlist_head *chain;
574 struct hlist_node *pos;
575 struct nlm_host *host; 571 struct nlm_host *host;
576 572
577 if (net) { 573 if (net) {
@@ -588,7 +584,7 @@ static void nlm_complain_hosts(struct net *net)
588 dprintk("lockd: %lu hosts left:\n", nrhosts); 584 dprintk("lockd: %lu hosts left:\n", nrhosts);
589 } 585 }
590 586
591 for_each_host(host, pos, chain, nlm_server_hosts) { 587 for_each_host(host, chain, nlm_server_hosts) {
592 if (net && host->net != net) 588 if (net && host->net != net)
593 continue; 589 continue;
594 dprintk(" %s (cnt %d use %d exp %ld net %p)\n", 590 dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
@@ -601,14 +597,13 @@ void
601nlm_shutdown_hosts_net(struct net *net) 597nlm_shutdown_hosts_net(struct net *net)
602{ 598{
603 struct hlist_head *chain; 599 struct hlist_head *chain;
604 struct hlist_node *pos;
605 struct nlm_host *host; 600 struct nlm_host *host;
606 601
607 mutex_lock(&nlm_host_mutex); 602 mutex_lock(&nlm_host_mutex);
608 603
609 /* First, make all hosts eligible for gc */ 604 /* First, make all hosts eligible for gc */
610 dprintk("lockd: nuking all hosts in net %p...\n", net); 605 dprintk("lockd: nuking all hosts in net %p...\n", net);
611 for_each_host(host, pos, chain, nlm_server_hosts) { 606 for_each_host(host, chain, nlm_server_hosts) {
612 if (net && host->net != net) 607 if (net && host->net != net)
613 continue; 608 continue;
614 host->h_expires = jiffies - 1; 609 host->h_expires = jiffies - 1;
@@ -645,11 +640,11 @@ static void
645nlm_gc_hosts(struct net *net) 640nlm_gc_hosts(struct net *net)
646{ 641{
647 struct hlist_head *chain; 642 struct hlist_head *chain;
648 struct hlist_node *pos, *next; 643 struct hlist_node *next;
649 struct nlm_host *host; 644 struct nlm_host *host;
650 645
651 dprintk("lockd: host garbage collection for net %p\n", net); 646 dprintk("lockd: host garbage collection for net %p\n", net);
652 for_each_host(host, pos, chain, nlm_server_hosts) { 647 for_each_host(host, chain, nlm_server_hosts) {
653 if (net && host->net != net) 648 if (net && host->net != net)
654 continue; 649 continue;
655 host->h_inuse = 0; 650 host->h_inuse = 0;
@@ -658,7 +653,7 @@ nlm_gc_hosts(struct net *net)
658 /* Mark all hosts that hold locks, blocks or shares */ 653 /* Mark all hosts that hold locks, blocks or shares */
659 nlmsvc_mark_resources(net); 654 nlmsvc_mark_resources(net);
660 655
661 for_each_host_safe(host, pos, next, chain, nlm_server_hosts) { 656 for_each_host_safe(host, next, chain, nlm_server_hosts) {
662 if (net && host->net != net) 657 if (net && host->net != net)
663 continue; 658 continue;
664 if (atomic_read(&host->h_count) || host->h_inuse 659 if (atomic_read(&host->h_count) || host->h_inuse
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 8d80c990dffd..e703318c41df 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -406,8 +406,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
406 __be32 ret; 406 __be32 ret;
407 407
408 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", 408 dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
409 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 409 file_inode(file->f_file)->i_sb->s_id,
410 file->f_file->f_path.dentry->d_inode->i_ino, 410 file_inode(file->f_file)->i_ino,
411 lock->fl.fl_type, lock->fl.fl_pid, 411 lock->fl.fl_type, lock->fl.fl_pid,
412 (long long)lock->fl.fl_start, 412 (long long)lock->fl.fl_start,
413 (long long)lock->fl.fl_end, 413 (long long)lock->fl.fl_end,
@@ -513,8 +513,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
513 __be32 ret; 513 __be32 ret;
514 514
515 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n", 515 dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
516 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 516 file_inode(file->f_file)->i_sb->s_id,
517 file->f_file->f_path.dentry->d_inode->i_ino, 517 file_inode(file->f_file)->i_ino,
518 lock->fl.fl_type, 518 lock->fl.fl_type,
519 (long long)lock->fl.fl_start, 519 (long long)lock->fl.fl_start,
520 (long long)lock->fl.fl_end); 520 (long long)lock->fl.fl_end);
@@ -606,8 +606,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
606 int error; 606 int error;
607 607
608 dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n", 608 dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
609 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 609 file_inode(file->f_file)->i_sb->s_id,
610 file->f_file->f_path.dentry->d_inode->i_ino, 610 file_inode(file->f_file)->i_ino,
611 lock->fl.fl_pid, 611 lock->fl.fl_pid,
612 (long long)lock->fl.fl_start, 612 (long long)lock->fl.fl_start,
613 (long long)lock->fl.fl_end); 613 (long long)lock->fl.fl_end);
@@ -635,8 +635,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
635 int status = 0; 635 int status = 0;
636 636
637 dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n", 637 dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
638 file->f_file->f_path.dentry->d_inode->i_sb->s_id, 638 file_inode(file->f_file)->i_sb->s_id,
639 file->f_file->f_path.dentry->d_inode->i_ino, 639 file_inode(file->f_file)->i_ino,
640 lock->fl.fl_pid, 640 lock->fl.fl_pid,
641 (long long)lock->fl.fl_start, 641 (long long)lock->fl.fl_start,
642 (long long)lock->fl.fl_end); 642 (long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 8064435e8bef..97e87415b145 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
45 45
46static inline void nlm_debug_print_file(char *msg, struct nlm_file *file) 46static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
47{ 47{
48 struct inode *inode = file->f_file->f_path.dentry->d_inode; 48 struct inode *inode = file_inode(file->f_file);
49 49
50 dprintk("lockd: %s %s/%ld\n", 50 dprintk("lockd: %s %s/%ld\n",
51 msg, inode->i_sb->s_id, inode->i_ino); 51 msg, inode->i_sb->s_id, inode->i_ino);
@@ -84,7 +84,6 @@ __be32
84nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, 84nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
85 struct nfs_fh *f) 85 struct nfs_fh *f)
86{ 86{
87 struct hlist_node *pos;
88 struct nlm_file *file; 87 struct nlm_file *file;
89 unsigned int hash; 88 unsigned int hash;
90 __be32 nfserr; 89 __be32 nfserr;
@@ -96,7 +95,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
96 /* Lock file table */ 95 /* Lock file table */
97 mutex_lock(&nlm_file_mutex); 96 mutex_lock(&nlm_file_mutex);
98 97
99 hlist_for_each_entry(file, pos, &nlm_files[hash], f_list) 98 hlist_for_each_entry(file, &nlm_files[hash], f_list)
100 if (!nfs_compare_fh(&file->f_handle, f)) 99 if (!nfs_compare_fh(&file->f_handle, f))
101 goto found; 100 goto found;
102 101
@@ -248,13 +247,13 @@ static int
248nlm_traverse_files(void *data, nlm_host_match_fn_t match, 247nlm_traverse_files(void *data, nlm_host_match_fn_t match,
249 int (*is_failover_file)(void *data, struct nlm_file *file)) 248 int (*is_failover_file)(void *data, struct nlm_file *file))
250{ 249{
251 struct hlist_node *pos, *next; 250 struct hlist_node *next;
252 struct nlm_file *file; 251 struct nlm_file *file;
253 int i, ret = 0; 252 int i, ret = 0;
254 253
255 mutex_lock(&nlm_file_mutex); 254 mutex_lock(&nlm_file_mutex);
256 for (i = 0; i < FILE_NRHASH; i++) { 255 for (i = 0; i < FILE_NRHASH; i++) {
257 hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { 256 hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
258 if (is_failover_file && !is_failover_file(data, file)) 257 if (is_failover_file && !is_failover_file(data, file))
259 continue; 258 continue;
260 file->f_count++; 259 file->f_count++;
diff --git a/fs/locks.c b/fs/locks.c
index a94e331a52a2..cb424a4fed71 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -334,7 +334,7 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
334 start = filp->f_pos; 334 start = filp->f_pos;
335 break; 335 break;
336 case SEEK_END: 336 case SEEK_END:
337 start = i_size_read(filp->f_path.dentry->d_inode); 337 start = i_size_read(file_inode(filp));
338 break; 338 break;
339 default: 339 default:
340 return -EINVAL; 340 return -EINVAL;
@@ -384,7 +384,7 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
384 start = filp->f_pos; 384 start = filp->f_pos;
385 break; 385 break;
386 case SEEK_END: 386 case SEEK_END:
387 start = i_size_read(filp->f_path.dentry->d_inode); 387 start = i_size_read(file_inode(filp));
388 break; 388 break;
389 default: 389 default:
390 return -EINVAL; 390 return -EINVAL;
@@ -627,7 +627,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
627 struct file_lock *cfl; 627 struct file_lock *cfl;
628 628
629 lock_flocks(); 629 lock_flocks();
630 for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { 630 for (cfl = file_inode(filp)->i_flock; cfl; cfl = cfl->fl_next) {
631 if (!IS_POSIX(cfl)) 631 if (!IS_POSIX(cfl))
632 continue; 632 continue;
633 if (posix_locks_conflict(fl, cfl)) 633 if (posix_locks_conflict(fl, cfl))
@@ -708,7 +708,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
708{ 708{
709 struct file_lock *new_fl = NULL; 709 struct file_lock *new_fl = NULL;
710 struct file_lock **before; 710 struct file_lock **before;
711 struct inode * inode = filp->f_path.dentry->d_inode; 711 struct inode * inode = file_inode(filp);
712 int error = 0; 712 int error = 0;
713 int found = 0; 713 int found = 0;
714 714
@@ -1002,7 +1002,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1002int posix_lock_file(struct file *filp, struct file_lock *fl, 1002int posix_lock_file(struct file *filp, struct file_lock *fl,
1003 struct file_lock *conflock) 1003 struct file_lock *conflock)
1004{ 1004{
1005 return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock); 1005 return __posix_lock_file(file_inode(filp), fl, conflock);
1006} 1006}
1007EXPORT_SYMBOL(posix_lock_file); 1007EXPORT_SYMBOL(posix_lock_file);
1008 1008
@@ -1326,8 +1326,8 @@ int fcntl_getlease(struct file *filp)
1326 int type = F_UNLCK; 1326 int type = F_UNLCK;
1327 1327
1328 lock_flocks(); 1328 lock_flocks();
1329 time_out_leases(filp->f_path.dentry->d_inode); 1329 time_out_leases(file_inode(filp));
1330 for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); 1330 for (fl = file_inode(filp)->i_flock; fl && IS_LEASE(fl);
1331 fl = fl->fl_next) { 1331 fl = fl->fl_next) {
1332 if (fl->fl_file == filp) { 1332 if (fl->fl_file == filp) {
1333 type = target_leasetype(fl); 1333 type = target_leasetype(fl);
@@ -1843,7 +1843,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1843 if (copy_from_user(&flock, l, sizeof(flock))) 1843 if (copy_from_user(&flock, l, sizeof(flock)))
1844 goto out; 1844 goto out;
1845 1845
1846 inode = filp->f_path.dentry->d_inode; 1846 inode = file_inode(filp);
1847 1847
1848 /* Don't allow mandatory locks on files that may be memory mapped 1848 /* Don't allow mandatory locks on files that may be memory mapped
1849 * and shared. 1849 * and shared.
@@ -1961,7 +1961,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1961 if (copy_from_user(&flock, l, sizeof(flock))) 1961 if (copy_from_user(&flock, l, sizeof(flock)))
1962 goto out; 1962 goto out;
1963 1963
1964 inode = filp->f_path.dentry->d_inode; 1964 inode = file_inode(filp);
1965 1965
1966 /* Don't allow mandatory locks on files that may be memory mapped 1966 /* Don't allow mandatory locks on files that may be memory mapped
1967 * and shared. 1967 * and shared.
@@ -2030,7 +2030,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
2030 * posix_lock_file(). Another process could be setting a lock on this 2030 * posix_lock_file(). Another process could be setting a lock on this
2031 * file at the same time, but we wouldn't remove that lock anyway. 2031 * file at the same time, but we wouldn't remove that lock anyway.
2032 */ 2032 */
2033 if (!filp->f_path.dentry->d_inode->i_flock) 2033 if (!file_inode(filp)->i_flock)
2034 return; 2034 return;
2035 2035
2036 lock.fl_type = F_UNLCK; 2036 lock.fl_type = F_UNLCK;
@@ -2056,7 +2056,7 @@ EXPORT_SYMBOL(locks_remove_posix);
2056 */ 2056 */
2057void locks_remove_flock(struct file *filp) 2057void locks_remove_flock(struct file *filp)
2058{ 2058{
2059 struct inode * inode = filp->f_path.dentry->d_inode; 2059 struct inode * inode = file_inode(filp);
2060 struct file_lock *fl; 2060 struct file_lock *fl;
2061 struct file_lock **before; 2061 struct file_lock **before;
2062 2062
@@ -2152,7 +2152,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2152 fl_pid = fl->fl_pid; 2152 fl_pid = fl->fl_pid;
2153 2153
2154 if (fl->fl_file != NULL) 2154 if (fl->fl_file != NULL)
2155 inode = fl->fl_file->f_path.dentry->d_inode; 2155 inode = file_inode(fl->fl_file);
2156 2156
2157 seq_printf(f, "%lld:%s ", id, pfx); 2157 seq_printf(f, "%lld:%s ", id, pfx);
2158 if (IS_POSIX(fl)) { 2158 if (IS_POSIX(fl)) {
diff --git a/fs/logfs/Kconfig b/fs/logfs/Kconfig
index daf9a9b32dd3..09ed066c0221 100644
--- a/fs/logfs/Kconfig
+++ b/fs/logfs/Kconfig
@@ -1,6 +1,6 @@
1config LOGFS 1config LOGFS
2 tristate "LogFS file system (EXPERIMENTAL)" 2 tristate "LogFS file system"
3 depends on (MTD || BLOCK) && EXPERIMENTAL 3 depends on (MTD || BLOCK)
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
6 select CRC32 6 select CRC32
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 26e4a941532f..b82751082112 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -284,7 +284,7 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
284#define IMPLICIT_NODES 2 284#define IMPLICIT_NODES 2
285static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) 285static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
286{ 286{
287 struct inode *dir = file->f_dentry->d_inode; 287 struct inode *dir = file_inode(file);
288 loff_t pos = file->f_pos - IMPLICIT_NODES; 288 loff_t pos = file->f_pos - IMPLICIT_NODES;
289 struct page *page; 289 struct page *page;
290 struct logfs_disk_dentry *dd; 290 struct logfs_disk_dentry *dd;
@@ -320,7 +320,7 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir)
320 320
321static int logfs_readdir(struct file *file, void *buf, filldir_t filldir) 321static int logfs_readdir(struct file *file, void *buf, filldir_t filldir)
322{ 322{
323 struct inode *inode = file->f_dentry->d_inode; 323 struct inode *inode = file_inode(file);
324 ino_t pino = parent_ino(file->f_dentry); 324 ino_t pino = parent_ino(file->f_dentry);
325 int err; 325 int err;
326 326
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 3886cded283c..c2219a6dd3c8 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -183,7 +183,7 @@ static int logfs_releasepage(struct page *page, gfp_t only_xfs_uses_this)
183 183
184long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 184long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
185{ 185{
186 struct inode *inode = file->f_path.dentry->d_inode; 186 struct inode *inode = file_inode(file);
187 struct logfs_inode *li = logfs_inode(inode); 187 struct logfs_inode *li = logfs_inode(inode);
188 unsigned int oldflags, flags; 188 unsigned int oldflags, flags;
189 int err; 189 int err;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 685b2d981b87..a9ed6f36e6ea 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -85,7 +85,7 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
85static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) 85static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
86{ 86{
87 unsigned long pos = filp->f_pos; 87 unsigned long pos = filp->f_pos;
88 struct inode *inode = filp->f_path.dentry->d_inode; 88 struct inode *inode = file_inode(filp);
89 struct super_block *sb = inode->i_sb; 89 struct super_block *sb = inode->i_sb;
90 unsigned offset = pos & ~PAGE_CACHE_MASK; 90 unsigned offset = pos & ~PAGE_CACHE_MASK;
91 unsigned long n = pos >> PAGE_CACHE_SHIFT; 91 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/namei.c b/fs/namei.c
index 43a97ee1d4c8..dc984fee5532 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -600,14 +600,10 @@ static int complete_walk(struct nameidata *nd)
600 if (likely(!(nd->flags & LOOKUP_JUMPED))) 600 if (likely(!(nd->flags & LOOKUP_JUMPED)))
601 return 0; 601 return 0;
602 602
603 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) 603 if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
604 return 0; 604 return 0;
605 605
606 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) 606 status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
607 return 0;
608
609 /* Note: we do not d_invalidate() */
610 status = d_revalidate(dentry, nd->flags);
611 if (status > 0) 607 if (status > 0)
612 return 0; 608 return 0;
613 609
@@ -1342,7 +1338,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1342 * small and for now I'd prefer to have fast path as straight as possible. 1338 * small and for now I'd prefer to have fast path as straight as possible.
1343 * It _is_ time-critical. 1339 * It _is_ time-critical.
1344 */ 1340 */
1345static int lookup_fast(struct nameidata *nd, struct qstr *name, 1341static int lookup_fast(struct nameidata *nd,
1346 struct path *path, struct inode **inode) 1342 struct path *path, struct inode **inode)
1347{ 1343{
1348 struct vfsmount *mnt = nd->path.mnt; 1344 struct vfsmount *mnt = nd->path.mnt;
@@ -1358,7 +1354,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
1358 */ 1354 */
1359 if (nd->flags & LOOKUP_RCU) { 1355 if (nd->flags & LOOKUP_RCU) {
1360 unsigned seq; 1356 unsigned seq;
1361 dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); 1357 dentry = __d_lookup_rcu(parent, &nd->last, &seq, nd->inode);
1362 if (!dentry) 1358 if (!dentry)
1363 goto unlazy; 1359 goto unlazy;
1364 1360
@@ -1400,7 +1396,7 @@ unlazy:
1400 if (unlazy_walk(nd, dentry)) 1396 if (unlazy_walk(nd, dentry))
1401 return -ECHILD; 1397 return -ECHILD;
1402 } else { 1398 } else {
1403 dentry = __d_lookup(parent, name); 1399 dentry = __d_lookup(parent, &nd->last);
1404 } 1400 }
1405 1401
1406 if (unlikely(!dentry)) 1402 if (unlikely(!dentry))
@@ -1436,8 +1432,7 @@ need_lookup:
1436} 1432}
1437 1433
1438/* Fast lookup failed, do it the slow way */ 1434/* Fast lookup failed, do it the slow way */
1439static int lookup_slow(struct nameidata *nd, struct qstr *name, 1435static int lookup_slow(struct nameidata *nd, struct path *path)
1440 struct path *path)
1441{ 1436{
1442 struct dentry *dentry, *parent; 1437 struct dentry *dentry, *parent;
1443 int err; 1438 int err;
@@ -1446,7 +1441,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name,
1446 BUG_ON(nd->inode != parent->d_inode); 1441 BUG_ON(nd->inode != parent->d_inode);
1447 1442
1448 mutex_lock(&parent->d_inode->i_mutex); 1443 mutex_lock(&parent->d_inode->i_mutex);
1449 dentry = __lookup_hash(name, parent, nd->flags); 1444 dentry = __lookup_hash(&nd->last, parent, nd->flags);
1450 mutex_unlock(&parent->d_inode->i_mutex); 1445 mutex_unlock(&parent->d_inode->i_mutex);
1451 if (IS_ERR(dentry)) 1446 if (IS_ERR(dentry))
1452 return PTR_ERR(dentry); 1447 return PTR_ERR(dentry);
@@ -1519,7 +1514,7 @@ static inline int should_follow_link(struct inode *inode, int follow)
1519} 1514}
1520 1515
1521static inline int walk_component(struct nameidata *nd, struct path *path, 1516static inline int walk_component(struct nameidata *nd, struct path *path,
1522 struct qstr *name, int type, int follow) 1517 int follow)
1523{ 1518{
1524 struct inode *inode; 1519 struct inode *inode;
1525 int err; 1520 int err;
@@ -1528,14 +1523,14 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1528 * to be able to know about the current root directory and 1523 * to be able to know about the current root directory and
1529 * parent relationships. 1524 * parent relationships.
1530 */ 1525 */
1531 if (unlikely(type != LAST_NORM)) 1526 if (unlikely(nd->last_type != LAST_NORM))
1532 return handle_dots(nd, type); 1527 return handle_dots(nd, nd->last_type);
1533 err = lookup_fast(nd, name, path, &inode); 1528 err = lookup_fast(nd, path, &inode);
1534 if (unlikely(err)) { 1529 if (unlikely(err)) {
1535 if (err < 0) 1530 if (err < 0)
1536 goto out_err; 1531 goto out_err;
1537 1532
1538 err = lookup_slow(nd, name, path); 1533 err = lookup_slow(nd, path);
1539 if (err < 0) 1534 if (err < 0)
1540 goto out_err; 1535 goto out_err;
1541 1536
@@ -1594,8 +1589,7 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1594 res = follow_link(&link, nd, &cookie); 1589 res = follow_link(&link, nd, &cookie);
1595 if (res) 1590 if (res)
1596 break; 1591 break;
1597 res = walk_component(nd, path, &nd->last, 1592 res = walk_component(nd, path, LOOKUP_FOLLOW);
1598 nd->last_type, LOOKUP_FOLLOW);
1599 put_link(nd, &link, cookie); 1593 put_link(nd, &link, cookie);
1600 } while (res > 0); 1594 } while (res > 0);
1601 1595
@@ -1802,8 +1796,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1802 } 1796 }
1803 } 1797 }
1804 1798
1799 nd->last = this;
1800 nd->last_type = type;
1801
1805 if (!name[len]) 1802 if (!name[len])
1806 goto last_component; 1803 return 0;
1807 /* 1804 /*
1808 * If it wasn't NUL, we know it was '/'. Skip that 1805 * If it wasn't NUL, we know it was '/'. Skip that
1809 * slash, and continue until no more slashes. 1806 * slash, and continue until no more slashes.
@@ -1812,10 +1809,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1812 len++; 1809 len++;
1813 } while (unlikely(name[len] == '/')); 1810 } while (unlikely(name[len] == '/'));
1814 if (!name[len]) 1811 if (!name[len])
1815 goto last_component; 1812 return 0;
1813
1816 name += len; 1814 name += len;
1817 1815
1818 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); 1816 err = walk_component(nd, &next, LOOKUP_FOLLOW);
1819 if (err < 0) 1817 if (err < 0)
1820 return err; 1818 return err;
1821 1819
@@ -1824,16 +1822,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1824 if (err) 1822 if (err)
1825 return err; 1823 return err;
1826 } 1824 }
1827 if (can_lookup(nd->inode)) 1825 if (!can_lookup(nd->inode)) {
1828 continue; 1826 err = -ENOTDIR;
1829 err = -ENOTDIR; 1827 break;
1830 break; 1828 }
1831 /* here ends the main loop */
1832
1833last_component:
1834 nd->last = this;
1835 nd->last_type = type;
1836 return 0;
1837 } 1829 }
1838 terminate_walk(nd); 1830 terminate_walk(nd);
1839 return err; 1831 return err;
@@ -1932,8 +1924,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
1932 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 1924 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1933 1925
1934 nd->flags &= ~LOOKUP_PARENT; 1926 nd->flags &= ~LOOKUP_PARENT;
1935 return walk_component(nd, path, &nd->last, nd->last_type, 1927 return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW);
1936 nd->flags & LOOKUP_FOLLOW);
1937} 1928}
1938 1929
 1939/* Returns 0 and nd will be valid on success; Returns error, otherwise. */ 1930/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
@@ -2732,7 +2723,7 @@ static int do_last(struct nameidata *nd, struct path *path,
2732 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) 2723 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2733 symlink_ok = true; 2724 symlink_ok = true;
2734 /* we _can_ be in RCU mode here */ 2725 /* we _can_ be in RCU mode here */
2735 error = lookup_fast(nd, &nd->last, path, &inode); 2726 error = lookup_fast(nd, path, &inode);
2736 if (likely(!error)) 2727 if (likely(!error))
2737 goto finish_lookup; 2728 goto finish_lookup;
2738 2729
@@ -2778,7 +2769,7 @@ retry_lookup:
2778 goto out; 2769 goto out;
2779 2770
2780 if ((*opened & FILE_CREATED) || 2771 if ((*opened & FILE_CREATED) ||
2781 !S_ISREG(file->f_path.dentry->d_inode->i_mode)) 2772 !S_ISREG(file_inode(file)->i_mode))
2782 will_truncate = false; 2773 will_truncate = false;
2783 2774
2784 audit_inode(name, file->f_path.dentry, 0); 2775 audit_inode(name, file->f_path.dentry, 0);
@@ -2941,8 +2932,8 @@ static struct file *path_openat(int dfd, struct filename *pathname,
2941 int error; 2932 int error;
2942 2933
2943 file = get_empty_filp(); 2934 file = get_empty_filp();
2944 if (!file) 2935 if (IS_ERR(file))
2945 return ERR_PTR(-ENFILE); 2936 return file;
2946 2937
2947 file->f_flags = op->open_flag; 2938 file->f_flags = op->open_flag;
2948 2939
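
A note on the fs/namei.c hunks above: they all serve one refactoring, moving the last path component out of the lookup helpers' argument lists and into struct nameidata (nd->last, nd->last_type), which link_path_walk() now fills in before each step. A condensed sketch of the calling-convention change, assembled from the hunks above (illustrative only, not an additional hunk):

    /* before: the component travelled as explicit arguments */
    err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);

    /* after: link_path_walk() records it in the nameidata ... */
    nd->last = this;
    nd->last_type = type;
    /* ... and walk_component()/lookup_fast()/lookup_slow() read
     * nd->last and nd->last_type themselves */
    err = walk_component(nd, &next, LOOKUP_FOLLOW);

The same change lets lookup_last() and do_last() drop their &nd->last / nd->last_type arguments, since the data is already where those callers need it.
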
diff --git a/fs/namespace.c b/fs/namespace.c
index 55605c552787..50ca17d3cb45 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -384,7 +384,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
384 */ 384 */
385int __mnt_want_write_file(struct file *file) 385int __mnt_want_write_file(struct file *file)
386{ 386{
387 struct inode *inode = file->f_dentry->d_inode; 387 struct inode *inode = file_inode(file);
388 388
389 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) 389 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
390 return __mnt_want_write(file->f_path.mnt); 390 return __mnt_want_write(file->f_path.mnt);
@@ -1237,6 +1237,14 @@ static int do_umount(struct mount *mnt, int flags)
1237 return retval; 1237 return retval;
1238} 1238}
1239 1239
1240/*
1241 * Is the caller allowed to modify his namespace?
1242 */
1243static inline bool may_mount(void)
1244{
1245 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1246}
1247
1240/* 1248/*
1241 * Now umount can handle mount points as well as block devices. 1249 * Now umount can handle mount points as well as block devices.
1242 * This is important for filesystems which use unnamed block devices. 1250 * This is important for filesystems which use unnamed block devices.
@@ -1255,6 +1263,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1255 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) 1263 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1256 return -EINVAL; 1264 return -EINVAL;
1257 1265
1266 if (!may_mount())
1267 return -EPERM;
1268
1258 if (!(flags & UMOUNT_NOFOLLOW)) 1269 if (!(flags & UMOUNT_NOFOLLOW))
1259 lookup_flags |= LOOKUP_FOLLOW; 1270 lookup_flags |= LOOKUP_FOLLOW;
1260 1271
@@ -1268,10 +1279,6 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1268 if (!check_mnt(mnt)) 1279 if (!check_mnt(mnt))
1269 goto dput_and_out; 1280 goto dput_and_out;
1270 1281
1271 retval = -EPERM;
1272 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1273 goto dput_and_out;
1274
1275 retval = do_umount(mnt, flags); 1282 retval = do_umount(mnt, flags);
1276dput_and_out: 1283dput_and_out:
1277 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1284 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
@@ -1293,24 +1300,6 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
1293 1300
1294#endif 1301#endif
1295 1302
1296static int mount_is_safe(struct path *path)
1297{
1298 if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1299 return 0;
1300 return -EPERM;
1301#ifdef notyet
1302 if (S_ISLNK(path->dentry->d_inode->i_mode))
1303 return -EPERM;
1304 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1305 if (current_uid() != path->dentry->d_inode->i_uid)
1306 return -EPERM;
1307 }
1308 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1309 return -EPERM;
1310 return 0;
1311#endif
1312}
1313
1314static bool mnt_ns_loop(struct path *path) 1303static bool mnt_ns_loop(struct path *path)
1315{ 1304{
1316 /* Could bind mounting the mount namespace inode cause a 1305 /* Could bind mounting the mount namespace inode cause a
@@ -1633,9 +1622,6 @@ static int do_change_type(struct path *path, int flag)
1633 int type; 1622 int type;
1634 int err = 0; 1623 int err = 0;
1635 1624
1636 if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
1637 return -EPERM;
1638
1639 if (path->dentry != path->mnt->mnt_root) 1625 if (path->dentry != path->mnt->mnt_root)
1640 return -EINVAL; 1626 return -EINVAL;
1641 1627
@@ -1669,9 +1655,7 @@ static int do_loopback(struct path *path, const char *old_name,
1669 LIST_HEAD(umount_list); 1655 LIST_HEAD(umount_list);
1670 struct path old_path; 1656 struct path old_path;
1671 struct mount *mnt = NULL, *old; 1657 struct mount *mnt = NULL, *old;
1672 int err = mount_is_safe(path); 1658 int err;
1673 if (err)
1674 return err;
1675 if (!old_name || !*old_name) 1659 if (!old_name || !*old_name)
1676 return -EINVAL; 1660 return -EINVAL;
1677 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); 1661 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
@@ -1748,9 +1732,6 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1748 struct super_block *sb = path->mnt->mnt_sb; 1732 struct super_block *sb = path->mnt->mnt_sb;
1749 struct mount *mnt = real_mount(path->mnt); 1733 struct mount *mnt = real_mount(path->mnt);
1750 1734
1751 if (!capable(CAP_SYS_ADMIN))
1752 return -EPERM;
1753
1754 if (!check_mnt(mnt)) 1735 if (!check_mnt(mnt))
1755 return -EINVAL; 1736 return -EINVAL;
1756 1737
@@ -1764,6 +1745,8 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1764 down_write(&sb->s_umount); 1745 down_write(&sb->s_umount);
1765 if (flags & MS_BIND) 1746 if (flags & MS_BIND)
1766 err = change_mount_flags(path->mnt, flags); 1747 err = change_mount_flags(path->mnt, flags);
1748 else if (!capable(CAP_SYS_ADMIN))
1749 err = -EPERM;
1767 else 1750 else
1768 err = do_remount_sb(sb, flags, data, 0); 1751 err = do_remount_sb(sb, flags, data, 0);
1769 if (!err) { 1752 if (!err) {
@@ -1796,9 +1779,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1796 struct path old_path, parent_path; 1779 struct path old_path, parent_path;
1797 struct mount *p; 1780 struct mount *p;
1798 struct mount *old; 1781 struct mount *old;
1799 int err = 0; 1782 int err;
1800 if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))
1801 return -EPERM;
1802 if (!old_name || !*old_name) 1783 if (!old_name || !*old_name)
1803 return -EINVAL; 1784 return -EINVAL;
1804 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); 1785 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
@@ -1933,18 +1914,13 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
1933 int mnt_flags, const char *name, void *data) 1914 int mnt_flags, const char *name, void *data)
1934{ 1915{
1935 struct file_system_type *type; 1916 struct file_system_type *type;
1936 struct user_namespace *user_ns; 1917 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
1937 struct vfsmount *mnt; 1918 struct vfsmount *mnt;
1938 int err; 1919 int err;
1939 1920
1940 if (!fstype) 1921 if (!fstype)
1941 return -EINVAL; 1922 return -EINVAL;
1942 1923
1943 /* we need capabilities... */
1944 user_ns = real_mount(path->mnt)->mnt_ns->user_ns;
1945 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
1946 return -EPERM;
1947
1948 type = get_fs_type(fstype); 1924 type = get_fs_type(fstype);
1949 if (!type) 1925 if (!type)
1950 return -ENODEV; 1926 return -ENODEV;
@@ -2258,6 +2234,9 @@ long do_mount(const char *dev_name, const char *dir_name,
2258 if (retval) 2234 if (retval)
2259 goto dput_out; 2235 goto dput_out;
2260 2236
2237 if (!may_mount())
2238 return -EPERM;
2239
 2261 /* Default to relatime unless overridden */ 2240 /* Default to relatime unless overridden */
2262 if (!(flags & MS_NOATIME)) 2241 if (!(flags & MS_NOATIME))
2263 mnt_flags |= MNT_RELATIME; 2242 mnt_flags |= MNT_RELATIME;
@@ -2567,7 +2546,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2567 struct mount *new_mnt, *root_mnt; 2546 struct mount *new_mnt, *root_mnt;
2568 int error; 2547 int error;
2569 2548
2570 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) 2549 if (!may_mount())
2571 return -EPERM; 2550 return -EPERM;
2572 2551
2573 error = user_path_dir(new_root, &new); 2552 error = user_path_dir(new_root, &new);
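
The fs/namespace.c hunks converge on a single pattern: the scattered ns_capable()/capable() tests inside do_change_type(), do_loopback(), do_move_mount(), do_new_mount() and do_remount() are replaced by one up-front check at the syscall boundaries. A minimal sketch of that pattern, taken from the hunks above (illustrative only):

    /* one question, asked against the caller's mount namespace */
    static inline bool may_mount(void)
    {
            return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
    }

    /* umount(2), do_mount() and pivot_root(2) now begin with: */
    if (!may_mount())
            return -EPERM;

The one remaining in-place check is in do_remount(), where a plain capable(CAP_SYS_ADMIN) still guards the non-MS_BIND branch that reaches do_remount_sb().
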
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 4117e7b377bb..816326093656 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -593,14 +593,10 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
593 return 1; /* I'm not sure */ 593 return 1; /* I'm not sure */
594 594
595 qname.name = __name; 595 qname.name = __name;
596 qname.hash = full_name_hash(qname.name, qname.len);
597
598 if (dentry->d_op && dentry->d_op->d_hash)
599 if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
600 goto end_advance;
601
602 newdent = d_lookup(dentry, &qname);
603 596
597 newdent = d_hash_and_lookup(dentry, &qname);
598 if (unlikely(IS_ERR(newdent)))
599 goto end_advance;
604 if (!newdent) { 600 if (!newdent) {
605 newdent = d_alloc(dentry, &qname); 601 newdent = d_alloc(dentry, &qname);
606 if (!newdent) 602 if (!newdent)
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1acdad7fcec7..7dafd6899a62 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -331,12 +331,15 @@ static int ncp_show_options(struct seq_file *seq, struct dentry *root)
331 struct ncp_server *server = NCP_SBP(root->d_sb); 331 struct ncp_server *server = NCP_SBP(root->d_sb);
332 unsigned int tmp; 332 unsigned int tmp;
333 333
334 if (server->m.uid != 0) 334 if (!uid_eq(server->m.uid, GLOBAL_ROOT_UID))
335 seq_printf(seq, ",uid=%u", server->m.uid); 335 seq_printf(seq, ",uid=%u",
336 if (server->m.gid != 0) 336 from_kuid_munged(&init_user_ns, server->m.uid));
337 seq_printf(seq, ",gid=%u", server->m.gid); 337 if (!gid_eq(server->m.gid, GLOBAL_ROOT_GID))
338 if (server->m.mounted_uid != 0) 338 seq_printf(seq, ",gid=%u",
339 seq_printf(seq, ",owner=%u", server->m.mounted_uid); 339 from_kgid_munged(&init_user_ns, server->m.gid));
340 if (!uid_eq(server->m.mounted_uid, GLOBAL_ROOT_UID))
341 seq_printf(seq, ",owner=%u",
342 from_kuid_munged(&init_user_ns, server->m.mounted_uid));
340 tmp = server->m.file_mode & S_IALLUGO; 343 tmp = server->m.file_mode & S_IALLUGO;
341 if (tmp != NCP_DEFAULT_FILE_MODE) 344 if (tmp != NCP_DEFAULT_FILE_MODE)
342 seq_printf(seq, ",mode=0%o", tmp); 345 seq_printf(seq, ",mode=0%o", tmp);
@@ -381,13 +384,13 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
381 384
382 data->flags = 0; 385 data->flags = 0;
383 data->int_flags = 0; 386 data->int_flags = 0;
384 data->mounted_uid = 0; 387 data->mounted_uid = GLOBAL_ROOT_UID;
385 data->wdog_pid = NULL; 388 data->wdog_pid = NULL;
386 data->ncp_fd = ~0; 389 data->ncp_fd = ~0;
387 data->time_out = NCP_DEFAULT_TIME_OUT; 390 data->time_out = NCP_DEFAULT_TIME_OUT;
388 data->retry_count = NCP_DEFAULT_RETRY_COUNT; 391 data->retry_count = NCP_DEFAULT_RETRY_COUNT;
389 data->uid = 0; 392 data->uid = GLOBAL_ROOT_UID;
390 data->gid = 0; 393 data->gid = GLOBAL_ROOT_GID;
391 data->file_mode = NCP_DEFAULT_FILE_MODE; 394 data->file_mode = NCP_DEFAULT_FILE_MODE;
392 data->dir_mode = NCP_DEFAULT_DIR_MODE; 395 data->dir_mode = NCP_DEFAULT_DIR_MODE;
393 data->info_fd = -1; 396 data->info_fd = -1;
@@ -399,13 +402,19 @@ static int ncp_parse_options(struct ncp_mount_data_kernel *data, char *options)
399 goto err; 402 goto err;
400 switch (optval) { 403 switch (optval) {
401 case 'u': 404 case 'u':
402 data->uid = optint; 405 data->uid = make_kuid(current_user_ns(), optint);
406 if (!uid_valid(data->uid))
407 goto err;
403 break; 408 break;
404 case 'g': 409 case 'g':
405 data->gid = optint; 410 data->gid = make_kgid(current_user_ns(), optint);
411 if (!gid_valid(data->gid))
412 goto err;
406 break; 413 break;
407 case 'o': 414 case 'o':
408 data->mounted_uid = optint; 415 data->mounted_uid = make_kuid(current_user_ns(), optint);
416 if (!uid_valid(data->mounted_uid))
417 goto err;
409 break; 418 break;
410 case 'm': 419 case 'm':
411 data->file_mode = optint; 420 data->file_mode = optint;
@@ -480,13 +489,13 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
480 489
481 data.flags = md->flags; 490 data.flags = md->flags;
482 data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE; 491 data.int_flags = NCP_IMOUNT_LOGGEDIN_POSSIBLE;
483 data.mounted_uid = md->mounted_uid; 492 data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
484 data.wdog_pid = find_get_pid(md->wdog_pid); 493 data.wdog_pid = find_get_pid(md->wdog_pid);
485 data.ncp_fd = md->ncp_fd; 494 data.ncp_fd = md->ncp_fd;
486 data.time_out = md->time_out; 495 data.time_out = md->time_out;
487 data.retry_count = md->retry_count; 496 data.retry_count = md->retry_count;
488 data.uid = md->uid; 497 data.uid = make_kuid(current_user_ns(), md->uid);
489 data.gid = md->gid; 498 data.gid = make_kgid(current_user_ns(), md->gid);
490 data.file_mode = md->file_mode; 499 data.file_mode = md->file_mode;
491 data.dir_mode = md->dir_mode; 500 data.dir_mode = md->dir_mode;
492 data.info_fd = -1; 501 data.info_fd = -1;
@@ -499,13 +508,13 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
499 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; 508 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
500 509
501 data.flags = md->flags; 510 data.flags = md->flags;
502 data.mounted_uid = md->mounted_uid; 511 data.mounted_uid = make_kuid(current_user_ns(), md->mounted_uid);
503 data.wdog_pid = find_get_pid(md->wdog_pid); 512 data.wdog_pid = find_get_pid(md->wdog_pid);
504 data.ncp_fd = md->ncp_fd; 513 data.ncp_fd = md->ncp_fd;
505 data.time_out = md->time_out; 514 data.time_out = md->time_out;
506 data.retry_count = md->retry_count; 515 data.retry_count = md->retry_count;
507 data.uid = md->uid; 516 data.uid = make_kuid(current_user_ns(), md->uid);
508 data.gid = md->gid; 517 data.gid = make_kgid(current_user_ns(), md->gid);
509 data.file_mode = md->file_mode; 518 data.file_mode = md->file_mode;
510 data.dir_mode = md->dir_mode; 519 data.dir_mode = md->dir_mode;
511 data.info_fd = -1; 520 data.info_fd = -1;
@@ -520,12 +529,16 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
520 goto out; 529 goto out;
521 break; 530 break;
522 } 531 }
532 error = -EINVAL;
533 if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
534 !gid_valid(data.gid))
535 goto out;
523 error = -EBADF; 536 error = -EBADF;
524 ncp_filp = fget(data.ncp_fd); 537 ncp_filp = fget(data.ncp_fd);
525 if (!ncp_filp) 538 if (!ncp_filp)
526 goto out; 539 goto out;
527 error = -ENOTSOCK; 540 error = -ENOTSOCK;
528 sock_inode = ncp_filp->f_path.dentry->d_inode; 541 sock_inode = file_inode(ncp_filp);
529 if (!S_ISSOCK(sock_inode->i_mode)) 542 if (!S_ISSOCK(sock_inode->i_mode))
530 goto out_fput; 543 goto out_fput;
531 sock = SOCKET_I(sock_inode); 544 sock = SOCKET_I(sock_inode);
@@ -564,7 +577,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
564 if (!server->info_filp) 577 if (!server->info_filp)
565 goto out_bdi; 578 goto out_bdi;
566 error = -ENOTSOCK; 579 error = -ENOTSOCK;
567 sock_inode = server->info_filp->f_path.dentry->d_inode; 580 sock_inode = file_inode(server->info_filp);
568 if (!S_ISSOCK(sock_inode->i_mode)) 581 if (!S_ISSOCK(sock_inode->i_mode))
569 goto out_fput2; 582 goto out_fput2;
570 info_sock = SOCKET_I(sock_inode); 583 info_sock = SOCKET_I(sock_inode);
@@ -886,12 +899,10 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
886 goto out; 899 goto out;
887 900
888 result = -EPERM; 901 result = -EPERM;
889 if (((attr->ia_valid & ATTR_UID) && 902 if ((attr->ia_valid & ATTR_UID) && !uid_eq(attr->ia_uid, server->m.uid))
890 (attr->ia_uid != server->m.uid)))
891 goto out; 903 goto out;
892 904
893 if (((attr->ia_valid & ATTR_GID) && 905 if ((attr->ia_valid & ATTR_GID) && !gid_eq(attr->ia_gid, server->m.gid))
894 (attr->ia_gid != server->m.gid)))
895 goto out; 906 goto out;
896 907
897 if (((attr->ia_valid & ATTR_MODE) && 908 if (((attr->ia_valid & ATTR_MODE) &&
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 6958adfaff08..60426ccb3b65 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -45,7 +45,7 @@ ncp_get_fs_info(struct ncp_server * server, struct inode *inode,
45 return -EINVAL; 45 return -EINVAL;
46 } 46 }
47 /* TODO: info.addr = server->m.serv_addr; */ 47 /* TODO: info.addr = server->m.serv_addr; */
48 SET_UID(info.mounted_uid, server->m.mounted_uid); 48 SET_UID(info.mounted_uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
49 info.connection = server->connection; 49 info.connection = server->connection;
50 info.buffer_size = server->buffer_size; 50 info.buffer_size = server->buffer_size;
51 info.volume_number = NCP_FINFO(inode)->volNumber; 51 info.volume_number = NCP_FINFO(inode)->volNumber;
@@ -69,7 +69,7 @@ ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode,
69 DPRINTK("info.version invalid: %d\n", info2.version); 69 DPRINTK("info.version invalid: %d\n", info2.version);
70 return -EINVAL; 70 return -EINVAL;
71 } 71 }
72 info2.mounted_uid = server->m.mounted_uid; 72 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
73 info2.connection = server->connection; 73 info2.connection = server->connection;
74 info2.buffer_size = server->buffer_size; 74 info2.buffer_size = server->buffer_size;
75 info2.volume_number = NCP_FINFO(inode)->volNumber; 75 info2.volume_number = NCP_FINFO(inode)->volNumber;
@@ -135,7 +135,7 @@ ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode,
135 DPRINTK("info.version invalid: %d\n", info2.version); 135 DPRINTK("info.version invalid: %d\n", info2.version);
136 return -EINVAL; 136 return -EINVAL;
137 } 137 }
138 info2.mounted_uid = server->m.mounted_uid; 138 info2.mounted_uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
139 info2.connection = server->connection; 139 info2.connection = server->connection;
140 info2.buffer_size = server->buffer_size; 140 info2.buffer_size = server->buffer_size;
141 info2.volume_number = NCP_FINFO(inode)->volNumber; 141 info2.volume_number = NCP_FINFO(inode)->volNumber;
@@ -348,22 +348,25 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
348 { 348 {
349 u16 uid; 349 u16 uid;
350 350
351 SET_UID(uid, server->m.mounted_uid); 351 SET_UID(uid, from_kuid_munged(current_user_ns(), server->m.mounted_uid));
352 if (put_user(uid, (u16 __user *)argp)) 352 if (put_user(uid, (u16 __user *)argp))
353 return -EFAULT; 353 return -EFAULT;
354 return 0; 354 return 0;
355 } 355 }
356 case NCP_IOC_GETMOUNTUID32: 356 case NCP_IOC_GETMOUNTUID32:
357 if (put_user(server->m.mounted_uid, 357 {
358 (u32 __user *)argp)) 358 uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
359 if (put_user(uid, (u32 __user *)argp))
359 return -EFAULT; 360 return -EFAULT;
360 return 0; 361 return 0;
362 }
361 case NCP_IOC_GETMOUNTUID64: 363 case NCP_IOC_GETMOUNTUID64:
362 if (put_user(server->m.mounted_uid, 364 {
363 (u64 __user *)argp)) 365 uid_t uid = from_kuid_munged(current_user_ns(), server->m.mounted_uid);
366 if (put_user(uid, (u64 __user *)argp))
364 return -EFAULT; 367 return -EFAULT;
365 return 0; 368 return 0;
366 369 }
367 case NCP_IOC_GETROOT: 370 case NCP_IOC_GETROOT:
368 { 371 {
369 struct ncp_setroot_ioctl sr; 372 struct ncp_setroot_ioctl sr;
@@ -808,9 +811,9 @@ outrel:
808 811
809long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 812long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
810{ 813{
811 struct inode *inode = filp->f_dentry->d_inode; 814 struct inode *inode = file_inode(filp);
812 struct ncp_server *server = NCP_SERVER(inode); 815 struct ncp_server *server = NCP_SERVER(inode);
813 uid_t uid = current_uid(); 816 kuid_t uid = current_uid();
814 int need_drop_write = 0; 817 int need_drop_write = 0;
815 long ret; 818 long ret;
816 819
@@ -819,12 +822,12 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
819 case NCP_IOC_CONN_LOGGED_IN: 822 case NCP_IOC_CONN_LOGGED_IN:
820 case NCP_IOC_SETROOT: 823 case NCP_IOC_SETROOT:
821 if (!capable(CAP_SYS_ADMIN)) { 824 if (!capable(CAP_SYS_ADMIN)) {
822 ret = -EACCES; 825 ret = -EPERM;
823 goto out; 826 goto out;
824 } 827 }
825 break; 828 break;
826 } 829 }
827 if (server->m.mounted_uid != uid) { 830 if (!uid_eq(server->m.mounted_uid, uid)) {
828 switch (cmd) { 831 switch (cmd) {
829 /* 832 /*
830 * Only mount owner can issue these ioctls. Information 833 * Only mount owner can issue these ioctls. Information
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 63d14a99483d..ee24df5af1f9 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -105,7 +105,7 @@ static const struct vm_operations_struct ncp_file_mmap =
105/* This is used for a general mmap of a ncp file */ 105/* This is used for a general mmap of a ncp file */
106int ncp_mmap(struct file *file, struct vm_area_struct *vma) 106int ncp_mmap(struct file *file, struct vm_area_struct *vma)
107{ 107{
108 struct inode *inode = file->f_path.dentry->d_inode; 108 struct inode *inode = file_inode(file);
109 109
110 DPRINTK("ncp_mmap: called\n"); 110 DPRINTK("ncp_mmap: called\n");
111 111
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index 54cc0cdb3dcb..c51b2c543539 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -23,15 +23,15 @@ struct ncp_mount_data_kernel {
23 unsigned long flags; /* NCP_MOUNT_* flags */ 23 unsigned long flags; /* NCP_MOUNT_* flags */
24 unsigned int int_flags; /* internal flags */ 24 unsigned int int_flags; /* internal flags */
25#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 25#define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001
26 uid_t mounted_uid; /* Who may umount() this filesystem? */ 26 kuid_t mounted_uid; /* Who may umount() this filesystem? */
27 struct pid *wdog_pid; /* Who cares for our watchdog packets? */ 27 struct pid *wdog_pid; /* Who cares for our watchdog packets? */
28 unsigned int ncp_fd; /* The socket to the ncp port */ 28 unsigned int ncp_fd; /* The socket to the ncp port */
29 unsigned int time_out; /* How long should I wait after 29 unsigned int time_out; /* How long should I wait after
30 sending a NCP request? */ 30 sending a NCP request? */
31 unsigned int retry_count; /* And how often should I retry? */ 31 unsigned int retry_count; /* And how often should I retry? */
32 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; 32 unsigned char mounted_vol[NCP_VOLNAME_LEN + 1];
33 uid_t uid; 33 kuid_t uid;
34 gid_t gid; 34 kgid_t gid;
35 umode_t file_mode; 35 umode_t file_mode;
36 umode_t dir_mode; 36 umode_t dir_mode;
37 int info_fd; 37 int info_fd;
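
The ncpfs hunks above are one mechanical conversion: the raw uid_t/gid_t fields in ncp_mount_data_kernel become kuid_t/kgid_t, so every boundary crossing goes through the user-namespace helpers. A compressed sketch of the idiom, drawn from the hunks above (illustrative fragments, not an additional hunk; the -EACCES below is only a stand-in for whatever the surrounding code actually returns):

    /* userspace value -> kernel id, rejecting unmapped ids */
    data->uid = make_kuid(current_user_ns(), optint);
    if (!uid_valid(data->uid))
            goto err;

    /* comparisons use the typed helpers instead of == */
    if (!uid_eq(server->m.mounted_uid, current_uid()))
            return -EACCES;

    /* kernel id -> userspace value when printing or copying out */
    seq_printf(seq, ",uid=%u",
               from_kuid_munged(&init_user_ns, server->m.uid));
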
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 4fa788c93f46..434b93ec0970 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1273,6 +1273,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
1273static struct pnfs_layoutdriver_type blocklayout_type = { 1273static struct pnfs_layoutdriver_type blocklayout_type = {
1274 .id = LAYOUT_BLOCK_VOLUME, 1274 .id = LAYOUT_BLOCK_VOLUME,
1275 .name = "LAYOUT_BLOCK_VOLUME", 1275 .name = "LAYOUT_BLOCK_VOLUME",
1276 .owner = THIS_MODULE,
1276 .read_pagelist = bl_read_pagelist, 1277 .read_pagelist = bl_read_pagelist,
1277 .write_pagelist = bl_write_pagelist, 1278 .write_pagelist = bl_write_pagelist,
1278 .alloc_layout_hdr = bl_alloc_layout_hdr, 1279 .alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c89b26bc9759..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183static u32 initiate_bulk_draining(struct nfs_client *clp, 183static u32 initiate_bulk_draining(struct nfs_client *clp,
184 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
185{ 185{
186 struct nfs_server *server; 186 int stat;
187 struct pnfs_layout_hdr *lo;
188 struct inode *ino;
189 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190 struct pnfs_layout_hdr *tmp;
191 LIST_HEAD(recall_list);
192 LIST_HEAD(free_me_list);
193 struct pnfs_layout_range range = {
194 .iomode = IOMODE_ANY,
195 .offset = 0,
196 .length = NFS4_MAX_UINT64,
197 };
198
199 spin_lock(&clp->cl_lock);
200 rcu_read_lock();
201 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202 if ((args->cbl_recall_type == RETURN_FSID) &&
203 memcmp(&server->fsid, &args->cbl_fsid,
204 sizeof(struct nfs_fsid)))
205 continue;
206
207 list_for_each_entry(lo, &server->layouts, plh_layouts) {
208 ino = igrab(lo->plh_inode);
209 if (ino)
210 continue;
211 spin_lock(&ino->i_lock);
212 /* Is this layout in the process of being freed? */
213 if (NFS_I(ino)->layout != lo) {
214 spin_unlock(&ino->i_lock);
215 iput(ino);
216 continue;
217 }
218 pnfs_get_layout_hdr(lo);
219 spin_unlock(&ino->i_lock);
220 list_add(&lo->plh_bulk_recall, &recall_list);
221 }
222 }
223 rcu_read_unlock();
224 spin_unlock(&clp->cl_lock);
225 187
226 list_for_each_entry_safe(lo, tmp, 188 if (args->cbl_recall_type == RETURN_FSID)
227 &recall_list, plh_bulk_recall) { 189 stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
228 ino = lo->plh_inode; 190 else
229 spin_lock(&ino->i_lock); 191 stat = pnfs_destroy_layouts_byclid(clp, true);
230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 192 if (stat != 0)
231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) 193 return NFS4ERR_DELAY;
232 rv = NFS4ERR_DELAY; 194 return NFS4ERR_NOMATCHING_LAYOUT;
233 list_del_init(&lo->plh_bulk_recall);
234 spin_unlock(&ino->i_lock);
235 pnfs_free_lseg_list(&free_me_list);
236 pnfs_put_layout_hdr(lo);
237 iput(ino);
238 }
239 return rv;
240} 195}
241 196
242static u32 do_callback_layoutrecall(struct nfs_client *clp, 197static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9f3c66438d0e..84d8eae203a7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -197,7 +197,6 @@ error_0:
197EXPORT_SYMBOL_GPL(nfs_alloc_client); 197EXPORT_SYMBOL_GPL(nfs_alloc_client);
198 198
199#if IS_ENABLED(CONFIG_NFS_V4) 199#if IS_ENABLED(CONFIG_NFS_V4)
200/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
201void nfs_cleanup_cb_ident_idr(struct net *net) 200void nfs_cleanup_cb_ident_idr(struct net *net)
202{ 201{
203 struct nfs_net *nn = net_generic(net, nfs_net_id); 202 struct nfs_net *nn = net_generic(net, nfs_net_id);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 81c5eec3cf38..6390a4b5fee7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -55,7 +55,8 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
55 flags &= FMODE_READ|FMODE_WRITE; 55 flags &= FMODE_READ|FMODE_WRITE;
56 rcu_read_lock(); 56 rcu_read_lock();
57 delegation = rcu_dereference(NFS_I(inode)->delegation); 57 delegation = rcu_dereference(NFS_I(inode)->delegation);
58 if (delegation != NULL && (delegation->type & flags) == flags) { 58 if (delegation != NULL && (delegation->type & flags) == flags &&
59 !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
59 nfs_mark_delegation_referenced(delegation); 60 nfs_mark_delegation_referenced(delegation);
60 ret = 1; 61 ret = 1;
61 } 62 }
@@ -70,8 +71,10 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
70 int status = 0; 71 int status = 0;
71 72
72 if (inode->i_flock == NULL) 73 if (inode->i_flock == NULL)
73 goto out; 74 return 0;
74 75
76 if (inode->i_flock == NULL)
77 goto out;
75 /* Protect inode->i_flock using the file locks lock */ 78 /* Protect inode->i_flock using the file locks lock */
76 lock_flocks(); 79 lock_flocks();
77 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { 80 for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
@@ -94,7 +97,9 @@ static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *s
94{ 97{
95 struct nfs_inode *nfsi = NFS_I(inode); 98 struct nfs_inode *nfsi = NFS_I(inode);
96 struct nfs_open_context *ctx; 99 struct nfs_open_context *ctx;
100 struct nfs4_state_owner *sp;
97 struct nfs4_state *state; 101 struct nfs4_state *state;
102 unsigned int seq;
98 int err; 103 int err;
99 104
100again: 105again:
@@ -109,9 +114,16 @@ again:
109 continue; 114 continue;
110 get_nfs_open_context(ctx); 115 get_nfs_open_context(ctx);
111 spin_unlock(&inode->i_lock); 116 spin_unlock(&inode->i_lock);
117 sp = state->owner;
118 /* Block nfs4_proc_unlck */
119 mutex_lock(&sp->so_delegreturn_mutex);
120 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
112 err = nfs4_open_delegation_recall(ctx, state, stateid); 121 err = nfs4_open_delegation_recall(ctx, state, stateid);
113 if (err >= 0) 122 if (!err)
114 err = nfs_delegation_claim_locks(ctx, state); 123 err = nfs_delegation_claim_locks(ctx, state);
124 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
125 err = -EAGAIN;
126 mutex_unlock(&sp->so_delegreturn_mutex);
115 put_nfs_open_context(ctx); 127 put_nfs_open_context(ctx);
116 if (err != 0) 128 if (err != 0)
117 return err; 129 return err;
@@ -182,39 +194,91 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
182} 194}
183 195
184static struct nfs_delegation * 196static struct nfs_delegation *
197nfs_start_delegation_return_locked(struct nfs_inode *nfsi)
198{
199 struct nfs_delegation *ret = NULL;
200 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
201
202 if (delegation == NULL)
203 goto out;
204 spin_lock(&delegation->lock);
205 if (!test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
206 ret = delegation;
207 spin_unlock(&delegation->lock);
208out:
209 return ret;
210}
211
212static struct nfs_delegation *
213nfs_start_delegation_return(struct nfs_inode *nfsi)
214{
215 struct nfs_delegation *delegation;
216
217 rcu_read_lock();
218 delegation = nfs_start_delegation_return_locked(nfsi);
219 rcu_read_unlock();
220 return delegation;
221}
222
223static void
224nfs_abort_delegation_return(struct nfs_delegation *delegation,
225 struct nfs_client *clp)
226{
227
228 spin_lock(&delegation->lock);
229 clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
230 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
231 spin_unlock(&delegation->lock);
232 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
233}
234
235static struct nfs_delegation *
185nfs_detach_delegation_locked(struct nfs_inode *nfsi, 236nfs_detach_delegation_locked(struct nfs_inode *nfsi,
186 struct nfs_server *server) 237 struct nfs_delegation *delegation,
238 struct nfs_client *clp)
187{ 239{
188 struct nfs_delegation *delegation = 240 struct nfs_delegation *deleg_cur =
189 rcu_dereference_protected(nfsi->delegation, 241 rcu_dereference_protected(nfsi->delegation,
190 lockdep_is_held(&server->nfs_client->cl_lock)); 242 lockdep_is_held(&clp->cl_lock));
191 243
192 if (delegation == NULL) 244 if (deleg_cur == NULL || delegation != deleg_cur)
193 goto nomatch; 245 return NULL;
194 246
195 spin_lock(&delegation->lock); 247 spin_lock(&delegation->lock);
248 set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
196 list_del_rcu(&delegation->super_list); 249 list_del_rcu(&delegation->super_list);
197 delegation->inode = NULL; 250 delegation->inode = NULL;
198 nfsi->delegation_state = 0; 251 nfsi->delegation_state = 0;
199 rcu_assign_pointer(nfsi->delegation, NULL); 252 rcu_assign_pointer(nfsi->delegation, NULL);
200 spin_unlock(&delegation->lock); 253 spin_unlock(&delegation->lock);
201 return delegation; 254 return delegation;
202nomatch:
203 return NULL;
204} 255}
205 256
206static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi, 257static struct nfs_delegation *nfs_detach_delegation(struct nfs_inode *nfsi,
207 struct nfs_server *server) 258 struct nfs_delegation *delegation,
259 struct nfs_server *server)
208{ 260{
209 struct nfs_client *clp = server->nfs_client; 261 struct nfs_client *clp = server->nfs_client;
210 struct nfs_delegation *delegation;
211 262
212 spin_lock(&clp->cl_lock); 263 spin_lock(&clp->cl_lock);
213 delegation = nfs_detach_delegation_locked(nfsi, server); 264 delegation = nfs_detach_delegation_locked(nfsi, delegation, clp);
214 spin_unlock(&clp->cl_lock); 265 spin_unlock(&clp->cl_lock);
215 return delegation; 266 return delegation;
216} 267}
217 268
269static struct nfs_delegation *
270nfs_inode_detach_delegation(struct inode *inode)
271{
272 struct nfs_inode *nfsi = NFS_I(inode);
273 struct nfs_server *server = NFS_SERVER(inode);
274 struct nfs_delegation *delegation;
275
276 delegation = nfs_start_delegation_return(nfsi);
277 if (delegation == NULL)
278 return NULL;
279 return nfs_detach_delegation(nfsi, delegation, server);
280}
281
218/** 282/**
219 * nfs_inode_set_delegation - set up a delegation on an inode 283 * nfs_inode_set_delegation - set up a delegation on an inode
220 * @inode: inode to which delegation applies 284 * @inode: inode to which delegation applies
@@ -268,7 +332,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
268 delegation = NULL; 332 delegation = NULL;
269 goto out; 333 goto out;
270 } 334 }
271 freeme = nfs_detach_delegation_locked(nfsi, server); 335 freeme = nfs_detach_delegation_locked(nfsi,
336 old_delegation, clp);
337 if (freeme == NULL)
338 goto out;
272 } 339 }
273 list_add_rcu(&delegation->super_list, &server->delegations); 340 list_add_rcu(&delegation->super_list, &server->delegations);
274 nfsi->delegation_state = delegation->type; 341 nfsi->delegation_state = delegation->type;
@@ -292,19 +359,29 @@ out:
292/* 359/*
293 * Basic procedure for returning a delegation to the server 360 * Basic procedure for returning a delegation to the server
294 */ 361 */
295static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 362static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation *delegation, int issync)
296{ 363{
364 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
297 struct nfs_inode *nfsi = NFS_I(inode); 365 struct nfs_inode *nfsi = NFS_I(inode);
298 int err; 366 int err;
299 367
300 /* 368 if (delegation == NULL)
301 * Guard against new delegated open/lock/unlock calls and against 369 return 0;
302 * state recovery 370 do {
303 */ 371 err = nfs_delegation_claim_opens(inode, &delegation->stateid);
304 down_write(&nfsi->rwsem); 372 if (!issync || err != -EAGAIN)
305 err = nfs_delegation_claim_opens(inode, &delegation->stateid); 373 break;
306 up_write(&nfsi->rwsem); 374 /*
307 if (err) 375 * Guard against state recovery
376 */
377 err = nfs4_wait_clnt_recover(clp);
378 } while (err == 0);
379
380 if (err) {
381 nfs_abort_delegation_return(delegation, clp);
382 goto out;
383 }
384 if (!nfs_detach_delegation(nfsi, delegation, NFS_SERVER(inode)))
308 goto out; 385 goto out;
309 386
310 err = nfs_do_return_delegation(inode, delegation, issync); 387 err = nfs_do_return_delegation(inode, delegation, issync);
@@ -340,13 +417,10 @@ restart:
340 inode = nfs_delegation_grab_inode(delegation); 417 inode = nfs_delegation_grab_inode(delegation);
341 if (inode == NULL) 418 if (inode == NULL)
342 continue; 419 continue;
343 delegation = nfs_detach_delegation(NFS_I(inode), 420 delegation = nfs_start_delegation_return_locked(NFS_I(inode));
344 server);
345 rcu_read_unlock(); 421 rcu_read_unlock();
346 422
347 if (delegation != NULL) 423 err = nfs_end_delegation_return(inode, delegation, 0);
348 err = __nfs_inode_return_delegation(inode,
349 delegation, 0);
350 iput(inode); 424 iput(inode);
351 if (!err) 425 if (!err)
352 goto restart; 426 goto restart;
@@ -367,15 +441,11 @@ restart:
367 */ 441 */
368void nfs_inode_return_delegation_noreclaim(struct inode *inode) 442void nfs_inode_return_delegation_noreclaim(struct inode *inode)
369{ 443{
370 struct nfs_server *server = NFS_SERVER(inode);
371 struct nfs_inode *nfsi = NFS_I(inode);
372 struct nfs_delegation *delegation; 444 struct nfs_delegation *delegation;
373 445
374 if (rcu_access_pointer(nfsi->delegation) != NULL) { 446 delegation = nfs_inode_detach_delegation(inode);
375 delegation = nfs_detach_delegation(nfsi, server); 447 if (delegation != NULL)
376 if (delegation != NULL) 448 nfs_do_return_delegation(inode, delegation, 0);
377 nfs_do_return_delegation(inode, delegation, 0);
378 }
379} 449}
380 450
381/** 451/**
@@ -390,18 +460,14 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
390 */ 460 */
391int nfs4_inode_return_delegation(struct inode *inode) 461int nfs4_inode_return_delegation(struct inode *inode)
392{ 462{
393 struct nfs_server *server = NFS_SERVER(inode);
394 struct nfs_inode *nfsi = NFS_I(inode); 463 struct nfs_inode *nfsi = NFS_I(inode);
395 struct nfs_delegation *delegation; 464 struct nfs_delegation *delegation;
396 int err = 0; 465 int err = 0;
397 466
398 nfs_wb_all(inode); 467 nfs_wb_all(inode);
399 if (rcu_access_pointer(nfsi->delegation) != NULL) { 468 delegation = nfs_start_delegation_return(nfsi);
400 delegation = nfs_detach_delegation(nfsi, server); 469 if (delegation != NULL)
401 if (delegation != NULL) { 470 err = nfs_end_delegation_return(inode, delegation, 1);
402 err = __nfs_inode_return_delegation(inode, delegation, 1);
403 }
404 }
405 return err; 471 return err;
406} 472}
407 473
@@ -471,7 +537,7 @@ void nfs_remove_bad_delegation(struct inode *inode)
471{ 537{
472 struct nfs_delegation *delegation; 538 struct nfs_delegation *delegation;
473 539
474 delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); 540 delegation = nfs_inode_detach_delegation(inode);
475 if (delegation) { 541 if (delegation) {
476 nfs_inode_find_state_and_recover(inode, &delegation->stateid); 542 nfs_inode_find_state_and_recover(inode, &delegation->stateid);
477 nfs_free_delegation(delegation); 543 nfs_free_delegation(delegation);
@@ -649,7 +715,7 @@ restart:
649 if (inode == NULL) 715 if (inode == NULL)
650 continue; 716 continue;
651 delegation = nfs_detach_delegation(NFS_I(inode), 717 delegation = nfs_detach_delegation(NFS_I(inode),
652 server); 718 delegation, server);
653 rcu_read_unlock(); 719 rcu_read_unlock();
654 720
655 if (delegation != NULL) 721 if (delegation != NULL)
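
The fs/nfs/delegation.c hunks restructure delegation return around the new NFS_DELEGATION_RETURNING flag: a return is first started (marking the delegation), then completed or aborted, instead of detaching the delegation up front. A condensed sketch of the flow as used by nfs4_inode_return_delegation() above (illustrative only):

    delegation = nfs_start_delegation_return(nfsi);   /* sets RETURNING, or NULL */
    if (delegation != NULL)
            err = nfs_end_delegation_return(inode, delegation, 1 /* issync */);

    /* inside nfs_end_delegation_return(): claim opens/locks, retrying
     * through nfs4_wait_clnt_recover() on -EAGAIN; on failure call
     * nfs_abort_delegation_return(), which clears RETURNING, re-sets
     * NFS_DELEGATION_RETURN and flags NFS4CLNT_DELEGRETURN so the state
     * manager retries later; on success detach the delegation and hand
     * it back to the server. */

nfs4_have_delegation() correspondingly refuses to treat a delegation marked RETURNING as usable.
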
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index bbc6a4dba0d8..d54d4fca6793 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ enum {
29 NFS_DELEGATION_NEED_RECLAIM = 0, 29 NFS_DELEGATION_NEED_RECLAIM = 0,
30 NFS_DELEGATION_RETURN, 30 NFS_DELEGATION_RETURN,
31 NFS_DELEGATION_REFERENCED, 31 NFS_DELEGATION_REFERENCED,
32 NFS_DELEGATION_RETURNING,
32}; 33};
33 34
34int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); 35int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32e6c53520e2..f23f455be42b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -281,7 +281,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
281 281
282 for (i = 0; i < array->size; i++) { 282 for (i = 0; i < array->size; i++) {
283 if (array->array[i].cookie == *desc->dir_cookie) { 283 if (array->array[i].cookie == *desc->dir_cookie) {
284 struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode); 284 struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
285 struct nfs_open_dir_context *ctx = desc->file->private_data; 285 struct nfs_open_dir_context *ctx = desc->file->private_data;
286 286
287 new_pos = desc->current_index + i; 287 new_pos = desc->current_index + i;
@@ -629,7 +629,7 @@ out:
629static 629static
630int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) 630int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
631{ 631{
632 struct inode *inode = desc->file->f_path.dentry->d_inode; 632 struct inode *inode = file_inode(desc->file);
633 int ret; 633 int ret;
634 634
635 ret = nfs_readdir_xdr_to_array(desc, page, inode); 635 ret = nfs_readdir_xdr_to_array(desc, page, inode);
@@ -660,7 +660,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
660static 660static
661struct page *get_cache_page(nfs_readdir_descriptor_t *desc) 661struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
662{ 662{
663 return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, 663 return read_cache_page(file_inode(desc->file)->i_mapping,
664 desc->page_index, (filler_t *)nfs_readdir_filler, desc); 664 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
665} 665}
666 666
@@ -764,7 +764,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
764{ 764{
765 struct page *page = NULL; 765 struct page *page = NULL;
766 int status; 766 int status;
767 struct inode *inode = desc->file->f_path.dentry->d_inode; 767 struct inode *inode = file_inode(desc->file);
768 struct nfs_open_dir_context *ctx = desc->file->private_data; 768 struct nfs_open_dir_context *ctx = desc->file->private_data;
769 769
770 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 770 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
@@ -1136,6 +1136,45 @@ out_error:
1136} 1136}
1137 1137
1138/* 1138/*
1139 * A weaker form of d_revalidate for revalidating just the dentry->d_inode
1140 * when we don't really care about the dentry name. This is called when a
1141 * pathwalk ends on a dentry that was not found via a normal lookup in the
1142 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1143 *
1144 * In this situation, we just want to verify that the inode itself is OK
1145 * since the dentry might have changed on the server.
1146 */
1147static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1148{
1149 int error;
1150 struct inode *inode = dentry->d_inode;
1151
1152 /*
1153 * I believe we can only get a negative dentry here in the case of a
1154 * procfs-style symlink. Just assume it's correct for now, but we may
1155 * eventually need to do something more here.
1156 */
1157 if (!inode) {
1158 dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n",
1159 __func__, dentry->d_parent->d_name.name,
1160 dentry->d_name.name);
1161 return 1;
1162 }
1163
1164 if (is_bad_inode(inode)) {
1165 dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
1166 __func__, dentry->d_parent->d_name.name,
1167 dentry->d_name.name);
1168 return 0;
1169 }
1170
1171 error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
1172 dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1173 __func__, inode->i_ino, error ? "invalid" : "valid");
1174 return !error;
1175}
1176
1177/*
1139 * This is called from dput() when d_count is going to 0. 1178 * This is called from dput() when d_count is going to 0.
1140 */ 1179 */
1141static int nfs_dentry_delete(const struct dentry *dentry) 1180static int nfs_dentry_delete(const struct dentry *dentry)
@@ -1202,6 +1241,7 @@ static void nfs_d_release(struct dentry *dentry)
1202 1241
1203const struct dentry_operations nfs_dentry_operations = { 1242const struct dentry_operations nfs_dentry_operations = {
1204 .d_revalidate = nfs_lookup_revalidate, 1243 .d_revalidate = nfs_lookup_revalidate,
1244 .d_weak_revalidate = nfs_weak_revalidate,
1205 .d_delete = nfs_dentry_delete, 1245 .d_delete = nfs_dentry_delete,
1206 .d_iput = nfs_dentry_iput, 1246 .d_iput = nfs_dentry_iput,
1207 .d_automount = nfs_d_automount, 1247 .d_automount = nfs_d_automount,
@@ -2153,12 +2193,16 @@ static int nfs_open_permission_mask(int openflags)
2153{ 2193{
2154 int mask = 0; 2194 int mask = 0;
2155 2195
2156 if ((openflags & O_ACCMODE) != O_WRONLY) 2196 if (openflags & __FMODE_EXEC) {
2157 mask |= MAY_READ; 2197 /* ONLY check exec rights */
2158 if ((openflags & O_ACCMODE) != O_RDONLY) 2198 mask = MAY_EXEC;
2159 mask |= MAY_WRITE; 2199 } else {
2160 if (openflags & __FMODE_EXEC) 2200 if ((openflags & O_ACCMODE) != O_WRONLY)
2161 mask |= MAY_EXEC; 2201 mask |= MAY_READ;
2202 if ((openflags & O_ACCMODE) != O_RDONLY)
2203 mask |= MAY_WRITE;
2204 }
2205
2162 return mask; 2206 return mask;
2163} 2207}
2164 2208
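
Two behavioural points fall out of the fs/nfs/dir.c hunks above: the new d_weak_revalidate op revalidates just the inode when a path walk ends on a dentry that was not found via normal lookup, and nfs_open_permission_mask() now treats exec-only opens specially. A short worked example of the reworked mask, derived from the code above (illustrative only):

    /*
     *   open(path, O_RDONLY)               -> MAY_READ
     *   open(path, O_WRONLY)               -> MAY_WRITE
     *   open(path, O_RDWR)                 -> MAY_READ | MAY_WRITE
     *   open for exec (__FMODE_EXEC set)   -> MAY_EXEC only,
     *                                         read/write bits not added
     */
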
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3c2b893665ba..29f4a48a0ee6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -292,7 +292,7 @@ static int
292nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) 292nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
293{ 293{
294 int ret; 294 int ret;
295 struct inode *inode = file->f_path.dentry->d_inode; 295 struct inode *inode = file_inode(file);
296 296
297 do { 297 do {
298 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 298 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 033803c36644..44efaa8c5f78 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -126,8 +126,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
126 } 126 }
127 spin_unlock(&ret->d_lock); 127 spin_unlock(&ret->d_lock);
128out: 128out:
129 if (name) 129 kfree(name);
130 kfree(name);
131 nfs_free_fattr(fsinfo.fattr); 130 nfs_free_fattr(fsinfo.fattr);
132 return ret; 131 return ret;
133} 132}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index bc3968fa81e5..dc0f98dfa717 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -97,7 +97,7 @@ static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
97static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr) 97static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
98{ 98{
99 struct nfs4_string *owner = fattr->owner_name; 99 struct nfs4_string *owner = fattr->owner_name;
100 __u32 uid; 100 kuid_t uid;
101 101
102 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)) 102 if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
103 return false; 103 return false;
@@ -111,7 +111,7 @@ static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr
111static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr) 111static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
112{ 112{
113 struct nfs4_string *group = fattr->group_name; 113 struct nfs4_string *group = fattr->group_name;
114 __u32 gid; 114 kgid_t gid;
115 115
116 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)) 116 if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
117 return false; 117 return false;
@@ -193,7 +193,8 @@ static int nfs_idmap_init_keyring(void)
193 if (!cred) 193 if (!cred)
194 return -ENOMEM; 194 return -ENOMEM;
195 195
196 keyring = keyring_alloc(".id_resolver", 0, 0, cred, 196 keyring = keyring_alloc(".id_resolver",
197 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
197 (KEY_POS_ALL & ~KEY_POS_SETATTR) | 198 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
198 KEY_USR_VIEW | KEY_USR_READ, 199 KEY_USR_VIEW | KEY_USR_READ,
199 KEY_ALLOC_NOT_IN_QUOTA, NULL); 200 KEY_ALLOC_NOT_IN_QUOTA, NULL);
@@ -764,7 +765,7 @@ out:
764static ssize_t 765static ssize_t
765idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) 766idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
766{ 767{
767 struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); 768 struct rpc_inode *rpci = RPC_I(file_inode(filp));
768 struct idmap *idmap = (struct idmap *)rpci->private; 769 struct idmap *idmap = (struct idmap *)rpci->private;
769 struct key_construction *cons; 770 struct key_construction *cons;
770 struct idmap_msg im; 771 struct idmap_msg im;
@@ -836,43 +837,61 @@ idmap_release_pipe(struct inode *inode)
836 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); 837 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
837} 838}
838 839
839int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 840int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
840{ 841{
841 struct idmap *idmap = server->nfs_client->cl_idmap; 842 struct idmap *idmap = server->nfs_client->cl_idmap;
843 __u32 id = -1;
844 int ret = 0;
842 845
843 if (nfs_map_string_to_numeric(name, namelen, uid)) 846 if (!nfs_map_string_to_numeric(name, namelen, &id))
844 return 0; 847 ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
845 return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); 848 if (ret == 0) {
849 *uid = make_kuid(&init_user_ns, id);
850 if (!uid_valid(*uid))
851 ret = -ERANGE;
852 }
853 return ret;
846} 854}
847 855
848int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) 856int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
849{ 857{
850 struct idmap *idmap = server->nfs_client->cl_idmap; 858 struct idmap *idmap = server->nfs_client->cl_idmap;
859 __u32 id = -1;
860 int ret = 0;
851 861
852 if (nfs_map_string_to_numeric(name, namelen, gid)) 862 if (!nfs_map_string_to_numeric(name, namelen, &id))
853 return 0; 863 ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
854 return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); 864 if (ret == 0) {
865 *gid = make_kgid(&init_user_ns, id);
866 if (!gid_valid(*gid))
867 ret = -ERANGE;
868 }
869 return ret;
855} 870}
856 871
857int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) 872int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
858{ 873{
859 struct idmap *idmap = server->nfs_client->cl_idmap; 874 struct idmap *idmap = server->nfs_client->cl_idmap;
860 int ret = -EINVAL; 875 int ret = -EINVAL;
876 __u32 id;
861 877
878 id = from_kuid(&init_user_ns, uid);
862 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 879 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
863 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); 880 ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
864 if (ret < 0) 881 if (ret < 0)
865 ret = nfs_map_numeric_to_string(uid, buf, buflen); 882 ret = nfs_map_numeric_to_string(id, buf, buflen);
866 return ret; 883 return ret;
867} 884}
868int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) 885int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
869{ 886{
870 struct idmap *idmap = server->nfs_client->cl_idmap; 887 struct idmap *idmap = server->nfs_client->cl_idmap;
871 int ret = -EINVAL; 888 int ret = -EINVAL;
889 __u32 id;
872 890
891 id = from_kgid(&init_user_ns, gid);
873 if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) 892 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
874 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); 893 ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
875 if (ret < 0) 894 if (ret < 0)
876 ret = nfs_map_numeric_to_string(gid, buf, buflen); 895 ret = nfs_map_numeric_to_string(id, buf, buflen);
877 return ret; 896 return ret;
878} 897}
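
The fs/nfs/idmap.c hunks switch the mapping functions over to kuid_t/kgid_t: a name coming off the wire is first tried as a plain numeric string, then via the id_resolver upcall, and the resulting raw id is converted with make_kuid()/make_kgid() against &init_user_ns, with unmapped ids rejected. A compressed sketch of the uid path, taken from nfs_map_name_to_uid() above (illustrative only):

    __u32 id = -1;
    int ret = 0;

    if (!nfs_map_string_to_numeric(name, namelen, &id))          /* e.g. "65534" */
            ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
    if (ret == 0) {
            *uid = make_kuid(&init_user_ns, id);                  /* raw id -> kuid_t */
            if (!uid_valid(*uid))
                    ret = -ERANGE;                                 /* unmapped id */
    }
    return ret;

The reverse direction (nfs_map_uid_to_name()) does the symmetric from_kuid(&init_user_ns, uid) before consulting the idmapper or falling back to a numeric string.
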
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index ebeb94ce1b0b..b586fe9af475 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -332,8 +332,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
332 inode->i_version = 0; 332 inode->i_version = 0;
333 inode->i_size = 0; 333 inode->i_size = 0;
334 clear_nlink(inode); 334 clear_nlink(inode);
335 inode->i_uid = -2; 335 inode->i_uid = make_kuid(&init_user_ns, -2);
336 inode->i_gid = -2; 336 inode->i_gid = make_kgid(&init_user_ns, -2);
337 inode->i_blocks = 0; 337 inode->i_blocks = 0;
338 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 338 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
339 nfsi->write_io = 0; 339 nfsi->write_io = 0;
@@ -694,10 +694,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
694 if (ctx->cred != NULL) 694 if (ctx->cred != NULL)
695 put_rpccred(ctx->cred); 695 put_rpccred(ctx->cred);
696 dput(ctx->dentry); 696 dput(ctx->dentry);
697 if (is_sync) 697 nfs_sb_deactive(sb);
698 nfs_sb_deactive(sb);
699 else
700 nfs_sb_deactive_async(sb);
701 kfree(ctx->mdsthreshold); 698 kfree(ctx->mdsthreshold);
702 kfree(ctx); 699 kfree(ctx);
703} 700}
@@ -714,7 +711,7 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context);
714 */ 711 */
715void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 712void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
716{ 713{
717 struct inode *inode = filp->f_path.dentry->d_inode; 714 struct inode *inode = file_inode(filp);
718 struct nfs_inode *nfsi = NFS_I(inode); 715 struct nfs_inode *nfsi = NFS_I(inode);
719 716
720 filp->private_data = get_nfs_open_context(ctx); 717 filp->private_data = get_nfs_open_context(ctx);
@@ -747,7 +744,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
747 744
748static void nfs_file_clear_open_context(struct file *filp) 745static void nfs_file_clear_open_context(struct file *filp)
749{ 746{
750 struct inode *inode = filp->f_path.dentry->d_inode; 747 struct inode *inode = file_inode(filp);
751 struct nfs_open_context *ctx = nfs_file_open_context(filp); 748 struct nfs_open_context *ctx = nfs_file_open_context(filp);
752 749
753 if (ctx) { 750 if (ctx) {
@@ -1009,9 +1006,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
1009 /* Have any file permissions changed? */ 1006 /* Have any file permissions changed? */
1010 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) 1007 if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
1011 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1008 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1012 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid) 1009 if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
1013 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1010 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1014 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid) 1011 if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
1015 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; 1012 invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
1016 1013
1017 /* Has the link count changed? */ 1014 /* Has the link count changed? */
@@ -1440,7 +1437,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1440 | NFS_INO_REVAL_FORCED); 1437 | NFS_INO_REVAL_FORCED);
1441 1438
1442 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1439 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1443 if (inode->i_uid != fattr->uid) { 1440 if (!uid_eq(inode->i_uid, fattr->uid)) {
1444 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1441 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1445 inode->i_uid = fattr->uid; 1442 inode->i_uid = fattr->uid;
1446 } 1443 }
@@ -1451,7 +1448,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1451 | NFS_INO_REVAL_FORCED); 1448 | NFS_INO_REVAL_FORCED);
1452 1449
1453 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1450 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1454 if (inode->i_gid != fattr->gid) { 1451 if (!gid_eq(inode->i_gid, fattr->gid)) {
1455 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1452 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1456 inode->i_gid = fattr->gid; 1453 inode->i_gid = fattr->gid;
1457 } 1454 }
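
The inode.c hunks above switch ownership handling to the type-safe id helpers: the "unknown owner" value -2 is wrapped with make_kuid()/make_kgid(), and comparisons go through uid_eq()/gid_eq() because kuid_t and kgid_t cannot be compared with ==. A minimal sketch of both patterns, assuming init_user_ns (the helpers below are illustrative, not NFS functions):

#include <linux/fs.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

/* Sketch: detect an ownership change with the type-safe comparators. */
static bool example_owner_changed(const struct inode *inode,
                                  kuid_t new_uid, kgid_t new_gid)
{
        return !uid_eq(inode->i_uid, new_uid) ||
               !gid_eq(inode->i_gid, new_gid);
}

/* Sketch: the "owner unknown" placeholder used when an inode is reset. */
static void example_set_nobody(struct inode *inode)
{
        inode->i_uid = make_kuid(&init_user_ns, -2);
        inode->i_gid = make_kgid(&init_user_ns, -2);
}
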
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f0e6c7df1a07..541c9ebdbc5a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -329,7 +329,6 @@ extern int __init register_nfs_fs(void);
329extern void __exit unregister_nfs_fs(void); 329extern void __exit unregister_nfs_fs(void);
330extern void nfs_sb_active(struct super_block *sb); 330extern void nfs_sb_active(struct super_block *sb);
331extern void nfs_sb_deactive(struct super_block *sb); 331extern void nfs_sb_deactive(struct super_block *sb);
332extern void nfs_sb_deactive_async(struct super_block *sb);
333 332
334/* namespace.c */ 333/* namespace.c */
335#define NFS_PATH_CANONICAL 1 334#define NFS_PATH_CANONICAL 1
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
177 return mnt; 177 return mnt;
178} 178}
179 179
180static int
181nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
182{
183 if (NFS_FH(dentry->d_inode)->size != 0)
184 return nfs_getattr(mnt, dentry, stat);
185 generic_fillattr(dentry->d_inode, stat);
186 return 0;
187}
188
189static int
190nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
191{
192 if (NFS_FH(dentry->d_inode)->size != 0)
193 return nfs_setattr(dentry, attr);
194 return -EACCES;
195}
196
180const struct inode_operations nfs_mountpoint_inode_operations = { 197const struct inode_operations nfs_mountpoint_inode_operations = {
181 .getattr = nfs_getattr, 198 .getattr = nfs_getattr,
199 .setattr = nfs_setattr,
182}; 200};
183 201
184const struct inode_operations nfs_referral_inode_operations = { 202const struct inode_operations nfs_referral_inode_operations = {
203 .getattr = nfs_namespace_getattr,
204 .setattr = nfs_namespace_setattr,
185}; 205};
186 206
187static void nfs_expire_automounts(struct work_struct *work) 207static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 06b9df49f7f7..62db136339ea 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -290,8 +290,13 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
290 290
291 fattr->mode = be32_to_cpup(p++); 291 fattr->mode = be32_to_cpup(p++);
292 fattr->nlink = be32_to_cpup(p++); 292 fattr->nlink = be32_to_cpup(p++);
293 fattr->uid = be32_to_cpup(p++); 293 fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
294 fattr->gid = be32_to_cpup(p++); 294 if (!uid_valid(fattr->uid))
295 goto out_uid;
296 fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
297 if (!gid_valid(fattr->gid))
298 goto out_gid;
299
295 fattr->size = be32_to_cpup(p++); 300 fattr->size = be32_to_cpup(p++);
296 fattr->du.nfs2.blocksize = be32_to_cpup(p++); 301 fattr->du.nfs2.blocksize = be32_to_cpup(p++);
297 302
@@ -313,6 +318,12 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
313 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); 318 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
314 319
315 return 0; 320 return 0;
321out_uid:
322 dprintk("NFS: returned invalid uid\n");
323 return -EINVAL;
324out_gid:
325 dprintk("NFS: returned invalid gid\n");
326 return -EINVAL;
316out_overflow: 327out_overflow:
317 print_overflow_msg(__func__, xdr); 328 print_overflow_msg(__func__, xdr);
318 return -EIO; 329 return -EIO;
@@ -351,11 +362,11 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr)
351 else 362 else
352 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 363 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
353 if (attr->ia_valid & ATTR_UID) 364 if (attr->ia_valid & ATTR_UID)
354 *p++ = cpu_to_be32(attr->ia_uid); 365 *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
355 else 366 else
356 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 367 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
357 if (attr->ia_valid & ATTR_GID) 368 if (attr->ia_valid & ATTR_GID)
358 *p++ = cpu_to_be32(attr->ia_gid); 369 *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
359 else 370 else
360 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); 371 *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
361 if (attr->ia_valid & ATTR_SIZE) 372 if (attr->ia_valid & ATTR_SIZE)
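
Besides wrapping the decoded ids, the nfs2xdr.c change rejects replies whose uid or gid cannot be represented: make_kuid()/make_kgid() may yield an invalid id, which uid_valid()/gid_valid() detect, and the decoder then fails with -EINVAL. A minimal sketch of that decode-and-validate step (the function below is illustrative; the real decoder also advances the XDR stream and fills in the rest of the fattr):

#include <linux/uidgid.h>
#include <linux/user_namespace.h>
#include <linux/errno.h>

/* Sketch: convert two on-the-wire ids and fail if either does not map
 * into the kernel's internal id space. */
static int example_decode_ids(__u32 wire_uid, __u32 wire_gid,
                              kuid_t *uid, kgid_t *gid)
{
        *uid = make_kuid(&init_user_ns, wire_uid);
        if (!uid_valid(*uid))
                return -EINVAL;
        *gid = make_kgid(&init_user_ns, wire_gid);
        if (!gid_valid(*gid))
                return -EINVAL;
        return 0;
}
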
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 70efb63b1e42..43ea96ced28c 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -872,7 +872,7 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
872static int 872static int
873nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) 873nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
874{ 874{
875 struct inode *inode = filp->f_path.dentry->d_inode; 875 struct inode *inode = file_inode(filp);
876 876
877 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); 877 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
878} 878}
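
Several hunks in this series (nfs3proc.c, nfs4file.c, proc.c, inode.c) replace the open-coded filp->f_path.dentry->d_inode chain with the file_inode() accessor; behaviour is unchanged, the helper simply names the operation. A one-line sketch:

#include <linux/fs.h>

/* Sketch: preferred spelling of "the inode behind this struct file". */
static struct inode *example_inode_of(struct file *filp)
{
        return file_inode(filp);
}
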
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index bffc32406fbf..fa6d72131c19 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -592,13 +592,13 @@ static void encode_sattr3(struct xdr_stream *xdr, const struct iattr *attr)
592 592
593 if (attr->ia_valid & ATTR_UID) { 593 if (attr->ia_valid & ATTR_UID) {
594 *p++ = xdr_one; 594 *p++ = xdr_one;
595 *p++ = cpu_to_be32(attr->ia_uid); 595 *p++ = cpu_to_be32(from_kuid(&init_user_ns, attr->ia_uid));
596 } else 596 } else
597 *p++ = xdr_zero; 597 *p++ = xdr_zero;
598 598
599 if (attr->ia_valid & ATTR_GID) { 599 if (attr->ia_valid & ATTR_GID) {
600 *p++ = xdr_one; 600 *p++ = xdr_one;
601 *p++ = cpu_to_be32(attr->ia_gid); 601 *p++ = cpu_to_be32(from_kgid(&init_user_ns, attr->ia_gid));
602 } else 602 } else
603 *p++ = xdr_zero; 603 *p++ = xdr_zero;
604 604
@@ -657,8 +657,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
657 657
658 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode; 658 fattr->mode = (be32_to_cpup(p++) & ~S_IFMT) | fmode;
659 fattr->nlink = be32_to_cpup(p++); 659 fattr->nlink = be32_to_cpup(p++);
660 fattr->uid = be32_to_cpup(p++); 660 fattr->uid = make_kuid(&init_user_ns, be32_to_cpup(p++));
661 fattr->gid = be32_to_cpup(p++); 661 if (!uid_valid(fattr->uid))
662 goto out_uid;
663 fattr->gid = make_kgid(&init_user_ns, be32_to_cpup(p++));
664 if (!gid_valid(fattr->gid))
665 goto out_gid;
662 666
663 p = xdr_decode_size3(p, &fattr->size); 667 p = xdr_decode_size3(p, &fattr->size);
664 p = xdr_decode_size3(p, &fattr->du.nfs3.used); 668 p = xdr_decode_size3(p, &fattr->du.nfs3.used);
@@ -675,6 +679,12 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
675 679
676 fattr->valid |= NFS_ATTR_FATTR_V3; 680 fattr->valid |= NFS_ATTR_FATTR_V3;
677 return 0; 681 return 0;
682out_uid:
683 dprintk("NFS: returned invalid uid\n");
684 return -EINVAL;
685out_gid:
686 dprintk("NFS: returned invalid gid\n");
687 return -EINVAL;
678out_overflow: 688out_overflow:
679 print_overflow_msg(__func__, xdr); 689 print_overflow_msg(__func__, xdr);
680 return -EIO; 690 return -EIO;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a3f488b074a2..944c9a5c1039 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -13,6 +13,8 @@
13 13
14#define NFS4_MAX_LOOP_ON_RECOVER (10) 14#define NFS4_MAX_LOOP_ON_RECOVER (10)
15 15
16#include <linux/seqlock.h>
17
16struct idmap; 18struct idmap;
17 19
18enum nfs4_client_state { 20enum nfs4_client_state {
@@ -90,6 +92,8 @@ struct nfs4_state_owner {
90 unsigned long so_flags; 92 unsigned long so_flags;
91 struct list_head so_states; 93 struct list_head so_states;
92 struct nfs_seqid_counter so_seqid; 94 struct nfs_seqid_counter so_seqid;
95 seqcount_t so_reclaim_seqcount;
96 struct mutex so_delegreturn_mutex;
93}; 97};
94 98
95enum { 99enum {
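
nfs4_fs.h adds two fields to the state owner: a seqcount that the reclaim path holds open while it reopens state, and a mutex that serialises delegation return against lock and unlock requests; both are initialised in nfs4_alloc_state_owner() later in this series. A minimal sketch of the initialisation, using a cut-down stand-in structure rather than the real nfs4_state_owner:

#include <linux/seqlock.h>
#include <linux/mutex.h>

/* Sketch: stripped-down "state owner" carrying only the new fields. */
struct example_state_owner {
        seqcount_t      reclaim_seqcount;       /* odd while reclaim runs */
        struct mutex    delegreturn_mutex;      /* excludes delegation return */
};

static void example_owner_init(struct example_state_owner *sp)
{
        seqcount_init(&sp->reclaim_seqcount);
        mutex_init(&sp->delegreturn_mutex);
}
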
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index e5f2fad14ff8..ac4fc9a8fdbc 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -30,15 +30,14 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
30 30
31 if (clp->rpc_ops->version != 4 || minorversion != 0) 31 if (clp->rpc_ops->version != 4 || minorversion != 0)
32 return ret; 32 return ret;
33retry: 33 idr_preload(GFP_KERNEL);
34 if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
35 return -ENOMEM;
36 spin_lock(&nn->nfs_client_lock); 34 spin_lock(&nn->nfs_client_lock);
37 ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); 35 ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
36 if (ret >= 0)
37 clp->cl_cb_ident = ret;
38 spin_unlock(&nn->nfs_client_lock); 38 spin_unlock(&nn->nfs_client_lock);
39 if (ret == -EAGAIN) 39 idr_preload_end();
40 goto retry; 40 return ret < 0 ? ret : 0;
41 return ret;
42} 41}
43 42
44#ifdef CONFIG_NFS_V4_1 43#ifdef CONFIG_NFS_V4_1
@@ -237,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
237 error = nfs4_discover_server_trunking(clp, &old); 236 error = nfs4_discover_server_trunking(clp, &old);
238 if (error < 0) 237 if (error < 0)
239 goto error; 238 goto error;
239 nfs_put_client(clp);
240 if (clp != old) { 240 if (clp != old) {
241 clp->cl_preserve_clid = true; 241 clp->cl_preserve_clid = true;
242 nfs_put_client(clp);
243 clp = old; 242 clp = old;
244 atomic_inc(&clp->cl_count);
245 } 243 }
246 244
247 return clp; 245 return clp;
@@ -307,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
307 .clientid = new->cl_clientid, 305 .clientid = new->cl_clientid,
308 .confirm = new->cl_confirm, 306 .confirm = new->cl_confirm,
309 }; 307 };
310 int status; 308 int status = -NFS4ERR_STALE_CLIENTID;
311 309
312 spin_lock(&nn->nfs_client_lock); 310 spin_lock(&nn->nfs_client_lock);
313 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 311 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -333,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
333 331
334 if (prev) 332 if (prev)
335 nfs_put_client(prev); 333 nfs_put_client(prev);
334 prev = pos;
336 335
337 status = nfs4_proc_setclientid_confirm(pos, &clid, cred); 336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
338 if (status == 0) { 337 switch (status) {
338 case -NFS4ERR_STALE_CLIENTID:
339 break;
340 case 0:
339 nfs4_swap_callback_idents(pos, new); 341 nfs4_swap_callback_idents(pos, new);
340 342
341 nfs_put_client(pos); 343 prev = NULL;
342 *result = pos; 344 *result = pos;
343 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 345 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
344 __func__, pos, atomic_read(&pos->cl_count)); 346 __func__, pos, atomic_read(&pos->cl_count));
345 return 0; 347 default:
346 } 348 goto out;
347 if (status != -NFS4ERR_STALE_CLIENTID) {
348 nfs_put_client(pos);
349 dprintk("NFS: <-- %s status = %d, no result\n",
350 __func__, status);
351 return status;
352 } 349 }
353 350
354 spin_lock(&nn->nfs_client_lock); 351 spin_lock(&nn->nfs_client_lock);
355 prev = pos;
356 } 352 }
353 spin_unlock(&nn->nfs_client_lock);
357 354
358 /* 355 /* No match found. The server lost our clientid */
359 * No matching nfs_client found. This should be impossible, 356out:
360 * because the new nfs_client has already been added to
361 * nfs_client_list by nfs_get_client().
362 *
363 * Don't BUG(), since the caller is holding a mutex.
364 */
365 if (prev) 357 if (prev)
366 nfs_put_client(prev); 358 nfs_put_client(prev);
367 spin_unlock(&nn->nfs_client_lock); 359 dprintk("NFS: <-- %s status = %d\n", __func__, status);
368 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 360 return status;
369 return -NFS4ERR_STALE_CLIENTID;
370} 361}
371 362
372#ifdef CONFIG_NFS_V4_1 363#ifdef CONFIG_NFS_V4_1
@@ -433,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
433{ 424{
434 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); 425 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
435 struct nfs_client *pos, *n, *prev = NULL; 426 struct nfs_client *pos, *n, *prev = NULL;
436 int error; 427 int status = -NFS4ERR_STALE_CLIENTID;
437 428
438 spin_lock(&nn->nfs_client_lock); 429 spin_lock(&nn->nfs_client_lock);
439 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 430 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -449,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
449 nfs_put_client(prev); 440 nfs_put_client(prev);
450 prev = pos; 441 prev = pos;
451 442
452 error = nfs_wait_client_init_complete(pos); 443 nfs4_schedule_lease_recovery(pos);
453 if (error < 0) { 444 status = nfs_wait_client_init_complete(pos);
445 if (status < 0) {
454 nfs_put_client(pos); 446 nfs_put_client(pos);
455 spin_lock(&nn->nfs_client_lock); 447 spin_lock(&nn->nfs_client_lock);
456 continue; 448 continue;
457 } 449 }
458 450 status = pos->cl_cons_state;
459 spin_lock(&nn->nfs_client_lock); 451 spin_lock(&nn->nfs_client_lock);
452 if (status < 0)
453 continue;
460 } 454 }
461 455
462 if (pos->rpc_ops != new->rpc_ops) 456 if (pos->rpc_ops != new->rpc_ops)
@@ -474,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
474 if (!nfs4_match_serverowners(pos, new)) 468 if (!nfs4_match_serverowners(pos, new))
475 continue; 469 continue;
476 470
471 atomic_inc(&pos->cl_count);
477 spin_unlock(&nn->nfs_client_lock); 472 spin_unlock(&nn->nfs_client_lock);
478 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 473 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
479 __func__, pos, atomic_read(&pos->cl_count)); 474 __func__, pos, atomic_read(&pos->cl_count));
@@ -482,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
482 return 0; 477 return 0;
483 } 478 }
484 479
485 /* 480 /* No matching nfs_client found. */
486 * No matching nfs_client found. This should be impossible,
487 * because the new nfs_client has already been added to
488 * nfs_client_list by nfs_get_client().
489 *
490 * Don't BUG(), since the caller is holding a mutex.
491 */
492 spin_unlock(&nn->nfs_client_lock); 481 spin_unlock(&nn->nfs_client_lock);
493 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 482 dprintk("NFS: <-- %s status = %d\n", __func__, status);
494 return -NFS4ERR_STALE_CLIENTID; 483 return status;
495} 484}
496#endif /* CONFIG_NFS_V4_1 */ 485#endif /* CONFIG_NFS_V4_1 */
497 486
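
nfs4client.c also moves from the old idr_pre_get()/idr_get_new() retry loop to the preload API: idr_preload() preallocates outside the spinlock so that idr_alloc() can succeed with GFP_NOWAIT inside it, and idr_preload_end() closes the preload section. A minimal sketch of the pattern with a caller-supplied idr and lock (names are illustrative):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

/* Sketch: allocate an id for @ptr while holding @lock, as the callback
 * ident allocation above does.  Returns the new id or a negative errno. */
static int example_alloc_id(struct idr *idr, spinlock_t *lock, void *ptr)
{
        int id;

        idr_preload(GFP_KERNEL);                /* may sleep; preallocates memory */
        spin_lock(lock);
        id = idr_alloc(idr, ptr, 0, 0, GFP_NOWAIT);     /* 0, 0 = any id >= 0 */
        spin_unlock(lock);
        idr_preload_end();

        return id;
}
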
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 08ddcccb8887..13e6bb3e3fe5 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -94,7 +94,7 @@ static int
94nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) 94nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
95{ 95{
96 int ret; 96 int ret;
97 struct inode *inode = file->f_path.dentry->d_inode; 97 struct inode *inode = file_inode(file);
98 98
99 do { 99 do {
100 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 100 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5d864fb36578..eae83bf96c6d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -896,6 +896,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896 return 0; 896 return 0;
897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 897 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
898 return 0; 898 return 0;
899 if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
900 return 0;
899 nfs_mark_delegation_referenced(delegation); 901 nfs_mark_delegation_referenced(delegation);
900 return 1; 902 return 1;
901} 903}
@@ -973,6 +975,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
973 975
974 spin_lock(&deleg_cur->lock); 976 spin_lock(&deleg_cur->lock);
975 if (nfsi->delegation != deleg_cur || 977 if (nfsi->delegation != deleg_cur ||
978 test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
976 (deleg_cur->type & fmode) != fmode) 979 (deleg_cur->type & fmode) != fmode)
977 goto no_delegation_unlock; 980 goto no_delegation_unlock;
978 981
@@ -1352,19 +1355,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1352 case -NFS4ERR_BAD_HIGH_SLOT: 1355 case -NFS4ERR_BAD_HIGH_SLOT:
1353 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1356 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1354 case -NFS4ERR_DEADSESSION: 1357 case -NFS4ERR_DEADSESSION:
1358 set_bit(NFS_DELEGATED_STATE, &state->flags);
1355 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 1359 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1360 err = -EAGAIN;
1356 goto out; 1361 goto out;
1357 case -NFS4ERR_STALE_CLIENTID: 1362 case -NFS4ERR_STALE_CLIENTID:
1358 case -NFS4ERR_STALE_STATEID: 1363 case -NFS4ERR_STALE_STATEID:
1364 set_bit(NFS_DELEGATED_STATE, &state->flags);
1359 case -NFS4ERR_EXPIRED: 1365 case -NFS4ERR_EXPIRED:
1360 /* Don't recall a delegation if it was lost */ 1366 /* Don't recall a delegation if it was lost */
1361 nfs4_schedule_lease_recovery(server->nfs_client); 1367 nfs4_schedule_lease_recovery(server->nfs_client);
1368 err = -EAGAIN;
1362 goto out; 1369 goto out;
1363 case -ERESTARTSYS:
1364 /*
1365 * The show must go on: exit, but mark the
1366 * stateid as needing recovery.
1367 */
1368 case -NFS4ERR_DELEG_REVOKED: 1370 case -NFS4ERR_DELEG_REVOKED:
1369 case -NFS4ERR_ADMIN_REVOKED: 1371 case -NFS4ERR_ADMIN_REVOKED:
1370 case -NFS4ERR_BAD_STATEID: 1372 case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1377,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1375 err = 0; 1377 err = 0;
1376 goto out; 1378 goto out;
1377 } 1379 }
1380 set_bit(NFS_DELEGATED_STATE, &state->flags);
1378 err = nfs4_handle_exception(server, err, &exception); 1381 err = nfs4_handle_exception(server, err, &exception);
1379 } while (exception.retry); 1382 } while (exception.retry);
1380out: 1383out:
@@ -1463,7 +1466,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1463 struct nfs4_state_owner *sp = data->owner; 1466 struct nfs4_state_owner *sp = data->owner;
1464 1467
1465 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) 1468 if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
1466 return; 1469 goto out_wait;
1467 /* 1470 /*
1468 * Check if we still need to send an OPEN call, or if we can use 1471 * Check if we still need to send an OPEN call, or if we can use
1469 * a delegation instead. 1472 * a delegation instead.
@@ -1498,6 +1501,7 @@ unlock_no_action:
1498 rcu_read_unlock(); 1501 rcu_read_unlock();
1499out_no_action: 1502out_no_action:
1500 task->tk_action = NULL; 1503 task->tk_action = NULL;
1504out_wait:
1501 nfs4_sequence_done(task, &data->o_res.seq_res); 1505 nfs4_sequence_done(task, &data->o_res.seq_res);
1502} 1506}
1503 1507
@@ -1626,7 +1630,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1626 1630
1627static int nfs4_opendata_access(struct rpc_cred *cred, 1631static int nfs4_opendata_access(struct rpc_cred *cred,
1628 struct nfs4_opendata *opendata, 1632 struct nfs4_opendata *opendata,
1629 struct nfs4_state *state, fmode_t fmode) 1633 struct nfs4_state *state, fmode_t fmode,
1634 int openflags)
1630{ 1635{
1631 struct nfs_access_entry cache; 1636 struct nfs_access_entry cache;
1632 u32 mask; 1637 u32 mask;
@@ -1638,11 +1643,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
1638 1643
1639 mask = 0; 1644 mask = 0;
1640 /* don't check MAY_WRITE - a newly created file may not have 1645 /* don't check MAY_WRITE - a newly created file may not have
1641 * write mode bits, but POSIX allows the creating process to write */ 1646 * write mode bits, but POSIX allows the creating process to write.
1642 if (fmode & FMODE_READ) 1647 * use openflags to check for exec, because fmode won't
1643 mask |= MAY_READ; 1648 * always have FMODE_EXEC set when file open for exec. */
1644 if (fmode & FMODE_EXEC) 1649 if (openflags & __FMODE_EXEC) {
1645 mask |= MAY_EXEC; 1650 /* ONLY check for exec rights */
1651 mask = MAY_EXEC;
1652 } else if (fmode & FMODE_READ)
1653 mask = MAY_READ;
1646 1654
1647 cache.cred = cred; 1655 cache.cred = cred;
1648 cache.jiffies = jiffies; 1656 cache.jiffies = jiffies;
@@ -1841,6 +1849,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1841 sattr->ia_valid |= ATTR_MTIME; 1849 sattr->ia_valid |= ATTR_MTIME;
1842} 1850}
1843 1851
1852static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
1853 fmode_t fmode,
1854 int flags,
1855 struct nfs4_state **res)
1856{
1857 struct nfs4_state_owner *sp = opendata->owner;
1858 struct nfs_server *server = sp->so_server;
1859 struct nfs4_state *state;
1860 unsigned int seq;
1861 int ret;
1862
1863 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
1864
1865 ret = _nfs4_proc_open(opendata);
1866 if (ret != 0)
1867 goto out;
1868
1869 state = nfs4_opendata_to_nfs4_state(opendata);
1870 ret = PTR_ERR(state);
1871 if (IS_ERR(state))
1872 goto out;
1873 if (server->caps & NFS_CAP_POSIX_LOCK)
1874 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1875
1876 ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
1877 if (ret != 0)
1878 goto out;
1879
1880 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
1881 nfs4_schedule_stateid_recovery(server, state);
1882 nfs4_wait_clnt_recover(server->nfs_client);
1883 }
1884 *res = state;
1885out:
1886 return ret;
1887}
1888
1844/* 1889/*
1845 * Returns a referenced nfs4_state 1890 * Returns a referenced nfs4_state
1846 */ 1891 */
@@ -1885,18 +1930,7 @@ static int _nfs4_do_open(struct inode *dir,
1885 if (dentry->d_inode != NULL) 1930 if (dentry->d_inode != NULL)
1886 opendata->state = nfs4_get_open_state(dentry->d_inode, sp); 1931 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1887 1932
1888 status = _nfs4_proc_open(opendata); 1933 status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
1889 if (status != 0)
1890 goto err_opendata_put;
1891
1892 state = nfs4_opendata_to_nfs4_state(opendata);
1893 status = PTR_ERR(state);
1894 if (IS_ERR(state))
1895 goto err_opendata_put;
1896 if (server->caps & NFS_CAP_POSIX_LOCK)
1897 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1898
1899 status = nfs4_opendata_access(cred, opendata, state, fmode);
1900 if (status != 0) 1934 if (status != 0)
1901 goto err_opendata_put; 1935 goto err_opendata_put;
1902 1936
@@ -2084,7 +2118,7 @@ static void nfs4_free_closedata(void *data)
2084 nfs4_put_open_state(calldata->state); 2118 nfs4_put_open_state(calldata->state);
2085 nfs_free_seqid(calldata->arg.seqid); 2119 nfs_free_seqid(calldata->arg.seqid);
2086 nfs4_put_state_owner(sp); 2120 nfs4_put_state_owner(sp);
2087 nfs_sb_deactive_async(sb); 2121 nfs_sb_deactive(sb);
2088 kfree(calldata); 2122 kfree(calldata);
2089} 2123}
2090 2124
@@ -2146,7 +2180,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2146 2180
2147 dprintk("%s: begin!\n", __func__); 2181 dprintk("%s: begin!\n", __func__);
2148 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 2182 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
2149 return; 2183 goto out_wait;
2150 2184
2151 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; 2185 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
2152 calldata->arg.fmode = FMODE_READ|FMODE_WRITE; 2186 calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2168,16 +2202,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2168 2202
2169 if (!call_close) { 2203 if (!call_close) {
2170 /* Note: exit _without_ calling nfs4_close_done */ 2204 /* Note: exit _without_ calling nfs4_close_done */
2171 task->tk_action = NULL; 2205 goto out_no_action;
2172 nfs4_sequence_done(task, &calldata->res.seq_res);
2173 goto out;
2174 } 2206 }
2175 2207
2176 if (calldata->arg.fmode == 0) { 2208 if (calldata->arg.fmode == 0) {
2177 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2209 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2178 if (calldata->roc && 2210 if (calldata->roc &&
2179 pnfs_roc_drain(inode, &calldata->roc_barrier, task)) 2211 pnfs_roc_drain(inode, &calldata->roc_barrier, task))
2180 goto out; 2212 goto out_wait;
2181 } 2213 }
2182 2214
2183 nfs_fattr_init(calldata->res.fattr); 2215 nfs_fattr_init(calldata->res.fattr);
@@ -2187,8 +2219,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2187 &calldata->res.seq_res, 2219 &calldata->res.seq_res,
2188 task) != 0) 2220 task) != 0)
2189 nfs_release_seqid(calldata->arg.seqid); 2221 nfs_release_seqid(calldata->arg.seqid);
2190out:
2191 dprintk("%s: done!\n", __func__); 2222 dprintk("%s: done!\n", __func__);
2223 return;
2224out_no_action:
2225 task->tk_action = NULL;
2226out_wait:
2227 nfs4_sequence_done(task, &calldata->res.seq_res);
2192} 2228}
2193 2229
2194static const struct rpc_call_ops nfs4_close_ops = { 2230static const struct rpc_call_ops nfs4_close_ops = {
@@ -4419,12 +4455,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4419 struct nfs4_unlockdata *calldata = data; 4455 struct nfs4_unlockdata *calldata = data;
4420 4456
4421 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 4457 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
4422 return; 4458 goto out_wait;
4423 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { 4459 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
4424 /* Note: exit _without_ running nfs4_locku_done */ 4460 /* Note: exit _without_ running nfs4_locku_done */
4425 task->tk_action = NULL; 4461 goto out_no_action;
4426 nfs4_sequence_done(task, &calldata->res.seq_res);
4427 return;
4428 } 4462 }
4429 calldata->timestamp = jiffies; 4463 calldata->timestamp = jiffies;
4430 if (nfs4_setup_sequence(calldata->server, 4464 if (nfs4_setup_sequence(calldata->server,
@@ -4432,6 +4466,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4432 &calldata->res.seq_res, 4466 &calldata->res.seq_res,
4433 task) != 0) 4467 task) != 0)
4434 nfs_release_seqid(calldata->arg.seqid); 4468 nfs_release_seqid(calldata->arg.seqid);
4469 return;
4470out_no_action:
4471 task->tk_action = NULL;
4472out_wait:
4473 nfs4_sequence_done(task, &calldata->res.seq_res);
4435} 4474}
4436 4475
4437static const struct rpc_call_ops nfs4_locku_ops = { 4476static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4478,7 +4517,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
4478 4517
4479static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) 4518static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
4480{ 4519{
4481 struct nfs_inode *nfsi = NFS_I(state->inode); 4520 struct inode *inode = state->inode;
4521 struct nfs4_state_owner *sp = state->owner;
4522 struct nfs_inode *nfsi = NFS_I(inode);
4482 struct nfs_seqid *seqid; 4523 struct nfs_seqid *seqid;
4483 struct nfs4_lock_state *lsp; 4524 struct nfs4_lock_state *lsp;
4484 struct rpc_task *task; 4525 struct rpc_task *task;
@@ -4488,12 +4529,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
4488 status = nfs4_set_lock_state(state, request); 4529 status = nfs4_set_lock_state(state, request);
4489 /* Unlock _before_ we do the RPC call */ 4530 /* Unlock _before_ we do the RPC call */
4490 request->fl_flags |= FL_EXISTS; 4531 request->fl_flags |= FL_EXISTS;
4532 /* Exclude nfs_delegation_claim_locks() */
4533 mutex_lock(&sp->so_delegreturn_mutex);
4534 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
4491 down_read(&nfsi->rwsem); 4535 down_read(&nfsi->rwsem);
4492 if (do_vfs_lock(request->fl_file, request) == -ENOENT) { 4536 if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
4493 up_read(&nfsi->rwsem); 4537 up_read(&nfsi->rwsem);
4538 mutex_unlock(&sp->so_delegreturn_mutex);
4494 goto out; 4539 goto out;
4495 } 4540 }
4496 up_read(&nfsi->rwsem); 4541 up_read(&nfsi->rwsem);
4542 mutex_unlock(&sp->so_delegreturn_mutex);
4497 if (status != 0) 4543 if (status != 0)
4498 goto out; 4544 goto out;
4499 /* Is this a delegated lock? */ 4545 /* Is this a delegated lock? */
@@ -4572,7 +4618,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4572 4618
4573 dprintk("%s: begin!\n", __func__); 4619 dprintk("%s: begin!\n", __func__);
4574 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) 4620 if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
4575 return; 4621 goto out_wait;
4576 /* Do we need to do an open_to_lock_owner? */ 4622 /* Do we need to do an open_to_lock_owner? */
4577 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { 4623 if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
4578 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { 4624 if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4592,6 +4638,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
4592 nfs_release_seqid(data->arg.open_seqid); 4638 nfs_release_seqid(data->arg.open_seqid);
4593out_release_lock_seqid: 4639out_release_lock_seqid:
4594 nfs_release_seqid(data->arg.lock_seqid); 4640 nfs_release_seqid(data->arg.lock_seqid);
4641out_wait:
4642 nfs4_sequence_done(task, &data->res.seq_res);
4595 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); 4643 dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
4596} 4644}
4597 4645
@@ -4809,8 +4857,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
4809 4857
4810static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4858static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4811{ 4859{
4860 struct nfs4_state_owner *sp = state->owner;
4812 struct nfs_inode *nfsi = NFS_I(state->inode); 4861 struct nfs_inode *nfsi = NFS_I(state->inode);
4813 unsigned char fl_flags = request->fl_flags; 4862 unsigned char fl_flags = request->fl_flags;
4863 unsigned int seq;
4814 int status = -ENOLCK; 4864 int status = -ENOLCK;
4815 4865
4816 if ((fl_flags & FL_POSIX) && 4866 if ((fl_flags & FL_POSIX) &&
@@ -4832,9 +4882,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
4832 status = do_vfs_lock(request->fl_file, request); 4882 status = do_vfs_lock(request->fl_file, request);
4833 goto out_unlock; 4883 goto out_unlock;
4834 } 4884 }
4885 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
4886 up_read(&nfsi->rwsem);
4835 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); 4887 status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
4836 if (status != 0) 4888 if (status != 0)
4889 goto out;
4890 down_read(&nfsi->rwsem);
4891 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
4892 status = -NFS4ERR_DELAY;
4837 goto out_unlock; 4893 goto out_unlock;
4894 }
4838 /* Note: we always want to sleep here! */ 4895 /* Note: we always want to sleep here! */
4839 request->fl_flags = fl_flags | FL_SLEEP; 4896 request->fl_flags = fl_flags | FL_SLEEP;
4840 if (do_vfs_lock(request->fl_file, request) < 0) 4897 if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4941,24 +4998,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4941 case 0: 4998 case 0:
4942 case -ESTALE: 4999 case -ESTALE:
4943 goto out; 5000 goto out;
4944 case -NFS4ERR_EXPIRED:
4945 nfs4_schedule_stateid_recovery(server, state);
4946 case -NFS4ERR_STALE_CLIENTID: 5001 case -NFS4ERR_STALE_CLIENTID:
4947 case -NFS4ERR_STALE_STATEID: 5002 case -NFS4ERR_STALE_STATEID:
5003 set_bit(NFS_DELEGATED_STATE, &state->flags);
5004 case -NFS4ERR_EXPIRED:
4948 nfs4_schedule_lease_recovery(server->nfs_client); 5005 nfs4_schedule_lease_recovery(server->nfs_client);
5006 err = -EAGAIN;
4949 goto out; 5007 goto out;
4950 case -NFS4ERR_BADSESSION: 5008 case -NFS4ERR_BADSESSION:
4951 case -NFS4ERR_BADSLOT: 5009 case -NFS4ERR_BADSLOT:
4952 case -NFS4ERR_BAD_HIGH_SLOT: 5010 case -NFS4ERR_BAD_HIGH_SLOT:
4953 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 5011 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4954 case -NFS4ERR_DEADSESSION: 5012 case -NFS4ERR_DEADSESSION:
5013 set_bit(NFS_DELEGATED_STATE, &state->flags);
4955 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 5014 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
5015 err = -EAGAIN;
4956 goto out; 5016 goto out;
4957 case -ERESTARTSYS:
4958 /*
4959 * The show must go on: exit, but mark the
4960 * stateid as needing recovery.
4961 */
4962 case -NFS4ERR_DELEG_REVOKED: 5017 case -NFS4ERR_DELEG_REVOKED:
4963 case -NFS4ERR_ADMIN_REVOKED: 5018 case -NFS4ERR_ADMIN_REVOKED:
4964 case -NFS4ERR_BAD_STATEID: 5019 case -NFS4ERR_BAD_STATEID:
@@ -4971,9 +5026,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4971 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 5026 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4972 err = 0; 5027 err = 0;
4973 goto out; 5028 goto out;
4974 case -NFS4ERR_DELAY:
4975 break;
4976 } 5029 }
5030 set_bit(NFS_DELEGATED_STATE, &state->flags);
4977 err = nfs4_handle_exception(server, err, &exception); 5031 err = nfs4_handle_exception(server, err, &exception);
4978 } while (exception.retry); 5032 } while (exception.retry);
4979out: 5033out:
@@ -6130,7 +6184,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6130 status = nfs4_wait_for_completion_rpc_task(task); 6184 status = nfs4_wait_for_completion_rpc_task(task);
6131 if (status == 0) 6185 if (status == 0)
6132 status = task->tk_status; 6186 status = task->tk_status;
6133 if (status == 0) 6187 /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
6188 if (status == 0 && lgp->res.layoutp->len)
6134 lseg = pnfs_layout_process(lgp); 6189 lseg = pnfs_layout_process(lgp);
6135 rpc_put_task(task); 6190 rpc_put_task(task);
6136 dprintk("<-- %s status=%d\n", __func__, status); 6191 dprintk("<-- %s status=%d\n", __func__, status);
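
The nfs4proc.c changes above sample the owner's reclaim seqcount with raw_seqcount_begin() before an OPEN or LOCK goes on the wire and re-check it with read_seqcount_retry() afterwards: if state recovery ran in between, the freshly acquired stateid may already be stale, so the open path schedules stateid recovery and the lock path returns NFS4ERR_DELAY rather than trust it. A minimal sketch of that read-side check (the callback-based wrapper is illustrative only):

#include <linux/seqlock.h>
#include <linux/errno.h>

/* Sketch: run @op and report -EAGAIN if a writer (state reclaim) ran
 * concurrently, so the caller can recover and retry. */
static int example_run_with_reclaim_check(seqcount_t *sc,
                                          int (*op)(void *), void *arg)
{
        unsigned int seq = raw_seqcount_begin(sc);
        int ret = op(arg);

        if (ret == 0 && read_seqcount_retry(sc, seq))
                ret = -EAGAIN;
        return ret;
}
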
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..6ace365c6334 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
136 clp->cl_confirm = clid.confirm; 136 clp->cl_confirm = clid.confirm;
137 137
138 status = nfs40_walk_client_list(clp, result, cred); 138 status = nfs40_walk_client_list(clp, result, cred);
139 switch (status) { 139 if (status == 0) {
140 case -NFS4ERR_STALE_CLIENTID:
141 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
142 case 0:
143 /* Sustain the lease, even if it's empty. If the clientid4 140 /* Sustain the lease, even if it's empty. If the clientid4
144 * goes stale it's of no use for trunking discovery. */ 141 * goes stale it's of no use for trunking discovery. */
145 nfs4_schedule_state_renewal(*result); 142 nfs4_schedule_state_renewal(*result);
146 break;
147 } 143 }
148
149out: 144out:
150 return status; 145 return status;
151} 146}
@@ -523,6 +518,8 @@ nfs4_alloc_state_owner(struct nfs_server *server,
523 nfs4_init_seqid_counter(&sp->so_seqid); 518 nfs4_init_seqid_counter(&sp->so_seqid);
524 atomic_set(&sp->so_count, 1); 519 atomic_set(&sp->so_count, 1);
525 INIT_LIST_HEAD(&sp->so_lru); 520 INIT_LIST_HEAD(&sp->so_lru);
521 seqcount_init(&sp->so_reclaim_seqcount);
522 mutex_init(&sp->so_delegreturn_mutex);
526 return sp; 523 return sp;
527} 524}
528 525
@@ -1395,8 +1392,9 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
1395 * recovering after a network partition or a reboot from a 1392 * recovering after a network partition or a reboot from a
1396 * server that doesn't support a grace period. 1393 * server that doesn't support a grace period.
1397 */ 1394 */
1398restart:
1399 spin_lock(&sp->so_lock); 1395 spin_lock(&sp->so_lock);
1396 write_seqcount_begin(&sp->so_reclaim_seqcount);
1397restart:
1400 list_for_each_entry(state, &sp->so_states, open_states) { 1398 list_for_each_entry(state, &sp->so_states, open_states) {
1401 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) 1399 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1402 continue; 1400 continue;
@@ -1417,6 +1415,7 @@ restart:
1417 } 1415 }
1418 spin_unlock(&state->state_lock); 1416 spin_unlock(&state->state_lock);
1419 nfs4_put_open_state(state); 1417 nfs4_put_open_state(state);
1418 spin_lock(&sp->so_lock);
1420 goto restart; 1419 goto restart;
1421 } 1420 }
1422 } 1421 }
@@ -1454,12 +1453,17 @@ restart:
1454 goto out_err; 1453 goto out_err;
1455 } 1454 }
1456 nfs4_put_open_state(state); 1455 nfs4_put_open_state(state);
1456 spin_lock(&sp->so_lock);
1457 goto restart; 1457 goto restart;
1458 } 1458 }
1459 write_seqcount_end(&sp->so_reclaim_seqcount);
1459 spin_unlock(&sp->so_lock); 1460 spin_unlock(&sp->so_lock);
1460 return 0; 1461 return 0;
1461out_err: 1462out_err:
1462 nfs4_put_open_state(state); 1463 nfs4_put_open_state(state);
1464 spin_lock(&sp->so_lock);
1465 write_seqcount_end(&sp->so_reclaim_seqcount);
1466 spin_unlock(&sp->so_lock);
1463 return status; 1467 return status;
1464} 1468}
1465 1469
@@ -1863,6 +1867,7 @@ again:
1863 case -ETIMEDOUT: 1867 case -ETIMEDOUT:
1864 case -EAGAIN: 1868 case -EAGAIN:
1865 ssleep(1); 1869 ssleep(1);
1870 case -NFS4ERR_STALE_CLIENTID:
1866 dprintk("NFS: %s after status %d, retrying\n", 1871 dprintk("NFS: %s after status %d, retrying\n",
1867 __func__, status); 1872 __func__, status);
1868 goto again; 1873 goto again;
@@ -2022,8 +2027,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
2022 nfs4_begin_drain_session(clp); 2027 nfs4_begin_drain_session(clp);
2023 cred = nfs4_get_exchange_id_cred(clp); 2028 cred = nfs4_get_exchange_id_cred(clp);
2024 status = nfs4_proc_destroy_session(clp->cl_session, cred); 2029 status = nfs4_proc_destroy_session(clp->cl_session, cred);
2025 if (status && status != -NFS4ERR_BADSESSION && 2030 switch (status) {
2026 status != -NFS4ERR_DEADSESSION) { 2031 case 0:
2032 case -NFS4ERR_BADSESSION:
2033 case -NFS4ERR_DEADSESSION:
2034 break;
2035 case -NFS4ERR_BACK_CHAN_BUSY:
2036 case -NFS4ERR_DELAY:
2037 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2038 status = 0;
2039 ssleep(1);
2040 goto out;
2041 default:
2027 status = nfs4_recovery_handle_error(clp, status); 2042 status = nfs4_recovery_handle_error(clp, status);
2028 goto out; 2043 goto out;
2029 } 2044 }
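
On the write side, nfs4_reclaim_open_state() in nfs4state.c now brackets its reclaim loop with write_seqcount_begin()/write_seqcount_end() under so_lock and re-takes so_lock before every restart so the count stays balanced on the error path as well. A minimal sketch of the writer bracket, assuming a caller-owned lock and seqcount and ignoring the restart logic of the real function:

#include <linux/seqlock.h>
#include <linux/spinlock.h>

/* Sketch: while the seqcount is odd, readers using the check from the
 * earlier sketch will retry or recover. */
static void example_reclaim(spinlock_t *lock, seqcount_t *sc,
                            void (*do_reclaim)(void))
{
        spin_lock(lock);
        write_seqcount_begin(sc);

        do_reclaim();           /* reopen/relock state */

        write_seqcount_end(sc);
        spin_unlock(lock);
}
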
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 84d2e9e2f313..569b166cc050 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -28,7 +28,7 @@ static struct file_system_type nfs4_remote_fs_type = {
28 .name = "nfs4", 28 .name = "nfs4",
29 .mount = nfs4_remote_mount, 29 .mount = nfs4_remote_mount,
30 .kill_sb = nfs_kill_super, 30 .kill_sb = nfs_kill_super,
31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
32}; 32};
33 33
34static struct file_system_type nfs4_remote_referral_fs_type = { 34static struct file_system_type nfs4_remote_referral_fs_type = {
@@ -36,7 +36,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
36 .name = "nfs4", 36 .name = "nfs4",
37 .mount = nfs4_remote_referral_mount, 37 .mount = nfs4_remote_referral_mount,
38 .kill_sb = nfs_kill_super, 38 .kill_sb = nfs_kill_super,
39 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 39 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
40}; 40};
41 41
42struct file_system_type nfs4_referral_fs_type = { 42struct file_system_type nfs4_referral_fs_type = {
@@ -44,7 +44,7 @@ struct file_system_type nfs4_referral_fs_type = {
44 .name = "nfs4", 44 .name = "nfs4",
45 .mount = nfs4_referral_mount, 45 .mount = nfs4_referral_mount,
46 .kill_sb = nfs_kill_super, 46 .kill_sb = nfs_kill_super,
47 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 47 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
48}; 48};
49 49
50static const struct super_operations nfs4_sops = { 50static const struct super_operations nfs4_sops = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 26b143920433..e3edda554ac7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1002,7 +1002,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1002 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); 1002 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
1003 if (owner_namelen < 0) { 1003 if (owner_namelen < 0) {
1004 dprintk("nfs: couldn't resolve uid %d to string\n", 1004 dprintk("nfs: couldn't resolve uid %d to string\n",
1005 iap->ia_uid); 1005 from_kuid(&init_user_ns, iap->ia_uid));
1006 /* XXX */ 1006 /* XXX */
1007 strcpy(owner_name, "nobody"); 1007 strcpy(owner_name, "nobody");
1008 owner_namelen = sizeof("nobody") - 1; 1008 owner_namelen = sizeof("nobody") - 1;
@@ -1014,7 +1014,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
1014 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); 1014 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
1015 if (owner_grouplen < 0) { 1015 if (owner_grouplen < 0) {
1016 dprintk("nfs: couldn't resolve gid %d to string\n", 1016 dprintk("nfs: couldn't resolve gid %d to string\n",
1017 iap->ia_gid); 1017 from_kgid(&init_user_ns, iap->ia_gid));
1018 strcpy(owner_group, "nobody"); 1018 strcpy(owner_group, "nobody");
1019 owner_grouplen = sizeof("nobody") - 1; 1019 owner_grouplen = sizeof("nobody") - 1;
1020 /* goto out; */ 1020 /* goto out; */
@@ -3778,14 +3778,14 @@ out_overflow:
3778} 3778}
3779 3779
3780static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3780static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3781 const struct nfs_server *server, uint32_t *uid, 3781 const struct nfs_server *server, kuid_t *uid,
3782 struct nfs4_string *owner_name) 3782 struct nfs4_string *owner_name)
3783{ 3783{
3784 uint32_t len; 3784 uint32_t len;
3785 __be32 *p; 3785 __be32 *p;
3786 int ret = 0; 3786 int ret = 0;
3787 3787
3788 *uid = -2; 3788 *uid = make_kuid(&init_user_ns, -2);
3789 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3789 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3790 return -EIO; 3790 return -EIO;
3791 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3791 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
@@ -3813,7 +3813,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3813 __func__, len); 3813 __func__, len);
3814 bitmap[1] &= ~FATTR4_WORD1_OWNER; 3814 bitmap[1] &= ~FATTR4_WORD1_OWNER;
3815 } 3815 }
3816 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3816 dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid));
3817 return ret; 3817 return ret;
3818out_overflow: 3818out_overflow:
3819 print_overflow_msg(__func__, xdr); 3819 print_overflow_msg(__func__, xdr);
@@ -3821,14 +3821,14 @@ out_overflow:
3821} 3821}
3822 3822
3823static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3823static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3824 const struct nfs_server *server, uint32_t *gid, 3824 const struct nfs_server *server, kgid_t *gid,
3825 struct nfs4_string *group_name) 3825 struct nfs4_string *group_name)
3826{ 3826{
3827 uint32_t len; 3827 uint32_t len;
3828 __be32 *p; 3828 __be32 *p;
3829 int ret = 0; 3829 int ret = 0;
3830 3830
3831 *gid = -2; 3831 *gid = make_kgid(&init_user_ns, -2);
3832 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3832 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3833 return -EIO; 3833 return -EIO;
3834 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3834 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
@@ -3856,7 +3856,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3856 __func__, len); 3856 __func__, len);
3857 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; 3857 bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
3858 } 3858 }
3859 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3859 dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid));
3860 return ret; 3860 return ret;
3861out_overflow: 3861out_overflow:
3862 print_overflow_msg(__func__, xdr); 3862 print_overflow_msg(__func__, xdr);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6f990656f89..88f9611a945c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -647,6 +647,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
647 .flags = PNFS_LAYOUTRET_ON_SETATTR | 647 .flags = PNFS_LAYOUTRET_ON_SETATTR |
648 PNFS_LAYOUTRET_ON_ERROR, 648 PNFS_LAYOUTRET_ON_ERROR,
649 649
650 .owner = THIS_MODULE,
650 .alloc_layout_hdr = objlayout_alloc_layout_hdr, 651 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
651 .free_layout_hdr = objlayout_free_layout_hdr, 652 .free_layout_hdr = objlayout_free_layout_hdr,
652 653
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e7165d915362..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -254,7 +254,7 @@ static void
254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) 254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
255{ 255{
256 lo->plh_retry_timestamp = jiffies; 256 lo->plh_retry_timestamp = jiffies;
257 if (test_and_set_bit(fail_bit, &lo->plh_flags)) 257 if (!test_and_set_bit(fail_bit, &lo->plh_flags))
258 atomic_inc(&lo->plh_refcount); 258 atomic_inc(&lo->plh_refcount);
259} 259}
260 260
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505} 505}
506EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 506EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507 507
508/* 508static bool
509 * Called by the state manger to remove all layouts established under an 509pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510 * expired lease. 510 struct list_head *layout_list)
511 */
512void
513pnfs_destroy_all_layouts(struct nfs_client *clp)
514{ 511{
515 struct nfs_server *server;
516 struct pnfs_layout_hdr *lo; 512 struct pnfs_layout_hdr *lo;
517 LIST_HEAD(tmp_list); 513 bool ret = false;
518 514
519 nfs4_deviceid_mark_client_invalid(clp); 515 spin_lock(&inode->i_lock);
520 nfs4_deviceid_purge_client(clp); 516 lo = NFS_I(inode)->layout;
517 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518 pnfs_get_layout_hdr(lo);
519 list_add(&lo->plh_bulk_destroy, layout_list);
520 ret = true;
521 }
522 spin_unlock(&inode->i_lock);
523 return ret;
524}
525
526/* Caller must hold rcu_read_lock and clp->cl_lock */
527static int
528pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529 struct nfs_server *server,
530 struct list_head *layout_list)
531{
532 struct pnfs_layout_hdr *lo, *next;
533 struct inode *inode;
534
535 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536 inode = igrab(lo->plh_inode);
537 if (inode == NULL)
538 continue;
539 list_del_init(&lo->plh_layouts);
540 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541 continue;
542 rcu_read_unlock();
543 spin_unlock(&clp->cl_lock);
544 iput(inode);
545 spin_lock(&clp->cl_lock);
546 rcu_read_lock();
547 return -EAGAIN;
548 }
549 return 0;
550}
551
552static int
553pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554 bool is_bulk_recall)
555{
556 struct pnfs_layout_hdr *lo;
557 struct inode *inode;
558 struct pnfs_layout_range range = {
559 .iomode = IOMODE_ANY,
560 .offset = 0,
561 .length = NFS4_MAX_UINT64,
562 };
563 LIST_HEAD(lseg_list);
564 int ret = 0;
565
566 while (!list_empty(layout_list)) {
567 lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568 plh_bulk_destroy);
569 dprintk("%s freeing layout for inode %lu\n", __func__,
570 lo->plh_inode->i_ino);
571 inode = lo->plh_inode;
572 spin_lock(&inode->i_lock);
573 list_del_init(&lo->plh_bulk_destroy);
574 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575 if (is_bulk_recall)
576 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578 ret = -EAGAIN;
579 spin_unlock(&inode->i_lock);
580 pnfs_free_lseg_list(&lseg_list);
581 pnfs_put_layout_hdr(lo);
582 iput(inode);
583 }
584 return ret;
585}
586
587int
588pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589 struct nfs_fsid *fsid,
590 bool is_recall)
591{
592 struct nfs_server *server;
593 LIST_HEAD(layout_list);
521 594
522 spin_lock(&clp->cl_lock); 595 spin_lock(&clp->cl_lock);
523 rcu_read_lock(); 596 rcu_read_lock();
597restart:
524 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 598 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525 if (!list_empty(&server->layouts)) 599 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
526 list_splice_init(&server->layouts, &tmp_list); 600 continue;
601 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602 server,
603 &layout_list) != 0)
604 goto restart;
527 } 605 }
528 rcu_read_unlock(); 606 rcu_read_unlock();
529 spin_unlock(&clp->cl_lock); 607 spin_unlock(&clp->cl_lock);
530 608
531 while (!list_empty(&tmp_list)) { 609 if (list_empty(&layout_list))
532 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 610 return 0;
533 plh_layouts); 611 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
534 dprintk("%s freeing layout for inode %lu\n", __func__, 612}
535 lo->plh_inode->i_ino); 613
536 list_del_init(&lo->plh_layouts); 614int
537 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 615pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616 bool is_recall)
617{
618 struct nfs_server *server;
619 LIST_HEAD(layout_list);
620
621 spin_lock(&clp->cl_lock);
622 rcu_read_lock();
623restart:
624 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626 server,
627 &layout_list) != 0)
628 goto restart;
538 } 629 }
630 rcu_read_unlock();
631 spin_unlock(&clp->cl_lock);
632
633 if (list_empty(&layout_list))
634 return 0;
635 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636}
637
638/*
639 * Called by the state manager to remove all layouts established under an
640 * expired lease.
641 */
642void
643pnfs_destroy_all_layouts(struct nfs_client *clp)
644{
645 nfs4_deviceid_mark_client_invalid(clp);
646 nfs4_deviceid_purge_client(clp);
647
648 pnfs_destroy_layouts_byclid(clp, false);
539} 649}
540 650
541/* 651/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888 atomic_set(&lo->plh_refcount, 1); 998 atomic_set(&lo->plh_refcount, 1);
889 INIT_LIST_HEAD(&lo->plh_layouts); 999 INIT_LIST_HEAD(&lo->plh_layouts);
890 INIT_LIST_HEAD(&lo->plh_segs); 1000 INIT_LIST_HEAD(&lo->plh_segs);
891 INIT_LIST_HEAD(&lo->plh_bulk_recall); 1001 INIT_LIST_HEAD(&lo->plh_bulk_destroy);
892 lo->plh_inode = ino; 1002 lo->plh_inode = ino;
893 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); 1003 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
894 return lo; 1004 return lo;
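
The pnfs.c rework above replaces the single splice of every server's layout list with a two-phase bulk destroy: under clp->cl_lock and RCU each inode is pinned with igrab() and its header queued on a private plh_bulk_destroy list; whenever the scan has to drop the locks (to iput() an inode it could not queue) it returns -EAGAIN and the caller restarts from the top, since the list may have changed in the meantime. A minimal sketch of that drop-locks-and-restart idiom over a generic list (types and names are illustrative):

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

struct example_item {
        struct list_head        link;
        bool                    needs_sleepable_cleanup;
};

/* Sketch: detach items from @src under @lock; when an item needs work
 * that may sleep, drop the lock, do it, retake the lock and ask the
 * caller to restart the walk. */
static int example_scan_locked(spinlock_t *lock, struct list_head *src)
{
        struct example_item *it, *next;

        list_for_each_entry_safe(it, next, src, link) {
                list_del_init(&it->link);
                if (!it->needs_sleepable_cleanup)
                        continue;
                spin_unlock(lock);
                /* sleepable cleanup (e.g. iput()) would run here */
                spin_lock(lock);
                return -EAGAIN;
        }
        return 0;
}

static void example_scan(spinlock_t *lock, struct list_head *src)
{
        spin_lock(lock);
        while (example_scan_locked(lock, src) == -EAGAIN)
                ;               /* restart from the head after each lock drop */
        spin_unlock(lock);
}
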
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132struct pnfs_layout_hdr { 132struct pnfs_layout_hdr {
133 atomic_t plh_refcount; 133 atomic_t plh_refcount;
134 struct list_head plh_layouts; /* other client layouts */ 134 struct list_head plh_layouts; /* other client layouts */
135 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ 135 struct list_head plh_bulk_destroy;
136 struct list_head plh_segs; /* layout segments list */ 136 struct list_head plh_segs; /* layout segments list */
137 nfs4_stateid plh_stateid; 137 nfs4_stateid plh_stateid;
138 atomic_t plh_outstanding; /* number of RPCs out */ 138 atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 196void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 197void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 198void pnfs_destroy_all_layouts(struct nfs_client *);
199int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200 struct nfs_fsid *fsid,
201 bool is_recall);
202int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203 bool is_recall);
199void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); 204void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 205void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 206 const nfs4_stateid *new,
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index d35b62e83ea6..6da209bd9408 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -77,9 +77,8 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
77 long hash) 77 long hash)
78{ 78{
79 struct nfs4_deviceid_node *d; 79 struct nfs4_deviceid_node *d;
80 struct hlist_node *n;
81 80
82 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 81 hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
83 if (d->ld == ld && d->nfs_client == clp && 82 if (d->ld == ld && d->nfs_client == clp &&
84 !memcmp(&d->deviceid, id, sizeof(*id))) { 83 !memcmp(&d->deviceid, id, sizeof(*id))) {
85 if (atomic_read(&d->ref)) 84 if (atomic_read(&d->ref))
@@ -248,12 +247,11 @@ static void
248_deviceid_purge_client(const struct nfs_client *clp, long hash) 247_deviceid_purge_client(const struct nfs_client *clp, long hash)
249{ 248{
250 struct nfs4_deviceid_node *d; 249 struct nfs4_deviceid_node *d;
251 struct hlist_node *n;
252 HLIST_HEAD(tmp); 250 HLIST_HEAD(tmp);
253 251
254 spin_lock(&nfs4_deviceid_lock); 252 spin_lock(&nfs4_deviceid_lock);
255 rcu_read_lock(); 253 rcu_read_lock();
256 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) 254 hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[hash], node)
257 if (d->nfs_client == clp && atomic_read(&d->ref)) { 255 if (d->nfs_client == clp && atomic_read(&d->ref)) {
258 hlist_del_init_rcu(&d->node); 256 hlist_del_init_rcu(&d->node);
259 hlist_add_head(&d->tmpnode, &tmp); 257 hlist_add_head(&d->tmpnode, &tmp);
@@ -291,12 +289,11 @@ void
291nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) 289nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
292{ 290{
293 struct nfs4_deviceid_node *d; 291 struct nfs4_deviceid_node *d;
294 struct hlist_node *n;
295 int i; 292 int i;
296 293
297 rcu_read_lock(); 294 rcu_read_lock();
298 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ 295 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
299 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) 296 hlist_for_each_entry_rcu(d, &nfs4_deviceid_cache[i], node)
300 if (d->nfs_client == clp) 297 if (d->nfs_client == clp)
301 set_bit(NFS_DEVICEID_INVALID, &d->flags); 298 set_bit(NFS_DEVICEID_INVALID, &d->flags);
302 } 299 }
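
pnfs_dev.c is adapted to the three-argument hlist_for_each_entry_rcu(): the iterator now derives its cursor from the entry type itself, so the separate struct hlist_node * variable disappears from all three walkers. A minimal sketch of the new form over an RCU-protected hash bucket (node type and lookup key are illustrative):

#include <linux/rculist.h>

struct example_node {
        int                     id;
        struct hlist_node       node;
};

/* Sketch: look up @id in one hash bucket; the caller must be inside an
 * RCU read-side critical section. */
static struct example_node *example_lookup(struct hlist_head *bucket, int id)
{
        struct example_node *n;

        hlist_for_each_entry_rcu(n, bucket, node)
                if (n->id == id)
                        return n;
        return NULL;
}
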
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f084dac948e1..fc8de9016acf 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -662,7 +662,7 @@ nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
662static int 662static int
663nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) 663nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
664{ 664{
665 struct inode *inode = filp->f_path.dentry->d_inode; 665 struct inode *inode = file_inode(filp);
666 666
667 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); 667 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
668} 668}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b6bdb18e892c..a5e5d9899d56 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -91,12 +91,16 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
91 put_nfs_open_context(rdata->args.context); 91 put_nfs_open_context(rdata->args.context);
92 if (rdata->pages.pagevec != rdata->pages.page_array) 92 if (rdata->pages.pagevec != rdata->pages.page_array)
93 kfree(rdata->pages.pagevec); 93 kfree(rdata->pages.pagevec);
94 if (rdata != &read_header->rpc_data) 94 if (rdata == &read_header->rpc_data) {
95 kfree(rdata);
96 else
97 rdata->header = NULL; 95 rdata->header = NULL;
96 rdata = NULL;
97 }
98 if (atomic_dec_and_test(&hdr->refcnt)) 98 if (atomic_dec_and_test(&hdr->refcnt))
99 hdr->completion_ops->completion(hdr); 99 hdr->completion_ops->completion(hdr);
100 /* Note: we only free the rpc_task after callbacks are done.
101 * See the comment in rpc_free_task() for why
102 */
103 kfree(rdata);
100} 104}
101EXPORT_SYMBOL_GPL(nfs_readdata_release); 105EXPORT_SYMBOL_GPL(nfs_readdata_release);
102 106
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3250b41eb562..17b32b722457 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -55,7 +55,6 @@
55#include <linux/parser.h> 55#include <linux/parser.h>
56#include <linux/nsproxy.h> 56#include <linux/nsproxy.h>
57#include <linux/rcupdate.h> 57#include <linux/rcupdate.h>
58#include <linux/kthread.h>
59 58
60#include <asm/uaccess.h> 59#include <asm/uaccess.h>
61 60
@@ -293,7 +292,7 @@ struct file_system_type nfs_fs_type = {
293 .name = "nfs", 292 .name = "nfs",
294 .mount = nfs_fs_mount, 293 .mount = nfs_fs_mount,
295 .kill_sb = nfs_kill_super, 294 .kill_sb = nfs_kill_super,
296 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 295 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
297}; 296};
298EXPORT_SYMBOL_GPL(nfs_fs_type); 297EXPORT_SYMBOL_GPL(nfs_fs_type);
299 298
@@ -302,7 +301,7 @@ struct file_system_type nfs_xdev_fs_type = {
302 .name = "nfs", 301 .name = "nfs",
303 .mount = nfs_xdev_mount, 302 .mount = nfs_xdev_mount,
304 .kill_sb = nfs_kill_super, 303 .kill_sb = nfs_kill_super,
305 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 304 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
306}; 305};
307 306
308const struct super_operations nfs_sops = { 307const struct super_operations nfs_sops = {
@@ -332,7 +331,7 @@ struct file_system_type nfs4_fs_type = {
332 .name = "nfs4", 331 .name = "nfs4",
333 .mount = nfs_fs_mount, 332 .mount = nfs_fs_mount,
334 .kill_sb = nfs_kill_super, 333 .kill_sb = nfs_kill_super,
335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 334 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
336}; 335};
337EXPORT_SYMBOL_GPL(nfs4_fs_type); 336EXPORT_SYMBOL_GPL(nfs4_fs_type);
338 337
@@ -419,54 +418,6 @@ void nfs_sb_deactive(struct super_block *sb)
419} 418}
420EXPORT_SYMBOL_GPL(nfs_sb_deactive); 419EXPORT_SYMBOL_GPL(nfs_sb_deactive);
421 420
422static int nfs_deactivate_super_async_work(void *ptr)
423{
424 struct super_block *sb = ptr;
425
426 deactivate_super(sb);
427 module_put_and_exit(0);
428 return 0;
429}
430
431/*
432 * same effect as deactivate_super, but will do final unmount in kthread
433 * context
434 */
435static void nfs_deactivate_super_async(struct super_block *sb)
436{
437 struct task_struct *task;
438 char buf[INET6_ADDRSTRLEN + 1];
439 struct nfs_server *server = NFS_SB(sb);
440 struct nfs_client *clp = server->nfs_client;
441
442 if (!atomic_add_unless(&sb->s_active, -1, 1)) {
443 rcu_read_lock();
444 snprintf(buf, sizeof(buf),
445 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
446 rcu_read_unlock();
447
448 __module_get(THIS_MODULE);
449 task = kthread_run(nfs_deactivate_super_async_work, sb,
450 "%s-deactivate-super", buf);
451 if (IS_ERR(task)) {
452 pr_err("%s: kthread_run: %ld\n",
453 __func__, PTR_ERR(task));
454 /* make synchronous call and hope for the best */
455 deactivate_super(sb);
456 module_put(THIS_MODULE);
457 }
458 }
459}
460
461void nfs_sb_deactive_async(struct super_block *sb)
462{
463 struct nfs_server *server = NFS_SB(sb);
464
465 if (atomic_dec_and_test(&server->active))
466 nfs_deactivate_super_async(sb);
467}
468EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
469
470/* 421/*
471 * Deliver file system statistics to userspace 422 * Deliver file system statistics to userspace
472 */ 423 */
@@ -1153,7 +1104,7 @@ static int nfs_get_option_str(substring_t args[], char **option)
1153{ 1104{
1154 kfree(*option); 1105 kfree(*option);
1155 *option = match_strdup(args); 1106 *option = match_strdup(args);
1156 return !option; 1107 return !*option;
1157} 1108}
1158 1109
1159static int nfs_get_option_ul(substring_t args[], unsigned long *option) 1110static int nfs_get_option_ul(substring_t args[], unsigned long *option)
@@ -2590,27 +2541,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2590 struct nfs_server *server; 2541 struct nfs_server *server;
2591 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2542 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2592 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; 2543 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
2593 int error;
2594 2544
2595 dprintk("--> nfs_xdev_mount_common()\n"); 2545 dprintk("--> nfs_xdev_mount()\n");
2596 2546
2597 mount_info.mntfh = mount_info.cloned->fh; 2547 mount_info.mntfh = mount_info.cloned->fh;
2598 2548
2599 /* create a new volume representation */ 2549 /* create a new volume representation */
2600 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2550 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2601 if (IS_ERR(server)) {
2602 error = PTR_ERR(server);
2603 goto out_err;
2604 }
2605 2551
2606 mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); 2552 if (IS_ERR(server))
2607 dprintk("<-- nfs_xdev_mount_common() = 0\n"); 2553 mntroot = ERR_CAST(server);
2608out: 2554 else
2609 return mntroot; 2555 mntroot = nfs_fs_mount_common(server, flags,
2556 dev_name, &mount_info, nfs_mod);
2610 2557
2611out_err: 2558 dprintk("<-- nfs_xdev_mount() = %ld\n",
2612 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); 2559 IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
2613 goto out; 2560 return mntroot;
2614} 2561}
2615 2562
2616#if IS_ENABLED(CONFIG_NFS_V4) 2563#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3f79c77153b8..d26a32f5b53b 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
95 95
96 nfs_dec_sillycount(data->dir); 96 nfs_dec_sillycount(data->dir);
97 nfs_free_unlinkdata(data); 97 nfs_free_unlinkdata(data);
98 nfs_sb_deactive_async(sb); 98 nfs_sb_deactive(sb);
99} 99}
100 100
101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) 101static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
@@ -268,8 +268,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
268 * point dentry is definitely not a root, so we won't need 268 * point dentry is definitely not a root, so we won't need
269 * that anymore. 269 * that anymore.
270 */ 270 */
271 if (devname_garbage) 271 kfree(devname_garbage);
272 kfree(devname_garbage);
273 return 0; 272 return 0;
274out_unlock: 273out_unlock:
275 spin_unlock(&dentry->d_lock); 274 spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b673be31590e..c483cc50b82e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -126,12 +126,16 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
126 put_nfs_open_context(wdata->args.context); 126 put_nfs_open_context(wdata->args.context);
127 if (wdata->pages.pagevec != wdata->pages.page_array) 127 if (wdata->pages.pagevec != wdata->pages.page_array)
128 kfree(wdata->pages.pagevec); 128 kfree(wdata->pages.pagevec);
129 if (wdata != &write_header->rpc_data) 129 if (wdata == &write_header->rpc_data) {
130 kfree(wdata);
131 else
132 wdata->header = NULL; 130 wdata->header = NULL;
131 wdata = NULL;
132 }
133 if (atomic_dec_and_test(&hdr->refcnt)) 133 if (atomic_dec_and_test(&hdr->refcnt))
134 hdr->completion_ops->completion(hdr); 134 hdr->completion_ops->completion(hdr);
135 /* Note: we only free the rpc_task after callbacks are done.
136 * See the comment in rpc_free_task() for why
137 */
138 kfree(wdata);
135} 139}
136EXPORT_SYMBOL_GPL(nfs_writedata_release); 140EXPORT_SYMBOL_GPL(nfs_writedata_release);
137 141
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 6940439bd609..ed628f71274c 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -38,8 +38,8 @@ struct nfsacl_encode_desc {
38 unsigned int count; 38 unsigned int count;
39 struct posix_acl *acl; 39 struct posix_acl *acl;
40 int typeflag; 40 int typeflag;
41 uid_t uid; 41 kuid_t uid;
42 gid_t gid; 42 kgid_t gid;
43}; 43};
44 44
45struct nfsacl_simple_acl { 45struct nfsacl_simple_acl {
@@ -60,14 +60,16 @@ xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem)
60 *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); 60 *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag);
61 switch(entry->e_tag) { 61 switch(entry->e_tag) {
62 case ACL_USER_OBJ: 62 case ACL_USER_OBJ:
63 *p++ = htonl(nfsacl_desc->uid); 63 *p++ = htonl(from_kuid(&init_user_ns, nfsacl_desc->uid));
64 break; 64 break;
65 case ACL_GROUP_OBJ: 65 case ACL_GROUP_OBJ:
66 *p++ = htonl(nfsacl_desc->gid); 66 *p++ = htonl(from_kgid(&init_user_ns, nfsacl_desc->gid));
67 break; 67 break;
68 case ACL_USER: 68 case ACL_USER:
69 *p++ = htonl(from_kuid(&init_user_ns, entry->e_uid));
70 break;
69 case ACL_GROUP: 71 case ACL_GROUP:
70 *p++ = htonl(entry->e_id); 72 *p++ = htonl(from_kgid(&init_user_ns, entry->e_gid));
71 break; 73 break;
72 default: /* Solaris depends on that! */ 74 default: /* Solaris depends on that! */
73 *p++ = 0; 75 *p++ = 0;
@@ -148,6 +150,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
148 (struct nfsacl_decode_desc *) desc; 150 (struct nfsacl_decode_desc *) desc;
149 __be32 *p = elem; 151 __be32 *p = elem;
150 struct posix_acl_entry *entry; 152 struct posix_acl_entry *entry;
153 unsigned int id;
151 154
152 if (!nfsacl_desc->acl) { 155 if (!nfsacl_desc->acl) {
153 if (desc->array_len > NFS_ACL_MAX_ENTRIES) 156 if (desc->array_len > NFS_ACL_MAX_ENTRIES)
@@ -160,14 +163,22 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
160 163
161 entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; 164 entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++];
162 entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; 165 entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT;
163 entry->e_id = ntohl(*p++); 166 id = ntohl(*p++);
164 entry->e_perm = ntohl(*p++); 167 entry->e_perm = ntohl(*p++);
165 168
166 switch(entry->e_tag) { 169 switch(entry->e_tag) {
167 case ACL_USER_OBJ:
168 case ACL_USER: 170 case ACL_USER:
169 case ACL_GROUP_OBJ: 171 entry->e_uid = make_kuid(&init_user_ns, id);
172 if (!uid_valid(entry->e_uid))
173 return -EINVAL;
174 break;
170 case ACL_GROUP: 175 case ACL_GROUP:
176 entry->e_gid = make_kgid(&init_user_ns, id);
177 if (!gid_valid(entry->e_gid))
178 return -EINVAL;
179 break;
180 case ACL_USER_OBJ:
181 case ACL_GROUP_OBJ:
171 case ACL_OTHER: 182 case ACL_OTHER:
172 if (entry->e_perm & ~S_IRWXO) 183 if (entry->e_perm & ~S_IRWXO)
173 return -EINVAL; 184 return -EINVAL;
@@ -190,9 +201,13 @@ cmp_acl_entry(const void *x, const void *y)
190 201
191 if (a->e_tag != b->e_tag) 202 if (a->e_tag != b->e_tag)
192 return a->e_tag - b->e_tag; 203 return a->e_tag - b->e_tag;
193 else if (a->e_id > b->e_id) 204 else if ((a->e_tag == ACL_USER) && uid_gt(a->e_uid, b->e_uid))
205 return 1;
206 else if ((a->e_tag == ACL_USER) && uid_lt(a->e_uid, b->e_uid))
207 return -1;
208 else if ((a->e_tag == ACL_GROUP) && gid_gt(a->e_gid, b->e_gid))
194 return 1; 209 return 1;
195 else if (a->e_id < b->e_id) 210 else if ((a->e_tag == ACL_GROUP) && gid_lt(a->e_gid, b->e_gid))
196 return -1; 211 return -1;
197 else 212 else
198 return 0; 213 return 0;
@@ -213,22 +228,18 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
213 sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), 228 sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry),
214 cmp_acl_entry, NULL); 229 cmp_acl_entry, NULL);
215 230
216 /* Clear undefined identifier fields and find the ACL_GROUP_OBJ 231 /* Find the ACL_GROUP_OBJ and ACL_MASK entries. */
217 and ACL_MASK entries. */
218 FOREACH_ACL_ENTRY(pa, acl, pe) { 232 FOREACH_ACL_ENTRY(pa, acl, pe) {
219 switch(pa->e_tag) { 233 switch(pa->e_tag) {
220 case ACL_USER_OBJ: 234 case ACL_USER_OBJ:
221 pa->e_id = ACL_UNDEFINED_ID;
222 break; 235 break;
223 case ACL_GROUP_OBJ: 236 case ACL_GROUP_OBJ:
224 pa->e_id = ACL_UNDEFINED_ID;
225 group_obj = pa; 237 group_obj = pa;
226 break; 238 break;
227 case ACL_MASK: 239 case ACL_MASK:
228 mask = pa; 240 mask = pa;
229 /* fall through */ 241 /* fall through */
230 case ACL_OTHER: 242 case ACL_OTHER:
231 pa->e_id = ACL_UNDEFINED_ID;
232 break; 243 break;
233 } 244 }
234 } 245 }
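
Most of the nfsacl and nfsd hunks above replace raw uid_t/gid_t numbers with the namespace-aware kuid_t/kgid_t types, converting at the XDR boundary. A minimal sketch of that round trip, assuming the init_user_ns mapping the NFS code uses; the helper names here are illustrative only:

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/uidgid.h>
#include <linux/user_namespace.h>

static int decode_wire_uid(u32 wire_uid, kuid_t *out)
{
	kuid_t kuid = make_kuid(&init_user_ns, wire_uid);

	if (!uid_valid(kuid))	/* no mapping: reject, as the decoders above do */
		return -EINVAL;
	*out = kuid;
	return 0;
}

static u32 encode_wire_uid(kuid_t kuid)
{
	/* translate back to a plain number for the XDR encoder */
	return from_kuid(&init_user_ns, kuid);
}

/* Comparisons use uid_eq()/gid_eq() rather than ==, since kuid_t/kgid_t
 * may be wrapper structs; that is why the hunks above rewrite checks such
 * as "new->fsuid == (uid_t)-1" as uid_eq(new->fsuid, INVALID_UID). */
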
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 8df1ea4a6ff9..430b6872806f 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -65,8 +65,8 @@ config NFSD_V3_ACL
65 If unsure, say N. 65 If unsure, say N.
66 66
67config NFSD_V4 67config NFSD_V4
68 bool "NFS server support for NFS version 4 (EXPERIMENTAL)" 68 bool "NFS server support for NFS version 4"
69 depends on NFSD && PROC_FS && EXPERIMENTAL 69 depends on NFSD && PROC_FS
70 select NFSD_V3 70 select NFSD_V3
71 select FS_POSIX_ACL 71 select FS_POSIX_ACL
72 select SUNRPC_GSS 72 select SUNRPC_GSS
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 34e5c40af5ef..8b186a4955cc 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -44,8 +44,6 @@
44struct nfs4_acl *nfs4_acl_new(int); 44struct nfs4_acl *nfs4_acl_new(int);
45int nfs4_acl_get_whotype(char *, u32); 45int nfs4_acl_get_whotype(char *, u32);
46int nfs4_acl_write_who(int who, char *p); 46int nfs4_acl_write_who(int who, char *p);
47int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
48 uid_t who, u32 mask);
49 47
50#define NFS4_ACL_TYPE_DEFAULT 0x01 48#define NFS4_ACL_TYPE_DEFAULT 0x01
51#define NFS4_ACL_DIR 0x02 49#define NFS4_ACL_DIR 0x02
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 34a10d78b839..06cddd572264 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -47,9 +47,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
47 if (!gi) 47 if (!gi)
48 goto oom; 48 goto oom;
49 } else if (flags & NFSEXP_ROOTSQUASH) { 49 } else if (flags & NFSEXP_ROOTSQUASH) {
50 if (!new->fsuid) 50 if (uid_eq(new->fsuid, GLOBAL_ROOT_UID))
51 new->fsuid = exp->ex_anon_uid; 51 new->fsuid = exp->ex_anon_uid;
52 if (!new->fsgid) 52 if (gid_eq(new->fsgid, GLOBAL_ROOT_GID))
53 new->fsgid = exp->ex_anon_gid; 53 new->fsgid = exp->ex_anon_gid;
54 54
55 gi = groups_alloc(rqgi->ngroups); 55 gi = groups_alloc(rqgi->ngroups);
@@ -58,7 +58,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
58 58
59 for (i = 0; i < rqgi->ngroups; i++) { 59 for (i = 0; i < rqgi->ngroups; i++) {
60 if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i))) 60 if (gid_eq(GLOBAL_ROOT_GID, GROUP_AT(rqgi, i)))
61 GROUP_AT(gi, i) = make_kgid(&init_user_ns, exp->ex_anon_gid); 61 GROUP_AT(gi, i) = exp->ex_anon_gid;
62 else 62 else
63 GROUP_AT(gi, i) = GROUP_AT(rqgi, i); 63 GROUP_AT(gi, i) = GROUP_AT(rqgi, i);
64 } 64 }
@@ -66,9 +66,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
66 gi = get_group_info(rqgi); 66 gi = get_group_info(rqgi);
67 } 67 }
68 68
69 if (new->fsuid == (uid_t) -1) 69 if (uid_eq(new->fsuid, INVALID_UID))
70 new->fsuid = exp->ex_anon_uid; 70 new->fsuid = exp->ex_anon_uid;
71 if (new->fsgid == (gid_t) -1) 71 if (gid_eq(new->fsgid, INVALID_GID))
72 new->fsgid = exp->ex_anon_gid; 72 new->fsgid = exp->ex_anon_gid;
73 73
74 ret = set_groups(new, gi); 74 ret = set_groups(new, gi);
@@ -76,7 +76,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
76 if (ret < 0) 76 if (ret < 0)
77 goto error; 77 goto error;
78 78
79 if (new->fsuid) 79 if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID))
80 new->cap_effective = cap_drop_nfsd_set(new->cap_effective); 80 new->cap_effective = cap_drop_nfsd_set(new->cap_effective);
81 else 81 else
82 new->cap_effective = cap_raise_nfsd_set(new->cap_effective, 82 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
index 78b3c0e93822..53325a12ba62 100644
--- a/fs/nfsd/auth.h
+++ b/fs/nfsd/auth.h
@@ -1,6 +1,5 @@
1/* 1/*
2 * nfsd-specific authentication stuff. 2 * nfsd-specific authentication stuff.
3 * uid/gid mapping not yet implemented.
4 * 3 *
5 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
6 */ 5 */
@@ -8,11 +7,6 @@
8#ifndef LINUX_NFSD_AUTH_H 7#ifndef LINUX_NFSD_AUTH_H
9#define LINUX_NFSD_AUTH_H 8#define LINUX_NFSD_AUTH_H
10 9
11#define nfsd_luid(rq, uid) ((u32)(uid))
12#define nfsd_lgid(rq, gid) ((u32)(gid))
13#define nfsd_ruid(rq, uid) ((u32)(uid))
14#define nfsd_rgid(rq, gid) ((u32)(gid))
15
16/* 10/*
17 * Set the current process's fsuid/fsgid etc to those of the NFS 11 * Set the current process's fsuid/fsgid etc to those of the NFS
18 * client user 12 * client user
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 15ebf91982b0..5f38ea36e266 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -535,13 +535,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
535 err = get_int(&mesg, &an_int); 535 err = get_int(&mesg, &an_int);
536 if (err) 536 if (err)
537 goto out3; 537 goto out3;
538 exp.ex_anon_uid= an_int; 538 exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
539 if (!uid_valid(exp.ex_anon_uid))
540 goto out3;
539 541
540 /* anon gid */ 542 /* anon gid */
541 err = get_int(&mesg, &an_int); 543 err = get_int(&mesg, &an_int);
542 if (err) 544 if (err)
543 goto out3; 545 goto out3;
544 exp.ex_anon_gid= an_int; 546 exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
547 if (!gid_valid(exp.ex_anon_gid))
548 goto out3;
545 549
546 /* fsid */ 550 /* fsid */
547 err = get_int(&mesg, &an_int); 551 err = get_int(&mesg, &an_int);
@@ -604,7 +608,7 @@ out:
604} 608}
605 609
606static void exp_flags(struct seq_file *m, int flag, int fsid, 610static void exp_flags(struct seq_file *m, int flag, int fsid,
607 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fslocs); 611 kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs);
608static void show_secinfo(struct seq_file *m, struct svc_export *exp); 612static void show_secinfo(struct seq_file *m, struct svc_export *exp);
609 613
610static int svc_export_show(struct seq_file *m, 614static int svc_export_show(struct seq_file *m,
@@ -1171,15 +1175,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1171} 1175}
1172 1176
1173static void exp_flags(struct seq_file *m, int flag, int fsid, 1177static void exp_flags(struct seq_file *m, int flag, int fsid,
1174 uid_t anonu, uid_t anong, struct nfsd4_fs_locations *fsloc) 1178 kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
1175{ 1179{
1176 show_expflags(m, flag, NFSEXP_ALLFLAGS); 1180 show_expflags(m, flag, NFSEXP_ALLFLAGS);
1177 if (flag & NFSEXP_FSID) 1181 if (flag & NFSEXP_FSID)
1178 seq_printf(m, ",fsid=%d", fsid); 1182 seq_printf(m, ",fsid=%d", fsid);
1179 if (anonu != (uid_t)-2 && anonu != (0x10000-2)) 1183 if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
1180 seq_printf(m, ",anonuid=%u", anonu); 1184 !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
1181 if (anong != (gid_t)-2 && anong != (0x10000-2)) 1185 seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
1182 seq_printf(m, ",anongid=%u", anong); 1186 if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
1187 !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
1188 seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
1183 if (fsloc && fsloc->locations_count > 0) { 1189 if (fsloc && fsloc->locations_count > 0) {
1184 char *loctype = (fsloc->migrated) ? "refer" : "replicas"; 1190 char *loctype = (fsloc->migrated) ? "refer" : "replicas";
1185 int i; 1191 int i;
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 247c00ccdb0f..d620e7f81429 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -101,7 +101,7 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
101 loff_t pos = *ppos; 101 loff_t pos = *ppos;
102 102
103 if (!pos) 103 if (!pos)
104 nfsd_inject_get(file->f_dentry->d_inode->i_private, &val); 104 nfsd_inject_get(file_inode(file)->i_private, &val);
105 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); 105 size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
106 106
107 if (pos < 0) 107 if (pos < 0)
@@ -133,10 +133,10 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
133 133
134 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); 134 size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
135 if (size > 0) 135 if (size > 0)
136 nfsd_inject_set_client(file->f_dentry->d_inode->i_private, &sa, size); 136 nfsd_inject_set_client(file_inode(file)->i_private, &sa, size);
137 else { 137 else {
138 val = simple_strtoll(write_buf, NULL, 0); 138 val = simple_strtoll(write_buf, NULL, 0);
139 nfsd_inject_set(file->f_dentry->d_inode->i_private, val); 139 nfsd_inject_set(file_inode(file)->i_private, val);
140 } 140 }
141 return len; /* on success, claim we got the whole input */ 141 return len; /* on success, claim we got the whole input */
142} 142}
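
Several hunks (nfs/proc.c, nfsd/fault_inject.c, nfsd/nfsctl.c) switch from open-coded dentry dereferences to the file_inode() accessor. A short sketch, assuming the 3.9+ helper declared in <linux/fs.h>; the wrapper function is hypothetical:

#include <linux/fs.h>

static void *get_file_private(struct file *file)
{
	/* Previously spelled file->f_path.dentry->d_inode->i_private
	 * (or file->f_dentry->d_inode->i_private). */
	return file_inode(file)->i_private;
}
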
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index 9d513efc01ba..bf95f6b817a4 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -54,9 +54,9 @@ static inline void nfsd_idmap_shutdown(struct net *net)
54} 54}
55#endif 55#endif
56 56
57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); 57__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); 58__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
59int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); 59int nfsd_map_uid_to_name(struct svc_rqst *, kuid_t, char *);
60int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); 60int nfsd_map_gid_to_name(struct svc_rqst *, kgid_t, char *);
61 61
62#endif /* LINUX_NFSD_IDMAP_H */ 62#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 9170861c804a..95d76dc6c5da 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -45,6 +45,10 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
45 RETURN_STATUS(nfserr_inval); 45 RETURN_STATUS(nfserr_inval);
46 resp->mask = argp->mask; 46 resp->mask = argp->mask;
47 47
48 nfserr = fh_getattr(fh, &resp->stat);
49 if (nfserr)
50 goto fail;
51
48 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { 52 if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
49 acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS); 53 acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
50 if (IS_ERR(acl)) { 54 if (IS_ERR(acl)) {
@@ -115,6 +119,9 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
115 nfserr = nfserrno( nfsd_set_posix_acl( 119 nfserr = nfserrno( nfsd_set_posix_acl(
116 fh, ACL_TYPE_DEFAULT, argp->acl_default) ); 120 fh, ACL_TYPE_DEFAULT, argp->acl_default) );
117 } 121 }
122 if (!nfserr) {
123 nfserr = fh_getattr(fh, &resp->stat);
124 }
118 125
119 /* argp->acl_{access,default} may have been allocated in 126 /* argp->acl_{access,default} may have been allocated in
120 nfssvc_decode_setaclargs. */ 127 nfssvc_decode_setaclargs. */
@@ -129,10 +136,15 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
129static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, 136static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
130 struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) 137 struct nfsd_fhandle *argp, struct nfsd_attrstat *resp)
131{ 138{
139 __be32 nfserr;
132 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 140 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
133 141
134 fh_copy(&resp->fh, &argp->fh); 142 fh_copy(&resp->fh, &argp->fh);
135 return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); 143 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
144 if (nfserr)
145 return nfserr;
146 nfserr = fh_getattr(&resp->fh, &resp->stat);
147 return nfserr;
136} 148}
137 149
138/* 150/*
@@ -150,6 +162,9 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
150 fh_copy(&resp->fh, &argp->fh); 162 fh_copy(&resp->fh, &argp->fh);
151 resp->access = argp->access; 163 resp->access = argp->access;
152 nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); 164 nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
165 if (nfserr)
166 return nfserr;
167 nfserr = fh_getattr(&resp->fh, &resp->stat);
153 return nfserr; 168 return nfserr;
154} 169}
155 170
@@ -243,7 +258,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
243 return 0; 258 return 0;
244 inode = dentry->d_inode; 259 inode = dentry->d_inode;
245 260
246 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); 261 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
247 *p++ = htonl(resp->mask); 262 *p++ = htonl(resp->mask);
248 if (!xdr_ressize_check(rqstp, p)) 263 if (!xdr_ressize_check(rqstp, p))
249 return 0; 264 return 0;
@@ -274,7 +289,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
274static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, 289static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
275 struct nfsd_attrstat *resp) 290 struct nfsd_attrstat *resp)
276{ 291{
277 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); 292 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
278 return xdr_ressize_check(rqstp, p); 293 return xdr_ressize_check(rqstp, p);
279} 294}
280 295
@@ -282,7 +297,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p,
282static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, 297static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p,
283 struct nfsd3_accessres *resp) 298 struct nfsd3_accessres *resp)
284{ 299{
285 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); 300 p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
286 *p++ = htonl(resp->access); 301 *p++ = htonl(resp->access);
287 return xdr_ressize_check(rqstp, p); 302 return xdr_ressize_check(rqstp, p);
288} 303}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 1fc02dfdc5c4..401289913130 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -43,7 +43,6 @@ static __be32
43nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, 43nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
44 struct nfsd3_attrstat *resp) 44 struct nfsd3_attrstat *resp)
45{ 45{
46 int err;
47 __be32 nfserr; 46 __be32 nfserr;
48 47
49 dprintk("nfsd: GETATTR(3) %s\n", 48 dprintk("nfsd: GETATTR(3) %s\n",
@@ -55,9 +54,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
55 if (nfserr) 54 if (nfserr)
56 RETURN_STATUS(nfserr); 55 RETURN_STATUS(nfserr);
57 56
58 err = vfs_getattr(resp->fh.fh_export->ex_path.mnt, 57 nfserr = fh_getattr(&resp->fh, &resp->stat);
59 resp->fh.fh_dentry, &resp->stat);
60 nfserr = nfserrno(err);
61 58
62 RETURN_STATUS(nfserr); 59 RETURN_STATUS(nfserr);
63} 60}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 324c0baf7cda..14d9ecb96cff 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -11,6 +11,7 @@
11#include "xdr3.h" 11#include "xdr3.h"
12#include "auth.h" 12#include "auth.h"
13#include "netns.h" 13#include "netns.h"
14#include "vfs.h"
14 15
15#define NFSDDBG_FACILITY NFSDDBG_XDR 16#define NFSDDBG_FACILITY NFSDDBG_XDR
16 17
@@ -105,12 +106,14 @@ decode_sattr3(__be32 *p, struct iattr *iap)
105 iap->ia_mode = ntohl(*p++); 106 iap->ia_mode = ntohl(*p++);
106 } 107 }
107 if (*p++) { 108 if (*p++) {
108 iap->ia_valid |= ATTR_UID; 109 iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
109 iap->ia_uid = ntohl(*p++); 110 if (uid_valid(iap->ia_uid))
111 iap->ia_valid |= ATTR_UID;
110 } 112 }
111 if (*p++) { 113 if (*p++) {
112 iap->ia_valid |= ATTR_GID; 114 iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
113 iap->ia_gid = ntohl(*p++); 115 if (gid_valid(iap->ia_gid))
116 iap->ia_valid |= ATTR_GID;
114 } 117 }
115 if (*p++) { 118 if (*p++) {
116 u64 newsize; 119 u64 newsize;
@@ -167,8 +170,8 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
167 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); 170 *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
168 *p++ = htonl((u32) stat->mode); 171 *p++ = htonl((u32) stat->mode);
169 *p++ = htonl((u32) stat->nlink); 172 *p++ = htonl((u32) stat->nlink);
170 *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); 173 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
171 *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); 174 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
172 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { 175 if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
173 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); 176 p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
174 } else { 177 } else {
@@ -204,10 +207,10 @@ encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
204{ 207{
205 struct dentry *dentry = fhp->fh_dentry; 208 struct dentry *dentry = fhp->fh_dentry;
206 if (dentry && dentry->d_inode) { 209 if (dentry && dentry->d_inode) {
207 int err; 210 __be32 err;
208 struct kstat stat; 211 struct kstat stat;
209 212
210 err = vfs_getattr(fhp->fh_export->ex_path.mnt, dentry, &stat); 213 err = fh_getattr(fhp, &stat);
211 if (!err) { 214 if (!err) {
212 *p++ = xdr_one; /* attributes follow */ 215 *p++ = xdr_one; /* attributes follow */
213 lease_get_mtime(dentry->d_inode, &stat.mtime); 216 lease_get_mtime(dentry->d_inode, &stat.mtime);
@@ -254,13 +257,12 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
254 */ 257 */
255void fill_post_wcc(struct svc_fh *fhp) 258void fill_post_wcc(struct svc_fh *fhp)
256{ 259{
257 int err; 260 __be32 err;
258 261
259 if (fhp->fh_post_saved) 262 if (fhp->fh_post_saved)
260 printk("nfsd: inode locked twice during operation.\n"); 263 printk("nfsd: inode locked twice during operation.\n");
261 264
262 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, 265 err = fh_getattr(fhp, &fhp->fh_post_attr);
263 &fhp->fh_post_attr);
264 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; 266 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
265 if (err) { 267 if (err) {
266 fhp->fh_post_saved = 0; 268 fhp->fh_post_saved = 0;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 9c51aff02ae2..8a50b3c18093 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -264,7 +264,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
264 ace->flag = eflag; 264 ace->flag = eflag;
265 ace->access_mask = deny_mask_from_posix(deny, flags); 265 ace->access_mask = deny_mask_from_posix(deny, flags);
266 ace->whotype = NFS4_ACL_WHO_NAMED; 266 ace->whotype = NFS4_ACL_WHO_NAMED;
267 ace->who = pa->e_id; 267 ace->who_uid = pa->e_uid;
268 ace++; 268 ace++;
269 acl->naces++; 269 acl->naces++;
270 } 270 }
@@ -273,7 +273,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
273 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, 273 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
274 flags); 274 flags);
275 ace->whotype = NFS4_ACL_WHO_NAMED; 275 ace->whotype = NFS4_ACL_WHO_NAMED;
276 ace->who = pa->e_id; 276 ace->who_uid = pa->e_uid;
277 ace++; 277 ace++;
278 acl->naces++; 278 acl->naces++;
279 pa++; 279 pa++;
@@ -300,7 +300,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
300 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask, 300 ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
301 flags); 301 flags);
302 ace->whotype = NFS4_ACL_WHO_NAMED; 302 ace->whotype = NFS4_ACL_WHO_NAMED;
303 ace->who = pa->e_id; 303 ace->who_gid = pa->e_gid;
304 ace++; 304 ace++;
305 acl->naces++; 305 acl->naces++;
306 pa++; 306 pa++;
@@ -329,7 +329,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
329 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP; 329 ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
330 ace->access_mask = deny_mask_from_posix(deny, flags); 330 ace->access_mask = deny_mask_from_posix(deny, flags);
331 ace->whotype = NFS4_ACL_WHO_NAMED; 331 ace->whotype = NFS4_ACL_WHO_NAMED;
332 ace->who = pa->e_id; 332 ace->who_gid = pa->e_gid;
333 ace++; 333 ace++;
334 acl->naces++; 334 acl->naces++;
335 } 335 }
@@ -345,6 +345,18 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
345 acl->naces++; 345 acl->naces++;
346} 346}
347 347
348static bool
349pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
350{
351 if (pace1->e_tag != pace2->e_tag)
352 return pace1->e_tag > pace2->e_tag;
353 if (pace1->e_tag == ACL_USER)
354 return uid_gt(pace1->e_uid, pace2->e_uid);
355 if (pace1->e_tag == ACL_GROUP)
356 return gid_gt(pace1->e_gid, pace2->e_gid);
357 return false;
358}
359
348static void 360static void
349sort_pacl_range(struct posix_acl *pacl, int start, int end) { 361sort_pacl_range(struct posix_acl *pacl, int start, int end) {
350 int sorted = 0, i; 362 int sorted = 0, i;
@@ -355,8 +367,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
355 while (!sorted) { 367 while (!sorted) {
356 sorted = 1; 368 sorted = 1;
357 for (i = start; i < end; i++) { 369 for (i = start; i < end; i++) {
358 if (pacl->a_entries[i].e_id 370 if (pace_gt(&pacl->a_entries[i],
359 > pacl->a_entries[i+1].e_id) { 371 &pacl->a_entries[i+1])) {
360 sorted = 0; 372 sorted = 0;
361 tmp = pacl->a_entries[i]; 373 tmp = pacl->a_entries[i];
362 pacl->a_entries[i] = pacl->a_entries[i+1]; 374 pacl->a_entries[i] = pacl->a_entries[i+1];
@@ -398,7 +410,10 @@ struct posix_ace_state {
398}; 410};
399 411
400struct posix_user_ace_state { 412struct posix_user_ace_state {
401 uid_t uid; 413 union {
414 kuid_t uid;
415 kgid_t gid;
416 };
402 struct posix_ace_state perms; 417 struct posix_ace_state perms;
403}; 418};
404 419
@@ -521,7 +536,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
521 if (error) 536 if (error)
522 goto out_err; 537 goto out_err;
523 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags); 538 low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
524 pace->e_id = ACL_UNDEFINED_ID;
525 539
526 for (i=0; i < state->users->n; i++) { 540 for (i=0; i < state->users->n; i++) {
527 pace++; 541 pace++;
@@ -531,7 +545,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
531 goto out_err; 545 goto out_err;
532 low_mode_from_nfs4(state->users->aces[i].perms.allow, 546 low_mode_from_nfs4(state->users->aces[i].perms.allow,
533 &pace->e_perm, flags); 547 &pace->e_perm, flags);
534 pace->e_id = state->users->aces[i].uid; 548 pace->e_uid = state->users->aces[i].uid;
535 add_to_mask(state, &state->users->aces[i].perms); 549 add_to_mask(state, &state->users->aces[i].perms);
536 } 550 }
537 551
@@ -541,7 +555,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
541 if (error) 555 if (error)
542 goto out_err; 556 goto out_err;
543 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags); 557 low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
544 pace->e_id = ACL_UNDEFINED_ID;
545 add_to_mask(state, &state->group); 558 add_to_mask(state, &state->group);
546 559
547 for (i=0; i < state->groups->n; i++) { 560 for (i=0; i < state->groups->n; i++) {
@@ -552,14 +565,13 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
552 goto out_err; 565 goto out_err;
553 low_mode_from_nfs4(state->groups->aces[i].perms.allow, 566 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
554 &pace->e_perm, flags); 567 &pace->e_perm, flags);
555 pace->e_id = state->groups->aces[i].uid; 568 pace->e_gid = state->groups->aces[i].gid;
556 add_to_mask(state, &state->groups->aces[i].perms); 569 add_to_mask(state, &state->groups->aces[i].perms);
557 } 570 }
558 571
559 pace++; 572 pace++;
560 pace->e_tag = ACL_MASK; 573 pace->e_tag = ACL_MASK;
561 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags); 574 low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
562 pace->e_id = ACL_UNDEFINED_ID;
563 575
564 pace++; 576 pace++;
565 pace->e_tag = ACL_OTHER; 577 pace->e_tag = ACL_OTHER;
@@ -567,7 +579,6 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
567 if (error) 579 if (error)
568 goto out_err; 580 goto out_err;
569 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags); 581 low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
570 pace->e_id = ACL_UNDEFINED_ID;
571 582
572 return pacl; 583 return pacl;
573out_err: 584out_err:
@@ -587,12 +598,13 @@ static inline void deny_bits(struct posix_ace_state *astate, u32 mask)
587 astate->deny |= mask & ~astate->allow; 598 astate->deny |= mask & ~astate->allow;
588} 599}
589 600
590static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array *a, uid_t uid) 601static int find_uid(struct posix_acl_state *state, kuid_t uid)
591{ 602{
603 struct posix_ace_state_array *a = state->users;
592 int i; 604 int i;
593 605
594 for (i = 0; i < a->n; i++) 606 for (i = 0; i < a->n; i++)
595 if (a->aces[i].uid == uid) 607 if (uid_eq(a->aces[i].uid, uid))
596 return i; 608 return i;
597 /* Not found: */ 609 /* Not found: */
598 a->n++; 610 a->n++;
@@ -603,6 +615,23 @@ static int find_uid(struct posix_acl_state *state, struct posix_ace_state_array
603 return i; 615 return i;
604} 616}
605 617
618static int find_gid(struct posix_acl_state *state, kgid_t gid)
619{
620 struct posix_ace_state_array *a = state->groups;
621 int i;
622
623 for (i = 0; i < a->n; i++)
624 if (gid_eq(a->aces[i].gid, gid))
625 return i;
626 /* Not found: */
627 a->n++;
628 a->aces[i].gid = gid;
629 a->aces[i].perms.allow = state->everyone.allow;
630 a->aces[i].perms.deny = state->everyone.deny;
631
632 return i;
633}
634
606static void deny_bits_array(struct posix_ace_state_array *a, u32 mask) 635static void deny_bits_array(struct posix_ace_state_array *a, u32 mask)
607{ 636{
608 int i; 637 int i;
@@ -636,7 +665,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
636 } 665 }
637 break; 666 break;
638 case ACL_USER: 667 case ACL_USER:
639 i = find_uid(state, state->users, ace->who); 668 i = find_uid(state, ace->who_uid);
640 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { 669 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
641 allow_bits(&state->users->aces[i].perms, mask); 670 allow_bits(&state->users->aces[i].perms, mask);
642 } else { 671 } else {
@@ -658,7 +687,7 @@ static void process_one_v4_ace(struct posix_acl_state *state,
658 } 687 }
659 break; 688 break;
660 case ACL_GROUP: 689 case ACL_GROUP:
661 i = find_uid(state, state->groups, ace->who); 690 i = find_gid(state, ace->who_gid);
662 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) { 691 if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
663 allow_bits(&state->groups->aces[i].perms, mask); 692 allow_bits(&state->groups->aces[i].perms, mask);
664 } else { 693 } else {
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index d9402ea9d751..4832fd819f88 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -65,7 +65,7 @@ MODULE_PARM_DESC(nfs4_disable_idmapping,
65struct ent { 65struct ent {
66 struct cache_head h; 66 struct cache_head h;
67 int type; /* User / Group */ 67 int type; /* User / Group */
68 uid_t id; 68 u32 id;
69 char name[IDMAP_NAMESZ]; 69 char name[IDMAP_NAMESZ];
70 char authname[IDMAP_NAMESZ]; 70 char authname[IDMAP_NAMESZ];
71}; 71};
@@ -528,7 +528,7 @@ rqst_authname(struct svc_rqst *rqstp)
528 528
529static __be32 529static __be32
530idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, 530idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
531 uid_t *id) 531 u32 *id)
532{ 532{
533 struct ent *item, key = { 533 struct ent *item, key = {
534 .type = type, 534 .type = type,
@@ -552,7 +552,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
552} 552}
553 553
554static int 554static int
555idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) 555idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
556{ 556{
557 struct ent *item, key = { 557 struct ent *item, key = {
558 .id = id, 558 .id = id,
@@ -575,7 +575,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
575} 575}
576 576
577static bool 577static bool
578numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) 578numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id)
579{ 579{
580 int ret; 580 int ret;
581 char buf[11]; 581 char buf[11];
@@ -591,7 +591,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel
591} 591}
592 592
593static __be32 593static __be32
594do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) 594do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u32 *id)
595{ 595{
596 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 596 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
597 if (numeric_name_to_id(rqstp, type, name, namelen, id)) 597 if (numeric_name_to_id(rqstp, type, name, namelen, id))
@@ -604,7 +604,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
604} 604}
605 605
606static int 606static int
607do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) 607do_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
608{ 608{
609 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS) 609 if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
610 return sprintf(name, "%u", id); 610 return sprintf(name, "%u", id);
@@ -613,26 +613,40 @@ do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
613 613
614__be32 614__be32
615nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, 615nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
616 __u32 *id) 616 kuid_t *uid)
617{ 617{
618 return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); 618 __be32 status;
619 u32 id = -1;
620 status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
621 *uid = make_kuid(&init_user_ns, id);
622 if (!uid_valid(*uid))
623 status = nfserr_badowner;
624 return status;
619} 625}
620 626
621__be32 627__be32
622nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, 628nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
623 __u32 *id) 629 kgid_t *gid)
624{ 630{
625 return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); 631 __be32 status;
632 u32 id = -1;
633 status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
634 *gid = make_kgid(&init_user_ns, id);
635 if (!gid_valid(*gid))
636 status = nfserr_badowner;
637 return status;
626} 638}
627 639
628int 640int
629nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 641nfsd_map_uid_to_name(struct svc_rqst *rqstp, kuid_t uid, char *name)
630{ 642{
643 u32 id = from_kuid(&init_user_ns, uid);
631 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); 644 return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
632} 645}
633 646
634int 647int
635nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) 648nfsd_map_gid_to_name(struct svc_rqst *rqstp, kgid_t gid, char *name)
636{ 649{
650 u32 id = from_kgid(&init_user_ns, gid);
637 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); 651 return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
638} 652}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e0ae1cf18a82..899ca26dd194 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -73,8 +73,8 @@ nfs4_save_creds(const struct cred **original_creds)
73 if (!new) 73 if (!new)
74 return -ENOMEM; 74 return -ENOMEM;
75 75
76 new->fsuid = 0; 76 new->fsuid = GLOBAL_ROOT_UID;
77 new->fsgid = 0; 77 new->fsgid = GLOBAL_ROOT_GID;
78 *original_creds = override_creds(new); 78 *original_creds = override_creds(new);
79 put_cred(new); 79 put_cred(new);
80 return 0; 80 return 0;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f194f869be4c..16d39c6c4fbb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -151,7 +151,7 @@ get_nfs4_file(struct nfs4_file *fi)
151} 151}
152 152
153static int num_delegations; 153static int num_delegations;
154unsigned int max_delegations; 154unsigned long max_delegations;
155 155
156/* 156/*
157 * Open owner state (share locks) 157 * Open owner state (share locks)
@@ -719,8 +719,8 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num)
719 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); 719 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
720 720
721 spin_lock(&nfsd_drc_lock); 721 spin_lock(&nfsd_drc_lock);
722 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, 722 avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
723 nfsd_drc_max_mem - nfsd_drc_mem_used); 723 nfsd_drc_max_mem - nfsd_drc_mem_used);
724 num = min_t(int, num, avail / slotsize); 724 num = min_t(int, num, avail / slotsize);
725 nfsd_drc_mem_used += num * slotsize; 725 nfsd_drc_mem_used += num * slotsize;
726 spin_unlock(&nfsd_drc_lock); 726 spin_unlock(&nfsd_drc_lock);
@@ -1079,7 +1079,6 @@ free_client(struct nfs4_client *clp)
1079 } 1079 }
1080 free_svc_cred(&clp->cl_cred); 1080 free_svc_cred(&clp->cl_cred);
1081 kfree(clp->cl_name.data); 1081 kfree(clp->cl_name.data);
1082 idr_remove_all(&clp->cl_stateids);
1083 idr_destroy(&clp->cl_stateids); 1082 idr_destroy(&clp->cl_stateids);
1084 kfree(clp); 1083 kfree(clp);
1085} 1084}
@@ -1223,7 +1222,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
1223 if (g1->ngroups != g2->ngroups) 1222 if (g1->ngroups != g2->ngroups)
1224 return false; 1223 return false;
1225 for (i=0; i<g1->ngroups; i++) 1224 for (i=0; i<g1->ngroups; i++)
1226 if (GROUP_AT(g1, i) != GROUP_AT(g2, i)) 1225 if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
1227 return false; 1226 return false;
1228 return true; 1227 return true;
1229} 1228}
@@ -1248,8 +1247,8 @@ static bool
1248same_creds(struct svc_cred *cr1, struct svc_cred *cr2) 1247same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
1249{ 1248{
1250 if ((is_gss_cred(cr1) != is_gss_cred(cr2)) 1249 if ((is_gss_cred(cr1) != is_gss_cred(cr2))
1251 || (cr1->cr_uid != cr2->cr_uid) 1250 || (!uid_eq(cr1->cr_uid, cr2->cr_uid))
1252 || (cr1->cr_gid != cr2->cr_gid) 1251 || (!gid_eq(cr1->cr_gid, cr2->cr_gid))
1253 || !groups_equal(cr1->cr_group_info, cr2->cr_group_info)) 1252 || !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
1254 return false; 1253 return false;
1255 if (cr1->cr_principal == cr2->cr_principal) 1254 if (cr1->cr_principal == cr2->cr_principal)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fcb5bed99c33..01168865dd37 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -293,13 +293,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
293 ace->whotype = nfs4_acl_get_whotype(buf, dummy32); 293 ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
294 status = nfs_ok; 294 status = nfs_ok;
295 if (ace->whotype != NFS4_ACL_WHO_NAMED) 295 if (ace->whotype != NFS4_ACL_WHO_NAMED)
296 ace->who = 0; 296 ;
297 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) 297 else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
298 status = nfsd_map_name_to_gid(argp->rqstp, 298 status = nfsd_map_name_to_gid(argp->rqstp,
299 buf, dummy32, &ace->who); 299 buf, dummy32, &ace->who_gid);
300 else 300 else
301 status = nfsd_map_name_to_uid(argp->rqstp, 301 status = nfsd_map_name_to_uid(argp->rqstp,
302 buf, dummy32, &ace->who); 302 buf, dummy32, &ace->who_uid);
303 if (status) 303 if (status)
304 return status; 304 return status;
305 } 305 }
@@ -464,9 +464,16 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
464 READ32(dummy); 464 READ32(dummy);
465 READ_BUF(dummy * 4); 465 READ_BUF(dummy * 4);
466 if (cbs->flavor == (u32)(-1)) { 466 if (cbs->flavor == (u32)(-1)) {
467 cbs->uid = uid; 467 kuid_t kuid = make_kuid(&init_user_ns, uid);
468 cbs->gid = gid; 468 kgid_t kgid = make_kgid(&init_user_ns, gid);
469 cbs->flavor = RPC_AUTH_UNIX; 469 if (uid_valid(kuid) && gid_valid(kgid)) {
470 cbs->uid = kuid;
471 cbs->gid = kgid;
472 cbs->flavor = RPC_AUTH_UNIX;
473 } else {
474 dprintk("RPC_AUTH_UNIX with invalid"
475 "uid or gid ignoring!\n");
476 }
470 } 477 }
471 break; 478 break;
472 case RPC_AUTH_GSS: 479 case RPC_AUTH_GSS:
@@ -1926,7 +1933,7 @@ static u32 nfs4_file_type(umode_t mode)
1926} 1933}
1927 1934
1928static __be32 1935static __be32
1929nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1936nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid,
1930 __be32 **p, int *buflen) 1937 __be32 **p, int *buflen)
1931{ 1938{
1932 int status; 1939 int status;
@@ -1935,10 +1942,10 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1935 return nfserr_resource; 1942 return nfserr_resource;
1936 if (whotype != NFS4_ACL_WHO_NAMED) 1943 if (whotype != NFS4_ACL_WHO_NAMED)
1937 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); 1944 status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
1938 else if (group) 1945 else if (gid_valid(gid))
1939 status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); 1946 status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1));
1940 else 1947 else
1941 status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); 1948 status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1));
1942 if (status < 0) 1949 if (status < 0)
1943 return nfserrno(status); 1950 return nfserrno(status);
1944 *p = xdr_encode_opaque(*p, NULL, status); 1951 *p = xdr_encode_opaque(*p, NULL, status);
@@ -1948,22 +1955,33 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
1948} 1955}
1949 1956
1950static inline __be32 1957static inline __be32
1951nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) 1958nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen)
1952{ 1959{
1953 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); 1960 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID,
1961 p, buflen);
1954} 1962}
1955 1963
1956static inline __be32 1964static inline __be32
1957nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) 1965nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen)
1958{ 1966{
1959 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); 1967 return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group,
1968 p, buflen);
1960} 1969}
1961 1970
1962static inline __be32 1971static inline __be32
1963nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, 1972nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
1964 __be32 **p, int *buflen) 1973 __be32 **p, int *buflen)
1965{ 1974{
1966 return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); 1975 kuid_t uid = INVALID_UID;
1976 kgid_t gid = INVALID_GID;
1977
1978 if (ace->whotype == NFS4_ACL_WHO_NAMED) {
1979 if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
1980 gid = ace->who_gid;
1981 else
1982 uid = ace->who_uid;
1983 }
1984 return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen);
1967} 1985}
1968 1986
1969#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \ 1987#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -1997,7 +2015,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
1997 if (path.dentry != path.mnt->mnt_root) 2015 if (path.dentry != path.mnt->mnt_root)
1998 break; 2016 break;
1999 } 2017 }
2000 err = vfs_getattr(path.mnt, path.dentry, stat); 2018 err = vfs_getattr(&path, stat);
2001 path_put(&path); 2019 path_put(&path);
2002 return err; 2020 return err;
2003} 2021}
@@ -2049,7 +2067,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2049 goto out; 2067 goto out;
2050 } 2068 }
2051 2069
2052 err = vfs_getattr(exp->ex_path.mnt, dentry, &stat); 2070 err = vfs_getattr(&path, &stat);
2053 if (err) 2071 if (err)
2054 goto out_nfserr; 2072 goto out_nfserr;
2055 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | 2073 if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
@@ -2223,9 +2241,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
2223 WRITE32(ace->type); 2241 WRITE32(ace->type);
2224 WRITE32(ace->flag); 2242 WRITE32(ace->flag);
2225 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); 2243 WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
2226 status = nfsd4_encode_aclname(rqstp, ace->whotype, 2244 status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
2227 ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
2228 &p, &buflen);
2229 if (status == nfserr_resource) 2245 if (status == nfserr_resource)
2230 goto out_resource; 2246 goto out_resource;
2231 if (status) 2247 if (status)
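
The nfs4xdr.c hunks above call vfs_getattr() in its two-argument form, which takes a struct path instead of a separate vfsmount/dentry pair. A minimal sketch under that assumption; the wrapper name is made up for illustration:

#include <linux/fs.h>
#include <linux/path.h>
#include <linux/stat.h>

static int stat_export_object(struct vfsmount *mnt, struct dentry *dentry,
			      struct kstat *stat)
{
	struct path path = { .mnt = mnt, .dentry = dentry };

	/* was: vfs_getattr(mnt, dentry, stat) */
	return vfs_getattr(&path, stat);
}
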
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ca43664422f6..62c1ee128aeb 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -281,7 +281,6 @@ static struct svc_cacherep *
281nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) 281nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
282{ 282{
283 struct svc_cacherep *rp; 283 struct svc_cacherep *rp;
284 struct hlist_node *hn;
285 struct hlist_head *rh; 284 struct hlist_head *rh;
286 __be32 xid = rqstp->rq_xid; 285 __be32 xid = rqstp->rq_xid;
287 u32 proto = rqstp->rq_prot, 286 u32 proto = rqstp->rq_prot,
@@ -289,7 +288,7 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
289 proc = rqstp->rq_proc; 288 proc = rqstp->rq_proc;
290 289
291 rh = &cache_hash[request_hash(xid)]; 290 rh = &cache_hash[request_hash(xid)];
292 hlist_for_each_entry(rp, hn, rh, c_hash) { 291 hlist_for_each_entry(rp, rh, c_hash) {
293 if (xid == rp->c_xid && proc == rp->c_proc && 292 if (xid == rp->c_xid && proc == rp->c_proc &&
294 proto == rp->c_prot && vers == rp->c_vers && 293 proto == rp->c_prot && vers == rp->c_vers &&
295 rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && 294 rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum &&
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 8ead2c25ce65..13a21c8fca49 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -85,7 +85,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
85 85
86static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) 86static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
87{ 87{
88 ino_t ino = file->f_path.dentry->d_inode->i_ino; 88 ino_t ino = file_inode(file)->i_ino;
89 char *data; 89 char *data;
90 ssize_t rv; 90 ssize_t rv;
91 91
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index de23db255c69..07a473fd49bc 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -56,8 +56,8 @@ extern struct svc_version nfsd_version2, nfsd_version3,
56extern u32 nfsd_supported_minorversion; 56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 57extern struct mutex nfsd_mutex;
58extern spinlock_t nfsd_drc_lock; 58extern spinlock_t nfsd_drc_lock;
59extern unsigned int nfsd_drc_max_mem; 59extern unsigned long nfsd_drc_max_mem;
60extern unsigned int nfsd_drc_mem_used; 60extern unsigned long nfsd_drc_mem_used;
61 61
62extern const struct seq_operations nfs_exports_op; 62extern const struct seq_operations nfs_exports_op;
63 63
@@ -106,7 +106,7 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
106 * NFSv4 State 106 * NFSv4 State
107 */ 107 */
108#ifdef CONFIG_NFSD_V4 108#ifdef CONFIG_NFSD_V4
109extern unsigned int max_delegations; 109extern unsigned long max_delegations;
110void nfs4_state_init(void); 110void nfs4_state_init(void);
111int nfsd4_init_slabs(void); 111int nfsd4_init_slabs(void);
112void nfsd4_free_slabs(void); 112void nfsd4_free_slabs(void);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index aad6d457b9e8..54c6b3d3cc79 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -26,17 +26,13 @@ static __be32
26nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) 26nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp)
27{ 27{
28 if (err) return err; 28 if (err) return err;
29 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, 29 return fh_getattr(&resp->fh, &resp->stat);
30 resp->fh.fh_dentry,
31 &resp->stat));
32} 30}
33static __be32 31static __be32
34nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) 32nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
35{ 33{
36 if (err) return err; 34 if (err) return err;
37 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, 35 return fh_getattr(&resp->fh, &resp->stat);
38 resp->fh.fh_dentry,
39 &resp->stat));
40} 36}
41/* 37/*
42 * Get a file's attributes 38 * Get a file's attributes
@@ -150,9 +146,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
150 &resp->count); 146 &resp->count);
151 147
152 if (nfserr) return nfserr; 148 if (nfserr) return nfserr;
153 return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, 149 return fh_getattr(&resp->fh, &resp->stat);
154 resp->fh.fh_dentry,
155 &resp->stat));
156} 150}
157 151
158/* 152/*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6cee5db72047..262df5ccbf59 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -59,8 +59,8 @@ DEFINE_MUTEX(nfsd_mutex);
59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 59 * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
60 */ 60 */
61spinlock_t nfsd_drc_lock; 61spinlock_t nfsd_drc_lock;
62unsigned int nfsd_drc_max_mem; 62unsigned long nfsd_drc_max_mem;
63unsigned int nfsd_drc_mem_used; 63unsigned long nfsd_drc_mem_used;
64 64
65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 65#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
66static struct svc_stat nfsd_acl_svcstats; 66static struct svc_stat nfsd_acl_svcstats;
@@ -342,7 +342,7 @@ static void set_max_drc(void)
342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; 342 >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
343 nfsd_drc_mem_used = 0; 343 nfsd_drc_mem_used = 0;
344 spin_lock_init(&nfsd_drc_lock); 344 spin_lock_init(&nfsd_drc_lock);
345 dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); 345 dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
346} 346}
347 347
348static int nfsd_get_default_max_blksize(void) 348static int nfsd_get_default_max_blksize(void)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 979b42106979..9c769a47ac5a 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -4,6 +4,7 @@
4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 4 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
5 */ 5 */
6 6
7#include "vfs.h"
7#include "xdr.h" 8#include "xdr.h"
8#include "auth.h" 9#include "auth.h"
9 10
@@ -100,12 +101,14 @@ decode_sattr(__be32 *p, struct iattr *iap)
100 iap->ia_mode = tmp; 101 iap->ia_mode = tmp;
101 } 102 }
102 if ((tmp = ntohl(*p++)) != (u32)-1) { 103 if ((tmp = ntohl(*p++)) != (u32)-1) {
103 iap->ia_valid |= ATTR_UID; 104 iap->ia_uid = make_kuid(&init_user_ns, tmp);
104 iap->ia_uid = tmp; 105 if (uid_valid(iap->ia_uid))
106 iap->ia_valid |= ATTR_UID;
105 } 107 }
106 if ((tmp = ntohl(*p++)) != (u32)-1) { 108 if ((tmp = ntohl(*p++)) != (u32)-1) {
107 iap->ia_valid |= ATTR_GID; 109 iap->ia_gid = make_kgid(&init_user_ns, tmp);
108 iap->ia_gid = tmp; 110 if (gid_valid(iap->ia_gid))
111 iap->ia_valid |= ATTR_GID;
109 } 112 }
110 if ((tmp = ntohl(*p++)) != (u32)-1) { 113 if ((tmp = ntohl(*p++)) != (u32)-1) {
111 iap->ia_valid |= ATTR_SIZE; 114 iap->ia_valid |= ATTR_SIZE;
@@ -151,8 +154,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
151 *p++ = htonl(nfs_ftypes[type >> 12]); 154 *p++ = htonl(nfs_ftypes[type >> 12]);
152 *p++ = htonl((u32) stat->mode); 155 *p++ = htonl((u32) stat->mode);
153 *p++ = htonl((u32) stat->nlink); 156 *p++ = htonl((u32) stat->nlink);
154 *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); 157 *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
155 *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); 158 *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
156 159
157 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { 160 if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
158 *p++ = htonl(NFS_MAXPATHLEN); 161 *p++ = htonl(NFS_MAXPATHLEN);
@@ -194,11 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
194} 197}
195 198
196/* Helper function for NFSv2 ACL code */ 199/* Helper function for NFSv2 ACL code */
197__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) 200__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat)
198{ 201{
199 struct kstat stat; 202 return encode_fattr(rqstp, p, fhp, stat);
200 vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &stat);
201 return encode_fattr(rqstp, p, fhp, &stat);
202} 203}
203 204
204/* 205/*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d1c229feed52..1a8c7391f7ae 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -152,8 +152,8 @@ struct nfsd4_channel_attrs {
152 152
153struct nfsd4_cb_sec { 153struct nfsd4_cb_sec {
154 u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */ 154 u32 flavor; /* (u32)(-1) used to mean "no valid flavor" */
155 u32 uid; 155 kuid_t uid;
156 u32 gid; 156 kgid_t gid;
157}; 157};
158 158
159struct nfsd4_create_session { 159struct nfsd4_create_session {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d586117fa94a..2a7eb536de0b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -401,8 +401,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
401 401
402 /* Revoke setuid/setgid on chown */ 402 /* Revoke setuid/setgid on chown */
403 if (!S_ISDIR(inode->i_mode) && 403 if (!S_ISDIR(inode->i_mode) &&
404 (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || 404 (((iap->ia_valid & ATTR_UID) && !uid_eq(iap->ia_uid, inode->i_uid)) ||
405 ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { 405 ((iap->ia_valid & ATTR_GID) && !gid_eq(iap->ia_gid, inode->i_gid)))) {
406 iap->ia_valid |= ATTR_KILL_PRIV; 406 iap->ia_valid |= ATTR_KILL_PRIV;
407 if (iap->ia_valid & ATTR_MODE) { 407 if (iap->ia_valid & ATTR_MODE) {
408 /* we're setting mode too, just clear the s*id bits */ 408 /* we're setting mode too, just clear the s*id bits */
@@ -979,7 +979,7 @@ static void kill_suid(struct dentry *dentry)
979 */ 979 */
980static int wait_for_concurrent_writes(struct file *file) 980static int wait_for_concurrent_writes(struct file *file)
981{ 981{
982 struct inode *inode = file->f_path.dentry->d_inode; 982 struct inode *inode = file_inode(file);
983 static ino_t last_ino; 983 static ino_t last_ino;
984 static dev_t last_dev; 984 static dev_t last_dev;
985 int err = 0; 985 int err = 0;
@@ -1070,7 +1070,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1070 if (err) 1070 if (err)
1071 return err; 1071 return err;
1072 1072
1073 inode = file->f_path.dentry->d_inode; 1073 inode = file_inode(file);
1074 1074
1075 /* Get readahead parameters */ 1075 /* Get readahead parameters */
1076 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); 1076 ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
@@ -1205,7 +1205,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
1205 * send along the gid on create when it tries to implement 1205 * send along the gid on create when it tries to implement
1206 * setgid directories via NFS: 1206 * setgid directories via NFS:
1207 */ 1207 */
1208 if (current_fsuid() != 0) 1208 if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
1209 iap->ia_valid &= ~(ATTR_UID|ATTR_GID); 1209 iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1210 if (iap->ia_valid) 1210 if (iap->ia_valid)
1211 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); 1211 return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
@@ -1957,7 +1957,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
1957 offset = *offsetp; 1957 offset = *offsetp;
1958 1958
1959 while (1) { 1959 while (1) {
1960 struct inode *dir_inode = file->f_path.dentry->d_inode; 1960 struct inode *dir_inode = file_inode(file);
1961 unsigned int reclen; 1961 unsigned int reclen;
1962 1962
1963 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1963 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -2150,7 +2150,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
2150 * with NFSv3. 2150 * with NFSv3.
2151 */ 2151 */
2152 if ((acc & NFSD_MAY_OWNER_OVERRIDE) && 2152 if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
2153 inode->i_uid == current_fsuid()) 2153 uid_eq(inode->i_uid, current_fsuid()))
2154 return 0; 2154 return 0;
2155 2155
2156 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 2156 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
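The vfs.c hunks above show the comparison side of the same kuid_t/kgid_t conversion: the types are opaque, so == and != become uid_eq()/gid_eq(), and the literal root check against 0 becomes a comparison with GLOBAL_ROOT_UID. A minimal sketch of both checks:

    #include <linux/uidgid.h>
    #include <linux/cred.h>

            /* was: current_fsuid() != 0 */
            if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
                    return -EPERM;

            /* was: inode->i_uid == current_fsuid() */
            if (uid_eq(inode->i_uid, current_fsuid()))
                    return 0;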
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 359594c393d2..5b5894159f22 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -6,6 +6,7 @@
6#define LINUX_NFSD_VFS_H 6#define LINUX_NFSD_VFS_H
7 7
8#include "nfsfh.h" 8#include "nfsfh.h"
9#include "nfsd.h"
9 10
10/* 11/*
11 * Flags for nfsd_permission 12 * Flags for nfsd_permission
@@ -125,4 +126,11 @@ static inline void fh_drop_write(struct svc_fh *fh)
125 } 126 }
126} 127}
127 128
129static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
130{
131 struct path p = {.mnt = fh->fh_export->ex_path.mnt,
132 .dentry = fh->fh_dentry};
133 return nfserrno(vfs_getattr(&p, stat));
134}
135
128#endif /* LINUX_NFSD_VFS_H */ 136#endif /* LINUX_NFSD_VFS_H */
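The fh_getattr() inline added above wraps the new vfs_getattr() prototype, which now takes a struct path rather than a separate vfsmount/dentry pair; the nfsproc.c and nfsxdr.c hunks earlier in this diff are its callers. A sketch of a call site, assuming an already-verified svc_fh:

            struct kstat stat;
            __be32 err;

            err = fh_getattr(fhp, &stat);   /* builds the struct path and calls vfs_getattr() */
            if (err)
                    return err;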
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 53b1863dd8f6..4f0481d63804 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -167,7 +167,7 @@ int nfssvc_encode_entry(void *, const char *name,
167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); 167int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
168 168
169/* Helper functions for NFSv2 ACL code */ 169/* Helper functions for NFSv2 ACL code */
170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); 170__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); 171__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
172 172
173#endif /* LINUX_NFSD_H */ 173#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 7df980eb0562..b6d5542a4ac8 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -136,6 +136,7 @@ struct nfsd3_accessres {
136 __be32 status; 136 __be32 status;
137 struct svc_fh fh; 137 struct svc_fh fh;
138 __u32 access; 138 __u32 access;
139 struct kstat stat;
139}; 140};
140 141
141struct nfsd3_readlinkres { 142struct nfsd3_readlinkres {
@@ -225,6 +226,7 @@ struct nfsd3_getaclres {
225 int mask; 226 int mask;
226 struct posix_acl *acl_access; 227 struct posix_acl *acl_access;
227 struct posix_acl *acl_default; 228 struct posix_acl *acl_default;
229 struct kstat stat;
228}; 230};
229 231
230/* dummy type for release */ 232/* dummy type for release */
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 251da07b2a1d..80da8eb27393 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,5 @@
1config NILFS2_FS 1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support"
3 depends on EXPERIMENTAL
4 select CRC32 3 select CRC32
5 help 4 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous 5 NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index df1a7fb238d1..f30b017740a7 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -259,7 +259,7 @@ static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
259static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 259static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
260{ 260{
261 loff_t pos = filp->f_pos; 261 loff_t pos = filp->f_pos;
262 struct inode *inode = filp->f_dentry->d_inode; 262 struct inode *inode = file_inode(filp);
263 struct super_block *sb = inode->i_sb; 263 struct super_block *sb = inode->i_sb;
264 unsigned int offset = pos & ~PAGE_CACHE_MASK; 264 unsigned int offset = pos & ~PAGE_CACHE_MASK;
265 unsigned long n = pos >> PAGE_CACHE_SHIFT; 265 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 61946883025c..08fdb77852ac 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -67,7 +67,7 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
67static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 67static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
68{ 68{
69 struct page *page = vmf->page; 69 struct page *page = vmf->page;
70 struct inode *inode = vma->vm_file->f_dentry->d_inode; 70 struct inode *inode = file_inode(vma->vm_file);
71 struct nilfs_transaction_info ti; 71 struct nilfs_transaction_info ti;
72 int ret = 0; 72 int ret = 0;
73 73
@@ -126,7 +126,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
126 nilfs_transaction_commit(inode->i_sb); 126 nilfs_transaction_commit(inode->i_sb);
127 127
128 mapped: 128 mapped:
129 wait_on_page_writeback(page); 129 wait_for_stable_page(page);
130 out: 130 out:
131 sb_end_pagefault(inode->i_sb); 131 sb_end_pagefault(inode->i_sb);
132 return block_page_mkwrite_return(ret); 132 return block_page_mkwrite_return(ret);
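The nilfs_page_mkwrite() hunk above swaps wait_on_page_writeback() for wait_for_stable_page(), which blocks only when the backing device actually requires stable pages during writeback; the ocfs2_grab_pages_for_write() hunk later in this diff adds the same call. A rough sketch of the usual page_mkwrite tail (everything except the two named calls is a placeholder):

            lock_page(page);
            /* ... mark the page dirty / map its buffers ... */
            wait_for_stable_page(page);     /* waits only if the bdi demands stable writes */
            return VM_FAULT_LOCKED;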
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..b44bdb291b84 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
664 if (ret < 0) 664 if (ret < 0)
665 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
666 "cannot read source blocks: err=%d\n", ret); 666 "cannot read source blocks: err=%d\n", ret);
667 else 667 else {
668 if (nilfs_sb_need_update(nilfs))
669 set_nilfs_discontinued(nilfs);
668 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 670 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
671 }
669 672
670 nilfs_remove_all_gcinodes(nilfs); 673 nilfs_remove_all_gcinodes(nilfs);
671 clear_nilfs_gc_running(nilfs); 674 clear_nilfs_gc_running(nilfs);
@@ -793,7 +796,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
793 796
794long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 797long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
795{ 798{
796 struct inode *inode = filp->f_dentry->d_inode; 799 struct inode *inode = file_inode(filp);
797 void __user *argp = (void __user *)arg; 800 void __user *argp = (void __user *)arg;
798 801
799 switch (cmd) { 802 switch (cmd) {
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 1d0c0b84c5a3..9de78f08989e 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -517,11 +517,11 @@ static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
517 517
518 if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { 518 if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) {
519 *lenp = NILFS_FID_SIZE_CONNECTABLE; 519 *lenp = NILFS_FID_SIZE_CONNECTABLE;
520 return 255; 520 return FILEID_INVALID;
521 } 521 }
522 if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { 522 if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) {
523 *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; 523 *lenp = NILFS_FID_SIZE_NON_CONNECTABLE;
524 return 255; 524 return FILEID_INVALID;
525 } 525 }
526 526
527 fid->cno = root->cno; 527 fid->cno = root->cno;
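Both this nilfs2 hunk and the ocfs2_encode_fh() hunk later in the diff replace the bare 255 return with FILEID_INVALID from <linux/exportfs.h>; the value is the same, the magic number just gains a name. A sketch of the buffer-too-small check (MY_FID_SIZE is a placeholder):

    #include <linux/exportfs.h>

            if (*lenp < MY_FID_SIZE) {
                    *lenp = MY_FID_SIZE;
                    return FILEID_INVALID;  /* was: return 255; */
            }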
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 08b886f119ce..2bfe6dc413a0 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -174,7 +174,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
174 struct dnotify_struct **prev; 174 struct dnotify_struct **prev;
175 struct inode *inode; 175 struct inode *inode;
176 176
177 inode = filp->f_path.dentry->d_inode; 177 inode = file_inode(filp);
178 if (!S_ISDIR(inode->i_mode)) 178 if (!S_ISDIR(inode->i_mode))
179 return; 179 return;
180 180
@@ -296,7 +296,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
296 } 296 }
297 297
298 /* dnotify only works on directories */ 298 /* dnotify only works on directories */
299 inode = filp->f_path.dentry->d_inode; 299 inode = file_inode(filp);
300 if (!S_ISDIR(inode->i_mode)) { 300 if (!S_ISDIR(inode->i_mode)) {
301 error = -ENOTDIR; 301 error = -ENOTDIR;
302 goto out_err; 302 goto out_err;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9ff4a5ee6e20..5d8444268a16 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -466,7 +466,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
466 466
467 ret = -ENOTDIR; 467 ret = -ENOTDIR;
468 if ((flags & FAN_MARK_ONLYDIR) && 468 if ((flags & FAN_MARK_ONLYDIR) &&
469 !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) { 469 !(S_ISDIR(file_inode(f.file)->i_mode))) {
470 fdput(f); 470 fdput(f);
471 goto out; 471 goto out;
472 } 472 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 6baadb5a8430..4bb21d67d9b1 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,7 +52,6 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
52void __fsnotify_update_child_dentry_flags(struct inode *inode) 52void __fsnotify_update_child_dentry_flags(struct inode *inode)
53{ 53{
54 struct dentry *alias; 54 struct dentry *alias;
55 struct hlist_node *p;
56 int watched; 55 int watched;
57 56
58 if (!S_ISDIR(inode->i_mode)) 57 if (!S_ISDIR(inode->i_mode))
@@ -64,7 +63,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
64 spin_lock(&inode->i_lock); 63 spin_lock(&inode->i_lock);
65 /* run all of the dentries associated with this inode. Since this is a 64 /* run all of the dentries associated with this inode. Since this is a
66 * directory, there damn well better only be one item on this list */ 65 * directory, there damn well better only be one item on this list */
67 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { 66 hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
68 struct dentry *child; 67 struct dentry *child;
69 68
70 /* run all of the children of the original inode and fix their 69 /* run all of the children of the original inode and fix their
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index f31e90fc050d..74825be65b7b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -36,12 +36,11 @@
36static void fsnotify_recalc_inode_mask_locked(struct inode *inode) 36static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
37{ 37{
38 struct fsnotify_mark *mark; 38 struct fsnotify_mark *mark;
39 struct hlist_node *pos;
40 __u32 new_mask = 0; 39 __u32 new_mask = 0;
41 40
42 assert_spin_locked(&inode->i_lock); 41 assert_spin_locked(&inode->i_lock);
43 42
44 hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) 43 hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list)
45 new_mask |= mark->mask; 44 new_mask |= mark->mask;
46 inode->i_fsnotify_mask = new_mask; 45 inode->i_fsnotify_mask = new_mask;
47} 46}
@@ -87,11 +86,11 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
87void fsnotify_clear_marks_by_inode(struct inode *inode) 86void fsnotify_clear_marks_by_inode(struct inode *inode)
88{ 87{
89 struct fsnotify_mark *mark, *lmark; 88 struct fsnotify_mark *mark, *lmark;
90 struct hlist_node *pos, *n; 89 struct hlist_node *n;
91 LIST_HEAD(free_list); 90 LIST_HEAD(free_list);
92 91
93 spin_lock(&inode->i_lock); 92 spin_lock(&inode->i_lock);
94 hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) { 93 hlist_for_each_entry_safe(mark, n, &inode->i_fsnotify_marks, i.i_list) {
95 list_add(&mark->i.free_i_list, &free_list); 94 list_add(&mark->i.free_i_list, &free_list);
96 hlist_del_init_rcu(&mark->i.i_list); 95 hlist_del_init_rcu(&mark->i.i_list);
97 fsnotify_get_mark(mark); 96 fsnotify_get_mark(mark);
@@ -129,11 +128,10 @@ static struct fsnotify_mark *fsnotify_find_inode_mark_locked(
129 struct inode *inode) 128 struct inode *inode)
130{ 129{
131 struct fsnotify_mark *mark; 130 struct fsnotify_mark *mark;
132 struct hlist_node *pos;
133 131
134 assert_spin_locked(&inode->i_lock); 132 assert_spin_locked(&inode->i_lock);
135 133
136 hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) { 134 hlist_for_each_entry(mark, &inode->i_fsnotify_marks, i.i_list) {
137 if (mark->group == group) { 135 if (mark->group == group) {
138 fsnotify_get_mark(mark); 136 fsnotify_get_mark(mark);
139 return mark; 137 return mark;
@@ -194,8 +192,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
194 struct fsnotify_group *group, struct inode *inode, 192 struct fsnotify_group *group, struct inode *inode,
195 int allow_dups) 193 int allow_dups)
196{ 194{
197 struct fsnotify_mark *lmark; 195 struct fsnotify_mark *lmark, *last = NULL;
198 struct hlist_node *node, *last = NULL;
199 int ret = 0; 196 int ret = 0;
200 197
201 mark->flags |= FSNOTIFY_MARK_FLAG_INODE; 198 mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@@ -214,8 +211,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
214 } 211 }
215 212
216 /* should mark be in the middle of the current list? */ 213 /* should mark be in the middle of the current list? */
217 hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) { 214 hlist_for_each_entry(lmark, &inode->i_fsnotify_marks, i.i_list) {
218 last = node; 215 last = lmark;
219 216
220 if ((lmark->group == group) && !allow_dups) { 217 if ((lmark->group == group) && !allow_dups) {
221 ret = -EEXIST; 218 ret = -EEXIST;
@@ -235,7 +232,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
235 232
236 BUG_ON(last == NULL); 233 BUG_ON(last == NULL);
237 /* mark should be the last entry. last is the current last entry */ 234 /* mark should be the last entry. last is the current last entry */
238 hlist_add_after_rcu(last, &mark->i.i_list); 235 hlist_add_after_rcu(&last->i.i_list, &mark->i.i_list);
239out: 236out:
240 fsnotify_recalc_inode_mask_locked(inode); 237 fsnotify_recalc_inode_mask_locked(inode);
241 spin_unlock(&inode->i_lock); 238 spin_unlock(&inode->i_lock);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 871569c7d609..4216308b81b4 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -197,7 +197,6 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
197{ 197{
198 /* ideally the idr is empty and we won't hit the BUG in the callback */ 198 /* ideally the idr is empty and we won't hit the BUG in the callback */
199 idr_for_each(&group->inotify_data.idr, idr_callback, group); 199 idr_for_each(&group->inotify_data.idr, idr_callback, group);
200 idr_remove_all(&group->inotify_data.idr);
201 idr_destroy(&group->inotify_data.idr); 200 idr_destroy(&group->inotify_data.idr);
202 atomic_dec(&group->inotify_data.user->inotify_devs); 201 atomic_dec(&group->inotify_data.user->inotify_devs);
203 free_uid(group->inotify_data.user); 202 free_uid(group->inotify_data.user);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 228a2c2ad8d7..e0f7c1241a6a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -364,22 +364,20 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
364{ 364{
365 int ret; 365 int ret;
366 366
367 do { 367 idr_preload(GFP_KERNEL);
368 if (unlikely(!idr_pre_get(idr, GFP_KERNEL))) 368 spin_lock(idr_lock);
369 return -ENOMEM;
370 369
371 spin_lock(idr_lock); 370 ret = idr_alloc(idr, i_mark, *last_wd + 1, 0, GFP_NOWAIT);
372 ret = idr_get_new_above(idr, i_mark, *last_wd + 1, 371 if (ret >= 0) {
373 &i_mark->wd);
374 /* we added the mark to the idr, take a reference */ 372 /* we added the mark to the idr, take a reference */
375 if (!ret) { 373 i_mark->wd = ret;
376 *last_wd = i_mark->wd; 374 *last_wd = i_mark->wd;
377 fsnotify_get_mark(&i_mark->fsn_mark); 375 fsnotify_get_mark(&i_mark->fsn_mark);
378 } 376 }
379 spin_unlock(idr_lock);
380 } while (ret == -EAGAIN);
381 377
382 return ret; 378 spin_unlock(idr_lock);
379 idr_preload_end();
380 return ret < 0 ? ret : 0;
383} 381}
384 382
385static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, 383static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
@@ -576,8 +574,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
576 574
577 /* don't allow invalid bits: we don't want flags set */ 575 /* don't allow invalid bits: we don't want flags set */
578 mask = inotify_arg_to_mask(arg); 576 mask = inotify_arg_to_mask(arg);
579 if (unlikely(!(mask & IN_ALL_EVENTS)))
580 return -EINVAL;
581 577
582 fsn_mark = fsnotify_find_inode_mark(group, inode); 578 fsn_mark = fsnotify_find_inode_mark(group, inode);
583 if (!fsn_mark) 579 if (!fsn_mark)
@@ -629,8 +625,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
629 625
630 /* don't allow invalid bits: we don't want flags set */ 626 /* don't allow invalid bits: we don't want flags set */
631 mask = inotify_arg_to_mask(arg); 627 mask = inotify_arg_to_mask(arg);
632 if (unlikely(!(mask & IN_ALL_EVENTS)))
633 return -EINVAL;
634 628
635 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 629 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
636 if (unlikely(!tmp_i_mark)) 630 if (unlikely(!tmp_i_mark))
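The inotify_add_to_idr() rewrite above follows the reworked idr API: preallocate with idr_preload() before taking the spinlock, allocate under the lock with idr_alloc() and GFP_NOWAIT, and drop the old idr_pre_get()/idr_get_new_above() retry loop. A minimal sketch of the pattern (the function, lock, and idr here are placeholders, not inotify's):

    #include <linux/idr.h>
    #include <linux/spinlock.h>

    static int alloc_id(struct idr *idr, spinlock_t *lock, void *ptr, int start)
    {
            int id;

            idr_preload(GFP_KERNEL);        /* may sleep, so done outside the lock */
            spin_lock(lock);
            id = idr_alloc(idr, ptr, start, 0, GFP_NOWAIT); /* end == 0: no upper bound */
            spin_unlock(lock);
            idr_preload_end();

            return id;      /* allocated id >= start, or -ENOMEM / -ENOSPC */
    }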
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 4df58b8ea64a..68ca5a8704b5 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -33,12 +33,12 @@
33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) 33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
34{ 34{
35 struct fsnotify_mark *mark, *lmark; 35 struct fsnotify_mark *mark, *lmark;
36 struct hlist_node *pos, *n; 36 struct hlist_node *n;
37 struct mount *m = real_mount(mnt); 37 struct mount *m = real_mount(mnt);
38 LIST_HEAD(free_list); 38 LIST_HEAD(free_list);
39 39
40 spin_lock(&mnt->mnt_root->d_lock); 40 spin_lock(&mnt->mnt_root->d_lock);
41 hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) { 41 hlist_for_each_entry_safe(mark, n, &m->mnt_fsnotify_marks, m.m_list) {
42 list_add(&mark->m.free_m_list, &free_list); 42 list_add(&mark->m.free_m_list, &free_list);
43 hlist_del_init_rcu(&mark->m.m_list); 43 hlist_del_init_rcu(&mark->m.m_list);
44 fsnotify_get_mark(mark); 44 fsnotify_get_mark(mark);
@@ -71,12 +71,11 @@ static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
71{ 71{
72 struct mount *m = real_mount(mnt); 72 struct mount *m = real_mount(mnt);
73 struct fsnotify_mark *mark; 73 struct fsnotify_mark *mark;
74 struct hlist_node *pos;
75 __u32 new_mask = 0; 74 __u32 new_mask = 0;
76 75
77 assert_spin_locked(&mnt->mnt_root->d_lock); 76 assert_spin_locked(&mnt->mnt_root->d_lock);
78 77
79 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) 78 hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list)
80 new_mask |= mark->mask; 79 new_mask |= mark->mask;
81 m->mnt_fsnotify_mask = new_mask; 80 m->mnt_fsnotify_mask = new_mask;
82} 81}
@@ -114,11 +113,10 @@ static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_
114{ 113{
115 struct mount *m = real_mount(mnt); 114 struct mount *m = real_mount(mnt);
116 struct fsnotify_mark *mark; 115 struct fsnotify_mark *mark;
117 struct hlist_node *pos;
118 116
119 assert_spin_locked(&mnt->mnt_root->d_lock); 117 assert_spin_locked(&mnt->mnt_root->d_lock);
120 118
121 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) { 119 hlist_for_each_entry(mark, &m->mnt_fsnotify_marks, m.m_list) {
122 if (mark->group == group) { 120 if (mark->group == group) {
123 fsnotify_get_mark(mark); 121 fsnotify_get_mark(mark);
124 return mark; 122 return mark;
@@ -153,8 +151,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
153 int allow_dups) 151 int allow_dups)
154{ 152{
155 struct mount *m = real_mount(mnt); 153 struct mount *m = real_mount(mnt);
156 struct fsnotify_mark *lmark; 154 struct fsnotify_mark *lmark, *last = NULL;
157 struct hlist_node *node, *last = NULL;
158 int ret = 0; 155 int ret = 0;
159 156
160 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; 157 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@@ -173,8 +170,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
173 } 170 }
174 171
175 /* should mark be in the middle of the current list? */ 172 /* should mark be in the middle of the current list? */
176 hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) { 173 hlist_for_each_entry(lmark, &m->mnt_fsnotify_marks, m.m_list) {
177 last = node; 174 last = lmark;
178 175
179 if ((lmark->group == group) && !allow_dups) { 176 if ((lmark->group == group) && !allow_dups) {
180 ret = -EEXIST; 177 ret = -EEXIST;
@@ -194,7 +191,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
194 191
195 BUG_ON(last == NULL); 192 BUG_ON(last == NULL);
196 /* mark should be the last entry. last is the current last entry */ 193 /* mark should be the last entry. last is the current last entry */
197 hlist_add_after_rcu(last, &mark->m.m_list); 194 hlist_add_after_rcu(&last->m.m_list, &mark->m.m_list);
198out: 195out:
199 fsnotify_recalc_vfsmount_mask_locked(mnt); 196 fsnotify_recalc_vfsmount_mask_locked(mnt);
200 spin_unlock(&mnt->mnt_root->d_lock); 197 spin_unlock(&mnt->mnt_root->d_lock);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 99e36107ff60..aa411c3f20e9 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1101,7 +1101,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1101{ 1101{
1102 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; 1102 s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1103 loff_t fpos, i_size; 1103 loff_t fpos, i_size;
1104 struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode; 1104 struct inode *bmp_vi, *vdir = file_inode(filp);
1105 struct super_block *sb = vdir->i_sb; 1105 struct super_block *sb = vdir->i_sb;
1106 ntfs_inode *ndir = NTFS_I(vdir); 1106 ntfs_inode *ndir = NTFS_I(vdir);
1107 ntfs_volume *vol = NTFS_SB(sb); 1107 ntfs_volume *vol = NTFS_SB(sb);
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 260b16281fc3..8a404576fb26 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -65,7 +65,20 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
65 65
66 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 66 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
67 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 67 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
68 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); 68 switch(acl->a_entries[n].e_tag) {
69 case ACL_USER:
70 acl->a_entries[n].e_uid =
71 make_kuid(&init_user_ns,
72 le32_to_cpu(entry->e_id));
73 break;
74 case ACL_GROUP:
75 acl->a_entries[n].e_gid =
76 make_kgid(&init_user_ns,
77 le32_to_cpu(entry->e_id));
78 break;
79 default:
80 break;
81 }
69 value += sizeof(struct posix_acl_entry); 82 value += sizeof(struct posix_acl_entry);
70 83
71 } 84 }
@@ -91,7 +104,21 @@ static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
91 for (n = 0; n < acl->a_count; n++, entry++) { 104 for (n = 0; n < acl->a_count; n++, entry++) {
92 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 105 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
93 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 106 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
94 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); 107 switch(acl->a_entries[n].e_tag) {
108 case ACL_USER:
109 entry->e_id = cpu_to_le32(
110 from_kuid(&init_user_ns,
111 acl->a_entries[n].e_uid));
112 break;
113 case ACL_GROUP:
114 entry->e_id = cpu_to_le32(
115 from_kgid(&init_user_ns,
116 acl->a_entries[n].e_gid));
117 break;
118 default:
119 entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
120 break;
121 }
95 } 122 }
96 return ocfs2_acl; 123 return ocfs2_acl;
97} 124}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 31b9463fba1f..b8a9d87231b1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6751,8 +6751,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
6751 mlog_errno(ret); 6751 mlog_errno(ret);
6752 6752
6753out: 6753out:
6754 if (pages) 6754 kfree(pages);
6755 kfree(pages);
6756 6755
6757 return ret; 6756 return ret;
6758} 6757}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..20dfec72e903 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -569,7 +569,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
569 int ret, 569 int ret,
570 bool is_async) 570 bool is_async)
571{ 571{
572 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 572 struct inode *inode = file_inode(iocb->ki_filp);
573 int level; 573 int level;
574 wait_queue_head_t *wq = ocfs2_ioend_wq(inode); 574 wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
575 575
@@ -593,9 +593,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
593 level = ocfs2_iocb_rw_locked_level(iocb); 593 level = ocfs2_iocb_rw_locked_level(iocb);
594 ocfs2_rw_unlock(inode, level); 594 ocfs2_rw_unlock(inode, level);
595 595
596 inode_dio_done(inode);
596 if (is_async) 597 if (is_async)
597 aio_complete(iocb, ret, 0); 598 aio_complete(iocb, ret, 0);
598 inode_dio_done(inode);
599} 599}
600 600
601/* 601/*
@@ -626,7 +626,7 @@ static ssize_t ocfs2_direct_IO(int rw,
626 unsigned long nr_segs) 626 unsigned long nr_segs)
627{ 627{
628 struct file *file = iocb->ki_filp; 628 struct file *file = iocb->ki_filp;
629 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 629 struct inode *inode = file_inode(file)->i_mapping->host;
630 630
631 /* 631 /*
632 * Fallback to buffered I/O if we see an inode without 632 * Fallback to buffered I/O if we see an inode without
@@ -1194,6 +1194,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1194 goto out; 1194 goto out;
1195 } 1195 }
1196 } 1196 }
1197 wait_for_stable_page(wc->w_pages[i]);
1197 1198
1198 if (index == target_index) 1199 if (index == target_index)
1199 wc->w_target_page = wc->w_pages[i]; 1200 wc->w_target_page = wc->w_pages[i];
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f7c648d7d6bf..42252bf64b51 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1471,8 +1471,7 @@ static void o2hb_region_release(struct config_item *item)
1471 1471
1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); 1472 mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);
1473 1473
1474 if (reg->hr_tmp_block) 1474 kfree(reg->hr_tmp_block);
1475 kfree(reg->hr_tmp_block);
1476 1475
1477 if (reg->hr_slot_data) { 1476 if (reg->hr_slot_data) {
1478 for (i = 0; i < reg->hr_num_pages; i++) { 1477 for (i = 0; i < reg->hr_num_pages; i++) {
@@ -1486,8 +1485,7 @@ static void o2hb_region_release(struct config_item *item)
1486 if (reg->hr_bdev) 1485 if (reg->hr_bdev)
1487 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1486 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1488 1487
1489 if (reg->hr_slots) 1488 kfree(reg->hr_slots);
1490 kfree(reg->hr_slots);
1491 1489
1492 kfree(reg->hr_db_regnum); 1490 kfree(reg->hr_db_regnum);
1493 kfree(reg->hr_db_livenodes); 1491 kfree(reg->hr_db_livenodes);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1bfe8802cc1e..aa88bd8bcedc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -304,28 +304,22 @@ static u8 o2net_num_from_nn(struct o2net_node *nn)
304 304
305static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw) 305static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw)
306{ 306{
307 int ret = 0; 307 int ret;
308
309 do {
310 if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) {
311 ret = -EAGAIN;
312 break;
313 }
314 spin_lock(&nn->nn_lock);
315 ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id);
316 if (ret == 0)
317 list_add_tail(&nsw->ns_node_item,
318 &nn->nn_status_list);
319 spin_unlock(&nn->nn_lock);
320 } while (ret == -EAGAIN);
321 308
322 if (ret == 0) { 309 spin_lock(&nn->nn_lock);
323 init_waitqueue_head(&nsw->ns_wq); 310 ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC);
324 nsw->ns_sys_status = O2NET_ERR_NONE; 311 if (ret >= 0) {
325 nsw->ns_status = 0; 312 nsw->ns_id = ret;
313 list_add_tail(&nsw->ns_node_item, &nn->nn_status_list);
326 } 314 }
315 spin_unlock(&nn->nn_lock);
316 if (ret < 0)
317 return ret;
327 318
328 return ret; 319 init_waitqueue_head(&nsw->ns_wq);
320 nsw->ns_sys_status = O2NET_ERR_NONE;
321 nsw->ns_status = 0;
322 return 0;
329} 323}
330 324
331static void o2net_complete_nsw_locked(struct o2net_node *nn, 325static void o2net_complete_nsw_locked(struct o2net_node *nn,
@@ -870,7 +864,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
870 /* we've had some trouble with handlers seemingly vanishing. */ 864 /* we've had some trouble with handlers seemingly vanishing. */
871 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, 865 mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p,
872 &parent) == NULL, 866 &parent) == NULL,
873 "couldn't find handler we *just* registerd " 867 "couldn't find handler we *just* registered "
874 "for type %u key %08x\n", msg_type, key); 868 "for type %u key %08x\n", msg_type, key);
875 } 869 }
876 write_unlock(&o2net_handler_lock); 870 write_unlock(&o2net_handler_lock);
@@ -1165,10 +1159,8 @@ out:
1165 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ 1159 o2net_debug_del_nst(&nst); /* must be before dropping sc and node */
1166 if (sc) 1160 if (sc)
1167 sc_put(sc); 1161 sc_put(sc);
1168 if (vec) 1162 kfree(vec);
1169 kfree(vec); 1163 kfree(msg);
1170 if (msg)
1171 kfree(msg);
1172 o2net_complete_nsw(nn, &nsw, 0, 0, 0); 1164 o2net_complete_nsw(nn, &nsw, 0, 0, 0);
1173 return ret; 1165 return ret;
1174} 1166}
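By contrast with the inotify conversion above, o2net_prep_nsw() allocates its id entirely under nn_lock, so it passes GFP_ATOMIC directly to idr_alloc() and skips the idr_preload()/idr_preload_end() pair; the cost is that the allocation can fail under memory pressure where the preloaded variant would have slept. Roughly:

            spin_lock(&nn->nn_lock);
            ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC);    /* no preload */
            if (ret >= 0)
                    nsw->ns_id = ret;
            spin_unlock(&nn->nn_lock);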
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 8db4b58b2e4b..ef999729e274 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -169,11 +169,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
169 u64 parent_blkno, 169 u64 parent_blkno,
170 int skip_unhashed) 170 int skip_unhashed)
171{ 171{
172 struct hlist_node *p;
173 struct dentry *dentry; 172 struct dentry *dentry;
174 173
175 spin_lock(&inode->i_lock); 174 spin_lock(&inode->i_lock);
176 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { 175 hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
177 spin_lock(&dentry->d_lock); 176 spin_lock(&dentry->d_lock);
178 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 177 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
179 trace_ocfs2_find_local_alias(dentry->d_name.len, 178 trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8fe4e2892ab9..f1e1aed8f638 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -67,7 +67,6 @@
67#define NAMEI_RA_CHUNKS 2 67#define NAMEI_RA_CHUNKS 2
68#define NAMEI_RA_BLOCKS 4 68#define NAMEI_RA_BLOCKS 4
69#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) 69#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
70#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
71 70
72static unsigned char ocfs2_filetype_table[] = { 71static unsigned char ocfs2_filetype_table[] = {
73 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 72 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -2015,12 +2014,12 @@ int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
2015int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 2014int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2016{ 2015{
2017 int error = 0; 2016 int error = 0;
2018 struct inode *inode = filp->f_path.dentry->d_inode; 2017 struct inode *inode = file_inode(filp);
2019 int lock_level = 0; 2018 int lock_level = 0;
2020 2019
2021 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); 2020 trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
2022 2021
2023 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2022 error = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
2024 if (lock_level && error >= 0) { 2023 if (lock_level && error >= 0) {
2025 /* We release EX lock which used to update atime 2024 /* We release EX lock which used to update atime
2026 * and get PR lock again to reduce contention 2025 * and get PR lock again to reduce contention
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 9e89d70df337..dbb17c07656a 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -319,9 +319,7 @@ static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm)
319 if (dlm->master_hash) 319 if (dlm->master_hash)
320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES); 320 dlm_free_pagevec((void **)dlm->master_hash, DLM_HASH_PAGES);
321 321
322 if (dlm->name) 322 kfree(dlm->name);
323 kfree(dlm->name);
324
325 kfree(dlm); 323 kfree(dlm);
326} 324}
327 325
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 005261c333b0..33ecbe0e6734 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2020,7 +2020,7 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
2020 int ignore_higher, u8 request_from, u32 flags) 2020 int ignore_higher, u8 request_from, u32 flags)
2021{ 2021{
2022 struct dlm_work_item *item; 2022 struct dlm_work_item *item;
2023 item = kzalloc(sizeof(*item), GFP_NOFS); 2023 item = kzalloc(sizeof(*item), GFP_ATOMIC);
2024 if (!item) 2024 if (!item)
2025 return -ENOMEM; 2025 return -ENOMEM;
2026 2026
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 01ebfd0bdad7..eeac97bb3bfa 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2083,7 +2083,6 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2083 u8 dead_node, u8 new_master) 2083 u8 dead_node, u8 new_master)
2084{ 2084{
2085 int i; 2085 int i;
2086 struct hlist_node *hash_iter;
2087 struct hlist_head *bucket; 2086 struct hlist_head *bucket;
2088 struct dlm_lock_resource *res, *next; 2087 struct dlm_lock_resource *res, *next;
2089 2088
@@ -2114,7 +2113,7 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2114 * if necessary */ 2113 * if necessary */
2115 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2114 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
2116 bucket = dlm_lockres_hash(dlm, i); 2115 bucket = dlm_lockres_hash(dlm, i);
2117 hlist_for_each_entry(res, hash_iter, bucket, hash_node) { 2116 hlist_for_each_entry(res, bucket, hash_node) {
2118 if (!(res->state & DLM_LOCK_RES_RECOVERING)) 2117 if (!(res->state & DLM_LOCK_RES_RECOVERING))
2119 continue; 2118 continue;
2120 2119
@@ -2273,7 +2272,6 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2273 2272
2274static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) 2273static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2275{ 2274{
2276 struct hlist_node *iter;
2277 struct dlm_lock_resource *res; 2275 struct dlm_lock_resource *res;
2278 int i; 2276 int i;
2279 struct hlist_head *bucket; 2277 struct hlist_head *bucket;
@@ -2299,7 +2297,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2299 */ 2297 */
2300 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2298 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
2301 bucket = dlm_lockres_hash(dlm, i); 2299 bucket = dlm_lockres_hash(dlm, i);
2302 hlist_for_each_entry(res, iter, bucket, hash_node) { 2300 hlist_for_each_entry(res, bucket, hash_node) {
2303 /* always prune any $RECOVERY entries for dead nodes, 2301 /* always prune any $RECOVERY entries for dead nodes,
2304 * otherwise hangs can occur during later recovery */ 2302 * otherwise hangs can occur during later recovery */
2305 if (dlm_is_recovery_lock(res->lockname.name, 2303 if (dlm_is_recovery_lock(res->lockname.name,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 16b712d260d4..4c5fc8d77dc2 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -224,7 +224,7 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
224static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) 224static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
225{ 225{
226 int event = 0; 226 int event = 0;
227 struct inode *inode = file->f_path.dentry->d_inode; 227 struct inode *inode = file_inode(file);
228 struct dlmfs_inode_private *ip = DLMFS_I(inode); 228 struct dlmfs_inode_private *ip = DLMFS_I(inode);
229 229
230 poll_wait(file, &ip->ip_lockres.l_event, wait); 230 poll_wait(file, &ip->ip_lockres.l_event, wait);
@@ -245,7 +245,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
245 int bytes_left; 245 int bytes_left;
246 ssize_t readlen, got; 246 ssize_t readlen, got;
247 char *lvb_buf; 247 char *lvb_buf;
248 struct inode *inode = filp->f_path.dentry->d_inode; 248 struct inode *inode = file_inode(filp);
249 249
250 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", 250 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
251 inode->i_ino, count, *ppos); 251 inode->i_ino, count, *ppos);
@@ -293,7 +293,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
293 int bytes_left; 293 int bytes_left;
294 ssize_t writelen; 294 ssize_t writelen;
295 char *lvb_buf; 295 char *lvb_buf;
296 struct inode *inode = filp->f_path.dentry->d_inode; 296 struct inode *inode = file_inode(filp);
297 297
298 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", 298 mlog(0, "inode %lu, count = %zu, *ppos = %llu\n",
299 inode->i_ino, count, *ppos); 299 inode->i_ino, count, *ppos);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4f7795fb5fc0..12ae194ac943 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2045,8 +2045,8 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
2045 lvb->lvb_version = OCFS2_LVB_VERSION; 2045 lvb->lvb_version = OCFS2_LVB_VERSION;
2046 lvb->lvb_isize = cpu_to_be64(i_size_read(inode)); 2046 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
2047 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters); 2047 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
2048 lvb->lvb_iuid = cpu_to_be32(inode->i_uid); 2048 lvb->lvb_iuid = cpu_to_be32(i_uid_read(inode));
2049 lvb->lvb_igid = cpu_to_be32(inode->i_gid); 2049 lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
2050 lvb->lvb_imode = cpu_to_be16(inode->i_mode); 2050 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
2051 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink); 2051 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
2052 lvb->lvb_iatime_packed = 2052 lvb->lvb_iatime_packed =
@@ -2095,8 +2095,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2095 else 2095 else
2096 inode->i_blocks = ocfs2_inode_sector_count(inode); 2096 inode->i_blocks = ocfs2_inode_sector_count(inode);
2097 2097
2098 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2098 i_uid_write(inode, be32_to_cpu(lvb->lvb_iuid));
2099 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2099 i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
2100 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2100 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2101 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink)); 2101 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2102 ocfs2_unpack_timespec(&inode->i_atime, 2102 ocfs2_unpack_timespec(&inode->i_atime,
@@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2545 * everything is up to the caller :) */ 2545 * everything is up to the caller :) */
2546 status = ocfs2_should_refresh_lock_res(lockres); 2546 status = ocfs2_should_refresh_lock_res(lockres);
2547 if (status < 0) { 2547 if (status < 0) {
2548 ocfs2_cluster_unlock(osb, lockres, level);
2548 mlog_errno(status); 2549 mlog_errno(status);
2549 goto bail; 2550 goto bail;
2550 } 2551 }
@@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
2553 2554
2554 ocfs2_complete_lock_res_refresh(lockres, status); 2555 ocfs2_complete_lock_res_refresh(lockres, status);
2555 2556
2556 if (status < 0) 2557 if (status < 0) {
2558 ocfs2_cluster_unlock(osb, lockres, level);
2557 mlog_errno(status); 2559 mlog_errno(status);
2560 }
2558 ocfs2_track_lock_refresh(lockres); 2561 ocfs2_track_lock_refresh(lockres);
2559 } 2562 }
2560bail: 2563bail:
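The __ocfs2_stuff_meta_lvb()/ocfs2_refresh_inode_from_lvb() hunks above use the i_uid_read()/i_uid_write() accessors (and their gid counterparts), which translate between the inode's kuid_t/kgid_t and a raw u32 id relative to init_user_ns, as needed for on-disk and lock-value-block encodings. A minimal sketch:

    #include <linux/fs.h>

            /* inode -> raw id, e.g. for an on-disk or lock-value-block field */
            u32 raw_uid = i_uid_read(inode);

            /* raw id -> inode */
            i_uid_write(inode, raw_uid);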
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 322216a5f0dd..29651167190d 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -195,11 +195,11 @@ static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len,
195 195
196 if (parent && (len < 6)) { 196 if (parent && (len < 6)) {
197 *max_len = 6; 197 *max_len = 6;
198 type = 255; 198 type = FILEID_INVALID;
199 goto bail; 199 goto bail;
200 } else if (len < 3) { 200 } else if (len < 3) {
201 *max_len = 3; 201 *max_len = 3;
202 type = 255; 202 type = FILEID_INVALID;
203 goto bail; 203 goto bail;
204 } 204 }
205 205
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f487aa343442..1c39efb71bab 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -282,8 +282,7 @@ search:
282 spin_unlock(&oi->ip_lock); 282 spin_unlock(&oi->ip_lock);
283 283
284out: 284out:
285 if (new_emi) 285 kfree(new_emi);
286 kfree(new_emi);
287} 286}
288 287
289static int ocfs2_last_eb_is_empty(struct inode *inode, 288static int ocfs2_last_eb_is_empty(struct inode *inode,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 37d313ede159..6474cb44004d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1116,7 +1116,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1116 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1116 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1117 dentry->d_name.len, dentry->d_name.name, 1117 dentry->d_name.len, dentry->d_name.name,
1118 attr->ia_valid, attr->ia_mode, 1118 attr->ia_valid, attr->ia_mode,
1119 attr->ia_uid, attr->ia_gid); 1119 from_kuid(&init_user_ns, attr->ia_uid),
1120 from_kgid(&init_user_ns, attr->ia_gid));
1120 1121
1121 /* ensuring we don't even attempt to truncate a symlink */ 1122 /* ensuring we don't even attempt to truncate a symlink */
1122 if (S_ISLNK(inode->i_mode)) 1123 if (S_ISLNK(inode->i_mode))
@@ -1174,14 +1175,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1174 } 1175 }
1175 } 1176 }
1176 1177
1177 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 1178 if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
1178 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 1179 (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
1179 /* 1180 /*
1180 * Gather pointers to quota structures so that allocation / 1181 * Gather pointers to quota structures so that allocation /
1181 * freeing of quota structures happens here and not inside 1182 * freeing of quota structures happens here and not inside
1182 * dquot_transfer() where we have problems with lock ordering 1183 * dquot_transfer() where we have problems with lock ordering
1183 */ 1184 */
1184 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid 1185 if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)
1185 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1186 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1186 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1187 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1187 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); 1188 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
@@ -1190,7 +1191,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1190 goto bail_unlock; 1191 goto bail_unlock;
1191 } 1192 }
1192 } 1193 }
1193 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid 1194 if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid)
1194 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1195 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1195 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1196 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1196 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); 1197 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
@@ -1949,7 +1950,7 @@ out:
1949int ocfs2_change_file_space(struct file *file, unsigned int cmd, 1950int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1950 struct ocfs2_space_resv *sr) 1951 struct ocfs2_space_resv *sr)
1951{ 1952{
1952 struct inode *inode = file->f_path.dentry->d_inode; 1953 struct inode *inode = file_inode(file);
1953 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1954 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1954 int ret; 1955 int ret;
1955 1956
@@ -1977,7 +1978,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1977static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, 1978static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
1978 loff_t len) 1979 loff_t len)
1979{ 1980{
1980 struct inode *inode = file->f_path.dentry->d_inode; 1981 struct inode *inode = file_inode(file);
1981 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1982 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1982 struct ocfs2_space_resv sr; 1983 struct ocfs2_space_resv sr;
1983 int change_size = 1; 1984 int change_size = 1;
@@ -2232,7 +2233,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2232 loff_t old_size, *ppos = &iocb->ki_pos; 2233 loff_t old_size, *ppos = &iocb->ki_pos;
2233 u32 old_clusters; 2234 u32 old_clusters;
2234 struct file *file = iocb->ki_filp; 2235 struct file *file = iocb->ki_filp;
2235 struct inode *inode = file->f_path.dentry->d_inode; 2236 struct inode *inode = file_inode(file);
2236 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2237 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2237 int full_coherency = !(osb->s_mount_opt & 2238 int full_coherency = !(osb->s_mount_opt &
2238 OCFS2_MOUNT_COHERENCY_BUFFERED); 2239 OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2516,7 +2517,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2516 unsigned int flags) 2517 unsigned int flags)
2517{ 2518{
2518 int ret = 0, lock_level = 0; 2519 int ret = 0, lock_level = 0;
2519 struct inode *inode = in->f_path.dentry->d_inode; 2520 struct inode *inode = file_inode(in);
2520 2521
2521 trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, 2522 trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
2522 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2523 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2526,7 +2527,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2526 /* 2527 /*
2527 * See the comment in ocfs2_file_aio_read() 2528 * See the comment in ocfs2_file_aio_read()
2528 */ 2529 */
2529 ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); 2530 ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
2530 if (ret < 0) { 2531 if (ret < 0) {
2531 mlog_errno(ret); 2532 mlog_errno(ret);
2532 goto bail; 2533 goto bail;
@@ -2546,7 +2547,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2546{ 2547{
2547 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; 2548 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
2548 struct file *filp = iocb->ki_filp; 2549 struct file *filp = iocb->ki_filp;
2549 struct inode *inode = filp->f_path.dentry->d_inode; 2550 struct inode *inode = file_inode(filp);
2550 2551
2551 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, 2552 trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
2552 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2553 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2589,7 +2590,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2589 * like i_size. This allows the checks down below 2590 * like i_size. This allows the checks down below
2590 * generic_file_aio_read() a chance of actually working. 2591 * generic_file_aio_read() a chance of actually working.
2591 */ 2592 */
2592 ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); 2593 ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
2593 if (ret < 0) { 2594 if (ret < 0) {
2594 mlog_errno(ret); 2595 mlog_errno(ret);
2595 goto bail; 2596 goto bail;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index d89e08a81eda..f87f9bd1edff 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -269,8 +269,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
269 inode->i_generation = le32_to_cpu(fe->i_generation); 269 inode->i_generation = le32_to_cpu(fe->i_generation);
270 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 270 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
271 inode->i_mode = le16_to_cpu(fe->i_mode); 271 inode->i_mode = le16_to_cpu(fe->i_mode);
272 inode->i_uid = le32_to_cpu(fe->i_uid); 272 i_uid_write(inode, le32_to_cpu(fe->i_uid));
273 inode->i_gid = le32_to_cpu(fe->i_gid); 273 i_gid_write(inode, le32_to_cpu(fe->i_gid));
274 274
275 /* Fast symlinks will have i_size but no allocated clusters. */ 275 /* Fast symlinks will have i_size but no allocated clusters. */
276 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { 276 if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
@@ -1259,8 +1259,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1259 1259
1260 fe->i_size = cpu_to_le64(i_size_read(inode)); 1260 fe->i_size = cpu_to_le64(i_size_read(inode));
1261 ocfs2_set_links_count(fe, inode->i_nlink); 1261 ocfs2_set_links_count(fe, inode->i_nlink);
1262 fe->i_uid = cpu_to_le32(inode->i_uid); 1262 fe->i_uid = cpu_to_le32(i_uid_read(inode));
1263 fe->i_gid = cpu_to_le32(inode->i_gid); 1263 fe->i_gid = cpu_to_le32(i_gid_read(inode));
1264 fe->i_mode = cpu_to_le16(inode->i_mode); 1264 fe->i_mode = cpu_to_le16(inode->i_mode);
1265 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 1265 fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
1266 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 1266 fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
@@ -1290,8 +1290,8 @@ void ocfs2_refresh_inode(struct inode *inode,
1290 ocfs2_set_inode_flags(inode); 1290 ocfs2_set_inode_flags(inode);
1291 i_size_write(inode, le64_to_cpu(fe->i_size)); 1291 i_size_write(inode, le64_to_cpu(fe->i_size));
1292 set_nlink(inode, ocfs2_read_links_count(fe)); 1292 set_nlink(inode, ocfs2_read_links_count(fe));
1293 inode->i_uid = le32_to_cpu(fe->i_uid); 1293 i_uid_write(inode, le32_to_cpu(fe->i_uid));
1294 inode->i_gid = le32_to_cpu(fe->i_gid); 1294 i_gid_write(inode, le32_to_cpu(fe->i_gid));
1295 inode->i_mode = le16_to_cpu(fe->i_mode); 1295 inode->i_mode = le16_to_cpu(fe->i_mode);
1296 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0) 1296 if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
1297 inode->i_blocks = 0; 1297 inode->i_blocks = 0;
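
These inode.c hunks stop assigning raw little-endian owner values straight into inode->i_uid/i_gid and go through i_uid_write()/i_gid_write() and i_uid_read()/i_gid_read() instead, since the in-core fields are now the namespaced kuid_t/kgid_t types. A minimal sketch of the two directions of the conversion; the helper names are real, the wrapper functions below are illustrative only:

#include <linux/fs.h>

/* raw on-disk id -> namespaced in-core id (ocfs2_populate_inode, ocfs2_refresh_inode) */
static void example_load_owner(struct inode *inode, u32 disk_uid, u32 disk_gid)
{
        i_uid_write(inode, disk_uid);
        i_gid_write(inode, disk_gid);
}

/* namespaced in-core id -> raw on-disk id (ocfs2_mark_inode_dirty, __ocfs2_mknod_locked) */
static void example_store_owner(__le32 *disk_uid, __le32 *disk_gid, struct inode *inode)
{
        *disk_uid = cpu_to_le32(i_uid_read(inode));
        *disk_gid = cpu_to_le32(i_gid_read(inode));
}
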
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index f20edcbfe700..752f0b26221d 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -881,7 +881,7 @@ bail:
881 881
882long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 882long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
883{ 883{
884 struct inode *inode = filp->f_path.dentry->d_inode; 884 struct inode *inode = file_inode(filp);
885 unsigned int flags; 885 unsigned int flags;
886 int new_clusters; 886 int new_clusters;
887 int status; 887 int status;
@@ -994,7 +994,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
994{ 994{
995 bool preserve; 995 bool preserve;
996 struct reflink_arguments args; 996 struct reflink_arguments args;
997 struct inode *inode = file->f_path.dentry->d_inode; 997 struct inode *inode = file_inode(file);
998 struct ocfs2_info info; 998 struct ocfs2_info info;
999 void __user *argp = (void __user *)arg; 999 void __user *argp = (void __user *)arg;
1000 1000
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 2dd36af79e26..8eccfabcd12e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1234,11 +1234,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
1234 /* Though we wish to avoid it, we are in fact safe in 1234 /* Though we wish to avoid it, we are in fact safe in
1235 * skipping local alloc cleanup as fsck.ocfs2 is more 1235 * skipping local alloc cleanup as fsck.ocfs2 is more
1236 * than capable of reclaiming unused space. */ 1236 * than capable of reclaiming unused space. */
1237 if (la_dinode) 1237 kfree(la_dinode);
1238 kfree(la_dinode); 1238 kfree(tl_dinode);
1239
1240 if (tl_dinode)
1241 kfree(tl_dinode);
1242 1239
1243 if (qrec) 1240 if (qrec)
1244 ocfs2_free_quota_recovery(qrec); 1241 ocfs2_free_quota_recovery(qrec);
@@ -1408,8 +1405,7 @@ bail:
1408 1405
1409 mutex_unlock(&osb->recovery_lock); 1406 mutex_unlock(&osb->recovery_lock);
1410 1407
1411 if (rm_quota) 1408 kfree(rm_quota);
1412 kfree(rm_quota);
1413 1409
1414 /* no one is callint kthread_stop() for us so the kthread() api 1410 /* no one is callint kthread_stop() for us so the kthread() api
1415 * requires that we call do_exit(). And it isn't exported, but 1411 * requires that we call do_exit(). And it isn't exported, but
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index a9f78c74d687..aebeacd807c3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -476,8 +476,7 @@ out:
476 if (local_alloc_inode) 476 if (local_alloc_inode)
477 iput(local_alloc_inode); 477 iput(local_alloc_inode);
478 478
479 if (alloc_copy) 479 kfree(alloc_copy);
480 kfree(alloc_copy);
481} 480}
482 481
483/* 482/*
@@ -534,7 +533,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
534 mlog_errno(status); 533 mlog_errno(status);
535 534
536bail: 535bail:
537 if ((status < 0) && (*alloc_copy)) { 536 if (status < 0) {
538 kfree(*alloc_copy); 537 kfree(*alloc_copy);
539 *alloc_copy = NULL; 538 *alloc_copy = NULL;
540 } 539 }
@@ -1290,8 +1289,7 @@ bail:
1290 if (main_bm_inode) 1289 if (main_bm_inode)
1291 iput(main_bm_inode); 1290 iput(main_bm_inode);
1292 1291
1293 if (alloc_copy) 1292 kfree(alloc_copy);
1294 kfree(alloc_copy);
1295 1293
1296 if (ac) 1294 if (ac)
1297 ocfs2_free_alloc_context(ac); 1295 ocfs2_free_alloc_context(ac);
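
The journal.c and localalloc.c cleanups above rely on kfree() being a no-op for a NULL pointer, which makes the old "if (ptr) kfree(ptr);" guards redundant. A small sketch of the resulting error-path style, with hypothetical names:

#include <linux/slab.h>
#include <linux/errno.h>

static int example_use_two_buffers(void)
{
        char *a, *b = NULL;
        int ret = -ENOMEM;

        a = kmalloc(32, GFP_KERNEL);
        if (!a)
                goto out;
        b = kmalloc(64, GFP_KERNEL);
        if (!b)
                goto out;

        ret = 0;        /* ... work with a and b ... */
out:
        /* kfree(NULL) is a no-op, so neither pointer needs an if () guard */
        kfree(b);
        kfree(a);
        return ret;
}
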
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 47a87dda54ce..10d66c75cecb 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -62,7 +62,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
62 struct page *page) 62 struct page *page)
63{ 63{
64 int ret = VM_FAULT_NOPAGE; 64 int ret = VM_FAULT_NOPAGE;
65 struct inode *inode = file->f_path.dentry->d_inode; 65 struct inode *inode = file_inode(file);
66 struct address_space *mapping = inode->i_mapping; 66 struct address_space *mapping = inode->i_mapping;
67 loff_t pos = page_offset(page); 67 loff_t pos = page_offset(page);
68 unsigned int len = PAGE_CACHE_SIZE; 68 unsigned int len = PAGE_CACHE_SIZE;
@@ -131,7 +131,7 @@ out:
131static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 131static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
132{ 132{
133 struct page *page = vmf->page; 133 struct page *page = vmf->page;
134 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 134 struct inode *inode = file_inode(vma->vm_file);
135 struct buffer_head *di_bh = NULL; 135 struct buffer_head *di_bh = NULL;
136 sigset_t oldset; 136 sigset_t oldset;
137 int ret; 137 int ret;
@@ -180,13 +180,13 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
180{ 180{
181 int ret = 0, lock_level = 0; 181 int ret = 0, lock_level = 0;
182 182
183 ret = ocfs2_inode_lock_atime(file->f_dentry->d_inode, 183 ret = ocfs2_inode_lock_atime(file_inode(file),
184 file->f_vfsmnt, &lock_level); 184 file->f_path.mnt, &lock_level);
185 if (ret < 0) { 185 if (ret < 0) {
186 mlog_errno(ret); 186 mlog_errno(ret);
187 goto out; 187 goto out;
188 } 188 }
189 ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); 189 ocfs2_inode_unlock(file_inode(file), lock_level);
190out: 190out:
191 vma->vm_ops = &ocfs2_file_vm_ops; 191 vma->vm_ops = &ocfs2_file_vm_ops;
192 return 0; 192 return 0;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 6083432f667e..9f8dcadd9a50 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1055,7 +1055,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
1055{ 1055{
1056 int status; 1056 int status;
1057 1057
1058 struct inode *inode = filp->f_path.dentry->d_inode; 1058 struct inode *inode = file_inode(filp);
1059 struct ocfs2_move_extents range; 1059 struct ocfs2_move_extents range;
1060 struct ocfs2_move_extents_context *context = NULL; 1060 struct ocfs2_move_extents_context *context = NULL;
1061 1061
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f1fd0741162b..04ee1b57c243 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -512,8 +512,8 @@ static int __ocfs2_mknod_locked(struct inode *dir,
512 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc); 512 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
513 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 513 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
514 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 514 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
515 fe->i_uid = cpu_to_le32(inode->i_uid); 515 fe->i_uid = cpu_to_le32(i_uid_read(inode));
516 fe->i_gid = cpu_to_le32(inode->i_gid); 516 fe->i_gid = cpu_to_le32(i_gid_read(inode));
517 fe->i_mode = cpu_to_le16(inode->i_mode); 517 fe->i_mode = cpu_to_le16(inode->i_mode);
518 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 518 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
519 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); 519 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30a055049e16..998b17eda09d 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2927,7 +2927,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2927 u32 new_cluster, u32 new_len) 2927 u32 new_cluster, u32 new_len)
2928{ 2928{
2929 int ret = 0, partial; 2929 int ret = 0, partial;
2930 struct inode *inode = file->f_path.dentry->d_inode; 2930 struct inode *inode = file_inode(file);
2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
@@ -3020,7 +3020,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3020 u32 new_cluster, u32 new_len) 3020 u32 new_cluster, u32 new_len)
3021{ 3021{
3022 int ret = 0; 3022 int ret = 0;
3023 struct inode *inode = file->f_path.dentry->d_inode; 3023 struct inode *inode = file_inode(file);
3024 struct super_block *sb = inode->i_sb; 3024 struct super_block *sb = inode->i_sb;
3025 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 3025 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); 3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -4407,7 +4407,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4407 * rights to do so. 4407 * rights to do so.
4408 */ 4408 */
4409 if (preserve) { 4409 if (preserve) {
4410 if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN)) 4410 if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN))
4411 return -EPERM; 4411 return -EPERM;
4412 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN)) 4412 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
4413 return -EPERM; 4413 return -EPERM;
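
ocfs2_vfs_reflink() can no longer compare current_fsuid() with inode->i_uid using "!=", because both are now the opaque kuid_t type; the patch switches to uid_eq(). A sketch of the same ownership-or-capability check, wrapped in a helper name of my own:

#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/capability.h>

/* hypothetical helper: may the caller preserve ownership on a reflink? */
static int example_may_preserve_owner(struct inode *inode)
{
        if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN))
                return -EPERM;
        if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
                return -EPERM;
        return 0;
}
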
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 94368017edb3..bf1f8930456f 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -376,7 +376,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); 376 dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
377 377
378out_free: 378out_free:
379 if (rc && conn->cc_private) 379 if (rc)
380 kfree(conn->cc_private); 380 kfree(conn->cc_private);
381 381
382out: 382out:
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f169da4624fd..b7e74b580c0f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
642 * cluster groups will be staying in cache for the duration of 642 * cluster groups will be staying in cache for the duration of
643 * this operation. 643 * this operation.
644 */ 644 */
645 ac->ac_allow_chain_relink = 0; 645 ac->ac_disable_chain_relink = 1;
646 646
647 /* Claim the first region */ 647 /* Claim the first region */
648 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, 648 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
@@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1823 * Do this *after* figuring out how many bits we're taking out 1823 * Do this *after* figuring out how many bits we're taking out
1824 * of our target group. 1824 * of our target group.
1825 */ 1825 */
1826 if (ac->ac_allow_chain_relink && 1826 if (!ac->ac_disable_chain_relink &&
1827 (prev_group_bh) && 1827 (prev_group_bh) &&
1828 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { 1828 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
1829 status = ocfs2_relink_block_group(handle, alloc_inode, 1829 status = ocfs2_relink_block_group(handle, alloc_inode,
@@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1928 1928
1929 victim = ocfs2_find_victim_chain(cl); 1929 victim = ocfs2_find_victim_chain(cl);
1930 ac->ac_chain = victim; 1930 ac->ac_chain = victim;
1931 ac->ac_allow_chain_relink = 1;
1932 1931
1933 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1932 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1934 res, &bits_left); 1933 res, &bits_left);
@@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1947 * searching each chain in order. Don't allow chain relinking 1946 * searching each chain in order. Don't allow chain relinking
1948 * because we only calculate enough journal credits for one 1947 * because we only calculate enough journal credits for one
1949 * relink per alloc. */ 1948 * relink per alloc. */
1950 ac->ac_allow_chain_relink = 0; 1949 ac->ac_disable_chain_relink = 1;
1951 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { 1950 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1952 if (i == victim) 1951 if (i == victim)
1953 continue; 1952 continue;
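
The suballoc.c/suballoc.h change flips the sense of the relink flag: rather than requiring callers to set ac_allow_chain_relink = 1, the context carries ac_disable_chain_relink, so a freshly zeroed ocfs2_alloc_context defaults to "relinking allowed" and only the paths that cannot afford the extra journal credits set the flag. A generic sketch of the idea, with made-up names:

#include <linux/slab.h>

struct example_alloc_ctx {
        /* zero (the kzalloc default) means the optimisation stays enabled */
        int disable_chain_relink;
};

static struct example_alloc_ctx *example_ctx_new(gfp_t gfp)
{
        /* no explicit "allow = 1" needed at any call site */
        return kzalloc(sizeof(struct example_alloc_ctx), gfp);
}
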
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index b8afabfeede4..a36d0aa50911 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,7 +49,7 @@ struct ocfs2_alloc_context {
49 49
50 /* these are used by the chain search */ 50 /* these are used by the chain search */
51 u16 ac_chain; 51 u16 ac_chain;
52 int ac_allow_chain_relink; 52 int ac_disable_chain_relink;
53 group_search_t *ac_group_search; 53 group_search_t *ac_group_search;
54 54
55 u64 ac_last_group; 55 u64 ac_last_group;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0e91ec22a940..9b6910dec4ba 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2525,8 +2525,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2525 mlog_errno(status); 2525 mlog_errno(status);
2526 2526
2527finally: 2527finally:
2528 if (local_alloc) 2528 kfree(local_alloc);
2529 kfree(local_alloc);
2530 2529
2531 if (status) 2530 if (status)
2532 mlog_errno(status); 2531 mlog_errno(status);
@@ -2553,8 +2552,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
2553 * we free it here. 2552 * we free it here.
2554 */ 2553 */
2555 kfree(osb->journal); 2554 kfree(osb->journal);
2556 if (osb->local_alloc_copy) 2555 kfree(osb->local_alloc_copy);
2557 kfree(osb->local_alloc_copy);
2558 kfree(osb->uuid_str); 2556 kfree(osb->uuid_str);
2559 ocfs2_put_dlm_debug(osb->osb_dlm_debug); 2557 ocfs2_put_dlm_debug(osb->osb_dlm_debug);
2560 memset(osb, 0, sizeof(struct ocfs2_super)); 2558 memset(osb, 0, sizeof(struct ocfs2_super));
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index f1fbb4b552ad..66edce7ecfd7 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -57,7 +57,7 @@
57static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) 57static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page)
58{ 58{
59 struct inode *inode = page->mapping->host; 59 struct inode *inode = page->mapping->host;
60 struct buffer_head *bh; 60 struct buffer_head *bh = NULL;
61 int status = ocfs2_read_inode_block(inode, &bh); 61 int status = ocfs2_read_inode_block(inode, &bh);
62 struct ocfs2_dinode *fe; 62 struct ocfs2_dinode *fe;
63 const char *link; 63 const char *link;
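
The symlink.c one-liner initialises the buffer_head pointer before &bh is handed to ocfs2_read_inode_block(); the read path treats a non-NULL *bh as a caller-supplied buffer, so an uninitialised stack pointer is unsafe there. A sketch of the out-parameter convention, using a hypothetical reader:

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/errno.h>

/* hypothetical reader following the ocfs2 convention: if *bh is NULL it is
 * allocated here, otherwise the caller's buffer is reused */
static int example_read_block(struct inode *inode, struct buffer_head **bh);

static int example_caller(struct inode *inode)
{
        struct buffer_head *bh = NULL;  /* must not be stack garbage */
        int status = example_read_block(inode, &bh);

        if (status)
                return status;
        /* ... use bh ... */
        brelse(bh);
        return 0;
}
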
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 3d635f4bbb20..f053688d22a3 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -91,8 +91,7 @@ static struct inode **get_local_system_inode(struct ocfs2_super *osb,
91 } else 91 } else
92 osb->local_system_inodes = local_system_inodes; 92 osb->local_system_inodes = local_system_inodes;
93 spin_unlock(&osb->osb_lock); 93 spin_unlock(&osb->osb_lock);
94 if (unlikely(free)) 94 kfree(free);
95 kfree(free);
96 } 95 }
97 96
98 index = (slot * NUM_LOCAL_SYSTEM_INODES) + 97 index = (slot * NUM_LOCAL_SYSTEM_INODES) +
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 0ba9ea1e7961..2e3ea308c144 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
7189 struct buffer_head *dir_bh = NULL; 7189 struct buffer_head *dir_bh = NULL;
7190 7190
7191 ret = ocfs2_init_security_get(inode, dir, qstr, NULL); 7191 ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
7192 if (!ret) { 7192 if (ret) {
7193 mlog_errno(ret); 7193 mlog_errno(ret);
7194 goto leave; 7194 goto leave;
7195 } 7195 }
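
The xattr.c hunk is a straight bug fix: ocfs2_init_security_get() follows the usual 0-on-success / negative-errno convention, so the old "if (!ret)" jumped to the error path on success and silently skipped it on failure. The sketch below shows the intended shape, with a hypothetical callee name:

#include <linux/errno.h>

static int example_init_security(void);  /* hypothetical: returns 0 or -errno */

static int example_init_security_and_acl(void)
{
        int ret = example_init_security();

        if (ret) {              /* non-zero means failure */
                /* log the errno, then bail out */
                goto leave;
        }
        /* ... continue with ACL initialisation ... */
leave:
        return ret;
}
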
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index fb5b3ff79dc6..acbaebcad3a8 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -330,7 +330,7 @@ int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
330static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, 330static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
331 u64 fsblock, int hindex) 331 u64 fsblock, int hindex)
332{ 332{
333 struct inode *dir = filp->f_dentry->d_inode; 333 struct inode *dir = file_inode(filp);
334 struct buffer_head *bh; 334 struct buffer_head *bh;
335 struct omfs_inode *oi; 335 struct omfs_inode *oi;
336 u64 self; 336 u64 self;
@@ -405,7 +405,7 @@ out:
405 405
406static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 406static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
407{ 407{
408 struct inode *dir = filp->f_dentry->d_inode; 408 struct inode *dir = file_inode(filp);
409 struct buffer_head *bh; 409 struct buffer_head *bh;
410 loff_t offset, res; 410 loff_t offset, res;
411 unsigned int hchain, hindex; 411 unsigned int hchain, hindex;
diff --git a/fs/open.c b/fs/open.c
index 9b33c0cbfacf..62f907e3bc36 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -228,7 +228,7 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
228 228
229int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 229int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
230{ 230{
231 struct inode *inode = file->f_path.dentry->d_inode; 231 struct inode *inode = file_inode(file);
232 long ret; 232 long ret;
233 233
234 if (offset < 0 || len <= 0) 234 if (offset < 0 || len <= 0)
@@ -426,7 +426,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
426 if (!f.file) 426 if (!f.file)
427 goto out; 427 goto out;
428 428
429 inode = f.file->f_path.dentry->d_inode; 429 inode = file_inode(f.file);
430 430
431 error = -ENOTDIR; 431 error = -ENOTDIR;
432 if (!S_ISDIR(inode->i_mode)) 432 if (!S_ISDIR(inode->i_mode))
@@ -689,7 +689,7 @@ static int do_dentry_open(struct file *f,
689 f->f_mode = FMODE_PATH; 689 f->f_mode = FMODE_PATH;
690 690
691 path_get(&f->f_path); 691 path_get(&f->f_path);
692 inode = f->f_path.dentry->d_inode; 692 inode = file_inode(f);
693 if (f->f_mode & FMODE_WRITE) { 693 if (f->f_mode & FMODE_WRITE) {
694 error = __get_file_write_access(inode, f->f_path.mnt); 694 error = __get_file_write_access(inode, f->f_path.mnt);
695 if (error) 695 if (error)
@@ -699,7 +699,6 @@ static int do_dentry_open(struct file *f,
699 } 699 }
700 700
701 f->f_mapping = inode->i_mapping; 701 f->f_mapping = inode->i_mapping;
702 f->f_pos = 0;
703 file_sb_list_add(f, inode->i_sb); 702 file_sb_list_add(f, inode->i_sb);
704 703
705 if (unlikely(f->f_mode & FMODE_PATH)) { 704 if (unlikely(f->f_mode & FMODE_PATH)) {
@@ -810,23 +809,22 @@ struct file *dentry_open(const struct path *path, int flags,
810 /* We must always pass in a valid mount pointer. */ 809 /* We must always pass in a valid mount pointer. */
811 BUG_ON(!path->mnt); 810 BUG_ON(!path->mnt);
812 811
813 error = -ENFILE;
814 f = get_empty_filp(); 812 f = get_empty_filp();
815 if (f == NULL) 813 if (!IS_ERR(f)) {
816 return ERR_PTR(error); 814 f->f_flags = flags;
817 815 f->f_path = *path;
818 f->f_flags = flags; 816 error = do_dentry_open(f, NULL, cred);
819 f->f_path = *path; 817 if (!error) {
820 error = do_dentry_open(f, NULL, cred); 818 /* from now on we need fput() to dispose of f */
821 if (!error) { 819 error = open_check_o_direct(f);
822 error = open_check_o_direct(f); 820 if (error) {
823 if (error) { 821 fput(f);
824 fput(f); 822 f = ERR_PTR(error);
823 }
824 } else {
825 put_filp(f);
825 f = ERR_PTR(error); 826 f = ERR_PTR(error);
826 } 827 }
827 } else {
828 put_filp(f);
829 f = ERR_PTR(error);
830 } 828 }
831 return f; 829 return f;
832} 830}
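
The dentry_open() rewrite follows from get_empty_filp() now returning ERR_PTR(-errno) instead of NULL, so the hard-coded "error = -ENFILE" disappears and the result is tested with IS_ERR(). A compressed sketch of that calling convention as it would look inside fs/open.c (control flow simplified, cleanup helpers kept):

#include <linux/fs.h>
#include <linux/cred.h>
#include <linux/err.h>

static struct file *example_open(const struct path *path, int flags,
                                 const struct cred *cred)
{
        struct file *f = get_empty_filp();
        int error;

        if (IS_ERR(f))                  /* allocator already encodes the errno */
                return f;

        f->f_flags = flags;
        f->f_path = *path;
        error = do_dentry_open(f, NULL, cred);
        if (error) {
                put_filp(f);            /* not yet a fully opened file */
                return ERR_PTR(error);
        }
        error = open_check_o_direct(f);
        if (error) {
                fput(f);                /* from here on fput() is the right undo */
                return ERR_PTR(error);
        }
        return f;
}
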
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 2ad080faca34..ae47fa7efb9d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -262,7 +262,7 @@ found:
262 262
263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) 263static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
264{ 264{
265 struct inode *inode = filp->f_path.dentry->d_inode; 265 struct inode *inode = file_inode(filp);
266 struct op_inode_info *oi = OP_I(inode); 266 struct op_inode_info *oi = OP_I(inode);
267 struct device_node *dp = oi->u.node; 267 struct device_node *dp = oi->u.node;
268 struct device_node *child; 268 struct device_node *child;
diff --git a/fs/pipe.c b/fs/pipe.c
index bd3479db4b62..64a494cef0a0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -361,7 +361,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
361 unsigned long nr_segs, loff_t pos) 361 unsigned long nr_segs, loff_t pos)
362{ 362{
363 struct file *filp = iocb->ki_filp; 363 struct file *filp = iocb->ki_filp;
364 struct inode *inode = filp->f_path.dentry->d_inode; 364 struct inode *inode = file_inode(filp);
365 struct pipe_inode_info *pipe; 365 struct pipe_inode_info *pipe;
366 int do_wakeup; 366 int do_wakeup;
367 ssize_t ret; 367 ssize_t ret;
@@ -486,7 +486,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
486 unsigned long nr_segs, loff_t ppos) 486 unsigned long nr_segs, loff_t ppos)
487{ 487{
488 struct file *filp = iocb->ki_filp; 488 struct file *filp = iocb->ki_filp;
489 struct inode *inode = filp->f_path.dentry->d_inode; 489 struct inode *inode = file_inode(filp);
490 struct pipe_inode_info *pipe; 490 struct pipe_inode_info *pipe;
491 ssize_t ret; 491 ssize_t ret;
492 int do_wakeup; 492 int do_wakeup;
@@ -677,7 +677,7 @@ bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
677 677
678static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 678static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
679{ 679{
680 struct inode *inode = filp->f_path.dentry->d_inode; 680 struct inode *inode = file_inode(filp);
681 struct pipe_inode_info *pipe; 681 struct pipe_inode_info *pipe;
682 int count, buf, nrbufs; 682 int count, buf, nrbufs;
683 683
@@ -705,7 +705,7 @@ static unsigned int
705pipe_poll(struct file *filp, poll_table *wait) 705pipe_poll(struct file *filp, poll_table *wait)
706{ 706{
707 unsigned int mask; 707 unsigned int mask;
708 struct inode *inode = filp->f_path.dentry->d_inode; 708 struct inode *inode = file_inode(filp);
709 struct pipe_inode_info *pipe = inode->i_pipe; 709 struct pipe_inode_info *pipe = inode->i_pipe;
710 int nrbufs; 710 int nrbufs;
711 711
@@ -758,7 +758,7 @@ pipe_release(struct inode *inode, int decr, int decw)
758static int 758static int
759pipe_read_fasync(int fd, struct file *filp, int on) 759pipe_read_fasync(int fd, struct file *filp, int on)
760{ 760{
761 struct inode *inode = filp->f_path.dentry->d_inode; 761 struct inode *inode = file_inode(filp);
762 int retval; 762 int retval;
763 763
764 mutex_lock(&inode->i_mutex); 764 mutex_lock(&inode->i_mutex);
@@ -772,7 +772,7 @@ pipe_read_fasync(int fd, struct file *filp, int on)
772static int 772static int
773pipe_write_fasync(int fd, struct file *filp, int on) 773pipe_write_fasync(int fd, struct file *filp, int on)
774{ 774{
775 struct inode *inode = filp->f_path.dentry->d_inode; 775 struct inode *inode = file_inode(filp);
776 int retval; 776 int retval;
777 777
778 mutex_lock(&inode->i_mutex); 778 mutex_lock(&inode->i_mutex);
@@ -786,7 +786,7 @@ pipe_write_fasync(int fd, struct file *filp, int on)
786static int 786static int
787pipe_rdwr_fasync(int fd, struct file *filp, int on) 787pipe_rdwr_fasync(int fd, struct file *filp, int on)
788{ 788{
789 struct inode *inode = filp->f_path.dentry->d_inode; 789 struct inode *inode = file_inode(filp);
790 struct pipe_inode_info *pipe = inode->i_pipe; 790 struct pipe_inode_info *pipe = inode->i_pipe;
791 int retval; 791 int retval;
792 792
@@ -1037,13 +1037,13 @@ int create_pipe_files(struct file **res, int flags)
1037 1037
1038 err = -ENFILE; 1038 err = -ENFILE;
1039 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); 1039 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1040 if (!f) 1040 if (IS_ERR(f))
1041 goto err_dentry; 1041 goto err_dentry;
1042 1042
1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); 1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
1044 1044
1045 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); 1045 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
1046 if (!res[0]) 1046 if (IS_ERR(res[0]))
1047 goto err_file; 1047 goto err_file;
1048 1048
1049 path_get(&path); 1049 path_get(&path);
@@ -1226,7 +1226,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1226 */ 1226 */
1227struct pipe_inode_info *get_pipe_info(struct file *file) 1227struct pipe_inode_info *get_pipe_info(struct file *file)
1228{ 1228{
1229 struct inode *i = file->f_path.dentry->d_inode; 1229 struct inode *i = file_inode(file);
1230 1230
1231 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; 1231 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
1232} 1232}
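
In pipe.c the same NULL-to-ERR_PTR transition reaches alloc_file(): create_pipe_files() must now test the result with IS_ERR() rather than "!f". A tiny sketch of the caller-side rule, using a hypothetical constructor:

#include <linux/err.h>

struct example_obj;
struct example_obj *example_alloc(void);  /* hypothetical: returns ERR_PTR on failure */

static int example_caller(struct example_obj **out)
{
        struct example_obj *o = example_alloc();

        if (IS_ERR(o))                  /* "!o" would miss the error entirely */
                return PTR_ERR(o);      /* recover the encoded -errno */
        *out = o;
        return 0;
}
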
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 981b05601931..712f24db9600 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,8 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o fd.o 11 fd.o
12proc-$(CONFIG_TTY) += proc_tty.o
12proc-y += cmdline.o 13proc-y += cmdline.o
13proc-y += consoles.o 14proc-y += consoles.o
14proc-y += cpuinfo.o 15proc-y += cpuinfo.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 6a91e6ffbcbd..f7ed9ee46eb9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
449 do { 449 do {
450 min_flt += t->min_flt; 450 min_flt += t->min_flt;
451 maj_flt += t->maj_flt; 451 maj_flt += t->maj_flt;
452 gtime += t->gtime; 452 gtime += task_gtime(t);
453 t = next_thread(t); 453 t = next_thread(t);
454 } while (t != task); 454 } while (t != task);
455 455
@@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
472 min_flt = task->min_flt; 472 min_flt = task->min_flt;
473 maj_flt = task->maj_flt; 473 maj_flt = task->maj_flt;
474 task_cputime_adjusted(task, &utime, &stime); 474 task_cputime_adjusted(task, &utime, &stime);
475 gtime = task->gtime; 475 gtime = task_gtime(task);
476 } 476 }
477 477
478 /* scale priority and nice values from timeslices to -20..20 */ 478 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b43ff77a51e..69078c7cef1f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -73,6 +73,7 @@
73#include <linux/security.h> 73#include <linux/security.h>
74#include <linux/ptrace.h> 74#include <linux/ptrace.h>
75#include <linux/tracehook.h> 75#include <linux/tracehook.h>
76#include <linux/printk.h>
76#include <linux/cgroup.h> 77#include <linux/cgroup.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/audit.h> 79#include <linux/audit.h>
@@ -383,7 +384,7 @@ static int lstats_open(struct inode *inode, struct file *file)
383static ssize_t lstats_write(struct file *file, const char __user *buf, 384static ssize_t lstats_write(struct file *file, const char __user *buf,
384 size_t count, loff_t *offs) 385 size_t count, loff_t *offs)
385{ 386{
386 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 387 struct task_struct *task = get_proc_task(file_inode(file));
387 388
388 if (!task) 389 if (!task)
389 return -ESRCH; 390 return -ESRCH;
@@ -602,7 +603,7 @@ static const struct inode_operations proc_def_inode_operations = {
602static ssize_t proc_info_read(struct file * file, char __user * buf, 603static ssize_t proc_info_read(struct file * file, char __user * buf,
603 size_t count, loff_t *ppos) 604 size_t count, loff_t *ppos)
604{ 605{
605 struct inode * inode = file->f_path.dentry->d_inode; 606 struct inode * inode = file_inode(file);
606 unsigned long page; 607 unsigned long page;
607 ssize_t length; 608 ssize_t length;
608 struct task_struct *task = get_proc_task(inode); 609 struct task_struct *task = get_proc_task(inode);
@@ -668,7 +669,7 @@ static const struct file_operations proc_single_file_operations = {
668 669
669static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 670static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
670{ 671{
671 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 672 struct task_struct *task = get_proc_task(file_inode(file));
672 struct mm_struct *mm; 673 struct mm_struct *mm;
673 674
674 if (!task) 675 if (!task)
@@ -869,7 +870,7 @@ static const struct file_operations proc_environ_operations = {
869static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 870static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
870 loff_t *ppos) 871 loff_t *ppos)
871{ 872{
872 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 873 struct task_struct *task = get_proc_task(file_inode(file));
873 char buffer[PROC_NUMBUF]; 874 char buffer[PROC_NUMBUF];
874 int oom_adj = OOM_ADJUST_MIN; 875 int oom_adj = OOM_ADJUST_MIN;
875 size_t len; 876 size_t len;
@@ -916,7 +917,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
916 goto out; 917 goto out;
917 } 918 }
918 919
919 task = get_proc_task(file->f_path.dentry->d_inode); 920 task = get_proc_task(file_inode(file));
920 if (!task) { 921 if (!task) {
921 err = -ESRCH; 922 err = -ESRCH;
922 goto out; 923 goto out;
@@ -952,7 +953,7 @@ static ssize_t oom_adj_write(struct file *file, const char __user *buf,
952 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 953 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
953 * /proc/pid/oom_score_adj instead. 954 * /proc/pid/oom_score_adj instead.
954 */ 955 */
955 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 956 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
956 current->comm, task_pid_nr(current), task_pid_nr(task), 957 current->comm, task_pid_nr(current), task_pid_nr(task),
957 task_pid_nr(task)); 958 task_pid_nr(task));
958 959
@@ -976,7 +977,7 @@ static const struct file_operations proc_oom_adj_operations = {
976static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 977static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
977 size_t count, loff_t *ppos) 978 size_t count, loff_t *ppos)
978{ 979{
979 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 980 struct task_struct *task = get_proc_task(file_inode(file));
980 char buffer[PROC_NUMBUF]; 981 char buffer[PROC_NUMBUF];
981 short oom_score_adj = OOM_SCORE_ADJ_MIN; 982 short oom_score_adj = OOM_SCORE_ADJ_MIN;
982 unsigned long flags; 983 unsigned long flags;
@@ -1019,7 +1020,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1019 goto out; 1020 goto out;
1020 } 1021 }
1021 1022
1022 task = get_proc_task(file->f_path.dentry->d_inode); 1023 task = get_proc_task(file_inode(file));
1023 if (!task) { 1024 if (!task) {
1024 err = -ESRCH; 1025 err = -ESRCH;
1025 goto out; 1026 goto out;
@@ -1067,7 +1068,7 @@ static const struct file_operations proc_oom_score_adj_operations = {
1067static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1068static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1068 size_t count, loff_t *ppos) 1069 size_t count, loff_t *ppos)
1069{ 1070{
1070 struct inode * inode = file->f_path.dentry->d_inode; 1071 struct inode * inode = file_inode(file);
1071 struct task_struct *task = get_proc_task(inode); 1072 struct task_struct *task = get_proc_task(inode);
1072 ssize_t length; 1073 ssize_t length;
1073 char tmpbuf[TMPBUFLEN]; 1074 char tmpbuf[TMPBUFLEN];
@@ -1084,7 +1085,7 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1084static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1085static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1085 size_t count, loff_t *ppos) 1086 size_t count, loff_t *ppos)
1086{ 1087{
1087 struct inode * inode = file->f_path.dentry->d_inode; 1088 struct inode * inode = file_inode(file);
1088 char *page, *tmp; 1089 char *page, *tmp;
1089 ssize_t length; 1090 ssize_t length;
1090 uid_t loginuid; 1091 uid_t loginuid;
@@ -1142,7 +1143,7 @@ static const struct file_operations proc_loginuid_operations = {
1142static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1143static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1143 size_t count, loff_t *ppos) 1144 size_t count, loff_t *ppos)
1144{ 1145{
1145 struct inode * inode = file->f_path.dentry->d_inode; 1146 struct inode * inode = file_inode(file);
1146 struct task_struct *task = get_proc_task(inode); 1147 struct task_struct *task = get_proc_task(inode);
1147 ssize_t length; 1148 ssize_t length;
1148 char tmpbuf[TMPBUFLEN]; 1149 char tmpbuf[TMPBUFLEN];
@@ -1165,7 +1166,7 @@ static const struct file_operations proc_sessionid_operations = {
1165static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1166static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
1166 size_t count, loff_t *ppos) 1167 size_t count, loff_t *ppos)
1167{ 1168{
1168 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 1169 struct task_struct *task = get_proc_task(file_inode(file));
1169 char buffer[PROC_NUMBUF]; 1170 char buffer[PROC_NUMBUF];
1170 size_t len; 1171 size_t len;
1171 int make_it_fail; 1172 int make_it_fail;
@@ -1197,7 +1198,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
1197 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1198 make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
1198 if (*end) 1199 if (*end)
1199 return -EINVAL; 1200 return -EINVAL;
1200 task = get_proc_task(file->f_dentry->d_inode); 1201 task = get_proc_task(file_inode(file));
1201 if (!task) 1202 if (!task)
1202 return -ESRCH; 1203 return -ESRCH;
1203 task->make_it_fail = make_it_fail; 1204 task->make_it_fail = make_it_fail;
@@ -1237,7 +1238,7 @@ static ssize_t
1237sched_write(struct file *file, const char __user *buf, 1238sched_write(struct file *file, const char __user *buf,
1238 size_t count, loff_t *offset) 1239 size_t count, loff_t *offset)
1239{ 1240{
1240 struct inode *inode = file->f_path.dentry->d_inode; 1241 struct inode *inode = file_inode(file);
1241 struct task_struct *p; 1242 struct task_struct *p;
1242 1243
1243 p = get_proc_task(inode); 1244 p = get_proc_task(inode);
@@ -1288,7 +1289,7 @@ static ssize_t
1288sched_autogroup_write(struct file *file, const char __user *buf, 1289sched_autogroup_write(struct file *file, const char __user *buf,
1289 size_t count, loff_t *offset) 1290 size_t count, loff_t *offset)
1290{ 1291{
1291 struct inode *inode = file->f_path.dentry->d_inode; 1292 struct inode *inode = file_inode(file);
1292 struct task_struct *p; 1293 struct task_struct *p;
1293 char buffer[PROC_NUMBUF]; 1294 char buffer[PROC_NUMBUF];
1294 int nice; 1295 int nice;
@@ -1343,7 +1344,7 @@ static const struct file_operations proc_pid_sched_autogroup_operations = {
1343static ssize_t comm_write(struct file *file, const char __user *buf, 1344static ssize_t comm_write(struct file *file, const char __user *buf,
1344 size_t count, loff_t *offset) 1345 size_t count, loff_t *offset)
1345{ 1346{
1346 struct inode *inode = file->f_path.dentry->d_inode; 1347 struct inode *inode = file_inode(file);
1347 struct task_struct *p; 1348 struct task_struct *p;
1348 char buffer[TASK_COMM_LEN]; 1349 char buffer[TASK_COMM_LEN];
1349 1350
@@ -1711,7 +1712,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
1711 return -ECHILD; 1712 return -ECHILD;
1712 1713
1713 if (!capable(CAP_SYS_ADMIN)) { 1714 if (!capable(CAP_SYS_ADMIN)) {
1714 status = -EACCES; 1715 status = -EPERM;
1715 goto out_notask; 1716 goto out_notask;
1716 } 1717 }
1717 1718
@@ -1844,7 +1845,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
1844 struct dentry *result; 1845 struct dentry *result;
1845 struct mm_struct *mm; 1846 struct mm_struct *mm;
1846 1847
1847 result = ERR_PTR(-EACCES); 1848 result = ERR_PTR(-EPERM);
1848 if (!capable(CAP_SYS_ADMIN)) 1849 if (!capable(CAP_SYS_ADMIN))
1849 goto out; 1850 goto out;
1850 1851
@@ -1900,7 +1901,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
1900 ino_t ino; 1901 ino_t ino;
1901 int ret; 1902 int ret;
1902 1903
1903 ret = -EACCES; 1904 ret = -EPERM;
1904 if (!capable(CAP_SYS_ADMIN)) 1905 if (!capable(CAP_SYS_ADMIN))
1905 goto out; 1906 goto out;
1906 1907
@@ -2146,7 +2147,7 @@ out_no_task:
2146static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2147static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2147 size_t count, loff_t *ppos) 2148 size_t count, loff_t *ppos)
2148{ 2149{
2149 struct inode * inode = file->f_path.dentry->d_inode; 2150 struct inode * inode = file_inode(file);
2150 char *p = NULL; 2151 char *p = NULL;
2151 ssize_t length; 2152 ssize_t length;
2152 struct task_struct *task = get_proc_task(inode); 2153 struct task_struct *task = get_proc_task(inode);
@@ -2167,7 +2168,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
2167static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2168static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
2168 size_t count, loff_t *ppos) 2169 size_t count, loff_t *ppos)
2169{ 2170{
2170 struct inode * inode = file->f_path.dentry->d_inode; 2171 struct inode * inode = file_inode(file);
2171 char *page; 2172 char *page;
2172 ssize_t length; 2173 ssize_t length;
2173 struct task_struct *task = get_proc_task(inode); 2174 struct task_struct *task = get_proc_task(inode);
@@ -2256,7 +2257,7 @@ static const struct inode_operations proc_attr_dir_inode_operations = {
2256static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2257static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
2257 size_t count, loff_t *ppos) 2258 size_t count, loff_t *ppos)
2258{ 2259{
2259 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 2260 struct task_struct *task = get_proc_task(file_inode(file));
2260 struct mm_struct *mm; 2261 struct mm_struct *mm;
2261 char buffer[PROC_NUMBUF]; 2262 char buffer[PROC_NUMBUF];
2262 size_t len; 2263 size_t len;
@@ -2308,7 +2309,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
2308 goto out_no_task; 2309 goto out_no_task;
2309 2310
2310 ret = -ESRCH; 2311 ret = -ESRCH;
2311 task = get_proc_task(file->f_dentry->d_inode); 2312 task = get_proc_task(file_inode(file));
2312 if (!task) 2313 if (!task)
2313 goto out_no_task; 2314 goto out_no_task;
2314 2315
@@ -2618,6 +2619,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
2618 2619
2619 name.name = buf; 2620 name.name = buf;
2620 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2621 name.len = snprintf(buf, sizeof(buf), "%d", pid);
2622 /* no ->d_hash() rejects on procfs */
2621 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2623 dentry = d_hash_and_lookup(mnt->mnt_root, &name);
2622 if (dentry) { 2624 if (dentry) {
2623 shrink_dcache_parent(dentry); 2625 shrink_dcache_parent(dentry);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 76ddae83daa5..4b3b3ffb52f1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/printk.h>
18#include <linux/mount.h> 19#include <linux/mount.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/idr.h> 21#include <linux/idr.h>
@@ -42,7 +43,7 @@ static ssize_t
42__proc_file_read(struct file *file, char __user *buf, size_t nbytes, 43__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
43 loff_t *ppos) 44 loff_t *ppos)
44{ 45{
45 struct inode * inode = file->f_path.dentry->d_inode; 46 struct inode * inode = file_inode(file);
46 char *page; 47 char *page;
47 ssize_t retval=0; 48 ssize_t retval=0;
48 int eof=0; 49 int eof=0;
@@ -132,11 +133,8 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
132 } 133 }
133 134
134 if (start == NULL) { 135 if (start == NULL) {
135 if (n > PAGE_SIZE) { 136 if (n > PAGE_SIZE) /* Apparent buffer overflow */
136 printk(KERN_ERR
137 "proc_file_read: Apparent buffer overflow!\n");
138 n = PAGE_SIZE; 137 n = PAGE_SIZE;
139 }
140 n -= *ppos; 138 n -= *ppos;
141 if (n <= 0) 139 if (n <= 0)
142 break; 140 break;
@@ -144,26 +142,19 @@ __proc_file_read(struct file *file, char __user *buf, size_t nbytes,
144 n = count; 142 n = count;
145 start = page + *ppos; 143 start = page + *ppos;
146 } else if (start < page) { 144 } else if (start < page) {
147 if (n > PAGE_SIZE) { 145 if (n > PAGE_SIZE) /* Apparent buffer overflow */
148 printk(KERN_ERR
149 "proc_file_read: Apparent buffer overflow!\n");
150 n = PAGE_SIZE; 146 n = PAGE_SIZE;
151 }
152 if (n > count) { 147 if (n > count) {
153 /* 148 /*
154 * Don't reduce n because doing so might 149 * Don't reduce n because doing so might
155 * cut off part of a data block. 150 * cut off part of a data block.
156 */ 151 */
157 printk(KERN_WARNING 152 pr_warn("proc_file_read: count exceeded\n");
158 "proc_file_read: Read count exceeded\n");
159 } 153 }
160 } else /* start >= page */ { 154 } else /* start >= page */ {
161 unsigned long startoff = (unsigned long)(start - page); 155 unsigned long startoff = (unsigned long)(start - page);
162 if (n > (PAGE_SIZE - startoff)) { 156 if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */
163 printk(KERN_ERR
164 "proc_file_read: Apparent buffer overflow!\n");
165 n = PAGE_SIZE - startoff; 157 n = PAGE_SIZE - startoff;
166 }
167 if (n > count) 158 if (n > count)
168 n = count; 159 n = count;
169 } 160 }
@@ -188,7 +179,7 @@ static ssize_t
188proc_file_read(struct file *file, char __user *buf, size_t nbytes, 179proc_file_read(struct file *file, char __user *buf, size_t nbytes,
189 loff_t *ppos) 180 loff_t *ppos)
190{ 181{
191 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 182 struct proc_dir_entry *pde = PDE(file_inode(file));
192 ssize_t rv = -EIO; 183 ssize_t rv = -EIO;
193 184
194 spin_lock(&pde->pde_unload_lock); 185 spin_lock(&pde->pde_unload_lock);
@@ -209,7 +200,7 @@ static ssize_t
209proc_file_write(struct file *file, const char __user *buffer, 200proc_file_write(struct file *file, const char __user *buffer,
210 size_t count, loff_t *ppos) 201 size_t count, loff_t *ppos)
211{ 202{
212 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 203 struct proc_dir_entry *pde = PDE(file_inode(file));
213 ssize_t rv = -EIO; 204 ssize_t rv = -EIO;
214 205
215 if (pde->write_proc) { 206 if (pde->write_proc) {
@@ -412,8 +403,7 @@ static const struct dentry_operations proc_dentry_operations =
412struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, 403struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
413 struct dentry *dentry) 404 struct dentry *dentry)
414{ 405{
415 struct inode *inode = NULL; 406 struct inode *inode;
416 int error = -ENOENT;
417 407
418 spin_lock(&proc_subdir_lock); 408 spin_lock(&proc_subdir_lock);
419 for (de = de->subdir; de ; de = de->next) { 409 for (de = de->subdir; de ; de = de->next) {
@@ -422,22 +412,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
422 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 412 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
423 pde_get(de); 413 pde_get(de);
424 spin_unlock(&proc_subdir_lock); 414 spin_unlock(&proc_subdir_lock);
425 error = -ENOMEM;
426 inode = proc_get_inode(dir->i_sb, de); 415 inode = proc_get_inode(dir->i_sb, de);
427 goto out_unlock; 416 if (!inode)
417 return ERR_PTR(-ENOMEM);
418 d_set_d_op(dentry, &proc_dentry_operations);
419 d_add(dentry, inode);
420 return NULL;
428 } 421 }
429 } 422 }
430 spin_unlock(&proc_subdir_lock); 423 spin_unlock(&proc_subdir_lock);
431out_unlock: 424 return ERR_PTR(-ENOENT);
432
433 if (inode) {
434 d_set_d_op(dentry, &proc_dentry_operations);
435 d_add(dentry, inode);
436 return NULL;
437 }
438 if (de)
439 pde_put(de);
440 return ERR_PTR(error);
441} 425}
442 426
443struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 427struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
@@ -460,7 +444,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
460{ 444{
461 unsigned int ino; 445 unsigned int ino;
462 int i; 446 int i;
463 struct inode *inode = filp->f_path.dentry->d_inode; 447 struct inode *inode = file_inode(filp);
464 int ret = 0; 448 int ret = 0;
465 449
466 ino = inode->i_ino; 450 ino = inode->i_ino;
@@ -522,7 +506,7 @@ out:
522 506
523int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) 507int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
524{ 508{
525 struct inode *inode = filp->f_path.dentry->d_inode; 509 struct inode *inode = file_inode(filp);
526 510
527 return proc_readdir_de(PDE(inode), filp, dirent, filldir); 511 return proc_readdir_de(PDE(inode), filp, dirent, filldir);
528} 512}
@@ -576,7 +560,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
576 560
577 for (tmp = dir->subdir; tmp; tmp = tmp->next) 561 for (tmp = dir->subdir; tmp; tmp = tmp->next)
578 if (strcmp(tmp->name, dp->name) == 0) { 562 if (strcmp(tmp->name, dp->name) == 0) {
579 WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", 563 WARN(1, "proc_dir_entry '%s/%s' already registered\n",
580 dir->name, dp->name); 564 dir->name, dp->name);
581 break; 565 break;
582 } 566 }
@@ -837,9 +821,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
837 if (S_ISDIR(de->mode)) 821 if (S_ISDIR(de->mode))
838 parent->nlink--; 822 parent->nlink--;
839 de->nlink = 0; 823 de->nlink = 0;
840 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " 824 WARN(de->subdir, "%s: removing non-empty directory "
841 "'%s/%s', leaking at least '%s'\n", __func__, 825 "'%s/%s', leaking at least '%s'\n", __func__,
842 de->parent->name, de->name, de->subdir->name); 826 de->parent->name, de->name, de->subdir->name);
843 pde_put(de); 827 pde_put(de);
844} 828}
845EXPORT_SYMBOL(remove_proc_entry); 829EXPORT_SYMBOL(remove_proc_entry);
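
The proc_lookup_de() rewrite drops the shared out_unlock tail in favour of returning straight from the match: lookup-style helpers return NULL once the dentry has been instantiated with d_add(), ERR_PTR(-ENOMEM) if the inode could not be set up, and ERR_PTR(-ENOENT) when nothing matched. A stripped-down sketch of that shape, with the proc-specific locking elided and hypothetical table helpers:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/err.h>

static bool example_name_exists(struct inode *dir, const struct qstr *name);   /* hypothetical */
static struct inode *example_make_inode(struct super_block *sb);               /* hypothetical */

static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry)
{
        struct inode *inode;

        if (!example_name_exists(dir, &dentry->d_name))
                return ERR_PTR(-ENOENT);

        inode = example_make_inode(dir->i_sb);
        if (!inode)
                return ERR_PTR(-ENOMEM);

        d_add(dentry, inode);   /* success: dentry is now instantiated */
        return NULL;
}
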
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 439ae6886507..a86aebc9ba7c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -13,6 +13,7 @@
13#include <linux/stat.h> 13#include <linux/stat.h>
14#include <linux/completion.h> 14#include <linux/completion.h>
15#include <linux/poll.h> 15#include <linux/poll.h>
16#include <linux/printk.h>
16#include <linux/file.h> 17#include <linux/file.h>
17#include <linux/limits.h> 18#include <linux/limits.h>
18#include <linux/init.h> 19#include <linux/init.h>
@@ -144,7 +145,7 @@ void pde_users_dec(struct proc_dir_entry *pde)
144 145
145static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) 146static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
146{ 147{
147 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 148 struct proc_dir_entry *pde = PDE(file_inode(file));
148 loff_t rv = -EINVAL; 149 loff_t rv = -EINVAL;
149 loff_t (*llseek)(struct file *, loff_t, int); 150 loff_t (*llseek)(struct file *, loff_t, int);
150 151
@@ -179,7 +180,7 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
179 180
180static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 181static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
181{ 182{
182 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 183 struct proc_dir_entry *pde = PDE(file_inode(file));
183 ssize_t rv = -EIO; 184 ssize_t rv = -EIO;
184 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 185 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
185 186
@@ -201,7 +202,7 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count,
201 202
202static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 203static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
203{ 204{
204 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 205 struct proc_dir_entry *pde = PDE(file_inode(file));
205 ssize_t rv = -EIO; 206 ssize_t rv = -EIO;
206 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 207 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
207 208
@@ -223,7 +224,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
223 224
224static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) 225static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
225{ 226{
226 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 227 struct proc_dir_entry *pde = PDE(file_inode(file));
227 unsigned int rv = DEFAULT_POLLMASK; 228 unsigned int rv = DEFAULT_POLLMASK;
228 unsigned int (*poll)(struct file *, struct poll_table_struct *); 229 unsigned int (*poll)(struct file *, struct poll_table_struct *);
229 230
@@ -245,7 +246,7 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
245 246
246static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 247static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
247{ 248{
248 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 249 struct proc_dir_entry *pde = PDE(file_inode(file));
249 long rv = -ENOTTY; 250 long rv = -ENOTTY;
250 long (*ioctl)(struct file *, unsigned int, unsigned long); 251 long (*ioctl)(struct file *, unsigned int, unsigned long);
251 252
@@ -268,7 +269,7 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
268#ifdef CONFIG_COMPAT 269#ifdef CONFIG_COMPAT
269static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 270static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
270{ 271{
271 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 272 struct proc_dir_entry *pde = PDE(file_inode(file));
272 long rv = -ENOTTY; 273 long rv = -ENOTTY;
273 long (*compat_ioctl)(struct file *, unsigned int, unsigned long); 274 long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
274 275
@@ -291,7 +292,7 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
291 292
292static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) 293static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
293{ 294{
294 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); 295 struct proc_dir_entry *pde = PDE(file_inode(file));
295 int rv = -EIO; 296 int rv = -EIO;
296 int (*mmap)(struct file *, struct vm_area_struct *); 297 int (*mmap)(struct file *, struct vm_area_struct *);
297 298
@@ -445,12 +446,9 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
445 446
446struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) 447struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
447{ 448{
448 struct inode * inode; 449 struct inode *inode = iget_locked(sb, de->low_ino);
449 450
450 inode = iget_locked(sb, de->low_ino); 451 if (inode && (inode->i_state & I_NEW)) {
451 if (!inode)
452 return NULL;
453 if (inode->i_state & I_NEW) {
454 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 452 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
455 PROC_I(inode)->pde = de; 453 PROC_I(inode)->pde = de;
456 454
@@ -482,10 +480,12 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
482 } else 480 } else
483 pde_put(de); 481 pde_put(de);
484 return inode; 482 return inode;
485} 483}
486 484
487int proc_fill_super(struct super_block *s) 485int proc_fill_super(struct super_block *s)
488{ 486{
487 struct inode *root_inode;
488
489 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; 489 s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
490 s->s_blocksize = 1024; 490 s->s_blocksize = 1024;
491 s->s_blocksize_bits = 10; 491 s->s_blocksize_bits = 10;
@@ -494,11 +494,17 @@ int proc_fill_super(struct super_block *s)
494 s->s_time_gran = 1; 494 s->s_time_gran = 1;
495 495
496 pde_get(&proc_root); 496 pde_get(&proc_root);
497 s->s_root = d_make_root(proc_get_inode(s, &proc_root)); 497 root_inode = proc_get_inode(s, &proc_root);
498 if (s->s_root) 498 if (!root_inode) {
499 return 0; 499 pr_err("proc_fill_super: get root inode failed\n");
500 return -ENOMEM;
501 }
500 502
501 printk("proc_read_super: get root inode failed\n"); 503 s->s_root = d_make_root(root_inode);
502 pde_put(&proc_root); 504 if (!s->s_root) {
503 return -ENOMEM; 505 pr_err("proc_fill_super: allocate dentry failed\n");
506 return -ENOMEM;
507 }
508
509 return 0;
504} 510}
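
proc_get_inode() now folds the NULL check into the I_NEW test, and proc_fill_super() reports its two failure modes (no root inode vs. no root dentry) separately. Behind the first change is the iget_locked() contract: it returns either a cached inode or a newly allocated one with I_NEW set, which the caller fills in and then releases with unlock_new_inode(). A rough sketch, with a hypothetical fill step:

#include <linux/fs.h>

static void example_fill(struct inode *inode);  /* hypothetical fs-specific setup */

static struct inode *example_iget(struct super_block *sb, unsigned long ino)
{
        struct inode *inode = iget_locked(sb, ino);

        if (inode && (inode->i_state & I_NEW)) {
                example_fill(inode);            /* first time this ino is seen */
                unlock_new_inode(inode);
        }
        return inode;   /* NULL here means -ENOMEM for the caller */
}
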
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 252544c05207..85ff3a4598b3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/binfmts.h>
14struct ctl_table_header; 15struct ctl_table_header;
15struct mempolicy; 16struct mempolicy;
16 17
@@ -108,7 +109,7 @@ static inline int task_dumpable(struct task_struct *task)
108 if (mm) 109 if (mm)
109 dumpable = get_dumpable(mm); 110 dumpable = get_dumpable(mm);
110 task_unlock(task); 111 task_unlock(task);
111 if (dumpable == SUID_DUMPABLE_ENABLED) 112 if (dumpable == SUID_DUMP_USER)
112 return 1; 113 return 1;
113 return 0; 114 return 0;
114} 115}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e96d4f18ca3a..eda6f017f272 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -17,6 +17,7 @@
17#include <linux/elfcore.h> 17#include <linux/elfcore.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/highmem.h> 19#include <linux/highmem.h>
20#include <linux/printk.h>
20#include <linux/bootmem.h> 21#include <linux/bootmem.h>
21#include <linux/init.h> 22#include <linux/init.h>
22#include <linux/slab.h> 23#include <linux/slab.h>
@@ -619,7 +620,7 @@ static int __init proc_kcore_init(void)
619 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, 620 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
620 &proc_kcore_operations); 621 &proc_kcore_operations);
621 if (!proc_root_kcore) { 622 if (!proc_root_kcore) {
622 printk(KERN_ERR "couldn't create /proc/kcore\n"); 623 pr_err("couldn't create /proc/kcore\n");
623 return 0; /* Always returns 0. */ 624 return 0; /* Always returns 0. */
624 } 625 }
625 /* Store text area if it's special */ 626 /* Store text area if it's special */
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 80e4645f7990..1efaaa19c4f3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
40 * sysctl_overcommit_ratio / 100) + total_swap_pages; 40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41 41
42 cached = global_page_state(NR_FILE_PAGES) - 42 cached = global_page_state(NR_FILE_PAGES) -
43 total_swapcache_pages - i.bufferram; 43 total_swapcache_pages() - i.bufferram;
44 if (cached < 0) 44 if (cached < 0)
45 cached = 0; 45 cached = 0;
46 46
@@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
109 K(i.freeram), 109 K(i.freeram),
110 K(i.bufferram), 110 K(i.bufferram),
111 K(cached), 111 K(cached),
112 K(total_swapcache_pages), 112 K(total_swapcache_pages()),
113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), 113 K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), 114 K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
115 K(pages[LRU_ACTIVE_ANON]), 115 K(pages[LRU_ACTIVE_ANON]),
@@ -158,7 +158,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
158 vmi.used >> 10, 158 vmi.used >> 10,
159 vmi.largest_chunk >> 10 159 vmi.largest_chunk >> 10
160#ifdef CONFIG_MEMORY_FAILURE 160#ifdef CONFIG_MEMORY_FAILURE
161 ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) 161 ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
162#endif 162#endif
163#ifdef CONFIG_TRANSPARENT_HUGEPAGE 163#ifdef CONFIG_TRANSPARENT_HUGEPAGE
164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 164 ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index b1822dde55c2..ccfd99bd1c5a 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -45,7 +45,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
45 file = region->vm_file; 45 file = region->vm_file;
46 46
47 if (file) { 47 if (file) {
48 struct inode *inode = region->vm_file->f_path.dentry->d_inode; 48 struct inode *inode = file_inode(region->vm_file);
49 dev = inode->i_sb->s_dev; 49 dev = inode->i_sb->s_dev;
50 ino = inode->i_ino; 50 ino = inode->i_ino;
51 } 51 }
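
The conversions above (and the similar ones in later hunks) replace open-coded file->f_path.dentry->d_inode chains with the file_inode() accessor. A minimal sketch of what that helper amounts to, assuming the struct file carries a cached inode pointer as in the new VFS code:

/* Sketch only -- the real helper lives in <linux/fs.h>. */
static inline struct inode *file_inode(const struct file *f)
{
	return f->f_inode;	/* cached at open time, avoids the dentry hop */
}
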
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de20ec480fa0..30b590f5bd35 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -8,6 +8,7 @@
8#include <linux/time.h> 8#include <linux/time.h>
9#include <linux/proc_fs.h> 9#include <linux/proc_fs.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/printk.h>
11#include <linux/stat.h> 12#include <linux/stat.h>
12#include <linux/string.h> 13#include <linux/string.h>
13#include <linux/of.h> 14#include <linux/of.h>
@@ -110,8 +111,8 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
110 if (ent->data == oldprop) 111 if (ent->data == oldprop)
111 break; 112 break;
112 if (ent == NULL) { 113 if (ent == NULL) {
113 printk(KERN_WARNING "device-tree: property \"%s\" " 114 pr_warn("device-tree: property \"%s\" does not exist\n",
114 " does not exist\n", oldprop->name); 115 oldprop->name);
115 } else { 116 } else {
116 ent->data = newprop; 117 ent->data = newprop;
117 ent->size = newprop->length; 118 ent->size = newprop->length;
@@ -153,8 +154,8 @@ static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de,
153realloc: 154realloc:
154 fixed_name = kmalloc(fixup_len, GFP_KERNEL); 155 fixed_name = kmalloc(fixup_len, GFP_KERNEL);
155 if (fixed_name == NULL) { 156 if (fixed_name == NULL) {
156 printk(KERN_ERR "device-tree: Out of memory trying to fixup " 157 pr_err("device-tree: Out of memory trying to fixup "
157 "name \"%s\"\n", name); 158 "name \"%s\"\n", name);
158 return name; 159 return name;
159 } 160 }
160 161
@@ -175,8 +176,8 @@ retry:
175 goto retry; 176 goto retry;
176 } 177 }
177 178
178 printk(KERN_WARNING "device-tree: Duplicate name in %s, " 179 pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n",
179 "renamed to \"%s\"\n", np->full_name, fixed_name); 180 np->full_name, fixed_name);
180 181
181 return fixed_name; 182 return fixed_name;
182} 183}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index fe72cd073dea..b4ac6572474f 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -163,7 +163,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent,
163 struct net *net; 163 struct net *net;
164 164
165 ret = -EINVAL; 165 ret = -EINVAL;
166 net = get_proc_task_net(filp->f_path.dentry->d_inode); 166 net = get_proc_task_net(file_inode(filp));
167 if (net != NULL) { 167 if (net != NULL) {
168 ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); 168 ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
169 put_net(net); 169 put_net(net);
@@ -177,20 +177,6 @@ const struct file_operations proc_net_operations = {
177 .readdir = proc_tgid_net_readdir, 177 .readdir = proc_tgid_net_readdir,
178}; 178};
179 179
180
181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, umode_t mode, const struct file_operations *fops)
183{
184 return proc_create(name, mode, net->proc_net, fops);
185}
186EXPORT_SYMBOL_GPL(proc_net_fops_create);
187
188void proc_net_remove(struct net *net, const char *name)
189{
190 remove_proc_entry(name, net->proc_net);
191}
192EXPORT_SYMBOL_GPL(proc_net_remove);
193
194static __net_init int proc_net_ns_init(struct net *net) 180static __net_init int proc_net_ns_init(struct net *net)
195{ 181{
196 struct proc_dir_entry *netd, *net_statd; 182 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1827d88ad58b..ac05f33a0dde 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/poll.h> 6#include <linux/poll.h>
7#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
8#include <linux/printk.h>
8#include <linux/security.h> 9#include <linux/security.h>
9#include <linux/sched.h> 10#include <linux/sched.h>
10#include <linux/namei.h> 11#include <linux/namei.h>
@@ -57,7 +58,7 @@ static void sysctl_print_dir(struct ctl_dir *dir)
57{ 58{
58 if (dir->header.parent) 59 if (dir->header.parent)
59 sysctl_print_dir(dir->header.parent); 60 sysctl_print_dir(dir->header.parent);
60 printk(KERN_CONT "%s/", dir->header.ctl_table[0].procname); 61 pr_cont("%s/", dir->header.ctl_table[0].procname);
61} 62}
62 63
63static int namecmp(const char *name1, int len1, const char *name2, int len2) 64static int namecmp(const char *name1, int len1, const char *name2, int len2)
@@ -134,9 +135,9 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
134 else if (cmp > 0) 135 else if (cmp > 0)
135 p = &(*p)->rb_right; 136 p = &(*p)->rb_right;
136 else { 137 else {
137 printk(KERN_ERR "sysctl duplicate entry: "); 138 pr_err("sysctl duplicate entry: ");
138 sysctl_print_dir(head->parent); 139 sysctl_print_dir(head->parent);
139 printk(KERN_CONT "/%s\n", entry->procname); 140 pr_cont("/%s\n", entry->procname);
140 return -EEXIST; 141 return -EEXIST;
141 } 142 }
142 } 143 }
@@ -478,7 +479,7 @@ out:
478static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 479static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
479 size_t count, loff_t *ppos, int write) 480 size_t count, loff_t *ppos, int write)
480{ 481{
481 struct inode *inode = filp->f_path.dentry->d_inode; 482 struct inode *inode = file_inode(filp);
482 struct ctl_table_header *head = grab_header(inode); 483 struct ctl_table_header *head = grab_header(inode);
483 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 484 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
484 ssize_t error; 485 ssize_t error;
@@ -542,7 +543,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
542 543
543static unsigned int proc_sys_poll(struct file *filp, poll_table *wait) 544static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
544{ 545{
545 struct inode *inode = filp->f_path.dentry->d_inode; 546 struct inode *inode = file_inode(filp);
546 struct ctl_table_header *head = grab_header(inode); 547 struct ctl_table_header *head = grab_header(inode);
547 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 548 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
548 unsigned int ret = DEFAULT_POLLMASK; 549 unsigned int ret = DEFAULT_POLLMASK;
@@ -927,9 +928,9 @@ found:
927 subdir->header.nreg++; 928 subdir->header.nreg++;
928failed: 929failed:
929 if (unlikely(IS_ERR(subdir))) { 930 if (unlikely(IS_ERR(subdir))) {
930 printk(KERN_ERR "sysctl could not get directory: "); 931 pr_err("sysctl could not get directory: ");
931 sysctl_print_dir(dir); 932 sysctl_print_dir(dir);
932 printk(KERN_CONT "/%*.*s %ld\n", 933 pr_cont("/%*.*s %ld\n",
933 namelen, namelen, name, PTR_ERR(subdir)); 934 namelen, namelen, name, PTR_ERR(subdir));
934 } 935 }
935 drop_sysctl_table(&dir->header); 936 drop_sysctl_table(&dir->header);
@@ -995,8 +996,8 @@ static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
995 vaf.fmt = fmt; 996 vaf.fmt = fmt;
996 vaf.va = &args; 997 vaf.va = &args;
997 998
998 printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", 999 pr_err("sysctl table check failed: %s/%s %pV\n",
999 path, table->procname, &vaf); 1000 path, table->procname, &vaf);
1000 1001
1001 va_end(args); 1002 va_end(args);
1002 return -EINVAL; 1003 return -EINVAL;
@@ -1510,9 +1511,9 @@ static void put_links(struct ctl_table_header *header)
1510 drop_sysctl_table(link_head); 1511 drop_sysctl_table(link_head);
1511 } 1512 }
1512 else { 1513 else {
1513 printk(KERN_ERR "sysctl link missing during unregister: "); 1514 pr_err("sysctl link missing during unregister: ");
1514 sysctl_print_dir(parent); 1515 sysctl_print_dir(parent);
1515 printk(KERN_CONT "/%s\n", name); 1516 pr_cont("/%s\n", name);
1516 } 1517 }
1517 } 1518 }
1518} 1519}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ca5ce7f9f800..3e636d864d56 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -271,7 +271,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
271 const char *name = NULL; 271 const char *name = NULL;
272 272
273 if (file) { 273 if (file) {
274 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 274 struct inode *inode = file_inode(vma->vm_file);
275 dev = inode->i_sb->s_dev; 275 dev = inode->i_sb->s_dev;
276 ino = inode->i_ino; 276 ino = inode->i_ino;
277 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; 277 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
@@ -743,7 +743,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
743 return rv; 743 return rv;
744 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) 744 if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
745 return -EINVAL; 745 return -EINVAL;
746 task = get_proc_task(file->f_path.dentry->d_inode); 746 task = get_proc_task(file_inode(file));
747 if (!task) 747 if (!task)
748 return -ESRCH; 748 return -ESRCH;
749 mm = get_task_mm(task); 749 mm = get_task_mm(task);
@@ -1015,7 +1015,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
1015static ssize_t pagemap_read(struct file *file, char __user *buf, 1015static ssize_t pagemap_read(struct file *file, char __user *buf,
1016 size_t count, loff_t *ppos) 1016 size_t count, loff_t *ppos)
1017{ 1017{
1018 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 1018 struct task_struct *task = get_proc_task(file_inode(file));
1019 struct mm_struct *mm; 1019 struct mm_struct *mm;
1020 struct pagemapread pm; 1020 struct pagemapread pm;
1021 int ret = -ESRCH; 1021 int ret = -ESRCH;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 1ccfa537f5f5..56123a6f462e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -149,7 +149,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
149 file = vma->vm_file; 149 file = vma->vm_file;
150 150
151 if (file) { 151 if (file) {
152 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 152 struct inode *inode = file_inode(vma->vm_file);
153 dev = inode->i_sb->s_dev; 153 dev = inode->i_sb->s_dev;
154 ino = inode->i_ino; 154 ino = inode->i_ino;
155 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; 155 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 0d5071d29985..b870f740ab5a 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -15,6 +15,7 @@
15#include <linux/export.h> 15#include <linux/export.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/printk.h>
18#include <linux/bootmem.h> 19#include <linux/bootmem.h>
19#include <linux/init.h> 20#include <linux/init.h>
20#include <linux/crash_dump.h> 21#include <linux/crash_dump.h>
@@ -175,15 +176,15 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
175 start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); 176 start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
176 if (!curr_m) 177 if (!curr_m)
177 return -EINVAL; 178 return -EINVAL;
178 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
179 tsz = buflen;
180
181 /* Calculate left bytes in current memory segment. */
182 nr_bytes = (curr_m->size - (start - curr_m->paddr));
183 if (tsz > nr_bytes)
184 tsz = nr_bytes;
185 179
186 while (buflen) { 180 while (buflen) {
181 tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK));
182
183 /* Calculate left bytes in current memory segment. */
184 nr_bytes = (curr_m->size - (start - curr_m->paddr));
185 if (tsz > nr_bytes)
186 tsz = nr_bytes;
187
187 tmp = read_from_oldmem(buffer, tsz, &start, 1); 188 tmp = read_from_oldmem(buffer, tsz, &start, 1);
188 if (tmp < 0) 189 if (tmp < 0)
189 return tmp; 190 return tmp;
@@ -198,12 +199,6 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
198 struct vmcore, list); 199 struct vmcore, list);
199 start = curr_m->paddr; 200 start = curr_m->paddr;
200 } 201 }
201 if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
202 tsz = buflen;
203 /* Calculate left bytes in current memory segment. */
204 nr_bytes = (curr_m->size - (start - curr_m->paddr));
205 if (tsz > nr_bytes)
206 tsz = nr_bytes;
207 } 202 }
208 return acc; 203 return acc;
209} 204}
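
The restructured read loop above computes the per-iteration chunk size once at the top of the loop instead of duplicating the calculation before and after it. The two clamps it applies are, in effect (illustrative rewrite, not the committed code):

	/* chunk = min(bytes left in request, bytes left in page, bytes left in segment) */
	tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK));
	tsz = min_t(size_t, tsz, curr_m->size - (start - curr_m->paddr));
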
@@ -553,8 +548,7 @@ static int __init parse_crash_elf64_headers(void)
553 ehdr.e_ehsize != sizeof(Elf64_Ehdr) || 548 ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
554 ehdr.e_phentsize != sizeof(Elf64_Phdr) || 549 ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
555 ehdr.e_phnum == 0) { 550 ehdr.e_phnum == 0) {
556 printk(KERN_WARNING "Warning: Core image elf header is not" 551 pr_warn("Warning: Core image elf header is not sane\n");
557 "sane\n");
558 return -EINVAL; 552 return -EINVAL;
559 } 553 }
560 554
@@ -609,8 +603,7 @@ static int __init parse_crash_elf32_headers(void)
609 ehdr.e_ehsize != sizeof(Elf32_Ehdr) || 603 ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
610 ehdr.e_phentsize != sizeof(Elf32_Phdr) || 604 ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
611 ehdr.e_phnum == 0) { 605 ehdr.e_phnum == 0) {
612 printk(KERN_WARNING "Warning: Core image elf header is not" 606 pr_warn("Warning: Core image elf header is not sane\n");
613 "sane\n");
614 return -EINVAL; 607 return -EINVAL;
615 } 608 }
616 609
@@ -653,8 +646,7 @@ static int __init parse_crash_elf_headers(void)
653 if (rc < 0) 646 if (rc < 0)
654 return rc; 647 return rc;
655 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { 648 if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
656 printk(KERN_WARNING "Warning: Core image elf header" 649 pr_warn("Warning: Core image elf header not found\n");
657 " not found\n");
658 return -EINVAL; 650 return -EINVAL;
659 } 651 }
660 652
@@ -673,8 +665,7 @@ static int __init parse_crash_elf_headers(void)
673 /* Determine vmcore size. */ 665 /* Determine vmcore size. */
674 vmcore_size = get_vmcore_size_elf32(elfcorebuf); 666 vmcore_size = get_vmcore_size_elf32(elfcorebuf);
675 } else { 667 } else {
676 printk(KERN_WARNING "Warning: Core image elf header is not" 668 pr_warn("Warning: Core image elf header is not sane\n");
677 " sane\n");
678 return -EINVAL; 669 return -EINVAL;
679 } 670 }
680 return 0; 671 return 0;
@@ -690,7 +681,7 @@ static int __init vmcore_init(void)
690 return rc; 681 return rc;
691 rc = parse_crash_elf_headers(); 682 rc = parse_crash_elf_headers();
692 if (rc) { 683 if (rc) {
693 printk(KERN_WARNING "Kdump: vmcore not initialized\n"); 684 pr_warn("Kdump: vmcore not initialized\n");
694 return rc; 685 return rc;
695 } 686 }
696 687
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 67de74ca85f4..e4bcb2cf055a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -418,9 +418,25 @@ static struct file_system_type pstore_fs_type = {
418 .kill_sb = pstore_kill_sb, 418 .kill_sb = pstore_kill_sb,
419}; 419};
420 420
421static struct kobject *pstore_kobj;
422
421static int __init init_pstore_fs(void) 423static int __init init_pstore_fs(void)
422{ 424{
423 return register_filesystem(&pstore_fs_type); 425 int err = 0;
426
427 /* Create a convenient mount point for people to access pstore */
428 pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
429 if (!pstore_kobj) {
430 err = -ENOMEM;
431 goto out;
432 }
433
434 err = register_filesystem(&pstore_fs_type);
435 if (err < 0)
436 kobject_put(pstore_kobj);
437
438out:
439 return err;
424} 440}
425module_init(init_pstore_fs) 441module_init(init_pstore_fs)
426 442
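
The init path above registers a kobject so /sys/fs/pstore exists as a conventional mount point, then unwinds it if filesystem registration fails. A stripped-down sketch of the same register-then-unwind shape, with hypothetical names:

static struct kobject *example_kobj;

static int __init example_fs_init(void)
{
	int err;

	example_kobj = kobject_create_and_add("example", fs_kobj);
	if (!example_kobj)
		return -ENOMEM;

	err = register_filesystem(&example_fs_type);
	if (err)
		kobject_put(example_kobj);	/* drop the mount point again */
	return err;
}
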
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 5ea2e77ff023..86d1038b5a12 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -96,6 +96,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
96 } 96 }
97} 97}
98 98
99bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
100{
101 /*
102 * In case of NMI path, pstore shouldn't be blocked
103 * regardless of reason.
104 */
105 if (in_nmi())
106 return true;
107
108 switch (reason) {
109 /* In panic case, other cpus are stopped by smp_send_stop(). */
110 case KMSG_DUMP_PANIC:
111 /* Emergency restart shouldn't be blocked by spin lock. */
112 case KMSG_DUMP_EMERG:
113 return true;
114 default:
115 return false;
116 }
117}
118EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
119
99/* 120/*
100 * callback from kmsg_dump. (s2,l2) has the most recently 121 * callback from kmsg_dump. (s2,l2) has the most recently
101 * written bytes, older bytes are in (s1,l1). Save as much 122 * written bytes, older bytes are in (s1,l1). Save as much
@@ -114,10 +135,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
114 135
115 why = get_reason_str(reason); 136 why = get_reason_str(reason);
116 137
117 if (in_nmi()) { 138 if (pstore_cannot_block_path(reason)) {
118 is_locked = spin_trylock(&psinfo->buf_lock); 139 is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
119 if (!is_locked) 140 if (!is_locked) {
120 pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); 141 pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
142 , in_nmi() ? "NMI" : why);
143 }
121 } else 144 } else
122 spin_lock_irqsave(&psinfo->buf_lock, flags); 145 spin_lock_irqsave(&psinfo->buf_lock, flags);
123 oopscount++; 146 oopscount++;
@@ -143,9 +166,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
143 total += hsize + len; 166 total += hsize + len;
144 part++; 167 part++;
145 } 168 }
146 if (in_nmi()) { 169 if (pstore_cannot_block_path(reason)) {
147 if (is_locked) 170 if (is_locked)
148 spin_unlock(&psinfo->buf_lock); 171 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
149 } else 172 } else
150 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 173 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
151} 174}
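
The new pstore_cannot_block_path() predicate centralizes the "may we wait for the buffer lock here?" decision that pstore_dump() previously keyed off in_nmi() alone. The calling pattern it is meant for, in sketch form (not the committed code):

	bool locked;

	if (pstore_cannot_block_path(reason)) {
		/* NMI, panic or emergency restart: never spin on the lock */
		locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
	} else {
		spin_lock_irqsave(&psinfo->buf_lock, flags);
		locked = true;
	}
	/* ... write the record, then unlock only if 'locked' ... */
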
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f883e7e74305..288f068740f6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) 167static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
168{ 168{
169 char *hdr; 169 char *hdr;
170 struct timeval timestamp; 170 struct timespec timestamp;
171 size_t len; 171 size_t len;
172 172
173 do_gettimeofday(&timestamp); 173 /* Report zeroed timestamp if called before timekeeping has resumed. */
174 if (__getnstimeofday(&timestamp)) {
175 timestamp.tv_sec = 0;
176 timestamp.tv_nsec = 0;
177 }
174 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", 178 hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
175 (long)timestamp.tv_sec, (long)timestamp.tv_usec); 179 (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000));
176 WARN_ON_ONCE(!hdr); 180 WARN_ON_ONCE(!hdr);
177 len = hdr ? strlen(hdr) : 0; 181 len = hdr ? strlen(hdr) : 0;
178 persistent_ram_write(prz, hdr, len); 182 persistent_ram_write(prz, hdr, len);
@@ -291,9 +295,8 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
291 kfree(cxt->przs); 295 kfree(cxt->przs);
292} 296}
293 297
294static int __devinit ramoops_init_przs(struct device *dev, 298static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
295 struct ramoops_context *cxt, 299 phys_addr_t *paddr, size_t dump_mem_sz)
296 phys_addr_t *paddr, size_t dump_mem_sz)
297{ 300{
298 int err = -ENOMEM; 301 int err = -ENOMEM;
299 int i; 302 int i;
@@ -336,10 +339,9 @@ fail_prz:
336 return err; 339 return err;
337} 340}
338 341
339static int __devinit ramoops_init_prz(struct device *dev, 342static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
340 struct ramoops_context *cxt, 343 struct persistent_ram_zone **prz,
341 struct persistent_ram_zone **prz, 344 phys_addr_t *paddr, size_t sz, u32 sig)
342 phys_addr_t *paddr, size_t sz, u32 sig)
343{ 345{
344 if (!sz) 346 if (!sz)
345 return 0; 347 return 0;
@@ -367,7 +369,7 @@ static int __devinit ramoops_init_prz(struct device *dev,
367 return 0; 369 return 0;
368} 370}
369 371
370static int __devinit ramoops_probe(struct platform_device *pdev) 372static int ramoops_probe(struct platform_device *pdev)
371{ 373{
372 struct device *dev = &pdev->dev; 374 struct device *dev = &pdev->dev;
373 struct ramoops_platform_data *pdata = pdev->dev.platform_data; 375 struct ramoops_platform_data *pdata = pdev->dev.platform_data;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index eecd2a8a84dd..0306303be372 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -390,8 +390,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
390 return 0; 390 return 0;
391} 391}
392 392
393static int __devinit persistent_ram_post_init(struct persistent_ram_zone *prz, 393static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
394 u32 sig, int ecc_size) 394 int ecc_size)
395{ 395{
396 int ret; 396 int ret;
397 397
@@ -443,9 +443,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
443 kfree(prz); 443 kfree(prz);
444} 444}
445 445
446struct persistent_ram_zone * __devinit persistent_ram_new(phys_addr_t start, 446struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
447 size_t size, u32 sig, 447 u32 sig, int ecc_size)
448 int ecc_size)
449{ 448{
450 struct persistent_ram_zone *prz; 449 struct persistent_ram_zone *prz;
451 int ret = -ENOMEM; 450 int ret = -ENOMEM;
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 7b0329468a5d..28ce014b3cef 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -16,7 +16,7 @@
16 16
17static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) 17static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
18{ 18{
19 struct inode *inode = filp->f_path.dentry->d_inode; 19 struct inode *inode = file_inode(filp);
20 unsigned int offset; 20 unsigned int offset;
21 struct buffer_head *bh; 21 struct buffer_head *bh;
22 struct qnx4_inode_entry *de; 22 struct qnx4_inode_entry *de;
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index dc597353db3b..8798d065e400 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -117,7 +117,7 @@ static int qnx6_dir_longfilename(struct inode *inode,
117 117
118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) 118static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
119{ 119{
120 struct inode *inode = filp->f_path.dentry->d_inode; 120 struct inode *inode = file_inode(filp);
121 struct super_block *s = inode->i_sb; 121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s); 122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1); 123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index b6addf560483..57199a52a351 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -285,7 +285,7 @@ static struct buffer_head *qnx6_check_first_superblock(struct super_block *s,
285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) { 285 if (fs32_to_cpu(sbi, sb->sb_magic) == QNX6_SUPER_MAGIC) {
286 /* we got a big endian fs */ 286 /* we got a big endian fs */
287 QNX6DEBUG((KERN_INFO "qnx6: fs got different" 287 QNX6DEBUG((KERN_INFO "qnx6: fs got different"
288 " endianess.\n")); 288 " endianness.\n"));
289 return bh; 289 return bh;
290 } else 290 } else
291 sbi->s_bytesex = BYTESEX_LE; 291 sbi->s_bytesex = BYTESEX_LE;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d5378d028589..8d5b438cc188 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -202,7 +202,7 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
202 unsigned long pgoff, unsigned long flags) 202 unsigned long pgoff, unsigned long flags)
203{ 203{
204 unsigned long maxpages, lpages, nr, loop, ret; 204 unsigned long maxpages, lpages, nr, loop, ret;
205 struct inode *inode = file->f_path.dentry->d_inode; 205 struct inode *inode = file_inode(file);
206 struct page **pages = NULL, **ptr, *page; 206 struct page **pages = NULL, **ptr, *page;
207 loff_t isize; 207 loff_t isize;
208 208
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index eab8c09d3801..c24f1e10b946 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -260,6 +260,7 @@ static struct file_system_type ramfs_fs_type = {
260 .name = "ramfs", 260 .name = "ramfs",
261 .mount = ramfs_mount, 261 .mount = ramfs_mount,
262 .kill_sb = ramfs_kill_sb, 262 .kill_sb = ramfs_kill_sb,
263 .fs_flags = FS_USERNS_MOUNT,
263}; 264};
264static struct file_system_type rootfs_fs_type = { 265static struct file_system_type rootfs_fs_type = {
265 .name = "rootfs", 266 .name = "rootfs",
diff --git a/fs/read_write.c b/fs/read_write.c
index bb34af315280..3ae6dbe828bf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(no_llseek);
163 163
164loff_t default_llseek(struct file *file, loff_t offset, int whence) 164loff_t default_llseek(struct file *file, loff_t offset, int whence)
165{ 165{
166 struct inode *inode = file->f_path.dentry->d_inode; 166 struct inode *inode = file_inode(file);
167 loff_t retval; 167 loff_t retval;
168 168
169 mutex_lock(&inode->i_mutex); 169 mutex_lock(&inode->i_mutex);
@@ -290,7 +290,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
290 loff_t pos; 290 loff_t pos;
291 int retval = -EINVAL; 291 int retval = -EINVAL;
292 292
293 inode = file->f_path.dentry->d_inode; 293 inode = file_inode(file);
294 if (unlikely((ssize_t) count < 0)) 294 if (unlikely((ssize_t) count < 0))
295 return retval; 295 return retval;
296 pos = *ppos; 296 pos = *ppos;
@@ -901,8 +901,8 @@ ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
901 if (!(out.file->f_mode & FMODE_WRITE)) 901 if (!(out.file->f_mode & FMODE_WRITE))
902 goto fput_out; 902 goto fput_out;
903 retval = -EINVAL; 903 retval = -EINVAL;
904 in_inode = in.file->f_path.dentry->d_inode; 904 in_inode = file_inode(in.file);
905 out_inode = out.file->f_path.dentry->d_inode; 905 out_inode = file_inode(out.file);
906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); 906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
907 if (retval < 0) 907 if (retval < 0)
908 goto fput_out; 908 goto fput_out;
diff --git a/fs/readdir.c b/fs/readdir.c
index 5e69ef533b77..fee38e04fae4 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -22,7 +22,7 @@
22 22
23int vfs_readdir(struct file *file, filldir_t filler, void *buf) 23int vfs_readdir(struct file *file, filldir_t filler, void *buf)
24{ 24{
25 struct inode *inode = file->f_path.dentry->d_inode; 25 struct inode *inode = file_inode(file);
26 int res = -ENOTDIR; 26 int res = -ENOTDIR;
27 if (!file->f_op || !file->f_op->readdir) 27 if (!file->f_op || !file->f_op->readdir)
28 goto out; 28 goto out;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 50302d6f8895..6165bd4784f6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -268,7 +268,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
268 * new current position before returning. */ 268 * new current position before returning. */
269 ) 269 )
270{ 270{
271 struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. 271 struct inode *inode = file_inode(file); // Inode of the file that we are writing to.
272 /* To simplify coding at this time, we store 272 /* To simplify coding at this time, we store
273 locked pages in array for now */ 273 locked pages in array for now */
274 struct reiserfs_transaction_handle th; 274 struct reiserfs_transaction_handle th;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 95d7680ead47..ea5061fd4f3e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1603,10 +1603,10 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
1603 1603
1604 if (parent && (maxlen < 5)) { 1604 if (parent && (maxlen < 5)) {
1605 *lenp = 5; 1605 *lenp = 5;
1606 return 255; 1606 return FILEID_INVALID;
1607 } else if (maxlen < 3) { 1607 } else if (maxlen < 3) {
1608 *lenp = 3; 1608 *lenp = 3;
1609 return 255; 1609 return FILEID_INVALID;
1610 } 1610 }
1611 1611
1612 data[0] = inode->i_ino; 1612 data[0] = inode->i_ino;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0c2185042d5f..15cb5fe6b425 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -21,7 +21,7 @@
21 */ 21 */
22long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 22long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23{ 23{
24 struct inode *inode = filp->f_path.dentry->d_inode; 24 struct inode *inode = file_inode(filp);
25 unsigned int flags; 25 unsigned int flags;
26 int err = 0; 26 int err = 0;
27 27
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e60e87035bb3..9cc0740adffa 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -281,7 +281,7 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
281 } 281 }
282#if defined( REISERFS_USE_OIDMAPF ) 282#if defined( REISERFS_USE_OIDMAPF )
283 if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) { 283 if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
284 loff_t size = sb_info->oidmap.mapf->f_path.dentry->d_inode->i_size; 284 loff_t size = file_inode(sb_info->oidmap.mapf)->i_size;
285 total_used += size / sizeof(reiserfs_oidinterval_d_t); 285 total_used += size / sizeof(reiserfs_oidinterval_d_t);
286 } 286 }
287#endif 287#endif
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index fd7c5f60b46b..7e8d3a80bdab 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -147,7 +147,7 @@ static const struct address_space_operations romfs_aops = {
147 */ 147 */
148static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 148static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
149{ 149{
150 struct inode *i = filp->f_dentry->d_inode; 150 struct inode *i = file_inode(filp);
151 struct romfs_inode ri; 151 struct romfs_inode ri;
152 unsigned long offset, maxoff; 152 unsigned long offset, maxoff;
153 int j, ino, nextfh; 153 int j, ino, nextfh;
diff --git a/fs/select.c b/fs/select.c
index 2ef72d965036..8c1c96c27062 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/rcupdate.h> 27#include <linux/rcupdate.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/sched/rt.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31 32
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 9d863fb501f9..15c6304bab71 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -296,7 +296,7 @@ EXPORT_SYMBOL(seq_read);
296 * seq_lseek - ->llseek() method for sequential files. 296 * seq_lseek - ->llseek() method for sequential files.
297 * @file: the file in question 297 * @file: the file in question
298 * @offset: new position 298 * @offset: new position
299 * @origin: 0 for absolute, 1 for relative position 299 * @whence: 0 for absolute, 1 for relative position
300 * 300 *
301 * Ready-made ->f_op->llseek() 301 * Ready-made ->f_op->llseek()
302 */ 302 */
@@ -308,27 +308,27 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
308 mutex_lock(&m->lock); 308 mutex_lock(&m->lock);
309 m->version = file->f_version; 309 m->version = file->f_version;
310 switch (whence) { 310 switch (whence) {
311 case 1: 311 case SEEK_CUR:
312 offset += file->f_pos; 312 offset += file->f_pos;
313 case 0: 313 case SEEK_SET:
314 if (offset < 0) 314 if (offset < 0)
315 break; 315 break;
316 retval = offset; 316 retval = offset;
317 if (offset != m->read_pos) { 317 if (offset != m->read_pos) {
318 while ((retval=traverse(m, offset)) == -EAGAIN) 318 while ((retval = traverse(m, offset)) == -EAGAIN)
319 ; 319 ;
320 if (retval) { 320 if (retval) {
321 /* with extreme prejudice... */ 321 /* with extreme prejudice... */
322 file->f_pos = 0; 322 file->f_pos = 0;
323 m->read_pos = 0; 323 m->read_pos = 0;
324 m->version = 0; 324 m->version = 0;
325 m->index = 0; 325 m->index = 0;
326 m->count = 0; 326 m->count = 0;
327 } else { 327 } else {
328 m->read_pos = offset; 328 m->read_pos = offset;
329 retval = file->f_pos = offset; 329 retval = file->f_pos = offset;
330 }
331 } 330 }
331 }
332 } 332 }
333 file->f_version = m->version; 333 file->f_version = m->version;
334 mutex_unlock(&m->lock); 334 mutex_unlock(&m->lock);
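
seq_lseek() now switches on the standard SEEK_SET/SEEK_CUR constants, with SEEK_CUR deliberately falling through into the SEEK_SET validation after adjusting the offset. It remains the ready-made ->llseek for seq_file users, e.g. (hypothetical fops, shown for illustration only):

static const struct file_operations foo_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = foo_proc_open,	/* hypothetical single_open() wrapper */
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
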
diff --git a/fs/splice.c b/fs/splice.c
index 8890604e3fcd..718bd0056384 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -569,7 +569,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
569 return res; 569 return res;
570} 570}
571 571
572static ssize_t kernel_write(struct file *file, const char *buf, size_t count, 572ssize_t kernel_write(struct file *file, const char *buf, size_t count,
573 loff_t pos) 573 loff_t pos)
574{ 574{
575 mm_segment_t old_fs; 575 mm_segment_t old_fs;
@@ -578,11 +578,12 @@ static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
578 old_fs = get_fs(); 578 old_fs = get_fs();
579 set_fs(get_ds()); 579 set_fs(get_ds());
580 /* The cast to a user pointer is valid due to the set_fs() */ 580 /* The cast to a user pointer is valid due to the set_fs() */
581 res = vfs_write(file, (const char __user *)buf, count, &pos); 581 res = vfs_write(file, (__force const char __user *)buf, count, &pos);
582 set_fs(old_fs); 582 set_fs(old_fs);
583 583
584 return res; 584 return res;
585} 585}
586EXPORT_SYMBOL(kernel_write);
586 587
587ssize_t default_file_splice_read(struct file *in, loff_t *ppos, 588ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
588 struct pipe_inode_info *pipe, size_t len, 589 struct pipe_inode_info *pipe, size_t len,
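
kernel_write() above loses its static and gains an EXPORT_SYMBOL, so other kernel code can write a kernel-space buffer to a struct file without doing the set_fs() dance itself. An illustrative caller, assuming a file obtained elsewhere (e.g. via filp_open()); the helper name is invented:

static ssize_t append_record(struct file *filp, const char *rec, size_t len, loff_t pos)
{
	ssize_t n = kernel_write(filp, rec, len, pos);

	if (n < 0)
		pr_err("append_record: write failed (%zd)\n", n);
	return n;
}
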
@@ -696,8 +697,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
696 return -EINVAL; 697 return -EINVAL;
697 698
698 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; 699 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
699 if (sd->len < sd->total_len) 700
701 if (sd->len < sd->total_len && pipe->nrbufs > 1)
700 more |= MSG_SENDPAGE_NOTLAST; 702 more |= MSG_SENDPAGE_NOTLAST;
703
701 return file->f_op->sendpage(file, buf->page, buf->offset, 704 return file->f_op->sendpage(file, buf->page, buf->offset,
702 sd->len, &pos, more); 705 sd->len, &pos, more);
703} 706}
@@ -1168,7 +1171,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
1168 * randomly drop data for eg socket -> socket splicing. Use the 1171 * randomly drop data for eg socket -> socket splicing. Use the
1169 * piped splicing for that! 1172 * piped splicing for that!
1170 */ 1173 */
1171 i_mode = in->f_path.dentry->d_inode->i_mode; 1174 i_mode = file_inode(in)->i_mode;
1172 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 1175 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
1173 return -EINVAL; 1176 return -EINVAL;
1174 1177
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index b381305c9a47..57dc70ebbb19 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -102,7 +102,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
102 102
103static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) 103static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
104{ 104{
105 struct inode *inode = file->f_dentry->d_inode; 105 struct inode *inode = file_inode(file);
106 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; 106 struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
107 u64 block = squashfs_i(inode)->start + msblk->directory_table; 107 u64 block = squashfs_i(inode)->start + msblk->directory_table;
108 int offset = squashfs_i(inode)->offset, length, dir_count, size, 108 int offset = squashfs_i(inode)->offset, length, dir_count, size,
diff --git a/fs/stat.c b/fs/stat.c
index 14f45459c83d..04ce1ac20d20 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -37,17 +37,17 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
37 37
38EXPORT_SYMBOL(generic_fillattr); 38EXPORT_SYMBOL(generic_fillattr);
39 39
40int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 40int vfs_getattr(struct path *path, struct kstat *stat)
41{ 41{
42 struct inode *inode = dentry->d_inode; 42 struct inode *inode = path->dentry->d_inode;
43 int retval; 43 int retval;
44 44
45 retval = security_inode_getattr(mnt, dentry); 45 retval = security_inode_getattr(path->mnt, path->dentry);
46 if (retval) 46 if (retval)
47 return retval; 47 return retval;
48 48
49 if (inode->i_op->getattr) 49 if (inode->i_op->getattr)
50 return inode->i_op->getattr(mnt, dentry, stat); 50 return inode->i_op->getattr(path->mnt, path->dentry, stat);
51 51
52 generic_fillattr(inode, stat); 52 generic_fillattr(inode, stat);
53 return 0; 53 return 0;
@@ -61,8 +61,7 @@ int vfs_fstat(unsigned int fd, struct kstat *stat)
61 int error = -EBADF; 61 int error = -EBADF;
62 62
63 if (f.file) { 63 if (f.file) {
64 error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry, 64 error = vfs_getattr(&f.file->f_path, stat);
65 stat);
66 fdput(f); 65 fdput(f);
67 } 66 }
68 return error; 67 return error;
@@ -89,7 +88,7 @@ retry:
89 if (error) 88 if (error)
90 goto out; 89 goto out;
91 90
92 error = vfs_getattr(path.mnt, path.dentry, stat); 91 error = vfs_getattr(&path, stat);
93 path_put(&path); 92 path_put(&path);
94 if (retry_estale(error, lookup_flags)) { 93 if (retry_estale(error, lookup_flags)) {
95 lookup_flags |= LOOKUP_REVAL; 94 lookup_flags |= LOOKUP_REVAL;
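
vfs_getattr() now takes a struct path instead of separate vfsmount/dentry arguments, which is what the vfs_fstat() and stat call sites above are converted to. A hypothetical out-of-tree caller would follow suit:

static int stat_open_file(struct file *filp, struct kstat *stat)
{
	/* old: vfs_getattr(filp->f_path.mnt, filp->f_path.dentry, stat); */
	return vfs_getattr(&filp->f_path, stat);
}
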
diff --git a/fs/super.c b/fs/super.c
index 12f123712161..7465d4364208 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -447,14 +447,13 @@ struct super_block *sget(struct file_system_type *type,
447 void *data) 447 void *data)
448{ 448{
449 struct super_block *s = NULL; 449 struct super_block *s = NULL;
450 struct hlist_node *node;
451 struct super_block *old; 450 struct super_block *old;
452 int err; 451 int err;
453 452
454retry: 453retry:
455 spin_lock(&sb_lock); 454 spin_lock(&sb_lock);
456 if (test) { 455 if (test) {
457 hlist_for_each_entry(old, node, &type->fs_supers, s_instances) { 456 hlist_for_each_entry(old, &type->fs_supers, s_instances) {
458 if (!test(old, data)) 457 if (!test(old, data))
459 continue; 458 continue;
460 if (!grab_super(old)) 459 if (!grab_super(old))
@@ -554,10 +553,9 @@ void iterate_supers_type(struct file_system_type *type,
554 void (*f)(struct super_block *, void *), void *arg) 553 void (*f)(struct super_block *, void *), void *arg)
555{ 554{
556 struct super_block *sb, *p = NULL; 555 struct super_block *sb, *p = NULL;
557 struct hlist_node *node;
558 556
559 spin_lock(&sb_lock); 557 spin_lock(&sb_lock);
560 hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) { 558 hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
561 sb->s_count++; 559 sb->s_count++;
562 spin_unlock(&sb_lock); 560 spin_unlock(&sb_lock);
563 561
@@ -842,7 +840,7 @@ int get_anon_bdev(dev_t *p)
842 else if (error) 840 else if (error)
843 return -EAGAIN; 841 return -EAGAIN;
844 842
845 if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) { 843 if (dev == (1 << MINORBITS)) {
846 spin_lock(&unnamed_dev_lock); 844 spin_lock(&unnamed_dev_lock);
847 ida_remove(&unnamed_dev_ida, dev); 845 ida_remove(&unnamed_dev_ida, dev);
848 if (unnamed_dev_start > dev) 846 if (unnamed_dev_start > dev)
diff --git a/fs/sync.c b/fs/sync.c
index 14eefeb44636..2c5d6639a66a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -332,7 +332,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
332 if (!f.file) 332 if (!f.file)
333 goto out; 333 goto out;
334 334
335 i_mode = f.file->f_path.dentry->d_inode->i_mode; 335 i_mode = file_inode(f.file)->i_mode;
336 ret = -ESPIPE; 336 ret = -ESPIPE;
337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
338 !S_ISLNK(i_mode)) 338 !S_ISLNK(i_mode))
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 614b2b544880..15c68f9489ae 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -70,7 +70,7 @@ static ssize_t
70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) 70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
71{ 71{
72 struct bin_buffer *bb = file->private_data; 72 struct bin_buffer *bb = file->private_data;
73 int size = file->f_path.dentry->d_inode->i_size; 73 int size = file_inode(file)->i_size;
74 loff_t offs = *off; 74 loff_t offs = *off;
75 int count = min_t(size_t, bytes, PAGE_SIZE); 75 int count = min_t(size_t, bytes, PAGE_SIZE);
76 char *temp; 76 char *temp;
@@ -140,7 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
140 size_t bytes, loff_t *off) 140 size_t bytes, loff_t *off)
141{ 141{
142 struct bin_buffer *bb = file->private_data; 142 struct bin_buffer *bb = file->private_data;
143 int size = file->f_path.dentry->d_inode->i_size; 143 int size = file_inode(file)->i_size;
144 loff_t offs = *off; 144 loff_t offs = *off;
145 int count = min_t(size_t, bytes, PAGE_SIZE); 145 int count = min_t(size_t, bytes, PAGE_SIZE);
146 char *temp; 146 char *temp;
@@ -461,15 +461,14 @@ const struct file_operations bin_fops = {
461void unmap_bin_file(struct sysfs_dirent *attr_sd) 461void unmap_bin_file(struct sysfs_dirent *attr_sd)
462{ 462{
463 struct bin_buffer *bb; 463 struct bin_buffer *bb;
464 struct hlist_node *tmp;
465 464
466 if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) 465 if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
467 return; 466 return;
468 467
469 mutex_lock(&sysfs_bin_lock); 468 mutex_lock(&sysfs_bin_lock);
470 469
471 hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) { 470 hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) {
472 struct inode *inode = bb->file->f_path.dentry->d_inode; 471 struct inode *inode = file_inode(bb->file);
473 472
474 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 473 unmap_mapping_range(inode->i_mapping, 0, 0, 1);
475 } 474 }
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2df555c66d57..aec3d5c98c94 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -205,6 +205,48 @@ void sysfs_unmerge_group(struct kobject *kobj,
205} 205}
206EXPORT_SYMBOL_GPL(sysfs_unmerge_group); 206EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
207 207
208/**
209 * sysfs_add_link_to_group - add a symlink to an attribute group.
210 * @kobj: The kobject containing the group.
211 * @group_name: The name of the group.
212 * @target: The target kobject of the symlink to create.
213 * @link_name: The name of the symlink to create.
214 */
215int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
216 struct kobject *target, const char *link_name)
217{
218 struct sysfs_dirent *dir_sd;
219 int error = 0;
220
221 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
222 if (!dir_sd)
223 return -ENOENT;
224
225 error = sysfs_create_link_sd(dir_sd, target, link_name);
226 sysfs_put(dir_sd);
227
228 return error;
229}
230EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
231
232/**
233 * sysfs_remove_link_from_group - remove a symlink from an attribute group.
234 * @kobj: The kobject containing the group.
235 * @group_name: The name of the group.
236 * @link_name: The name of the symlink to remove.
237 */
238void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
239 const char *link_name)
240{
241 struct sysfs_dirent *dir_sd;
242
243 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name);
244 if (dir_sd) {
245 sysfs_hash_and_remove(dir_sd, NULL, link_name);
246 sysfs_put(dir_sd);
247 }
248}
249EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
208 250
209EXPORT_SYMBOL_GPL(sysfs_create_group); 251EXPORT_SYMBOL_GPL(sysfs_create_group);
210EXPORT_SYMBOL_GPL(sysfs_update_group); 252EXPORT_SYMBOL_GPL(sysfs_update_group);
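
The two new helpers let a driver hang symlinks off an existing attribute group without reaching into sysfs internals. Hypothetical usage (the group and link names are invented for illustration):

	int err;

	err = sysfs_add_link_to_group(&dev->kobj, "queues", &peer->kobj, "companion");
	if (err)
		return err;
	/* ... later, on teardown ... */
	sysfs_remove_link_from_group(&dev->kobj, "queues", "companion");
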
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index db940a9be045..8d924b5ec733 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -10,7 +10,7 @@
10 * Please see Documentation/filesystems/sysfs.txt for more information. 10 * Please see Documentation/filesystems/sysfs.txt for more information.
11 */ 11 */
12 12
13#define DEBUG 13#define DEBUG
14 14
15#include <linux/fs.h> 15#include <linux/fs.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3c9eb5624f5e..8c940df97a52 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -21,26 +21,17 @@
21 21
22#include "sysfs.h" 22#include "sysfs.h"
23 23
24static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target, 24static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
25 const char *name, int warn) 25 struct kobject *target,
26 const char *name, int warn)
26{ 27{
27 struct sysfs_dirent *parent_sd = NULL;
28 struct sysfs_dirent *target_sd = NULL; 28 struct sysfs_dirent *target_sd = NULL;
29 struct sysfs_dirent *sd = NULL; 29 struct sysfs_dirent *sd = NULL;
30 struct sysfs_addrm_cxt acxt; 30 struct sysfs_addrm_cxt acxt;
31 enum kobj_ns_type ns_type; 31 enum kobj_ns_type ns_type;
32 int error; 32 int error;
33 33
34 BUG_ON(!name); 34 BUG_ON(!name || !parent_sd);
35
36 if (!kobj)
37 parent_sd = &sysfs_root;
38 else
39 parent_sd = kobj->sd;
40
41 error = -EFAULT;
42 if (!parent_sd)
43 goto out_put;
44 35
45 /* target->sd can go away beneath us but is protected with 36 /* target->sd can go away beneath us but is protected with
46 * sysfs_assoc_lock. Fetch target_sd from it. 37 * sysfs_assoc_lock. Fetch target_sd from it.
@@ -96,6 +87,34 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
96} 87}
97 88
98/** 89/**
90 * sysfs_create_link_sd - create symlink to a given object.
91 * @sd: directory we're creating the link in.
92 * @target: object we're pointing to.
93 * @name: name of the symlink.
94 */
95int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
96 const char *name)
97{
98 return sysfs_do_create_link_sd(sd, target, name, 1);
99}
100
101static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
102 const char *name, int warn)
103{
104 struct sysfs_dirent *parent_sd = NULL;
105
106 if (!kobj)
107 parent_sd = &sysfs_root;
108 else
109 parent_sd = kobj->sd;
110
111 if (!parent_sd)
112 return -EFAULT;
113
114 return sysfs_do_create_link_sd(parent_sd, target, name, warn);
115}
116
117/**
99 * sysfs_create_link - create symlink between two objects. 118 * sysfs_create_link - create symlink between two objects.
100 * @kobj: object whose directory we're creating the link in. 119 * @kobj: object whose directory we're creating the link in.
101 * @target: object we're pointing to. 120 * @target: object we're pointing to.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d73c0932bbd6..d1e4043eb0c3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -240,3 +240,5 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd);
240 * symlink.c 240 * symlink.c
241 */ 241 */
242extern const struct inode_operations sysfs_symlink_inode_operations; 242extern const struct inode_operations sysfs_symlink_inode_operations;
243int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
244 const char *name);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index a77c42157620..3799e8dac3eb 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -68,7 +68,7 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
68static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) 68static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
69{ 69{
70 unsigned long pos = filp->f_pos; 70 unsigned long pos = filp->f_pos;
71 struct inode *inode = filp->f_path.dentry->d_inode; 71 struct inode *inode = file_inode(filp);
72 struct super_block *sb = inode->i_sb; 72 struct super_block *sb = inode->i_sb;
73 unsigned offset = pos & ~PAGE_CACHE_MASK; 73 unsigned offset = pos & ~PAGE_CACHE_MASK;
74 unsigned long n = pos >> PAGE_CACHE_SHIFT; 74 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d03822bbf190..0e606b12a59d 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,6 +22,7 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/compat.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
27struct timerfd_ctx { 28struct timerfd_ctx {
@@ -278,21 +279,17 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
278 return ufd; 279 return ufd;
279} 280}
280 281
281SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, 282static int do_timerfd_settime(int ufd, int flags,
282 const struct itimerspec __user *, utmr, 283 const struct itimerspec *new,
283 struct itimerspec __user *, otmr) 284 struct itimerspec *old)
284{ 285{
285 struct fd f; 286 struct fd f;
286 struct timerfd_ctx *ctx; 287 struct timerfd_ctx *ctx;
287 struct itimerspec ktmr, kotmr;
288 int ret; 288 int ret;
289 289
290 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
291 return -EFAULT;
292
293 if ((flags & ~TFD_SETTIME_FLAGS) || 290 if ((flags & ~TFD_SETTIME_FLAGS) ||
294 !timespec_valid(&ktmr.it_value) || 291 !timespec_valid(&new->it_value) ||
295 !timespec_valid(&ktmr.it_interval)) 292 !timespec_valid(&new->it_interval))
296 return -EINVAL; 293 return -EINVAL;
297 294
298 ret = timerfd_fget(ufd, &f); 295 ret = timerfd_fget(ufd, &f);
@@ -323,27 +320,23 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
323 if (ctx->expired && ctx->tintv.tv64) 320 if (ctx->expired && ctx->tintv.tv64)
324 hrtimer_forward_now(&ctx->tmr, ctx->tintv); 321 hrtimer_forward_now(&ctx->tmr, ctx->tintv);
325 322
326 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 323 old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
327 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 324 old->it_interval = ktime_to_timespec(ctx->tintv);
328 325
329 /* 326 /*
330 * Re-program the timer to the new value ... 327 * Re-program the timer to the new value ...
331 */ 328 */
332 ret = timerfd_setup(ctx, flags, &ktmr); 329 ret = timerfd_setup(ctx, flags, new);
333 330
334 spin_unlock_irq(&ctx->wqh.lock); 331 spin_unlock_irq(&ctx->wqh.lock);
335 fdput(f); 332 fdput(f);
336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
337 return -EFAULT;
338
339 return ret; 333 return ret;
340} 334}
341 335
342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 336static int do_timerfd_gettime(int ufd, struct itimerspec *t)
343{ 337{
344 struct fd f; 338 struct fd f;
345 struct timerfd_ctx *ctx; 339 struct timerfd_ctx *ctx;
346 struct itimerspec kotmr;
347 int ret = timerfd_fget(ufd, &f); 340 int ret = timerfd_fget(ufd, &f);
348 if (ret) 341 if (ret)
349 return ret; 342 return ret;
@@ -356,11 +349,65 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
356 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; 349 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
357 hrtimer_restart(&ctx->tmr); 350 hrtimer_restart(&ctx->tmr);
358 } 351 }
359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 352 t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
360 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 353 t->it_interval = ktime_to_timespec(ctx->tintv);
361 spin_unlock_irq(&ctx->wqh.lock); 354 spin_unlock_irq(&ctx->wqh.lock);
362 fdput(f); 355 fdput(f);
356 return 0;
357}
358
359SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
360 const struct itimerspec __user *, utmr,
361 struct itimerspec __user *, otmr)
362{
363 struct itimerspec new, old;
364 int ret;
365
366 if (copy_from_user(&new, utmr, sizeof(new)))
367 return -EFAULT;
368 ret = do_timerfd_settime(ufd, flags, &new, &old);
369 if (ret)
370 return ret;
371 if (otmr && copy_to_user(otmr, &old, sizeof(old)))
372 return -EFAULT;
373
374 return ret;
375}
363 376
377SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
378{
379 struct itimerspec kotmr;
380 int ret = do_timerfd_gettime(ufd, &kotmr);
381 if (ret)
382 return ret;
364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 383 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
365} 384}
366 385
 386#ifdef CONFIG_COMPAT
387COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
388 const struct itimerspec __user *, utmr,
389 struct itimerspec __user *, otmr)
390{
391 struct itimerspec new, old;
392 int ret;
393
394 if (get_compat_itimerspec(&new, utmr))
395 return -EFAULT;
396 ret = do_timerfd_settime(ufd, flags, &new, &old);
397 if (ret)
398 return ret;
399 if (otmr && put_compat_itimerspec(otmr, &old))
400 return -EFAULT;
401 return ret;
402}
403
404COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
405 struct itimerspec __user *, otmr)
406{
407 struct itimerspec kotmr;
408 int ret = do_timerfd_gettime(ufd, &kotmr);
409 if (ret)
410 return ret;
 411 return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0;
412}
413#endif
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 12817ffc7345..7f60e900edff 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2459,7 +2459,7 @@ error_dump:
2459 2459
2460static inline int chance(unsigned int n, unsigned int out_of) 2460static inline int chance(unsigned int n, unsigned int out_of)
2461{ 2461{
2462 return !!((random32() % out_of) + 1 <= n); 2462 return !!((prandom_u32() % out_of) + 1 <= n);
2463 2463
2464} 2464}
2465 2465
@@ -2477,13 +2477,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2477 if (chance(1, 2)) { 2477 if (chance(1, 2)) {
2478 d->pc_delay = 1; 2478 d->pc_delay = 1;
 2479 /* Fail within 1 minute */ 2479 /* Fail within 1 minute */
2480 delay = random32() % 60000; 2480 delay = prandom_u32() % 60000;
2481 d->pc_timeout = jiffies; 2481 d->pc_timeout = jiffies;
2482 d->pc_timeout += msecs_to_jiffies(delay); 2482 d->pc_timeout += msecs_to_jiffies(delay);
2483 ubifs_warn("failing after %lums", delay); 2483 ubifs_warn("failing after %lums", delay);
2484 } else { 2484 } else {
2485 d->pc_delay = 2; 2485 d->pc_delay = 2;
2486 delay = random32() % 10000; 2486 delay = prandom_u32() % 10000;
2487 /* Fail within 10000 operations */ 2487 /* Fail within 10000 operations */
2488 d->pc_cnt_max = delay; 2488 d->pc_cnt_max = delay;
2489 ubifs_warn("failing after %lu calls", delay); 2489 ubifs_warn("failing after %lu calls", delay);
@@ -2563,7 +2563,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
2563 unsigned int from, to, ffs = chance(1, 2); 2563 unsigned int from, to, ffs = chance(1, 2);
2564 unsigned char *p = (void *)buf; 2564 unsigned char *p = (void *)buf;
2565 2565
2566 from = random32() % (len + 1); 2566 from = prandom_u32() % (len + 1);
2567 /* Corruption may only span one max. write unit */ 2567 /* Corruption may only span one max. write unit */
2568 to = min(len, ALIGN(from, c->max_write_size)); 2568 to = min(len, ALIGN(from, c->max_write_size));
2569 2569
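
The random32() -> prandom_u32() conversion above is mechanical: both return a pseudo-random u32, and the debug code keeps reducing it with % to the range it needs. A minimal sketch of the same idiom, assuming only prandom_u32() from <linux/random.h> and msecs_to_jiffies(); the helper name is hypothetical.

/* Sketch: derive a random power-cut timeout the way power_cut_emulated() does. */
#include <linux/random.h>
#include <linux/jiffies.h>

static unsigned long random_pc_timeout(void)
{
        unsigned long delay = prandom_u32() % 60000;    /* within 1 minute */

        return jiffies + msecs_to_jiffies(delay);
}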
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 8a574776a493..de08c92f2e23 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -352,7 +352,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
352 struct qstr nm; 352 struct qstr nm;
353 union ubifs_key key; 353 union ubifs_key key;
354 struct ubifs_dent_node *dent; 354 struct ubifs_dent_node *dent;
355 struct inode *dir = file->f_path.dentry->d_inode; 355 struct inode *dir = file_inode(file);
356 struct ubifs_info *c = dir->i_sb->s_fs_info; 356 struct ubifs_info *c = dir->i_sb->s_fs_info;
357 357
358 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); 358 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5bc77817f382..f12189d2db1d 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1444,7 +1444,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1444 struct vm_fault *vmf) 1444 struct vm_fault *vmf)
1445{ 1445{
1446 struct page *page = vmf->page; 1446 struct page *page = vmf->page;
1447 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1447 struct inode *inode = file_inode(vma->vm_file);
1448 struct ubifs_info *c = inode->i_sb->s_fs_info; 1448 struct ubifs_info *c = inode->i_sb->s_fs_info;
1449 struct timespec now = ubifs_current_time(inode); 1449 struct timespec now = ubifs_current_time(inode);
1450 struct ubifs_budget_req req = { .new_page = 1 }; 1450 struct ubifs_budget_req req = { .new_page = 1 };
@@ -1522,6 +1522,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1522 ubifs_release_dirty_inode_budget(c, ui); 1522 ubifs_release_dirty_inode_budget(c, ui);
1523 } 1523 }
1524 1524
1525 wait_for_stable_page(page);
1525 unlock_page(page); 1526 unlock_page(page);
1526 return 0; 1527 return 0;
1527 1528
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 1a7e2d8bdbe9..648b143606cc 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -147,7 +147,7 @@ out_unlock:
147long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 147long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
148{ 148{
149 int flags, err; 149 int flags, err;
150 struct inode *inode = file->f_path.dentry->d_inode; 150 struct inode *inode = file_inode(file);
151 151
152 switch (cmd) { 152 switch (cmd) {
153 case FS_IOC_GETFLAGS: 153 case FS_IOC_GETFLAGS:
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 9daaeef675dd..4b826abb1528 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -2007,28 +2007,28 @@ static int dbg_populate_lsave(struct ubifs_info *c)
2007 2007
2008 if (!dbg_is_chk_gen(c)) 2008 if (!dbg_is_chk_gen(c))
2009 return 0; 2009 return 0;
2010 if (random32() & 3) 2010 if (prandom_u32() & 3)
2011 return 0; 2011 return 0;
2012 2012
2013 for (i = 0; i < c->lsave_cnt; i++) 2013 for (i = 0; i < c->lsave_cnt; i++)
2014 c->lsave[i] = c->main_first; 2014 c->lsave[i] = c->main_first;
2015 2015
2016 list_for_each_entry(lprops, &c->empty_list, list) 2016 list_for_each_entry(lprops, &c->empty_list, list)
2017 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2017 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2018 list_for_each_entry(lprops, &c->freeable_list, list) 2018 list_for_each_entry(lprops, &c->freeable_list, list)
2019 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2019 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2020 list_for_each_entry(lprops, &c->frdi_idx_list, list) 2020 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2021 c->lsave[random32() % c->lsave_cnt] = lprops->lnum; 2021 c->lsave[prandom_u32() % c->lsave_cnt] = lprops->lnum;
2022 2022
2023 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; 2023 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2024 for (i = 0; i < heap->cnt; i++) 2024 for (i = 0; i < heap->cnt; i++)
2025 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2025 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2026 heap = &c->lpt_heap[LPROPS_DIRTY - 1]; 2026 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2027 for (i = 0; i < heap->cnt; i++) 2027 for (i = 0; i < heap->cnt; i++)
2028 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2028 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2029 heap = &c->lpt_heap[LPROPS_FREE - 1]; 2029 heap = &c->lpt_heap[LPROPS_FREE - 1];
2030 for (i = 0; i < heap->cnt; i++) 2030 for (i = 0; i < heap->cnt; i++)
2031 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; 2031 c->lsave[prandom_u32() % c->lsave_cnt] = heap->arr[i]->lnum;
2032 2032
2033 return 1; 2033 return 1;
2034} 2034}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 769701ccb5c9..ba32da3fe08a 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -126,13 +126,14 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
126 else if (inum > o->inum) 126 else if (inum > o->inum)
127 p = p->rb_right; 127 p = p->rb_right;
128 else { 128 else {
129 if (o->dnext) { 129 if (o->del) {
130 spin_unlock(&c->orphan_lock); 130 spin_unlock(&c->orphan_lock);
131 dbg_gen("deleted twice ino %lu", 131 dbg_gen("deleted twice ino %lu",
132 (unsigned long)inum); 132 (unsigned long)inum);
133 return; 133 return;
134 } 134 }
135 if (o->cnext) { 135 if (o->cmt) {
136 o->del = 1;
136 o->dnext = c->orph_dnext; 137 o->dnext = c->orph_dnext;
137 c->orph_dnext = o; 138 c->orph_dnext = o;
138 spin_unlock(&c->orphan_lock); 139 spin_unlock(&c->orphan_lock);
@@ -172,7 +173,9 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
172 last = &c->orph_cnext; 173 last = &c->orph_cnext;
173 list_for_each_entry(orphan, &c->orph_new, new_list) { 174 list_for_each_entry(orphan, &c->orph_new, new_list) {
174 ubifs_assert(orphan->new); 175 ubifs_assert(orphan->new);
176 ubifs_assert(!orphan->cmt);
175 orphan->new = 0; 177 orphan->new = 0;
178 orphan->cmt = 1;
176 *last = orphan; 179 *last = orphan;
177 last = &orphan->cnext; 180 last = &orphan->cnext;
178 } 181 }
@@ -299,7 +302,9 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
299 cnext = c->orph_cnext; 302 cnext = c->orph_cnext;
300 for (i = 0; i < cnt; i++) { 303 for (i = 0; i < cnt; i++) {
301 orphan = cnext; 304 orphan = cnext;
305 ubifs_assert(orphan->cmt);
302 orph->inos[i] = cpu_to_le64(orphan->inum); 306 orph->inos[i] = cpu_to_le64(orphan->inum);
307 orphan->cmt = 0;
303 cnext = orphan->cnext; 308 cnext = orphan->cnext;
304 orphan->cnext = NULL; 309 orphan->cnext = NULL;
305 } 310 }
@@ -378,6 +383,7 @@ static int consolidate(struct ubifs_info *c)
378 list_for_each_entry(orphan, &c->orph_list, list) { 383 list_for_each_entry(orphan, &c->orph_list, list) {
379 if (orphan->new) 384 if (orphan->new)
380 continue; 385 continue;
386 orphan->cmt = 1;
381 *last = orphan; 387 *last = orphan;
382 last = &orphan->cnext; 388 last = &orphan->cnext;
383 cnt += 1; 389 cnt += 1;
@@ -442,6 +448,7 @@ static void erase_deleted(struct ubifs_info *c)
442 orphan = dnext; 448 orphan = dnext;
443 dnext = orphan->dnext; 449 dnext = orphan->dnext;
444 ubifs_assert(!orphan->new); 450 ubifs_assert(!orphan->new);
451 ubifs_assert(orphan->del);
445 rb_erase(&orphan->rb, &c->orph_tree); 452 rb_erase(&orphan->rb, &c->orph_tree);
446 list_del(&orphan->list); 453 list_del(&orphan->list);
447 c->tot_orphans -= 1; 454 c->tot_orphans -= 1;
@@ -531,6 +538,7 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
531 rb_link_node(&orphan->rb, parent, p); 538 rb_link_node(&orphan->rb, parent, p);
532 rb_insert_color(&orphan->rb, &c->orph_tree); 539 rb_insert_color(&orphan->rb, &c->orph_tree);
533 list_add_tail(&orphan->list, &c->orph_list); 540 list_add_tail(&orphan->list, &c->orph_list);
541 orphan->del = 1;
534 orphan->dnext = c->orph_dnext; 542 orphan->dnext = c->orph_dnext;
535 c->orph_dnext = orphan; 543 c->orph_dnext = orphan;
536 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, 544 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 523bbad69c0c..52a6559275c4 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -683,7 +683,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
683 c->ilebs[c->ileb_cnt++] = lnum; 683 c->ilebs[c->ileb_cnt++] = lnum;
684 dbg_cmt("LEB %d", lnum); 684 dbg_cmt("LEB %d", lnum);
685 } 685 }
686 if (dbg_is_chk_index(c) && !(random32() & 7)) 686 if (dbg_is_chk_index(c) && !(prandom_u32() & 7))
687 return -ENOSPC; 687 return -ENOSPC;
688 return 0; 688 return 0;
689} 689}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d133c276fe05..b2babce4d70f 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -904,6 +904,8 @@ struct ubifs_budget_req {
904 * @dnext: next orphan to delete 904 * @dnext: next orphan to delete
905 * @inum: inode number 905 * @inum: inode number
906 * @new: %1 => added since the last commit, otherwise %0 906 * @new: %1 => added since the last commit, otherwise %0
907 * @cmt: %1 => commit pending, otherwise %0
908 * @del: %1 => delete pending, otherwise %0
907 */ 909 */
908struct ubifs_orphan { 910struct ubifs_orphan {
909 struct rb_node rb; 911 struct rb_node rb;
@@ -912,7 +914,9 @@ struct ubifs_orphan {
912 struct ubifs_orphan *cnext; 914 struct ubifs_orphan *cnext;
913 struct ubifs_orphan *dnext; 915 struct ubifs_orphan *dnext;
914 ino_t inum; 916 ino_t inum;
915 int new; 917 unsigned new:1;
918 unsigned cmt:1;
919 unsigned del:1;
916}; 920};
917 921
918/** 922/**
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index eb8bfe2b89a5..b3e93f5e17c3 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -186,7 +186,7 @@ out:
186 186
187static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) 187static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
188{ 188{
189 struct inode *dir = filp->f_path.dentry->d_inode; 189 struct inode *dir = file_inode(filp);
190 int result; 190 int result;
191 191
192 if (filp->f_pos == 0) { 192 if (filp->f_pos == 0) {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 77b5953eaac8..29569dd08168 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -139,7 +139,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
139{ 139{
140 ssize_t retval; 140 ssize_t retval;
141 struct file *file = iocb->ki_filp; 141 struct file *file = iocb->ki_filp;
142 struct inode *inode = file->f_path.dentry->d_inode; 142 struct inode *inode = file_inode(file);
143 int err, pos; 143 int err, pos;
144 size_t count = iocb->ki_left; 144 size_t count = iocb->ki_left;
145 struct udf_inode_info *iinfo = UDF_I(inode); 145 struct udf_inode_info *iinfo = UDF_I(inode);
@@ -178,7 +178,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
178 178
179long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 179long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
180{ 180{
181 struct inode *inode = filp->f_dentry->d_inode; 181 struct inode *inode = file_inode(filp);
182 long old_block, new_block; 182 long old_block, new_block;
183 int result = -EINVAL; 183 int result = -EINVAL;
184 184
@@ -204,7 +204,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
204 goto out; 204 goto out;
205 case UDF_RELOCATE_BLOCKS: 205 case UDF_RELOCATE_BLOCKS:
206 if (!capable(CAP_SYS_ADMIN)) { 206 if (!capable(CAP_SYS_ADMIN)) {
207 result = -EACCES; 207 result = -EPERM;
208 goto out; 208 goto out;
209 } 209 }
210 if (get_user(old_block, (long __user *)arg)) { 210 if (get_user(old_block, (long __user *)arg)) {
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index cbae1ed0b7c1..7a12e48ad819 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -67,6 +67,74 @@ static void udf_update_extents(struct inode *,
67 struct extent_position *); 67 struct extent_position *);
68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); 68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
69 69
70static void __udf_clear_extent_cache(struct inode *inode)
71{
72 struct udf_inode_info *iinfo = UDF_I(inode);
73
74 if (iinfo->cached_extent.lstart != -1) {
75 brelse(iinfo->cached_extent.epos.bh);
76 iinfo->cached_extent.lstart = -1;
77 }
78}
79
80/* Invalidate extent cache */
81static void udf_clear_extent_cache(struct inode *inode)
82{
83 struct udf_inode_info *iinfo = UDF_I(inode);
84
85 spin_lock(&iinfo->i_extent_cache_lock);
86 __udf_clear_extent_cache(inode);
87 spin_unlock(&iinfo->i_extent_cache_lock);
88}
89
90/* Return contents of extent cache */
91static int udf_read_extent_cache(struct inode *inode, loff_t bcount,
92 loff_t *lbcount, struct extent_position *pos)
93{
94 struct udf_inode_info *iinfo = UDF_I(inode);
95 int ret = 0;
96
97 spin_lock(&iinfo->i_extent_cache_lock);
98 if ((iinfo->cached_extent.lstart <= bcount) &&
99 (iinfo->cached_extent.lstart != -1)) {
100 /* Cache hit */
101 *lbcount = iinfo->cached_extent.lstart;
102 memcpy(pos, &iinfo->cached_extent.epos,
103 sizeof(struct extent_position));
104 if (pos->bh)
105 get_bh(pos->bh);
106 ret = 1;
107 }
108 spin_unlock(&iinfo->i_extent_cache_lock);
109 return ret;
110}
111
112/* Add extent to extent cache */
113static void udf_update_extent_cache(struct inode *inode, loff_t estart,
114 struct extent_position *pos, int next_epos)
115{
116 struct udf_inode_info *iinfo = UDF_I(inode);
117
118 spin_lock(&iinfo->i_extent_cache_lock);
119 /* Invalidate previously cached extent */
120 __udf_clear_extent_cache(inode);
121 if (pos->bh)
122 get_bh(pos->bh);
123 memcpy(&iinfo->cached_extent.epos, pos,
124 sizeof(struct extent_position));
125 iinfo->cached_extent.lstart = estart;
126 if (next_epos)
127 switch (iinfo->i_alloc_type) {
128 case ICBTAG_FLAG_AD_SHORT:
129 iinfo->cached_extent.epos.offset -=
130 sizeof(struct short_ad);
131 break;
132 case ICBTAG_FLAG_AD_LONG:
133 iinfo->cached_extent.epos.offset -=
134 sizeof(struct long_ad);
135 }
136 spin_unlock(&iinfo->i_extent_cache_lock);
137}
70 138
71void udf_evict_inode(struct inode *inode) 139void udf_evict_inode(struct inode *inode)
72{ 140{
@@ -90,6 +158,7 @@ void udf_evict_inode(struct inode *inode)
90 } 158 }
91 kfree(iinfo->i_ext.i_data); 159 kfree(iinfo->i_ext.i_data);
92 iinfo->i_ext.i_data = NULL; 160 iinfo->i_ext.i_data = NULL;
161 udf_clear_extent_cache(inode);
93 if (want_delete) { 162 if (want_delete) {
94 udf_free_inode(inode); 163 udf_free_inode(inode);
95 } 164 }
@@ -105,6 +174,7 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
105 truncate_pagecache(inode, to, isize); 174 truncate_pagecache(inode, to, isize);
106 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 175 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
107 down_write(&iinfo->i_data_sem); 176 down_write(&iinfo->i_data_sem);
177 udf_clear_extent_cache(inode);
108 udf_truncate_extents(inode); 178 udf_truncate_extents(inode);
109 up_write(&iinfo->i_data_sem); 179 up_write(&iinfo->i_data_sem);
110 } 180 }
@@ -372,7 +442,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
372 iinfo->i_next_alloc_goal++; 442 iinfo->i_next_alloc_goal++;
373 } 443 }
374 444
375 445 udf_clear_extent_cache(inode);
376 phys = inode_getblk(inode, block, &err, &new); 446 phys = inode_getblk(inode, block, &err, &new);
377 if (!phys) 447 if (!phys)
378 goto abort; 448 goto abort;
@@ -1171,6 +1241,7 @@ set_size:
1171 } else { 1241 } else {
1172 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1242 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1173 down_write(&iinfo->i_data_sem); 1243 down_write(&iinfo->i_data_sem);
1244 udf_clear_extent_cache(inode);
1174 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize, 1245 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize,
1175 0x00, bsize - newsize - 1246 0x00, bsize - newsize -
1176 udf_file_entry_alloc_offset(inode)); 1247 udf_file_entry_alloc_offset(inode));
@@ -1184,6 +1255,7 @@ set_size:
1184 if (err) 1255 if (err)
1185 return err; 1256 return err;
1186 down_write(&iinfo->i_data_sem); 1257 down_write(&iinfo->i_data_sem);
1258 udf_clear_extent_cache(inode);
1187 truncate_setsize(inode, newsize); 1259 truncate_setsize(inode, newsize);
1188 udf_truncate_extents(inode); 1260 udf_truncate_extents(inode);
1189 up_write(&iinfo->i_data_sem); 1261 up_write(&iinfo->i_data_sem);
@@ -2156,11 +2228,12 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
2156 struct udf_inode_info *iinfo; 2228 struct udf_inode_info *iinfo;
2157 2229
2158 iinfo = UDF_I(inode); 2230 iinfo = UDF_I(inode);
2159 pos->offset = 0; 2231 if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) {
2160 pos->block = iinfo->i_location; 2232 pos->offset = 0;
2161 pos->bh = NULL; 2233 pos->block = iinfo->i_location;
2234 pos->bh = NULL;
2235 }
2162 *elen = 0; 2236 *elen = 0;
2163
2164 do { 2237 do {
2165 etype = udf_next_aext(inode, pos, eloc, elen, 1); 2238 etype = udf_next_aext(inode, pos, eloc, elen, 1);
2166 if (etype == -1) { 2239 if (etype == -1) {
@@ -2170,7 +2243,8 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
2170 } 2243 }
2171 lbcount += *elen; 2244 lbcount += *elen;
2172 } while (lbcount <= bcount); 2245 } while (lbcount <= bcount);
2173 2246 /* update extent cache */
2247 udf_update_extent_cache(inode, lbcount - *elen, pos, 1);
2174 *offset = (bcount + *elen - lbcount) >> blocksize_bits; 2248 *offset = (bcount + *elen - lbcount) >> blocksize_bits;
2175 2249
2176 return etype; 2250 return etype;
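
The new per-inode extent cache lets inode_bmap() resume from the last extent it looked up instead of re-walking the whole extent list for every block. A simplified sketch of the cached lookup flow as wired above; function and field names are taken from the patch, while the offset computation, EOF bookkeeping and i_data_sem locking are elided.

/* Simplified sketch of the cached lookup path in inode_bmap(). */
static int8_t bmap_with_cache(struct inode *inode, loff_t bcount,
                              struct extent_position *pos,
                              struct kernel_lb_addr *eloc, uint32_t *elen)
{
        loff_t lbcount = 0;
        int8_t etype;

        if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) {
                /* cache miss: start from the inode's first extent */
                pos->offset = 0;
                pos->block = UDF_I(inode)->i_location;
                pos->bh = NULL;
        }
        *elen = 0;
        do {
                etype = udf_next_aext(inode, pos, eloc, elen, 1);
                if (etype == -1)
                        return -1;              /* walked past the last extent */
                lbcount += *elen;
        } while (lbcount <= bcount);

        /* remember where this extent starts so the next lookup resumes here */
        udf_update_extent_cache(inode, lbcount - *elen, pos, 1);
        return etype;
}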
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 95fee278ab9d..102c072c6bbf 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1270,10 +1270,10 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
1270 1270
1271 if (parent && (len < 5)) { 1271 if (parent && (len < 5)) {
1272 *lenp = 5; 1272 *lenp = 5;
1273 return 255; 1273 return FILEID_INVALID;
1274 } else if (len < 3) { 1274 } else if (len < 3) {
1275 *lenp = 3; 1275 *lenp = 3;
1276 return 255; 1276 return FILEID_INVALID;
1277 } 1277 }
1278 1278
1279 *lenp = 3; 1279 *lenp = 3;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index d44fb568abe1..bc5b30a819e8 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -134,6 +134,8 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
134 ei->i_next_alloc_goal = 0; 134 ei->i_next_alloc_goal = 0;
135 ei->i_strat4096 = 0; 135 ei->i_strat4096 = 0;
136 init_rwsem(&ei->i_data_sem); 136 init_rwsem(&ei->i_data_sem);
137 ei->cached_extent.lstart = -1;
138 spin_lock_init(&ei->i_extent_cache_lock);
137 139
138 return &ei->vfs_inode; 140 return &ei->vfs_inode;
139} 141}
@@ -307,7 +309,8 @@ static void udf_sb_free_partitions(struct super_block *sb)
307{ 309{
308 struct udf_sb_info *sbi = UDF_SB(sb); 310 struct udf_sb_info *sbi = UDF_SB(sb);
309 int i; 311 int i;
310 312 if (sbi->s_partmaps == NULL)
313 return;
311 for (i = 0; i < sbi->s_partitions; i++) 314 for (i = 0; i < sbi->s_partitions; i++)
312 udf_free_partition(&sbi->s_partmaps[i]); 315 udf_free_partition(&sbi->s_partmaps[i]);
313 kfree(sbi->s_partmaps); 316 kfree(sbi->s_partmaps);
@@ -1020,7 +1023,6 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
1020 if (bitmap == NULL) 1023 if (bitmap == NULL)
1021 return NULL; 1024 return NULL;
1022 1025
1023 bitmap->s_block_bitmap = (struct buffer_head **)(bitmap + 1);
1024 bitmap->s_nr_groups = nr_groups; 1026 bitmap->s_nr_groups = nr_groups;
1025 return bitmap; 1027 return bitmap;
1026} 1028}
@@ -1078,8 +1080,6 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1078 if (!bitmap) 1080 if (!bitmap)
1079 return 1; 1081 return 1;
1080 map->s_uspace.s_bitmap = bitmap; 1082 map->s_uspace.s_bitmap = bitmap;
1081 bitmap->s_extLength = le32_to_cpu(
1082 phd->unallocSpaceBitmap.extLength);
1083 bitmap->s_extPosition = le32_to_cpu( 1083 bitmap->s_extPosition = le32_to_cpu(
1084 phd->unallocSpaceBitmap.extPosition); 1084 phd->unallocSpaceBitmap.extPosition);
1085 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; 1085 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
@@ -1114,8 +1114,6 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1114 if (!bitmap) 1114 if (!bitmap)
1115 return 1; 1115 return 1;
1116 map->s_fspace.s_bitmap = bitmap; 1116 map->s_fspace.s_bitmap = bitmap;
1117 bitmap->s_extLength = le32_to_cpu(
1118 phd->freedSpaceBitmap.extLength);
1119 bitmap->s_extPosition = le32_to_cpu( 1117 bitmap->s_extPosition = le32_to_cpu(
1120 phd->freedSpaceBitmap.extPosition); 1118 phd->freedSpaceBitmap.extPosition);
1121 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; 1119 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
@@ -1865,6 +1863,8 @@ static void udf_open_lvid(struct super_block *sb)
1865 mark_buffer_dirty(bh); 1863 mark_buffer_dirty(bh);
1866 sbi->s_lvid_dirty = 0; 1864 sbi->s_lvid_dirty = 0;
1867 mutex_unlock(&sbi->s_alloc_mutex); 1865 mutex_unlock(&sbi->s_alloc_mutex);
1866 /* Make opening of filesystem visible on the media immediately */
1867 sync_dirty_buffer(bh);
1868} 1868}
1869 1869
1870static void udf_close_lvid(struct super_block *sb) 1870static void udf_close_lvid(struct super_block *sb)
@@ -1905,6 +1905,8 @@ static void udf_close_lvid(struct super_block *sb)
1905 mark_buffer_dirty(bh); 1905 mark_buffer_dirty(bh);
1906 sbi->s_lvid_dirty = 0; 1906 sbi->s_lvid_dirty = 0;
1907 mutex_unlock(&sbi->s_alloc_mutex); 1907 mutex_unlock(&sbi->s_alloc_mutex);
1908 /* Make closing of filesystem visible on the media immediately */
1909 sync_dirty_buffer(bh);
1908} 1910}
1909 1911
1910u64 lvid_get_unique_id(struct super_block *sb) 1912u64 lvid_get_unique_id(struct super_block *sb)
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index bb8309dcd5c1..b5cd8ed2aa12 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -1,6 +1,19 @@
1#ifndef _UDF_I_H 1#ifndef _UDF_I_H
2#define _UDF_I_H 2#define _UDF_I_H
3 3
4struct extent_position {
5 struct buffer_head *bh;
6 uint32_t offset;
7 struct kernel_lb_addr block;
8};
9
10struct udf_ext_cache {
11 /* Extent position */
12 struct extent_position epos;
13 /* Start logical offset in bytes */
14 loff_t lstart;
15};
16
4/* 17/*
5 * The i_data_sem and i_mutex serve for protection of allocation information 18 * The i_data_sem and i_mutex serve for protection of allocation information
6 * of a regular files and symlinks. This includes all extents belonging to 19 * of a regular files and symlinks. This includes all extents belonging to
@@ -35,6 +48,9 @@ struct udf_inode_info {
35 __u8 *i_data; 48 __u8 *i_data;
36 } i_ext; 49 } i_ext;
37 struct rw_semaphore i_data_sem; 50 struct rw_semaphore i_data_sem;
51 struct udf_ext_cache cached_extent;
52 /* Spinlock for protecting extent cache */
53 spinlock_t i_extent_cache_lock;
38 struct inode vfs_inode; 54 struct inode vfs_inode;
39}; 55};
40 56
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 5f027227f085..ed401e94aa8c 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -80,10 +80,9 @@ struct udf_virtual_data {
80}; 80};
81 81
82struct udf_bitmap { 82struct udf_bitmap {
83 __u32 s_extLength;
84 __u32 s_extPosition; 83 __u32 s_extPosition;
85 __u16 s_nr_groups; 84 int s_nr_groups;
86 struct buffer_head **s_block_bitmap; 85 struct buffer_head *s_block_bitmap[0];
87}; 86};
88 87
89struct udf_part_map { 88struct udf_part_map {
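
The struct change above replaces a manually positioned pointer (previously set via "bitmap + 1" in udf_sb_alloc_bitmap()) with a zero-length trailing array, so a single allocation covers the header and the per-group buffer_head pointers. A minimal sketch of the allocation side under that assumption; the helper name is hypothetical.

/* Hypothetical helper sketching the single-allocation layout that the
 * s_block_bitmap[0] member implies. */
#include <linux/vmalloc.h>

static struct udf_bitmap *udf_bitmap_alloc(int nr_groups)
{
        struct udf_bitmap *bitmap;
        size_t size = sizeof(*bitmap) +
                      nr_groups * sizeof(struct buffer_head *);

        bitmap = vzalloc(size);         /* kzalloc() would also do for small sizes */
        if (!bitmap)
                return NULL;

        bitmap->s_nr_groups = nr_groups;
        /* bitmap->s_block_bitmap[i] now lives inside the same allocation,
         * so no separate pointer fix-up is needed. */
        return bitmap;
}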
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index de038da6f6bd..be7dabbbcb49 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -113,11 +113,6 @@ struct ustr {
113 uint8_t u_len; 113 uint8_t u_len;
114}; 114};
115 115
116struct extent_position {
117 struct buffer_head *bh;
118 uint32_t offset;
119 struct kernel_lb_addr block;
120};
121 116
122/* super.c */ 117/* super.c */
123 118
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index e4f10a40768a..0bf6e16f8d79 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -29,7 +29,7 @@ config UFS_FS
29 29
30config UFS_FS_WRITE 30config UFS_FS_WRITE
31 bool "UFS file system write support (DANGEROUS)" 31 bool "UFS file system write support (DANGEROUS)"
32 depends on UFS_FS && EXPERIMENTAL 32 depends on UFS_FS
33 help 33 help
34 Say Y here if you want to try writing to UFS partitions. This is 34 Say Y here if you want to try writing to UFS partitions. This is
35 experimental, so you should back up your UFS partitions beforehand. 35 experimental, so you should back up your UFS partitions beforehand.
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index dbc90994715a..3a75ca09c506 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -433,7 +433,7 @@ static int
433ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) 433ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
434{ 434{
435 loff_t pos = filp->f_pos; 435 loff_t pos = filp->f_pos;
436 struct inode *inode = filp->f_path.dentry->d_inode; 436 struct inode *inode = file_inode(filp);
437 struct super_block *sb = inode->i_sb; 437 struct super_block *sb = inode->i_sb;
438 unsigned int offset = pos & ~PAGE_CACHE_MASK; 438 unsigned int offset = pos & ~PAGE_CACHE_MASK;
439 unsigned long n = pos >> PAGE_CACHE_SHIFT; 439 unsigned long n = pos >> PAGE_CACHE_SHIFT;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5a7ffe54f5d5..cc33aaf219f1 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -70,8 +70,8 @@ config XFS_RT
70 If unsure, say N. 70 If unsure, say N.
71 71
72config XFS_DEBUG 72config XFS_DEBUG
73 bool "XFS Debugging support (EXPERIMENTAL)" 73 bool "XFS Debugging support"
74 depends on XFS_FS && EXPERIMENTAL 74 depends on XFS_FS
75 help 75 help
76 Say Y here to get an XFS build with many debugging features, 76 Say Y here to get an XFS build with many debugging features,
77 including ASSERT checks, function wrappers around macros, 77 including ASSERT checks, function wrappers around macros,
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 393055fe3aef..0ad23253e8b1 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1925,8 +1925,6 @@ xfs_alloc_fix_freelist(
1925 targs.mp = mp; 1925 targs.mp = mp;
1926 targs.agbp = agbp; 1926 targs.agbp = agbp;
1927 targs.agno = args->agno; 1927 targs.agno = args->agno;
1928 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1929 targs.minalignslop = 0;
1930 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1928 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1931 targs.type = XFS_ALLOCTYPE_THIS_AG; 1929 targs.type = XFS_ALLOCTYPE_THIS_AG;
1932 targs.pag = pag; 1930 targs.pag = pag;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
86 } 86 }
87 87
88 if (ioend->io_iocb) { 88 if (ioend->io_iocb) {
89 inode_dio_done(ioend->io_inode);
89 if (ioend->io_isasync) { 90 if (ioend->io_isasync) {
90 aio_complete(ioend->io_iocb, ioend->io_error ? 91 aio_complete(ioend->io_iocb, ioend->io_error ?
91 ioend->io_error : ioend->io_result, 0); 92 ioend->io_error : ioend->io_result, 0);
92 } 93 }
93 inode_dio_done(ioend->io_inode);
94 } 94 }
95 95
96 mempool_free(ioend, xfs_ioend_pool); 96 mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index aaf472532b3c..888683844d98 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -300,9 +300,12 @@ xfs_attr_set_int(
300 if (rsvd) 300 if (rsvd)
301 args.trans->t_flags |= XFS_TRANS_RESERVE; 301 args.trans->t_flags |= XFS_TRANS_RESERVE;
302 302
303 if ((error = xfs_trans_reserve(args.trans, args.total, 303 error = xfs_trans_reserve(args.trans, args.total,
304 XFS_ATTRSET_LOG_RES(mp, args.total), 0, 304 XFS_ATTRSETM_LOG_RES(mp) +
305 XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { 305 XFS_ATTRSETRT_LOG_RES(mp) * args.total,
306 0, XFS_TRANS_PERM_LOG_RES,
307 XFS_ATTRSET_LOG_COUNT);
308 if (error) {
306 xfs_trans_cancel(args.trans, 0); 309 xfs_trans_cancel(args.trans, 0);
307 return(error); 310 return(error);
308 } 311 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..b44af9211bd9 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -147,7 +147,10 @@ xfs_bmap_local_to_extents(
147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 147 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
148 xfs_extlen_t total, /* total blocks needed by transaction */ 148 xfs_extlen_t total, /* total blocks needed by transaction */
149 int *logflagsp, /* inode logging flags */ 149 int *logflagsp, /* inode logging flags */
150 int whichfork); /* data or attr fork */ 150 int whichfork, /* data or attr fork */
151 void (*init_fn)(struct xfs_buf *bp,
152 struct xfs_inode *ip,
153 struct xfs_ifork *ifp));
151 154
152/* 155/*
153 * Search the extents list for the inode, for the extent containing bno. 156 * Search the extents list for the inode, for the extent containing bno.
@@ -357,7 +360,42 @@ xfs_bmap_add_attrfork_extents(
357} 360}
358 361
359/* 362/*
360 * Called from xfs_bmap_add_attrfork to handle local format files. 363 * Block initialisation functions for local to extent format conversion.
364 * As these get more complex, they will be moved to the relevant files,
365 * but for now they are too simple to worry about.
366 */
367STATIC void
368xfs_bmap_local_to_extents_init_fn(
369 struct xfs_buf *bp,
370 struct xfs_inode *ip,
371 struct xfs_ifork *ifp)
372{
373 bp->b_ops = &xfs_bmbt_buf_ops;
374 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
375}
376
377STATIC void
378xfs_symlink_local_to_remote(
379 struct xfs_buf *bp,
380 struct xfs_inode *ip,
381 struct xfs_ifork *ifp)
382{
383 /* remote symlink blocks are not verifiable until CRCs come along */
384 bp->b_ops = NULL;
385 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
386}
387
388/*
389 * Called from xfs_bmap_add_attrfork to handle local format files. Each
390 * different data fork content type needs a different callout to do the
391 * conversion. Some are basic and only require special block initialisation
 392 * callouts for the data formatting, others (directories) are so specialised they
393 * handle everything themselves.
394 *
395 * XXX (dgc): investigate whether directory conversion can use the generic
396 * formatting callout. It should be possible - it's just a very complex
 397 * formatter. It would also require passing the transaction through to the init
398 * function.
361 */ 399 */
362STATIC int /* error */ 400STATIC int /* error */
363xfs_bmap_add_attrfork_local( 401xfs_bmap_add_attrfork_local(
@@ -368,25 +406,29 @@ xfs_bmap_add_attrfork_local(
368 int *flags) /* inode logging flags */ 406 int *flags) /* inode logging flags */
369{ 407{
370 xfs_da_args_t dargs; /* args for dir/attr code */ 408 xfs_da_args_t dargs; /* args for dir/attr code */
371 int error; /* error return value */
372 xfs_mount_t *mp; /* mount structure pointer */
373 409
374 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 410 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
375 return 0; 411 return 0;
412
376 if (S_ISDIR(ip->i_d.di_mode)) { 413 if (S_ISDIR(ip->i_d.di_mode)) {
377 mp = ip->i_mount;
378 memset(&dargs, 0, sizeof(dargs)); 414 memset(&dargs, 0, sizeof(dargs));
379 dargs.dp = ip; 415 dargs.dp = ip;
380 dargs.firstblock = firstblock; 416 dargs.firstblock = firstblock;
381 dargs.flist = flist; 417 dargs.flist = flist;
382 dargs.total = mp->m_dirblkfsbs; 418 dargs.total = ip->i_mount->m_dirblkfsbs;
383 dargs.whichfork = XFS_DATA_FORK; 419 dargs.whichfork = XFS_DATA_FORK;
384 dargs.trans = tp; 420 dargs.trans = tp;
385 error = xfs_dir2_sf_to_block(&dargs); 421 return xfs_dir2_sf_to_block(&dargs);
386 } else 422 }
387 error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, 423
388 XFS_DATA_FORK); 424 if (S_ISLNK(ip->i_d.di_mode))
389 return error; 425 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
426 flags, XFS_DATA_FORK,
427 xfs_symlink_local_to_remote);
428
429 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
430 XFS_DATA_FORK,
431 xfs_bmap_local_to_extents_init_fn);
390} 432}
391 433
392/* 434/*
@@ -3099,8 +3141,6 @@ xfs_bmap_extents_to_btree(
3099 args.fsbno = *firstblock; 3141 args.fsbno = *firstblock;
3100 } 3142 }
3101 args.minlen = args.maxlen = args.prod = 1; 3143 args.minlen = args.maxlen = args.prod = 1;
3102 args.total = args.minleft = args.alignment = args.mod = args.isfl =
3103 args.minalignslop = 0;
3104 args.wasdel = wasdel; 3144 args.wasdel = wasdel;
3105 *logflagsp = 0; 3145 *logflagsp = 0;
3106 if ((error = xfs_alloc_vextent(&args))) { 3146 if ((error = xfs_alloc_vextent(&args))) {
@@ -3221,7 +3261,10 @@ xfs_bmap_local_to_extents(
3221 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 3261 xfs_fsblock_t *firstblock, /* first block allocated in xaction */
3222 xfs_extlen_t total, /* total blocks needed by transaction */ 3262 xfs_extlen_t total, /* total blocks needed by transaction */
3223 int *logflagsp, /* inode logging flags */ 3263 int *logflagsp, /* inode logging flags */
3224 int whichfork) /* data or attr fork */ 3264 int whichfork,
3265 void (*init_fn)(struct xfs_buf *bp,
3266 struct xfs_inode *ip,
3267 struct xfs_ifork *ifp))
3225{ 3268{
3226 int error; /* error return value */ 3269 int error; /* error return value */
3227 int flags; /* logging flags returned */ 3270 int flags; /* logging flags returned */
@@ -3241,12 +3284,12 @@ xfs_bmap_local_to_extents(
3241 xfs_buf_t *bp; /* buffer for extent block */ 3284 xfs_buf_t *bp; /* buffer for extent block */
3242 xfs_bmbt_rec_host_t *ep;/* extent record pointer */ 3285 xfs_bmbt_rec_host_t *ep;/* extent record pointer */
3243 3286
3287 ASSERT((ifp->if_flags &
3288 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3244 memset(&args, 0, sizeof(args)); 3289 memset(&args, 0, sizeof(args));
3245 args.tp = tp; 3290 args.tp = tp;
3246 args.mp = ip->i_mount; 3291 args.mp = ip->i_mount;
3247 args.firstblock = *firstblock; 3292 args.firstblock = *firstblock;
3248 ASSERT((ifp->if_flags &
3249 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3250 /* 3293 /*
3251 * Allocate a block. We know we need only one, since the 3294 * Allocate a block. We know we need only one, since the
3252 * file currently fits in an inode. 3295 * file currently fits in an inode.
@@ -3259,20 +3302,21 @@ xfs_bmap_local_to_extents(
3259 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3302 args.type = XFS_ALLOCTYPE_NEAR_BNO;
3260 } 3303 }
3261 args.total = total; 3304 args.total = total;
3262 args.mod = args.minleft = args.alignment = args.wasdel =
3263 args.isfl = args.minalignslop = 0;
3264 args.minlen = args.maxlen = args.prod = 1; 3305 args.minlen = args.maxlen = args.prod = 1;
3265 if ((error = xfs_alloc_vextent(&args))) 3306 error = xfs_alloc_vextent(&args);
3307 if (error)
3266 goto done; 3308 goto done;
3267 /* 3309
3268 * Can't fail, the space was reserved. 3310 /* Can't fail, the space was reserved. */
3269 */
3270 ASSERT(args.fsbno != NULLFSBLOCK); 3311 ASSERT(args.fsbno != NULLFSBLOCK);
3271 ASSERT(args.len == 1); 3312 ASSERT(args.len == 1);
3272 *firstblock = args.fsbno; 3313 *firstblock = args.fsbno;
3273 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 3314 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3274 bp->b_ops = &xfs_bmbt_buf_ops; 3315
3275 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 3316 /* initialise the block and copy the data */
3317 init_fn(bp, ip, ifp);
3318
3319 /* account for the change in fork size and log everything */
3276 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); 3320 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3277 xfs_bmap_forkoff_reset(args.mp, ip, whichfork); 3321 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
3278 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 3322 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -4680,9 +4724,6 @@ __xfs_bmapi_allocate(
4680 return error; 4724 return error;
4681 } 4725 }
4682 4726
4683 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4684 bma->stack_switch = 1;
4685
4686 error = xfs_bmap_alloc(bma); 4727 error = xfs_bmap_alloc(bma);
4687 if (error) 4728 if (error)
4688 return error; 4729 return error;
@@ -4922,8 +4963,32 @@ xfs_bmapi_write(
4922 XFS_STATS_INC(xs_blk_mapw); 4963 XFS_STATS_INC(xs_blk_mapw);
4923 4964
4924 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 4965 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4966 /*
4967 * XXX (dgc): This assumes we are only called for inodes that
4968 * contain content neutral data in local format. Anything that
4969 * contains caller-specific data in local format that needs
4970 * transformation to move to a block format needs to do the
4971 * conversion to extent format itself.
4972 *
4973 * Directory data forks and attribute forks handle this
4974 * themselves, but with the addition of metadata verifiers every
4975 * data fork in local format now contains caller specific data
4976 * and as such conversion through this function is likely to be
4977 * broken.
4978 *
4979 * The only likely user of this branch is for remote symlinks,
4980 * but we cannot overwrite the data fork contents of the symlink
4981 * (EEXIST occurs higher up the stack) and so it will never go
4982 * from local format to extent format here. Hence I don't think
4983 * this branch is ever executed intentionally and we should
4984 * consider removing it and asserting that xfs_bmapi_write()
4985 * cannot be called directly on local format forks. i.e. callers
4986 * are completely responsible for local to extent format
4987 * conversion, not xfs_bmapi_write().
4988 */
4925 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, 4989 error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
4926 &bma.logflags, whichfork); 4990 &bma.logflags, whichfork,
4991 xfs_bmap_local_to_extents_init_fn);
4927 if (error) 4992 if (error)
4928 goto error0; 4993 goto error0;
4929 } 4994 }
@@ -4956,6 +5021,9 @@ xfs_bmapi_write(
4956 bma.flist = flist; 5021 bma.flist = flist;
4957 bma.firstblock = firstblock; 5022 bma.firstblock = firstblock;
4958 5023
5024 if (flags & XFS_BMAPI_STACK_SWITCH)
5025 bma.stack_switch = 1;
5026
4959 while (bno < end && n < *nmap) { 5027 while (bno < end && n < *nmap) {
4960 inhole = eof || bma.got.br_startoff > bno; 5028 inhole = eof || bma.got.br_startoff > bno;
4961 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 5029 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
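
xfs_bmap_local_to_extents() now takes an init_fn callout, so each data-fork content type decides how the freshly allocated block is initialised and which verifier ops it gets. A minimal sketch of what such a callout looks like and how a caller passes it in, mirroring the two real ones added above; the function name here is hypothetical.

/* Hypothetical init_fn callout in the style of those added above. */
STATIC void
xfs_example_local_to_extents_init(
        struct xfs_buf          *bp,
        struct xfs_inode        *ip,
        struct xfs_ifork        *ifp)
{
        bp->b_ops = NULL;       /* no verifier for this hypothetical content type */
        memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
}

/* A caller hands it to the conversion routine: */
error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, &logflags,
                                  XFS_DATA_FORK,
                                  xfs_example_local_to_extents_init);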
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 26673a0b20e7..4e8f0df82d02 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -175,7 +175,7 @@ xfs_buf_get_maps(
175 bp->b_map_count = map_count; 175 bp->b_map_count = map_count;
176 176
177 if (map_count == 1) { 177 if (map_count == 1) {
178 bp->b_maps = &bp->b_map; 178 bp->b_maps = &bp->__b_map;
179 return 0; 179 return 0;
180 } 180 }
181 181
@@ -193,7 +193,7 @@ static void
193xfs_buf_free_maps( 193xfs_buf_free_maps(
194 struct xfs_buf *bp) 194 struct xfs_buf *bp)
195{ 195{
196 if (bp->b_maps != &bp->b_map) { 196 if (bp->b_maps != &bp->__b_map) {
197 kmem_free(bp->b_maps); 197 kmem_free(bp->b_maps);
198 bp->b_maps = NULL; 198 bp->b_maps = NULL;
199 } 199 }
@@ -377,8 +377,8 @@ xfs_buf_allocate_memory(
377 } 377 }
378 378
379use_alloc_page: 379use_alloc_page:
380 start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT; 380 start = BBTOB(bp->b_maps[0].bm_bn) >> PAGE_SHIFT;
381 end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1) 381 end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)
382 >> PAGE_SHIFT; 382 >> PAGE_SHIFT;
383 page_count = end - start; 383 page_count = end - start;
384 error = _xfs_buf_get_pages(bp, page_count, flags); 384 error = _xfs_buf_get_pages(bp, page_count, flags);
@@ -487,6 +487,7 @@ _xfs_buf_find(
487 struct rb_node *parent; 487 struct rb_node *parent;
488 xfs_buf_t *bp; 488 xfs_buf_t *bp;
489 xfs_daddr_t blkno = map[0].bm_bn; 489 xfs_daddr_t blkno = map[0].bm_bn;
490 xfs_daddr_t eofs;
490 int numblks = 0; 491 int numblks = 0;
491 int i; 492 int i;
492 493
@@ -498,6 +499,23 @@ _xfs_buf_find(
498 ASSERT(!(numbytes < (1 << btp->bt_sshift))); 499 ASSERT(!(numbytes < (1 << btp->bt_sshift)));
499 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); 500 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
500 501
502 /*
503 * Corrupted block numbers can get through to here, unfortunately, so we
504 * have to check that the buffer falls within the filesystem bounds.
505 */
506 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
507 if (blkno >= eofs) {
508 /*
509 * XXX (dgc): we should really be returning EFSCORRUPTED here,
510 * but none of the higher level infrastructure supports
511 * returning a specific error on buffer lookup failures.
512 */
513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs);
516 return NULL;
517 }
518
501 /* get tree root */ 519 /* get tree root */
502 pag = xfs_perag_get(btp->bt_mount, 520 pag = xfs_perag_get(btp->bt_mount,
503 xfs_daddr_to_agno(btp->bt_mount, blkno)); 521 xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -640,7 +658,7 @@ _xfs_buf_read(
640 xfs_buf_flags_t flags) 658 xfs_buf_flags_t flags)
641{ 659{
642 ASSERT(!(flags & XBF_WRITE)); 660 ASSERT(!(flags & XBF_WRITE));
643 ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL); 661 ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
644 662
645 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD); 663 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
646 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); 664 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -933,8 +951,6 @@ xfs_buf_trylock(
933 locked = down_trylock(&bp->b_sema) == 0; 951 locked = down_trylock(&bp->b_sema) == 0;
934 if (locked) 952 if (locked)
935 XB_SET_OWNER(bp); 953 XB_SET_OWNER(bp);
936 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
937 xfs_log_force(bp->b_target->bt_mount, 0);
938 954
939 trace_xfs_buf_trylock(bp, _RET_IP_); 955 trace_xfs_buf_trylock(bp, _RET_IP_);
940 return locked; 956 return locked;
@@ -1487,6 +1503,8 @@ restart:
1487 while (!list_empty(&btp->bt_lru)) { 1503 while (!list_empty(&btp->bt_lru)) {
1488 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1504 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1489 if (atomic_read(&bp->b_hold) > 1) { 1505 if (atomic_read(&bp->b_hold) > 1) {
1506 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1507 list_move_tail(&bp->b_lru, &btp->bt_lru);
1490 spin_unlock(&btp->bt_lru_lock); 1508 spin_unlock(&btp->bt_lru_lock);
1491 delay(100); 1509 delay(100);
1492 goto restart; 1510 goto restart;
@@ -1709,7 +1727,7 @@ xfs_buf_cmp(
1709 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); 1727 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1710 xfs_daddr_t diff; 1728 xfs_daddr_t diff;
1711 1729
1712 diff = ap->b_map.bm_bn - bp->b_map.bm_bn; 1730 diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
1713 if (diff < 0) 1731 if (diff < 0)
1714 return -1; 1732 return -1;
1715 if (diff > 0) 1733 if (diff > 0)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 23f5642480bb..433a12ed7b17 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -151,7 +151,7 @@ typedef struct xfs_buf {
151 struct page **b_pages; /* array of page pointers */ 151 struct page **b_pages; /* array of page pointers */
152 struct page *b_page_array[XB_PAGES]; /* inline pages */ 152 struct page *b_page_array[XB_PAGES]; /* inline pages */
153 struct xfs_buf_map *b_maps; /* compound buffer map */ 153 struct xfs_buf_map *b_maps; /* compound buffer map */
154 struct xfs_buf_map b_map; /* inline compound buffer map */ 154 struct xfs_buf_map __b_map; /* inline compound buffer map */
155 int b_map_count; 155 int b_map_count;
156 int b_io_length; /* IO size in BBs */ 156 int b_io_length; /* IO size in BBs */
157 atomic_t b_pin_count; /* pin count */ 157 atomic_t b_pin_count; /* pin count */
@@ -330,8 +330,8 @@ void xfs_buf_stale(struct xfs_buf *bp);
330 * In future, uncached buffers will pass the block number directly to the io 330 * In future, uncached buffers will pass the block number directly to the io
331 * request function and hence these macros will go away at that point. 331 * request function and hence these macros will go away at that point.
332 */ 332 */
333#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn) 333#define XFS_BUF_ADDR(bp) ((bp)->b_maps[0].bm_bn)
334#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno)) 334#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_maps[0].bm_bn = (xfs_daddr_t)(bno))
335 335
336static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 336static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
337{ 337{
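
The b_map -> __b_map rename (and the matching __bli_format change further down) follows one pattern: code always goes through the b_maps pointer, and the underscored field is only the inline backing store for the common single-segment case, with a real allocation made only for multi-segment buffers. A generic sketch of that pattern; all names below are illustrative, not XFS API.

/* Illustrative sketch of the "inline element for one, allocation for many"
 * pattern behind b_maps/__b_map and bli_formats/__bli_format. */
#include <linux/slab.h>
#include <linux/types.h>

struct seg {
        u64     start;
        int     len;
};

struct holder {
        struct seg      *segs;          /* code only ever uses segs[i] */
        struct seg      __seg;          /* inline backing store, 1-segment case */
        int             seg_count;
};

static int holder_get_segs(struct holder *h, int count)
{
        h->seg_count = count;
        if (count == 1) {
                h->segs = &h->__seg;    /* no allocation needed */
                return 0;
        }
        h->segs = kcalloc(count, sizeof(*h->segs), GFP_KERNEL);
        return h->segs ? 0 : -ENOMEM;
}

static void holder_put_segs(struct holder *h)
{
        if (h->segs != &h->__seg)       /* only free what was allocated */
                kfree(h->segs);
        h->segs = NULL;
}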
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index becf4a97efc6..cf263476d6b4 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -37,109 +37,6 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
37 return container_of(lip, struct xfs_buf_log_item, bli_item); 37 return container_of(lip, struct xfs_buf_log_item, bli_item);
38} 38}
39 39
40
41#ifdef XFS_TRANS_DEBUG
42/*
43 * This function uses an alternate strategy for tracking the bytes
44 * that the user requests to be logged. This can then be used
45 * in conjunction with the bli_orig array in the buf log item to
46 * catch bugs in our callers' code.
47 *
48 * We also double check the bits set in xfs_buf_item_log using a
49 * simple algorithm to check that every byte is accounted for.
50 */
51STATIC void
52xfs_buf_item_log_debug(
53 xfs_buf_log_item_t *bip,
54 uint first,
55 uint last)
56{
57 uint x;
58 uint byte;
59 uint nbytes;
60 uint chunk_num;
61 uint word_num;
62 uint bit_num;
63 uint bit_set;
64 uint *wordp;
65
66 ASSERT(bip->bli_logged != NULL);
67 byte = first;
68 nbytes = last - first + 1;
69 bfset(bip->bli_logged, first, nbytes);
70 for (x = 0; x < nbytes; x++) {
71 chunk_num = byte >> XFS_BLF_SHIFT;
72 word_num = chunk_num >> BIT_TO_WORD_SHIFT;
73 bit_num = chunk_num & (NBWORD - 1);
74 wordp = &(bip->bli_format.blf_data_map[word_num]);
75 bit_set = *wordp & (1 << bit_num);
76 ASSERT(bit_set);
77 byte++;
78 }
79}
80
81/*
82 * This function is called when we flush something into a buffer without
83 * logging it. This happens for things like inodes which are logged
84 * separately from the buffer.
85 */
86void
87xfs_buf_item_flush_log_debug(
88 xfs_buf_t *bp,
89 uint first,
90 uint last)
91{
92 xfs_buf_log_item_t *bip = bp->b_fspriv;
93 uint nbytes;
94
95 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
96 return;
97
98 ASSERT(bip->bli_logged != NULL);
99 nbytes = last - first + 1;
100 bfset(bip->bli_logged, first, nbytes);
101}
102
103/*
104 * This function is called to verify that our callers have logged
105 * all the bytes that they changed.
106 *
107 * It does this by comparing the original copy of the buffer stored in
108 * the buf log item's bli_orig array to the current copy of the buffer
109 * and ensuring that all bytes which mismatch are set in the bli_logged
110 * array of the buf log item.
111 */
112STATIC void
113xfs_buf_item_log_check(
114 xfs_buf_log_item_t *bip)
115{
116 char *orig;
117 char *buffer;
118 int x;
119 xfs_buf_t *bp;
120
121 ASSERT(bip->bli_orig != NULL);
122 ASSERT(bip->bli_logged != NULL);
123
124 bp = bip->bli_buf;
125 ASSERT(bp->b_length > 0);
126 ASSERT(bp->b_addr != NULL);
127 orig = bip->bli_orig;
128 buffer = bp->b_addr;
129 for (x = 0; x < BBTOB(bp->b_length); x++) {
130 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
131 xfs_emerg(bp->b_mount,
132 "%s: bip %x buffer %x orig %x index %d",
133 __func__, bip, bp, orig, x);
134 ASSERT(0);
135 }
136 }
137}
138#else
139#define xfs_buf_item_log_debug(x,y,z)
140#define xfs_buf_item_log_check(x)
141#endif
142
143STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); 40STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
144 41
145/* 42/*
@@ -237,7 +134,7 @@ xfs_buf_item_size(
237 * cancel flag in it. 134 * cancel flag in it.
238 */ 135 */
239 trace_xfs_buf_item_size_stale(bip); 136 trace_xfs_buf_item_size_stale(bip);
240 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 137 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
241 return bip->bli_format_count; 138 return bip->bli_format_count;
242 } 139 }
243 140
@@ -278,7 +175,7 @@ xfs_buf_item_format_segment(
278 uint buffer_offset; 175 uint buffer_offset;
279 176
280 /* copy the flags across from the base format item */ 177 /* copy the flags across from the base format item */
281 blfp->blf_flags = bip->bli_format.blf_flags; 178 blfp->blf_flags = bip->__bli_format.blf_flags;
282 179
283 /* 180 /*
284 * Base size is the actual size of the ondisk structure - it reflects 181 * Base size is the actual size of the ondisk structure - it reflects
@@ -287,6 +184,17 @@ xfs_buf_item_format_segment(
287 */ 184 */
288 base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + 185 base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
289 (blfp->blf_map_size * sizeof(blfp->blf_data_map[0])); 186 (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
187
188 nvecs = 0;
189 first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
190 if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
191 /*
 192	 * If the map is not dirty in the transaction, mark
193 * the size as zero and do not advance the vector pointer.
194 */
195 goto out;
196 }
197
290 vecp->i_addr = blfp; 198 vecp->i_addr = blfp;
291 vecp->i_len = base_size; 199 vecp->i_len = base_size;
292 vecp->i_type = XLOG_REG_TYPE_BFORMAT; 200 vecp->i_type = XLOG_REG_TYPE_BFORMAT;
@@ -301,15 +209,13 @@ xfs_buf_item_format_segment(
301 */ 209 */
302 trace_xfs_buf_item_format_stale(bip); 210 trace_xfs_buf_item_format_stale(bip);
303 ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); 211 ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
304 blfp->blf_size = nvecs; 212 goto out;
305 return vecp;
306 } 213 }
307 214
308 /* 215 /*
309 * Fill in an iovec for each set of contiguous chunks. 216 * Fill in an iovec for each set of contiguous chunks.
310 */ 217 */
311 first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 218
312 ASSERT(first_bit != -1);
313 last_bit = first_bit; 219 last_bit = first_bit;
314 nbits = 1; 220 nbits = 1;
315 for (;;) { 221 for (;;) {
@@ -371,7 +277,8 @@ xfs_buf_item_format_segment(
371 nbits++; 277 nbits++;
372 } 278 }
373 } 279 }
374 bip->bli_format.blf_size = nvecs; 280out:
281 blfp->blf_size = nvecs;
375 return vecp; 282 return vecp;
376} 283}
377 284
@@ -405,7 +312,7 @@ xfs_buf_item_format(
405 if (bip->bli_flags & XFS_BLI_INODE_BUF) { 312 if (bip->bli_flags & XFS_BLI_INODE_BUF) {
406 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 313 if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
407 xfs_log_item_in_current_chkpt(lip))) 314 xfs_log_item_in_current_chkpt(lip)))
408 bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF; 315 bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
409 bip->bli_flags &= ~XFS_BLI_INODE_BUF; 316 bip->bli_flags &= ~XFS_BLI_INODE_BUF;
410 } 317 }
411 318
@@ -419,7 +326,6 @@ xfs_buf_item_format(
419 * Check to make sure everything is consistent. 326 * Check to make sure everything is consistent.
420 */ 327 */
421 trace_xfs_buf_item_format(bip); 328 trace_xfs_buf_item_format(bip);
422 xfs_buf_item_log_check(bip);
423} 329}
424 330
425/* 331/*
@@ -485,7 +391,7 @@ xfs_buf_item_unpin(
485 ASSERT(bip->bli_flags & XFS_BLI_STALE); 391 ASSERT(bip->bli_flags & XFS_BLI_STALE);
486 ASSERT(xfs_buf_islocked(bp)); 392 ASSERT(xfs_buf_islocked(bp));
487 ASSERT(XFS_BUF_ISSTALE(bp)); 393 ASSERT(XFS_BUF_ISSTALE(bp));
488 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 394 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
489 395
490 trace_xfs_buf_item_unpin_stale(bip); 396 trace_xfs_buf_item_unpin_stale(bip);
491 397
@@ -563,8 +469,18 @@ xfs_buf_item_push(
563 469
564 if (xfs_buf_ispinned(bp)) 470 if (xfs_buf_ispinned(bp))
565 return XFS_ITEM_PINNED; 471 return XFS_ITEM_PINNED;
566 if (!xfs_buf_trylock(bp)) 472 if (!xfs_buf_trylock(bp)) {
473 /*
474 * If we have just raced with a buffer being pinned and it has
475 * been marked stale, we could end up stalling until someone else
476 * issues a log force to unpin the stale buffer. Check for the
477 * race condition here so xfsaild recognizes the buffer is pinned
478 * and queues a log force to move it along.
479 */
480 if (xfs_buf_ispinned(bp))
481 return XFS_ITEM_PINNED;
567 return XFS_ITEM_LOCKED; 482 return XFS_ITEM_LOCKED;
483 }
568 484
569 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 485 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
570 486
@@ -601,7 +517,7 @@ xfs_buf_item_unlock(
601{ 517{
602 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 518 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
603 struct xfs_buf *bp = bip->bli_buf; 519 struct xfs_buf *bp = bip->bli_buf;
604 int aborted; 520 int aborted, clean, i;
605 uint hold; 521 uint hold;
606 522
607 /* Clear the buffer's association with this transaction. */ 523 /* Clear the buffer's association with this transaction. */
@@ -631,7 +547,7 @@ xfs_buf_item_unlock(
631 */ 547 */
632 if (bip->bli_flags & XFS_BLI_STALE) { 548 if (bip->bli_flags & XFS_BLI_STALE) {
633 trace_xfs_buf_item_unlock_stale(bip); 549 trace_xfs_buf_item_unlock_stale(bip);
634 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 550 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
635 if (!aborted) { 551 if (!aborted) {
636 atomic_dec(&bip->bli_refcount); 552 atomic_dec(&bip->bli_refcount);
637 return; 553 return;
@@ -642,12 +558,27 @@ xfs_buf_item_unlock(
642 558
643 /* 559 /*
644 * If the buf item isn't tracking any data, free it, otherwise drop the 560 * If the buf item isn't tracking any data, free it, otherwise drop the
645 * reference we hold to it. 561 * reference we hold to it. If we are aborting the transaction, this may
562 * be the only reference to the buf item, so we free it anyway
563 * regardless of whether it is dirty or not. A dirty abort implies a
564 * shutdown, anyway.
646 */ 565 */
647 if (xfs_bitmap_empty(bip->bli_format.blf_data_map, 566 clean = 1;
648 bip->bli_format.blf_map_size)) 567 for (i = 0; i < bip->bli_format_count; i++) {
568 if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
569 bip->bli_formats[i].blf_map_size)) {
570 clean = 0;
571 break;
572 }
573 }
574 if (clean)
649 xfs_buf_item_relse(bp); 575 xfs_buf_item_relse(bp);
650 else 576 else if (aborted) {
577 if (atomic_dec_and_test(&bip->bli_refcount)) {
578 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
579 xfs_buf_item_relse(bp);
580 }
581 } else
651 atomic_dec(&bip->bli_refcount); 582 atomic_dec(&bip->bli_refcount);
652 583
653 if (!hold) 584 if (!hold)
@@ -716,7 +647,7 @@ xfs_buf_item_get_format(
716 bip->bli_format_count = count; 647 bip->bli_format_count = count;
717 648
718 if (count == 1) { 649 if (count == 1) {
719 bip->bli_formats = &bip->bli_format; 650 bip->bli_formats = &bip->__bli_format;
720 return 0; 651 return 0;
721 } 652 }
722 653
@@ -731,7 +662,7 @@ STATIC void
731xfs_buf_item_free_format( 662xfs_buf_item_free_format(
732 struct xfs_buf_log_item *bip) 663 struct xfs_buf_log_item *bip)
733{ 664{
734 if (bip->bli_formats != &bip->bli_format) { 665 if (bip->bli_formats != &bip->__bli_format) {
735 kmem_free(bip->bli_formats); 666 kmem_free(bip->bli_formats);
736 bip->bli_formats = NULL; 667 bip->bli_formats = NULL;
737 } 668 }
@@ -898,8 +829,6 @@ xfs_buf_item_log_segment(
898 mask = (1 << end_bit) - 1; 829 mask = (1 << end_bit) - 1;
899 *wordp |= mask; 830 *wordp |= mask;
900 } 831 }
901
902 xfs_buf_item_log_debug(bip, first, last);
903} 832}
904 833
905/* 834/*
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 6850f49f4af3..ee36c88ecfde 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -98,13 +98,9 @@ typedef struct xfs_buf_log_item {
98 unsigned int bli_flags; /* misc flags */ 98 unsigned int bli_flags; /* misc flags */
99 unsigned int bli_recur; /* lock recursion count */ 99 unsigned int bli_recur; /* lock recursion count */
100 atomic_t bli_refcount; /* cnt of tp refs */ 100 atomic_t bli_refcount; /* cnt of tp refs */
101#ifdef XFS_TRANS_DEBUG
102 char *bli_orig; /* original buffer copy */
103 char *bli_logged; /* bytes logged (bitmap) */
104#endif
105 int bli_format_count; /* count of headers */ 101 int bli_format_count; /* count of headers */
106 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */ 102 struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
107 struct xfs_buf_log_format bli_format; /* embedded in-log header */ 103 struct xfs_buf_log_format __bli_format; /* embedded in-log header */
108} xfs_buf_log_item_t; 104} xfs_buf_log_item_t;
109 105
110void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 106void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
@@ -117,16 +113,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *,
117void xfs_buf_iodone_callbacks(struct xfs_buf *); 113void xfs_buf_iodone_callbacks(struct xfs_buf *);
118void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 114void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
119 115
120#ifdef XFS_TRANS_DEBUG
121void
122xfs_buf_item_flush_log_debug(
123 struct xfs_buf *bp,
124 uint first,
125 uint last);
126#else
127#define xfs_buf_item_flush_log_debug(bp, first, last)
128#endif
129
130#endif /* __KERNEL__ */ 116#endif /* __KERNEL__ */
131 117
132#endif /* __XFS_BUF_ITEM_H__ */ 118#endif /* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..f852b082a084 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -78,14 +78,14 @@ xfs_swapext(
78 goto out_put_tmp_file; 78 goto out_put_tmp_file;
79 } 79 }
80 80
81 if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) || 81 if (IS_SWAPFILE(file_inode(f.file)) ||
82 IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) { 82 IS_SWAPFILE(file_inode(tmp.file))) {
83 error = XFS_ERROR(EINVAL); 83 error = XFS_ERROR(EINVAL);
84 goto out_put_tmp_file; 84 goto out_put_tmp_file;
85 } 85 }
86 86
87 ip = XFS_I(f.file->f_path.dentry->d_inode); 87 ip = XFS_I(file_inode(f.file));
88 tip = XFS_I(tmp.file->f_path.dentry->d_inode); 88 tip = XFS_I(file_inode(tmp.file));
89 89
90 if (ip->i_mount != tip->i_mount) { 90 if (ip->i_mount != tip->i_mount) {
91 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
@@ -246,10 +246,10 @@ xfs_swap_extents(
246 goto out_unlock; 246 goto out_unlock;
247 } 247 }
248 248
249 error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 249 error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
250 if (error) 250 if (error)
251 goto out_unlock; 251 goto out_unlock;
252 truncate_pagecache_range(VFS_I(ip), 0, -1); 252 truncate_pagecache_range(VFS_I(tip), 0, -1);
253 253
254 /* Verify O_DIRECT for ftmp */ 254 /* Verify O_DIRECT for ftmp */
255 if (VN_CACHED(VFS_I(tip)) != 0) { 255 if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 7536faaa61e7..12afe07a91d7 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -355,10 +355,12 @@ xfs_dir2_block_addname(
355 /* 355 /*
356 * If need to compact the leaf entries, do it now. 356 * If need to compact the leaf entries, do it now.
357 */ 357 */
358 if (compact) 358 if (compact) {
359 xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, 359 xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog,
360 &lfloghigh, &lfloglow); 360 &lfloghigh, &lfloglow);
361 else if (btp->stale) { 361 /* recalculate blp post-compaction */
362 blp = xfs_dir2_block_leaf_p(btp);
363 } else if (btp->stale) {
362 /* 364 /*
363 * Set leaf logging boundaries to impossible state. 365 * Set leaf logging boundaries to impossible state.
364 * For the no-stale case they're set explicitly. 366 * For the no-stale case they're set explicitly.
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9e1bf5294c91..8025eb23ad72 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -612,15 +612,9 @@ xfs_qm_dqread(
612 if (flags & XFS_QMOPT_DQALLOC) { 612 if (flags & XFS_QMOPT_DQALLOC) {
613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 613 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), 614 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
615 XFS_WRITE_LOG_RES(mp) + 615 XFS_QM_DQALLOC_LOG_RES(mp), 0,
616 /* 616 XFS_TRANS_PERM_LOG_RES,
617 * Round the chunklen up to the next multiple 617 XFS_WRITE_LOG_COUNT);
618 * of 128 (buf log item chunk size)).
619 */
620 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
621 0,
622 XFS_TRANS_PERM_LOG_RES,
623 XFS_WRITE_LOG_COUNT);
624 if (error) 618 if (error)
625 goto error1; 619 goto error1;
626 cancelflags = XFS_TRANS_RELEASE_LOG_RES; 620 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index a83611849cee..c585bc646395 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -48,7 +48,7 @@ static int xfs_fileid_length(int fileid_type)
48 case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: 48 case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
49 return 6; 49 return 6;
50 } 50 }
51 return 255; /* invalid */ 51 return FILEID_INVALID;
52} 52}
53 53
54STATIC int 54STATIC int
@@ -90,7 +90,7 @@ xfs_fs_encode_fh(
90 len = xfs_fileid_length(fileid_type); 90 len = xfs_fileid_length(fileid_type);
91 if (*max_len < len) { 91 if (*max_len < len) {
92 *max_len = len; 92 *max_len = len;
93 return 255; 93 return FILEID_INVALID;
94 } 94 }
95 *max_len = len; 95 *max_len = len;
96 96
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 67284edb84d7..f03bf1a456fb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -811,7 +811,7 @@ xfs_file_fallocate(
811 loff_t offset, 811 loff_t offset,
812 loff_t len) 812 loff_t len)
813{ 813{
814 struct inode *inode = file->f_path.dentry->d_inode; 814 struct inode *inode = file_inode(file);
815 long error; 815 long error;
816 loff_t new_size = 0; 816 loff_t new_size = 0;
817 xfs_flock64_t bf; 817 xfs_flock64_t bf;
@@ -912,7 +912,7 @@ xfs_file_readdir(
912 void *dirent, 912 void *dirent,
913 filldir_t filldir) 913 filldir_t filldir)
914{ 914{
915 struct inode *inode = filp->f_path.dentry->d_inode; 915 struct inode *inode = file_inode(filp);
916 xfs_inode_t *ip = XFS_I(inode); 916 xfs_inode_t *ip = XFS_I(inode);
917 int error; 917 int error;
918 size_t bufsize; 918 size_t bufsize;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 94eaeedc5498..2866b8c78b7a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -709,8 +709,8 @@ xfs_fs_log_dummy(
709 int error; 709 int error;
710 710
711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP); 711 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
712 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 712 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
713 XFS_DEFAULT_LOG_COUNT); 713 XFS_DEFAULT_LOG_COUNT);
714 if (error) { 714 if (error) {
715 xfs_trans_cancel(tp, 0); 715 xfs_trans_cancel(tp, 0);
716 return error; 716 return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a815412eab80..515bf71ce01c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -279,8 +279,6 @@ xfs_ialloc_ag_alloc(
279 (args.agbno < be32_to_cpu(agi->agi_length)))) { 279 (args.agbno < be32_to_cpu(agi->agi_length)))) {
280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 280 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
281 args.type = XFS_ALLOCTYPE_THIS_BNO; 281 args.type = XFS_ALLOCTYPE_THIS_BNO;
282 args.mod = args.total = args.wasdel = args.isfl =
283 args.userdata = args.minalignslop = 0;
284 args.prod = 1; 282 args.prod = 1;
285 283
286 /* 284 /*
@@ -333,8 +331,6 @@ xfs_ialloc_ag_alloc(
333 * Allocate a fixed-size extent of inodes. 331 * Allocate a fixed-size extent of inodes.
334 */ 332 */
335 args.type = XFS_ALLOCTYPE_NEAR_BNO; 333 args.type = XFS_ALLOCTYPE_NEAR_BNO;
336 args.mod = args.total = args.wasdel = args.isfl =
337 args.userdata = args.minalignslop = 0;
338 args.prod = 1; 334 args.prod = 1;
339 /* 335 /*
340 * Allow space for the inode btree to split. 336 * Allow space for the inode btree to split.
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 66282dcb821b..4f201656d2d9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2379,9 +2379,6 @@ xfs_iflush_fork(
2379 char *cp; 2379 char *cp;
2380 xfs_ifork_t *ifp; 2380 xfs_ifork_t *ifp;
2381 xfs_mount_t *mp; 2381 xfs_mount_t *mp;
2382#ifdef XFS_TRANS_DEBUG
2383 int first;
2384#endif
2385 static const short brootflag[2] = 2382 static const short brootflag[2] =
2386 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; 2383 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
2387 static const short dataflag[2] = 2384 static const short dataflag[2] =
@@ -2724,9 +2721,6 @@ xfs_iflush_int(
2724 xfs_inode_log_item_t *iip; 2721 xfs_inode_log_item_t *iip;
2725 xfs_dinode_t *dip; 2722 xfs_dinode_t *dip;
2726 xfs_mount_t *mp; 2723 xfs_mount_t *mp;
2727#ifdef XFS_TRANS_DEBUG
2728 int first;
2729#endif
2730 2724
2731 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2725 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2732 ASSERT(xfs_isiflocked(ip)); 2726 ASSERT(xfs_isiflocked(ip));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 22baf6ea4fac..237e7f6f2ab3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -419,6 +419,7 @@ static inline void xfs_iflock(struct xfs_inode *ip)
419static inline void xfs_ifunlock(struct xfs_inode *ip) 419static inline void xfs_ifunlock(struct xfs_inode *ip)
420{ 420{
421 xfs_iflags_clear(ip, XFS_IFLOCK); 421 xfs_iflags_clear(ip, XFS_IFLOCK);
422 smp_mb();
422 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT); 423 wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
423} 424}
424 425
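The smp_mb() added to xfs_ifunlock() above enforces the ordering that wake_up_bit() requires: the cleared flag bit must be globally visible before the waker inspects the wait queue, otherwise a waiter that has just queued itself and re-checked the still-visible flag can sleep forever. A minimal userspace sketch of that ordering, using C11 atomics rather than the kernel primitives (the names and the two-variable shape are illustrative only, not the XFS waiter code):

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_bool flag_set;		/* stands in for __XFS_IFLOCK_BIT */
	static atomic_bool waiter_queued;	/* stands in for the wait queue */

	static void unlock_side(void)		/* the xfs_ifunlock() role */
	{
		atomic_store_explicit(&flag_set, false, memory_order_relaxed);
		/* full barrier, like smp_mb() above: publish the cleared flag
		 * before looking for waiters */
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&waiter_queued, memory_order_relaxed)) {
			/* wake the waiter */
		}
	}

	static void wait_side(void)		/* the sleeping xfs_iflock() role */
	{
		atomic_store_explicit(&waiter_queued, true, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);
		if (atomic_load_explicit(&flag_set, memory_order_relaxed)) {
			/* really go to sleep; a wakeup is now guaranteed */
		}
	}

Without the fence on the unlock side, the store clearing the flag and the load of waiter_queued may be reordered, so both threads can each miss the other's update.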
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d041d47d9d86..f034bd1652f0 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -269,17 +269,6 @@ xfs_inode_item_format(
269 } else { 269 } else {
270 ASSERT(!(iip->ili_fields & 270 ASSERT(!(iip->ili_fields &
271 XFS_ILOG_DBROOT)); 271 XFS_ILOG_DBROOT));
272#ifdef XFS_TRANS_DEBUG
273 if (iip->ili_root_size > 0) {
274 ASSERT(iip->ili_root_size ==
275 ip->i_df.if_broot_bytes);
276 ASSERT(memcmp(iip->ili_orig_root,
277 ip->i_df.if_broot,
278 iip->ili_root_size) == 0);
279 } else {
280 ASSERT(ip->i_df.if_broot_bytes == 0);
281 }
282#endif
283 iip->ili_fields &= ~XFS_ILOG_DBROOT; 272 iip->ili_fields &= ~XFS_ILOG_DBROOT;
284 } 273 }
285 break; 274 break;
@@ -678,11 +667,6 @@ void
678xfs_inode_item_destroy( 667xfs_inode_item_destroy(
679 xfs_inode_t *ip) 668 xfs_inode_t *ip)
680{ 669{
681#ifdef XFS_TRANS_DEBUG
682 if (ip->i_itemp->ili_root_size != 0) {
683 kmem_free(ip->i_itemp->ili_orig_root);
684 }
685#endif
686 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 670 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
687} 671}
688 672
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 376d4d0b2635..779812fb3d80 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -148,10 +148,6 @@ typedef struct xfs_inode_log_item {
148 data exts */ 148 data exts */
149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged 149 struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
150 attr exts */ 150 attr exts */
151#ifdef XFS_TRANS_DEBUG
152 int ili_root_size;
153 char *ili_orig_root;
154#endif
155 xfs_inode_log_format_t ili_format; /* logged structure */ 151 xfs_inode_log_format_t ili_format; /* logged structure */
156} xfs_inode_log_item_t; 152} xfs_inode_log_item_t;
157 153
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c1c3ef88a260..d681e34c2950 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -80,7 +80,7 @@ xfs_find_handle(
80 f = fdget(hreq->fd); 80 f = fdget(hreq->fd);
81 if (!f.file) 81 if (!f.file)
82 return -EBADF; 82 return -EBADF;
83 inode = f.file->f_path.dentry->d_inode; 83 inode = file_inode(f.file);
84 } else { 84 } else {
85 error = user_lpath((const char __user *)hreq->path, &path); 85 error = user_lpath((const char __user *)hreq->path, &path);
86 if (error) 86 if (error)
@@ -168,7 +168,7 @@ xfs_handle_to_dentry(
168 /* 168 /*
169 * Only allow handle opens under a directory. 169 * Only allow handle opens under a directory.
170 */ 170 */
171 if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) 171 if (!S_ISDIR(file_inode(parfilp)->i_mode))
172 return ERR_PTR(-ENOTDIR); 172 return ERR_PTR(-ENOTDIR);
173 173
174 if (hlen != sizeof(xfs_handle_t)) 174 if (hlen != sizeof(xfs_handle_t))
@@ -1334,7 +1334,7 @@ xfs_file_ioctl(
1334 unsigned int cmd, 1334 unsigned int cmd,
1335 unsigned long p) 1335 unsigned long p)
1336{ 1336{
1337 struct inode *inode = filp->f_path.dentry->d_inode; 1337 struct inode *inode = file_inode(filp);
1338 struct xfs_inode *ip = XFS_I(inode); 1338 struct xfs_inode *ip = XFS_I(inode);
1339 struct xfs_mount *mp = ip->i_mount; 1339 struct xfs_mount *mp = ip->i_mount;
1340 void __user *arg = (void __user *)p; 1340 void __user *arg = (void __user *)p;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 1244274a5674..63b8fc432151 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -530,7 +530,7 @@ xfs_file_compat_ioctl(
530 unsigned cmd, 530 unsigned cmd,
531 unsigned long p) 531 unsigned long p)
532{ 532{
533 struct inode *inode = filp->f_path.dentry->d_inode; 533 struct inode *inode = file_inode(filp);
534 struct xfs_inode *ip = XFS_I(inode); 534 struct xfs_inode *ip = XFS_I(inode);
535 struct xfs_mount *mp = ip->i_mount; 535 struct xfs_mount *mp = ip->i_mount;
536 void __user *arg = (void __user *)p; 536 void __user *arg = (void __user *)p;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
311} 311}
312 312
313/* 313/*
314 * Determine the initial size of the preallocation. We are beyond the current
315 * EOF here, but we need to take into account whether this is a sparse write or
316 * an extending write when determining the preallocation size. Hence we need to
317 * look up the extent that ends at the current write offset and use the result
318 * to determine the preallocation size.
319 *
320 * If the extent is a hole, then preallocation is essentially disabled.
 321 * Otherwise we take the size of the preceding data extent as the basis for the
322 * preallocation size. If the size of the extent is greater than half the
323 * maximum extent length, then use the current offset as the basis. This ensures
324 * that for large files the preallocation size always extends to MAXEXTLEN
325 * rather than falling short due to things like stripe unit/width alignment of
326 * real extents.
327 */
328STATIC int
329xfs_iomap_eof_prealloc_initial_size(
330 struct xfs_mount *mp,
331 struct xfs_inode *ip,
332 xfs_off_t offset,
333 xfs_bmbt_irec_t *imap,
334 int nimaps)
335{
336 xfs_fileoff_t start_fsb;
337 int imaps = 1;
338 int error;
339
340 ASSERT(nimaps >= imaps);
341
342 /* if we are using a specific prealloc size, return now */
343 if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
344 return 0;
345
346 /*
347 * As we write multiple pages, the offset will always align to the
348 * start of a page and hence point to a hole at EOF. i.e. if the size is
349 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
350 * will return FSB 1. Hence if there are blocks in the file, we want to
351 * point to the block prior to the EOF block and not the hole that maps
352 * directly at @offset.
353 */
354 start_fsb = XFS_B_TO_FSB(mp, offset);
355 if (start_fsb)
356 start_fsb--;
357 error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
358 if (error)
359 return 0;
360
361 ASSERT(imaps == 1);
362 if (imap[0].br_startblock == HOLESTARTBLOCK)
363 return 0;
364 if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
365 return imap[0].br_blockcount;
366 return XFS_B_TO_FSB(mp, offset);
367}
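The decision tree in xfs_iomap_eof_prealloc_initial_size() reduces to three cases: no preallocation after a hole, the preceding extent's length for ordinary extending writes, and the current offset once the file is already large. A toy model of just that decision, in plain C with an assumed 21-bit MAXEXTLEN (the real function works on an xfs_bmbt_irec looked up via xfs_bmapi_read()):

	#include <stdint.h>

	#define TOY_MAXEXTLEN	((uint64_t)0x001fffff)	/* assumed 21-bit extent length limit */

	/* prev_extent_len is the length, in blocks, of the data extent ending at
	 * the write offset; 0 means the write lands after a hole. */
	static uint64_t toy_initial_prealloc(uint64_t prev_extent_len,
					     uint64_t offset_fsb)
	{
		if (prev_extent_len == 0)
			return 0;			/* sparse write: no prealloc */
		if (prev_extent_len <= TOY_MAXEXTLEN / 2)
			return prev_extent_len;		/* base on the preceding extent */
		return offset_fsb;			/* large file: base on the offset */
	}

	/* toy_initial_prealloc(0, 4096) == 0, toy_initial_prealloc(256, 4096) == 256,
	 * toy_initial_prealloc(0x00100000, 0x00200000) == 0x00200000 */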
368
369/*
314 * If we don't have a user specified preallocation size, dynamically increase 370 * If we don't have a user specified preallocation size, dynamically increase
315 * the preallocation size as the size of the file grows. Cap the maximum size 371 * the preallocation size as the size of the file grows. Cap the maximum size
316 * at a single extent or less if the filesystem is near full. The closer the 372 * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
319STATIC xfs_fsblock_t 375STATIC xfs_fsblock_t
320xfs_iomap_prealloc_size( 376xfs_iomap_prealloc_size(
321 struct xfs_mount *mp, 377 struct xfs_mount *mp,
322 struct xfs_inode *ip) 378 struct xfs_inode *ip,
379 xfs_off_t offset,
380 struct xfs_bmbt_irec *imap,
381 int nimaps)
323{ 382{
324 xfs_fsblock_t alloc_blocks = 0; 383 xfs_fsblock_t alloc_blocks = 0;
325 384
326 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 385 alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
386 imap, nimaps);
387 if (alloc_blocks > 0) {
327 int shift = 0; 388 int shift = 0;
328 int64_t freesp; 389 int64_t freesp;
329 390
330 /*
331 * rounddown_pow_of_two() returns an undefined result
332 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
333 * ensure we always pass in a non-zero value.
334 */
335 alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
336 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, 391 alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
337 rounddown_pow_of_two(alloc_blocks)); 392 rounddown_pow_of_two(alloc_blocks));
338 393
@@ -351,6 +406,15 @@ xfs_iomap_prealloc_size(
351 } 406 }
352 if (shift) 407 if (shift)
353 alloc_blocks >>= shift; 408 alloc_blocks >>= shift;
409
410 /*
411 * If we are still trying to allocate more space than is
412 * available, squash the prealloc hard. This can happen if we
413 * have a large file on a small filesystem and the above
414 * lowspace thresholds are smaller than MAXEXTLEN.
415 */
416 while (alloc_blocks >= freesp)
417 alloc_blocks >>= 4;
354 } 418 }
355 419
356 if (alloc_blocks < mp->m_writeio_blocks) 420 if (alloc_blocks < mp->m_writeio_blocks)
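The new while loop above keeps dividing the requested preallocation by 16 until it drops below the remaining free space, so even a pathological request converges in a handful of iterations. A standalone sketch of the same squash, with illustrative numbers:

	#include <stdint.h>

	static uint64_t squash_prealloc(uint64_t alloc_blocks, uint64_t freesp)
	{
		/* same shape as the loop above: shrink by 16x until it fits */
		while (alloc_blocks >= freesp)
			alloc_blocks >>= 4;
		return alloc_blocks;
	}

	/* squash_prealloc(1048576, 10000): 1048576 -> 65536 -> 4096, so 4096 blocks
	 * end up preallocated on a filesystem with only 10000 free blocks. */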
@@ -390,7 +454,6 @@ xfs_iomap_write_delay(
390 extsz = xfs_get_extsz_hint(ip); 454 extsz = xfs_get_extsz_hint(ip);
391 offset_fsb = XFS_B_TO_FSBT(mp, offset); 455 offset_fsb = XFS_B_TO_FSBT(mp, offset);
392 456
393
394 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 457 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
395 imap, XFS_WRITE_IMAPS, &prealloc); 458 imap, XFS_WRITE_IMAPS, &prealloc);
396 if (error) 459 if (error)
@@ -398,7 +461,10 @@ xfs_iomap_write_delay(
398 461
399retry: 462retry:
400 if (prealloc) { 463 if (prealloc) {
401 xfs_fsblock_t alloc_blocks = xfs_iomap_prealloc_size(mp, ip); 464 xfs_fsblock_t alloc_blocks;
465
466 alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
467 XFS_WRITE_IMAPS);
402 468
403 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 469 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
404 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 470 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 46bd9d52ab51..eec226f78a40 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -120,7 +120,7 @@ xlog_verify_iclog(
120 struct xlog *log, 120 struct xlog *log,
121 struct xlog_in_core *iclog, 121 struct xlog_in_core *iclog,
122 int count, 122 int count,
123 boolean_t syncing); 123 bool syncing);
124STATIC void 124STATIC void
125xlog_verify_tail_lsn( 125xlog_verify_tail_lsn(
126 struct xlog *log, 126 struct xlog *log,
@@ -1737,7 +1737,7 @@ xlog_sync(
1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1737 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1738 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1739 1739
1740 xlog_verify_iclog(log, iclog, count, B_TRUE); 1740 xlog_verify_iclog(log, iclog, count, true);
1741 1741
1742 /* account for log which doesn't start at block #0 */ 1742 /* account for log which doesn't start at block #0 */
1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); 1743 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -3611,7 +3611,7 @@ xlog_verify_iclog(
3611 struct xlog *log, 3611 struct xlog *log,
3612 struct xlog_in_core *iclog, 3612 struct xlog_in_core *iclog,
3613 int count, 3613 int count,
3614 boolean_t syncing) 3614 bool syncing)
3615{ 3615{
3616 xlog_op_header_t *ophead; 3616 xlog_op_header_t *ophead;
3617 xlog_in_core_t *icptr; 3617 xlog_in_core_t *icptr;
@@ -3659,7 +3659,7 @@ xlog_verify_iclog(
3659 /* clientid is only 1 byte */ 3659 /* clientid is only 1 byte */
3660 field_offset = (__psint_t) 3660 field_offset = (__psint_t)
3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); 3661 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
3662 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3662 if (!syncing || (field_offset & 0x1ff)) {
3663 clientid = ophead->oh_clientid; 3663 clientid = ophead->oh_clientid;
3664 } else { 3664 } else {
3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); 3665 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3682,7 +3682,7 @@ xlog_verify_iclog(
3682 /* check length */ 3682 /* check length */
3683 field_offset = (__psint_t) 3683 field_offset = (__psint_t)
3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); 3684 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
3685 if (syncing == B_FALSE || (field_offset & 0x1ff)) { 3685 if (!syncing || (field_offset & 0x1ff)) {
3686 op_len = be32_to_cpu(ophead->oh_len); 3686 op_len = be32_to_cpu(ophead->oh_len);
3687 } else { 3687 } else {
3688 idx = BTOBBT((__psint_t)&ophead->oh_len - 3688 idx = BTOBBT((__psint_t)&ophead->oh_len -
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 96fcbb85ff83..d1dba7ce75ae 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1442,9 +1442,8 @@ xlog_recover_find_tid(
1442 xlog_tid_t tid) 1442 xlog_tid_t tid)
1443{ 1443{
1444 xlog_recover_t *trans; 1444 xlog_recover_t *trans;
1445 struct hlist_node *n;
1446 1445
1447 hlist_for_each_entry(trans, n, head, r_list) { 1446 hlist_for_each_entry(trans, head, r_list) {
1448 if (trans->r_log_tid == tid) 1447 if (trans->r_log_tid == tid)
1449 return trans; 1448 return trans;
1450 } 1449 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..3806088a8f77 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
658 return; 658 return;
659 } 659 }
660 /* quietly fail */ 660 /* quietly fail */
661 xfs_buf_ioerror(bp, EFSCORRUPTED); 661 xfs_buf_ioerror(bp, EWRONGFS);
662} 662}
663 663
664static void 664static void
@@ -1109,8 +1109,8 @@ xfs_mount_reset_sbqflags(
1109 return 0; 1109 return 0;
1110 1110
1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1111 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1112 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1112 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1113 XFS_DEFAULT_LOG_COUNT); 1113 0, 0, XFS_DEFAULT_LOG_COUNT);
1114 if (error) { 1114 if (error) {
1115 xfs_trans_cancel(tp, 0); 1115 xfs_trans_cancel(tp, 0);
1116 xfs_alert(mp, "%s: Superblock update failed!", __func__); 1116 xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1583,8 +1583,8 @@ xfs_log_sbcount(xfs_mount_t *mp)
1583 return 0; 1583 return 0;
1584 1584
1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP); 1585 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1586 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1586 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1587 XFS_DEFAULT_LOG_COUNT); 1587 XFS_DEFAULT_LOG_COUNT);
1588 if (error) { 1588 if (error) {
1589 xfs_trans_cancel(tp, 0); 1589 xfs_trans_cancel(tp, 0);
1590 return error; 1590 return error;
@@ -1945,8 +1945,8 @@ xfs_mount_log_sb(
1945 XFS_SB_VERSIONNUM)); 1945 XFS_SB_VERSIONNUM));
1946 1946
1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1947 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1948 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1948 error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
1949 XFS_DEFAULT_LOG_COUNT); 1949 XFS_DEFAULT_LOG_COUNT);
1950 if (error) { 1950 if (error) {
1951 xfs_trans_cancel(tp, 0); 1951 xfs_trans_cancel(tp, 0);
1952 return error; 1952 return error;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bab8314507e4..bc907061d392 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -34,12 +34,19 @@ typedef struct xfs_trans_reservations {
34 uint tr_addafork; /* cvt inode to attributed trans */ 34 uint tr_addafork; /* cvt inode to attributed trans */
35 uint tr_writeid; /* write setuid/setgid file */ 35 uint tr_writeid; /* write setuid/setgid file */
36 uint tr_attrinval; /* attr fork buffer invalidation */ 36 uint tr_attrinval; /* attr fork buffer invalidation */
37 uint tr_attrset; /* set/create an attribute */ 37 uint tr_attrsetm; /* set/create an attribute at mount time */
38 uint tr_attrsetrt; /* set/create an attribute at runtime */
38 uint tr_attrrm; /* remove an attribute */ 39 uint tr_attrrm; /* remove an attribute */
39 uint tr_clearagi; /* clear bad agi unlinked ino bucket */ 40 uint tr_clearagi; /* clear bad agi unlinked ino bucket */
40 uint tr_growrtalloc; /* grow realtime allocations */ 41 uint tr_growrtalloc; /* grow realtime allocations */
41 uint tr_growrtzero; /* grow realtime zeroing */ 42 uint tr_growrtzero; /* grow realtime zeroing */
42 uint tr_growrtfree; /* grow realtime freeing */ 43 uint tr_growrtfree; /* grow realtime freeing */
44 uint tr_qm_sbchange; /* change quota flags */
45 uint tr_qm_setqlim; /* adjust quota limits */
46 uint tr_qm_dqalloc; /* allocate quota on disk */
47 uint tr_qm_quotaoff; /* turn quota off */
48 uint tr_qm_equotaoff;/* end of turn quota off */
49 uint tr_sb; /* modify superblock */
43} xfs_trans_reservations_t; 50} xfs_trans_reservations_t;
44 51
45#ifndef __KERNEL__ 52#ifndef __KERNEL__
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 60eff4763156..e5b5cf973781 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1584,10 +1584,9 @@ xfs_qm_write_sb_changes(
1584 int error; 1584 int error;
1585 1585
1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1586 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1587 if ((error = xfs_trans_reserve(tp, 0, 1587 error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
1588 mp->m_sb.sb_sectsize + 128, 0, 1588 0, 0, XFS_DEFAULT_LOG_COUNT);
1589 0, 1589 if (error) {
1590 XFS_DEFAULT_LOG_COUNT))) {
1591 xfs_trans_cancel(tp, 0); 1590 xfs_trans_cancel(tp, 0);
1592 return error; 1591 return error;
1593 } 1592 }
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 6b39115bf145..2d02eac1c9a8 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -146,7 +146,7 @@ xfs_qm_newmount(
146 * inode goes inactive and wants to free blocks, 146 * inode goes inactive and wants to free blocks,
147 * or via xfs_log_mount_finish. 147 * or via xfs_log_mount_finish.
148 */ 148 */
149 *needquotamount = B_TRUE; 149 *needquotamount = true;
150 *quotaflags = mp->m_qflags; 150 *quotaflags = mp->m_qflags;
151 mp->m_qflags = 0; 151 mp->m_qflags = 0;
152 } 152 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5f53e75409b8..cf9a34051e07 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -408,10 +408,10 @@ xfs_qm_scall_getqstat(
408{ 408{
409 struct xfs_quotainfo *q = mp->m_quotainfo; 409 struct xfs_quotainfo *q = mp->m_quotainfo;
410 struct xfs_inode *uip, *gip; 410 struct xfs_inode *uip, *gip;
411 boolean_t tempuqip, tempgqip; 411 bool tempuqip, tempgqip;
412 412
413 uip = gip = NULL; 413 uip = gip = NULL;
414 tempuqip = tempgqip = B_FALSE; 414 tempuqip = tempgqip = false;
415 memset(out, 0, sizeof(fs_quota_stat_t)); 415 memset(out, 0, sizeof(fs_quota_stat_t));
416 416
417 out->qs_version = FS_QSTAT_VERSION; 417 out->qs_version = FS_QSTAT_VERSION;
@@ -434,12 +434,12 @@ xfs_qm_scall_getqstat(
434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 434 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 435 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
436 0, 0, &uip) == 0) 436 0, 0, &uip) == 0)
437 tempuqip = B_TRUE; 437 tempuqip = true;
438 } 438 }
439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 439 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 440 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
441 0, 0, &gip) == 0) 441 0, 0, &gip) == 0)
442 tempgqip = B_TRUE; 442 tempgqip = true;
443 } 443 }
444 if (uip) { 444 if (uip) {
445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; 445 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
@@ -490,8 +490,9 @@ xfs_qm_scall_setqlim(
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT))) { 494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
495 xfs_trans_cancel(tp, 0); 496 xfs_trans_cancel(tp, 0);
496 return (error); 497 return (error);
497 } 498 }
@@ -638,8 +639,9 @@ xfs_qm_log_quotaoff_end(
638 639
639 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); 640 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
640 641
641 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, 642 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
642 0, 0, XFS_DEFAULT_LOG_COUNT))) { 643 0, 0, XFS_DEFAULT_LOG_COUNT);
644 if (error) {
643 xfs_trans_cancel(tp, 0); 645 xfs_trans_cancel(tp, 0);
644 return (error); 646 return (error);
645 } 647 }
@@ -671,14 +673,10 @@ xfs_qm_log_quotaoff(
671 uint oldsbqflag=0; 673 uint oldsbqflag=0;
672 674
673 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 675 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
674 if ((error = xfs_trans_reserve(tp, 0, 676 error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
675 sizeof(xfs_qoff_logitem_t) * 2 + 677 0, 0, XFS_DEFAULT_LOG_COUNT);
676 mp->m_sb.sb_sectsize + 128, 678 if (error)
677 0,
678 0,
679 XFS_DEFAULT_LOG_COUNT))) {
680 goto error0; 679 goto error0;
681 }
682 680
683 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); 681 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
684 xfs_trans_log_quotaoff_item(tp, qoffi); 682 xfs_trans_log_quotaoff_item(tp, qoffi);
@@ -784,11 +782,11 @@ xfs_qm_scall_getquota(
784 (XFS_IS_OQUOTA_ENFORCED(mp) && 782 (XFS_IS_OQUOTA_ENFORCED(mp) &&
785 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && 783 (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
786 dst->d_id != 0) { 784 dst->d_id != 0) {
787 if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) && 785 if ((dst->d_bcount > dst->d_blk_softlimit) &&
788 (dst->d_blk_softlimit > 0)) { 786 (dst->d_blk_softlimit > 0)) {
789 ASSERT(dst->d_btimer != 0); 787 ASSERT(dst->d_btimer != 0);
790 } 788 }
791 if (((int) dst->d_icount > (int) dst->d_ino_softlimit) && 789 if ((dst->d_icount > dst->d_ino_softlimit) &&
792 (dst->d_ino_softlimit > 0)) { 790 (dst->d_ino_softlimit > 0)) {
793 ASSERT(dst->d_itimer != 0); 791 ASSERT(dst->d_itimer != 0);
794 } 792 }
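The casts dropped above truncated the 64-bit block and inode counts to int before comparing them against the soft limits, so a count past 2^31 went negative and the assert never saw an exceeded limit. A small demonstration of the truncation, assuming a 32-bit int on a two's-complement target:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t bcount = 3ULL * 1024 * 1024 * 1024;	/* ~3 billion blocks used */
		uint64_t softlimit = 1024;			/* tiny soft limit */

		/* old comparison: both sides squeezed into int, bcount wraps negative */
		printf("old: %d\n", (int)bcount > (int)softlimit);	/* prints 0 */
		/* new comparison: full-width unsigned compare */
		printf("new: %d\n", bcount > softlimit);		/* prints 1 */
		return 0;
	}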
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ab8839b26272..c407121873b4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -139,9 +139,9 @@ static const match_table_t tokens = {
139 139
140 140
141STATIC unsigned long 141STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 142suffix_kstrtoint(char *s, unsigned int base, int *res)
143{ 143{
144 int last, shift_left_factor = 0; 144 int last, shift_left_factor = 0, _res;
145 char *value = s; 145 char *value = s;
146 146
147 last = strlen(value) - 1; 147 last = strlen(value) - 1;
@@ -158,7 +158,10 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
158 value[last] = '\0'; 158 value[last] = '\0';
159 } 159 }
160 160
161 return simple_strtoul((const char *)s, endp, base) << shift_left_factor; 161 if (kstrtoint(s, base, &_res))
162 return -EINVAL;
163 *res = _res << shift_left_factor;
164 return 0;
162} 165}
163 166
164/* 167/*
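suffix_kstrtoint() keeps the old behaviour of accepting a size suffix on options like logbsize and allocsize, but reports parse failures instead of silently using whatever simple_strtoul produced. A userspace sketch of the same idea; the k/m/g-to-shift mapping is an assumption about the elided middle of the function, and strtol stands in for kstrtoint:

	#include <ctype.h>
	#include <stdlib.h>
	#include <string.h>

	static int parse_size_opt(const char *s, int *res)
	{
		char buf[32];
		size_t len = strlen(s);
		int shift = 0;

		if (len == 0 || len >= sizeof(buf))
			return -1;
		switch (tolower((unsigned char)s[len - 1])) {
		case 'k': shift = 10; len--; break;
		case 'm': shift = 20; len--; break;
		case 'g': shift = 30; len--; break;
		}
		memcpy(buf, s, len);
		buf[len] = '\0';
		*res = (int)strtol(buf, NULL, 10) << shift;
		return 0;
	}

	/* parse_size_opt("32k", &v) leaves v == 32768;
	 * parse_size_opt("64m", &v) leaves v == 67108864 */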
@@ -174,7 +177,7 @@ xfs_parseargs(
174 char *options) 177 char *options)
175{ 178{
176 struct super_block *sb = mp->m_super; 179 struct super_block *sb = mp->m_super;
177 char *this_char, *value, *eov; 180 char *this_char, *value;
178 int dsunit = 0; 181 int dsunit = 0;
179 int dswidth = 0; 182 int dswidth = 0;
180 int iosize = 0; 183 int iosize = 0;
@@ -230,14 +233,16 @@ xfs_parseargs(
230 this_char); 233 this_char);
231 return EINVAL; 234 return EINVAL;
232 } 235 }
233 mp->m_logbufs = simple_strtoul(value, &eov, 10); 236 if (kstrtoint(value, 10, &mp->m_logbufs))
237 return EINVAL;
234 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 238 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
235 if (!value || !*value) { 239 if (!value || !*value) {
236 xfs_warn(mp, "%s option requires an argument", 240 xfs_warn(mp, "%s option requires an argument",
237 this_char); 241 this_char);
238 return EINVAL; 242 return EINVAL;
239 } 243 }
240 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 244 if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
245 return EINVAL;
241 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 246 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
242 if (!value || !*value) { 247 if (!value || !*value) {
243 xfs_warn(mp, "%s option requires an argument", 248 xfs_warn(mp, "%s option requires an argument",
@@ -266,7 +271,8 @@ xfs_parseargs(
266 this_char); 271 this_char);
267 return EINVAL; 272 return EINVAL;
268 } 273 }
269 iosize = simple_strtoul(value, &eov, 10); 274 if (kstrtoint(value, 10, &iosize))
275 return EINVAL;
270 iosizelog = ffs(iosize) - 1; 276 iosizelog = ffs(iosize) - 1;
271 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 277 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
272 if (!value || !*value) { 278 if (!value || !*value) {
@@ -274,7 +280,8 @@ xfs_parseargs(
274 this_char); 280 this_char);
275 return EINVAL; 281 return EINVAL;
276 } 282 }
277 iosize = suffix_strtoul(value, &eov, 10); 283 if (suffix_kstrtoint(value, 10, &iosize))
284 return EINVAL;
278 iosizelog = ffs(iosize) - 1; 285 iosizelog = ffs(iosize) - 1;
279 } else if (!strcmp(this_char, MNTOPT_GRPID) || 286 } else if (!strcmp(this_char, MNTOPT_GRPID) ||
280 !strcmp(this_char, MNTOPT_BSDGROUPS)) { 287 !strcmp(this_char, MNTOPT_BSDGROUPS)) {
@@ -296,14 +303,16 @@ xfs_parseargs(
296 this_char); 303 this_char);
297 return EINVAL; 304 return EINVAL;
298 } 305 }
299 dsunit = simple_strtoul(value, &eov, 10); 306 if (kstrtoint(value, 10, &dsunit))
307 return EINVAL;
300 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 308 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
301 if (!value || !*value) { 309 if (!value || !*value) {
302 xfs_warn(mp, "%s option requires an argument", 310 xfs_warn(mp, "%s option requires an argument",
303 this_char); 311 this_char);
304 return EINVAL; 312 return EINVAL;
305 } 313 }
306 dswidth = simple_strtoul(value, &eov, 10); 314 if (kstrtoint(value, 10, &dswidth))
315 return EINVAL;
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) { 316 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 317 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 318 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
341DEFINE_BUF_EVENT(xfs_buf_item_iodone); 341DEFINE_BUF_EVENT(xfs_buf_item_iodone);
342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
343DEFINE_BUF_EVENT(xfs_buf_error_relse); 343DEFINE_BUF_EVENT(xfs_buf_error_relse);
344DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
344DEFINE_BUF_EVENT(xfs_trans_read_buf_io); 345DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
345DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); 346DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
346 347
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 06ed520a767f..2fd7c1ff1d21 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -37,14 +37,45 @@
37#include "xfs_extent_busy.h" 37#include "xfs_extent_busy.h"
38#include "xfs_bmap.h" 38#include "xfs_bmap.h"
39#include "xfs_quota.h" 39#include "xfs_quota.h"
40#include "xfs_qm.h"
40#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
41#include "xfs_trans_space.h" 42#include "xfs_trans_space.h"
42#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
44#include "xfs_log_priv.h"
45#include "xfs_buf_item.h"
43#include "xfs_trace.h" 46#include "xfs_trace.h"
44 47
45kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
46kmem_zone_t *xfs_log_item_desc_zone; 49kmem_zone_t *xfs_log_item_desc_zone;
47 50
51/*
52 * A buffer has a format structure overhead in the log in addition
53 * to the data, so we need to take this into account when reserving
54 * space in a transaction for a buffer. Round the space required up
55 * to a multiple of 128 bytes so that we don't change the historical
56 * reservation that has been used for this overhead.
57 */
58STATIC uint
59xfs_buf_log_overhead(void)
60{
61 return round_up(sizeof(struct xlog_op_header) +
62 sizeof(struct xfs_buf_log_format), 128);
63}
64
65/*
66 * Calculate out transaction log reservation per item in bytes.
67 *
68 * The nbufs argument is used to indicate the number of items that
69 * will be changed in a transaction. size is used to tell how many
70 * bytes should be reserved per item.
71 */
72STATIC uint
73xfs_calc_buf_res(
74 uint nbufs,
75 uint size)
76{
77 return nbufs * (size + xfs_buf_log_overhead());
78}
48 79
49/* 80/*
50 * Various log reservation values. 81 * Various log reservation values.
@@ -85,18 +116,15 @@ xfs_calc_write_reservation(
85 struct xfs_mount *mp) 116 struct xfs_mount *mp)
86{ 117{
87 return XFS_DQUOT_LOGRES(mp) + 118 return XFS_DQUOT_LOGRES(mp) +
88 MAX((mp->m_sb.sb_inodesize + 119 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
89 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 120 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
90 2 * mp->m_sb.sb_sectsize + 121 XFS_FSB_TO_B(mp, 1)) +
91 mp->m_sb.sb_sectsize + 122 xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
92 XFS_ALLOCFREE_LOG_RES(mp, 2) + 123 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
93 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 124 XFS_FSB_TO_B(mp, 1))),
94 XFS_ALLOCFREE_LOG_COUNT(mp, 2))), 125 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
95 (2 * mp->m_sb.sb_sectsize + 126 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
96 2 * mp->m_sb.sb_sectsize + 127 XFS_FSB_TO_B(mp, 1))));
97 mp->m_sb.sb_sectsize +
98 XFS_ALLOCFREE_LOG_RES(mp, 2) +
99 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
100} 128}
101 129
102/* 130/*
@@ -117,18 +145,17 @@ xfs_calc_itruncate_reservation(
117 struct xfs_mount *mp) 145 struct xfs_mount *mp)
118{ 146{
119 return XFS_DQUOT_LOGRES(mp) + 147 return XFS_DQUOT_LOGRES(mp) +
120 MAX((mp->m_sb.sb_inodesize + 148 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
121 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + 149 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
122 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 150 XFS_FSB_TO_B(mp, 1))),
123 (4 * mp->m_sb.sb_sectsize + 151 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
124 4 * mp->m_sb.sb_sectsize + 152 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
125 mp->m_sb.sb_sectsize + 153 XFS_FSB_TO_B(mp, 1)) +
126 XFS_ALLOCFREE_LOG_RES(mp, 4) + 154 xfs_calc_buf_res(5, 0) +
127 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) + 155 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
128 128 * 5 + 156 XFS_FSB_TO_B(mp, 1)) +
129 XFS_ALLOCFREE_LOG_RES(mp, 1) + 157 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
130 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 158 mp->m_in_maxlevels, 0)));
131 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
132} 159}
133 160
134/* 161/*
@@ -148,14 +175,12 @@ xfs_calc_rename_reservation(
148 struct xfs_mount *mp) 175 struct xfs_mount *mp)
149{ 176{
150 return XFS_DQUOT_LOGRES(mp) + 177 return XFS_DQUOT_LOGRES(mp) +
151 MAX((4 * mp->m_sb.sb_inodesize + 178 MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
152 2 * XFS_DIROP_LOG_RES(mp) + 179 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
153 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))), 180 XFS_FSB_TO_B(mp, 1))),
154 (3 * mp->m_sb.sb_sectsize + 181 (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
155 3 * mp->m_sb.sb_sectsize + 182 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
156 mp->m_sb.sb_sectsize + 183 XFS_FSB_TO_B(mp, 1))));
157 XFS_ALLOCFREE_LOG_RES(mp, 3) +
158 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
159} 184}
160 185
161/* 186/*
@@ -175,15 +200,12 @@ xfs_calc_link_reservation(
175 struct xfs_mount *mp) 200 struct xfs_mount *mp)
176{ 201{
177 return XFS_DQUOT_LOGRES(mp) + 202 return XFS_DQUOT_LOGRES(mp) +
178 MAX((mp->m_sb.sb_inodesize + 203 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
179 mp->m_sb.sb_inodesize + 204 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
180 XFS_DIROP_LOG_RES(mp) + 205 XFS_FSB_TO_B(mp, 1))),
181 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 206 (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
182 (mp->m_sb.sb_sectsize + 207 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
183 mp->m_sb.sb_sectsize + 208 XFS_FSB_TO_B(mp, 1))));
184 mp->m_sb.sb_sectsize +
185 XFS_ALLOCFREE_LOG_RES(mp, 1) +
186 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
187} 209}
188 210
189/* 211/*
@@ -203,15 +225,12 @@ xfs_calc_remove_reservation(
203 struct xfs_mount *mp) 225 struct xfs_mount *mp)
204{ 226{
205 return XFS_DQUOT_LOGRES(mp) + 227 return XFS_DQUOT_LOGRES(mp) +
206 MAX((mp->m_sb.sb_inodesize + 228 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
207 mp->m_sb.sb_inodesize + 229 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
208 XFS_DIROP_LOG_RES(mp) + 230 XFS_FSB_TO_B(mp, 1))),
209 128 * (2 + XFS_DIROP_LOG_COUNT(mp))), 231 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
210 (2 * mp->m_sb.sb_sectsize + 232 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
211 2 * mp->m_sb.sb_sectsize + 233 XFS_FSB_TO_B(mp, 1))));
212 mp->m_sb.sb_sectsize +
213 XFS_ALLOCFREE_LOG_RES(mp, 2) +
214 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
215} 234}
216 235
217/* 236/*
@@ -233,18 +252,18 @@ xfs_calc_symlink_reservation(
233 struct xfs_mount *mp) 252 struct xfs_mount *mp)
234{ 253{
235 return XFS_DQUOT_LOGRES(mp) + 254 return XFS_DQUOT_LOGRES(mp) +
236 MAX((mp->m_sb.sb_inodesize + 255 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
237 mp->m_sb.sb_inodesize + 256 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
238 XFS_FSB_TO_B(mp, 1) + 257 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
239 XFS_DIROP_LOG_RES(mp) + 258 XFS_FSB_TO_B(mp, 1)) +
240 1024 + 259 xfs_calc_buf_res(1, 1024)),
241 128 * (4 + XFS_DIROP_LOG_COUNT(mp))), 260 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
242 (2 * mp->m_sb.sb_sectsize + 261 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
243 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 262 XFS_FSB_TO_B(mp, 1)) +
244 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 263 xfs_calc_buf_res(mp->m_in_maxlevels,
245 XFS_ALLOCFREE_LOG_RES(mp, 1) + 264 XFS_FSB_TO_B(mp, 1)) +
246 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 265 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
247 XFS_ALLOCFREE_LOG_COUNT(mp, 1)))); 266 XFS_FSB_TO_B(mp, 1))));
248} 267}
249 268
250/* 269/*
@@ -267,18 +286,19 @@ xfs_calc_create_reservation(
267 struct xfs_mount *mp) 286 struct xfs_mount *mp)
268{ 287{
269 return XFS_DQUOT_LOGRES(mp) + 288 return XFS_DQUOT_LOGRES(mp) +
270 MAX((mp->m_sb.sb_inodesize + 289 MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
271 mp->m_sb.sb_inodesize + 290 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
291 (uint)XFS_FSB_TO_B(mp, 1) +
292 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
293 XFS_FSB_TO_B(mp, 1))),
294 (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
272 mp->m_sb.sb_sectsize + 295 mp->m_sb.sb_sectsize +
273 XFS_FSB_TO_B(mp, 1) + 296 xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp),
274 XFS_DIROP_LOG_RES(mp) + 297 XFS_FSB_TO_B(mp, 1)) +
275 128 * (3 + XFS_DIROP_LOG_COUNT(mp))), 298 xfs_calc_buf_res(mp->m_in_maxlevels,
276 (3 * mp->m_sb.sb_sectsize + 299 XFS_FSB_TO_B(mp, 1)) +
277 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) + 300 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
278 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) + 301 XFS_FSB_TO_B(mp, 1))));
279 XFS_ALLOCFREE_LOG_RES(mp, 1) +
280 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
281 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
282} 302}
283 303
284/* 304/*
@@ -306,16 +326,16 @@ xfs_calc_ifree_reservation(
306 struct xfs_mount *mp) 326 struct xfs_mount *mp)
307{ 327{
308 return XFS_DQUOT_LOGRES(mp) + 328 return XFS_DQUOT_LOGRES(mp) +
309 mp->m_sb.sb_inodesize + 329 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
310 mp->m_sb.sb_sectsize + 330 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
311 mp->m_sb.sb_sectsize + 331 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
312 XFS_FSB_TO_B(mp, 1) +
313 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), 332 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
314 XFS_INODE_CLUSTER_SIZE(mp)) + 333 XFS_INODE_CLUSTER_SIZE(mp)) +
315 128 * 5 + 334 xfs_calc_buf_res(1, 0) +
316 XFS_ALLOCFREE_LOG_RES(mp, 1) + 335 xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
317 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels + 336 mp->m_in_maxlevels, 0) +
318 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 337 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
338 XFS_FSB_TO_B(mp, 1));
319} 339}
320 340
321/* 341/*
@@ -343,9 +363,9 @@ STATIC uint
343xfs_calc_growdata_reservation( 363xfs_calc_growdata_reservation(
344 struct xfs_mount *mp) 364 struct xfs_mount *mp)
345{ 365{
346 return mp->m_sb.sb_sectsize * 3 + 366 return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
347 XFS_ALLOCFREE_LOG_RES(mp, 1) + 367 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
348 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 368 XFS_FSB_TO_B(mp, 1));
349} 369}
350 370
351/* 371/*
@@ -362,12 +382,12 @@ STATIC uint
362xfs_calc_growrtalloc_reservation( 382xfs_calc_growrtalloc_reservation(
363 struct xfs_mount *mp) 383 struct xfs_mount *mp)
364{ 384{
365 return 2 * mp->m_sb.sb_sectsize + 385 return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
366 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + 386 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
367 mp->m_sb.sb_inodesize + 387 XFS_FSB_TO_B(mp, 1)) +
368 XFS_ALLOCFREE_LOG_RES(mp, 1) + 388 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
369 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 389 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
370 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 390 XFS_FSB_TO_B(mp, 1));
371} 391}
372 392
373/* 393/*
@@ -379,7 +399,7 @@ STATIC uint
379xfs_calc_growrtzero_reservation( 399xfs_calc_growrtzero_reservation(
380 struct xfs_mount *mp) 400 struct xfs_mount *mp)
381{ 401{
382 return mp->m_sb.sb_blocksize + 128; 402 return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
383} 403}
384 404
385/* 405/*
@@ -396,11 +416,10 @@ STATIC uint
396xfs_calc_growrtfree_reservation( 416xfs_calc_growrtfree_reservation(
397 struct xfs_mount *mp) 417 struct xfs_mount *mp)
398{ 418{
399 return mp->m_sb.sb_sectsize + 419 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
400 2 * mp->m_sb.sb_inodesize + 420 xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
401 mp->m_sb.sb_blocksize + 421 xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
402 mp->m_rsumsize + 422 xfs_calc_buf_res(1, mp->m_rsumsize);
403 128 * 5;
404} 423}
405 424
406/* 425/*
@@ -411,7 +430,7 @@ STATIC uint
411xfs_calc_swrite_reservation( 430xfs_calc_swrite_reservation(
412 struct xfs_mount *mp) 431 struct xfs_mount *mp)
413{ 432{
414 return mp->m_sb.sb_inodesize + 128; 433 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
415} 434}
416 435
417/* 436/*
@@ -421,7 +440,7 @@ xfs_calc_swrite_reservation(
421STATIC uint 440STATIC uint
422xfs_calc_writeid_reservation(xfs_mount_t *mp) 441xfs_calc_writeid_reservation(xfs_mount_t *mp)
423{ 442{
424 return mp->m_sb.sb_inodesize + 128; 443 return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
425} 444}
426 445
427/* 446/*
@@ -437,13 +456,13 @@ xfs_calc_addafork_reservation(
437 struct xfs_mount *mp) 456 struct xfs_mount *mp)
438{ 457{
439 return XFS_DQUOT_LOGRES(mp) + 458 return XFS_DQUOT_LOGRES(mp) +
440 mp->m_sb.sb_inodesize + 459 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
441 mp->m_sb.sb_sectsize * 2 + 460 xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
442 mp->m_dirblksize + 461 xfs_calc_buf_res(1, mp->m_dirblksize) +
443 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + 462 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
444 XFS_ALLOCFREE_LOG_RES(mp, 1) + 463 XFS_FSB_TO_B(mp, 1)) +
445 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 + 464 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
446 XFS_ALLOCFREE_LOG_COUNT(mp, 1)); 465 XFS_FSB_TO_B(mp, 1));
447} 466}
448 467
449/* 468/*
@@ -461,35 +480,51 @@ STATIC uint
461xfs_calc_attrinval_reservation( 480xfs_calc_attrinval_reservation(
462 struct xfs_mount *mp) 481 struct xfs_mount *mp)
463{ 482{
464 return MAX((mp->m_sb.sb_inodesize + 483 return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
465 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 484 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
466 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))), 485 XFS_FSB_TO_B(mp, 1))),
467 (4 * mp->m_sb.sb_sectsize + 486 (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
468 4 * mp->m_sb.sb_sectsize + 487 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
469 mp->m_sb.sb_sectsize + 488 XFS_FSB_TO_B(mp, 1))));
470 XFS_ALLOCFREE_LOG_RES(mp, 4) +
471 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
472} 489}
473 490
474/* 491/*
475 * Setting an attribute. 492 * Setting an attribute at mount time.
476 * the inode getting the attribute 493 * the inode getting the attribute
477 * the superblock for allocations 494 * the superblock for allocations
478 * the agfs extents are allocated from 495 * the agfs extents are allocated from
479 * the attribute btree * max depth 496 * the attribute btree * max depth
480 * the inode allocation btree 497 * the inode allocation btree
481 * Since attribute transaction space is dependent on the size of the attribute, 498 * Since attribute transaction space is dependent on the size of the attribute,
 482 * the calculation is done partially at mount time and partially at runtime. 499 * the calculation is done partially at mount time and partially at runtime (see
500 * below).
483 */ 501 */
484STATIC uint 502STATIC uint
485xfs_calc_attrset_reservation( 503xfs_calc_attrsetm_reservation(
486 struct xfs_mount *mp) 504 struct xfs_mount *mp)
487{ 505{
488 return XFS_DQUOT_LOGRES(mp) + 506 return XFS_DQUOT_LOGRES(mp) +
489 mp->m_sb.sb_inodesize + 507 xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
490 mp->m_sb.sb_sectsize + 508 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
491 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 509 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
492 128 * (2 + XFS_DA_NODE_MAXDEPTH); 510}
511
512/*
513 * Setting an attribute at runtime, transaction space unit per block.
514 * the superblock for allocations: sector size
515 * the inode bmap btree could join or split: max depth * block size
516 * Since the runtime attribute transaction space is dependent on the total
517 * blocks needed for the 1st bmap, here we calculate out the space unit for
518 * one block so that the caller could figure out the total space according
 519 * to the attribute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
520 */
521STATIC uint
522xfs_calc_attrsetrt_reservation(
523 struct xfs_mount *mp)
524{
525 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
526 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
527 XFS_FSB_TO_B(mp, 1));
493} 528}
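Unlike the other reservations, tr_attrsetrt is a per-block unit: the mount-time part (XFS_ATTRSETM_LOG_RES) is fixed and the caller scales the runtime part by the number of blocks the attribute value needs. A sketch of how a caller might combine the two; the variable names and the surrounding transaction setup are illustrative, not the actual attr code:

	/* blks: blocks needed for the remote attribute value */
	uint	blks = XFS_B_TO_FSB(mp, valuelen);
	uint	res  = XFS_ATTRSETM_LOG_RES(mp) +
		       XFS_ATTRSETRT_LOG_RES(mp) * blks;

	error = xfs_trans_reserve(tp, blks, res, 0,
				  XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT);

	/* e.g. with a 6000-byte mount-time part and a 5000-byte per-block unit,
	 * a value spanning 4 blocks reserves 6000 + 4 * 5000 = 26000 bytes. */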
494 529
495/* 530/*
@@ -508,16 +543,15 @@ xfs_calc_attrrm_reservation(
508 struct xfs_mount *mp) 543 struct xfs_mount *mp)
509{ 544{
510 return XFS_DQUOT_LOGRES(mp) + 545 return XFS_DQUOT_LOGRES(mp) +
511 MAX((mp->m_sb.sb_inodesize + 546 MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
512 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) + 547 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
513 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + 548 XFS_FSB_TO_B(mp, 1)) +
514 128 * (1 + XFS_DA_NODE_MAXDEPTH + 549 (uint)XFS_FSB_TO_B(mp,
515 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))), 550 XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
516 (2 * mp->m_sb.sb_sectsize + 551 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
517 2 * mp->m_sb.sb_sectsize + 552 (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
518 mp->m_sb.sb_sectsize + 553 xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
519 XFS_ALLOCFREE_LOG_RES(mp, 2) + 554 XFS_FSB_TO_B(mp, 1))));
520 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
521} 555}
522 556
523/* 557/*
@@ -527,7 +561,78 @@ STATIC uint
527xfs_calc_clear_agi_bucket_reservation( 561xfs_calc_clear_agi_bucket_reservation(
528 struct xfs_mount *mp) 562 struct xfs_mount *mp)
529{ 563{
530 return mp->m_sb.sb_sectsize + 128; 564 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
565}
566
567/*
568 * Clearing the quotaflags in the superblock.
569 * the super block for changing quota flags: sector size
570 */
571STATIC uint
572xfs_calc_qm_sbchange_reservation(
573 struct xfs_mount *mp)
574{
575 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
576}
577
578/*
579 * Adjusting quota limits.
580 * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
581 */
582STATIC uint
583xfs_calc_qm_setqlim_reservation(
584 struct xfs_mount *mp)
585{
586 return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
587}
588
589/*
590 * Allocating quota on disk if needed.
591 * the write transaction log space: XFS_WRITE_LOG_RES(mp)
592 * the unit of quota allocation: one system block size
593 */
594STATIC uint
595xfs_calc_qm_dqalloc_reservation(
596 struct xfs_mount *mp)
597{
598 return XFS_WRITE_LOG_RES(mp) +
599 xfs_calc_buf_res(1,
600 XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
601}
602
603/*
604 * Turning off quotas.
605 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
606 * the superblock for the quota flags: sector size
607 */
608STATIC uint
609xfs_calc_qm_quotaoff_reservation(
610 struct xfs_mount *mp)
611{
612 return sizeof(struct xfs_qoff_logitem) * 2 +
613 xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
614}
615
616/*
617 * End of turning off quotas.
618 * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
619 */
620STATIC uint
621xfs_calc_qm_quotaoff_end_reservation(
622 struct xfs_mount *mp)
623{
624 return sizeof(struct xfs_qoff_logitem) * 2;
625}
626
627/*
628 * Syncing the incore super block changes to disk.
629 * the super block to reflect the changes: sector size
630 */
631STATIC uint
632xfs_calc_sb_reservation(
633 struct xfs_mount *mp)
634{
635 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
531} 636}
532 637
533/* 638/*
@@ -555,12 +660,19 @@ xfs_trans_init(
555 resp->tr_writeid = xfs_calc_writeid_reservation(mp); 660 resp->tr_writeid = xfs_calc_writeid_reservation(mp);
556 resp->tr_addafork = xfs_calc_addafork_reservation(mp); 661 resp->tr_addafork = xfs_calc_addafork_reservation(mp);
557 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); 662 resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
558 resp->tr_attrset = xfs_calc_attrset_reservation(mp); 663 resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
664 resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
559 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); 665 resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
560 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); 666 resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
561 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); 667 resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
562 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); 668 resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
563 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); 669 resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
670 resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
671 resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
672 resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
673 resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
674 resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
675 resp->tr_sb = xfs_calc_sb_reservation(mp);
564} 676}
565 677
566/* 678/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6c0601abd7a..cd29f6171021 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -252,17 +252,19 @@ struct xfs_log_item_desc {
252 * as long as SWRITE logs the entire inode core 252 * as long as SWRITE logs the entire inode core
253 */ 253 */
254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 254#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite) 255#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork) 256#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval) 257#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
258#define XFS_ATTRSET_LOG_RES(mp, ext) \ 258#define XFS_ATTRSETM_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetm)
259 ((mp)->m_reservations.tr_attrset + \ 259#define XFS_ATTRSETRT_LOG_RES(mp) ((mp)->m_reservations.tr_attrsetrt)
260 (ext * (mp)->m_sb.sb_sectsize) + \ 260#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
261 (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
262 (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
263#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
264#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi) 261#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
265 262#define XFS_QM_SBCHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_qm_sbchange)
263#define XFS_QM_SETQLIM_LOG_RES(mp) ((mp)->m_reservations.tr_qm_setqlim)
264#define XFS_QM_DQALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_qm_dqalloc)
265#define XFS_QM_QUOTAOFF_LOG_RES(mp) ((mp)->m_reservations.tr_qm_quotaoff)
266#define XFS_QM_QUOTAOFF_END_LOG_RES(mp) ((mp)->m_reservations.tr_qm_equotaoff)
267#define XFS_SB_LOG_RES(mp) ((mp)->m_reservations.tr_sb)
266 268
267/* 269/*
268 * Various log count values. 270 * Various log count values.
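The new XFS_*_LOG_RES() macros above are plain lookups into that table. Two hedged call-site sketches follow; xfs_trans_reserve(), XFS_DEFAULT_LOG_COUNT, XFS_TRANS_PERM_LOG_RES and XFS_ATTRSET_LOG_COUNT are assumed from the surrounding kernel, and the variables are illustrative, none of this is introduced by the patch:

	struct xfs_trans	*tp;	/* from xfs_trans_alloc(), assumed */
	struct xfs_mount	*mp;
	uint			blks;	/* blocks covered by the attr value */
	int			error;

	/* Turning quotas off: a flat reservation, straight from the table. */
	error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
				  0, 0, XFS_DEFAULT_LOG_COUNT);

	/*
	 * Setting an attribute: the old XFS_ATTRSET_LOG_RES(mp, ext) macro is
	 * gone; callers now compose a metadata part plus a per-block part.
	 */
	error = xfs_trans_reserve(tp, blks,
				  XFS_ATTRSETM_LOG_RES(mp) +
				  XFS_ATTRSETRT_LOG_RES(mp) * blks,
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ATTRSET_LOG_COUNT);
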
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6011ee661339..0eda7254305f 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -55,20 +55,6 @@ xfs_ail_check(
55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); 55 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
56 56
57 57
58#ifdef XFS_TRANS_DEBUG
59 /*
60 * Walk the list checking lsn ordering, and that every entry has the
61 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
62 * when specifically debugging the transaction subsystem.
63 */
64 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
65 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
66 if (&prev_lip->li_ail != &ailp->xa_ail)
67 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
68 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
69 prev_lip = lip;
70 }
71#endif /* XFS_TRANS_DEBUG */
72} 58}
73#else /* !DEBUG */ 59#else /* !DEBUG */
74#define xfs_ail_check(a,l) 60#define xfs_ail_check(a,l)
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 4fc17d479d42..3edf5dbee001 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -93,7 +93,7 @@ _xfs_trans_bjoin(
93 xfs_buf_item_init(bp, tp->t_mountp); 93 xfs_buf_item_init(bp, tp->t_mountp);
94 bip = bp->b_fspriv; 94 bip = bp->b_fspriv;
95 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 95 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
96 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 96 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
97 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 97 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
98 if (reset_recur) 98 if (reset_recur)
99 bip->bli_recur = 0; 99 bip->bli_recur = 0;
@@ -432,7 +432,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
432 bip = bp->b_fspriv; 432 bip = bp->b_fspriv;
433 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 433 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
434 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 434 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
435 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 435 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
436 ASSERT(atomic_read(&bip->bli_refcount) > 0); 436 ASSERT(atomic_read(&bip->bli_refcount) > 0);
437 437
438 trace_xfs_trans_brelse(bip); 438 trace_xfs_trans_brelse(bip);
@@ -519,7 +519,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
519 ASSERT(bp->b_transp == tp); 519 ASSERT(bp->b_transp == tp);
520 ASSERT(bip != NULL); 520 ASSERT(bip != NULL);
521 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 521 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
522 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 522 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
523 ASSERT(atomic_read(&bip->bli_refcount) > 0); 523 ASSERT(atomic_read(&bip->bli_refcount) > 0);
524 524
525 bip->bli_flags |= XFS_BLI_HOLD; 525 bip->bli_flags |= XFS_BLI_HOLD;
@@ -539,7 +539,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
539 ASSERT(bp->b_transp == tp); 539 ASSERT(bp->b_transp == tp);
540 ASSERT(bip != NULL); 540 ASSERT(bip != NULL);
541 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 541 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
542 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 542 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
543 ASSERT(atomic_read(&bip->bli_refcount) > 0); 543 ASSERT(atomic_read(&bip->bli_refcount) > 0);
544 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 544 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
545 545
@@ -598,7 +598,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
598 bip->bli_flags &= ~XFS_BLI_STALE; 598 bip->bli_flags &= ~XFS_BLI_STALE;
599 ASSERT(XFS_BUF_ISSTALE(bp)); 599 ASSERT(XFS_BUF_ISSTALE(bp));
600 XFS_BUF_UNSTALE(bp); 600 XFS_BUF_UNSTALE(bp);
601 bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL; 601 bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
602 } 602 }
603 603
604 tp->t_flags |= XFS_TRANS_DIRTY; 604 tp->t_flags |= XFS_TRANS_DIRTY;
@@ -643,6 +643,7 @@ xfs_trans_binval(
643 xfs_buf_t *bp) 643 xfs_buf_t *bp)
644{ 644{
645 xfs_buf_log_item_t *bip = bp->b_fspriv; 645 xfs_buf_log_item_t *bip = bp->b_fspriv;
646 int i;
646 647
647 ASSERT(bp->b_transp == tp); 648 ASSERT(bp->b_transp == tp);
648 ASSERT(bip != NULL); 649 ASSERT(bip != NULL);
@@ -657,8 +658,8 @@ xfs_trans_binval(
657 */ 658 */
658 ASSERT(XFS_BUF_ISSTALE(bp)); 659 ASSERT(XFS_BUF_ISSTALE(bp));
659 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY))); 660 ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
660 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF)); 661 ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
661 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 662 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
662 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY); 663 ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
663 ASSERT(tp->t_flags & XFS_TRANS_DIRTY); 664 ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
664 return; 665 return;
@@ -668,10 +669,12 @@ xfs_trans_binval(
668 669
669 bip->bli_flags |= XFS_BLI_STALE; 670 bip->bli_flags |= XFS_BLI_STALE;
670 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY); 671 bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
671 bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF; 672 bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
672 bip->bli_format.blf_flags |= XFS_BLF_CANCEL; 673 bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
673 memset((char *)(bip->bli_format.blf_data_map), 0, 674 for (i = 0; i < bip->bli_format_count; i++) {
674 (bip->bli_format.blf_map_size * sizeof(uint))); 675 memset(bip->bli_formats[i].blf_data_map, 0,
676 (bip->bli_formats[i].blf_map_size * sizeof(uint)));
677 }
675 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; 678 bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
676 tp->t_flags |= XFS_TRANS_DIRTY; 679 tp->t_flags |= XFS_TRANS_DIRTY;
677} 680}
@@ -775,5 +778,5 @@ xfs_trans_dquot_buf(
775 type == XFS_BLF_GDQUOT_BUF); 778 type == XFS_BLF_GDQUOT_BUF);
776 ASSERT(atomic_read(&bip->bli_refcount) > 0); 779 ASSERT(atomic_read(&bip->bli_refcount) > 0);
777 780
778 bip->bli_format.blf_flags |= type; 781 bip->__bli_format.blf_flags |= type;
779} 782}
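Every bli_format access in this file now goes through either the renamed __bli_format or the bli_formats[] array, which matches buffer log items carrying one xfs_buf_log_format per discontiguous buffer segment. A minimal sketch of the assumed split, inferred from the accesses in these hunks rather than shown by the patch:

	/*
	 * Item-wide state such as the cancel flag stays in the embedded first
	 * format; the assumption is that bli_formats points at __bli_format
	 * whenever the buffer has a single segment.
	 */
	bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;

	/* Per-segment state, such as the dirty bitmaps, is walked as an array. */
	for (i = 0; i < bip->bli_format_count; i++)
		memset(bip->bli_formats[i].blf_data_map, 0,
		       bip->bli_formats[i].blf_map_size * sizeof(uint));
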
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 0c7fa54f309e..642c2d6e1db1 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -516,7 +516,7 @@ xfs_trans_unreserve_and_mod_dquots(
516 int i, j; 516 int i, j;
517 xfs_dquot_t *dqp; 517 xfs_dquot_t *dqp;
518 xfs_dqtrx_t *qtrx, *qa; 518 xfs_dqtrx_t *qtrx, *qa;
519 boolean_t locked; 519 bool locked;
520 520
521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) 521 if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
522 return; 522 return;
@@ -537,17 +537,17 @@ xfs_trans_unreserve_and_mod_dquots(
537 * about the number of blocks used field, or deltas. 537 * about the number of blocks used field, or deltas.
538 * Also we don't bother to zero the fields. 538 * Also we don't bother to zero the fields.
539 */ 539 */
540 locked = B_FALSE; 540 locked = false;
541 if (qtrx->qt_blk_res) { 541 if (qtrx->qt_blk_res) {
542 xfs_dqlock(dqp); 542 xfs_dqlock(dqp);
543 locked = B_TRUE; 543 locked = true;
544 dqp->q_res_bcount -= 544 dqp->q_res_bcount -=
545 (xfs_qcnt_t)qtrx->qt_blk_res; 545 (xfs_qcnt_t)qtrx->qt_blk_res;
546 } 546 }
547 if (qtrx->qt_ino_res) { 547 if (qtrx->qt_ino_res) {
548 if (!locked) { 548 if (!locked) {
549 xfs_dqlock(dqp); 549 xfs_dqlock(dqp);
550 locked = B_TRUE; 550 locked = true;
551 } 551 }
552 dqp->q_res_icount -= 552 dqp->q_res_icount -=
553 (xfs_qcnt_t)qtrx->qt_ino_res; 553 (xfs_qcnt_t)qtrx->qt_ino_res;
@@ -556,7 +556,7 @@ xfs_trans_unreserve_and_mod_dquots(
556 if (qtrx->qt_rtblk_res) { 556 if (qtrx->qt_rtblk_res) {
557 if (!locked) { 557 if (!locked) {
558 xfs_dqlock(dqp); 558 xfs_dqlock(dqp);
559 locked = B_TRUE; 559 locked = true;
560 } 560 }
561 dqp->q_res_rtbcount -= 561 dqp->q_res_rtbcount -=
562 (xfs_qcnt_t)qtrx->qt_rtblk_res; 562 (xfs_qcnt_t)qtrx->qt_rtblk_res;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index d2eee20d5f5b..ac6d567704db 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -33,14 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_trace.h" 34#include "xfs_trace.h"
35 35
36#ifdef XFS_TRANS_DEBUG
37STATIC void
38xfs_trans_inode_broot_debug(
39 xfs_inode_t *ip);
40#else
41#define xfs_trans_inode_broot_debug(ip)
42#endif
43
44/* 36/*
45 * Add a locked inode to the transaction. 37 * Add a locked inode to the transaction.
46 * 38 *
@@ -67,8 +59,6 @@ xfs_trans_ijoin(
67 * Get a log_item_desc to point at the new item. 59 * Get a log_item_desc to point at the new item.
68 */ 60 */
69 xfs_trans_add_item(tp, &iip->ili_item); 61 xfs_trans_add_item(tp, &iip->ili_item);
70
71 xfs_trans_inode_broot_debug(ip);
72} 62}
73 63
74/* 64/*
@@ -135,34 +125,3 @@ xfs_trans_log_inode(
135 flags |= ip->i_itemp->ili_last_fields; 125 flags |= ip->i_itemp->ili_last_fields;
136 ip->i_itemp->ili_fields |= flags; 126 ip->i_itemp->ili_fields |= flags;
137} 127}
138
139#ifdef XFS_TRANS_DEBUG
140/*
141 * Keep track of the state of the inode btree root to make sure we
142 * log it properly.
143 */
144STATIC void
145xfs_trans_inode_broot_debug(
146 xfs_inode_t *ip)
147{
148 xfs_inode_log_item_t *iip;
149
150 ASSERT(ip->i_itemp != NULL);
151 iip = ip->i_itemp;
152 if (iip->ili_root_size != 0) {
153 ASSERT(iip->ili_orig_root != NULL);
154 kmem_free(iip->ili_orig_root);
155 iip->ili_root_size = 0;
156 iip->ili_orig_root = NULL;
157 }
158 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
159 ASSERT((ip->i_df.if_broot != NULL) &&
160 (ip->i_df.if_broot_bytes > 0));
161 iip->ili_root_size = ip->i_df.if_broot_bytes;
162 iip->ili_orig_root =
163 (char*)kmem_alloc(iip->ili_root_size, KM_SLEEP);
164 memcpy(iip->ili_orig_root, (char*)(ip->i_df.if_broot),
165 iip->ili_root_size);
166 }
167}
168#endif
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 7a41874f4c20..61ba1cfa974c 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -32,7 +32,6 @@ typedef unsigned int __uint32_t;
32typedef signed long long int __int64_t; 32typedef signed long long int __int64_t;
33typedef unsigned long long int __uint64_t; 33typedef unsigned long long int __uint64_t;
34 34
35typedef enum { B_FALSE,B_TRUE } boolean_t;
36typedef __uint32_t prid_t; /* project ID */ 35typedef __uint32_t prid_t; /* project ID */
37typedef __uint32_t inst_t; /* an instruction */ 36typedef __uint32_t inst_t; /* an instruction */
38 37
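With the private boolean_t typedef gone from xfs_types.h, the remaining users here, in xfs_trans_dquot.c above and in xfs_vnodeops.c below, switch to the kernel's native type. A one-line sketch of the replacement, assuming bool/true/false come in via <linux/types.h> as elsewhere in the kernel:

	#include <linux/types.h>	/* bool, true, false */

	bool	locked = false;		/* was: boolean_t locked = B_FALSE; */
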
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d95f565a390e..77ad74834baa 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -725,7 +725,7 @@ xfs_create(
725 int error; 725 int error;
726 xfs_bmap_free_t free_list; 726 xfs_bmap_free_t free_list;
727 xfs_fsblock_t first_block; 727 xfs_fsblock_t first_block;
728 boolean_t unlock_dp_on_error = B_FALSE; 728 bool unlock_dp_on_error = false;
729 uint cancel_flags; 729 uint cancel_flags;
730 int committed; 730 int committed;
731 prid_t prid; 731 prid_t prid;
@@ -794,7 +794,7 @@ xfs_create(
794 } 794 }
795 795
796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 796 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
797 unlock_dp_on_error = B_TRUE; 797 unlock_dp_on_error = true;
798 798
799 xfs_bmap_init(&free_list, &first_block); 799 xfs_bmap_init(&free_list, &first_block);
800 800
@@ -830,7 +830,7 @@ xfs_create(
830 * error path. 830 * error path.
831 */ 831 */
832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 832 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
833 unlock_dp_on_error = B_FALSE; 833 unlock_dp_on_error = false;
834 834
835 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 835 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
836 &first_block, &free_list, resblks ? 836 &first_block, &free_list, resblks ?
@@ -1367,7 +1367,7 @@ xfs_symlink(
1367 int pathlen; 1367 int pathlen;
1368 xfs_bmap_free_t free_list; 1368 xfs_bmap_free_t free_list;
1369 xfs_fsblock_t first_block; 1369 xfs_fsblock_t first_block;
1370 boolean_t unlock_dp_on_error = B_FALSE; 1370 bool unlock_dp_on_error = false;
1371 uint cancel_flags; 1371 uint cancel_flags;
1372 int committed; 1372 int committed;
1373 xfs_fileoff_t first_fsb; 1373 xfs_fileoff_t first_fsb;
@@ -1438,7 +1438,7 @@ xfs_symlink(
1438 } 1438 }
1439 1439
1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1440 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1441 unlock_dp_on_error = B_TRUE; 1441 unlock_dp_on_error = true;
1442 1442
1443 /* 1443 /*
1444 * Check whether the directory allows new symlinks or not. 1444 * Check whether the directory allows new symlinks or not.
@@ -1484,7 +1484,7 @@ xfs_symlink(
1484 * error path. 1484 * error path.
1485 */ 1485 */
1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1486 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1487 unlock_dp_on_error = B_FALSE; 1487 unlock_dp_on_error = false;
1488 1488
1489 /* 1489 /*
1490 * Also attach the dquot(s) to it, if applicable. 1490 * Also attach the dquot(s) to it, if applicable.