about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author J. Bruce Fields <bfields@redhat.com> 2012-10-09 18:35:22 -0400
committer J. Bruce Fields <bfields@redhat.com> 2012-10-09 18:35:22 -0400
commit f474af7051212b4efc8267583fad9c4ebf33ccff (patch)
tree 1aa46ebc8065a341f247c2a2d9af2f624ad1d4f8 /fs
parent 0d22f68f02c10d5d10ec5712917e5828b001a822 (diff)
parent e3dd9a52cb5552c46c2a4ca7ccdfb4dab5c72457 (diff)
nfs: disintegrate UAPI for nfs
This is to complete part of the Userspace API (UAPI) disintegration for which the preparatory patches were pulled recently. After these patches, userspace headers will be segregated into:

    include/uapi/linux/.../foo.h

for the userspace interface stuff, and:

    include/linux/.../foo.h

for the strictly kernel internal stuff.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c8
-rw-r--r--fs/9p/v9fs.c5
-rw-r--r--fs/9p/vfs_file.c1
-rw-r--r--fs/Kconfig.binfmt8
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/adfs.h4
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/adfs/super.c26
-rw-r--r--fs/affs/affs.h4
-rw-r--r--fs/affs/inode.c20
-rw-r--r--fs/affs/super.c25
-rw-r--r--fs/afs/callback.c4
-rw-r--r--fs/afs/server.c10
-rw-r--r--fs/afs/super.c5
-rw-r--r--fs/afs/vlocation.c14
-rw-r--r--fs/attr.c2
-rw-r--r--fs/autofs4/dev-ioctl.c18
-rw-r--r--fs/autofs4/expire.c5
-rw-r--r--fs/autofs4/waitq.c3
-rw-r--r--fs/befs/befs.h4
-rw-r--r--fs/befs/linuxvfs.c32
-rw-r--r--fs/bfs/inode.c13
-rw-r--r--fs/binfmt_aout.c54
-rw-r--r--fs/binfmt_elf.c172
-rw-r--r--fs/binfmt_elf_fdpic.c8
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/bio.c11
-rw-r--r--fs/block_dev.c3
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/backref.c4
-rw-r--r--fs/btrfs/compression.c1
-rw-r--r--fs/btrfs/ctree.c9
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/delayed-inode.c20
-rw-r--r--fs/btrfs/delayed-ref.c163
-rw-r--r--fs/btrfs/delayed-ref.h6
-rw-r--r--fs/btrfs/disk-io.c53
-rw-r--r--fs/btrfs/disk-io.h2
-rw-r--r--fs/btrfs/extent-tree.c123
-rw-r--r--fs/btrfs/extent_io.c23
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c343
-rw-r--r--fs/btrfs/ioctl.c40
-rw-r--r--fs/btrfs/locking.c2
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/reada.c18
-rw-r--r--fs/btrfs/root-tree.c4
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/btrfs/volumes.c33
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/buffer.c79
-rw-r--r--fs/ceph/addr.c21
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/debugfs.c1
-rw-r--r--fs/ceph/file.c4
-rw-r--r--fs/ceph/inode.c19
-rw-r--r--fs/ceph/ioctl.c11
-rw-r--r--fs/ceph/mds_client.c3
-rw-r--r--fs/ceph/super.c42
-rw-r--r--fs/cifs/Kconfig38
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/cifs_unicode.c2
-rw-r--r--fs/cifs/cifsacl.c2
-rw-r--r--fs/cifs/cifsencrypt.c67
-rw-r--r--fs/cifs/cifsfs.c68
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h268
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/cifsproto.h136
-rw-r--r--fs/cifs/cifssmb.c294
-rw-r--r--fs/cifs/connect.c198
-rw-r--r--fs/cifs/dir.c131
-rw-r--r--fs/cifs/file.c1058
-rw-r--r--fs/cifs/inode.c459
-rw-r--r--fs/cifs/ioctl.c32
-rw-r--r--fs/cifs/link.c76
-rw-r--r--fs/cifs/misc.c32
-rw-r--r--fs/cifs/netmisc.c9
-rw-r--r--fs/cifs/readdir.c167
-rw-r--r--fs/cifs/sess.c3
-rw-r--r--fs/cifs/smb1ops.c328
-rw-r--r--fs/cifs/smb2file.c302
-rw-r--r--fs/cifs/smb2glob.h14
-rw-r--r--fs/cifs/smb2inode.c98
-rw-r--r--fs/cifs/smb2maperror.c6
-rw-r--r--fs/cifs/smb2misc.c256
-rw-r--r--fs/cifs/smb2ops.c346
-rw-r--r--fs/cifs/smb2pdu.c1235
-rw-r--r--fs/cifs/smb2pdu.h294
-rw-r--r--fs/cifs/smb2proto.h87
-rw-r--r--fs/cifs/smb2transport.c204
-rw-r--r--fs/cifs/transport.c290
-rw-r--r--fs/coda/inode.c37
-rw-r--r--fs/compat.c110
-rw-r--r--fs/compat_binfmt_elf.c7
-rw-r--r--fs/compat_ioctl.c35
-rw-r--r--fs/configfs/inode.c4
-rw-r--r--fs/coredump.c692
-rw-r--r--fs/coredump.h6
-rw-r--r--fs/cramfs/inode.c4
-rw-r--r--fs/dcache.c14
-rw-r--r--fs/debugfs/file.c76
-rw-r--r--fs/debugfs/inode.c34
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/dlm/ast.c4
-rw-r--r--fs/dlm/config.c79
-rw-r--r--fs/dlm/config.h2
-rw-r--r--fs/dlm/dlm_internal.h46
-rw-r--r--fs/dlm/lockspace.c15
-rw-r--r--fs/dlm/lowcomms.c215
-rw-r--r--fs/dlm/lowcomms.h2
-rw-r--r--fs/dlm/main.c2
-rw-r--r--fs/dlm/member.c17
-rw-r--r--fs/dlm/netlink.c8
-rw-r--r--fs/dlm/rcom.c2
-rw-r--r--fs/dlm/recoverd.c27
-rw-r--r--fs/dlm/recoverd.h1
-rw-r--r--fs/dlm/user.c7
-rw-r--r--fs/ecryptfs/file.c10
-rw-r--r--fs/ecryptfs/inode.c5
-rw-r--r--fs/ecryptfs/main.c12
-rw-r--r--fs/ecryptfs/messaging.c5
-rw-r--r--fs/efs/inode.c4
-rw-r--r--fs/efs/super.c5
-rw-r--r--fs/eventpoll.c63
-rw-r--r--fs/exec.c707
-rw-r--r--fs/exofs/inode.c8
-rw-r--r--fs/exofs/ore_raid.c2
-rw-r--r--fs/exofs/super.c5
-rw-r--r--fs/exofs/sys.c7
-rw-r--r--fs/ext2/acl.c32
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext2/super.c5
-rw-r--r--fs/ext3/acl.c32
-rw-r--r--fs/ext3/balloc.c2
-rw-r--r--fs/ext3/inode.c19
-rw-r--r--fs/ext3/super.c15
-rw-r--r--fs/ext4/acl.c31
-rw-r--r--fs/ext4/balloc.c62
-rw-r--r--fs/ext4/bitmap.c1
-rw-r--r--fs/ext4/ext4.h49
-rw-r--r--fs/ext4/extents.c259
-rw-r--r--fs/ext4/file.c8
-rw-r--r--fs/ext4/fsync.c92
-rw-r--r--fs/ext4/ialloc.c9
-rw-r--r--fs/ext4/indirect.c18
-rw-r--r--fs/ext4/inode.c97
-rw-r--r--fs/ext4/ioctl.c37
-rw-r--r--fs/ext4/mballoc.c131
-rw-r--r--fs/ext4/mballoc.h5
-rw-r--r--fs/ext4/move_extent.c520
-rw-r--r--fs/ext4/namei.c105
-rw-r--r--fs/ext4/page-io.c176
-rw-r--r--fs/ext4/resize.c432
-rw-r--r--fs/ext4/super.c105
-rw-r--r--fs/fat/Makefile2
-rw-r--r--fs/fat/cache.c10
-rw-r--r--fs/fat/dir.c56
-rw-r--r--fs/fat/fat.h101
-rw-r--r--fs/fat/fatent.c13
-rw-r--r--fs/fat/file.c6
-rw-r--r--fs/fat/inode.c210
-rw-r--r--fs/fat/namei_msdos.c7
-rw-r--r--fs/fat/namei_vfat.c5
-rw-r--r--fs/fat/nfs.c101
-rw-r--r--fs/fcntl.c166
-rw-r--r--fs/fhandle.c17
-rw-r--r--fs/file.c573
-rw-r--r--fs/file_table.c108
-rw-r--r--fs/freevxfs/vxfs_inode.c4
-rw-r--r--fs/freevxfs/vxfs_super.c5
-rw-r--r--fs/fs-writeback.c12
-rw-r--r--fs/fuse/control.c4
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/fuse/inode.c18
-rw-r--r--fs/generic_acl.c4
-rw-r--r--fs/gfs2/acl.c14
-rw-r--r--fs/gfs2/aops.c11
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c37
-rw-r--r--fs/gfs2/glock.c60
-rw-r--r--fs/gfs2/glops.c1
-rw-r--r--fs/gfs2/incore.h30
-rw-r--r--fs/gfs2/inode.c28
-rw-r--r--fs/gfs2/lock_dlm.c2
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/quota.c43
-rw-r--r--fs/gfs2/rgrp.c1221
-rw-r--r--fs/gfs2/rgrp.h28
-rw-r--r--fs/gfs2/super.c11
-rw-r--r--fs/gfs2/trace_gfs2.h20
-rw-r--r--fs/gfs2/trans.h7
-rw-r--r--fs/gfs2/xattr.c96
-rw-r--r--fs/hfs/hfs_fs.h4
-rw-r--r--fs/hfs/inode.c6
-rw-r--r--fs/hfs/super.c22
-rw-r--r--fs/hfsplus/catalog.c4
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c8
-rw-r--r--fs/hfsplus/options.c15
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hostfs/hostfs_kern.c8
-rw-r--r--fs/hpfs/anode.c6
-rw-r--r--fs/hpfs/dnode.c28
-rw-r--r--fs/hpfs/hpfs_fn.h4
-rw-r--r--fs/hpfs/inode.c19
-rw-r--r--fs/hpfs/namei.c8
-rw-r--r--fs/hpfs/super.c23
-rw-r--r--fs/hugetlbfs/inode.c32
-rw-r--r--fs/inode.c2
-rw-r--r--fs/ioctl.c25
-rw-r--r--fs/isofs/inode.c22
-rw-r--r--fs/isofs/isofs.h4
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jbd/commit.c45
-rw-r--r--fs/jbd/journal.c5
-rw-r--r--fs/jbd/transaction.c64
-rw-r--r--fs/jbd2/commit.c40
-rw-r--r--fs/jbd2/journal.c8
-rw-r--r--fs/jbd2/recovery.c7
-rw-r--r--fs/jbd2/transaction.c65
-rw-r--r--fs/jffs2/acl.c30
-rw-r--r--fs/jffs2/file.c8
-rw-r--r--fs/jffs2/fs.c24
-rw-r--r--fs/jffs2/os-linux.h4
-rw-r--r--fs/jffs2/readinode.c13
-rw-r--r--fs/jffs2/super.c6
-rw-r--r--fs/jfs/Makefile2
-rw-r--r--fs/jfs/acl.c4
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/jfs/ioctl.c43
-rw-r--r--fs/jfs/jfs_discard.c117
-rw-r--r--fs/jfs/jfs_discard.h26
-rw-r--r--fs/jfs/jfs_dmap.c126
-rw-r--r--fs/jfs/jfs_dmap.h2
-rw-r--r--fs/jfs/jfs_filsys.h3
-rw-r--r--fs/jfs/jfs_imap.c22
-rw-r--r--fs/jfs/jfs_incore.h9
-rw-r--r--fs/jfs/jfs_txnmgr.c9
-rw-r--r--fs/jfs/super.c99
-rw-r--r--fs/jfs/xattr.c4
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/lockd/svclock.c3
-rw-r--r--fs/locks.c20
-rw-r--r--fs/logfs/dev_bdev.c15
-rw-r--r--fs/logfs/inode.c27
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/readwrite.c9
-rw-r--r--fs/logfs/segment.c2
-rw-r--r--fs/minix/inode.c21
-rw-r--r--fs/namei.c57
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/ncpfs/inode.c11
-rw-r--r--fs/nfs/Makefile18
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/idmap.c62
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfs/nfs3acl.c4
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4proc.c127
-rw-r--r--fs/nfs/nfs4renewd.c3
-rw-r--r--fs/nfs/nfs4super.c15
-rw-r--r--fs/nfs/nfs4xdr.c41
-rw-r--r--fs/nfs/objlayout/objio_osd.c55
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs.c39
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/super.c43
-rw-r--r--fs/nfs/write.c15
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/nilfs2/file.c3
-rw-r--r--fs/nilfs2/inode.c8
-rw-r--r--fs/nilfs2/super.c6
-rw-r--r--fs/notify/fanotify/fanotify_user.c87
-rw-r--r--fs/notify/inotify/inotify_user.c28
-rw-r--r--fs/ntfs/inode.c7
-rw-r--r--fs/ntfs/super.c45
-rw-r--r--fs/ntfs/volume.h5
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c38
-rw-r--r--fs/ocfs2/cluster/quorum.c2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c5
-rw-r--r--fs/ocfs2/file.c6
-rw-r--r--fs/ocfs2/mmap.c2
-rw-r--r--fs/ocfs2/quota_global.c43
-rw-r--r--fs/ocfs2/quota_local.c15
-rw-r--r--fs/ocfs2/super.c5
-rw-r--r--fs/omfs/file.c5
-rw-r--r--fs/omfs/inode.c8
-rw-r--r--fs/omfs/omfs.h4
-rw-r--r--fs/open.c139
-rw-r--r--fs/openpromfs/inode.c5
-rw-r--r--fs/pipe.c31
-rw-r--r--fs/posix_acl.c30
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/base.c561
-rw-r--r--fs/proc/fd.c367
-rw-r--r--fs/proc/fd.h14
-rw-r--r--fs/proc/generic.c15
-rw-r--r--fs/proc/inode.c1
-rw-r--r--fs/proc/internal.h48
-rw-r--r--fs/proc/page.c8
-rw-r--r--fs/proc/proc_sysctl.c13
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/pstore/Kconfig1
-rw-r--r--fs/pstore/ftrace.c96
-rw-r--r--fs/pstore/internal.h6
-rw-r--r--fs/pstore/platform.c9
-rw-r--r--fs/pstore/ram.c28
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/qnx6/inode.c9
-rw-r--r--fs/quota/Makefile2
-rw-r--r--fs/quota/dquot.c116
-rw-r--r--fs/quota/kqid.c132
-rw-r--r--fs/quota/netlink.c10
-rw-r--r--fs/quota/quota.c28
-rw-r--r--fs/quota/quota_tree.c22
-rw-r--r--fs/quota/quota_v1.c12
-rw-r--r--fs/quota/quota_v2.c26
-rw-r--r--fs/read_write.c180
-rw-r--r--fs/read_write.h2
-rw-r--r--fs/readdir.c36
-rw-r--r--fs/reiserfs/bitmap.c2
-rw-r--r--fs/reiserfs/inode.c28
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_acl.c24
-rw-r--r--fs/romfs/super.c5
-rw-r--r--fs/select.c31
-rw-r--r--fs/seq_file.c4
-rw-r--r--fs/signalfd.c13
-rw-r--r--fs/splice.c69
-rw-r--r--fs/squashfs/inode.c8
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/stat.c14
-rw-r--r--fs/statfs.c9
-rw-r--r--fs/super.c8
-rw-r--r--fs/sync.c33
-rw-r--r--fs/sysfs/symlink.c2
-rw-r--r--fs/sysv/inode.c13
-rw-r--r--fs/timerfd.c45
-rw-r--r--fs/ubifs/budget.c9
-rw-r--r--fs/ubifs/commit.c8
-rw-r--r--fs/ubifs/compress.c7
-rw-r--r--fs/ubifs/debug.c633
-rw-r--r--fs/ubifs/debug.h17
-rw-r--r--fs/ubifs/dir.c4
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/ubifs/gc.c6
-rw-r--r--fs/ubifs/journal.c4
-rw-r--r--fs/ubifs/log.c14
-rw-r--r--fs/ubifs/lprops.c66
-rw-r--r--fs/ubifs/lpt.c10
-rw-r--r--fs/ubifs/lpt_commit.c58
-rw-r--r--fs/ubifs/orphan.c7
-rw-r--r--fs/ubifs/recovery.c13
-rw-r--r--fs/ubifs/replay.c19
-rw-r--r--fs/ubifs/sb.c23
-rw-r--r--fs/ubifs/scan.c15
-rw-r--r--fs/ubifs/super.c132
-rw-r--r--fs/ubifs/tnc_misc.c4
-rw-r--r--fs/ubifs/ubifs.h17
-rw-r--r--fs/udf/file.c44
-rw-r--r--fs/udf/inode.c76
-rw-r--r--fs/udf/super.c32
-rw-r--r--fs/udf/udf_sb.h4
-rw-r--r--fs/ufs/inode.c16
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/utimes.c11
-rw-r--r--fs/xattr.c245
-rw-r--r--fs/xattr_acl.c96
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_buf.c5
-rw-r--r--fs/xfs/xfs_buf.h41
-rw-r--r--fs/xfs/xfs_dfrag.c34
-rw-r--r--fs/xfs/xfs_discard.c6
-rw-r--r--fs/xfs/xfs_file.c381
-rw-r--r--fs/xfs/xfs_ialloc.c19
-rw-r--r--fs/xfs/xfs_ioctl.c10
-rw-r--r--fs/xfs/xfs_mount.c43
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_quotaops.c12
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_super.c103
-rw-r--r--fs/xfs/xfs_super.h2
-rw-r--r--fs/xfs/xfs_sync.c2
-rw-r--r--fs/xfs/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_trans_dquot.c8
399 files changed, 14232 insertions, 7962 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 9a1d42630751..15b679166201 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -37,7 +37,7 @@ static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
37 return ERR_PTR(-ENOMEM); 37 return ERR_PTR(-ENOMEM);
38 size = v9fs_fid_xattr_get(fid, name, value, size); 38 size = v9fs_fid_xattr_get(fid, name, value, size);
39 if (size > 0) { 39 if (size > 0) {
40 acl = posix_acl_from_xattr(value, size); 40 acl = posix_acl_from_xattr(&init_user_ns, value, size);
41 if (IS_ERR(acl)) 41 if (IS_ERR(acl))
42 goto err_out; 42 goto err_out;
43 } 43 }
@@ -131,7 +131,7 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
131 buffer = kmalloc(size, GFP_KERNEL); 131 buffer = kmalloc(size, GFP_KERNEL);
132 if (!buffer) 132 if (!buffer)
133 return -ENOMEM; 133 return -ENOMEM;
134 retval = posix_acl_to_xattr(acl, buffer, size); 134 retval = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
135 if (retval < 0) 135 if (retval < 0)
136 goto err_free_out; 136 goto err_free_out;
137 switch (type) { 137 switch (type) {
@@ -251,7 +251,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
251 return PTR_ERR(acl); 251 return PTR_ERR(acl);
252 if (acl == NULL) 252 if (acl == NULL)
253 return -ENODATA; 253 return -ENODATA;
254 error = posix_acl_to_xattr(acl, buffer, size); 254 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
255 posix_acl_release(acl); 255 posix_acl_release(acl);
256 256
257 return error; 257 return error;
@@ -304,7 +304,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
304 return -EPERM; 304 return -EPERM;
305 if (value) { 305 if (value) {
306 /* update the cached acl value */ 306 /* update the cached acl value */
307 acl = posix_acl_from_xattr(value, size); 307 acl = posix_acl_from_xattr(&init_user_ns, value, size);
308 if (IS_ERR(acl)) 308 if (IS_ERR(acl))
309 return PTR_ERR(acl); 309 return PTR_ERR(acl);
310 else if (acl) { 310 else if (acl) {
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index b85efa773949..392c5dac1981 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -560,6 +560,11 @@ static int v9fs_init_inode_cache(void)
560 */ 560 */
561static void v9fs_destroy_inode_cache(void) 561static void v9fs_destroy_inode_cache(void)
562{ 562{
563 /*
564 * Make sure all delayed rcu free inodes are flushed before we
565 * destroy cache.
566 */
567 rcu_barrier();
563 kmem_cache_destroy(v9fs_inode_cache); 568 kmem_cache_destroy(v9fs_inode_cache);
564} 569}
565 570
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index dd6f7ee1e312..c2483e97beee 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -738,6 +738,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
738static const struct vm_operations_struct v9fs_file_vm_ops = { 738static const struct vm_operations_struct v9fs_file_vm_ops = {
739 .fault = filemap_fault, 739 .fault = filemap_fault,
740 .page_mkwrite = v9fs_vm_page_mkwrite, 740 .page_mkwrite = v9fs_vm_page_mkwrite,
741 .remap_pages = generic_file_remap_pages,
741}; 742};
742 743
743 744
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 022574202749..0efd1524b977 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -164,3 +164,11 @@ config BINFMT_MISC
164 You may say M here for module support and later load the module when 164 You may say M here for module support and later load the module when
165 you have use for it; the module is called binfmt_misc. If you 165 you have use for it; the module is called binfmt_misc. If you
166 don't know what to answer at this point, say Y. 166 don't know what to answer at this point, say Y.
167
168config COREDUMP
169 bool "Enable core dump support" if EXPERT
170 default y
171 help
172 This option enables support for performing core dumps. You almost
173 certainly want to say Y here. Not necessary on systems that never
174 need debugging or only ever run flawless code.
diff --git a/fs/Makefile b/fs/Makefile
index 2fb977934673..1d7af79288a0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o
48obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o 48obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
49obj-$(CONFIG_NFS_COMMON) += nfs_common/ 49obj-$(CONFIG_NFS_COMMON) += nfs_common/
50obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 50obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
51obj-$(CONFIG_COREDUMP) += coredump.o
51 52
52obj-$(CONFIG_FHANDLE) += fhandle.o 53obj-$(CONFIG_FHANDLE) += fhandle.o
53 54
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 718ac1f440c6..585adafb0cc2 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -46,8 +46,8 @@ struct adfs_sb_info {
46 struct adfs_discmap *s_map; /* bh list containing map */ 46 struct adfs_discmap *s_map; /* bh list containing map */
47 struct adfs_dir_ops *s_dir; /* directory operations */ 47 struct adfs_dir_ops *s_dir; /* directory operations */
48 48
49 uid_t s_uid; /* owner uid */ 49 kuid_t s_uid; /* owner uid */
50 gid_t s_gid; /* owner gid */ 50 kgid_t s_gid; /* owner gid */
51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ 51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
52 umode_t s_other_mask; /* ADFS other perm -> unix perm */ 52 umode_t s_other_mask; /* ADFS other perm -> unix perm */
53 int s_ftsuffix; /* ,xyz hex filetype suffix option */ 53 int s_ftsuffix; /* ,xyz hex filetype suffix option */
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 1dab6a174d6a..e9bad5093a3f 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -304,8 +304,8 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
304 * we can't change the UID or GID of any file - 304 * we can't change the UID or GID of any file -
305 * we have a global UID/GID in the superblock 305 * we have a global UID/GID in the superblock
306 */ 306 */
307 if ((ia_valid & ATTR_UID && attr->ia_uid != ADFS_SB(sb)->s_uid) || 307 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, ADFS_SB(sb)->s_uid)) ||
308 (ia_valid & ATTR_GID && attr->ia_gid != ADFS_SB(sb)->s_gid)) 308 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, ADFS_SB(sb)->s_gid)))
309 error = -EPERM; 309 error = -EPERM;
310 310
311 if (error) 311 if (error)
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index bdaec92353c2..d57122935793 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -15,6 +15,7 @@
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/statfs.h> 17#include <linux/statfs.h>
18#include <linux/user_namespace.h>
18#include "adfs.h" 19#include "adfs.h"
19#include "dir_f.h" 20#include "dir_f.h"
20#include "dir_fplus.h" 21#include "dir_fplus.h"
@@ -130,10 +131,10 @@ static int adfs_show_options(struct seq_file *seq, struct dentry *root)
130{ 131{
131 struct adfs_sb_info *asb = ADFS_SB(root->d_sb); 132 struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
132 133
133 if (asb->s_uid != 0) 134 if (!uid_eq(asb->s_uid, GLOBAL_ROOT_UID))
134 seq_printf(seq, ",uid=%u", asb->s_uid); 135 seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, asb->s_uid));
135 if (asb->s_gid != 0) 136 if (!gid_eq(asb->s_gid, GLOBAL_ROOT_GID))
136 seq_printf(seq, ",gid=%u", asb->s_gid); 137 seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, asb->s_gid));
137 if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK) 138 if (asb->s_owner_mask != ADFS_DEFAULT_OWNER_MASK)
138 seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); 139 seq_printf(seq, ",ownmask=%o", asb->s_owner_mask);
139 if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) 140 if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK)
@@ -175,12 +176,16 @@ static int parse_options(struct super_block *sb, char *options)
175 case Opt_uid: 176 case Opt_uid:
176 if (match_int(args, &option)) 177 if (match_int(args, &option))
177 return -EINVAL; 178 return -EINVAL;
178 asb->s_uid = option; 179 asb->s_uid = make_kuid(current_user_ns(), option);
180 if (!uid_valid(asb->s_uid))
181 return -EINVAL;
179 break; 182 break;
180 case Opt_gid: 183 case Opt_gid:
181 if (match_int(args, &option)) 184 if (match_int(args, &option))
182 return -EINVAL; 185 return -EINVAL;
183 asb->s_gid = option; 186 asb->s_gid = make_kgid(current_user_ns(), option);
187 if (!gid_valid(asb->s_gid))
188 return -EINVAL;
184 break; 189 break;
185 case Opt_ownmask: 190 case Opt_ownmask:
186 if (match_octal(args, &option)) 191 if (match_octal(args, &option))
@@ -275,6 +280,11 @@ static int init_inodecache(void)
275 280
276static void destroy_inodecache(void) 281static void destroy_inodecache(void)
277{ 282{
283 /*
284 * Make sure all delayed rcu free inodes are flushed before we
285 * destroy cache.
286 */
287 rcu_barrier();
278 kmem_cache_destroy(adfs_inode_cachep); 288 kmem_cache_destroy(adfs_inode_cachep);
279} 289}
280 290
@@ -369,8 +379,8 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
369 sb->s_fs_info = asb; 379 sb->s_fs_info = asb;
370 380
371 /* set default options */ 381 /* set default options */
372 asb->s_uid = 0; 382 asb->s_uid = GLOBAL_ROOT_UID;
373 asb->s_gid = 0; 383 asb->s_gid = GLOBAL_ROOT_GID;
374 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; 384 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
375 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; 385 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
376 asb->s_ftsuffix = 0; 386 asb->s_ftsuffix = 0;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 6e216419f340..3952121f2f28 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -88,8 +88,8 @@ struct affs_sb_info {
88 u32 s_root_block; /* FFS root block number. */ 88 u32 s_root_block; /* FFS root block number. */
89 int s_hashsize; /* Size of hash table. */ 89 int s_hashsize; /* Size of hash table. */
90 unsigned long s_flags; /* See below. */ 90 unsigned long s_flags; /* See below. */
91 uid_t s_uid; /* uid to override */ 91 kuid_t s_uid; /* uid to override */
92 gid_t s_gid; /* gid to override */ 92 kgid_t s_gid; /* gid to override */
93 umode_t s_mode; /* mode to override */ 93 umode_t s_mode; /* mode to override */
94 struct buffer_head *s_root_bh; /* Cached root block. */ 94 struct buffer_head *s_root_bh; /* Cached root block. */
95 struct mutex s_bmlock; /* Protects bitmap access. */ 95 struct mutex s_bmlock; /* Protects bitmap access. */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 8bc4a59f4e7e..15c484268229 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -80,17 +80,17 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
80 if (id == 0 || sbi->s_flags & SF_SETUID) 80 if (id == 0 || sbi->s_flags & SF_SETUID)
81 inode->i_uid = sbi->s_uid; 81 inode->i_uid = sbi->s_uid;
82 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) 82 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
83 inode->i_uid = 0; 83 i_uid_write(inode, 0);
84 else 84 else
85 inode->i_uid = id; 85 i_uid_write(inode, id);
86 86
87 id = be16_to_cpu(tail->gid); 87 id = be16_to_cpu(tail->gid);
88 if (id == 0 || sbi->s_flags & SF_SETGID) 88 if (id == 0 || sbi->s_flags & SF_SETGID)
89 inode->i_gid = sbi->s_gid; 89 inode->i_gid = sbi->s_gid;
90 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) 90 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
91 inode->i_gid = 0; 91 i_gid_write(inode, 0);
92 else 92 else
93 inode->i_gid = id; 93 i_gid_write(inode, id);
94 94
95 switch (be32_to_cpu(tail->stype)) { 95 switch (be32_to_cpu(tail->stype)) {
96 case ST_ROOT: 96 case ST_ROOT:
@@ -193,13 +193,13 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
193 tail->size = cpu_to_be32(inode->i_size); 193 tail->size = cpu_to_be32(inode->i_size);
194 secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change); 194 secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
195 if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { 195 if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
196 uid = inode->i_uid; 196 uid = i_uid_read(inode);
197 gid = inode->i_gid; 197 gid = i_gid_read(inode);
198 if (AFFS_SB(sb)->s_flags & SF_MUFS) { 198 if (AFFS_SB(sb)->s_flags & SF_MUFS) {
199 if (inode->i_uid == 0 || inode->i_uid == 0xFFFF) 199 if (uid == 0 || uid == 0xFFFF)
200 uid = inode->i_uid ^ ~0; 200 uid = uid ^ ~0;
201 if (inode->i_gid == 0 || inode->i_gid == 0xFFFF) 201 if (gid == 0 || gid == 0xFFFF)
202 gid = inode->i_gid ^ ~0; 202 gid = gid ^ ~0;
203 } 203 }
204 if (!(AFFS_SB(sb)->s_flags & SF_SETUID)) 204 if (!(AFFS_SB(sb)->s_flags & SF_SETUID))
205 tail->uid = cpu_to_be16(uid); 205 tail->uid = cpu_to_be16(uid);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index c70f1e5fc024..b84dc7352502 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -147,6 +147,11 @@ static int init_inodecache(void)
147 147
148static void destroy_inodecache(void) 148static void destroy_inodecache(void)
149{ 149{
150 /*
151 * Make sure all delayed rcu free inodes are flushed before we
152 * destroy cache.
153 */
154 rcu_barrier();
150 kmem_cache_destroy(affs_inode_cachep); 155 kmem_cache_destroy(affs_inode_cachep);
151} 156}
152 157
@@ -188,7 +193,7 @@ static const match_table_t tokens = {
188}; 193};
189 194
190static int 195static int
191parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s32 *root, 196parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, s32 *root,
192 int *blocksize, char **prefix, char *volume, unsigned long *mount_opts) 197 int *blocksize, char **prefix, char *volume, unsigned long *mount_opts)
193{ 198{
194 char *p; 199 char *p;
@@ -253,13 +258,17 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
253 case Opt_setgid: 258 case Opt_setgid:
254 if (match_int(&args[0], &option)) 259 if (match_int(&args[0], &option))
255 return 0; 260 return 0;
256 *gid = option; 261 *gid = make_kgid(current_user_ns(), option);
262 if (!gid_valid(*gid))
263 return 0;
257 *mount_opts |= SF_SETGID; 264 *mount_opts |= SF_SETGID;
258 break; 265 break;
259 case Opt_setuid: 266 case Opt_setuid:
260 if (match_int(&args[0], &option)) 267 if (match_int(&args[0], &option))
261 return 0; 268 return 0;
262 *uid = option; 269 *uid = make_kuid(current_user_ns(), option);
270 if (!uid_valid(*uid))
271 return 0;
263 *mount_opts |= SF_SETUID; 272 *mount_opts |= SF_SETUID;
264 break; 273 break;
265 case Opt_verbose: 274 case Opt_verbose:
@@ -301,8 +310,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
301 int num_bm; 310 int num_bm;
302 int i, j; 311 int i, j;
303 s32 key; 312 s32 key;
304 uid_t uid; 313 kuid_t uid;
305 gid_t gid; 314 kgid_t gid;
306 int reserved; 315 int reserved;
307 unsigned long mount_flags; 316 unsigned long mount_flags;
308 int tmp_flags; /* fix remount prototype... */ 317 int tmp_flags; /* fix remount prototype... */
@@ -527,8 +536,8 @@ affs_remount(struct super_block *sb, int *flags, char *data)
527{ 536{
528 struct affs_sb_info *sbi = AFFS_SB(sb); 537 struct affs_sb_info *sbi = AFFS_SB(sb);
529 int blocksize; 538 int blocksize;
530 uid_t uid; 539 kuid_t uid;
531 gid_t gid; 540 kgid_t gid;
532 int mode; 541 int mode;
533 int reserved; 542 int reserved;
534 int root_block; 543 int root_block;
@@ -551,7 +560,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
551 return -EINVAL; 560 return -EINVAL;
552 } 561 }
553 562
554 flush_delayed_work_sync(&sbi->sb_work); 563 flush_delayed_work(&sbi->sb_work);
555 replace_mount_options(sb, new_opts); 564 replace_mount_options(sb, new_opts);
556 565
557 sbi->s_flags = mount_flags; 566 sbi->s_flags = mount_flags;
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 587ef5123cd8..7ef637d7f3a5 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -351,9 +351,7 @@ void afs_dispatch_give_up_callbacks(struct work_struct *work)
351 */ 351 */
352void afs_flush_callback_breaks(struct afs_server *server) 352void afs_flush_callback_breaks(struct afs_server *server)
353{ 353{
354 cancel_delayed_work(&server->cb_break_work); 354 mod_delayed_work(afs_callback_update_worker, &server->cb_break_work, 0);
355 queue_delayed_work(afs_callback_update_worker,
356 &server->cb_break_work, 0);
357} 355}
358 356
359#if 0 357#if 0
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d59b7516e943..f342acf3547d 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -285,12 +285,7 @@ static void afs_reap_server(struct work_struct *work)
285 expiry = server->time_of_death + afs_server_timeout; 285 expiry = server->time_of_death + afs_server_timeout;
286 if (expiry > now) { 286 if (expiry > now) {
287 delay = (expiry - now) * HZ; 287 delay = (expiry - now) * HZ;
288 if (!queue_delayed_work(afs_wq, &afs_server_reaper, 288 mod_delayed_work(afs_wq, &afs_server_reaper, delay);
289 delay)) {
290 cancel_delayed_work(&afs_server_reaper);
291 queue_delayed_work(afs_wq, &afs_server_reaper,
292 delay);
293 }
294 break; 289 break;
295 } 290 }
296 291
@@ -323,6 +318,5 @@ static void afs_reap_server(struct work_struct *work)
323void __exit afs_purge_servers(void) 318void __exit afs_purge_servers(void)
324{ 319{
325 afs_server_timeout = 0; 320 afs_server_timeout = 0;
326 cancel_delayed_work(&afs_server_reaper); 321 mod_delayed_work(afs_wq, &afs_server_reaper, 0);
327 queue_delayed_work(afs_wq, &afs_server_reaper, 0);
328} 322}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index df8c6047c2a1..43165009428d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -123,6 +123,11 @@ void __exit afs_fs_exit(void)
123 BUG(); 123 BUG();
124 } 124 }
125 125
126 /*
127 * Make sure all delayed rcu free inodes are flushed before we
128 * destroy cache.
129 */
130 rcu_barrier();
126 kmem_cache_destroy(afs_inode_cachep); 131 kmem_cache_destroy(afs_inode_cachep);
127 _leave(""); 132 _leave("");
128} 133}
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 431984d2e372..57bcb1596530 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -561,12 +561,7 @@ static void afs_vlocation_reaper(struct work_struct *work)
561 if (expiry > now) { 561 if (expiry > now) {
562 delay = (expiry - now) * HZ; 562 delay = (expiry - now) * HZ;
563 _debug("delay %lu", delay); 563 _debug("delay %lu", delay);
564 if (!queue_delayed_work(afs_wq, &afs_vlocation_reap, 564 mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
565 delay)) {
566 cancel_delayed_work(&afs_vlocation_reap);
567 queue_delayed_work(afs_wq, &afs_vlocation_reap,
568 delay);
569 }
570 break; 565 break;
571 } 566 }
572 567
@@ -614,13 +609,10 @@ void afs_vlocation_purge(void)
614 spin_lock(&afs_vlocation_updates_lock); 609 spin_lock(&afs_vlocation_updates_lock);
615 list_del_init(&afs_vlocation_updates); 610 list_del_init(&afs_vlocation_updates);
616 spin_unlock(&afs_vlocation_updates_lock); 611 spin_unlock(&afs_vlocation_updates_lock);
617 cancel_delayed_work(&afs_vlocation_update); 612 mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
618 queue_delayed_work(afs_vlocation_update_worker,
619 &afs_vlocation_update, 0);
620 destroy_workqueue(afs_vlocation_update_worker); 613 destroy_workqueue(afs_vlocation_update_worker);
621 614
622 cancel_delayed_work(&afs_vlocation_reap); 615 mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
623 queue_delayed_work(afs_wq, &afs_vlocation_reap, 0);
624} 616}
625 617
626/* 618/*
diff --git a/fs/attr.c b/fs/attr.c
index 29e38a1f7f77..cce7df53b694 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,6 +14,7 @@
14#include <linux/fcntl.h> 14#include <linux/fcntl.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/evm.h> 16#include <linux/evm.h>
17#include <linux/ima.h>
17 18
18/** 19/**
19 * inode_change_ok - check if attribute changes to an inode are allowed 20 * inode_change_ok - check if attribute changes to an inode are allowed
@@ -247,6 +248,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
247 248
248 if (!error) { 249 if (!error) {
249 fsnotify_change(dentry, ia_valid); 250 fsnotify_change(dentry, ia_valid);
251 ima_inode_post_setattr(dentry);
250 evm_inode_post_setattr(dentry, ia_valid); 252 evm_inode_post_setattr(dentry, ia_valid);
251 } 253 }
252 254
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index abf645c1703b..a16214109d31 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -221,20 +221,6 @@ static int test_by_type(struct path *path, void *p)
221 return ino && ino->sbi->type & *(unsigned *)p; 221 return ino && ino->sbi->type & *(unsigned *)p;
222} 222}
223 223
224static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
225{
226 struct files_struct *files = current->files;
227 struct fdtable *fdt;
228
229 spin_lock(&files->file_lock);
230 fdt = files_fdtable(files);
231 BUG_ON(fdt->fd[fd] != NULL);
232 rcu_assign_pointer(fdt->fd[fd], file);
233 __set_close_on_exec(fd, fdt);
234 spin_unlock(&files->file_lock);
235}
236
237
238/* 224/*
239 * Open a file descriptor on the autofs mount point corresponding 225 * Open a file descriptor on the autofs mount point corresponding
240 * to the given path and device number (aka. new_encode_dev(sb->s_dev)). 226 * to the given path and device number (aka. new_encode_dev(sb->s_dev)).
@@ -243,7 +229,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
243{ 229{
244 int err, fd; 230 int err, fd;
245 231
246 fd = get_unused_fd(); 232 fd = get_unused_fd_flags(O_CLOEXEC);
247 if (likely(fd >= 0)) { 233 if (likely(fd >= 0)) {
248 struct file *filp; 234 struct file *filp;
249 struct path path; 235 struct path path;
@@ -264,7 +250,7 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
264 goto out; 250 goto out;
265 } 251 }
266 252
267 autofs_dev_ioctl_fd_install(fd, filp); 253 fd_install(fd, filp);
268 } 254 }
269 255
270 return fd; 256 return fd;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 8c0e56d92938..842d00048a65 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -399,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
399 DPRINTK("checking mountpoint %p %.*s", 399 DPRINTK("checking mountpoint %p %.*s",
400 dentry, (int)dentry->d_name.len, dentry->d_name.name); 400 dentry, (int)dentry->d_name.len, dentry->d_name.name);
401 401
402 /* Path walk currently on this dentry? */
403 ino_count = atomic_read(&ino->count) + 2;
404 if (dentry->d_count > ino_count)
405 goto next;
406
407 /* Can we umount this guy */ 402 /* Can we umount this guy */
408 if (autofs4_mount_busy(mnt, dentry)) 403 if (autofs4_mount_busy(mnt, dentry))
409 goto next; 404 goto next;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7b..dce436e595c1 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -175,8 +175,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
175 return; 175 return;
176 } 176 }
177 177
178 pipe = sbi->pipe; 178 pipe = get_file(sbi->pipe);
179 get_file(pipe);
180 179
181 mutex_unlock(&sbi->wq_mutex); 180 mutex_unlock(&sbi->wq_mutex);
182 181
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index d9a40abda6b7..b26642839156 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -20,8 +20,8 @@ typedef u64 befs_blocknr_t;
20 */ 20 */
21 21
22typedef struct befs_mount_options { 22typedef struct befs_mount_options {
23 gid_t gid; 23 kgid_t gid;
24 uid_t uid; 24 kuid_t uid;
25 int use_gid; 25 int use_gid;
26 int use_uid; 26 int use_uid;
27 int debug; 27 int debug;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index cf7f3c67c8b7..2b3bda8d5e68 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -15,6 +15,7 @@
15#include <linux/vfs.h> 15#include <linux/vfs.h>
16#include <linux/parser.h> 16#include <linux/parser.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/sched.h>
18 19
19#include "befs.h" 20#include "befs.h"
20#include "btree.h" 21#include "btree.h"
@@ -352,9 +353,11 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
352 */ 353 */
353 354
354 inode->i_uid = befs_sb->mount_opts.use_uid ? 355 inode->i_uid = befs_sb->mount_opts.use_uid ?
355 befs_sb->mount_opts.uid : (uid_t) fs32_to_cpu(sb, raw_inode->uid); 356 befs_sb->mount_opts.uid :
357 make_kuid(&init_user_ns, fs32_to_cpu(sb, raw_inode->uid));
356 inode->i_gid = befs_sb->mount_opts.use_gid ? 358 inode->i_gid = befs_sb->mount_opts.use_gid ?
357 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid); 359 befs_sb->mount_opts.gid :
360 make_kgid(&init_user_ns, fs32_to_cpu(sb, raw_inode->gid));
358 361
359 set_nlink(inode, 1); 362 set_nlink(inode, 1);
360 363
@@ -454,6 +457,11 @@ befs_init_inodecache(void)
454static void 457static void
455befs_destroy_inodecache(void) 458befs_destroy_inodecache(void)
456{ 459{
460 /*
461 * Make sure all delayed rcu free inodes are flushed before we
462 * destroy cache.
463 */
464 rcu_barrier();
457 kmem_cache_destroy(befs_inode_cachep); 465 kmem_cache_destroy(befs_inode_cachep);
458} 466}
459 467
@@ -674,10 +682,12 @@ parse_options(char *options, befs_mount_options * opts)
674 char *p; 682 char *p;
675 substring_t args[MAX_OPT_ARGS]; 683 substring_t args[MAX_OPT_ARGS];
676 int option; 684 int option;
685 kuid_t uid;
686 kgid_t gid;
677 687
678 /* Initialize options */ 688 /* Initialize options */
679 opts->uid = 0; 689 opts->uid = GLOBAL_ROOT_UID;
680 opts->gid = 0; 690 opts->gid = GLOBAL_ROOT_GID;
681 opts->use_uid = 0; 691 opts->use_uid = 0;
682 opts->use_gid = 0; 692 opts->use_gid = 0;
683 opts->iocharset = NULL; 693 opts->iocharset = NULL;
@@ -696,23 +706,29 @@ parse_options(char *options, befs_mount_options * opts)
696 case Opt_uid: 706 case Opt_uid:
697 if (match_int(&args[0], &option)) 707 if (match_int(&args[0], &option))
698 return 0; 708 return 0;
699 if (option < 0) { 709 uid = INVALID_UID;
710 if (option >= 0)
711 uid = make_kuid(current_user_ns(), option);
712 if (!uid_valid(uid)) {
700 printk(KERN_ERR "BeFS: Invalid uid %d, " 713 printk(KERN_ERR "BeFS: Invalid uid %d, "
701 "using default\n", option); 714 "using default\n", option);
702 break; 715 break;
703 } 716 }
704 opts->uid = option; 717 opts->uid = uid;
705 opts->use_uid = 1; 718 opts->use_uid = 1;
706 break; 719 break;
707 case Opt_gid: 720 case Opt_gid:
708 if (match_int(&args[0], &option)) 721 if (match_int(&args[0], &option))
709 return 0; 722 return 0;
710 if (option < 0) { 723 gid = INVALID_GID;
724 if (option >= 0)
725 gid = make_kgid(current_user_ns(), option);
726 if (!gid_valid(gid)) {
711 printk(KERN_ERR "BeFS: Invalid gid %d, " 727 printk(KERN_ERR "BeFS: Invalid gid %d, "
712 "using default\n", option); 728 "using default\n", option);
713 break; 729 break;
714 } 730 }
715 opts->gid = option; 731 opts->gid = gid;
716 opts->use_gid = 1; 732 opts->use_gid = 1;
717 break; 733 break;
718 case Opt_charset: 734 case Opt_charset:
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9870417c26e7..737aaa3f7090 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -76,8 +76,8 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
76 BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); 76 BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock);
77 BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); 77 BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock);
78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); 78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
79 inode->i_uid = le32_to_cpu(di->i_uid); 79 i_uid_write(inode, le32_to_cpu(di->i_uid));
80 inode->i_gid = le32_to_cpu(di->i_gid); 80 i_gid_write(inode, le32_to_cpu(di->i_gid));
81 set_nlink(inode, le32_to_cpu(di->i_nlink)); 81 set_nlink(inode, le32_to_cpu(di->i_nlink));
82 inode->i_size = BFS_FILESIZE(di); 82 inode->i_size = BFS_FILESIZE(di);
83 inode->i_blocks = BFS_FILEBLOCKS(di); 83 inode->i_blocks = BFS_FILEBLOCKS(di);
@@ -139,8 +139,8 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
139 139
140 di->i_ino = cpu_to_le16(ino); 140 di->i_ino = cpu_to_le16(ino);
141 di->i_mode = cpu_to_le32(inode->i_mode); 141 di->i_mode = cpu_to_le32(inode->i_mode);
142 di->i_uid = cpu_to_le32(inode->i_uid); 142 di->i_uid = cpu_to_le32(i_uid_read(inode));
143 di->i_gid = cpu_to_le32(inode->i_gid); 143 di->i_gid = cpu_to_le32(i_gid_read(inode));
144 di->i_nlink = cpu_to_le32(inode->i_nlink); 144 di->i_nlink = cpu_to_le32(inode->i_nlink);
145 di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 145 di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
146 di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 146 di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -280,6 +280,11 @@ static int init_inodecache(void)
280 280
281static void destroy_inodecache(void) 281static void destroy_inodecache(void)
282{ 282{
283 /*
284 * Make sure all delayed rcu free inodes are flushed before we
285 * destroy cache.
286 */
287 rcu_barrier();
283 kmem_cache_destroy(bfs_inode_cachep); 288 kmem_cache_destroy(bfs_inode_cachep);
284} 289}
285 290
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index d146e181d10d..0e7a6f81ae36 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,31 +32,8 @@
32 32
33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); 33static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
34static int load_aout_library(struct file*); 34static int load_aout_library(struct file*);
35static int aout_core_dump(struct coredump_params *cprm);
36
37static struct linux_binfmt aout_format = {
38 .module = THIS_MODULE,
39 .load_binary = load_aout_binary,
40 .load_shlib = load_aout_library,
41 .core_dump = aout_core_dump,
42 .min_coredump = PAGE_SIZE
43};
44
45#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
46
47static int set_brk(unsigned long start, unsigned long end)
48{
49 start = PAGE_ALIGN(start);
50 end = PAGE_ALIGN(end);
51 if (end > start) {
52 unsigned long addr;
53 addr = vm_brk(start, end - start);
54 if (BAD_ADDR(addr))
55 return addr;
56 }
57 return 0;
58}
59 35
36#ifdef CONFIG_COREDUMP
60/* 37/*
61 * Routine writes a core dump image in the current directory. 38 * Routine writes a core dump image in the current directory.
62 * Currently only a stub-function. 39 * Currently only a stub-function.
@@ -66,7 +43,6 @@ static int set_brk(unsigned long start, unsigned long end)
66 * field, which also makes sure the core-dumps won't be recursive if the 43 * field, which also makes sure the core-dumps won't be recursive if the
67 * dumping of the process results in another error.. 44 * dumping of the process results in another error..
68 */ 45 */
69
70static int aout_core_dump(struct coredump_params *cprm) 46static int aout_core_dump(struct coredump_params *cprm)
71{ 47{
72 struct file *file = cprm->file; 48 struct file *file = cprm->file;
@@ -89,7 +65,7 @@ static int aout_core_dump(struct coredump_params *cprm)
89 current->flags |= PF_DUMPCORE; 65 current->flags |= PF_DUMPCORE;
90 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); 66 strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
91 dump.u_ar0 = offsetof(struct user, regs); 67 dump.u_ar0 = offsetof(struct user, regs);
92 dump.signal = cprm->signr; 68 dump.signal = cprm->siginfo->si_signo;
93 aout_dump_thread(cprm->regs, &dump); 69 aout_dump_thread(cprm->regs, &dump);
94 70
95/* If the size of the dump file exceeds the rlimit, then see what would happen 71/* If the size of the dump file exceeds the rlimit, then see what would happen
@@ -135,6 +111,32 @@ end_coredump:
135 set_fs(fs); 111 set_fs(fs);
136 return has_dumped; 112 return has_dumped;
137} 113}
114#else
115#define aout_core_dump NULL
116#endif
117
118static struct linux_binfmt aout_format = {
119 .module = THIS_MODULE,
120 .load_binary = load_aout_binary,
121 .load_shlib = load_aout_library,
122 .core_dump = aout_core_dump,
123 .min_coredump = PAGE_SIZE
124};
125
126#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
127
128static int set_brk(unsigned long start, unsigned long end)
129{
130 start = PAGE_ALIGN(start);
131 end = PAGE_ALIGN(end);
132 if (end > start) {
133 unsigned long addr;
134 addr = vm_brk(start, end - start);
135 if (BAD_ADDR(addr))
136 return addr;
137 }
138 return 0;
139}
138 140
139/* 141/*
140 * create_aout_tables() parses the env- and arg-strings in new user 142 * create_aout_tables() parses the env- and arg-strings in new user
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 1b52956afe33..e800dec958c3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -27,6 +27,7 @@
27#include <linux/compiler.h> 27#include <linux/compiler.h>
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/vmalloc.h>
30#include <linux/security.h> 31#include <linux/security.h>
31#include <linux/random.h> 32#include <linux/random.h>
32#include <linux/elf.h> 33#include <linux/elf.h>
@@ -37,6 +38,13 @@
37#include <asm/page.h> 38#include <asm/page.h>
38#include <asm/exec.h> 39#include <asm/exec.h>
39 40
41#ifndef user_long_t
42#define user_long_t long
43#endif
44#ifndef user_siginfo_t
45#define user_siginfo_t siginfo_t
46#endif
47
40static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); 48static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
41static int load_elf_library(struct file *); 49static int load_elf_library(struct file *);
42static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, 50static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
@@ -881,7 +889,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
881 } 889 }
882 890
883 if (elf_interpreter) { 891 if (elf_interpreter) {
884 unsigned long uninitialized_var(interp_map_addr); 892 unsigned long interp_map_addr = 0;
885 893
886 elf_entry = load_elf_interp(&loc->interp_elf_ex, 894 elf_entry = load_elf_interp(&loc->interp_elf_ex,
887 interpreter, 895 interpreter,
@@ -1115,7 +1123,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1115 if (always_dump_vma(vma)) 1123 if (always_dump_vma(vma))
1116 goto whole; 1124 goto whole;
1117 1125
1118 if (vma->vm_flags & VM_NODUMP) 1126 if (vma->vm_flags & VM_DONTDUMP)
1119 return 0; 1127 return 0;
1120 1128
1121 /* Hugetlb memory check */ 1129 /* Hugetlb memory check */
@@ -1127,7 +1135,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
1127 } 1135 }
1128 1136
1129 /* Do not dump I/O mapped devices or special mappings */ 1137 /* Do not dump I/O mapped devices or special mappings */
1130 if (vma->vm_flags & (VM_IO | VM_RESERVED)) 1138 if (vma->vm_flags & VM_IO)
1131 return 0; 1139 return 0;
1132 1140
1133 /* By default, dump shared memory if mapped from an anonymous file. */ 1141 /* By default, dump shared memory if mapped from an anonymous file. */
@@ -1372,6 +1380,103 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1372 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv); 1380 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1373} 1381}
1374 1382
1383static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1384 siginfo_t *siginfo)
1385{
1386 mm_segment_t old_fs = get_fs();
1387 set_fs(KERNEL_DS);
1388 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1389 set_fs(old_fs);
1390 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1391}
1392
1393#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1394/*
1395 * Format of NT_FILE note:
1396 *
1397 * long count -- how many files are mapped
1398 * long page_size -- units for file_ofs
1399 * array of [COUNT] elements of
1400 * long start
1401 * long end
1402 * long file_ofs
1403 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1404 */
1405static void fill_files_note(struct memelfnote *note)
1406{
1407 struct vm_area_struct *vma;
1408 unsigned count, size, names_ofs, remaining, n;
1409 user_long_t *data;
1410 user_long_t *start_end_ofs;
1411 char *name_base, *name_curpos;
1412
1413 /* *Estimated* file count and total data size needed */
1414 count = current->mm->map_count;
1415 size = count * 64;
1416
1417 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1418 alloc:
1419 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1420 goto err;
1421 size = round_up(size, PAGE_SIZE);
1422 data = vmalloc(size);
1423 if (!data)
1424 goto err;
1425
1426 start_end_ofs = data + 2;
1427 name_base = name_curpos = ((char *)data) + names_ofs;
1428 remaining = size - names_ofs;
1429 count = 0;
1430 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1431 struct file *file;
1432 const char *filename;
1433
1434 file = vma->vm_file;
1435 if (!file)
1436 continue;
1437 filename = d_path(&file->f_path, name_curpos, remaining);
1438 if (IS_ERR(filename)) {
1439 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1440 vfree(data);
1441 size = size * 5 / 4;
1442 goto alloc;
1443 }
1444 continue;
1445 }
1446
1447 /* d_path() fills at the end, move name down */
1448 /* n = strlen(filename) + 1: */
1449 n = (name_curpos + remaining) - filename;
1450 remaining = filename - name_curpos;
1451 memmove(name_curpos, filename, n);
1452 name_curpos += n;
1453
1454 *start_end_ofs++ = vma->vm_start;
1455 *start_end_ofs++ = vma->vm_end;
1456 *start_end_ofs++ = vma->vm_pgoff;
1457 count++;
1458 }
1459
1460 /* Now we know exact count of files, can store it */
1461 data[0] = count;
1462 data[1] = PAGE_SIZE;
1463 /*
1464 * Count usually is less than current->mm->map_count,
1465 * we need to move filenames down.
1466 */
1467 n = current->mm->map_count - count;
1468 if (n != 0) {
1469 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1470 memmove(name_base - shift_bytes, name_base,
1471 name_curpos - name_base);
1472 name_curpos -= shift_bytes;
1473 }
1474
1475 size = name_curpos - (char *)data;
1476 fill_note(note, "CORE", NT_FILE, size, data);
1477 err: ;
1478}
1479
1375#ifdef CORE_DUMP_USE_REGSET 1480#ifdef CORE_DUMP_USE_REGSET
1376#include <linux/regset.h> 1481#include <linux/regset.h>
1377 1482
@@ -1385,7 +1490,10 @@ struct elf_thread_core_info {
1385struct elf_note_info { 1490struct elf_note_info {
1386 struct elf_thread_core_info *thread; 1491 struct elf_thread_core_info *thread;
1387 struct memelfnote psinfo; 1492 struct memelfnote psinfo;
1493 struct memelfnote signote;
1388 struct memelfnote auxv; 1494 struct memelfnote auxv;
1495 struct memelfnote files;
1496 user_siginfo_t csigdata;
1389 size_t size; 1497 size_t size;
1390 int thread_notes; 1498 int thread_notes;
1391}; 1499};
@@ -1480,7 +1588,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
1480 1588
1481static int fill_note_info(struct elfhdr *elf, int phdrs, 1589static int fill_note_info(struct elfhdr *elf, int phdrs,
1482 struct elf_note_info *info, 1590 struct elf_note_info *info,
1483 long signr, struct pt_regs *regs) 1591 siginfo_t *siginfo, struct pt_regs *regs)
1484{ 1592{
1485 struct task_struct *dump_task = current; 1593 struct task_struct *dump_task = current;
1486 const struct user_regset_view *view = task_user_regset_view(dump_task); 1594 const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -1550,7 +1658,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1550 * Now fill in each thread's information. 1658 * Now fill in each thread's information.
1551 */ 1659 */
1552 for (t = info->thread; t != NULL; t = t->next) 1660 for (t = info->thread; t != NULL; t = t->next)
1553 if (!fill_thread_core_info(t, view, signr, &info->size)) 1661 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1554 return 0; 1662 return 0;
1555 1663
1556 /* 1664 /*
@@ -1559,9 +1667,15 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1559 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm); 1667 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1560 info->size += notesize(&info->psinfo); 1668 info->size += notesize(&info->psinfo);
1561 1669
1670 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1671 info->size += notesize(&info->signote);
1672
1562 fill_auxv_note(&info->auxv, current->mm); 1673 fill_auxv_note(&info->auxv, current->mm);
1563 info->size += notesize(&info->auxv); 1674 info->size += notesize(&info->auxv);
1564 1675
1676 fill_files_note(&info->files);
1677 info->size += notesize(&info->files);
1678
1565 return 1; 1679 return 1;
1566} 1680}
1567 1681
@@ -1588,8 +1702,12 @@ static int write_note_info(struct elf_note_info *info,
1588 1702
1589 if (first && !writenote(&info->psinfo, file, foffset)) 1703 if (first && !writenote(&info->psinfo, file, foffset))
1590 return 0; 1704 return 0;
1705 if (first && !writenote(&info->signote, file, foffset))
1706 return 0;
1591 if (first && !writenote(&info->auxv, file, foffset)) 1707 if (first && !writenote(&info->auxv, file, foffset))
1592 return 0; 1708 return 0;
1709 if (first && !writenote(&info->files, file, foffset))
1710 return 0;
1593 1711
1594 for (i = 1; i < info->thread_notes; ++i) 1712 for (i = 1; i < info->thread_notes; ++i)
1595 if (t->notes[i].data && 1713 if (t->notes[i].data &&
@@ -1616,6 +1734,7 @@ static void free_note_info(struct elf_note_info *info)
1616 kfree(t); 1734 kfree(t);
1617 } 1735 }
1618 kfree(info->psinfo.data); 1736 kfree(info->psinfo.data);
1737 vfree(info->files.data);
1619} 1738}
1620 1739
1621#else 1740#else
@@ -1681,6 +1800,7 @@ struct elf_note_info {
1681#ifdef ELF_CORE_COPY_XFPREGS 1800#ifdef ELF_CORE_COPY_XFPREGS
1682 elf_fpxregset_t *xfpu; 1801 elf_fpxregset_t *xfpu;
1683#endif 1802#endif
1803 user_siginfo_t csigdata;
1684 int thread_status_size; 1804 int thread_status_size;
1685 int numnote; 1805 int numnote;
1686}; 1806};
@@ -1690,48 +1810,37 @@ static int elf_note_info_init(struct elf_note_info *info)
1690 memset(info, 0, sizeof(*info)); 1810 memset(info, 0, sizeof(*info));
1691 INIT_LIST_HEAD(&info->thread_list); 1811 INIT_LIST_HEAD(&info->thread_list);
1692 1812
1693 /* Allocate space for six ELF notes */ 1813 /* Allocate space for ELF notes */
1694 info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); 1814 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1695 if (!info->notes) 1815 if (!info->notes)
1696 return 0; 1816 return 0;
1697 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); 1817 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698 if (!info->psinfo) 1818 if (!info->psinfo)
1699 goto notes_free; 1819 return 0;
1700 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); 1820 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701 if (!info->prstatus) 1821 if (!info->prstatus)
1702 goto psinfo_free; 1822 return 0;
1703 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); 1823 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704 if (!info->fpu) 1824 if (!info->fpu)
1705 goto prstatus_free; 1825 return 0;
1706#ifdef ELF_CORE_COPY_XFPREGS 1826#ifdef ELF_CORE_COPY_XFPREGS
1707 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); 1827 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708 if (!info->xfpu) 1828 if (!info->xfpu)
1709 goto fpu_free; 1829 return 0;
1710#endif 1830#endif
1711 return 1; 1831 return 1;
1712#ifdef ELF_CORE_COPY_XFPREGS
1713 fpu_free:
1714 kfree(info->fpu);
1715#endif
1716 prstatus_free:
1717 kfree(info->prstatus);
1718 psinfo_free:
1719 kfree(info->psinfo);
1720 notes_free:
1721 kfree(info->notes);
1722 return 0;
1723} 1832}
1724 1833
1725static int fill_note_info(struct elfhdr *elf, int phdrs, 1834static int fill_note_info(struct elfhdr *elf, int phdrs,
1726 struct elf_note_info *info, 1835 struct elf_note_info *info,
1727 long signr, struct pt_regs *regs) 1836 siginfo_t *siginfo, struct pt_regs *regs)
1728{ 1837{
1729 struct list_head *t; 1838 struct list_head *t;
1730 1839
1731 if (!elf_note_info_init(info)) 1840 if (!elf_note_info_init(info))
1732 return 0; 1841 return 0;
1733 1842
1734 if (signr) { 1843 if (siginfo->si_signo) {
1735 struct core_thread *ct; 1844 struct core_thread *ct;
1736 struct elf_thread_status *ets; 1845 struct elf_thread_status *ets;
1737 1846
@@ -1749,13 +1858,13 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1749 int sz; 1858 int sz;
1750 1859
1751 ets = list_entry(t, struct elf_thread_status, list); 1860 ets = list_entry(t, struct elf_thread_status, list);
1752 sz = elf_dump_thread_status(signr, ets); 1861 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1753 info->thread_status_size += sz; 1862 info->thread_status_size += sz;
1754 } 1863 }
1755 } 1864 }
1756 /* now collect the dump for the current */ 1865 /* now collect the dump for the current */
1757 memset(info->prstatus, 0, sizeof(*info->prstatus)); 1866 memset(info->prstatus, 0, sizeof(*info->prstatus));
1758 fill_prstatus(info->prstatus, current, signr); 1867 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1759 elf_core_copy_regs(&info->prstatus->pr_reg, regs); 1868 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1760 1869
1761 /* Set up header */ 1870 /* Set up header */
@@ -1772,9 +1881,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1772 fill_note(info->notes + 1, "CORE", NT_PRPSINFO, 1881 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1773 sizeof(*info->psinfo), info->psinfo); 1882 sizeof(*info->psinfo), info->psinfo);
1774 1883
1775 info->numnote = 2; 1884 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1885 fill_auxv_note(info->notes + 3, current->mm);
1886 fill_files_note(info->notes + 4);
1776 1887
1777 fill_auxv_note(&info->notes[info->numnote++], current->mm); 1888 info->numnote = 5;
1778 1889
1779 /* Try to dump the FPU. */ 1890 /* Try to dump the FPU. */
1780 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, 1891 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1836,6 +1947,9 @@ static void free_note_info(struct elf_note_info *info)
1836 kfree(list_entry(tmp, struct elf_thread_status, list)); 1947 kfree(list_entry(tmp, struct elf_thread_status, list));
1837 } 1948 }
1838 1949
1950 /* Free data allocated by fill_files_note(): */
1951 vfree(info->notes[4].data);
1952
1839 kfree(info->prstatus); 1953 kfree(info->prstatus);
1840 kfree(info->psinfo); 1954 kfree(info->psinfo);
1841 kfree(info->notes); 1955 kfree(info->notes);
@@ -1962,7 +2076,7 @@ static int elf_core_dump(struct coredump_params *cprm)
1962 * Collect all the non-memory information about the process for the 2076 * Collect all the non-memory information about the process for the
1963 * notes. This also sets up the file header. 2077 * notes. This also sets up the file header.
1964 */ 2078 */
1965 if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs)) 2079 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
1966 goto cleanup; 2080 goto cleanup;
1967 2081
1968 has_dumped = 1; 2082 has_dumped = 1;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 3d77cf81ba3c..262db114ff01 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1205,7 +1205,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
1205 int dump_ok; 1205 int dump_ok;
1206 1206
1207 /* Do not dump I/O mapped devices or special mappings */ 1207 /* Do not dump I/O mapped devices or special mappings */
1208 if (vma->vm_flags & (VM_IO | VM_RESERVED)) { 1208 if (vma->vm_flags & VM_IO) {
1209 kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags); 1209 kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
1210 return 0; 1210 return 0;
1211 } 1211 }
@@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1642 goto cleanup; 1642 goto cleanup;
1643#endif 1643#endif
1644 1644
1645 if (cprm->signr) { 1645 if (cprm->siginfo->si_signo) {
1646 struct core_thread *ct; 1646 struct core_thread *ct;
1647 struct elf_thread_status *tmp; 1647 struct elf_thread_status *tmp;
1648 1648
@@ -1661,13 +1661,13 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1661 int sz; 1661 int sz;
1662 1662
1663 tmp = list_entry(t, struct elf_thread_status, list); 1663 tmp = list_entry(t, struct elf_thread_status, list);
1664 sz = elf_dump_thread_status(cprm->signr, tmp); 1664 sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
1665 thread_status_size += sz; 1665 thread_status_size += sz;
1666 } 1666 }
1667 } 1667 }
1668 1668
1669 /* now collect the dump for the current */ 1669 /* now collect the dump for the current */
1670 fill_prstatus(prstatus, current, cprm->signr); 1670 fill_prstatus(prstatus, current, cprm->siginfo->si_signo);
1671 elf_core_copy_regs(&prstatus->pr_reg, cprm->regs); 1671 elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
1672 1672
1673 segs = current->mm->map_count; 1673 segs = current->mm->map_count;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 178cb70acc26..e280352b28f9 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -107,7 +107,7 @@ static struct linux_binfmt flat_format = {
107static int flat_core_dump(struct coredump_params *cprm) 107static int flat_core_dump(struct coredump_params *cprm)
108{ 108{
109 printk("Process %s:%d received signr %d and should have core dumped\n", 109 printk("Process %s:%d received signr %d and should have core dumped\n",
110 current->comm, current->pid, (int) cprm->signr); 110 current->comm, current->pid, (int) cprm->siginfo->si_signo);
111 return(1); 111 return(1);
112} 112}
113 113
diff --git a/fs/bio.c b/fs/bio.c
index 5eaa70c9d96e..71072ab99128 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
73{ 73{
74 unsigned int sz = sizeof(struct bio) + extra_size; 74 unsigned int sz = sizeof(struct bio) + extra_size;
75 struct kmem_cache *slab = NULL; 75 struct kmem_cache *slab = NULL;
76 struct bio_slab *bslab; 76 struct bio_slab *bslab, *new_bio_slabs;
77 unsigned int i, entry = -1; 77 unsigned int i, entry = -1;
78 78
79 mutex_lock(&bio_slab_lock); 79 mutex_lock(&bio_slab_lock);
@@ -97,11 +97,12 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
97 97
98 if (bio_slab_nr == bio_slab_max && entry == -1) { 98 if (bio_slab_nr == bio_slab_max && entry == -1) {
99 bio_slab_max <<= 1; 99 bio_slab_max <<= 1;
100 bio_slabs = krealloc(bio_slabs, 100 new_bio_slabs = krealloc(bio_slabs,
101 bio_slab_max * sizeof(struct bio_slab), 101 bio_slab_max * sizeof(struct bio_slab),
102 GFP_KERNEL); 102 GFP_KERNEL);
103 if (!bio_slabs) 103 if (!new_bio_slabs)
104 goto out_unlock; 104 goto out_unlock;
105 bio_slabs = new_bio_slabs;
105 } 106 }
106 if (entry == -1) 107 if (entry == -1)
107 entry = bio_slab_nr++; 108 entry = bio_slab_nr++;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e519195d45b..38e721b35d45 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1578 unsigned long nr_segs, loff_t pos) 1578 unsigned long nr_segs, loff_t pos)
1579{ 1579{
1580 struct file *file = iocb->ki_filp; 1580 struct file *file = iocb->ki_filp;
1581 struct blk_plug plug;
1581 ssize_t ret; 1582 ssize_t ret;
1582 1583
1583 BUG_ON(iocb->ki_pos != pos); 1584 BUG_ON(iocb->ki_pos != pos);
1584 1585
1586 blk_start_plug(&plug);
1585 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 1587 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1586 if (ret > 0 || ret == -EIOCBQUEUED) { 1588 if (ret > 0 || ret == -EIOCBQUEUED) {
1587 ssize_t err; 1589 ssize_t err;
@@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1590 if (err < 0 && ret > 0) 1592 if (err < 0 && ret > 0)
1591 ret = err; 1593 ret = err;
1592 } 1594 }
1595 blk_finish_plug(&plug);
1593 return ret; 1596 return ret;
1594} 1597}
1595EXPORT_SYMBOL_GPL(blkdev_aio_write); 1598EXPORT_SYMBOL_GPL(blkdev_aio_write);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 761e2cd8fed1..0c16e3dbfd56 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -61,7 +61,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
61 size = __btrfs_getxattr(inode, name, value, size); 61 size = __btrfs_getxattr(inode, name, value, size);
62 } 62 }
63 if (size > 0) { 63 if (size > 0) {
64 acl = posix_acl_from_xattr(value, size); 64 acl = posix_acl_from_xattr(&init_user_ns, value, size);
65 } else if (size == -ENOENT || size == -ENODATA || size == 0) { 65 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
66 /* FIXME, who returns -ENOENT? I think nobody */ 66 /* FIXME, who returns -ENOENT? I think nobody */
67 acl = NULL; 67 acl = NULL;
@@ -91,7 +91,7 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
91 return PTR_ERR(acl); 91 return PTR_ERR(acl);
92 if (acl == NULL) 92 if (acl == NULL)
93 return -ENODATA; 93 return -ENODATA;
94 ret = posix_acl_to_xattr(acl, value, size); 94 ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
95 posix_acl_release(acl); 95 posix_acl_release(acl);
96 96
97 return ret; 97 return ret;
@@ -141,7 +141,7 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
141 goto out; 141 goto out;
142 } 142 }
143 143
144 ret = posix_acl_to_xattr(acl, value, size); 144 ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
145 if (ret < 0) 145 if (ret < 0)
146 goto out; 146 goto out;
147 } 147 }
@@ -169,7 +169,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
169 return -EOPNOTSUPP; 169 return -EOPNOTSUPP;
170 170
171 if (value) { 171 if (value) {
172 acl = posix_acl_from_xattr(value, size); 172 acl = posix_acl_from_xattr(&init_user_ns, value, size);
173 if (IS_ERR(acl)) 173 if (IS_ERR(acl))
174 return PTR_ERR(acl); 174 return PTR_ERR(acl);
175 175
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index a256f3b2a845..ff6475f409d6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1438,10 +1438,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
1438 ret = extent_from_logical(fs_info, logical, path, 1438 ret = extent_from_logical(fs_info, logical, path,
1439 &found_key); 1439 &found_key);
1440 btrfs_release_path(path); 1440 btrfs_release_path(path);
1441 if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1442 ret = -EINVAL;
1443 if (ret < 0) 1441 if (ret < 0)
1444 return ret; 1442 return ret;
1443 if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1444 return -EINVAL;
1445 1445
1446 extent_item_pos = logical - found_key.objectid; 1446 extent_item_pos = logical - found_key.objectid;
1447 ret = iterate_extent_inodes(fs_info, found_key.objectid, 1447 ret = iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 86eff48dab78..43d1c5a3a030 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -818,6 +818,7 @@ static void free_workspace(int type, struct list_head *workspace)
818 btrfs_compress_op[idx]->free_workspace(workspace); 818 btrfs_compress_op[idx]->free_workspace(workspace);
819 atomic_dec(alloc_workspace); 819 atomic_dec(alloc_workspace);
820wake: 820wake:
821 smp_mb();
821 if (waitqueue_active(workspace_wait)) 822 if (waitqueue_active(workspace_wait))
822 wake_up(workspace_wait); 823 wake_up(workspace_wait);
823} 824}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271ff..6d183f60d63a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
421 spin_unlock(&fs_info->tree_mod_seq_lock); 421 spin_unlock(&fs_info->tree_mod_seq_lock);
422 422
423 /* 423 /*
424 * we removed the lowest blocker from the blocker list, so there may be
425 * more processible delayed refs.
426 */
427 wake_up(&fs_info->tree_mod_seq_wait);
428
429 /*
430 * anything that's lower than the lowest existing (read: blocked) 424 * anything that's lower than the lowest existing (read: blocked)
431 * sequence number can be removed from the tree. 425 * sequence number can be removed from the tree.
432 */ 426 */
@@ -631,6 +625,9 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
631 u32 nritems; 625 u32 nritems;
632 int ret; 626 int ret;
633 627
628 if (btrfs_header_level(eb) == 0)
629 return;
630
634 nritems = btrfs_header_nritems(eb); 631 nritems = btrfs_header_nritems(eb);
635 for (i = nritems - 1; i >= 0; i--) { 632 for (i = nritems - 1; i >= 0; i--) {
636 ret = tree_mod_log_insert_key_locked(fs_info, eb, i, 633 ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4bab807227ad..9821b672f5a2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -116,7 +116,7 @@ struct btrfs_ordered_sum;
116#define BTRFS_FREE_SPACE_OBJECTID -11ULL 116#define BTRFS_FREE_SPACE_OBJECTID -11ULL
117 117
118/* 118/*
119 * The inode number assigned to the special inode for sotring 119 * The inode number assigned to the special inode for storing
120 * free ino cache 120 * free ino cache
121 */ 121 */
122#define BTRFS_FREE_INO_OBJECTID -12ULL 122#define BTRFS_FREE_INO_OBJECTID -12ULL
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
1252 atomic_t tree_mod_seq; 1252 atomic_t tree_mod_seq;
1253 struct list_head tree_mod_seq_list; 1253 struct list_head tree_mod_seq_list;
1254 struct seq_list tree_mod_seq_elem; 1254 struct seq_list tree_mod_seq_elem;
1255 wait_queue_head_t tree_mod_seq_wait;
1256 1255
1257 /* this protects tree_mod_log */ 1256 /* this protects tree_mod_log */
1258 rwlock_t tree_mod_log_lock; 1257 rwlock_t tree_mod_log_lock;
@@ -3192,7 +3191,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
3192int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 3191int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
3193 struct bio *bio, u32 *dst); 3192 struct bio *bio, u32 *dst);
3194int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 3193int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
3195 struct bio *bio, u64 logical_offset, u32 *dst); 3194 struct bio *bio, u64 logical_offset);
3196int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3195int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3197 struct btrfs_root *root, 3196 struct btrfs_root *root,
3198 u64 objectid, u64 pos, 3197 u64 objectid, u64 pos,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 335605c8ceab..52c85e2b95d0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -512,8 +512,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
512 512
513 rb_erase(&delayed_item->rb_node, root); 513 rb_erase(&delayed_item->rb_node, root);
514 delayed_item->delayed_node->count--; 514 delayed_item->delayed_node->count--;
515 atomic_dec(&delayed_root->items); 515 if (atomic_dec_return(&delayed_root->items) <
516 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND && 516 BTRFS_DELAYED_BACKGROUND &&
517 waitqueue_active(&delayed_root->wait)) 517 waitqueue_active(&delayed_root->wait))
518 wake_up(&delayed_root->wait); 518 wake_up(&delayed_root->wait);
519} 519}
@@ -1028,9 +1028,10 @@ do_again:
1028 btrfs_release_delayed_item(prev); 1028 btrfs_release_delayed_item(prev);
1029 ret = 0; 1029 ret = 0;
1030 btrfs_release_path(path); 1030 btrfs_release_path(path);
1031 if (curr) 1031 if (curr) {
1032 mutex_unlock(&node->mutex);
1032 goto do_again; 1033 goto do_again;
1033 else 1034 } else
1034 goto delete_fail; 1035 goto delete_fail;
1035 } 1036 }
1036 1037
@@ -1055,8 +1056,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1055 delayed_node->count--; 1056 delayed_node->count--;
1056 1057
1057 delayed_root = delayed_node->root->fs_info->delayed_root; 1058 delayed_root = delayed_node->root->fs_info->delayed_root;
1058 atomic_dec(&delayed_root->items); 1059 if (atomic_dec_return(&delayed_root->items) <
1059 if (atomic_read(&delayed_root->items) <
1060 BTRFS_DELAYED_BACKGROUND && 1060 BTRFS_DELAYED_BACKGROUND &&
1061 waitqueue_active(&delayed_root->wait)) 1061 waitqueue_active(&delayed_root->wait))
1062 wake_up(&delayed_root->wait); 1062 wake_up(&delayed_root->wait);
@@ -1715,8 +1715,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1715 struct btrfs_inode_item *inode_item, 1715 struct btrfs_inode_item *inode_item,
1716 struct inode *inode) 1716 struct inode *inode)
1717{ 1717{
1718 btrfs_set_stack_inode_uid(inode_item, inode->i_uid); 1718 btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1719 btrfs_set_stack_inode_gid(inode_item, inode->i_gid); 1719 btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1720 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size); 1720 btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1721 btrfs_set_stack_inode_mode(inode_item, inode->i_mode); 1721 btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1722 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink); 1722 btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
@@ -1764,8 +1764,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1764 1764
1765 inode_item = &delayed_node->inode_item; 1765 inode_item = &delayed_node->inode_item;
1766 1766
1767 inode->i_uid = btrfs_stack_inode_uid(inode_item); 1767 i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1768 inode->i_gid = btrfs_stack_inode_gid(inode_item); 1768 i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1769 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); 1769 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1770 inode->i_mode = btrfs_stack_inode_mode(inode_item); 1770 inode->i_mode = btrfs_stack_inode_mode(inode_item);
1771 set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); 1771 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01bb..ae9411773397 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -38,17 +38,14 @@
38static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, 38static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
39 struct btrfs_delayed_tree_ref *ref1) 39 struct btrfs_delayed_tree_ref *ref1)
40{ 40{
41 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { 41 if (ref1->root < ref2->root)
42 if (ref1->root < ref2->root) 42 return -1;
43 return -1; 43 if (ref1->root > ref2->root)
44 if (ref1->root > ref2->root) 44 return 1;
45 return 1; 45 if (ref1->parent < ref2->parent)
46 } else { 46 return -1;
47 if (ref1->parent < ref2->parent) 47 if (ref1->parent > ref2->parent)
48 return -1; 48 return 1;
49 if (ref1->parent > ref2->parent)
50 return 1;
51 }
52 return 0; 49 return 0;
53} 50}
54 51
@@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
85 * type of the delayed backrefs and content of delayed backrefs. 82 * type of the delayed backrefs and content of delayed backrefs.
86 */ 83 */
87static int comp_entry(struct btrfs_delayed_ref_node *ref2, 84static int comp_entry(struct btrfs_delayed_ref_node *ref2,
88 struct btrfs_delayed_ref_node *ref1) 85 struct btrfs_delayed_ref_node *ref1,
86 bool compare_seq)
89{ 87{
90 if (ref1->bytenr < ref2->bytenr) 88 if (ref1->bytenr < ref2->bytenr)
91 return -1; 89 return -1;
@@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
102 if (ref1->type > ref2->type) 100 if (ref1->type > ref2->type)
103 return 1; 101 return 1;
104 /* merging of sequenced refs is not allowed */ 102 /* merging of sequenced refs is not allowed */
105 if (ref1->seq < ref2->seq) 103 if (compare_seq) {
106 return -1; 104 if (ref1->seq < ref2->seq)
107 if (ref1->seq > ref2->seq) 105 return -1;
108 return 1; 106 if (ref1->seq > ref2->seq)
107 return 1;
108 }
109 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || 109 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
110 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { 110 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
111 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), 111 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
139 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, 139 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
140 rb_node); 140 rb_node);
141 141
142 cmp = comp_entry(entry, ins); 142 cmp = comp_entry(entry, ins, 1);
143 if (cmp < 0) 143 if (cmp < 0)
144 p = &(*p)->rb_left; 144 p = &(*p)->rb_left;
145 else if (cmp > 0) 145 else if (cmp > 0)
@@ -233,6 +233,114 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
233 return 0; 233 return 0;
234} 234}
235 235
236static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
237 struct btrfs_delayed_ref_root *delayed_refs,
238 struct btrfs_delayed_ref_node *ref)
239{
240 rb_erase(&ref->rb_node, &delayed_refs->root);
241 ref->in_tree = 0;
242 btrfs_put_delayed_ref(ref);
243 delayed_refs->num_entries--;
244 if (trans->delayed_ref_updates)
245 trans->delayed_ref_updates--;
246}
247
248static int merge_ref(struct btrfs_trans_handle *trans,
249 struct btrfs_delayed_ref_root *delayed_refs,
250 struct btrfs_delayed_ref_node *ref, u64 seq)
251{
252 struct rb_node *node;
253 int merged = 0;
254 int mod = 0;
255 int done = 0;
256
257 node = rb_prev(&ref->rb_node);
258 while (node) {
259 struct btrfs_delayed_ref_node *next;
260
261 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
262 node = rb_prev(node);
263 if (next->bytenr != ref->bytenr)
264 break;
265 if (seq && next->seq >= seq)
266 break;
267 if (comp_entry(ref, next, 0))
268 continue;
269
270 if (ref->action == next->action) {
271 mod = next->ref_mod;
272 } else {
273 if (ref->ref_mod < next->ref_mod) {
274 struct btrfs_delayed_ref_node *tmp;
275
276 tmp = ref;
277 ref = next;
278 next = tmp;
279 done = 1;
280 }
281 mod = -next->ref_mod;
282 }
283
284 merged++;
285 drop_delayed_ref(trans, delayed_refs, next);
286 ref->ref_mod += mod;
287 if (ref->ref_mod == 0) {
288 drop_delayed_ref(trans, delayed_refs, ref);
289 break;
290 } else {
291 /*
292 * You can't have multiples of the same ref on a tree
293 * block.
294 */
295 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
296 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
297 }
298
299 if (done)
300 break;
301 node = rb_prev(&ref->rb_node);
302 }
303
304 return merged;
305}
306
307void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
308 struct btrfs_fs_info *fs_info,
309 struct btrfs_delayed_ref_root *delayed_refs,
310 struct btrfs_delayed_ref_head *head)
311{
312 struct rb_node *node;
313 u64 seq = 0;
314
315 spin_lock(&fs_info->tree_mod_seq_lock);
316 if (!list_empty(&fs_info->tree_mod_seq_list)) {
317 struct seq_list *elem;
318
319 elem = list_first_entry(&fs_info->tree_mod_seq_list,
320 struct seq_list, list);
321 seq = elem->seq;
322 }
323 spin_unlock(&fs_info->tree_mod_seq_lock);
324
325 node = rb_prev(&head->node.rb_node);
326 while (node) {
327 struct btrfs_delayed_ref_node *ref;
328
329 ref = rb_entry(node, struct btrfs_delayed_ref_node,
330 rb_node);
331 if (ref->bytenr != head->node.bytenr)
332 break;
333
334 /* We can't merge refs that are outside of our seq count */
335 if (seq && ref->seq >= seq)
336 break;
337 if (merge_ref(trans, delayed_refs, ref, seq))
338 node = rb_prev(&head->node.rb_node);
339 else
340 node = rb_prev(node);
341 }
342}
343
236int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, 344int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
237 struct btrfs_delayed_ref_root *delayed_refs, 345 struct btrfs_delayed_ref_root *delayed_refs,
238 u64 seq) 346 u64 seq)
@@ -336,18 +444,11 @@ update_existing_ref(struct btrfs_trans_handle *trans,
336 * every changing the extent allocation tree. 444 * every changing the extent allocation tree.
337 */ 445 */
338 existing->ref_mod--; 446 existing->ref_mod--;
339 if (existing->ref_mod == 0) { 447 if (existing->ref_mod == 0)
340 rb_erase(&existing->rb_node, 448 drop_delayed_ref(trans, delayed_refs, existing);
341 &delayed_refs->root); 449 else
342 existing->in_tree = 0;
343 btrfs_put_delayed_ref(existing);
344 delayed_refs->num_entries--;
345 if (trans->delayed_ref_updates)
346 trans->delayed_ref_updates--;
347 } else {
348 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 450 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
349 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 451 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
350 }
351 } else { 452 } else {
352 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 453 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
353 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 454 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -662,9 +763,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
662 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 763 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
663 num_bytes, parent, ref_root, level, action, 764 num_bytes, parent, ref_root, level, action,
664 for_cow); 765 for_cow);
665 if (!need_ref_seq(for_cow, ref_root) &&
666 waitqueue_active(&fs_info->tree_mod_seq_wait))
667 wake_up(&fs_info->tree_mod_seq_wait);
668 spin_unlock(&delayed_refs->lock); 766 spin_unlock(&delayed_refs->lock);
669 if (need_ref_seq(for_cow, ref_root)) 767 if (need_ref_seq(for_cow, ref_root))
670 btrfs_qgroup_record_ref(trans, &ref->node, extent_op); 768 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +811,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
713 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 811 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
714 num_bytes, parent, ref_root, owner, offset, 812 num_bytes, parent, ref_root, owner, offset,
715 action, for_cow); 813 action, for_cow);
716 if (!need_ref_seq(for_cow, ref_root) &&
717 waitqueue_active(&fs_info->tree_mod_seq_wait))
718 wake_up(&fs_info->tree_mod_seq_wait);
719 spin_unlock(&delayed_refs->lock); 814 spin_unlock(&delayed_refs->lock);
720 if (need_ref_seq(for_cow, ref_root)) 815 if (need_ref_seq(for_cow, ref_root))
721 btrfs_qgroup_record_ref(trans, &ref->node, extent_op); 816 btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +839,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
744 num_bytes, BTRFS_UPDATE_DELAYED_HEAD, 839 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
745 extent_op->is_data); 840 extent_op->is_data);
746 841
747 if (waitqueue_active(&fs_info->tree_mod_seq_wait))
748 wake_up(&fs_info->tree_mod_seq_wait);
749 spin_unlock(&delayed_refs->lock); 842 spin_unlock(&delayed_refs->lock);
750 return 0; 843 return 0;
751} 844}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 0d7c90c366b6..c9d703693df0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -18,7 +18,7 @@
18#ifndef __DELAYED_REF__ 18#ifndef __DELAYED_REF__
19#define __DELAYED_REF__ 19#define __DELAYED_REF__
20 20
21/* these are the possible values of struct btrfs_delayed_ref->action */ 21/* these are the possible values of struct btrfs_delayed_ref_node->action */
22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ 22#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ 23#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ 24#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
@@ -167,6 +167,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
167 struct btrfs_trans_handle *trans, 167 struct btrfs_trans_handle *trans,
168 u64 bytenr, u64 num_bytes, 168 u64 bytenr, u64 num_bytes,
169 struct btrfs_delayed_extent_op *extent_op); 169 struct btrfs_delayed_extent_op *extent_op);
170void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
171 struct btrfs_fs_info *fs_info,
172 struct btrfs_delayed_ref_root *delayed_refs,
173 struct btrfs_delayed_ref_head *head);
170 174
171struct btrfs_delayed_ref_head * 175struct btrfs_delayed_ref_head *
172btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 176btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62e0cafd6e25..22e98e04c2ea 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -377,9 +377,13 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
377 ret = read_extent_buffer_pages(io_tree, eb, start, 377 ret = read_extent_buffer_pages(io_tree, eb, start,
378 WAIT_COMPLETE, 378 WAIT_COMPLETE,
379 btree_get_extent, mirror_num); 379 btree_get_extent, mirror_num);
380 if (!ret && !verify_parent_transid(io_tree, eb, 380 if (!ret) {
381 if (!verify_parent_transid(io_tree, eb,
381 parent_transid, 0)) 382 parent_transid, 0))
382 break; 383 break;
384 else
385 ret = -EIO;
386 }
383 387
384 /* 388 /*
385 * This buffer's crc is fine, but its contents are corrupted, so 389 * This buffer's crc is fine, but its contents are corrupted, so
@@ -754,9 +758,7 @@ static void run_one_async_done(struct btrfs_work *work)
754 limit = btrfs_async_submit_limit(fs_info); 758 limit = btrfs_async_submit_limit(fs_info);
755 limit = limit * 2 / 3; 759 limit = limit * 2 / 3;
756 760
757 atomic_dec(&fs_info->nr_async_submits); 761 if (atomic_dec_return(&fs_info->nr_async_submits) < limit &&
758
759 if (atomic_read(&fs_info->nr_async_submits) < limit &&
760 waitqueue_active(&fs_info->async_submit_wait)) 762 waitqueue_active(&fs_info->async_submit_wait))
761 wake_up(&fs_info->async_submit_wait); 763 wake_up(&fs_info->async_submit_wait);
762 764
@@ -2032,8 +2034,6 @@ int open_ctree(struct super_block *sb,
2032 fs_info->free_chunk_space = 0; 2034 fs_info->free_chunk_space = 0;
2033 fs_info->tree_mod_log = RB_ROOT; 2035 fs_info->tree_mod_log = RB_ROOT;
2034 2036
2035 init_waitqueue_head(&fs_info->tree_mod_seq_wait);
2036
2037 /* readahead state */ 2037 /* readahead state */
2038 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2038 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
2039 spin_lock_init(&fs_info->reada_lock); 2039 spin_lock_init(&fs_info->reada_lock);
@@ -2528,8 +2528,7 @@ retry_root_backup:
2528 goto fail_trans_kthread; 2528 goto fail_trans_kthread;
2529 2529
2530 /* do not make disk changes in broken FS */ 2530 /* do not make disk changes in broken FS */
2531 if (btrfs_super_log_root(disk_super) != 0 && 2531 if (btrfs_super_log_root(disk_super) != 0) {
2532 !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
2533 u64 bytenr = btrfs_super_log_root(disk_super); 2532 u64 bytenr = btrfs_super_log_root(disk_super);
2534 2533
2535 if (fs_devices->rw_devices == 0) { 2534 if (fs_devices->rw_devices == 0) {
@@ -3189,30 +3188,14 @@ int close_ctree(struct btrfs_root *root)
3189 /* clear out the rbtree of defraggable inodes */ 3188 /* clear out the rbtree of defraggable inodes */
3190 btrfs_run_defrag_inodes(fs_info); 3189 btrfs_run_defrag_inodes(fs_info);
3191 3190
3192 /*
3193 * Here come 2 situations when btrfs is broken to flip readonly:
3194 *
3195 * 1. when btrfs flips readonly somewhere else before
3196 * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
3197 * and btrfs will skip to write sb directly to keep
3198 * ERROR state on disk.
3199 *
3200 * 2. when btrfs flips readonly just in btrfs_commit_super,
3201 * and in such case, btrfs cannot write sb via btrfs_commit_super,
3202 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
3203 * btrfs will cleanup all FS resources first and write sb then.
3204 */
3205 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3191 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3206 ret = btrfs_commit_super(root); 3192 ret = btrfs_commit_super(root);
3207 if (ret) 3193 if (ret)
3208 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3194 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3209 } 3195 }
3210 3196
3211 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 3197 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
3212 ret = btrfs_error_commit_super(root); 3198 btrfs_error_commit_super(root);
3213 if (ret)
3214 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3215 }
3216 3199
3217 btrfs_put_block_group_cache(fs_info); 3200 btrfs_put_block_group_cache(fs_info);
3218 3201
@@ -3434,18 +3417,11 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3434 if (read_only) 3417 if (read_only)
3435 return 0; 3418 return 0;
3436 3419
3437 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3438 printk(KERN_WARNING "warning: mount fs with errors, "
3439 "running btrfsck is recommended\n");
3440 }
3441
3442 return 0; 3420 return 0;
3443} 3421}
3444 3422
3445int btrfs_error_commit_super(struct btrfs_root *root) 3423void btrfs_error_commit_super(struct btrfs_root *root)
3446{ 3424{
3447 int ret;
3448
3449 mutex_lock(&root->fs_info->cleaner_mutex); 3425 mutex_lock(&root->fs_info->cleaner_mutex);
3450 btrfs_run_delayed_iputs(root); 3426 btrfs_run_delayed_iputs(root);
3451 mutex_unlock(&root->fs_info->cleaner_mutex); 3427 mutex_unlock(&root->fs_info->cleaner_mutex);
@@ -3455,10 +3431,6 @@ int btrfs_error_commit_super(struct btrfs_root *root)
3455 3431
3456 /* cleanup FS via transaction */ 3432 /* cleanup FS via transaction */
3457 btrfs_cleanup_transaction(root); 3433 btrfs_cleanup_transaction(root);
3458
3459 ret = write_ctree_super(NULL, root, 0);
3460
3461 return ret;
3462} 3434}
3463 3435
3464static void btrfs_destroy_ordered_operations(struct btrfs_root *root) 3436static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
@@ -3782,14 +3754,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3782 /* FIXME: cleanup wait for commit */ 3754 /* FIXME: cleanup wait for commit */
3783 t->in_commit = 1; 3755 t->in_commit = 1;
3784 t->blocked = 1; 3756 t->blocked = 1;
3757 smp_mb();
3785 if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) 3758 if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
3786 wake_up(&root->fs_info->transaction_blocked_wait); 3759 wake_up(&root->fs_info->transaction_blocked_wait);
3787 3760
3788 t->blocked = 0; 3761 t->blocked = 0;
3762 smp_mb();
3789 if (waitqueue_active(&root->fs_info->transaction_wait)) 3763 if (waitqueue_active(&root->fs_info->transaction_wait))
3790 wake_up(&root->fs_info->transaction_wait); 3764 wake_up(&root->fs_info->transaction_wait);
3791 3765
3792 t->commit_done = 1; 3766 t->commit_done = 1;
3767 smp_mb();
3793 if (waitqueue_active(&t->commit_wait)) 3768 if (waitqueue_active(&t->commit_wait))
3794 wake_up(&t->commit_wait); 3769 wake_up(&t->commit_wait);
3795 3770
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 95e147eea239..c5b00a735fef 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -54,7 +54,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, int max_mirrors); 54 struct btrfs_root *root, int max_mirrors);
55struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); 55struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
56int btrfs_commit_super(struct btrfs_root *root); 56int btrfs_commit_super(struct btrfs_root *root);
57int btrfs_error_commit_super(struct btrfs_root *root); 57void btrfs_error_commit_super(struct btrfs_root *root);
58struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 58struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
59 u64 bytenr, u32 blocksize); 59 u64 bytenr, u32 blocksize);
60struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 60struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153b7c47..ba58024d40d3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2252,6 +2252,16 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2252 } 2252 }
2253 2253
2254 /* 2254 /*
2255 * We need to try and merge add/drops of the same ref since we
2256 * can run into issues with relocate dropping the implicit ref
2257 * and then it being added back again before the drop can
2258 * finish. If we merged anything we need to re-loop so we can
2259 * get a good ref.
2260 */
2261 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2262 locked_ref);
2263
2264 /*
2255 * locked_ref is the head node, so we have to go one 2265 * locked_ref is the head node, so we have to go one
2256 * node back for any delayed ref updates 2266 * node back for any delayed ref updates
2257 */ 2267 */
@@ -2318,12 +2328,23 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2318 ref->in_tree = 0; 2328 ref->in_tree = 0;
2319 rb_erase(&ref->rb_node, &delayed_refs->root); 2329 rb_erase(&ref->rb_node, &delayed_refs->root);
2320 delayed_refs->num_entries--; 2330 delayed_refs->num_entries--;
2321 /* 2331 if (locked_ref) {
2322 * we modified num_entries, but as we're currently running 2332 /*
2323 * delayed refs, skip 2333 * when we play the delayed ref, also correct the
2324 * wake_up(&delayed_refs->seq_wait); 2334 * ref_mod on head
2325 * here. 2335 */
2326 */ 2336 switch (ref->action) {
2337 case BTRFS_ADD_DELAYED_REF:
2338 case BTRFS_ADD_DELAYED_EXTENT:
2339 locked_ref->node.ref_mod -= ref->ref_mod;
2340 break;
2341 case BTRFS_DROP_DELAYED_REF:
2342 locked_ref->node.ref_mod += ref->ref_mod;
2343 break;
2344 default:
2345 WARN_ON(1);
2346 }
2347 }
2327 spin_unlock(&delayed_refs->lock); 2348 spin_unlock(&delayed_refs->lock);
2328 2349
2329 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2350 ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2371,6 @@ next:
2350 return count; 2371 return count;
2351} 2372}
2352 2373
2353static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
2354 struct btrfs_delayed_ref_root *delayed_refs,
2355 unsigned long num_refs,
2356 struct list_head *first_seq)
2357{
2358 spin_unlock(&delayed_refs->lock);
2359 pr_debug("waiting for more refs (num %ld, first %p)\n",
2360 num_refs, first_seq);
2361 wait_event(fs_info->tree_mod_seq_wait,
2362 num_refs != delayed_refs->num_entries ||
2363 fs_info->tree_mod_seq_list.next != first_seq);
2364 pr_debug("done waiting for more refs (num %ld, first %p)\n",
2365 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
2366 spin_lock(&delayed_refs->lock);
2367}
2368
2369#ifdef SCRAMBLE_DELAYED_REFS 2374#ifdef SCRAMBLE_DELAYED_REFS
2370/* 2375/*
2371 * Normally delayed refs get processed in ascending bytenr order. This 2376 * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2465,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2460 struct btrfs_delayed_ref_root *delayed_refs; 2465 struct btrfs_delayed_ref_root *delayed_refs;
2461 struct btrfs_delayed_ref_node *ref; 2466 struct btrfs_delayed_ref_node *ref;
2462 struct list_head cluster; 2467 struct list_head cluster;
2463 struct list_head *first_seq = NULL;
2464 int ret; 2468 int ret;
2465 u64 delayed_start; 2469 u64 delayed_start;
2466 int run_all = count == (unsigned long)-1; 2470 int run_all = count == (unsigned long)-1;
2467 int run_most = 0; 2471 int run_most = 0;
2468 unsigned long num_refs = 0; 2472 int loops;
2469 int consider_waiting;
2470 2473
2471 /* We'll clean this up in btrfs_cleanup_transaction */ 2474 /* We'll clean this up in btrfs_cleanup_transaction */
2472 if (trans->aborted) 2475 if (trans->aborted)
@@ -2484,7 +2487,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2484 delayed_refs = &trans->transaction->delayed_refs; 2487 delayed_refs = &trans->transaction->delayed_refs;
2485 INIT_LIST_HEAD(&cluster); 2488 INIT_LIST_HEAD(&cluster);
2486again: 2489again:
2487 consider_waiting = 0; 2490 loops = 0;
2488 spin_lock(&delayed_refs->lock); 2491 spin_lock(&delayed_refs->lock);
2489 2492
2490#ifdef SCRAMBLE_DELAYED_REFS 2493#ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2515,6 @@ again:
2512 if (ret) 2515 if (ret)
2513 break; 2516 break;
2514 2517
2515 if (delayed_start >= delayed_refs->run_delayed_start) {
2516 if (consider_waiting == 0) {
2517 /*
2518 * btrfs_find_ref_cluster looped. let's do one
2519 * more cycle. if we don't run any delayed ref
2520 * during that cycle (because we can't because
2521 * all of them are blocked) and if the number of
2522 * refs doesn't change, we avoid busy waiting.
2523 */
2524 consider_waiting = 1;
2525 num_refs = delayed_refs->num_entries;
2526 first_seq = root->fs_info->tree_mod_seq_list.next;
2527 } else {
2528 wait_for_more_refs(root->fs_info, delayed_refs,
2529 num_refs, first_seq);
2530 /*
2531 * after waiting, things have changed. we
2532 * dropped the lock and someone else might have
2533 * run some refs, built new clusters and so on.
2534 * therefore, we restart staleness detection.
2535 */
2536 consider_waiting = 0;
2537 }
2538 }
2539
2540 ret = run_clustered_refs(trans, root, &cluster); 2518 ret = run_clustered_refs(trans, root, &cluster);
2541 if (ret < 0) { 2519 if (ret < 0) {
2542 spin_unlock(&delayed_refs->lock); 2520 spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2527,26 @@ again:
2549 if (count == 0) 2527 if (count == 0)
2550 break; 2528 break;
2551 2529
2552 if (ret || delayed_refs->run_delayed_start == 0) { 2530 if (delayed_start >= delayed_refs->run_delayed_start) {
2531 if (loops == 0) {
2532 /*
2533 * btrfs_find_ref_cluster looped. let's do one
2534 * more cycle. if we don't run any delayed ref
2535 * during that cycle (because we can't because
2536 * all of them are blocked), bail out.
2537 */
2538 loops = 1;
2539 } else {
2540 /*
2541 * no runnable refs left, stop trying
2542 */
2543 BUG_ON(run_all);
2544 break;
2545 }
2546 }
2547 if (ret) {
2553 /* refs were run, let's reset staleness detection */ 2548 /* refs were run, let's reset staleness detection */
2554 consider_waiting = 0; 2549 loops = 0;
2555 } 2550 }
2556 } 2551 }
2557 2552
@@ -3007,17 +3002,16 @@ again:
3007 } 3002 }
3008 spin_unlock(&block_group->lock); 3003 spin_unlock(&block_group->lock);
3009 3004
3010 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 3005 /*
3006 * Try to preallocate enough space based on how big the block group is.
3007 * Keep in mind this has to include any pinned space which could end up
3008 * taking up quite a bit since it's not folded into the other space
3009 * cache.
3010 */
3011 num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
3011 if (!num_pages) 3012 if (!num_pages)
3012 num_pages = 1; 3013 num_pages = 1;
3013 3014
3014 /*
3015 * Just to make absolutely sure we have enough space, we're going to
3016 * preallocate 12 pages worth of space for each block group. In
3017 * practice we ought to use at most 8, but we need extra space so we can
3018 * add our header and have a terminator between the extents and the
3019 * bitmaps.
3020 */
3021 num_pages *= 16; 3015 num_pages *= 16;
3022 num_pages *= PAGE_CACHE_SIZE; 3016 num_pages *= PAGE_CACHE_SIZE;
3023 3017
@@ -4571,8 +4565,10 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4571 if (root->fs_info->quota_enabled) { 4565 if (root->fs_info->quota_enabled) {
4572 ret = btrfs_qgroup_reserve(root, num_bytes + 4566 ret = btrfs_qgroup_reserve(root, num_bytes +
4573 nr_extents * root->leafsize); 4567 nr_extents * root->leafsize);
4574 if (ret) 4568 if (ret) {
4569 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4575 return ret; 4570 return ret;
4571 }
4576 } 4572 }
4577 4573
4578 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4574 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -5294,9 +5290,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5294 rb_erase(&head->node.rb_node, &delayed_refs->root); 5290 rb_erase(&head->node.rb_node, &delayed_refs->root);
5295 5291
5296 delayed_refs->num_entries--; 5292 delayed_refs->num_entries--;
5297 smp_mb();
5298 if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
5299 wake_up(&root->fs_info->tree_mod_seq_wait);
5300 5293
5301 /* 5294 /*
5302 * we don't take a ref on the node because we're removing it from the 5295 * we don't take a ref on the node because we're removing it from the
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 45c81bb4ac82..b08ea4717e9d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -107,6 +107,12 @@ void extent_io_exit(void)
107 list_del(&eb->leak_list); 107 list_del(&eb->leak_list);
108 kmem_cache_free(extent_buffer_cache, eb); 108 kmem_cache_free(extent_buffer_cache, eb);
109 } 109 }
110
111 /*
112 * Make sure all delayed rcu free are flushed before we
113 * destroy caches.
114 */
115 rcu_barrier();
110 if (extent_state_cache) 116 if (extent_state_cache)
111 kmem_cache_destroy(extent_state_cache); 117 kmem_cache_destroy(extent_state_cache);
112 if (extent_buffer_cache) 118 if (extent_buffer_cache)
@@ -2330,23 +2336,10 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2330 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2336 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
2331 ret = tree->ops->readpage_end_io_hook(page, start, end, 2337 ret = tree->ops->readpage_end_io_hook(page, start, end,
2332 state, mirror); 2338 state, mirror);
2333 if (ret) { 2339 if (ret)
2334 /* no IO indicated but software detected errors
2335 * in the block, either checksum errors or
2336 * issues with the contents */
2337 struct btrfs_root *root =
2338 BTRFS_I(page->mapping->host)->root;
2339 struct btrfs_device *device;
2340
2341 uptodate = 0; 2340 uptodate = 0;
2342 device = btrfs_find_device_for_logical( 2341 else
2343 root, start, mirror);
2344 if (device)
2345 btrfs_dev_stat_inc_and_print(device,
2346 BTRFS_DEV_STAT_CORRUPTION_ERRS);
2347 } else {
2348 clean_io_failure(start, page); 2342 clean_io_failure(start, page);
2349 }
2350 } 2343 }
2351 2344
2352 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2345 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b45b9de0c21d..857d93cd01dc 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -272,9 +272,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
272} 272}
273 273
274int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 274int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
275 struct bio *bio, u64 offset, u32 *dst) 275 struct bio *bio, u64 offset)
276{ 276{
277 return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); 277 return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
278} 278}
279 279
280int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 280int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5caf285c6e4d..f6b40e86121b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1599,6 +1599,7 @@ out:
1599static const struct vm_operations_struct btrfs_file_vm_ops = { 1599static const struct vm_operations_struct btrfs_file_vm_ops = {
1600 .fault = filemap_fault, 1600 .fault = filemap_fault,
1601 .page_mkwrite = btrfs_page_mkwrite, 1601 .page_mkwrite = btrfs_page_mkwrite,
1602 .remap_pages = generic_file_remap_pages,
1602}; 1603};
1603 1604
1604static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 1605static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -1610,7 +1611,6 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
1610 1611
1611 file_accessed(filp); 1612 file_accessed(filp);
1612 vma->vm_ops = &btrfs_file_vm_ops; 1613 vma->vm_ops = &btrfs_file_vm_ops;
1613 vma->vm_flags |= VM_CAN_NONLINEAR;
1614 1614
1615 return 0; 1615 return 0;
1616} 1616}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6e8f416773d4..a6ed6944e50c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1008,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
1008 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> 1008 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
1009 PAGE_CACHE_SHIFT; 1009 PAGE_CACHE_SHIFT;
1010 1010
1011 atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); 1011 if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
1012
1013 if (atomic_read(&root->fs_info->async_delalloc_pages) <
1014 5 * 1024 * 1024 && 1012 5 * 1024 * 1024 &&
1015 waitqueue_active(&root->fs_info->async_submit_wait)) 1013 waitqueue_active(&root->fs_info->async_submit_wait))
1016 wake_up(&root->fs_info->async_submit_wait); 1014 wake_up(&root->fs_info->async_submit_wait);
@@ -1885,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1885 trans = btrfs_join_transaction_nolock(root); 1883 trans = btrfs_join_transaction_nolock(root);
1886 else 1884 else
1887 trans = btrfs_join_transaction(root); 1885 trans = btrfs_join_transaction(root);
1888 if (IS_ERR(trans)) 1886 if (IS_ERR(trans)) {
1889 return PTR_ERR(trans); 1887 ret = PTR_ERR(trans);
1888 trans = NULL;
1889 goto out;
1890 }
1890 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1891 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1891 ret = btrfs_update_inode_fallback(trans, root, inode); 1892 ret = btrfs_update_inode_fallback(trans, root, inode);
1892 if (ret) /* -ENOMEM or corruption */ 1893 if (ret) /* -ENOMEM or corruption */
@@ -1970,8 +1971,8 @@ out:
1970 ordered_extent->len - 1, NULL, GFP_NOFS); 1971 ordered_extent->len - 1, NULL, GFP_NOFS);
1971 1972
1972 /* 1973 /*
1973 * This needs to be dont to make sure anybody waiting knows we are done 1974 * This needs to be done to make sure anybody waiting knows we are done
1974 * upating everything for this ordered extent. 1975 * updating everything for this ordered extent.
1975 */ 1976 */
1976 btrfs_remove_ordered_extent(inode, ordered_extent); 1977 btrfs_remove_ordered_extent(inode, ordered_extent);
1977 1978
@@ -2571,8 +2572,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2571 struct btrfs_inode_item); 2572 struct btrfs_inode_item);
2572 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2573 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2573 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); 2574 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
2574 inode->i_uid = btrfs_inode_uid(leaf, inode_item); 2575 i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
2575 inode->i_gid = btrfs_inode_gid(leaf, inode_item); 2576 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
2576 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); 2577 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
2577 2578
2578 tspec = btrfs_inode_atime(inode_item); 2579 tspec = btrfs_inode_atime(inode_item);
@@ -2650,8 +2651,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2650 struct btrfs_inode_item *item, 2651 struct btrfs_inode_item *item,
2651 struct inode *inode) 2652 struct inode *inode)
2652{ 2653{
2653 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2654 btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
2654 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2655 btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
2655 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2656 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
2656 btrfs_set_inode_mode(leaf, item, inode->i_mode); 2657 btrfs_set_inode_mode(leaf, item, inode->i_mode);
2657 btrfs_set_inode_nlink(leaf, item, inode->i_nlink); 2658 btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
@@ -3174,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3174 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3175 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3175 inode_inc_iversion(dir); 3176 inode_inc_iversion(dir);
3176 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3177 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3177 ret = btrfs_update_inode(trans, root, dir); 3178 ret = btrfs_update_inode_fallback(trans, root, dir);
3178 if (ret) 3179 if (ret)
3179 btrfs_abort_transaction(trans, root, ret); 3180 btrfs_abort_transaction(trans, root, ret);
3180out: 3181out:
@@ -5774,18 +5775,112 @@ out:
5774 return ret; 5775 return ret;
5775} 5776}
5776 5777
5778static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
5779 struct extent_state **cached_state, int writing)
5780{
5781 struct btrfs_ordered_extent *ordered;
5782 int ret = 0;
5783
5784 while (1) {
5785 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5786 0, cached_state);
5787 /*
5788 * We're concerned with the entire range that we're going to be
5789 * doing DIO to, so we need to make sure theres no ordered
5790 * extents in this range.
5791 */
5792 ordered = btrfs_lookup_ordered_range(inode, lockstart,
5793 lockend - lockstart + 1);
5794
5795 /*
5796 * We need to make sure there are no buffered pages in this
5797 * range either, we could have raced between the invalidate in
5798 * generic_file_direct_write and locking the extent. The
5799 * invalidate needs to happen so that reads after a write do not
5800 * get stale data.
5801 */
5802 if (!ordered && (!writing ||
5803 !test_range_bit(&BTRFS_I(inode)->io_tree,
5804 lockstart, lockend, EXTENT_UPTODATE, 0,
5805 *cached_state)))
5806 break;
5807
5808 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5809 cached_state, GFP_NOFS);
5810
5811 if (ordered) {
5812 btrfs_start_ordered_extent(inode, ordered, 1);
5813 btrfs_put_ordered_extent(ordered);
5814 } else {
5815 /* Screw you mmap */
5816 ret = filemap_write_and_wait_range(inode->i_mapping,
5817 lockstart,
5818 lockend);
5819 if (ret)
5820 break;
5821
5822 /*
5823 * If we found a page that couldn't be invalidated just
5824 * fall back to buffered.
5825 */
5826 ret = invalidate_inode_pages2_range(inode->i_mapping,
5827 lockstart >> PAGE_CACHE_SHIFT,
5828 lockend >> PAGE_CACHE_SHIFT);
5829 if (ret)
5830 break;
5831 }
5832
5833 cond_resched();
5834 }
5835
5836 return ret;
5837}
5838
5777static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, 5839static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5778 struct buffer_head *bh_result, int create) 5840 struct buffer_head *bh_result, int create)
5779{ 5841{
5780 struct extent_map *em; 5842 struct extent_map *em;
5781 struct btrfs_root *root = BTRFS_I(inode)->root; 5843 struct btrfs_root *root = BTRFS_I(inode)->root;
5844 struct extent_state *cached_state = NULL;
5782 u64 start = iblock << inode->i_blkbits; 5845 u64 start = iblock << inode->i_blkbits;
5846 u64 lockstart, lockend;
5783 u64 len = bh_result->b_size; 5847 u64 len = bh_result->b_size;
5784 struct btrfs_trans_handle *trans; 5848 struct btrfs_trans_handle *trans;
5849 int unlock_bits = EXTENT_LOCKED;
5850 int ret;
5851
5852 if (create) {
5853 ret = btrfs_delalloc_reserve_space(inode, len);
5854 if (ret)
5855 return ret;
5856 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
5857 } else {
5858 len = min_t(u64, len, root->sectorsize);
5859 }
5860
5861 lockstart = start;
5862 lockend = start + len - 1;
5863
5864 /*
5865 * If this errors out it's because we couldn't invalidate pagecache for
5866 * this range and we need to fallback to buffered.
5867 */
5868 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
5869 return -ENOTBLK;
5870
5871 if (create) {
5872 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
5873 lockend, EXTENT_DELALLOC, NULL,
5874 &cached_state, GFP_NOFS);
5875 if (ret)
5876 goto unlock_err;
5877 }
5785 5878
5786 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 5879 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
5787 if (IS_ERR(em)) 5880 if (IS_ERR(em)) {
5788 return PTR_ERR(em); 5881 ret = PTR_ERR(em);
5882 goto unlock_err;
5883 }
5789 5884
5790 /* 5885 /*
5791 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered 5886 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5804,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5804 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || 5899 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
5805 em->block_start == EXTENT_MAP_INLINE) { 5900 em->block_start == EXTENT_MAP_INLINE) {
5806 free_extent_map(em); 5901 free_extent_map(em);
5807 return -ENOTBLK; 5902 ret = -ENOTBLK;
5903 goto unlock_err;
5808 } 5904 }
5809 5905
5810 /* Just a good old fashioned hole, return */ 5906 /* Just a good old fashioned hole, return */
5811 if (!create && (em->block_start == EXTENT_MAP_HOLE || 5907 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
5812 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 5908 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5813 free_extent_map(em); 5909 free_extent_map(em);
5814 /* DIO will do one hole at a time, so just unlock a sector */ 5910 ret = 0;
5815 unlock_extent(&BTRFS_I(inode)->io_tree, start, 5911 goto unlock_err;
5816 start + root->sectorsize - 1);
5817 return 0;
5818 } 5912 }
5819 5913
5820 /* 5914 /*
@@ -5827,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5827 * 5921 *
5828 */ 5922 */
5829 if (!create) { 5923 if (!create) {
5830 len = em->len - (start - em->start); 5924 len = min(len, em->len - (start - em->start));
5831 goto map; 5925 lockstart = start + len;
5926 goto unlock;
5832 } 5927 }
5833 5928
5834 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 5929 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5860,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5860 btrfs_end_transaction(trans, root); 5955 btrfs_end_transaction(trans, root);
5861 if (ret) { 5956 if (ret) {
5862 free_extent_map(em); 5957 free_extent_map(em);
5863 return ret; 5958 goto unlock_err;
5864 } 5959 }
5865 goto unlock; 5960 goto unlock;
5866 } 5961 }
@@ -5873,14 +5968,12 @@ must_cow:
5873 */ 5968 */
5874 len = bh_result->b_size; 5969 len = bh_result->b_size;
5875 em = btrfs_new_extent_direct(inode, em, start, len); 5970 em = btrfs_new_extent_direct(inode, em, start, len);
5876 if (IS_ERR(em)) 5971 if (IS_ERR(em)) {
5877 return PTR_ERR(em); 5972 ret = PTR_ERR(em);
5973 goto unlock_err;
5974 }
5878 len = min(len, em->len - (start - em->start)); 5975 len = min(len, em->len - (start - em->start));
5879unlock: 5976unlock:
5880 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
5881 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
5882 0, NULL, GFP_NOFS);
5883map:
5884 bh_result->b_blocknr = (em->block_start + (start - em->start)) >> 5977 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
5885 inode->i_blkbits; 5978 inode->i_blkbits;
5886 bh_result->b_size = len; 5979 bh_result->b_size = len;
@@ -5898,9 +5991,44 @@ map:
5898 i_size_write(inode, start + len); 5991 i_size_write(inode, start + len);
5899 } 5992 }
5900 5993
5994 /*
5995 * In the case of write we need to clear and unlock the entire range,
5996 * in the case of read we need to unlock only the end area that we
5997 * aren't using if there is any left over space.
5998 */
5999 if (lockstart < lockend) {
6000 if (create && len < lockend - lockstart) {
6001 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6002 lockstart + len - 1, unlock_bits, 1, 0,
6003 &cached_state, GFP_NOFS);
6004 /*
6005 * Beside unlock, we also need to cleanup reserved space
6006 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6007 */
6008 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6009 lockstart + len, lockend,
6010 unlock_bits | EXTENT_DO_ACCOUNTING,
6011 1, 0, NULL, GFP_NOFS);
6012 } else {
6013 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6014 lockend, unlock_bits, 1, 0,
6015 &cached_state, GFP_NOFS);
6016 }
6017 } else {
6018 free_extent_state(cached_state);
6019 }
6020
5901 free_extent_map(em); 6021 free_extent_map(em);
5902 6022
5903 return 0; 6023 return 0;
6024
6025unlock_err:
6026 if (create)
6027 unlock_bits |= EXTENT_DO_ACCOUNTING;
6028
6029 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6030 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6031 return ret;
5904} 6032}
5905 6033
5906struct btrfs_dio_private { 6034struct btrfs_dio_private {
@@ -5908,7 +6036,6 @@ struct btrfs_dio_private {
5908 u64 logical_offset; 6036 u64 logical_offset;
5909 u64 disk_bytenr; 6037 u64 disk_bytenr;
5910 u64 bytes; 6038 u64 bytes;
5911 u32 *csums;
5912 void *private; 6039 void *private;
5913 6040
5914 /* number of bios pending for this dio */ 6041 /* number of bios pending for this dio */
@@ -5928,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5928 struct inode *inode = dip->inode; 6055 struct inode *inode = dip->inode;
5929 struct btrfs_root *root = BTRFS_I(inode)->root; 6056 struct btrfs_root *root = BTRFS_I(inode)->root;
5930 u64 start; 6057 u64 start;
5931 u32 *private = dip->csums;
5932 6058
5933 start = dip->logical_offset; 6059 start = dip->logical_offset;
5934 do { 6060 do {
@@ -5936,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5936 struct page *page = bvec->bv_page; 6062 struct page *page = bvec->bv_page;
5937 char *kaddr; 6063 char *kaddr;
5938 u32 csum = ~(u32)0; 6064 u32 csum = ~(u32)0;
6065 u64 private = ~(u32)0;
5939 unsigned long flags; 6066 unsigned long flags;
5940 6067
6068 if (get_state_private(&BTRFS_I(inode)->io_tree,
6069 start, &private))
6070 goto failed;
5941 local_irq_save(flags); 6071 local_irq_save(flags);
5942 kaddr = kmap_atomic(page); 6072 kaddr = kmap_atomic(page);
5943 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 6073 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5947,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5947 local_irq_restore(flags); 6077 local_irq_restore(flags);
5948 6078
5949 flush_dcache_page(bvec->bv_page); 6079 flush_dcache_page(bvec->bv_page);
5950 if (csum != *private) { 6080 if (csum != private) {
6081failed:
5951 printk(KERN_ERR "btrfs csum failed ino %llu off" 6082 printk(KERN_ERR "btrfs csum failed ino %llu off"
5952 " %llu csum %u private %u\n", 6083 " %llu csum %u private %u\n",
5953 (unsigned long long)btrfs_ino(inode), 6084 (unsigned long long)btrfs_ino(inode),
5954 (unsigned long long)start, 6085 (unsigned long long)start,
5955 csum, *private); 6086 csum, (unsigned)private);
5956 err = -EIO; 6087 err = -EIO;
5957 } 6088 }
5958 } 6089 }
5959 6090
5960 start += bvec->bv_len; 6091 start += bvec->bv_len;
5961 private++;
5962 bvec++; 6092 bvec++;
5963 } while (bvec <= bvec_end); 6093 } while (bvec <= bvec_end);
5964 6094
@@ -5966,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5966 dip->logical_offset + dip->bytes - 1); 6096 dip->logical_offset + dip->bytes - 1);
5967 bio->bi_private = dip->private; 6097 bio->bi_private = dip->private;
5968 6098
5969 kfree(dip->csums);
5970 kfree(dip); 6099 kfree(dip);
5971 6100
5972 /* If we had a csum failure make sure to clear the uptodate flag */ 6101 /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6072,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
6072 6201
6073static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 6202static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6074 int rw, u64 file_offset, int skip_sum, 6203 int rw, u64 file_offset, int skip_sum,
6075 u32 *csums, int async_submit) 6204 int async_submit)
6076{ 6205{
6077 int write = rw & REQ_WRITE; 6206 int write = rw & REQ_WRITE;
6078 struct btrfs_root *root = BTRFS_I(inode)->root; 6207 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6105,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6105 if (ret) 6234 if (ret)
6106 goto err; 6235 goto err;
6107 } else if (!skip_sum) { 6236 } else if (!skip_sum) {
6108 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6237 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
6109 file_offset, csums);
6110 if (ret) 6238 if (ret)
6111 goto err; 6239 goto err;
6112 } 6240 }
@@ -6132,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6132 u64 submit_len = 0; 6260 u64 submit_len = 0;
6133 u64 map_length; 6261 u64 map_length;
6134 int nr_pages = 0; 6262 int nr_pages = 0;
6135 u32 *csums = dip->csums;
6136 int ret = 0; 6263 int ret = 0;
6137 int async_submit = 0; 6264 int async_submit = 0;
6138 int write = rw & REQ_WRITE;
6139 6265
6140 map_length = orig_bio->bi_size; 6266 map_length = orig_bio->bi_size;
6141 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6267 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6171,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6171 atomic_inc(&dip->pending_bios); 6297 atomic_inc(&dip->pending_bios);
6172 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6298 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6173 file_offset, skip_sum, 6299 file_offset, skip_sum,
6174 csums, async_submit); 6300 async_submit);
6175 if (ret) { 6301 if (ret) {
6176 bio_put(bio); 6302 bio_put(bio);
6177 atomic_dec(&dip->pending_bios); 6303 atomic_dec(&dip->pending_bios);
6178 goto out_err; 6304 goto out_err;
6179 } 6305 }
6180 6306
6181 /* Write's use the ordered csums */
6182 if (!write && !skip_sum)
6183 csums = csums + nr_pages;
6184 start_sector += submit_len >> 9; 6307 start_sector += submit_len >> 9;
6185 file_offset += submit_len; 6308 file_offset += submit_len;
6186 6309
@@ -6210,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6210 6333
6211submit: 6334submit:
6212 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6335 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6213 csums, async_submit); 6336 async_submit);
6214 if (!ret) 6337 if (!ret)
6215 return 0; 6338 return 0;
6216 6339
@@ -6246,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6246 ret = -ENOMEM; 6369 ret = -ENOMEM;
6247 goto free_ordered; 6370 goto free_ordered;
6248 } 6371 }
6249 dip->csums = NULL;
6250
6251 /* Write's use the ordered csum stuff, so we don't need dip->csums */
6252 if (!write && !skip_sum) {
6253 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6254 if (!dip->csums) {
6255 kfree(dip);
6256 ret = -ENOMEM;
6257 goto free_ordered;
6258 }
6259 }
6260 6372
6261 dip->private = bio->bi_private; 6373 dip->private = bio->bi_private;
6262 dip->inode = inode; 6374 dip->inode = inode;
@@ -6341,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6341out: 6453out:
6342 return retval; 6454 return retval;
6343} 6455}
6456
6344static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 6457static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6345 const struct iovec *iov, loff_t offset, 6458 const struct iovec *iov, loff_t offset,
6346 unsigned long nr_segs) 6459 unsigned long nr_segs)
6347{ 6460{
6348 struct file *file = iocb->ki_filp; 6461 struct file *file = iocb->ki_filp;
6349 struct inode *inode = file->f_mapping->host; 6462 struct inode *inode = file->f_mapping->host;
6350 struct btrfs_ordered_extent *ordered;
6351 struct extent_state *cached_state = NULL;
6352 u64 lockstart, lockend;
6353 ssize_t ret;
6354 int writing = rw & WRITE;
6355 int write_bits = 0;
6356 size_t count = iov_length(iov, nr_segs);
6357 6463
6358 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6464 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6359 offset, nr_segs)) { 6465 offset, nr_segs))
6360 return 0; 6466 return 0;
6361 }
6362
6363 lockstart = offset;
6364 lockend = offset + count - 1;
6365 6467
6366 if (writing) { 6468 return __blockdev_direct_IO(rw, iocb, inode,
6367 ret = btrfs_delalloc_reserve_space(inode, count);
6368 if (ret)
6369 goto out;
6370 }
6371
6372 while (1) {
6373 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6374 0, &cached_state);
6375 /*
6376 * We're concerned with the entire range that we're going to be
6377 * doing DIO to, so we need to make sure theres no ordered
6378 * extents in this range.
6379 */
6380 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6381 lockend - lockstart + 1);
6382
6383 /*
6384 * We need to make sure there are no buffered pages in this
6385 * range either, we could have raced between the invalidate in
6386 * generic_file_direct_write and locking the extent. The
6387 * invalidate needs to happen so that reads after a write do not
6388 * get stale data.
6389 */
6390 if (!ordered && (!writing ||
6391 !test_range_bit(&BTRFS_I(inode)->io_tree,
6392 lockstart, lockend, EXTENT_UPTODATE, 0,
6393 cached_state)))
6394 break;
6395
6396 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6397 &cached_state, GFP_NOFS);
6398
6399 if (ordered) {
6400 btrfs_start_ordered_extent(inode, ordered, 1);
6401 btrfs_put_ordered_extent(ordered);
6402 } else {
6403 /* Screw you mmap */
6404 ret = filemap_write_and_wait_range(file->f_mapping,
6405 lockstart,
6406 lockend);
6407 if (ret)
6408 goto out;
6409
6410 /*
6411 * If we found a page that couldn't be invalidated just
6412 * fall back to buffered.
6413 */
6414 ret = invalidate_inode_pages2_range(file->f_mapping,
6415 lockstart >> PAGE_CACHE_SHIFT,
6416 lockend >> PAGE_CACHE_SHIFT);
6417 if (ret) {
6418 if (ret == -EBUSY)
6419 ret = 0;
6420 goto out;
6421 }
6422 }
6423
6424 cond_resched();
6425 }
6426
6427 /*
6428 * we don't use btrfs_set_extent_delalloc because we don't want
6429 * the dirty or uptodate bits
6430 */
6431 if (writing) {
6432 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
6433 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6434 EXTENT_DELALLOC, NULL, &cached_state,
6435 GFP_NOFS);
6436 if (ret) {
6437 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6438 lockend, EXTENT_LOCKED | write_bits,
6439 1, 0, &cached_state, GFP_NOFS);
6440 goto out;
6441 }
6442 }
6443
6444 free_extent_state(cached_state);
6445 cached_state = NULL;
6446
6447 ret = __blockdev_direct_IO(rw, iocb, inode,
6448 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 6469 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
6449 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 6470 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
6450 btrfs_submit_direct, 0); 6471 btrfs_submit_direct, 0);
6451
6452 if (ret < 0 && ret != -EIOCBQUEUED) {
6453 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
6454 offset + iov_length(iov, nr_segs) - 1,
6455 EXTENT_LOCKED | write_bits, 1, 0,
6456 &cached_state, GFP_NOFS);
6457 } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
6458 /*
6459 * We're falling back to buffered, unlock the section we didn't
6460 * do IO on.
6461 */
6462 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
6463 offset + iov_length(iov, nr_segs) - 1,
6464 EXTENT_LOCKED | write_bits, 1, 0,
6465 &cached_state, GFP_NOFS);
6466 }
6467out:
6468 free_extent_state(cached_state);
6469 return ret;
6470} 6472}
6471 6473
6472static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6474static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -7074,6 +7076,11 @@ static void init_once(void *foo)
7074 7076
7075void btrfs_destroy_cachep(void) 7077void btrfs_destroy_cachep(void)
7076{ 7078{
7079 /*
7080 * Make sure all delayed rcu free inodes are flushed before we
7081 * destroy cache.
7082 */
7083 rcu_barrier();
7077 if (btrfs_inode_cachep) 7084 if (btrfs_inode_cachep)
7078 kmem_cache_destroy(btrfs_inode_cachep); 7085 kmem_cache_destroy(btrfs_inode_cachep);
7079 if (btrfs_trans_handle_cachep) 7086 if (btrfs_trans_handle_cachep)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7bb755677a22..47127c1bd290 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -424,7 +424,7 @@ static noinline int create_subvol(struct btrfs_root *root,
424 uuid_le_gen(&new_uuid); 424 uuid_le_gen(&new_uuid);
425 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 425 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
426 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); 426 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
427 root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec); 427 root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
428 root_item.ctime = root_item.otime; 428 root_item.ctime = root_item.otime;
429 btrfs_set_root_ctransid(&root_item, trans->transid); 429 btrfs_set_root_ctransid(&root_item, trans->transid);
430 btrfs_set_root_otransid(&root_item, trans->transid); 430 btrfs_set_root_otransid(&root_item, trans->transid);
@@ -575,13 +575,13 @@ fail:
575*/ 575*/
576static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 576static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
577{ 577{
578 uid_t fsuid = current_fsuid(); 578 kuid_t fsuid = current_fsuid();
579 579
580 if (!(dir->i_mode & S_ISVTX)) 580 if (!(dir->i_mode & S_ISVTX))
581 return 0; 581 return 0;
582 if (inode->i_uid == fsuid) 582 if (uid_eq(inode->i_uid, fsuid))
583 return 0; 583 return 0;
584 if (dir->i_uid == fsuid) 584 if (uid_eq(dir->i_uid, fsuid))
585 return 0; 585 return 0;
586 return !capable(CAP_FOWNER); 586 return !capable(CAP_FOWNER);
587} 587}
@@ -1397,7 +1397,6 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1397 u64 *transid, bool readonly, 1397 u64 *transid, bool readonly,
1398 struct btrfs_qgroup_inherit **inherit) 1398 struct btrfs_qgroup_inherit **inherit)
1399{ 1399{
1400 struct file *src_file;
1401 int namelen; 1400 int namelen;
1402 int ret = 0; 1401 int ret = 0;
1403 1402
@@ -1421,25 +1420,24 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1421 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1420 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1422 NULL, transid, readonly, inherit); 1421 NULL, transid, readonly, inherit);
1423 } else { 1422 } else {
1423 struct fd src = fdget(fd);
1424 struct inode *src_inode; 1424 struct inode *src_inode;
1425 src_file = fget(fd); 1425 if (!src.file) {
1426 if (!src_file) {
1427 ret = -EINVAL; 1426 ret = -EINVAL;
1428 goto out_drop_write; 1427 goto out_drop_write;
1429 } 1428 }
1430 1429
1431 src_inode = src_file->f_path.dentry->d_inode; 1430 src_inode = src.file->f_path.dentry->d_inode;
1432 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) { 1431 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
1433 printk(KERN_INFO "btrfs: Snapshot src from " 1432 printk(KERN_INFO "btrfs: Snapshot src from "
1434 "another FS\n"); 1433 "another FS\n");
1435 ret = -EINVAL; 1434 ret = -EINVAL;
1436 fput(src_file); 1435 } else {
1437 goto out_drop_write; 1436 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1437 BTRFS_I(src_inode)->root,
1438 transid, readonly, inherit);
1438 } 1439 }
1439 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1440 fdput(src);
1440 BTRFS_I(src_inode)->root,
1441 transid, readonly, inherit);
1442 fput(src_file);
1443 } 1441 }
1444out_drop_write: 1442out_drop_write:
1445 mnt_drop_write_file(file); 1443 mnt_drop_write_file(file);
@@ -2341,7 +2339,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2341{ 2339{
2342 struct inode *inode = fdentry(file)->d_inode; 2340 struct inode *inode = fdentry(file)->d_inode;
2343 struct btrfs_root *root = BTRFS_I(inode)->root; 2341 struct btrfs_root *root = BTRFS_I(inode)->root;
2344 struct file *src_file; 2342 struct fd src_file;
2345 struct inode *src; 2343 struct inode *src;
2346 struct btrfs_trans_handle *trans; 2344 struct btrfs_trans_handle *trans;
2347 struct btrfs_path *path; 2345 struct btrfs_path *path;
@@ -2376,24 +2374,24 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2376 if (ret) 2374 if (ret)
2377 return ret; 2375 return ret;
2378 2376
2379 src_file = fget(srcfd); 2377 src_file = fdget(srcfd);
2380 if (!src_file) { 2378 if (!src_file.file) {
2381 ret = -EBADF; 2379 ret = -EBADF;
2382 goto out_drop_write; 2380 goto out_drop_write;
2383 } 2381 }
2384 2382
2385 ret = -EXDEV; 2383 ret = -EXDEV;
2386 if (src_file->f_path.mnt != file->f_path.mnt) 2384 if (src_file.file->f_path.mnt != file->f_path.mnt)
2387 goto out_fput; 2385 goto out_fput;
2388 2386
2389 src = src_file->f_dentry->d_inode; 2387 src = src_file.file->f_dentry->d_inode;
2390 2388
2391 ret = -EINVAL; 2389 ret = -EINVAL;
2392 if (src == inode) 2390 if (src == inode)
2393 goto out_fput; 2391 goto out_fput;
2394 2392
2395 /* the src must be open for reading */ 2393 /* the src must be open for reading */
2396 if (!(src_file->f_mode & FMODE_READ)) 2394 if (!(src_file.file->f_mode & FMODE_READ))
2397 goto out_fput; 2395 goto out_fput;
2398 2396
2399 /* don't make the dst file partly checksummed */ 2397 /* don't make the dst file partly checksummed */
@@ -2724,7 +2722,7 @@ out_unlock:
2724 vfree(buf); 2722 vfree(buf);
2725 btrfs_free_path(path); 2723 btrfs_free_path(path);
2726out_fput: 2724out_fput:
2727 fput(src_file); 2725 fdput(src_file);
2728out_drop_write: 2726out_drop_write:
2729 mnt_drop_write_file(file); 2727 mnt_drop_write_file(file);
2730 return ret; 2728 return ret;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index a44eff074805..2a1762c66041 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -67,7 +67,7 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
67{ 67{
68 if (eb->lock_nested) { 68 if (eb->lock_nested) {
69 read_lock(&eb->lock); 69 read_lock(&eb->lock);
70 if (&eb->lock_nested && current->pid == eb->lock_owner) { 70 if (eb->lock_nested && current->pid == eb->lock_owner) {
71 read_unlock(&eb->lock); 71 read_unlock(&eb->lock);
72 return; 72 return;
73 } 73 }
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index bc424ae5a81a..b65015581744 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1364,8 +1364,10 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1364 spin_lock(&fs_info->qgroup_lock); 1364 spin_lock(&fs_info->qgroup_lock);
1365 1365
1366 dstgroup = add_qgroup_rb(fs_info, objectid); 1366 dstgroup = add_qgroup_rb(fs_info, objectid);
1367 if (!dstgroup) 1367 if (IS_ERR(dstgroup)) {
1368 ret = PTR_ERR(dstgroup);
1368 goto unlock; 1369 goto unlock;
1370 }
1369 1371
1370 if (srcid) { 1372 if (srcid) {
1371 srcgroup = find_qgroup_rb(fs_info, srcid); 1373 srcgroup = find_qgroup_rb(fs_info, srcid);
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 48a4882d8ad5..a955669519a2 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -68,7 +68,7 @@ struct reada_extent {
68 u32 blocksize; 68 u32 blocksize;
69 int err; 69 int err;
70 struct list_head extctl; 70 struct list_head extctl;
71 struct kref refcnt; 71 int refcnt;
72 spinlock_t lock; 72 spinlock_t lock;
73 struct reada_zone *zones[BTRFS_MAX_MIRRORS]; 73 struct reada_zone *zones[BTRFS_MAX_MIRRORS];
74 int nzones; 74 int nzones;
@@ -126,7 +126,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
126 spin_lock(&fs_info->reada_lock); 126 spin_lock(&fs_info->reada_lock);
127 re = radix_tree_lookup(&fs_info->reada_tree, index); 127 re = radix_tree_lookup(&fs_info->reada_tree, index);
128 if (re) 128 if (re)
129 kref_get(&re->refcnt); 129 re->refcnt++;
130 spin_unlock(&fs_info->reada_lock); 130 spin_unlock(&fs_info->reada_lock);
131 131
132 if (!re) 132 if (!re)
@@ -336,7 +336,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
336 spin_lock(&fs_info->reada_lock); 336 spin_lock(&fs_info->reada_lock);
337 re = radix_tree_lookup(&fs_info->reada_tree, index); 337 re = radix_tree_lookup(&fs_info->reada_tree, index);
338 if (re) 338 if (re)
339 kref_get(&re->refcnt); 339 re->refcnt++;
340 spin_unlock(&fs_info->reada_lock); 340 spin_unlock(&fs_info->reada_lock);
341 341
342 if (re) 342 if (re)
@@ -352,7 +352,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
352 re->top = *top; 352 re->top = *top;
353 INIT_LIST_HEAD(&re->extctl); 353 INIT_LIST_HEAD(&re->extctl);
354 spin_lock_init(&re->lock); 354 spin_lock_init(&re->lock);
355 kref_init(&re->refcnt); 355 re->refcnt = 1;
356 356
357 /* 357 /*
358 * map block 358 * map block
@@ -398,7 +398,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
398 if (ret == -EEXIST) { 398 if (ret == -EEXIST) {
399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index); 399 re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
400 BUG_ON(!re_exist); 400 BUG_ON(!re_exist);
401 kref_get(&re_exist->refcnt); 401 re_exist->refcnt++;
402 spin_unlock(&fs_info->reada_lock); 402 spin_unlock(&fs_info->reada_lock);
403 goto error; 403 goto error;
404 } 404 }
@@ -465,10 +465,6 @@ error:
465 return re_exist; 465 return re_exist;
466} 466}
467 467
468static void reada_kref_dummy(struct kref *kr)
469{
470}
471
472static void reada_extent_put(struct btrfs_fs_info *fs_info, 468static void reada_extent_put(struct btrfs_fs_info *fs_info,
473 struct reada_extent *re) 469 struct reada_extent *re)
474{ 470{
@@ -476,7 +472,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
476 unsigned long index = re->logical >> PAGE_CACHE_SHIFT; 472 unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
477 473
478 spin_lock(&fs_info->reada_lock); 474 spin_lock(&fs_info->reada_lock);
479 if (!kref_put(&re->refcnt, reada_kref_dummy)) { 475 if (--re->refcnt) {
480 spin_unlock(&fs_info->reada_lock); 476 spin_unlock(&fs_info->reada_lock);
481 return; 477 return;
482 } 478 }
@@ -671,7 +667,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
671 return 0; 667 return 0;
672 } 668 }
673 dev->reada_next = re->logical + re->blocksize; 669 dev->reada_next = re->logical + re->blocksize;
674 kref_get(&re->refcnt); 670 re->refcnt++;
675 671
676 spin_unlock(&fs_info->reada_lock); 672 spin_unlock(&fs_info->reada_lock);
677 673
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 6bb465cca20f..10d8e4d88071 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -544,8 +544,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
544 struct timespec ct = CURRENT_TIME; 544 struct timespec ct = CURRENT_TIME;
545 545
546 spin_lock(&root->root_times_lock); 546 spin_lock(&root->root_times_lock);
547 item->ctransid = trans->transid; 547 item->ctransid = cpu_to_le64(trans->transid);
548 item->ctime.sec = cpu_to_le64(ct.tv_sec); 548 item->ctime.sec = cpu_to_le64(ct.tv_sec);
549 item->ctime.nsec = cpu_to_le64(ct.tv_nsec); 549 item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
550 spin_unlock(&root->root_times_lock); 550 spin_unlock(&root->root_times_lock);
551} 551}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2eb24c477a3..83d6f9f9c220 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -838,7 +838,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
838 struct btrfs_trans_handle *trans; 838 struct btrfs_trans_handle *trans;
839 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 839 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
840 struct btrfs_root *root = fs_info->tree_root; 840 struct btrfs_root *root = fs_info->tree_root;
841 int ret;
842 841
843 trace_btrfs_sync_fs(wait); 842 trace_btrfs_sync_fs(wait);
844 843
@@ -849,11 +848,17 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
849 848
850 btrfs_wait_ordered_extents(root, 0, 0); 849 btrfs_wait_ordered_extents(root, 0, 0);
851 850
852 trans = btrfs_start_transaction(root, 0); 851 spin_lock(&fs_info->trans_lock);
852 if (!fs_info->running_transaction) {
853 spin_unlock(&fs_info->trans_lock);
854 return 0;
855 }
856 spin_unlock(&fs_info->trans_lock);
857
858 trans = btrfs_join_transaction(root);
853 if (IS_ERR(trans)) 859 if (IS_ERR(trans))
854 return PTR_ERR(trans); 860 return PTR_ERR(trans);
855 ret = btrfs_commit_transaction(trans, root); 861 return btrfs_commit_transaction(trans, root);
856 return ret;
857} 862}
858 863
859static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) 864static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
@@ -1530,6 +1535,8 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
1530 while (cur_devices) { 1535 while (cur_devices) {
1531 head = &cur_devices->devices; 1536 head = &cur_devices->devices;
1532 list_for_each_entry(dev, head, dev_list) { 1537 list_for_each_entry(dev, head, dev_list) {
1538 if (dev->missing)
1539 continue;
1533 if (!first_dev || dev->devid < first_dev->devid) 1540 if (!first_dev || dev->devid < first_dev->devid)
1534 first_dev = dev; 1541 first_dev = dev;
1535 } 1542 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 17be3dedacba..27c26004e050 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1031,6 +1031,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1031 1031
1032 btrfs_i_size_write(parent_inode, parent_inode->i_size + 1032 btrfs_i_size_write(parent_inode, parent_inode->i_size +
1033 dentry->d_name.len * 2); 1033 dentry->d_name.len * 2);
1034 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1034 ret = btrfs_update_inode(trans, parent_root, parent_inode); 1035 ret = btrfs_update_inode(trans, parent_root, parent_inode);
1035 if (ret) 1036 if (ret)
1036 goto abort_trans_dput; 1037 goto abort_trans_dput;
@@ -1066,7 +1067,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1066 memcpy(new_root_item->parent_uuid, root->root_item.uuid, 1067 memcpy(new_root_item->parent_uuid, root->root_item.uuid,
1067 BTRFS_UUID_SIZE); 1068 BTRFS_UUID_SIZE);
1068 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); 1069 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
1069 new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec); 1070 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
1070 btrfs_set_root_otransid(new_root_item, trans->transid); 1071 btrfs_set_root_otransid(new_root_item, trans->transid);
1071 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); 1072 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
1072 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); 1073 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e86ae04abe6a..88b969aeeb71 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -227,9 +227,8 @@ loop_lock:
227 cur = pending; 227 cur = pending;
228 pending = pending->bi_next; 228 pending = pending->bi_next;
229 cur->bi_next = NULL; 229 cur->bi_next = NULL;
230 atomic_dec(&fs_info->nr_async_bios);
231 230
232 if (atomic_read(&fs_info->nr_async_bios) < limit && 231 if (atomic_dec_return(&fs_info->nr_async_bios) < limit &&
233 waitqueue_active(&fs_info->async_submit_wait)) 232 waitqueue_active(&fs_info->async_submit_wait))
234 wake_up(&fs_info->async_submit_wait); 233 wake_up(&fs_info->async_submit_wait);
235 234
@@ -569,9 +568,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
569 memcpy(new_device, device, sizeof(*new_device)); 568 memcpy(new_device, device, sizeof(*new_device));
570 569
571 /* Safe because we are under uuid_mutex */ 570 /* Safe because we are under uuid_mutex */
572 name = rcu_string_strdup(device->name->str, GFP_NOFS); 571 if (device->name) {
573 BUG_ON(device->name && !name); /* -ENOMEM */ 572 name = rcu_string_strdup(device->name->str, GFP_NOFS);
574 rcu_assign_pointer(new_device->name, name); 573 BUG_ON(device->name && !name); /* -ENOMEM */
574 rcu_assign_pointer(new_device->name, name);
575 }
575 new_device->bdev = NULL; 576 new_device->bdev = NULL;
576 new_device->writeable = 0; 577 new_device->writeable = 0;
577 new_device->in_fs_metadata = 0; 578 new_device->in_fs_metadata = 0;
@@ -4605,28 +4606,6 @@ int btrfs_read_sys_array(struct btrfs_root *root)
4605 return ret; 4606 return ret;
4606} 4607}
4607 4608
4608struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
4609 u64 logical, int mirror_num)
4610{
4611 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
4612 int ret;
4613 u64 map_length = 0;
4614 struct btrfs_bio *bbio = NULL;
4615 struct btrfs_device *device;
4616
4617 BUG_ON(mirror_num == 0);
4618 ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio,
4619 mirror_num);
4620 if (ret) {
4621 BUG_ON(bbio != NULL);
4622 return NULL;
4623 }
4624 BUG_ON(mirror_num != bbio->mirror_num);
4625 device = bbio->stripes[mirror_num - 1].dev;
4626 kfree(bbio);
4627 return device;
4628}
4629
4630int btrfs_read_chunk_tree(struct btrfs_root *root) 4609int btrfs_read_chunk_tree(struct btrfs_root *root)
4631{ 4610{
4632 struct btrfs_path *path; 4611 struct btrfs_path *path;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5479325987b3..53c06af92e8d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -289,8 +289,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
289int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 289int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
290int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 290int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
291 u64 *start, u64 *max_avail); 291 u64 *start, u64 *max_avail);
292struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
293 u64 logical, int mirror_num);
294void btrfs_dev_stat_print_on_error(struct btrfs_device *device); 292void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
295void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 293void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
296int btrfs_get_dev_stats(struct btrfs_root *root, 294int btrfs_get_dev_stats(struct btrfs_root *root,
diff --git a/fs/buffer.c b/fs/buffer.c
index 9f6d2e41281d..b5f044283edb 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
914/* 914/*
915 * Initialise the state of a blockdev page's buffers. 915 * Initialise the state of a blockdev page's buffers.
916 */ 916 */
917static void 917static sector_t
918init_page_buffers(struct page *page, struct block_device *bdev, 918init_page_buffers(struct page *page, struct block_device *bdev,
919 sector_t block, int size) 919 sector_t block, int size)
920{ 920{
@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev,
936 block++; 936 block++;
937 bh = bh->b_this_page; 937 bh = bh->b_this_page;
938 } while (bh != head); 938 } while (bh != head);
939
940 /*
941 * Caller needs to validate requested block against end of device.
942 */
943 return end_block;
939} 944}
940 945
941/* 946/*
942 * Create the page-cache page that contains the requested block. 947 * Create the page-cache page that contains the requested block.
943 * 948 *
944 * This is user purely for blockdev mappings. 949 * This is used purely for blockdev mappings.
945 */ 950 */
946static struct page * 951static int
947grow_dev_page(struct block_device *bdev, sector_t block, 952grow_dev_page(struct block_device *bdev, sector_t block,
948 pgoff_t index, int size) 953 pgoff_t index, int size, int sizebits)
949{ 954{
950 struct inode *inode = bdev->bd_inode; 955 struct inode *inode = bdev->bd_inode;
951 struct page *page; 956 struct page *page;
952 struct buffer_head *bh; 957 struct buffer_head *bh;
958 sector_t end_block;
959 int ret = 0; /* Will call free_more_memory() */
953 960
954 page = find_or_create_page(inode->i_mapping, index, 961 page = find_or_create_page(inode->i_mapping, index,
955 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); 962 (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
956 if (!page) 963 if (!page)
957 return NULL; 964 return ret;
958 965
959 BUG_ON(!PageLocked(page)); 966 BUG_ON(!PageLocked(page));
960 967
961 if (page_has_buffers(page)) { 968 if (page_has_buffers(page)) {
962 bh = page_buffers(page); 969 bh = page_buffers(page);
963 if (bh->b_size == size) { 970 if (bh->b_size == size) {
964 init_page_buffers(page, bdev, block, size); 971 end_block = init_page_buffers(page, bdev,
965 return page; 972 index << sizebits, size);
973 goto done;
966 } 974 }
967 if (!try_to_free_buffers(page)) 975 if (!try_to_free_buffers(page))
968 goto failed; 976 goto failed;
@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block,
982 */ 990 */
983 spin_lock(&inode->i_mapping->private_lock); 991 spin_lock(&inode->i_mapping->private_lock);
984 link_dev_buffers(page, bh); 992 link_dev_buffers(page, bh);
985 init_page_buffers(page, bdev, block, size); 993 end_block = init_page_buffers(page, bdev, index << sizebits, size);
986 spin_unlock(&inode->i_mapping->private_lock); 994 spin_unlock(&inode->i_mapping->private_lock);
987 return page; 995done:
988 996 ret = (block < end_block) ? 1 : -ENXIO;
989failed: 997failed:
990 unlock_page(page); 998 unlock_page(page);
991 page_cache_release(page); 999 page_cache_release(page);
992 return NULL; 1000 return ret;
993} 1001}
994 1002
995/* 1003/*
@@ -999,7 +1007,6 @@ failed:
999static int 1007static int
1000grow_buffers(struct block_device *bdev, sector_t block, int size) 1008grow_buffers(struct block_device *bdev, sector_t block, int size)
1001{ 1009{
1002 struct page *page;
1003 pgoff_t index; 1010 pgoff_t index;
1004 int sizebits; 1011 int sizebits;
1005 1012
@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1023 bdevname(bdev, b)); 1030 bdevname(bdev, b));
1024 return -EIO; 1031 return -EIO;
1025 } 1032 }
1026 block = index << sizebits; 1033
1027 /* Create a page with the proper size buffers.. */ 1034 /* Create a page with the proper size buffers.. */
1028 page = grow_dev_page(bdev, block, index, size); 1035 return grow_dev_page(bdev, block, index, size, sizebits);
1029 if (!page)
1030 return 0;
1031 unlock_page(page);
1032 page_cache_release(page);
1033 return 1;
1034} 1036}
1035 1037
1036static struct buffer_head * 1038static struct buffer_head *
1037__getblk_slow(struct block_device *bdev, sector_t block, int size) 1039__getblk_slow(struct block_device *bdev, sector_t block, int size)
1038{ 1040{
1039 int ret;
1040 struct buffer_head *bh;
1041
1042 /* Size must be multiple of hard sectorsize */ 1041 /* Size must be multiple of hard sectorsize */
1043 if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 1042 if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
1044 (size < 512 || size > PAGE_SIZE))) { 1043 (size < 512 || size > PAGE_SIZE))) {
@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
1051 return NULL; 1050 return NULL;
1052 } 1051 }
1053 1052
1054retry: 1053 for (;;) {
1055 bh = __find_get_block(bdev, block, size); 1054 struct buffer_head *bh;
1056 if (bh) 1055 int ret;
1057 return bh;
1058 1056
1059 ret = grow_buffers(bdev, block, size);
1060 if (ret == 0) {
1061 free_more_memory();
1062 goto retry;
1063 } else if (ret > 0) {
1064 bh = __find_get_block(bdev, block, size); 1057 bh = __find_get_block(bdev, block, size);
1065 if (bh) 1058 if (bh)
1066 return bh; 1059 return bh;
1060
1061 ret = grow_buffers(bdev, block, size);
1062 if (ret < 0)
1063 return NULL;
1064 if (ret == 0)
1065 free_more_memory();
1067 } 1066 }
1068 return NULL;
1069} 1067}
1070 1068
1071/* 1069/*
@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block);
1321 * which corresponds to the passed block_device, block and size. The 1319 * which corresponds to the passed block_device, block and size. The
1322 * returned buffer has its reference count incremented. 1320 * returned buffer has its reference count incremented.
1323 * 1321 *
1324 * __getblk() cannot fail - it just keeps trying. If you pass it an
1325 * illegal block number, __getblk() will happily return a buffer_head
1326 * which represents the non-existent block. Very weird.
1327 *
1328 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() 1322 * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
1329 * attempt is failing. FIXME, perhaps? 1323 * attempt is failing. FIXME, perhaps?
1330 */ 1324 */
@@ -2318,12 +2312,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2318 loff_t size; 2312 loff_t size;
2319 int ret; 2313 int ret;
2320 2314
2321 /*
2322 * Update file times before taking page lock. We may end up failing the
2323 * fault so this update may be superfluous but who really cares...
2324 */
2325 file_update_time(vma->vm_file);
2326
2327 lock_page(page); 2315 lock_page(page);
2328 size = i_size_read(inode); 2316 size = i_size_read(inode);
2329 if ((page->mapping != inode->i_mapping) || 2317 if ((page->mapping != inode->i_mapping) ||
@@ -2361,6 +2349,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2361 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; 2349 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2362 2350
2363 sb_start_pagefault(sb); 2351 sb_start_pagefault(sb);
2352
2353 /*
2354 * Update file times before taking page lock. We may end up failing the
2355 * fault so this update may be superfluous but who really cares...
2356 */
2357 file_update_time(vma->vm_file);
2358
2364 ret = __block_page_mkwrite(vma, vmf, get_block); 2359 ret = __block_page_mkwrite(vma, vmf, get_block);
2365 sb_end_pagefault(sb); 2360 sb_end_pagefault(sb);
2366 return block_page_mkwrite_return(ret); 2361 return block_page_mkwrite_return(ret);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 452e71a1b753..6690269f5dde 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -205,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
205 dout("readpage inode %p file %p page %p index %lu\n", 205 dout("readpage inode %p file %p page %p index %lu\n",
206 inode, filp, page, page->index); 206 inode, filp, page, page->index);
207 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 207 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
208 page->index << PAGE_CACHE_SHIFT, &len, 208 (u64) page_offset(page), &len,
209 ci->i_truncate_seq, ci->i_truncate_size, 209 ci->i_truncate_seq, ci->i_truncate_size,
210 &page, 1, 0); 210 &page, 1, 0);
211 if (err == -ENOENT) 211 if (err == -ENOENT)
@@ -286,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
286 int nr_pages = 0; 286 int nr_pages = 0;
287 int ret; 287 int ret;
288 288
289 off = page->index << PAGE_CACHE_SHIFT; 289 off = (u64) page_offset(page);
290 290
291 /* count pages */ 291 /* count pages */
292 next_index = page->index; 292 next_index = page->index;
@@ -308,8 +308,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
308 NULL, 0, 308 NULL, 0,
309 ci->i_truncate_seq, ci->i_truncate_size, 309 ci->i_truncate_seq, ci->i_truncate_size,
310 NULL, false, 1, 0); 310 NULL, false, 1, 0);
311 if (!req) 311 if (IS_ERR(req))
312 return -ENOMEM; 312 return PTR_ERR(req);
313 313
314 /* build page vector */ 314 /* build page vector */
315 nr_pages = len >> PAGE_CACHE_SHIFT; 315 nr_pages = len >> PAGE_CACHE_SHIFT;
@@ -426,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
426 struct ceph_inode_info *ci; 426 struct ceph_inode_info *ci;
427 struct ceph_fs_client *fsc; 427 struct ceph_fs_client *fsc;
428 struct ceph_osd_client *osdc; 428 struct ceph_osd_client *osdc;
429 loff_t page_off = page->index << PAGE_CACHE_SHIFT; 429 loff_t page_off = page_offset(page);
430 int len = PAGE_CACHE_SIZE; 430 int len = PAGE_CACHE_SIZE;
431 loff_t i_size; 431 loff_t i_size;
432 int err = 0; 432 int err = 0;
@@ -817,8 +817,7 @@ get_more_pages:
817 /* ok */ 817 /* ok */
818 if (locked_pages == 0) { 818 if (locked_pages == 0) {
819 /* prepare async write request */ 819 /* prepare async write request */
820 offset = (unsigned long long)page->index 820 offset = (u64) page_offset(page);
821 << PAGE_CACHE_SHIFT;
822 len = wsize; 821 len = wsize;
823 req = ceph_osdc_new_request(&fsc->client->osdc, 822 req = ceph_osdc_new_request(&fsc->client->osdc,
824 &ci->i_layout, 823 &ci->i_layout,
@@ -832,8 +831,8 @@ get_more_pages:
832 ci->i_truncate_size, 831 ci->i_truncate_size,
833 &inode->i_mtime, true, 1, 0); 832 &inode->i_mtime, true, 1, 0);
834 833
835 if (!req) { 834 if (IS_ERR(req)) {
836 rc = -ENOMEM; 835 rc = PTR_ERR(req);
837 unlock_page(page); 836 unlock_page(page);
838 break; 837 break;
839 } 838 }
@@ -1180,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1180 struct inode *inode = vma->vm_file->f_dentry->d_inode; 1179 struct inode *inode = vma->vm_file->f_dentry->d_inode;
1181 struct page *page = vmf->page; 1180 struct page *page = vmf->page;
1182 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 1181 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
1183 loff_t off = page->index << PAGE_CACHE_SHIFT; 1182 loff_t off = page_offset(page);
1184 loff_t size, len; 1183 loff_t size, len;
1185 int ret; 1184 int ret;
1186 1185
@@ -1225,6 +1224,7 @@ out:
1225static struct vm_operations_struct ceph_vmops = { 1224static struct vm_operations_struct ceph_vmops = {
1226 .fault = filemap_fault, 1225 .fault = filemap_fault,
1227 .page_mkwrite = ceph_page_mkwrite, 1226 .page_mkwrite = ceph_page_mkwrite,
1227 .remap_pages = generic_file_remap_pages,
1228}; 1228};
1229 1229
1230int ceph_mmap(struct file *file, struct vm_area_struct *vma) 1230int ceph_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1235,6 +1235,5 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma)
1235 return -ENOEXEC; 1235 return -ENOEXEC;
1236 file_accessed(file); 1236 file_accessed(file);
1237 vma->vm_ops = &ceph_vmops; 1237 vma->vm_ops = &ceph_vmops;
1238 vma->vm_flags |= VM_CAN_NONLINEAR;
1239 return 0; 1238 return 0;
1240} 1239}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 620daad201db..3251e9cc6401 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1005,7 +1005,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
1005 1005
1006 BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); 1006 BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
1007 head = msg->front.iov_base; 1007 head = msg->front.iov_base;
1008 head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); 1008 le32_add_cpu(&head->num, 1);
1009 item = msg->front.iov_base + msg->front.iov_len; 1009 item = msg->front.iov_base + msg->front.iov_len;
1010 item->ino = cpu_to_le64(ino); 1010 item->ino = cpu_to_le64(ino);
1011 item->cap_id = cpu_to_le64(cap_id); 1011 item->cap_id = cpu_to_le64(cap_id);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962efdacee..6d59006bfa27 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
201 int err = -ENOMEM; 201 int err = -ENOMEM;
202 202
203 dout("ceph_fs_debugfs_init\n"); 203 dout("ceph_fs_debugfs_init\n");
204 BUG_ON(!fsc->client->debugfs_dir);
204 fsc->debugfs_congestion_kb = 205 fsc->debugfs_congestion_kb =
205 debugfs_create_file("writeback_congestion_kb", 206 debugfs_create_file("writeback_congestion_kb",
206 0600, 207 0600,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ecebbc09bfc7..5840d2aaed15 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -536,8 +536,8 @@ more:
536 do_sync, 536 do_sync,
537 ci->i_truncate_seq, ci->i_truncate_size, 537 ci->i_truncate_seq, ci->i_truncate_size,
538 &mtime, false, 2, page_align); 538 &mtime, false, 2, page_align);
539 if (!req) 539 if (IS_ERR(req))
540 return -ENOMEM; 540 return PTR_ERR(req);
541 541
542 if (file->f_flags & O_DIRECT) { 542 if (file->f_flags & O_DIRECT) {
543 pages = ceph_get_direct_page_vector(data, num_pages, false); 543 pages = ceph_get_direct_page_vector(data, num_pages, false);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9fff9f3b17e4..ba95eea201bf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
992 if (rinfo->head->is_dentry) { 992 if (rinfo->head->is_dentry) {
993 struct inode *dir = req->r_locked_dir; 993 struct inode *dir = req->r_locked_dir;
994 994
995 err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, 995 if (dir) {
996 session, req->r_request_started, -1, 996 err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
997 &req->r_caps_reservation); 997 session, req->r_request_started, -1,
998 if (err < 0) 998 &req->r_caps_reservation);
999 return err; 999 if (err < 0)
1000 return err;
1001 } else {
1002 WARN_ON_ONCE(1);
1003 }
1000 } 1004 }
1001 1005
1002 /* 1006 /*
@@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1004 * will have trouble splicing in the virtual snapdir later 1008 * will have trouble splicing in the virtual snapdir later
1005 */ 1009 */
1006 if (rinfo->head->is_dentry && !req->r_aborted && 1010 if (rinfo->head->is_dentry && !req->r_aborted &&
1011 req->r_locked_dir &&
1007 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, 1012 (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
1008 fsc->mount_options->snapdir_name, 1013 fsc->mount_options->snapdir_name,
1009 req->r_dentry->d_name.len))) { 1014 req->r_dentry->d_name.len))) {
@@ -1099,7 +1104,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1099 pr_err("fill_trace bad get_inode " 1104 pr_err("fill_trace bad get_inode "
1100 "%llx.%llx\n", vino.ino, vino.snap); 1105 "%llx.%llx\n", vino.ino, vino.snap);
1101 err = PTR_ERR(in); 1106 err = PTR_ERR(in);
1102 d_delete(dn); 1107 d_drop(dn);
1103 goto done; 1108 goto done;
1104 } 1109 }
1105 dn = splice_dentry(dn, in, &have_lease, true); 1110 dn = splice_dentry(dn, in, &have_lease, true);
@@ -1272,7 +1277,7 @@ retry_lookup:
1272 in = ceph_get_inode(parent->d_sb, vino); 1277 in = ceph_get_inode(parent->d_sb, vino);
1273 if (IS_ERR(in)) { 1278 if (IS_ERR(in)) {
1274 dout("new_inode badness\n"); 1279 dout("new_inode badness\n");
1275 d_delete(dn); 1280 d_drop(dn);
1276 dput(dn); 1281 dput(dn);
1277 err = PTR_ERR(in); 1282 err = PTR_ERR(in);
1278 goto out; 1283 goto out;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8e3fb69fbe62..36549a46e311 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -42,7 +42,8 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
42 /* validate striping parameters */ 42 /* validate striping parameters */
43 if ((l->object_size & ~PAGE_MASK) || 43 if ((l->object_size & ~PAGE_MASK) ||
44 (l->stripe_unit & ~PAGE_MASK) || 44 (l->stripe_unit & ~PAGE_MASK) ||
45 ((unsigned)l->object_size % (unsigned)l->stripe_unit)) 45 (l->stripe_unit != 0 &&
46 ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
46 return -EINVAL; 47 return -EINVAL;
47 48
48 /* make sure it's a valid data pool */ 49 /* make sure it's a valid data pool */
@@ -186,14 +187,18 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
186 u64 tmp; 187 u64 tmp;
187 struct ceph_object_layout ol; 188 struct ceph_object_layout ol;
188 struct ceph_pg pgid; 189 struct ceph_pg pgid;
190 int r;
189 191
190 /* copy and validate */ 192 /* copy and validate */
191 if (copy_from_user(&dl, arg, sizeof(dl))) 193 if (copy_from_user(&dl, arg, sizeof(dl)))
192 return -EFAULT; 194 return -EFAULT;
193 195
194 down_read(&osdc->map_sem); 196 down_read(&osdc->map_sem);
195 ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, 197 r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
196 &dl.object_no, &dl.object_offset, &olen); 198 &dl.object_no, &dl.object_offset,
199 &olen);
200 if (r < 0)
201 return -EIO;
197 dl.file_offset -= dl.object_offset; 202 dl.file_offset -= dl.object_offset;
198 dl.object_size = ceph_file_layout_object_size(ci->i_layout); 203 dl.object_size = ceph_file_layout_object_size(ci->i_layout);
199 dl.block_size = ceph_file_layout_su(ci->i_layout); 204 dl.block_size = ceph_file_layout_su(ci->i_layout);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a5a735422aa7..1bcf712655d9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2625,7 +2625,8 @@ static void check_new_map(struct ceph_mds_client *mdsc,
2625 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", 2625 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
2626 session_state_name(s->s_state)); 2626 session_state_name(s->s_state));
2627 2627
2628 if (memcmp(ceph_mdsmap_get_addr(oldmap, i), 2628 if (i >= newmap->m_max_mds ||
2629 memcmp(ceph_mdsmap_get_addr(oldmap, i),
2629 ceph_mdsmap_get_addr(newmap, i), 2630 ceph_mdsmap_get_addr(newmap, i),
2630 sizeof(struct ceph_entity_addr))) { 2631 sizeof(struct ceph_entity_addr))) {
2631 if (s->s_state == CEPH_MDS_SESSION_OPENING) { 2632 if (s->s_state == CEPH_MDS_SESSION_OPENING) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index b982239f38f9..2eb43f211325 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -307,7 +307,10 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
307{ 307{
308 struct ceph_mount_options *fsopt; 308 struct ceph_mount_options *fsopt;
309 const char *dev_name_end; 309 const char *dev_name_end;
310 int err = -ENOMEM; 310 int err;
311
312 if (!dev_name || !*dev_name)
313 return -EINVAL;
311 314
312 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); 315 fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
313 if (!fsopt) 316 if (!fsopt)
@@ -328,21 +331,33 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
328 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; 331 fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
329 fsopt->congestion_kb = default_congestion_kb(); 332 fsopt->congestion_kb = default_congestion_kb();
330 333
331 /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ 334 /*
335 * Distinguish the server list from the path in "dev_name".
336 * Internally we do not include the leading '/' in the path.
337 *
338 * "dev_name" will look like:
339 * <server_spec>[,<server_spec>...]:[<path>]
340 * where
341 * <server_spec> is <ip>[:<port>]
342 * <path> is optional, but if present must begin with '/'
343 */
344 dev_name_end = strchr(dev_name, '/');
345 if (dev_name_end) {
346 /* skip over leading '/' for path */
347 *path = dev_name_end + 1;
348 } else {
349 /* path is empty */
350 dev_name_end = dev_name + strlen(dev_name);
351 *path = dev_name_end;
352 }
332 err = -EINVAL; 353 err = -EINVAL;
333 if (!dev_name) 354 dev_name_end--; /* back up to ':' separator */
334 goto out; 355 if (*dev_name_end != ':') {
335 *path = strstr(dev_name, ":/"); 356 pr_err("device name is missing path (no : separator in %s)\n",
336 if (*path == NULL) {
337 pr_err("device name is missing path (no :/ in %s)\n",
338 dev_name); 357 dev_name);
339 goto out; 358 goto out;
340 } 359 }
341 dev_name_end = *path;
342 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); 360 dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
343
344 /* path on server */
345 *path += 2;
346 dout("server path '%s'\n", *path); 361 dout("server path '%s'\n", *path);
347 362
348 *popt = ceph_parse_options(options, dev_name, dev_name_end, 363 *popt = ceph_parse_options(options, dev_name, dev_name_end,
@@ -603,6 +618,11 @@ bad_cap:
603 618
604static void destroy_caches(void) 619static void destroy_caches(void)
605{ 620{
621 /*
622 * Make sure all delayed rcu free inodes are flushed before we
623 * destroy cache.
624 */
625 rcu_barrier();
606 kmem_cache_destroy(ceph_inode_cachep); 626 kmem_cache_destroy(ceph_inode_cachep);
607 kmem_cache_destroy(ceph_cap_cachep); 627 kmem_cache_destroy(ceph_cap_cachep);
608 kmem_cache_destroy(ceph_dentry_cachep); 628 kmem_cache_destroy(ceph_dentry_cachep);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index a08306a8bec9..2075ddfffa73 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -9,13 +9,14 @@ config CIFS
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_ECB 10 select CRYPTO_ECB
11 select CRYPTO_DES 11 select CRYPTO_DES
12 select CRYPTO_SHA256
12 help 13 help
13 This is the client VFS module for the Common Internet File System 14 This is the client VFS module for the Common Internet File System
14 (CIFS) protocol which is the successor to the Server Message Block 15 (CIFS) protocol which is the successor to the Server Message Block
15 (SMB) protocol, the native file sharing mechanism for most early 16 (SMB) protocol, the native file sharing mechanism for most early
16 PC operating systems. The CIFS protocol is fully supported by 17 PC operating systems. The CIFS protocol is fully supported by
17 file servers such as Windows 2000 (including Windows 2003, NT 4 18 file servers such as Windows 2000 (including Windows 2003, Windows 2008,
18 and Windows XP) as well by Samba (which provides excellent CIFS 19 NT 4 and Windows XP) as well by Samba (which provides excellent CIFS
19 server support for Linux and many other operating systems). Limited 20 server support for Linux and many other operating systems). Limited
20 support for OS/2 and Windows ME and similar servers is provided as 21 support for OS/2 and Windows ME and similar servers is provided as
21 well. 22 well.
@@ -114,6 +115,13 @@ config CIFS_POSIX
114 (such as Samba 3.10 and later) which can negotiate 115 (such as Samba 3.10 and later) which can negotiate
115 CIFS POSIX ACL support. If unsure, say N. 116 CIFS POSIX ACL support. If unsure, say N.
116 117
118config CIFS_ACL
119 bool "Provide CIFS ACL support"
120 depends on CIFS_XATTR && KEYS
121 help
122 Allows fetching CIFS/NTFS ACL from the server. The DACL blob
123 is handed over to the application/caller.
124
117config CIFS_DEBUG2 125config CIFS_DEBUG2
118 bool "Enable additional CIFS debugging routines" 126 bool "Enable additional CIFS debugging routines"
119 depends on CIFS 127 depends on CIFS
@@ -138,21 +146,6 @@ config CIFS_DFS_UPCALL
138 IP addresses) which is needed for implicit mounts of DFS junction 146 IP addresses) which is needed for implicit mounts of DFS junction
139 points. If unsure, say N. 147 points. If unsure, say N.
140 148
141config CIFS_FSCACHE
142 bool "Provide CIFS client caching support"
143 depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
144 help
145 Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
146 to be cached locally on disk through the general filesystem cache
147 manager. If unsure, say N.
148
149config CIFS_ACL
150 bool "Provide CIFS ACL support"
151 depends on CIFS_XATTR && KEYS
152 help
153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
154 is handed over to the application/caller.
155
156config CIFS_NFSD_EXPORT 149config CIFS_NFSD_EXPORT
157 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)" 150 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
158 depends on CIFS && EXPERIMENTAL && BROKEN 151 depends on CIFS && EXPERIMENTAL && BROKEN
@@ -161,7 +154,7 @@ config CIFS_NFSD_EXPORT
161 154
162config CIFS_SMB2 155config CIFS_SMB2
163 bool "SMB2 network file system support (EXPERIMENTAL)" 156 bool "SMB2 network file system support (EXPERIMENTAL)"
164 depends on EXPERIMENTAL && INET && BROKEN 157 depends on CIFS && EXPERIMENTAL && INET
165 select NLS 158 select NLS
166 select KEYS 159 select KEYS
167 select FSCACHE 160 select FSCACHE
@@ -178,3 +171,12 @@ config CIFS_SMB2
178 (compared to cifs) due to protocol improvements. 171 (compared to cifs) due to protocol improvements.
179 172
180 Unless you are a developer or tester, say N. 173 Unless you are a developer or tester, say N.
174
175config CIFS_FSCACHE
176 bool "Provide CIFS client caching support"
177 depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
178 help
179 Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
180 to be cached locally on disk through the general filesystem cache
181 manager. If unsure, say N.
182
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index feee94309271..aa0d68b086eb 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -17,4 +17,4 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o
17cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o 17cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
18 18
19cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \ 19cifs-$(CONFIG_CIFS_SMB2) += smb2ops.o smb2maperror.o smb2transport.o \
20 smb2misc.o smb2pdu.o smb2inode.o 20 smb2misc.o smb2pdu.o smb2inode.o smb2file.o
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 7dab9c04ad52..53cf2aabce87 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -328,7 +328,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
328 } 328 }
329 329
330ctoUTF16_out: 330ctoUTF16_out:
331 return i; 331 return j;
332} 332}
333 333
334#ifdef CONFIG_CIFS_SMB2 334#ifdef CONFIG_CIFS_SMB2
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 05f4dc263a23..2ee5c54797fa 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1222,7 +1222,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
1222 if (!open_file) 1222 if (!open_file)
1223 return get_cifs_acl_by_path(cifs_sb, path, pacllen); 1223 return get_cifs_acl_by_path(cifs_sb, path, pacllen);
1224 1224
1225 pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); 1225 pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->fid.netfid, pacllen);
1226 cifsFileInfo_put(open_file); 1226 cifsFileInfo_put(open_file);
1227 return pntsd; 1227 return pntsd;
1228} 1228}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 6a0d741159f0..652f5051be09 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -29,6 +29,7 @@
29#include "ntlmssp.h" 29#include "ntlmssp.h"
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31#include <linux/random.h> 31#include <linux/random.h>
32#include <linux/highmem.h>
32 33
33/* 34/*
34 * Calculate and return the CIFS signature based on the mac key and SMB PDU. 35 * Calculate and return the CIFS signature based on the mac key and SMB PDU.
@@ -37,11 +38,13 @@
37 * the sequence number before this function is called. Also, this function 38 * the sequence number before this function is called. Also, this function
38 * should be called with the server->srv_mutex held. 39 * should be called with the server->srv_mutex held.
39 */ 40 */
40static int cifs_calc_signature(const struct kvec *iov, int n_vec, 41static int cifs_calc_signature(struct smb_rqst *rqst,
41 struct TCP_Server_Info *server, char *signature) 42 struct TCP_Server_Info *server, char *signature)
42{ 43{
43 int i; 44 int i;
44 int rc; 45 int rc;
46 struct kvec *iov = rqst->rq_iov;
47 int n_vec = rqst->rq_nvec;
45 48
46 if (iov == NULL || signature == NULL || server == NULL) 49 if (iov == NULL || signature == NULL || server == NULL)
47 return -EINVAL; 50 return -EINVAL;
@@ -91,6 +94,16 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec,
91 } 94 }
92 } 95 }
93 96
97 /* now hash over the rq_pages array */
98 for (i = 0; i < rqst->rq_npages; i++) {
99 struct kvec p_iov;
100
101 cifs_rqst_page_to_kvec(rqst, i, &p_iov);
102 crypto_shash_update(&server->secmech.sdescmd5->shash,
103 p_iov.iov_base, p_iov.iov_len);
104 kunmap(rqst->rq_pages[i]);
105 }
106
94 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); 107 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
95 if (rc) 108 if (rc)
96 cERROR(1, "%s: Could not generate md5 hash", __func__); 109 cERROR(1, "%s: Could not generate md5 hash", __func__);
@@ -99,12 +112,12 @@ static int cifs_calc_signature(const struct kvec *iov, int n_vec,
99} 112}
100 113
101/* must be called with server->srv_mutex held */ 114/* must be called with server->srv_mutex held */
102int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, 115int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
103 __u32 *pexpected_response_sequence_number) 116 __u32 *pexpected_response_sequence_number)
104{ 117{
105 int rc = 0; 118 int rc = 0;
106 char smb_signature[20]; 119 char smb_signature[20];
107 struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; 120 struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
108 121
109 if ((cifs_pdu == NULL) || (server == NULL)) 122 if ((cifs_pdu == NULL) || (server == NULL))
110 return -EINVAL; 123 return -EINVAL;
@@ -125,7 +138,7 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
125 *pexpected_response_sequence_number = server->sequence_number++; 138 *pexpected_response_sequence_number = server->sequence_number++;
126 server->sequence_number++; 139 server->sequence_number++;
127 140
128 rc = cifs_calc_signature(iov, n_vec, server, smb_signature); 141 rc = cifs_calc_signature(rqst, server, smb_signature);
129 if (rc) 142 if (rc)
130 memset(cifs_pdu->Signature.SecuritySignature, 0, 8); 143 memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
131 else 144 else
@@ -134,6 +147,15 @@ int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
134 return rc; 147 return rc;
135} 148}
136 149
150int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
151 __u32 *pexpected_response_sequence)
152{
153 struct smb_rqst rqst = { .rq_iov = iov,
154 .rq_nvec = n_vec };
155
156 return cifs_sign_rqst(&rqst, server, pexpected_response_sequence);
157}
158
137/* must be called with server->srv_mutex held */ 159/* must be called with server->srv_mutex held */
138int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, 160int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
139 __u32 *pexpected_response_sequence_number) 161 __u32 *pexpected_response_sequence_number)
@@ -147,14 +169,14 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
147 pexpected_response_sequence_number); 169 pexpected_response_sequence_number);
148} 170}
149 171
150int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, 172int cifs_verify_signature(struct smb_rqst *rqst,
151 struct TCP_Server_Info *server, 173 struct TCP_Server_Info *server,
152 __u32 expected_sequence_number) 174 __u32 expected_sequence_number)
153{ 175{
154 unsigned int rc; 176 unsigned int rc;
155 char server_response_sig[8]; 177 char server_response_sig[8];
156 char what_we_think_sig_should_be[20]; 178 char what_we_think_sig_should_be[20];
157 struct smb_hdr *cifs_pdu = (struct smb_hdr *)iov[0].iov_base; 179 struct smb_hdr *cifs_pdu = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
158 180
159 if (cifs_pdu == NULL || server == NULL) 181 if (cifs_pdu == NULL || server == NULL)
160 return -EINVAL; 182 return -EINVAL;
@@ -186,8 +208,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
186 cifs_pdu->Signature.Sequence.Reserved = 0; 208 cifs_pdu->Signature.Sequence.Reserved = 0;
187 209
188 mutex_lock(&server->srv_mutex); 210 mutex_lock(&server->srv_mutex);
189 rc = cifs_calc_signature(iov, nr_iov, server, 211 rc = cifs_calc_signature(rqst, server, what_we_think_sig_should_be);
190 what_we_think_sig_should_be);
191 mutex_unlock(&server->srv_mutex); 212 mutex_unlock(&server->srv_mutex);
192 213
193 if (rc) 214 if (rc)
@@ -686,12 +707,17 @@ calc_seckey(struct cifs_ses *ses)
686void 707void
687cifs_crypto_shash_release(struct TCP_Server_Info *server) 708cifs_crypto_shash_release(struct TCP_Server_Info *server)
688{ 709{
710 if (server->secmech.hmacsha256)
711 crypto_free_shash(server->secmech.hmacsha256);
712
689 if (server->secmech.md5) 713 if (server->secmech.md5)
690 crypto_free_shash(server->secmech.md5); 714 crypto_free_shash(server->secmech.md5);
691 715
692 if (server->secmech.hmacmd5) 716 if (server->secmech.hmacmd5)
693 crypto_free_shash(server->secmech.hmacmd5); 717 crypto_free_shash(server->secmech.hmacmd5);
694 718
719 kfree(server->secmech.sdeschmacsha256);
720
695 kfree(server->secmech.sdeschmacmd5); 721 kfree(server->secmech.sdeschmacmd5);
696 722
697 kfree(server->secmech.sdescmd5); 723 kfree(server->secmech.sdescmd5);
@@ -716,6 +742,13 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
716 goto crypto_allocate_md5_fail; 742 goto crypto_allocate_md5_fail;
717 } 743 }
718 744
745 server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
746 if (IS_ERR(server->secmech.hmacsha256)) {
747 cERROR(1, "could not allocate crypto hmacsha256\n");
748 rc = PTR_ERR(server->secmech.hmacsha256);
749 goto crypto_allocate_hmacsha256_fail;
750 }
751
719 size = sizeof(struct shash_desc) + 752 size = sizeof(struct shash_desc) +
720 crypto_shash_descsize(server->secmech.hmacmd5); 753 crypto_shash_descsize(server->secmech.hmacmd5);
721 server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); 754 server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
@@ -727,7 +760,6 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
727 server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; 760 server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
728 server->secmech.sdeschmacmd5->shash.flags = 0x0; 761 server->secmech.sdeschmacmd5->shash.flags = 0x0;
729 762
730
731 size = sizeof(struct shash_desc) + 763 size = sizeof(struct shash_desc) +
732 crypto_shash_descsize(server->secmech.md5); 764 crypto_shash_descsize(server->secmech.md5);
733 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); 765 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
@@ -739,12 +771,29 @@ cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
739 server->secmech.sdescmd5->shash.tfm = server->secmech.md5; 771 server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
740 server->secmech.sdescmd5->shash.flags = 0x0; 772 server->secmech.sdescmd5->shash.flags = 0x0;
741 773
774 size = sizeof(struct shash_desc) +
775 crypto_shash_descsize(server->secmech.hmacsha256);
776 server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL);
777 if (!server->secmech.sdeschmacsha256) {
778 cERROR(1, "%s: Can't alloc hmacsha256\n", __func__);
779 rc = -ENOMEM;
780 goto crypto_allocate_hmacsha256_sdesc_fail;
781 }
782 server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256;
783 server->secmech.sdeschmacsha256->shash.flags = 0x0;
784
742 return 0; 785 return 0;
743 786
787crypto_allocate_hmacsha256_sdesc_fail:
788 kfree(server->secmech.sdescmd5);
789
744crypto_allocate_md5_sdesc_fail: 790crypto_allocate_md5_sdesc_fail:
745 kfree(server->secmech.sdeschmacmd5); 791 kfree(server->secmech.sdeschmacmd5);
746 792
747crypto_allocate_hmacmd5_sdesc_fail: 793crypto_allocate_hmacmd5_sdesc_fail:
794 crypto_free_shash(server->secmech.hmacsha256);
795
796crypto_allocate_hmacsha256_fail:
748 crypto_free_shash(server->secmech.md5); 797 crypto_free_shash(server->secmech.md5);
749 798
750crypto_allocate_md5_fail: 799crypto_allocate_md5_fail:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index db8a404a51dd..e7931cc55d0c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -36,6 +36,7 @@
36#include <linux/kthread.h> 36#include <linux/kthread.h>
37#include <linux/freezer.h> 37#include <linux/freezer.h>
38#include <linux/namei.h> 38#include <linux/namei.h>
39#include <linux/random.h>
39#include <net/ipv6.h> 40#include <net/ipv6.h>
40#include "cifsfs.h" 41#include "cifsfs.h"
41#include "cifspdu.h" 42#include "cifspdu.h"
@@ -51,7 +52,6 @@
51#ifdef CONFIG_CIFS_SMB2 52#ifdef CONFIG_CIFS_SMB2
52#include "smb2pdu.h" 53#include "smb2pdu.h"
53#endif 54#endif
54#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
55 55
56int cifsFYI = 0; 56int cifsFYI = 0;
57int cifsERROR = 1; 57int cifsERROR = 1;
@@ -89,6 +89,10 @@ extern mempool_t *cifs_mid_poolp;
89 89
90struct workqueue_struct *cifsiod_wq; 90struct workqueue_struct *cifsiod_wq;
91 91
92#ifdef CONFIG_CIFS_SMB2
93__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
94#endif
95
92static int 96static int
93cifs_read_super(struct super_block *sb) 97cifs_read_super(struct super_block *sb)
94{ 98{
@@ -160,13 +164,12 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
160 struct super_block *sb = dentry->d_sb; 164 struct super_block *sb = dentry->d_sb;
161 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 165 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
162 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); 166 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
163 int rc = -EOPNOTSUPP; 167 struct TCP_Server_Info *server = tcon->ses->server;
164 unsigned int xid; 168 unsigned int xid;
169 int rc = 0;
165 170
166 xid = get_xid(); 171 xid = get_xid();
167 172
168 buf->f_type = CIFS_MAGIC_NUMBER;
169
170 /* 173 /*
171 * PATH_MAX may be too long - it would presumably be total path, 174 * PATH_MAX may be too long - it would presumably be total path,
172 * but note that some servers (includinng Samba 3) have a shorter 175 * but note that some servers (includinng Samba 3) have a shorter
@@ -178,27 +181,8 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
178 buf->f_files = 0; /* undefined */ 181 buf->f_files = 0; /* undefined */
179 buf->f_ffree = 0; /* unlimited */ 182 buf->f_ffree = 0; /* unlimited */
180 183
181 /* 184 if (server->ops->queryfs)
182 * We could add a second check for a QFS Unix capability bit 185 rc = server->ops->queryfs(xid, tcon, buf);
183 */
184 if ((tcon->ses->capabilities & CAP_UNIX) &&
185 (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability)))
186 rc = CIFSSMBQFSPosixInfo(xid, tcon, buf);
187
188 /*
189 * Only need to call the old QFSInfo if failed on newer one,
190 * e.g. by OS/2.
191 **/
192 if (rc && (tcon->ses->capabilities & CAP_NT_SMBS))
193 rc = CIFSSMBQFSInfo(xid, tcon, buf);
194
195 /*
196 * Some old Windows servers also do not support level 103, retry with
197 * older level one if old server failed the previous call or we
198 * bypassed it because we detected that this was an older LANMAN sess
199 */
200 if (rc)
201 rc = SMBOldQFSInfo(xid, tcon, buf);
202 186
203 free_xid(xid); 187 free_xid(xid);
204 return 0; 188 return 0;
@@ -239,9 +223,10 @@ cifs_alloc_inode(struct super_block *sb)
239 return NULL; 223 return NULL;
240 cifs_inode->cifsAttrs = 0x20; /* default */ 224 cifs_inode->cifsAttrs = 0x20; /* default */
241 cifs_inode->time = 0; 225 cifs_inode->time = 0;
242 /* Until the file is open and we have gotten oplock 226 /*
243 info back from the server, can not assume caching of 227 * Until the file is open and we have gotten oplock info back from the
244 file data or metadata */ 228 * server, can not assume caching of file data or metadata.
229 */
245 cifs_set_oplock_level(cifs_inode, 0); 230 cifs_set_oplock_level(cifs_inode, 0);
246 cifs_inode->delete_pending = false; 231 cifs_inode->delete_pending = false;
247 cifs_inode->invalid_mapping = false; 232 cifs_inode->invalid_mapping = false;
@@ -249,11 +234,16 @@ cifs_alloc_inode(struct super_block *sb)
249 cifs_inode->server_eof = 0; 234 cifs_inode->server_eof = 0;
250 cifs_inode->uniqueid = 0; 235 cifs_inode->uniqueid = 0;
251 cifs_inode->createtime = 0; 236 cifs_inode->createtime = 0;
252 237#ifdef CONFIG_CIFS_SMB2
253 /* Can not set i_flags here - they get immediately overwritten 238 get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
254 to zero by the VFS */ 239#endif
255/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME;*/ 240 /*
241 * Can not set i_flags here - they get immediately overwritten to zero
242 * by the VFS.
243 */
244 /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
256 INIT_LIST_HEAD(&cifs_inode->openFileList); 245 INIT_LIST_HEAD(&cifs_inode->openFileList);
246 INIT_LIST_HEAD(&cifs_inode->llist);
257 return &cifs_inode->vfs_inode; 247 return &cifs_inode->vfs_inode;
258} 248}
259 249
@@ -360,7 +350,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
360 cifs_show_security(s, tcon->ses->server); 350 cifs_show_security(s, tcon->ses->server);
361 cifs_show_cache_flavor(s, cifs_sb); 351 cifs_show_cache_flavor(s, cifs_sb);
362 352
363 seq_printf(s, ",unc=%s", tcon->treeName); 353 seq_printf(s, ",unc=");
354 seq_escape(s, tcon->treeName, " \t\n\\");
364 355
365 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 356 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
366 seq_printf(s, ",multiuser"); 357 seq_printf(s, ",multiuser");
@@ -957,7 +948,7 @@ cifs_init_once(void *inode)
957 struct cifsInodeInfo *cifsi = inode; 948 struct cifsInodeInfo *cifsi = inode;
958 949
959 inode_init_once(&cifsi->vfs_inode); 950 inode_init_once(&cifsi->vfs_inode);
960 mutex_init(&cifsi->lock_mutex); 951 init_rwsem(&cifsi->lock_sem);
961} 952}
962 953
963static int 954static int
@@ -977,6 +968,11 @@ cifs_init_inodecache(void)
977static void 968static void
978cifs_destroy_inodecache(void) 969cifs_destroy_inodecache(void)
979{ 970{
971 /*
972 * Make sure all delayed rcu free inodes are flushed before we
973 * destroy cache.
974 */
975 rcu_barrier();
980 kmem_cache_destroy(cifs_inode_cachep); 976 kmem_cache_destroy(cifs_inode_cachep);
981} 977}
982 978
@@ -1127,6 +1123,10 @@ init_cifs(void)
1127 spin_lock_init(&cifs_file_list_lock); 1123 spin_lock_init(&cifs_file_list_lock);
1128 spin_lock_init(&GlobalMid_Lock); 1124 spin_lock_init(&GlobalMid_Lock);
1129 1125
1126#ifdef CONFIG_CIFS_SMB2
1127 get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
1128#endif
1129
1130 if (cifs_max_pending < 2) { 1130 if (cifs_max_pending < 2) {
1131 cifs_max_pending = 2; 1131 cifs_max_pending = 2;
1132 cFYI(1, "cifs_max_pending set to min of 2"); 1132 cFYI(1, "cifs_max_pending set to min of 2");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 1c49c5a9b27a..7163419cecd9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -128,5 +128,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
128extern const struct export_operations cifs_export_ops; 128extern const struct export_operations cifs_export_ops;
129#endif /* CONFIG_CIFS_NFSD_EXPORT */ 129#endif /* CONFIG_CIFS_NFSD_EXPORT */
130 130
131#define CIFS_VERSION "1.78" 131#define CIFS_VERSION "2.0"
132#endif /* _CIFSFS_H */ 132#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 977dc0e85ccb..f5af2527fc69 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -32,6 +32,8 @@
32#include "smb2pdu.h" 32#include "smb2pdu.h"
33#endif 33#endif
34 34
35#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
36
35/* 37/*
36 * The sizes of various internal tables and strings 38 * The sizes of various internal tables and strings
37 */ 39 */
@@ -128,8 +130,10 @@ struct sdesc {
128struct cifs_secmech { 130struct cifs_secmech {
129 struct crypto_shash *hmacmd5; /* hmac-md5 hash function */ 131 struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
130 struct crypto_shash *md5; /* md5 hash function */ 132 struct crypto_shash *md5; /* md5 hash function */
133 struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */
131 struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */ 134 struct sdesc *sdeschmacmd5; /* ctxt to generate ntlmv2 hash, CR1 */
132 struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */ 135 struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
136 struct sdesc *sdeschmacsha256; /* ctxt to generate smb2 signature */
133}; 137};
134 138
135/* per smb session structure/fields */ 139/* per smb session structure/fields */
@@ -158,9 +162,24 @@ struct cifs_cred {
158 ***************************************************************** 162 *****************************************************************
159 */ 163 */
160 164
165/*
166 * A smb_rqst represents a complete request to be issued to a server. It's
167 * formed by a kvec array, followed by an array of pages. Page data is assumed
168 * to start at the beginning of the first page.
169 */
170struct smb_rqst {
171 struct kvec *rq_iov; /* array of kvecs */
172 unsigned int rq_nvec; /* number of kvecs in array */
173 struct page **rq_pages; /* pointer to array of page ptrs */
174 unsigned int rq_npages; /* number pages in array */
175 unsigned int rq_pagesz; /* page size to use */
176 unsigned int rq_tailsz; /* length of last page */
177};
178
161enum smb_version { 179enum smb_version {
162 Smb_1 = 1, 180 Smb_1 = 1,
163 Smb_21, 181 Smb_21,
182 Smb_30,
164}; 183};
165 184
166struct mid_q_entry; 185struct mid_q_entry;
@@ -171,17 +190,23 @@ struct cifs_tcon;
171struct dfs_info3_param; 190struct dfs_info3_param;
172struct cifs_fattr; 191struct cifs_fattr;
173struct smb_vol; 192struct smb_vol;
193struct cifs_fid;
194struct cifs_readdata;
195struct cifs_writedata;
196struct cifs_io_parms;
197struct cifs_search_info;
198struct cifsInodeInfo;
174 199
175struct smb_version_operations { 200struct smb_version_operations {
176 int (*send_cancel)(struct TCP_Server_Info *, void *, 201 int (*send_cancel)(struct TCP_Server_Info *, void *,
177 struct mid_q_entry *); 202 struct mid_q_entry *);
178 bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); 203 bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *);
179 /* setup request: allocate mid, sign message */ 204 /* setup request: allocate mid, sign message */
180 int (*setup_request)(struct cifs_ses *, struct kvec *, unsigned int, 205 struct mid_q_entry *(*setup_request)(struct cifs_ses *,
181 struct mid_q_entry **); 206 struct smb_rqst *);
182 /* setup async request: allocate mid, sign message */ 207 /* setup async request: allocate mid, sign message */
183 int (*setup_async_request)(struct TCP_Server_Info *, struct kvec *, 208 struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *,
184 unsigned int, struct mid_q_entry **); 209 struct smb_rqst *);
185 /* check response: verify signature, map error */ 210 /* check response: verify signature, map error */
186 int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, 211 int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *,
187 bool); 212 bool);
@@ -212,6 +237,10 @@ struct smb_version_operations {
212 bool (*need_neg)(struct TCP_Server_Info *); 237 bool (*need_neg)(struct TCP_Server_Info *);
213 /* negotiate to the server */ 238 /* negotiate to the server */
214 int (*negotiate)(const unsigned int, struct cifs_ses *); 239 int (*negotiate)(const unsigned int, struct cifs_ses *);
240 /* set negotiated write size */
241 unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *);
242 /* set negotiated read size */
243 unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *);
215 /* setup smb session */ 244 /* setup smb session */
216 int (*sess_setup)(const unsigned int, struct cifs_ses *, 245 int (*sess_setup)(const unsigned int, struct cifs_ses *,
217 const struct nls_table *); 246 const struct nls_table *);
@@ -235,10 +264,22 @@ struct smb_version_operations {
235 int (*query_path_info)(const unsigned int, struct cifs_tcon *, 264 int (*query_path_info)(const unsigned int, struct cifs_tcon *,
236 struct cifs_sb_info *, const char *, 265 struct cifs_sb_info *, const char *,
237 FILE_ALL_INFO *, bool *); 266 FILE_ALL_INFO *, bool *);
267 /* query file data from the server */
268 int (*query_file_info)(const unsigned int, struct cifs_tcon *,
269 struct cifs_fid *, FILE_ALL_INFO *);
238 /* get server index number */ 270 /* get server index number */
239 int (*get_srv_inum)(const unsigned int, struct cifs_tcon *, 271 int (*get_srv_inum)(const unsigned int, struct cifs_tcon *,
240 struct cifs_sb_info *, const char *, 272 struct cifs_sb_info *, const char *,
241 u64 *uniqueid, FILE_ALL_INFO *); 273 u64 *uniqueid, FILE_ALL_INFO *);
274 /* set size by path */
275 int (*set_path_size)(const unsigned int, struct cifs_tcon *,
276 const char *, __u64, struct cifs_sb_info *, bool);
277 /* set size by file handle */
278 int (*set_file_size)(const unsigned int, struct cifs_tcon *,
279 struct cifsFileInfo *, __u64, bool);
280 /* set attributes */
281 int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *,
282 const unsigned int);
242 /* build a full path to the root of the mount */ 283 /* build a full path to the root of the mount */
243 char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *, 284 char * (*build_path_to_root)(struct smb_vol *, struct cifs_sb_info *,
244 struct cifs_tcon *); 285 struct cifs_tcon *);
@@ -256,10 +297,84 @@ struct smb_version_operations {
256 /* remove directory */ 297 /* remove directory */
257 int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, 298 int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *,
258 struct cifs_sb_info *); 299 struct cifs_sb_info *);
300 /* unlink file */
301 int (*unlink)(const unsigned int, struct cifs_tcon *, const char *,
302 struct cifs_sb_info *);
303 /* open, rename and delete file */
304 int (*rename_pending_delete)(const char *, struct dentry *,
305 const unsigned int);
306 /* send rename request */
307 int (*rename)(const unsigned int, struct cifs_tcon *, const char *,
308 const char *, struct cifs_sb_info *);
309 /* send create hardlink request */
310 int (*create_hardlink)(const unsigned int, struct cifs_tcon *,
311 const char *, const char *,
312 struct cifs_sb_info *);
313 /* open a file for non-posix mounts */
314 int (*open)(const unsigned int, struct cifs_tcon *, const char *, int,
315 int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *,
316 struct cifs_sb_info *);
317 /* set fid protocol-specific info */
318 void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
319 /* close a file */
320 void (*close)(const unsigned int, struct cifs_tcon *,
321 struct cifs_fid *);
322 /* send a flush request to the server */
323 int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
324 /* async read from the server */
325 int (*async_readv)(struct cifs_readdata *);
326 /* async write to the server */
327 int (*async_writev)(struct cifs_writedata *);
328 /* sync read from the server */
329 int (*sync_read)(const unsigned int, struct cifsFileInfo *,
330 struct cifs_io_parms *, unsigned int *, char **,
331 int *);
332 /* sync write to the server */
333 int (*sync_write)(const unsigned int, struct cifsFileInfo *,
334 struct cifs_io_parms *, unsigned int *, struct kvec *,
335 unsigned long);
336 /* open dir, start readdir */
337 int (*query_dir_first)(const unsigned int, struct cifs_tcon *,
338 const char *, struct cifs_sb_info *,
339 struct cifs_fid *, __u16,
340 struct cifs_search_info *);
341 /* continue readdir */
342 int (*query_dir_next)(const unsigned int, struct cifs_tcon *,
343 struct cifs_fid *,
344 __u16, struct cifs_search_info *srch_inf);
345 /* close dir */
346 int (*close_dir)(const unsigned int, struct cifs_tcon *,
347 struct cifs_fid *);
348 /* calculate a size of SMB message */
349 unsigned int (*calc_smb_size)(void *);
350 /* check for STATUS_PENDING and process it in a positive case */
351 bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
352 /* send oplock break response */
353 int (*oplock_response)(struct cifs_tcon *, struct cifs_fid *,
354 struct cifsInodeInfo *);
355 /* query remote filesystem */
356 int (*queryfs)(const unsigned int, struct cifs_tcon *,
357 struct kstatfs *);
358 /* send mandatory brlock to the server */
359 int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64,
360 __u64, __u32, int, int, bool);
361 /* unlock range of mandatory locks */
362 int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *,
363 const unsigned int);
364 /* push brlocks from the cache to the server */
365 int (*push_mand_locks)(struct cifsFileInfo *);
366 /* get lease key of the inode */
367 void (*get_lease_key)(struct inode *, struct cifs_fid *fid);
368 /* set lease key of the inode */
369 void (*set_lease_key)(struct inode *, struct cifs_fid *fid);
370 /* generate new lease key */
371 void (*new_lease_key)(struct cifs_fid *fid);
259}; 372};
260 373
261struct smb_version_values { 374struct smb_version_values {
262 char *version_string; 375 char *version_string;
376 __u16 protocol_id;
377 __u32 req_capabilities;
263 __u32 large_lock_type; 378 __u32 large_lock_type;
264 __u32 exclusive_lock_type; 379 __u32 exclusive_lock_type;
265 __u32 shared_lock_type; 380 __u32 shared_lock_type;
@@ -496,6 +611,51 @@ get_next_mid(struct TCP_Server_Info *server)
496} 611}
497 612
498/* 613/*
614 * When the server supports very large reads and writes via POSIX extensions,
615 * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not
616 * including the RFC1001 length.
617 *
618 * Note that this might make for "interesting" allocation problems during
619 * writeback however as we have to allocate an array of pointers for the
620 * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
621 *
622 * For reads, there is a similar problem as we need to allocate an array
623 * of kvecs to handle the receive, though that should only need to be done
624 * once.
625 */
626#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
627#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4)
628
629/*
630 * When the server doesn't allow large posix writes, only allow a rsize/wsize
631 * of 2^17-1 minus the size of the call header. That allows for a read or
632 * write up to the maximum size described by RFC1002.
633 */
634#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
635#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)
636
637/*
638 * The default wsize is 1M. find_get_pages seems to return a maximum of 256
639 * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
640 * a single wsize request with a single call.
641 */
642#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
643
644/*
645 * Windows only supports a max of 60kb reads and 65535 byte writes. Default to
646 * those values when posix extensions aren't in force. In actuality here, we
647 * use 65536 to allow for a write that is a multiple of 4k. Most servers seem
648 * to be ok with the extra byte even though Windows doesn't send writes that
649 * are that large.
650 *
651 * Citation:
652 *
653 * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx
654 */
655#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
656#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
657
658/*
499 * Macros to allow the TCP_Server_Info->net field and related code to drop out 659 * Macros to allow the TCP_Server_Info->net field and related code to drop out
500 * when CONFIG_NET_NS isn't set. 660 * when CONFIG_NET_NS isn't set.
501 */ 661 */
@@ -559,6 +719,7 @@ struct cifs_ses {
559 __u16 session_flags; 719 __u16 session_flags;
560#endif /* CONFIG_CIFS_SMB2 */ 720#endif /* CONFIG_CIFS_SMB2 */
561}; 721};
722
562/* no more than one of the following three session flags may be set */ 723/* no more than one of the following three session flags may be set */
563#define CIFS_SES_NT4 1 724#define CIFS_SES_NT4 1
564#define CIFS_SES_OS2 2 725#define CIFS_SES_OS2 2
@@ -665,6 +826,7 @@ struct cifs_tcon {
665 u64 resource_id; /* server resource id */ 826 u64 resource_id; /* server resource id */
666 struct fscache_cookie *fscache; /* cookie for share */ 827 struct fscache_cookie *fscache; /* cookie for share */
667#endif 828#endif
829 struct list_head pending_opens; /* list of incomplete opens */
668 /* BB add field for back pointer to sb struct(s)? */ 830 /* BB add field for back pointer to sb struct(s)? */
669}; 831};
670 832
@@ -707,6 +869,15 @@ cifs_get_tlink(struct tcon_link *tlink)
707/* This function is always expected to succeed */ 869/* This function is always expected to succeed */
708extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); 870extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb);
709 871
872#define CIFS_OPLOCK_NO_CHANGE 0xfe
873
874struct cifs_pending_open {
875 struct list_head olist;
876 struct tcon_link *tlink;
877 __u8 lease_key[16];
878 __u32 oplock;
879};
880
710/* 881/*
711 * This info hangs off the cifsFileInfo structure, pointed to by llist. 882 * This info hangs off the cifsFileInfo structure, pointed to by llist.
712 * This is used to track byte stream locks on the file 883 * This is used to track byte stream locks on the file
@@ -740,16 +911,29 @@ struct cifs_search_info {
740 bool smallBuf:1; /* so we know which buf_release function to call */ 911 bool smallBuf:1; /* so we know which buf_release function to call */
741}; 912};
742 913
914struct cifs_fid {
915 __u16 netfid;
916#ifdef CONFIG_CIFS_SMB2
917 __u64 persistent_fid; /* persist file id for smb2 */
918 __u64 volatile_fid; /* volatile file id for smb2 */
919 __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */
920#endif
921 struct cifs_pending_open *pending_open;
922};
923
924struct cifs_fid_locks {
925 struct list_head llist;
926 struct cifsFileInfo *cfile; /* fid that owns locks */
927 struct list_head locks; /* locks held by fid above */
928};
929
743struct cifsFileInfo { 930struct cifsFileInfo {
744 struct list_head tlist; /* pointer to next fid owned by tcon */ 931 struct list_head tlist; /* pointer to next fid owned by tcon */
745 struct list_head flist; /* next fid (file instance) for this inode */ 932 struct list_head flist; /* next fid (file instance) for this inode */
746 struct list_head llist; /* 933 struct cifs_fid_locks *llist; /* brlocks held by this fid */
747 * brlocks held by this fid, protected by
748 * lock_mutex from cifsInodeInfo structure
749 */
750 unsigned int uid; /* allows finding which FileInfo structure */ 934 unsigned int uid; /* allows finding which FileInfo structure */
751 __u32 pid; /* process id who opened file */ 935 __u32 pid; /* process id who opened file */
752 __u16 netfid; /* file id from remote */ 936 struct cifs_fid fid; /* file id from remote */
753 /* BB add lock scope info here if needed */ ; 937 /* BB add lock scope info here if needed */ ;
754 /* lock scope id (0 if none) */ 938 /* lock scope id (0 if none) */
755 struct dentry *dentry; 939 struct dentry *dentry;
@@ -765,12 +949,60 @@ struct cifsFileInfo {
765 949
766struct cifs_io_parms { 950struct cifs_io_parms {
767 __u16 netfid; 951 __u16 netfid;
952#ifdef CONFIG_CIFS_SMB2
953 __u64 persistent_fid; /* persist file id for smb2 */
954 __u64 volatile_fid; /* volatile file id for smb2 */
955#endif
768 __u32 pid; 956 __u32 pid;
769 __u64 offset; 957 __u64 offset;
770 unsigned int length; 958 unsigned int length;
771 struct cifs_tcon *tcon; 959 struct cifs_tcon *tcon;
772}; 960};
773 961
962struct cifs_readdata;
963
964/* asynchronous read support */
965struct cifs_readdata {
966 struct kref refcount;
967 struct list_head list;
968 struct completion done;
969 struct cifsFileInfo *cfile;
970 struct address_space *mapping;
971 __u64 offset;
972 unsigned int bytes;
973 pid_t pid;
974 int result;
975 struct work_struct work;
976 int (*read_into_pages)(struct TCP_Server_Info *server,
977 struct cifs_readdata *rdata,
978 unsigned int len);
979 struct kvec iov;
980 unsigned int pagesz;
981 unsigned int tailsz;
982 unsigned int nr_pages;
983 struct page *pages[];
984};
985
986struct cifs_writedata;
987
988/* asynchronous write support */
989struct cifs_writedata {
990 struct kref refcount;
991 struct list_head list;
992 struct completion done;
993 enum writeback_sync_modes sync_mode;
994 struct work_struct work;
995 struct cifsFileInfo *cfile;
996 __u64 offset;
997 pid_t pid;
998 unsigned int bytes;
999 int result;
1000 unsigned int pagesz;
1001 unsigned int tailsz;
1002 unsigned int nr_pages;
1003 struct page *pages[1];
1004};
1005
774/* 1006/*
775 * Take a reference on the file private data. Must be called with 1007 * Take a reference on the file private data. Must be called with
776 * cifs_file_list_lock held. 1008 * cifs_file_list_lock held.
@@ -790,11 +1022,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
790 1022
791struct cifsInodeInfo { 1023struct cifsInodeInfo {
792 bool can_cache_brlcks; 1024 bool can_cache_brlcks;
793 struct mutex lock_mutex; /* 1025 struct list_head llist; /* locks held by this inode */
794 * protect the field above and llist 1026 struct rw_semaphore lock_sem; /* protect the fields above */
795 * from every cifsFileInfo structure
796 * from openFileList
797 */
798 /* BB add in lists for dirty pages i.e. write caching info for oplock */ 1027 /* BB add in lists for dirty pages i.e. write caching info for oplock */
799 struct list_head openFileList; 1028 struct list_head openFileList;
800 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 1029 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
@@ -806,6 +1035,9 @@ struct cifsInodeInfo {
806 u64 server_eof; /* current file size on server -- protected by i_lock */ 1035 u64 server_eof; /* current file size on server -- protected by i_lock */
807 u64 uniqueid; /* server inode number */ 1036 u64 uniqueid; /* server inode number */
808 u64 createtime; /* creation time on server */ 1037 u64 createtime; /* creation time on server */
1038#ifdef CONFIG_CIFS_SMB2
1039 __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */
1040#endif
809#ifdef CONFIG_CIFS_FSCACHE 1041#ifdef CONFIG_CIFS_FSCACHE
810 struct fscache_cookie *fscache; 1042 struct fscache_cookie *fscache;
811#endif 1043#endif
@@ -1130,7 +1362,7 @@ require use of the stronger protocol */
1130#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ 1362#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
1131#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ 1363#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
1132 1364
1133#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) 1365#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP)
1134#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) 1366#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
1135#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) 1367#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
1136/* 1368/*
@@ -1267,7 +1499,13 @@ extern mempool_t *cifs_mid_poolp;
1267#define SMB1_VERSION_STRING "1.0" 1499#define SMB1_VERSION_STRING "1.0"
1268extern struct smb_version_operations smb1_operations; 1500extern struct smb_version_operations smb1_operations;
1269extern struct smb_version_values smb1_values; 1501extern struct smb_version_values smb1_values;
1502#define SMB20_VERSION_STRING "2.0"
1503/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */
1504extern struct smb_version_values smb20_values;
1270#define SMB21_VERSION_STRING "2.1" 1505#define SMB21_VERSION_STRING "2.1"
1271extern struct smb_version_operations smb21_operations; 1506extern struct smb_version_operations smb21_operations;
1272extern struct smb_version_values smb21_values; 1507extern struct smb_version_values smb21_values;
1508#define SMB30_VERSION_STRING "3.0"
1509/*extern struct smb_version_operations smb30_operations; */ /* not needed yet */
1510extern struct smb_version_values smb30_values;
1273#endif /* _CIFS_GLOB_H */ 1511#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 3fb03e2c8e86..b9d59a948a2c 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2210,7 +2210,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
2210 __u8 DeletePending; 2210 __u8 DeletePending;
2211 __u8 Directory; 2211 __u8 Directory;
2212 __u16 Pad2; 2212 __u16 Pad2;
2213 __u64 IndexNumber; 2213 __le64 IndexNumber;
2214 __le32 EASize; 2214 __le32 EASize;
2215 __le32 AccessFlags; 2215 __le32 AccessFlags;
2216 __u64 IndexNumber1; 2216 __u64 IndexNumber1;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f1bbf8305d3a..5144e9fbeb8c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -24,6 +24,7 @@
24 24
25struct statfs; 25struct statfs;
26struct smb_vol; 26struct smb_vol;
27struct smb_rqst;
27 28
28/* 29/*
29 ***************************************************************** 30 *****************************************************************
@@ -35,6 +36,8 @@ extern struct smb_hdr *cifs_buf_get(void);
35extern void cifs_buf_release(void *); 36extern void cifs_buf_release(void *);
36extern struct smb_hdr *cifs_small_buf_get(void); 37extern struct smb_hdr *cifs_small_buf_get(void);
37extern void cifs_small_buf_release(void *); 38extern void cifs_small_buf_release(void *);
39extern void cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
40 struct kvec *iov);
38extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *, 41extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
39 unsigned int /* length */); 42 unsigned int /* length */);
40extern unsigned int _get_xid(void); 43extern unsigned int _get_xid(void);
@@ -65,21 +68,22 @@ extern char *cifs_compose_mount_options(const char *sb_mountdata,
65extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, 68extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
66 struct TCP_Server_Info *server); 69 struct TCP_Server_Info *server);
67extern void DeleteMidQEntry(struct mid_q_entry *midEntry); 70extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
71extern void cifs_delete_mid(struct mid_q_entry *mid);
68extern void cifs_wake_up_task(struct mid_q_entry *mid); 72extern void cifs_wake_up_task(struct mid_q_entry *mid);
69extern int cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, 73extern int cifs_call_async(struct TCP_Server_Info *server,
70 unsigned int nvec, mid_receive_t *receive, 74 struct smb_rqst *rqst,
71 mid_callback_t *callback, void *cbdata, 75 mid_receive_t *receive, mid_callback_t *callback,
72 const int flags); 76 void *cbdata, const int flags);
73extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *, 77extern int SendReceive(const unsigned int /* xid */ , struct cifs_ses *,
74 struct smb_hdr * /* input */ , 78 struct smb_hdr * /* input */ ,
75 struct smb_hdr * /* out */ , 79 struct smb_hdr * /* out */ ,
76 int * /* bytes returned */ , const int); 80 int * /* bytes returned */ , const int);
77extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses, 81extern int SendReceiveNoRsp(const unsigned int xid, struct cifs_ses *ses,
78 char *in_buf, int flags); 82 char *in_buf, int flags);
79extern int cifs_setup_request(struct cifs_ses *, struct kvec *, unsigned int, 83extern struct mid_q_entry *cifs_setup_request(struct cifs_ses *,
80 struct mid_q_entry **); 84 struct smb_rqst *);
81extern int cifs_setup_async_request(struct TCP_Server_Info *, struct kvec *, 85extern struct mid_q_entry *cifs_setup_async_request(struct TCP_Server_Info *,
82 unsigned int, struct mid_q_entry **); 86 struct smb_rqst *);
83extern int cifs_check_receive(struct mid_q_entry *mid, 87extern int cifs_check_receive(struct mid_q_entry *mid,
84 struct TCP_Server_Info *server, bool log_error); 88 struct TCP_Server_Info *server, bool log_error);
85extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *, 89extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
@@ -99,7 +103,7 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
99 unsigned int bytes_written); 103 unsigned int bytes_written);
100extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 104extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
101extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 105extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
102extern unsigned int smbCalcSize(struct smb_hdr *ptr); 106extern unsigned int smbCalcSize(void *buf);
103extern int decode_negTokenInit(unsigned char *security_blob, int length, 107extern int decode_negTokenInit(unsigned char *security_blob, int length,
104 struct TCP_Server_Info *server); 108 struct TCP_Server_Info *server);
105extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 109extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -120,10 +124,14 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
120extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, 124extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
121 int offset); 125 int offset);
122extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); 126extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
123 127extern int cifs_unlock_range(struct cifsFileInfo *cfile,
124extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, 128 struct file_lock *flock, const unsigned int xid);
125 struct file *file, struct tcon_link *tlink, 129extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
126 __u32 oplock); 130
131extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
132 struct file *file,
133 struct tcon_link *tlink,
134 __u32 oplock);
127extern int cifs_posix_open(char *full_path, struct inode **inode, 135extern int cifs_posix_open(char *full_path, struct inode **inode,
128 struct super_block *sb, int mode, 136 struct super_block *sb, int mode,
129 unsigned int f_flags, __u32 *oplock, __u16 *netfid, 137 unsigned int f_flags, __u32 *oplock, __u16 *netfid,
@@ -132,18 +140,23 @@ void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr);
132extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, 140extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
133 FILE_UNIX_BASIC_INFO *info, 141 FILE_UNIX_BASIC_INFO *info,
134 struct cifs_sb_info *cifs_sb); 142 struct cifs_sb_info *cifs_sb);
143extern void cifs_dir_info_to_fattr(struct cifs_fattr *, FILE_DIRECTORY_INFO *,
144 struct cifs_sb_info *);
135extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr); 145extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
136extern struct inode *cifs_iget(struct super_block *sb, 146extern struct inode *cifs_iget(struct super_block *sb,
137 struct cifs_fattr *fattr); 147 struct cifs_fattr *fattr);
138 148
139extern int cifs_get_file_info(struct file *filp);
140extern int cifs_get_inode_info(struct inode **inode, const char *full_path, 149extern int cifs_get_inode_info(struct inode **inode, const char *full_path,
141 FILE_ALL_INFO *data, struct super_block *sb, 150 FILE_ALL_INFO *data, struct super_block *sb,
142 int xid, const __u16 *fid); 151 int xid, const __u16 *fid);
143extern int cifs_get_file_info_unix(struct file *filp);
144extern int cifs_get_inode_info_unix(struct inode **pinode, 152extern int cifs_get_inode_info_unix(struct inode **pinode,
145 const unsigned char *search_path, 153 const unsigned char *search_path,
146 struct super_block *sb, unsigned int xid); 154 struct super_block *sb, unsigned int xid);
155extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs,
156 unsigned int xid, char *full_path, __u32 dosattr);
157extern int cifs_rename_pending_delete(const char *full_path,
158 struct dentry *dentry,
159 const unsigned int xid);
147extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, 160extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
148 struct cifs_fattr *fattr, struct inode *inode, 161 struct cifs_fattr *fattr, struct inode *inode,
149 const char *path, const __u16 *pfid); 162 const char *path, const __u16 *pfid);
@@ -169,6 +182,17 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
169extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); 182extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
170extern void cifs_umount(struct cifs_sb_info *); 183extern void cifs_umount(struct cifs_sb_info *);
171extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon); 184extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
185extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
186 __u64 length, __u8 type,
187 struct cifsLockInfo **conf_lock,
188 bool rw_check);
189extern void cifs_add_pending_open(struct cifs_fid *fid,
190 struct tcon_link *tlink,
191 struct cifs_pending_open *open);
192extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
193 struct tcon_link *tlink,
194 struct cifs_pending_open *open);
195extern void cifs_del_pending_open(struct cifs_pending_open *open);
172 196
173#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) 197#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
174extern void cifs_dfs_release_automount_timer(void); 198extern void cifs_dfs_release_automount_timer(void);
@@ -179,6 +203,10 @@ extern void cifs_dfs_release_automount_timer(void);
179void cifs_proc_init(void); 203void cifs_proc_init(void);
180void cifs_proc_clean(void); 204void cifs_proc_clean(void);
181 205
206extern void cifs_move_llist(struct list_head *source, struct list_head *dest);
207extern void cifs_free_llist(struct list_head *llist);
208extern void cifs_del_lock_waiters(struct cifsLockInfo *lock);
209
182extern int cifs_negotiate_protocol(const unsigned int xid, 210extern int cifs_negotiate_protocol(const unsigned int xid,
183 struct cifs_ses *ses); 211 struct cifs_ses *ses);
184extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, 212extern int cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
@@ -190,10 +218,10 @@ extern int CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
190 const struct nls_table *); 218 const struct nls_table *);
191 219
192extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, 220extern int CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon,
193 const char *searchName, const struct nls_table *nls_codepage, 221 const char *searchName, struct cifs_sb_info *cifs_sb,
194 __u16 *searchHandle, __u16 search_flags, 222 __u16 *searchHandle, __u16 search_flags,
195 struct cifs_search_info *psrch_inf, 223 struct cifs_search_info *psrch_inf,
196 int map, const char dirsep); 224 bool msearch);
197 225
198extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon, 226extern int CIFSFindNext(const unsigned int xid, struct cifs_tcon *tcon,
199 __u16 searchHandle, __u16 search_flags, 227 __u16 searchHandle, __u16 search_flags,
@@ -265,13 +293,11 @@ extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon,
265 const struct nls_table *nls_codepage); 293 const struct nls_table *nls_codepage);
266#endif /* possibly unneeded function */ 294#endif /* possibly unneeded function */
267extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, 295extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
268 const char *fileName, __u64 size, 296 const char *file_name, __u64 size,
269 bool setAllocationSizeFlag, 297 struct cifs_sb_info *cifs_sb, bool set_allocation);
270 const struct nls_table *nls_codepage,
271 int remap_special_chars);
272extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, 298extern int CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon,
273 __u64 size, __u16 fileHandle, __u32 opener_pid, 299 struct cifsFileInfo *cfile, __u64 size,
274 bool AllocSizeFlag); 300 bool set_allocation);
275 301
276struct cifs_unix_set_info_args { 302struct cifs_unix_set_info_args {
277 __u64 ctime; 303 __u64 ctime;
@@ -303,22 +329,17 @@ extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon,
303 const struct nls_table *nls_codepage, 329 const struct nls_table *nls_codepage,
304 int remap_special_chars); 330 int remap_special_chars);
305extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, 331extern int CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon,
306 const char *name, 332 const char *name, struct cifs_sb_info *cifs_sb);
307 const struct nls_table *nls_codepage,
308 int remap_special_chars);
309extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, 333extern int CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
310 const char *fromName, const char *toName, 334 const char *from_name, const char *to_name,
311 const struct nls_table *nls_codepage, 335 struct cifs_sb_info *cifs_sb);
312 int remap_special_chars);
313extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon, 336extern int CIFSSMBRenameOpenFile(const unsigned int xid, struct cifs_tcon *tcon,
314 int netfid, const char *target_name, 337 int netfid, const char *target_name,
315 const struct nls_table *nls_codepage, 338 const struct nls_table *nls_codepage,
316 int remap_special_chars); 339 int remap_special_chars);
317extern int CIFSCreateHardLink(const unsigned int xid, 340extern int CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon,
318 struct cifs_tcon *tcon, 341 const char *from_name, const char *to_name,
319 const char *fromName, const char *toName, 342 struct cifs_sb_info *cifs_sb);
320 const struct nls_table *nls_codepage,
321 int remap_special_chars);
322extern int CIFSUnixCreateHardLink(const unsigned int xid, 343extern int CIFSUnixCreateHardLink(const unsigned int xid,
323 struct cifs_tcon *tcon, 344 struct cifs_tcon *tcon,
324 const char *fromName, const char *toName, 345 const char *fromName, const char *toName,
@@ -367,8 +388,7 @@ extern int CIFSSMBWrite(const unsigned int xid, struct cifs_io_parms *io_parms,
367 unsigned int *nbytes, const char *buf, 388 unsigned int *nbytes, const char *buf,
368 const char __user *ubuf, const int long_op); 389 const char __user *ubuf, const int long_op);
369extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, 390extern int CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
370 unsigned int *nbytes, struct kvec *iov, const int nvec, 391 unsigned int *nbytes, struct kvec *iov, const int nvec);
371 const int long_op);
372extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, 392extern int CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon,
373 const char *search_name, __u64 *inode_number, 393 const char *search_name, __u64 *inode_number,
374 const struct nls_table *nls_codepage, 394 const struct nls_table *nls_codepage,
@@ -397,10 +417,12 @@ extern void sesInfoFree(struct cifs_ses *);
397extern struct cifs_tcon *tconInfoAlloc(void); 417extern struct cifs_tcon *tconInfoAlloc(void);
398extern void tconInfoFree(struct cifs_tcon *); 418extern void tconInfoFree(struct cifs_tcon *);
399 419
400extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); 420extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
421 __u32 *pexpected_response_sequence_number);
401extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *, 422extern int cifs_sign_smbv(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
402 __u32 *); 423 __u32 *);
403extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, 424extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
425extern int cifs_verify_signature(struct smb_rqst *rqst,
404 struct TCP_Server_Info *server, 426 struct TCP_Server_Info *server,
405 __u32 expected_sequence_number); 427 __u32 expected_sequence_number);
406extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, 428extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
@@ -462,45 +484,9 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
462extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, 484extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
463 unsigned char *p24); 485 unsigned char *p24);
464 486
465/* asynchronous read support */
466struct cifs_readdata {
467 struct kref refcount;
468 struct list_head list;
469 struct completion done;
470 struct cifsFileInfo *cfile;
471 struct address_space *mapping;
472 __u64 offset;
473 unsigned int bytes;
474 pid_t pid;
475 int result;
476 struct list_head pages;
477 struct work_struct work;
478 int (*marshal_iov) (struct cifs_readdata *rdata,
479 unsigned int remaining);
480 unsigned int nr_iov;
481 struct kvec iov[1];
482};
483
484void cifs_readdata_release(struct kref *refcount); 487void cifs_readdata_release(struct kref *refcount);
485int cifs_async_readv(struct cifs_readdata *rdata); 488int cifs_async_readv(struct cifs_readdata *rdata);
486 489int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid);
487/* asynchronous write support */
488struct cifs_writedata {
489 struct kref refcount;
490 struct list_head list;
491 struct completion done;
492 enum writeback_sync_modes sync_mode;
493 struct work_struct work;
494 struct cifsFileInfo *cfile;
495 __u64 offset;
496 pid_t pid;
497 unsigned int bytes;
498 int result;
499 void (*marshal_iov) (struct kvec *iov,
500 struct cifs_writedata *wdata);
501 unsigned int nr_pages;
502 struct page *pages[1];
503};
504 490
505int cifs_async_writev(struct cifs_writedata *wdata); 491int cifs_async_writev(struct cifs_writedata *wdata);
506void cifs_writev_complete(struct work_struct *work); 492void cifs_writev_complete(struct work_struct *work);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 074923ce593d..76d0d2998850 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,32 +86,6 @@ static struct {
86#endif /* CONFIG_CIFS_WEAK_PW_HASH */ 86#endif /* CONFIG_CIFS_WEAK_PW_HASH */
87#endif /* CIFS_POSIX */ 87#endif /* CIFS_POSIX */
88 88
89#ifdef CONFIG_HIGHMEM
90/*
91 * On arches that have high memory, kmap address space is limited. By
92 * serializing the kmap operations on those arches, we ensure that we don't
93 * end up with a bunch of threads in writeback with partially mapped page
94 * arrays, stuck waiting for kmap to come back. That situation prevents
95 * progress and can deadlock.
96 */
97static DEFINE_MUTEX(cifs_kmap_mutex);
98
99static inline void
100cifs_kmap_lock(void)
101{
102 mutex_lock(&cifs_kmap_mutex);
103}
104
105static inline void
106cifs_kmap_unlock(void)
107{
108 mutex_unlock(&cifs_kmap_mutex);
109}
110#else /* !CONFIG_HIGHMEM */
111#define cifs_kmap_lock() do { ; } while(0)
112#define cifs_kmap_unlock() do { ; } while(0)
113#endif /* CONFIG_HIGHMEM */
114
115/* 89/*
116 * Mark as invalid, all open files on tree connections since they 90 * Mark as invalid, all open files on tree connections since they
117 * were closed when session to server was lost. 91 * were closed when session to server was lost.
@@ -751,6 +725,8 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
751 ECHO_REQ *smb; 725 ECHO_REQ *smb;
752 int rc = 0; 726 int rc = 0;
753 struct kvec iov; 727 struct kvec iov;
728 struct smb_rqst rqst = { .rq_iov = &iov,
729 .rq_nvec = 1 };
754 730
755 cFYI(1, "In echo request"); 731 cFYI(1, "In echo request");
756 732
@@ -768,7 +744,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
768 iov.iov_base = smb; 744 iov.iov_base = smb;
769 iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; 745 iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
770 746
771 rc = cifs_call_async(server, &iov, 1, NULL, cifs_echo_callback, 747 rc = cifs_call_async(server, &rqst, NULL, cifs_echo_callback,
772 server, CIFS_ASYNC_OP | CIFS_ECHO_OP); 748 server, CIFS_ASYNC_OP | CIFS_ECHO_OP);
773 if (rc) 749 if (rc)
774 cFYI(1, "Echo request failed: %d", rc); 750 cFYI(1, "Echo request failed: %d", rc);
@@ -902,15 +878,15 @@ PsxDelete:
902} 878}
903 879
904int 880int
905CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, 881CIFSSMBDelFile(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
906 const char *fileName, const struct nls_table *nls_codepage, 882 struct cifs_sb_info *cifs_sb)
907 int remap)
908{ 883{
909 DELETE_FILE_REQ *pSMB = NULL; 884 DELETE_FILE_REQ *pSMB = NULL;
910 DELETE_FILE_RSP *pSMBr = NULL; 885 DELETE_FILE_RSP *pSMBr = NULL;
911 int rc = 0; 886 int rc = 0;
912 int bytes_returned; 887 int bytes_returned;
913 int name_len; 888 int name_len;
889 int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
914 890
915DelFileRetry: 891DelFileRetry:
916 rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, 892 rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB,
@@ -919,15 +895,15 @@ DelFileRetry:
919 return rc; 895 return rc;
920 896
921 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { 897 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
922 name_len = 898 name_len = cifsConvertToUTF16((__le16 *) pSMB->fileName, name,
923 cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName, 899 PATH_MAX, cifs_sb->local_nls,
924 PATH_MAX, nls_codepage, remap); 900 remap);
925 name_len++; /* trailing null */ 901 name_len++; /* trailing null */
926 name_len *= 2; 902 name_len *= 2;
927 } else { /* BB improve check for buffer overruns BB */ 903 } else { /* BB improve check for buffer overruns BB */
928 name_len = strnlen(fileName, PATH_MAX); 904 name_len = strnlen(name, PATH_MAX);
929 name_len++; /* trailing null */ 905 name_len++; /* trailing null */
930 strncpy(pSMB->fileName, fileName, name_len); 906 strncpy(pSMB->fileName, name, name_len);
931 } 907 }
932 pSMB->SearchAttributes = 908 pSMB->SearchAttributes =
933 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); 909 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
@@ -1440,7 +1416,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1440 return 0; 1416 return 0;
1441} 1417}
1442 1418
1443static int 1419int
1444cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1420cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1445{ 1421{
1446 int length, len; 1422 int length, len;
@@ -1460,10 +1436,10 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1460 len = min_t(unsigned int, buflen, server->vals->read_rsp_size) - 1436 len = min_t(unsigned int, buflen, server->vals->read_rsp_size) -
1461 HEADER_SIZE(server) + 1; 1437 HEADER_SIZE(server) + 1;
1462 1438
1463 rdata->iov[0].iov_base = buf + HEADER_SIZE(server) - 1; 1439 rdata->iov.iov_base = buf + HEADER_SIZE(server) - 1;
1464 rdata->iov[0].iov_len = len; 1440 rdata->iov.iov_len = len;
1465 1441
1466 length = cifs_readv_from_socket(server, rdata->iov, 1, len); 1442 length = cifs_readv_from_socket(server, &rdata->iov, 1, len);
1467 if (length < 0) 1443 if (length < 0)
1468 return length; 1444 return length;
1469 server->total_read += length; 1445 server->total_read += length;
@@ -1509,19 +1485,19 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1509 len = data_offset - server->total_read; 1485 len = data_offset - server->total_read;
1510 if (len > 0) { 1486 if (len > 0) {
1511 /* read any junk before data into the rest of smallbuf */ 1487 /* read any junk before data into the rest of smallbuf */
1512 rdata->iov[0].iov_base = buf + server->total_read; 1488 rdata->iov.iov_base = buf + server->total_read;
1513 rdata->iov[0].iov_len = len; 1489 rdata->iov.iov_len = len;
1514 length = cifs_readv_from_socket(server, rdata->iov, 1, len); 1490 length = cifs_readv_from_socket(server, &rdata->iov, 1, len);
1515 if (length < 0) 1491 if (length < 0)
1516 return length; 1492 return length;
1517 server->total_read += length; 1493 server->total_read += length;
1518 } 1494 }
1519 1495
1520 /* set up first iov for signature check */ 1496 /* set up first iov for signature check */
1521 rdata->iov[0].iov_base = buf; 1497 rdata->iov.iov_base = buf;
1522 rdata->iov[0].iov_len = server->total_read; 1498 rdata->iov.iov_len = server->total_read;
1523 cFYI(1, "0: iov_base=%p iov_len=%zu", 1499 cFYI(1, "0: iov_base=%p iov_len=%zu",
1524 rdata->iov[0].iov_base, rdata->iov[0].iov_len); 1500 rdata->iov.iov_base, rdata->iov.iov_len);
1525 1501
1526 /* how much data is in the response? */ 1502 /* how much data is in the response? */
1527 data_len = server->ops->read_data_length(buf); 1503 data_len = server->ops->read_data_length(buf);
@@ -1531,23 +1507,11 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1531 return cifs_readv_discard(server, mid); 1507 return cifs_readv_discard(server, mid);
1532 } 1508 }
1533 1509
1534 /* marshal up the page array */ 1510 length = rdata->read_into_pages(server, rdata, data_len);
1535 cifs_kmap_lock(); 1511 if (length < 0)
1536 len = rdata->marshal_iov(rdata, data_len); 1512 return length;
1537 cifs_kmap_unlock();
1538 data_len -= len;
1539
1540 /* issue the read if we have any iovecs left to fill */
1541 if (rdata->nr_iov > 1) {
1542 length = cifs_readv_from_socket(server, &rdata->iov[1],
1543 rdata->nr_iov - 1, len);
1544 if (length < 0)
1545 return length;
1546 server->total_read += length;
1547 } else {
1548 length = 0;
1549 }
1550 1513
1514 server->total_read += length;
1551 rdata->bytes = length; 1515 rdata->bytes = length;
1552 1516
1553 cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read, 1517 cFYI(1, "total_read=%u buflen=%u remaining=%u", server->total_read,
@@ -1567,6 +1531,12 @@ cifs_readv_callback(struct mid_q_entry *mid)
1567 struct cifs_readdata *rdata = mid->callback_data; 1531 struct cifs_readdata *rdata = mid->callback_data;
1568 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1532 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
1569 struct TCP_Server_Info *server = tcon->ses->server; 1533 struct TCP_Server_Info *server = tcon->ses->server;
1534 struct smb_rqst rqst = { .rq_iov = &rdata->iov,
1535 .rq_nvec = 1,
1536 .rq_pages = rdata->pages,
1537 .rq_npages = rdata->nr_pages,
1538 .rq_pagesz = rdata->pagesz,
1539 .rq_tailsz = rdata->tailsz };
1570 1540
1571 cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__, 1541 cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
1572 mid->mid, mid->mid_state, rdata->result, rdata->bytes); 1542 mid->mid, mid->mid_state, rdata->result, rdata->bytes);
@@ -1576,9 +1546,13 @@ cifs_readv_callback(struct mid_q_entry *mid)
1576 /* result already set, check signature */ 1546 /* result already set, check signature */
1577 if (server->sec_mode & 1547 if (server->sec_mode &
1578 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 1548 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1579 if (cifs_verify_signature(rdata->iov, rdata->nr_iov, 1549 int rc = 0;
1580 server, mid->sequence_number + 1)) 1550
1581 cERROR(1, "Unexpected SMB signature"); 1551 rc = cifs_verify_signature(&rqst, server,
1552 mid->sequence_number + 1);
1553 if (rc)
1554 cERROR(1, "SMB signature verification returned "
1555 "error = %d", rc);
1582 } 1556 }
1583 /* FIXME: should this be counted toward the initiating task? */ 1557 /* FIXME: should this be counted toward the initiating task? */
1584 task_io_account_read(rdata->bytes); 1558 task_io_account_read(rdata->bytes);
@@ -1605,6 +1579,8 @@ cifs_async_readv(struct cifs_readdata *rdata)
1605 READ_REQ *smb = NULL; 1579 READ_REQ *smb = NULL;
1606 int wct; 1580 int wct;
1607 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); 1581 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
1582 struct smb_rqst rqst = { .rq_iov = &rdata->iov,
1583 .rq_nvec = 1 };
1608 1584
1609 cFYI(1, "%s: offset=%llu bytes=%u", __func__, 1585 cFYI(1, "%s: offset=%llu bytes=%u", __func__,
1610 rdata->offset, rdata->bytes); 1586 rdata->offset, rdata->bytes);
@@ -1627,7 +1603,7 @@ cifs_async_readv(struct cifs_readdata *rdata)
1627 smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16)); 1603 smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16));
1628 1604
1629 smb->AndXCommand = 0xFF; /* none */ 1605 smb->AndXCommand = 0xFF; /* none */
1630 smb->Fid = rdata->cfile->netfid; 1606 smb->Fid = rdata->cfile->fid.netfid;
1631 smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF); 1607 smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF);
1632 if (wct == 12) 1608 if (wct == 12)
1633 smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32); 1609 smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32);
@@ -1644,13 +1620,12 @@ cifs_async_readv(struct cifs_readdata *rdata)
1644 } 1620 }
1645 1621
1646 /* 4 for RFC1001 length + 1 for BCC */ 1622 /* 4 for RFC1001 length + 1 for BCC */
1647 rdata->iov[0].iov_base = smb; 1623 rdata->iov.iov_base = smb;
1648 rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4; 1624 rdata->iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
1649 1625
1650 kref_get(&rdata->refcount); 1626 kref_get(&rdata->refcount);
1651 rc = cifs_call_async(tcon->ses->server, rdata->iov, 1, 1627 rc = cifs_call_async(tcon->ses->server, &rqst, cifs_readv_receive,
1652 cifs_readv_receive, cifs_readv_callback, 1628 cifs_readv_callback, rdata, 0);
1653 rdata, 0);
1654 1629
1655 if (rc == 0) 1630 if (rc == 0)
1656 cifs_stats_inc(&tcon->stats.cifs_stats.num_reads); 1631 cifs_stats_inc(&tcon->stats.cifs_stats.num_reads);
@@ -1921,6 +1896,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
1921{ 1896{
1922 int i, rc; 1897 int i, rc;
1923 struct inode *inode = wdata->cfile->dentry->d_inode; 1898 struct inode *inode = wdata->cfile->dentry->d_inode;
1899 struct TCP_Server_Info *server;
1924 1900
1925 for (i = 0; i < wdata->nr_pages; i++) { 1901 for (i = 0; i < wdata->nr_pages; i++) {
1926 lock_page(wdata->pages[i]); 1902 lock_page(wdata->pages[i]);
@@ -1928,7 +1904,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
1928 } 1904 }
1929 1905
1930 do { 1906 do {
1931 rc = cifs_async_writev(wdata); 1907 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1908 rc = server->ops->async_writev(wdata);
1932 } while (rc == -EAGAIN); 1909 } while (rc == -EAGAIN);
1933 1910
1934 for (i = 0; i < wdata->nr_pages; i++) { 1911 for (i = 0; i < wdata->nr_pages; i++) {
@@ -2048,11 +2025,12 @@ cifs_writev_callback(struct mid_q_entry *mid)
2048int 2025int
2049cifs_async_writev(struct cifs_writedata *wdata) 2026cifs_async_writev(struct cifs_writedata *wdata)
2050{ 2027{
2051 int i, rc = -EACCES; 2028 int rc = -EACCES;
2052 WRITE_REQ *smb = NULL; 2029 WRITE_REQ *smb = NULL;
2053 int wct; 2030 int wct;
2054 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 2031 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
2055 struct kvec *iov = NULL; 2032 struct kvec iov;
2033 struct smb_rqst rqst = { };
2056 2034
2057 if (tcon->ses->capabilities & CAP_LARGE_FILES) { 2035 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
2058 wct = 14; 2036 wct = 14;
@@ -2068,18 +2046,11 @@ cifs_async_writev(struct cifs_writedata *wdata)
2068 if (rc) 2046 if (rc)
2069 goto async_writev_out; 2047 goto async_writev_out;
2070 2048
2071 /* 1 iov per page + 1 for header */
2072 iov = kzalloc((wdata->nr_pages + 1) * sizeof(*iov), GFP_NOFS);
2073 if (iov == NULL) {
2074 rc = -ENOMEM;
2075 goto async_writev_out;
2076 }
2077
2078 smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid); 2049 smb->hdr.Pid = cpu_to_le16((__u16)wdata->pid);
2079 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16)); 2050 smb->hdr.PidHigh = cpu_to_le16((__u16)(wdata->pid >> 16));
2080 2051
2081 smb->AndXCommand = 0xFF; /* none */ 2052 smb->AndXCommand = 0xFF; /* none */
2082 smb->Fid = wdata->cfile->netfid; 2053 smb->Fid = wdata->cfile->fid.netfid;
2083 smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF); 2054 smb->OffsetLow = cpu_to_le32(wdata->offset & 0xFFFFFFFF);
2084 if (wct == 14) 2055 if (wct == 14)
2085 smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32); 2056 smb->OffsetHigh = cpu_to_le32(wdata->offset >> 32);
@@ -2091,18 +2062,15 @@ cifs_async_writev(struct cifs_writedata *wdata)
2091 cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4); 2062 cpu_to_le16(offsetof(struct smb_com_write_req, Data) - 4);
2092 2063
2093 /* 4 for RFC1001 length + 1 for BCC */ 2064 /* 4 for RFC1001 length + 1 for BCC */
2094 iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1; 2065 iov.iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4 + 1;
2095 iov[0].iov_base = smb; 2066 iov.iov_base = smb;
2096 2067
2097 /* 2068 rqst.rq_iov = &iov;
2098 * This function should marshal up the page array into the kvec 2069 rqst.rq_nvec = 1;
2099 * array, reserving [0] for the header. It should kmap the pages 2070 rqst.rq_pages = wdata->pages;
2100 * and set the iov_len properly for each one. It may also set 2071 rqst.rq_npages = wdata->nr_pages;
2101 * wdata->bytes too. 2072 rqst.rq_pagesz = wdata->pagesz;
2102 */ 2073 rqst.rq_tailsz = wdata->tailsz;
2103 cifs_kmap_lock();
2104 wdata->marshal_iov(iov, wdata);
2105 cifs_kmap_unlock();
2106 2074
2107 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); 2075 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
2108 2076
@@ -2118,32 +2086,26 @@ cifs_async_writev(struct cifs_writedata *wdata)
2118 (struct smb_com_writex_req *)smb; 2086 (struct smb_com_writex_req *)smb;
2119 inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5); 2087 inc_rfc1001_len(&smbw->hdr, wdata->bytes + 5);
2120 put_bcc(wdata->bytes + 5, &smbw->hdr); 2088 put_bcc(wdata->bytes + 5, &smbw->hdr);
2121 iov[0].iov_len += 4; /* pad bigger by four bytes */ 2089 iov.iov_len += 4; /* pad bigger by four bytes */
2122 } 2090 }
2123 2091
2124 kref_get(&wdata->refcount); 2092 kref_get(&wdata->refcount);
2125 rc = cifs_call_async(tcon->ses->server, iov, wdata->nr_pages + 1, 2093 rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
2126 NULL, cifs_writev_callback, wdata, 0); 2094 cifs_writev_callback, wdata, 0);
2127 2095
2128 if (rc == 0) 2096 if (rc == 0)
2129 cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); 2097 cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
2130 else 2098 else
2131 kref_put(&wdata->refcount, cifs_writedata_release); 2099 kref_put(&wdata->refcount, cifs_writedata_release);
2132 2100
2133 /* send is done, unmap pages */
2134 for (i = 0; i < wdata->nr_pages; i++)
2135 kunmap(wdata->pages[i]);
2136
2137async_writev_out: 2101async_writev_out:
2138 cifs_small_buf_release(smb); 2102 cifs_small_buf_release(smb);
2139 kfree(iov);
2140 return rc; 2103 return rc;
2141} 2104}
2142 2105
2143int 2106int
2144CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms, 2107CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
2145 unsigned int *nbytes, struct kvec *iov, int n_vec, 2108 unsigned int *nbytes, struct kvec *iov, int n_vec)
2146 const int long_op)
2147{ 2109{
2148 int rc = -EACCES; 2110 int rc = -EACCES;
2149 WRITE_REQ *pSMB = NULL; 2111 WRITE_REQ *pSMB = NULL;
@@ -2214,8 +2176,7 @@ CIFSSMBWrite2(const unsigned int xid, struct cifs_io_parms *io_parms,
2214 iov[0].iov_len = smb_hdr_len + 8; 2176 iov[0].iov_len = smb_hdr_len + 8;
2215 2177
2216 2178
2217 rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 2179 rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type, 0);
2218 long_op);
2219 cifs_stats_inc(&tcon->stats.cifs_stats.num_writes); 2180 cifs_stats_inc(&tcon->stats.cifs_stats.num_writes);
2220 if (rc) { 2181 if (rc) {
2221 cFYI(1, "Send error Write2 = %d", rc); 2182 cFYI(1, "Send error Write2 = %d", rc);
@@ -2552,8 +2513,8 @@ CIFSSMBFlush(const unsigned int xid, struct cifs_tcon *tcon, int smb_file_id)
2552 2513
2553int 2514int
2554CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon, 2515CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
2555 const char *fromName, const char *toName, 2516 const char *from_name, const char *to_name,
2556 const struct nls_table *nls_codepage, int remap) 2517 struct cifs_sb_info *cifs_sb)
2557{ 2518{
2558 int rc = 0; 2519 int rc = 0;
2559 RENAME_REQ *pSMB = NULL; 2520 RENAME_REQ *pSMB = NULL;
@@ -2561,6 +2522,7 @@ CIFSSMBRename(const unsigned int xid, struct cifs_tcon *tcon,
2561 int bytes_returned; 2522 int bytes_returned;
2562 int name_len, name_len2; 2523 int name_len, name_len2;
2563 __u16 count; 2524 __u16 count;
2525 int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
2564 2526
2565 cFYI(1, "In CIFSSMBRename"); 2527 cFYI(1, "In CIFSSMBRename");
2566renameRetry: 2528renameRetry:
@@ -2575,9 +2537,9 @@ renameRetry:
2575 ATTR_DIRECTORY); 2537 ATTR_DIRECTORY);
2576 2538
2577 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { 2539 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
2578 name_len = 2540 name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName,
2579 cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, 2541 from_name, PATH_MAX,
2580 PATH_MAX, nls_codepage, remap); 2542 cifs_sb->local_nls, remap);
2581 name_len++; /* trailing null */ 2543 name_len++; /* trailing null */
2582 name_len *= 2; 2544 name_len *= 2;
2583 pSMB->OldFileName[name_len] = 0x04; /* pad */ 2545 pSMB->OldFileName[name_len] = 0x04; /* pad */
@@ -2585,17 +2547,18 @@ renameRetry:
2585 pSMB->OldFileName[name_len + 1] = 0x00; 2547 pSMB->OldFileName[name_len + 1] = 0x00;
2586 name_len2 = 2548 name_len2 =
2587 cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], 2549 cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2],
2588 toName, PATH_MAX, nls_codepage, remap); 2550 to_name, PATH_MAX, cifs_sb->local_nls,
2551 remap);
2589 name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; 2552 name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ;
2590 name_len2 *= 2; /* convert to bytes */ 2553 name_len2 *= 2; /* convert to bytes */
2591 } else { /* BB improve the check for buffer overruns BB */ 2554 } else { /* BB improve the check for buffer overruns BB */
2592 name_len = strnlen(fromName, PATH_MAX); 2555 name_len = strnlen(from_name, PATH_MAX);
2593 name_len++; /* trailing null */ 2556 name_len++; /* trailing null */
2594 strncpy(pSMB->OldFileName, fromName, name_len); 2557 strncpy(pSMB->OldFileName, from_name, name_len);
2595 name_len2 = strnlen(toName, PATH_MAX); 2558 name_len2 = strnlen(to_name, PATH_MAX);
2596 name_len2++; /* trailing null */ 2559 name_len2++; /* trailing null */
2597 pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ 2560 pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */
2598 strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); 2561 strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2);
2599 name_len2++; /* trailing null */ 2562 name_len2++; /* trailing null */
2600 name_len2++; /* signature byte */ 2563 name_len2++; /* signature byte */
2601 } 2564 }
@@ -2943,8 +2906,8 @@ createHardLinkRetry:
2943 2906
2944int 2907int
2945CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon, 2908CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon,
2946 const char *fromName, const char *toName, 2909 const char *from_name, const char *to_name,
2947 const struct nls_table *nls_codepage, int remap) 2910 struct cifs_sb_info *cifs_sb)
2948{ 2911{
2949 int rc = 0; 2912 int rc = 0;
2950 NT_RENAME_REQ *pSMB = NULL; 2913 NT_RENAME_REQ *pSMB = NULL;
@@ -2952,6 +2915,7 @@ CIFSCreateHardLink(const unsigned int xid, struct cifs_tcon *tcon,
2952 int bytes_returned; 2915 int bytes_returned;
2953 int name_len, name_len2; 2916 int name_len, name_len2;
2954 __u16 count; 2917 __u16 count;
2918 int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
2955 2919
2956 cFYI(1, "In CIFSCreateHardLink"); 2920 cFYI(1, "In CIFSCreateHardLink");
2957winCreateHardLinkRetry: 2921winCreateHardLinkRetry:
@@ -2971,8 +2935,8 @@ winCreateHardLinkRetry:
2971 2935
2972 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { 2936 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
2973 name_len = 2937 name_len =
2974 cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, 2938 cifsConvertToUTF16((__le16 *) pSMB->OldFileName, from_name,
2975 PATH_MAX, nls_codepage, remap); 2939 PATH_MAX, cifs_sb->local_nls, remap);
2976 name_len++; /* trailing null */ 2940 name_len++; /* trailing null */
2977 name_len *= 2; 2941 name_len *= 2;
2978 2942
@@ -2981,17 +2945,18 @@ winCreateHardLinkRetry:
2981 pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ 2945 pSMB->OldFileName[name_len + 1] = 0x00; /* pad */
2982 name_len2 = 2946 name_len2 =
2983 cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], 2947 cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2],
2984 toName, PATH_MAX, nls_codepage, remap); 2948 to_name, PATH_MAX, cifs_sb->local_nls,
2949 remap);
2985 name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; 2950 name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ;
2986 name_len2 *= 2; /* convert to bytes */ 2951 name_len2 *= 2; /* convert to bytes */
2987 } else { /* BB improve the check for buffer overruns BB */ 2952 } else { /* BB improve the check for buffer overruns BB */
2988 name_len = strnlen(fromName, PATH_MAX); 2953 name_len = strnlen(from_name, PATH_MAX);
2989 name_len++; /* trailing null */ 2954 name_len++; /* trailing null */
2990 strncpy(pSMB->OldFileName, fromName, name_len); 2955 strncpy(pSMB->OldFileName, from_name, name_len);
2991 name_len2 = strnlen(toName, PATH_MAX); 2956 name_len2 = strnlen(to_name, PATH_MAX);
2992 name_len2++; /* trailing null */ 2957 name_len2++; /* trailing null */
2993 pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */ 2958 pSMB->OldFileName[name_len] = 0x04; /* 2nd buffer format */
2994 strncpy(&pSMB->OldFileName[name_len + 1], toName, name_len2); 2959 strncpy(&pSMB->OldFileName[name_len + 1], to_name, name_len2);
2995 name_len2++; /* trailing null */ 2960 name_len2++; /* trailing null */
2996 name_len2++; /* signature byte */ 2961 name_len2++; /* signature byte */
2997 } 2962 }
@@ -4249,10 +4214,9 @@ UnixQPathInfoRetry:
4249/* xid, tcon, searchName and codepage are input parms, rest are returned */ 4214/* xid, tcon, searchName and codepage are input parms, rest are returned */
4250int 4215int
4251CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon, 4216CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon,
4252 const char *searchName, 4217 const char *searchName, struct cifs_sb_info *cifs_sb,
4253 const struct nls_table *nls_codepage,
4254 __u16 *pnetfid, __u16 search_flags, 4218 __u16 *pnetfid, __u16 search_flags,
4255 struct cifs_search_info *psrch_inf, int remap, const char dirsep) 4219 struct cifs_search_info *psrch_inf, bool msearch)
4256{ 4220{
4257/* level 257 SMB_ */ 4221/* level 257 SMB_ */
4258 TRANSACTION2_FFIRST_REQ *pSMB = NULL; 4222 TRANSACTION2_FFIRST_REQ *pSMB = NULL;
@@ -4260,8 +4224,9 @@ CIFSFindFirst(const unsigned int xid, struct cifs_tcon *tcon,
4260 T2_FFIRST_RSP_PARMS *parms; 4224 T2_FFIRST_RSP_PARMS *parms;
4261 int rc = 0; 4225 int rc = 0;
4262 int bytes_returned = 0; 4226 int bytes_returned = 0;
4263 int name_len; 4227 int name_len, remap;
4264 __u16 params, byte_count; 4228 __u16 params, byte_count;
4229 struct nls_table *nls_codepage;
4265 4230
4266 cFYI(1, "In FindFirst for %s", searchName); 4231 cFYI(1, "In FindFirst for %s", searchName);
4267 4232
@@ -4271,6 +4236,9 @@ findFirstRetry:
4271 if (rc) 4236 if (rc)
4272 return rc; 4237 return rc;
4273 4238
4239 nls_codepage = cifs_sb->local_nls;
4240 remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
4241
4274 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { 4242 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
4275 name_len = 4243 name_len =
4276 cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, 4244 cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName,
@@ -4279,24 +4247,29 @@ findFirstRetry:
4279 it got remapped to 0xF03A as if it were part of the 4247 it got remapped to 0xF03A as if it were part of the
4280 directory name instead of a wildcard */ 4248 directory name instead of a wildcard */
4281 name_len *= 2; 4249 name_len *= 2;
4282 pSMB->FileName[name_len] = dirsep; 4250 if (msearch) {
4283 pSMB->FileName[name_len+1] = 0; 4251 pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb);
4284 pSMB->FileName[name_len+2] = '*'; 4252 pSMB->FileName[name_len+1] = 0;
4285 pSMB->FileName[name_len+3] = 0; 4253 pSMB->FileName[name_len+2] = '*';
4286 name_len += 4; /* now the trailing null */ 4254 pSMB->FileName[name_len+3] = 0;
4287 pSMB->FileName[name_len] = 0; /* null terminate just in case */ 4255 name_len += 4; /* now the trailing null */
4288 pSMB->FileName[name_len+1] = 0; 4256 /* null terminate just in case */
4289 name_len += 2; 4257 pSMB->FileName[name_len] = 0;
4258 pSMB->FileName[name_len+1] = 0;
4259 name_len += 2;
4260 }
4290 } else { /* BB add check for overrun of SMB buf BB */ 4261 } else { /* BB add check for overrun of SMB buf BB */
4291 name_len = strnlen(searchName, PATH_MAX); 4262 name_len = strnlen(searchName, PATH_MAX);
4292/* BB fix here and in unicode clause above ie 4263/* BB fix here and in unicode clause above ie
4293 if (name_len > buffersize-header) 4264 if (name_len > buffersize-header)
4294 free buffer exit; BB */ 4265 free buffer exit; BB */
4295 strncpy(pSMB->FileName, searchName, name_len); 4266 strncpy(pSMB->FileName, searchName, name_len);
4296 pSMB->FileName[name_len] = dirsep; 4267 if (msearch) {
4297 pSMB->FileName[name_len+1] = '*'; 4268 pSMB->FileName[name_len] = CIFS_DIR_SEP(cifs_sb);
4298 pSMB->FileName[name_len+2] = 0; 4269 pSMB->FileName[name_len+1] = '*';
4299 name_len += 3; 4270 pSMB->FileName[name_len+2] = 0;
4271 name_len += 3;
4272 }
4300 } 4273 }
4301 4274
4302 params = 12 + name_len /* includes null */ ; 4275 params = 12 + name_len /* includes null */ ;
@@ -4384,7 +4357,8 @@ findFirstRetry:
4384 psrch_inf->last_entry = psrch_inf->srch_entries_start + 4357 psrch_inf->last_entry = psrch_inf->srch_entries_start +
4385 lnoff; 4358 lnoff;
4386 4359
4387 *pnetfid = parms->SearchHandle; 4360 if (pnetfid)
4361 *pnetfid = parms->SearchHandle;
4388 } else { 4362 } else {
4389 cifs_buf_release(pSMB); 4363 cifs_buf_release(pSMB);
4390 } 4364 }
@@ -5412,16 +5386,16 @@ QFSPosixRetry:
5412} 5386}
5413 5387
5414 5388
5415/* We can not use write of zero bytes trick to 5389/*
5416 set file size due to need for large file support. Also note that 5390 * We can not use write of zero bytes trick to set file size due to need for
5417 this SetPathInfo is preferred to SetFileInfo based method in next 5391 * large file support. Also note that this SetPathInfo is preferred to
5418 routine which is only needed to work around a sharing violation bug 5392 * SetFileInfo based method in next routine which is only needed to work around
5419 in Samba which this routine can run into */ 5393 * a sharing violation bug in Samba which this routine can run into.
5420 5394 */
5421int 5395int
5422CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon, 5396CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
5423 const char *fileName, __u64 size, bool SetAllocation, 5397 const char *file_name, __u64 size, struct cifs_sb_info *cifs_sb,
5424 const struct nls_table *nls_codepage, int remap) 5398 bool set_allocation)
5425{ 5399{
5426 struct smb_com_transaction2_spi_req *pSMB = NULL; 5400 struct smb_com_transaction2_spi_req *pSMB = NULL;
5427 struct smb_com_transaction2_spi_rsp *pSMBr = NULL; 5401 struct smb_com_transaction2_spi_rsp *pSMBr = NULL;
@@ -5429,6 +5403,8 @@ CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
5429 int name_len; 5403 int name_len;
5430 int rc = 0; 5404 int rc = 0;
5431 int bytes_returned = 0; 5405 int bytes_returned = 0;
5406 int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
5407
5432 __u16 params, byte_count, data_count, param_offset, offset; 5408 __u16 params, byte_count, data_count, param_offset, offset;
5433 5409
5434 cFYI(1, "In SetEOF"); 5410 cFYI(1, "In SetEOF");
@@ -5440,14 +5416,14 @@ SetEOFRetry:
5440 5416
5441 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { 5417 if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
5442 name_len = 5418 name_len =
5443 cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, 5419 cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name,
5444 PATH_MAX, nls_codepage, remap); 5420 PATH_MAX, cifs_sb->local_nls, remap);
5445 name_len++; /* trailing null */ 5421 name_len++; /* trailing null */
5446 name_len *= 2; 5422 name_len *= 2;
5447 } else { /* BB improve the check for buffer overruns BB */ 5423 } else { /* BB improve the check for buffer overruns BB */
5448 name_len = strnlen(fileName, PATH_MAX); 5424 name_len = strnlen(file_name, PATH_MAX);
5449 name_len++; /* trailing null */ 5425 name_len++; /* trailing null */
5450 strncpy(pSMB->FileName, fileName, name_len); 5426 strncpy(pSMB->FileName, file_name, name_len);
5451 } 5427 }
5452 params = 6 + name_len; 5428 params = 6 + name_len;
5453 data_count = sizeof(struct file_end_of_file_info); 5429 data_count = sizeof(struct file_end_of_file_info);
@@ -5461,7 +5437,7 @@ SetEOFRetry:
5461 param_offset = offsetof(struct smb_com_transaction2_spi_req, 5437 param_offset = offsetof(struct smb_com_transaction2_spi_req,
5462 InformationLevel) - 4; 5438 InformationLevel) - 4;
5463 offset = param_offset + params; 5439 offset = param_offset + params;
5464 if (SetAllocation) { 5440 if (set_allocation) {
5465 if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) 5441 if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)
5466 pSMB->InformationLevel = 5442 pSMB->InformationLevel =
5467 cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); 5443 cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2);
@@ -5508,8 +5484,8 @@ SetEOFRetry:
5508} 5484}
5509 5485
5510int 5486int
5511CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size, 5487CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon,
5512 __u16 fid, __u32 pid_of_opener, bool SetAllocation) 5488 struct cifsFileInfo *cfile, __u64 size, bool set_allocation)
5513{ 5489{
5514 struct smb_com_transaction2_sfi_req *pSMB = NULL; 5490 struct smb_com_transaction2_sfi_req *pSMB = NULL;
5515 struct file_end_of_file_info *parm_data; 5491 struct file_end_of_file_info *parm_data;
@@ -5523,8 +5499,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size,
5523 if (rc) 5499 if (rc)
5524 return rc; 5500 return rc;
5525 5501
5526 pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); 5502 pSMB->hdr.Pid = cpu_to_le16((__u16)cfile->pid);
5527 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); 5503 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(cfile->pid >> 16));
5528 5504
5529 params = 6; 5505 params = 6;
5530 pSMB->MaxSetupCount = 0; 5506 pSMB->MaxSetupCount = 0;
@@ -5553,8 +5529,8 @@ CIFSSMBSetFileSize(const unsigned int xid, struct cifs_tcon *tcon, __u64 size,
5553 + offset); 5529 + offset);
5554 pSMB->DataOffset = cpu_to_le16(offset); 5530 pSMB->DataOffset = cpu_to_le16(offset);
5555 parm_data->FileSize = cpu_to_le64(size); 5531 parm_data->FileSize = cpu_to_le64(size);
5556 pSMB->Fid = fid; 5532 pSMB->Fid = cfile->fid.netfid;
5557 if (SetAllocation) { 5533 if (set_allocation) {
5558 if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU) 5534 if (tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)
5559 pSMB->InformationLevel = 5535 pSMB->InformationLevel =
5560 cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2); 5536 cpu_to_le16(SMB_SET_FILE_ALLOCATION_INFO2);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6df6fa14cba8..2fdbe08a7a23 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -82,8 +82,7 @@ enum {
82 Opt_serverino, Opt_noserverino, 82 Opt_serverino, Opt_noserverino,
83 Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl, 83 Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl,
84 Opt_acl, Opt_noacl, Opt_locallease, 84 Opt_acl, Opt_noacl, Opt_locallease,
85 Opt_sign, Opt_seal, Opt_direct, 85 Opt_sign, Opt_seal, Opt_noac,
86 Opt_strictcache, Opt_noac,
87 Opt_fsc, Opt_mfsymlinks, 86 Opt_fsc, Opt_mfsymlinks,
88 Opt_multiuser, Opt_sloppy, 87 Opt_multiuser, Opt_sloppy,
89 88
@@ -160,10 +159,6 @@ static const match_table_t cifs_mount_option_tokens = {
160 { Opt_locallease, "locallease" }, 159 { Opt_locallease, "locallease" },
161 { Opt_sign, "sign" }, 160 { Opt_sign, "sign" },
162 { Opt_seal, "seal" }, 161 { Opt_seal, "seal" },
163 { Opt_direct, "direct" },
164 { Opt_direct, "directio" },
165 { Opt_direct, "forcedirectio" },
166 { Opt_strictcache, "strictcache" },
167 { Opt_noac, "noac" }, 162 { Opt_noac, "noac" },
168 { Opt_fsc, "fsc" }, 163 { Opt_fsc, "fsc" },
169 { Opt_mfsymlinks, "mfsymlinks" }, 164 { Opt_mfsymlinks, "mfsymlinks" },
@@ -277,6 +272,7 @@ static const match_table_t cifs_cacheflavor_tokens = {
277static const match_table_t cifs_smb_version_tokens = { 272static const match_table_t cifs_smb_version_tokens = {
278 { Smb_1, SMB1_VERSION_STRING }, 273 { Smb_1, SMB1_VERSION_STRING },
279 { Smb_21, SMB21_VERSION_STRING }, 274 { Smb_21, SMB21_VERSION_STRING },
275 { Smb_30, SMB30_VERSION_STRING },
280}; 276};
281 277
282static int ip_connect(struct TCP_Server_Info *server); 278static int ip_connect(struct TCP_Server_Info *server);
@@ -819,6 +815,10 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
819 cifs_dump_mem("Bad SMB: ", buf, 815 cifs_dump_mem("Bad SMB: ", buf,
820 min_t(unsigned int, server->total_read, 48)); 816 min_t(unsigned int, server->total_read, 48));
821 817
818 if (server->ops->is_status_pending &&
819 server->ops->is_status_pending(buf, server, length))
820 return -1;
821
822 if (!mid) 822 if (!mid)
823 return length; 823 return length;
824 824
@@ -1075,6 +1075,10 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
1075 vol->ops = &smb21_operations; 1075 vol->ops = &smb21_operations;
1076 vol->vals = &smb21_values; 1076 vol->vals = &smb21_values;
1077 break; 1077 break;
1078 case Smb_30:
1079 vol->ops = &smb21_operations; /* currently identical with 2.1 */
1080 vol->vals = &smb30_values;
1081 break;
1078#endif 1082#endif
1079 default: 1083 default:
1080 cERROR(1, "Unknown vers= option specified: %s", value); 1084 cERROR(1, "Unknown vers= option specified: %s", value);
@@ -1101,8 +1105,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1101 char *string = NULL; 1105 char *string = NULL;
1102 char *tmp_end, *value; 1106 char *tmp_end, *value;
1103 char delim; 1107 char delim;
1104 bool cache_specified = false;
1105 static bool cache_warned = false;
1106 1108
1107 separator[0] = ','; 1109 separator[0] = ',';
1108 separator[1] = 0; 1110 separator[1] = 0;
@@ -1134,6 +1136,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1134 /* default to using server inode numbers where available */ 1136 /* default to using server inode numbers where available */
1135 vol->server_ino = 1; 1137 vol->server_ino = 1;
1136 1138
1139 /* default is to use strict cifs caching semantics */
1140 vol->strict_io = true;
1141
1137 vol->actimeo = CIFS_DEF_ACTIMEO; 1142 vol->actimeo = CIFS_DEF_ACTIMEO;
1138 1143
1139 /* FIXME: add autonegotiation -- for now, SMB1 is default */ 1144 /* FIXME: add autonegotiation -- for now, SMB1 is default */
@@ -1317,22 +1322,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1317 */ 1322 */
1318 vol->seal = 1; 1323 vol->seal = 1;
1319 break; 1324 break;
1320 case Opt_direct:
1321 cache_specified = true;
1322 vol->direct_io = true;
1323 vol->strict_io = false;
1324 cERROR(1, "The \"directio\" option will be removed in "
1325 "3.7. Please switch to the \"cache=none\" "
1326 "option.");
1327 break;
1328 case Opt_strictcache:
1329 cache_specified = true;
1330 vol->direct_io = false;
1331 vol->strict_io = true;
1332 cERROR(1, "The \"strictcache\" option will be removed "
1333 "in 3.7. Please switch to the \"cache=strict\" "
1334 "option.");
1335 break;
1336 case Opt_noac: 1325 case Opt_noac:
1337 printk(KERN_WARNING "CIFS: Mount option noac not " 1326 printk(KERN_WARNING "CIFS: Mount option noac not "
1338 "supported. Instead set " 1327 "supported. Instead set "
@@ -1676,8 +1665,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1676 if (string == NULL) 1665 if (string == NULL)
1677 goto out_nomem; 1666 goto out_nomem;
1678 1667
1679 if (strnicmp(string, "TCP_NODELAY", 11) == 0) 1668 if (strnicmp(string, "TCP_NODELAY", 11) == 0) {
1669 printk(KERN_WARNING "CIFS: the "
1670 "sockopt=TCP_NODELAY option has been "
1671 "deprecated and will be removed "
1672 "in 3.9\n");
1680 vol->sockopt_tcp_nodelay = 1; 1673 vol->sockopt_tcp_nodelay = 1;
1674 }
1681 break; 1675 break;
1682 case Opt_netbiosname: 1676 case Opt_netbiosname:
1683 string = match_strdup(args); 1677 string = match_strdup(args);
@@ -1762,7 +1756,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1762 goto cifs_parse_mount_err; 1756 goto cifs_parse_mount_err;
1763 break; 1757 break;
1764 case Opt_cache: 1758 case Opt_cache:
1765 cache_specified = true;
1766 string = match_strdup(args); 1759 string = match_strdup(args);
1767 if (string == NULL) 1760 if (string == NULL)
1768 goto out_nomem; 1761 goto out_nomem;
@@ -1813,14 +1806,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1813 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1806 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1814 "specified with no gid= option.\n"); 1807 "specified with no gid= option.\n");
1815 1808
1816 /* FIXME: remove this block in 3.7 */
1817 if (!cache_specified && !cache_warned) {
1818 cache_warned = true;
1819 printk(KERN_NOTICE "CIFS: no cache= option specified, using "
1820 "\"cache=loose\". This default will change "
1821 "to \"cache=strict\" in 3.7.\n");
1822 }
1823
1824 kfree(mountdata_copy); 1809 kfree(mountdata_copy);
1825 return 0; 1810 return 0;
1826 1811
@@ -2636,6 +2621,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
2636 tcon->retry = volume_info->retry; 2621 tcon->retry = volume_info->retry;
2637 tcon->nocase = volume_info->nocase; 2622 tcon->nocase = volume_info->nocase;
2638 tcon->local_lease = volume_info->local_lease; 2623 tcon->local_lease = volume_info->local_lease;
2624 INIT_LIST_HEAD(&tcon->pending_opens);
2639 2625
2640 spin_lock(&cifs_tcp_ses_lock); 2626 spin_lock(&cifs_tcp_ses_lock);
2641 list_add(&tcon->tcon_list, &ses->tcon_list); 2627 list_add(&tcon->tcon_list, &ses->tcon_list);
@@ -3261,146 +3247,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
3261 "mount option supported"); 3247 "mount option supported");
3262} 3248}
3263 3249
3264/*
3265 * When the server supports very large reads and writes via POSIX extensions,
3266 * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not
3267 * including the RFC1001 length.
3268 *
3269 * Note that this might make for "interesting" allocation problems during
3270 * writeback however as we have to allocate an array of pointers for the
3271 * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
3272 *
3273 * For reads, there is a similar problem as we need to allocate an array
3274 * of kvecs to handle the receive, though that should only need to be done
3275 * once.
3276 */
3277#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
3278#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4)
3279
3280/*
3281 * When the server doesn't allow large posix writes, only allow a rsize/wsize
3282 * of 2^17-1 minus the size of the call header. That allows for a read or
3283 * write up to the maximum size described by RFC1002.
3284 */
3285#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
3286#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)
3287
3288/*
3289 * The default wsize is 1M. find_get_pages seems to return a maximum of 256
3290 * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
3291 * a single wsize request with a single call.
3292 */
3293#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
3294
3295/*
3296 * Windows only supports a max of 60kb reads and 65535 byte writes. Default to
3297 * those values when posix extensions aren't in force. In actuality here, we
3298 * use 65536 to allow for a write that is a multiple of 4k. Most servers seem
3299 * to be ok with the extra byte even though Windows doesn't send writes that
3300 * are that large.
3301 *
3302 * Citation:
3303 *
3304 * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx
3305 */
3306#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
3307#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
3308
3309/*
3310 * On hosts with high memory, we can't currently support wsize/rsize that are
3311 * larger than we can kmap at once. Cap the rsize/wsize at
3312 * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
3313 * larger than that anyway.
3314 */
3315#ifdef CONFIG_HIGHMEM
3316#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE)
3317#else /* CONFIG_HIGHMEM */
3318#define CIFS_KMAP_SIZE_LIMIT (1<<24)
3319#endif /* CONFIG_HIGHMEM */
3320
3321static unsigned int
3322cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3323{
3324 __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
3325 struct TCP_Server_Info *server = tcon->ses->server;
3326 unsigned int wsize;
3327
3328 /* start with specified wsize, or default */
3329 if (pvolume_info->wsize)
3330 wsize = pvolume_info->wsize;
3331 else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
3332 wsize = CIFS_DEFAULT_IOSIZE;
3333 else
3334 wsize = CIFS_DEFAULT_NON_POSIX_WSIZE;
3335
3336 /* can server support 24-bit write sizes? (via UNIX extensions) */
3337 if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
3338 wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE);
3339
3340 /*
3341 * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set?
3342 * Limit it to max buffer offered by the server, minus the size of the
3343 * WRITEX header, not including the 4 byte RFC1001 length.
3344 */
3345 if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
3346 (!(server->capabilities & CAP_UNIX) &&
3347 (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
3348 wsize = min_t(unsigned int, wsize,
3349 server->maxBuf - sizeof(WRITE_REQ) + 4);
3350
3351 /* limit to the amount that we can kmap at once */
3352 wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
3353
3354 /* hard limit of CIFS_MAX_WSIZE */
3355 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
3356
3357 return wsize;
3358}
3359
3360static unsigned int
3361cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3362{
3363 __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
3364 struct TCP_Server_Info *server = tcon->ses->server;
3365 unsigned int rsize, defsize;
3366
3367 /*
3368 * Set default value...
3369 *
3370 * HACK alert! Ancient servers have very small buffers. Even though
3371 * MS-CIFS indicates that servers are only limited by the client's
3372 * bufsize for reads, testing against win98se shows that it throws
3373 * INVALID_PARAMETER errors if you try to request too large a read.
3374 * OS/2 just sends back short reads.
3375 *
3376 * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
3377 * it can't handle a read request larger than its MaxBufferSize either.
3378 */
3379 if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
3380 defsize = CIFS_DEFAULT_IOSIZE;
3381 else if (server->capabilities & CAP_LARGE_READ_X)
3382 defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
3383 else
3384 defsize = server->maxBuf - sizeof(READ_RSP);
3385
3386 rsize = pvolume_info->rsize ? pvolume_info->rsize : defsize;
3387
3388 /*
3389 * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
3390 * the client's MaxBufferSize.
3391 */
3392 if (!(server->capabilities & CAP_LARGE_READ_X))
3393 rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
3394
3395 /* limit to the amount that we can kmap at once */
3396 rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
3397
3398 /* hard limit of CIFS_MAX_RSIZE */
3399 rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
3400
3401 return rsize;
3402}
3403
3404static void 3250static void
3405cleanup_volume_info_contents(struct smb_vol *volume_info) 3251cleanup_volume_info_contents(struct smb_vol *volume_info)
3406{ 3252{
@@ -3651,8 +3497,8 @@ try_mount_again:
3651 if (!tcon->ipc && server->ops->qfs_tcon) 3497 if (!tcon->ipc && server->ops->qfs_tcon)
3652 server->ops->qfs_tcon(xid, tcon); 3498 server->ops->qfs_tcon(xid, tcon);
3653 3499
3654 cifs_sb->wsize = cifs_negotiate_wsize(tcon, volume_info); 3500 cifs_sb->wsize = server->ops->negotiate_wsize(tcon, volume_info);
3655 cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); 3501 cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info);
3656 3502
3657 /* tune readahead according to rsize */ 3503 /* tune readahead according to rsize */
3658 cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; 3504 cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index cbe709ad6663..7c0a81283645 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -160,17 +160,18 @@ check_name(struct dentry *direntry)
160static int 160static int
161cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, 161cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
162 struct tcon_link *tlink, unsigned oflags, umode_t mode, 162 struct tcon_link *tlink, unsigned oflags, umode_t mode,
163 __u32 *oplock, __u16 *fileHandle, int *created) 163 __u32 *oplock, struct cifs_fid *fid, int *created)
164{ 164{
165 int rc = -ENOENT; 165 int rc = -ENOENT;
166 int create_options = CREATE_NOT_DIR; 166 int create_options = CREATE_NOT_DIR;
167 int desiredAccess; 167 int desired_access;
168 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 168 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
169 struct cifs_tcon *tcon = tlink_tcon(tlink); 169 struct cifs_tcon *tcon = tlink_tcon(tlink);
170 char *full_path = NULL; 170 char *full_path = NULL;
171 FILE_ALL_INFO *buf = NULL; 171 FILE_ALL_INFO *buf = NULL;
172 struct inode *newinode = NULL; 172 struct inode *newinode = NULL;
173 int disposition; 173 int disposition;
174 struct TCP_Server_Info *server = tcon->ses->server;
174 175
175 *oplock = 0; 176 *oplock = 0;
176 if (tcon->ses->server->oplocks) 177 if (tcon->ses->server->oplocks)
@@ -185,8 +186,8 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
185 if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open && 186 if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
186 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 187 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
187 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 188 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
188 rc = cifs_posix_open(full_path, &newinode, 189 rc = cifs_posix_open(full_path, &newinode, inode->i_sb, mode,
189 inode->i_sb, mode, oflags, oplock, fileHandle, xid); 190 oflags, oplock, &fid->netfid, xid);
190 switch (rc) { 191 switch (rc) {
191 case 0: 192 case 0:
192 if (newinode == NULL) { 193 if (newinode == NULL) {
@@ -202,7 +203,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
202 * close it and proceed as if it were a normal 203 * close it and proceed as if it were a normal
203 * lookup. 204 * lookup.
204 */ 205 */
205 CIFSSMBClose(xid, tcon, *fileHandle); 206 CIFSSMBClose(xid, tcon, fid->netfid);
206 goto cifs_create_get_file_info; 207 goto cifs_create_get_file_info;
207 } 208 }
208 /* success, no need to query */ 209 /* success, no need to query */
@@ -244,11 +245,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
244 */ 245 */
245 } 246 }
246 247
247 desiredAccess = 0; 248 desired_access = 0;
248 if (OPEN_FMODE(oflags) & FMODE_READ) 249 if (OPEN_FMODE(oflags) & FMODE_READ)
249 desiredAccess |= GENERIC_READ; /* is this too little? */ 250 desired_access |= GENERIC_READ; /* is this too little? */
250 if (OPEN_FMODE(oflags) & FMODE_WRITE) 251 if (OPEN_FMODE(oflags) & FMODE_WRITE)
251 desiredAccess |= GENERIC_WRITE; 252 desired_access |= GENERIC_WRITE;
252 253
253 disposition = FILE_OVERWRITE_IF; 254 disposition = FILE_OVERWRITE_IF;
254 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 255 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -260,8 +261,15 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
260 else 261 else
261 cFYI(1, "Create flag not set in create function"); 262 cFYI(1, "Create flag not set in create function");
262 263
263 /* BB add processing to set equivalent of mode - e.g. via CreateX with 264 /*
264 ACLs */ 265 * BB add processing to set equivalent of mode - e.g. via CreateX with
266 * ACLs
267 */
268
269 if (!server->ops->open) {
270 rc = -ENOSYS;
271 goto out;
272 }
265 273
266 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 274 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
267 if (buf == NULL) { 275 if (buf == NULL) {
@@ -279,28 +287,18 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
279 if (backup_cred(cifs_sb)) 287 if (backup_cred(cifs_sb))
280 create_options |= CREATE_OPEN_BACKUP_INTENT; 288 create_options |= CREATE_OPEN_BACKUP_INTENT;
281 289
282 if (tcon->ses->capabilities & CAP_NT_SMBS) 290 rc = server->ops->open(xid, tcon, full_path, disposition,
283 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, 291 desired_access, create_options, fid, oplock,
284 desiredAccess, create_options, 292 buf, cifs_sb);
285 fileHandle, oplock, buf, cifs_sb->local_nls,
286 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
287 else
288 rc = -EIO; /* no NT SMB support fall into legacy open below */
289
290 if (rc == -EIO) {
291 /* old server, retry the open legacy style */
292 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
293 desiredAccess, create_options,
294 fileHandle, oplock, buf, cifs_sb->local_nls,
295 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
296 }
297 if (rc) { 293 if (rc) {
298 cFYI(1, "cifs_create returned 0x%x", rc); 294 cFYI(1, "cifs_create returned 0x%x", rc);
299 goto out; 295 goto out;
300 } 296 }
301 297
302 /* If Open reported that we actually created a file 298 /*
303 then we now have to set the mode if possible */ 299 * If Open reported that we actually created a file then we now have to
300 * set the mode if possible.
301 */
304 if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { 302 if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
305 struct cifs_unix_set_info_args args = { 303 struct cifs_unix_set_info_args args = {
306 .mode = mode, 304 .mode = mode,
@@ -321,11 +319,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
321 args.uid = NO_CHANGE_64; 319 args.uid = NO_CHANGE_64;
322 args.gid = NO_CHANGE_64; 320 args.gid = NO_CHANGE_64;
323 } 321 }
324 CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle, 322 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid->netfid,
325 current->tgid); 323 current->tgid);
326 } else { 324 } else {
327 /* BB implement mode setting via Windows security 325 /*
328 descriptors e.g. */ 326 * BB implement mode setting via Windows security
327 * descriptors e.g.
328 */
329 /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/ 329 /* CIFSSMBWinSetPerms(xid,tcon,path,mode,-1,-1,nls);*/
330 330
331 /* Could set r/o dos attribute if mode & 0222 == 0 */ 331 /* Could set r/o dos attribute if mode & 0222 == 0 */
@@ -334,12 +334,14 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
334cifs_create_get_file_info: 334cifs_create_get_file_info:
335 /* server might mask mode so we have to query for it */ 335 /* server might mask mode so we have to query for it */
336 if (tcon->unix_ext) 336 if (tcon->unix_ext)
337 rc = cifs_get_inode_info_unix(&newinode, full_path, 337 rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb,
338 inode->i_sb, xid); 338 xid);
339 else { 339 else {
340 rc = cifs_get_inode_info(&newinode, full_path, buf, 340 rc = cifs_get_inode_info(&newinode, full_path, buf, inode->i_sb,
341 inode->i_sb, xid, fileHandle); 341 xid, &fid->netfid);
342 if (newinode) { 342 if (newinode) {
343 if (server->ops->set_lease_key)
344 server->ops->set_lease_key(newinode, fid);
343 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 345 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
344 newinode->i_mode = mode; 346 newinode->i_mode = mode;
345 if ((*oplock & CIFS_CREATE_ACTION) && 347 if ((*oplock & CIFS_CREATE_ACTION) &&
@@ -356,19 +358,13 @@ cifs_create_get_file_info:
356cifs_create_set_dentry: 358cifs_create_set_dentry:
357 if (rc != 0) { 359 if (rc != 0) {
358 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 360 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
361 if (server->ops->close)
362 server->ops->close(xid, tcon, fid);
359 goto out; 363 goto out;
360 } 364 }
361 d_drop(direntry); 365 d_drop(direntry);
362 d_add(direntry, newinode); 366 d_add(direntry, newinode);
363 367
364 /* ENOENT for create? How weird... */
365 rc = -ENOENT;
366 if (!newinode) {
367 CIFSSMBClose(xid, tcon, *fileHandle);
368 goto out;
369 }
370 rc = 0;
371
372out: 368out:
373 kfree(buf); 369 kfree(buf);
374 kfree(full_path); 370 kfree(full_path);
@@ -384,11 +380,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
384 unsigned int xid; 380 unsigned int xid;
385 struct tcon_link *tlink; 381 struct tcon_link *tlink;
386 struct cifs_tcon *tcon; 382 struct cifs_tcon *tcon;
387 __u16 fileHandle; 383 struct TCP_Server_Info *server;
384 struct cifs_fid fid;
385 struct cifs_pending_open open;
388 __u32 oplock; 386 __u32 oplock;
389 struct cifsFileInfo *pfile_info; 387 struct cifsFileInfo *file_info;
390 388
391 /* Posix open is only called (at lookup time) for file create now. For 389 /*
390 * Posix open is only called (at lookup time) for file create now. For
392 * opens (rather than creates), because we do not know if it is a file 391 * opens (rather than creates), because we do not know if it is a file
393 * or directory yet, and current Samba no longer allows us to do posix 392 * or directory yet, and current Samba no longer allows us to do posix
394 * open on dirs, we could end up wasting an open call on what turns out 393 * open on dirs, we could end up wasting an open call on what turns out
@@ -420,22 +419,34 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
420 goto out_free_xid; 419 goto out_free_xid;
421 420
422 tcon = tlink_tcon(tlink); 421 tcon = tlink_tcon(tlink);
422 server = tcon->ses->server;
423
424 if (server->ops->new_lease_key)
425 server->ops->new_lease_key(&fid);
426
427 cifs_add_pending_open(&fid, tlink, &open);
423 428
424 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, 429 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
425 &oplock, &fileHandle, opened); 430 &oplock, &fid, opened);
426 431
427 if (rc) 432 if (rc) {
433 cifs_del_pending_open(&open);
428 goto out; 434 goto out;
435 }
429 436
430 rc = finish_open(file, direntry, generic_file_open, opened); 437 rc = finish_open(file, direntry, generic_file_open, opened);
431 if (rc) { 438 if (rc) {
432 CIFSSMBClose(xid, tcon, fileHandle); 439 if (server->ops->close)
440 server->ops->close(xid, tcon, &fid);
441 cifs_del_pending_open(&open);
433 goto out; 442 goto out;
434 } 443 }
435 444
436 pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock); 445 file_info = cifs_new_fileinfo(&fid, file, tlink, oplock);
437 if (pfile_info == NULL) { 446 if (file_info == NULL) {
438 CIFSSMBClose(xid, tcon, fileHandle); 447 if (server->ops->close)
448 server->ops->close(xid, tcon, &fid);
449 cifs_del_pending_open(&open);
439 rc = -ENOMEM; 450 rc = -ENOMEM;
440 } 451 }
441 452
@@ -460,7 +471,9 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
460 */ 471 */
461 unsigned oflags = O_EXCL | O_CREAT | O_RDWR; 472 unsigned oflags = O_EXCL | O_CREAT | O_RDWR;
462 struct tcon_link *tlink; 473 struct tcon_link *tlink;
463 __u16 fileHandle; 474 struct cifs_tcon *tcon;
475 struct TCP_Server_Info *server;
476 struct cifs_fid fid;
464 __u32 oplock; 477 __u32 oplock;
465 int created = FILE_CREATED; 478 int created = FILE_CREATED;
466 479
@@ -472,10 +485,16 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
472 if (IS_ERR(tlink)) 485 if (IS_ERR(tlink))
473 goto out_free_xid; 486 goto out_free_xid;
474 487
488 tcon = tlink_tcon(tlink);
489 server = tcon->ses->server;
490
491 if (server->ops->new_lease_key)
492 server->ops->new_lease_key(&fid);
493
475 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, 494 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
476 &oplock, &fileHandle, &created); 495 &oplock, &fid, &created);
477 if (!rc) 496 if (!rc && server->ops->close)
478 CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); 497 server->ops->close(xid, tcon, &fid);
479 498
480 cifs_put_tlink(tlink); 499 cifs_put_tlink(tlink);
481out_free_xid: 500out_free_xid:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9154192b0683..edb25b4bbb95 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -169,16 +169,20 @@ posix_open_ret:
169 169
170static int 170static int
171cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, 171cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock, 172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173 __u16 *pnetfid, unsigned int xid) 173 struct cifs_fid *fid, unsigned int xid)
174{ 174{
175 int rc; 175 int rc;
176 int desiredAccess; 176 int desired_access;
177 int disposition; 177 int disposition;
178 int create_options = CREATE_NOT_DIR; 178 int create_options = CREATE_NOT_DIR;
179 FILE_ALL_INFO *buf; 179 FILE_ALL_INFO *buf;
180 struct TCP_Server_Info *server = tcon->ses->server;
181
182 if (!server->ops->open)
183 return -ENOSYS;
180 184
181 desiredAccess = cifs_convert_flags(f_flags); 185 desired_access = cifs_convert_flags(f_flags);
182 186
183/********************************************************************* 187/*********************************************************************
184 * open flag mapping table: 188 * open flag mapping table:
@@ -215,16 +219,9 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
215 if (backup_cred(cifs_sb)) 219 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT; 220 create_options |= CREATE_OPEN_BACKUP_INTENT;
217 221
218 if (tcon->ses->capabilities & CAP_NT_SMBS) 222 rc = server->ops->open(xid, tcon, full_path, disposition,
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, 223 desired_access, create_options, fid, oplock, buf,
220 desiredAccess, create_options, pnetfid, poplock, buf, 224 cifs_sb);
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
223 else
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
228 225
229 if (rc) 226 if (rc)
230 goto out; 227 goto out;
@@ -234,7 +231,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
234 xid); 231 xid);
235 else 232 else
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 233 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
237 xid, pnetfid); 234 xid, &fid->netfid);
238 235
239out: 236out:
240 kfree(buf); 237 kfree(buf);
@@ -242,48 +239,62 @@ out:
242} 239}
243 240
244struct cifsFileInfo * 241struct cifsFileInfo *
245cifs_new_fileinfo(__u16 fileHandle, struct file *file, 242cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
246 struct tcon_link *tlink, __u32 oplock) 243 struct tcon_link *tlink, __u32 oplock)
247{ 244{
248 struct dentry *dentry = file->f_path.dentry; 245 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode; 246 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode); 247 struct cifsInodeInfo *cinode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile; 248 struct cifsFileInfo *cfile;
252 249 struct cifs_fid_locks *fdlocks;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 250 struct cifs_tcon *tcon = tlink_tcon(tlink);
254 if (pCifsFile == NULL) 251
255 return pCifsFile; 252 cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
256 253 if (cfile == NULL)
257 pCifsFile->count = 1; 254 return cfile;
258 pCifsFile->netfid = fileHandle; 255
259 pCifsFile->pid = current->tgid; 256 fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
260 pCifsFile->uid = current_fsuid(); 257 if (!fdlocks) {
261 pCifsFile->dentry = dget(dentry); 258 kfree(cfile);
262 pCifsFile->f_flags = file->f_flags; 259 return NULL;
263 pCifsFile->invalidHandle = false; 260 }
264 pCifsFile->tlink = cifs_get_tlink(tlink); 261
265 mutex_init(&pCifsFile->fh_mutex); 262 INIT_LIST_HEAD(&fdlocks->locks);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); 263 fdlocks->cfile = cfile;
267 INIT_LIST_HEAD(&pCifsFile->llist); 264 cfile->llist = fdlocks;
265 down_write(&cinode->lock_sem);
266 list_add(&fdlocks->llist, &cinode->llist);
267 up_write(&cinode->lock_sem);
268
269 cfile->count = 1;
270 cfile->pid = current->tgid;
271 cfile->uid = current_fsuid();
272 cfile->dentry = dget(dentry);
273 cfile->f_flags = file->f_flags;
274 cfile->invalidHandle = false;
275 cfile->tlink = cifs_get_tlink(tlink);
276 INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277 mutex_init(&cfile->fh_mutex);
268 278
269 spin_lock(&cifs_file_list_lock); 279 spin_lock(&cifs_file_list_lock);
270 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList)); 280 if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
281 oplock = fid->pending_open->oplock;
282 list_del(&fid->pending_open->olist);
283
284 tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
285
286 list_add(&cfile->tlist, &tcon->openFileList);
271 /* if readable file instance put first in list*/ 287 /* if readable file instance put first in list*/
272 if (file->f_mode & FMODE_READ) 288 if (file->f_mode & FMODE_READ)
273 list_add(&pCifsFile->flist, &pCifsInode->openFileList); 289 list_add(&cfile->flist, &cinode->openFileList);
274 else 290 else
275 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); 291 list_add_tail(&cfile->flist, &cinode->openFileList);
276 spin_unlock(&cifs_file_list_lock); 292 spin_unlock(&cifs_file_list_lock);
277 293
278 cifs_set_oplock_level(pCifsInode, oplock); 294 file->private_data = cfile;
279 pCifsInode->can_cache_brlcks = pCifsInode->clientCanCacheAll; 295 return cfile;
280
281 file->private_data = pCifsFile;
282 return pCifsFile;
283} 296}
284 297
285static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
286
287struct cifsFileInfo * 298struct cifsFileInfo *
288cifsFileInfo_get(struct cifsFileInfo *cifs_file) 299cifsFileInfo_get(struct cifsFileInfo *cifs_file)
289{ 300{
@@ -302,9 +313,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
302{ 313{
303 struct inode *inode = cifs_file->dentry->d_inode; 314 struct inode *inode = cifs_file->dentry->d_inode;
304 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); 315 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
316 struct TCP_Server_Info *server = tcon->ses->server;
305 struct cifsInodeInfo *cifsi = CIFS_I(inode); 317 struct cifsInodeInfo *cifsi = CIFS_I(inode);
306 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 318 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
307 struct cifsLockInfo *li, *tmp; 319 struct cifsLockInfo *li, *tmp;
320 struct cifs_fid fid;
321 struct cifs_pending_open open;
308 322
309 spin_lock(&cifs_file_list_lock); 323 spin_lock(&cifs_file_list_lock);
310 if (--cifs_file->count > 0) { 324 if (--cifs_file->count > 0) {
@@ -312,6 +326,12 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
312 return; 326 return;
313 } 327 }
314 328
329 if (server->ops->get_lease_key)
330 server->ops->get_lease_key(inode, &fid);
331
332 /* store open in pending opens to make sure we don't miss lease break */
333 cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
334
315 /* remove it from the lists */ 335 /* remove it from the lists */
316 list_del(&cifs_file->flist); 336 list_del(&cifs_file->flist);
317 list_del(&cifs_file->tlist); 337 list_del(&cifs_file->tlist);
@@ -319,13 +339,13 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
319 if (list_empty(&cifsi->openFileList)) { 339 if (list_empty(&cifsi->openFileList)) {
320 cFYI(1, "closing last open instance for inode %p", 340 cFYI(1, "closing last open instance for inode %p",
321 cifs_file->dentry->d_inode); 341 cifs_file->dentry->d_inode);
322 342 /*
323 /* in strict cache mode we need invalidate mapping on the last 343 * In strict cache mode we need invalidate mapping on the last
324 close because it may cause a error when we open this file 344 * close because it may cause a error when we open this file
325 again and get at least level II oplock */ 345 * again and get at least level II oplock.
346 */
326 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) 347 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
327 CIFS_I(inode)->invalid_mapping = true; 348 CIFS_I(inode)->invalid_mapping = true;
328
329 cifs_set_oplock_level(cifsi, 0); 349 cifs_set_oplock_level(cifsi, 0);
330 } 350 }
331 spin_unlock(&cifs_file_list_lock); 351 spin_unlock(&cifs_file_list_lock);
@@ -333,23 +353,30 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
333 cancel_work_sync(&cifs_file->oplock_break); 353 cancel_work_sync(&cifs_file->oplock_break);
334 354
335 if (!tcon->need_reconnect && !cifs_file->invalidHandle) { 355 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
356 struct TCP_Server_Info *server = tcon->ses->server;
336 unsigned int xid; 357 unsigned int xid;
337 int rc; 358
338 xid = get_xid(); 359 xid = get_xid();
339 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid); 360 if (server->ops->close)
340 free_xid(xid); 361 server->ops->close(xid, tcon, &cifs_file->fid);
362 _free_xid(xid);
341 } 363 }
342 364
343 /* Delete any outstanding lock records. We'll lose them when the file 365 cifs_del_pending_open(&open);
366
367 /*
368 * Delete any outstanding lock records. We'll lose them when the file
344 * is closed anyway. 369 * is closed anyway.
345 */ 370 */
346 mutex_lock(&cifsi->lock_mutex); 371 down_write(&cifsi->lock_sem);
347 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) { 372 list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
348 list_del(&li->llist); 373 list_del(&li->llist);
349 cifs_del_lock_waiters(li); 374 cifs_del_lock_waiters(li);
350 kfree(li); 375 kfree(li);
351 } 376 }
352 mutex_unlock(&cifsi->lock_mutex); 377 list_del(&cifs_file->llist->llist);
378 kfree(cifs_file->llist);
379 up_write(&cifsi->lock_sem);
353 380
354 cifs_put_tlink(cifs_file->tlink); 381 cifs_put_tlink(cifs_file->tlink);
355 dput(cifs_file->dentry); 382 dput(cifs_file->dentry);
@@ -357,17 +384,20 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
357} 384}
358 385
359int cifs_open(struct inode *inode, struct file *file) 386int cifs_open(struct inode *inode, struct file *file)
387
360{ 388{
361 int rc = -EACCES; 389 int rc = -EACCES;
362 unsigned int xid; 390 unsigned int xid;
363 __u32 oplock; 391 __u32 oplock;
364 struct cifs_sb_info *cifs_sb; 392 struct cifs_sb_info *cifs_sb;
393 struct TCP_Server_Info *server;
365 struct cifs_tcon *tcon; 394 struct cifs_tcon *tcon;
366 struct tcon_link *tlink; 395 struct tcon_link *tlink;
367 struct cifsFileInfo *pCifsFile = NULL; 396 struct cifsFileInfo *cfile = NULL;
368 char *full_path = NULL; 397 char *full_path = NULL;
369 bool posix_open_ok = false; 398 bool posix_open_ok = false;
370 __u16 netfid; 399 struct cifs_fid fid;
400 struct cifs_pending_open open;
371 401
372 xid = get_xid(); 402 xid = get_xid();
373 403
@@ -378,6 +408,7 @@ int cifs_open(struct inode *inode, struct file *file)
378 return PTR_ERR(tlink); 408 return PTR_ERR(tlink);
379 } 409 }
380 tcon = tlink_tcon(tlink); 410 tcon = tlink_tcon(tlink);
411 server = tcon->ses->server;
381 412
382 full_path = build_path_from_dentry(file->f_path.dentry); 413 full_path = build_path_from_dentry(file->f_path.dentry);
383 if (full_path == NULL) { 414 if (full_path == NULL) {
@@ -388,7 +419,7 @@ int cifs_open(struct inode *inode, struct file *file)
388 cFYI(1, "inode = 0x%p file flags are 0x%x for %s", 419 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
389 inode, file->f_flags, full_path); 420 inode, file->f_flags, full_path);
390 421
391 if (tcon->ses->server->oplocks) 422 if (server->oplocks)
392 oplock = REQ_OPLOCK; 423 oplock = REQ_OPLOCK;
393 else 424 else
394 oplock = 0; 425 oplock = 0;
@@ -399,7 +430,7 @@ int cifs_open(struct inode *inode, struct file *file)
399 /* can not refresh inode info since size could be stale */ 430 /* can not refresh inode info since size could be stale */
400 rc = cifs_posix_open(full_path, &inode, inode->i_sb, 431 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
401 cifs_sb->mnt_file_mode /* ignored */, 432 cifs_sb->mnt_file_mode /* ignored */,
402 file->f_flags, &oplock, &netfid, xid); 433 file->f_flags, &oplock, &fid.netfid, xid);
403 if (rc == 0) { 434 if (rc == 0) {
404 cFYI(1, "posix open succeeded"); 435 cFYI(1, "posix open succeeded");
405 posix_open_ok = true; 436 posix_open_ok = true;
@@ -415,20 +446,34 @@ int cifs_open(struct inode *inode, struct file *file)
415 } else if ((rc != -EIO) && (rc != -EREMOTE) && 446 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
416 (rc != -EOPNOTSUPP)) /* path not found or net err */ 447 (rc != -EOPNOTSUPP)) /* path not found or net err */
417 goto out; 448 goto out;
418 /* else fallthrough to retry open the old way on network i/o 449 /*
419 or DFS errors */ 450 * Else fallthrough to retry open the old way on network i/o
451 * or DFS errors.
452 */
420 } 453 }
421 454
455 if (server->ops->get_lease_key)
456 server->ops->get_lease_key(inode, &fid);
457
458 cifs_add_pending_open(&fid, tlink, &open);
459
422 if (!posix_open_ok) { 460 if (!posix_open_ok) {
461 if (server->ops->get_lease_key)
462 server->ops->get_lease_key(inode, &fid);
463
423 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, 464 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
424 file->f_flags, &oplock, &netfid, xid); 465 file->f_flags, &oplock, &fid, xid);
425 if (rc) 466 if (rc) {
467 cifs_del_pending_open(&open);
426 goto out; 468 goto out;
469 }
427 } 470 }
428 471
429 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); 472 cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
430 if (pCifsFile == NULL) { 473 if (cfile == NULL) {
431 CIFSSMBClose(xid, tcon, netfid); 474 if (server->ops->close)
475 server->ops->close(xid, tcon, &fid);
476 cifs_del_pending_open(&open);
432 rc = -ENOMEM; 477 rc = -ENOMEM;
433 goto out; 478 goto out;
434 } 479 }
@@ -436,8 +481,10 @@ int cifs_open(struct inode *inode, struct file *file)
436 cifs_fscache_set_inode_cookie(inode, file); 481 cifs_fscache_set_inode_cookie(inode, file);
437 482
438 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { 483 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
439 /* time to set mode which we can not set earlier due to 484 /*
440 problems creating new read-only files */ 485 * Time to set mode which we can not set earlier due to
486 * problems creating new read-only files.
487 */
441 struct cifs_unix_set_info_args args = { 488 struct cifs_unix_set_info_args args = {
442 .mode = inode->i_mode, 489 .mode = inode->i_mode,
443 .uid = NO_CHANGE_64, 490 .uid = NO_CHANGE_64,
@@ -447,8 +494,8 @@ int cifs_open(struct inode *inode, struct file *file)
447 .mtime = NO_CHANGE_64, 494 .mtime = NO_CHANGE_64,
448 .device = 0, 495 .device = 0,
449 }; 496 };
450 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid, 497 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
451 pCifsFile->pid); 498 cfile->pid);
452 } 499 }
453 500
454out: 501out:
@@ -458,59 +505,66 @@ out:
458 return rc; 505 return rc;
459} 506}
460 507
461/* Try to reacquire byte range locks that were released when session */ 508/*
462/* to server was lost */ 509 * Try to reacquire byte range locks that were released when session
510 * to server was lost
511 */
463static int cifs_relock_file(struct cifsFileInfo *cifsFile) 512static int cifs_relock_file(struct cifsFileInfo *cifsFile)
464{ 513{
465 int rc = 0; 514 int rc = 0;
466 515
467/* BB list all locks open on this file and relock */ 516 /* BB list all locks open on this file and relock */
468 517
469 return rc; 518 return rc;
470} 519}
471 520
472static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) 521static int
522cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
473{ 523{
474 int rc = -EACCES; 524 int rc = -EACCES;
475 unsigned int xid; 525 unsigned int xid;
476 __u32 oplock; 526 __u32 oplock;
477 struct cifs_sb_info *cifs_sb; 527 struct cifs_sb_info *cifs_sb;
478 struct cifs_tcon *tcon; 528 struct cifs_tcon *tcon;
479 struct cifsInodeInfo *pCifsInode; 529 struct TCP_Server_Info *server;
530 struct cifsInodeInfo *cinode;
480 struct inode *inode; 531 struct inode *inode;
481 char *full_path = NULL; 532 char *full_path = NULL;
482 int desiredAccess; 533 int desired_access;
483 int disposition = FILE_OPEN; 534 int disposition = FILE_OPEN;
484 int create_options = CREATE_NOT_DIR; 535 int create_options = CREATE_NOT_DIR;
485 __u16 netfid; 536 struct cifs_fid fid;
486 537
487 xid = get_xid(); 538 xid = get_xid();
488 mutex_lock(&pCifsFile->fh_mutex); 539 mutex_lock(&cfile->fh_mutex);
489 if (!pCifsFile->invalidHandle) { 540 if (!cfile->invalidHandle) {
490 mutex_unlock(&pCifsFile->fh_mutex); 541 mutex_unlock(&cfile->fh_mutex);
491 rc = 0; 542 rc = 0;
492 free_xid(xid); 543 free_xid(xid);
493 return rc; 544 return rc;
494 } 545 }
495 546
496 inode = pCifsFile->dentry->d_inode; 547 inode = cfile->dentry->d_inode;
497 cifs_sb = CIFS_SB(inode->i_sb); 548 cifs_sb = CIFS_SB(inode->i_sb);
498 tcon = tlink_tcon(pCifsFile->tlink); 549 tcon = tlink_tcon(cfile->tlink);
550 server = tcon->ses->server;
499 551
500/* can not grab rename sem here because various ops, including 552 /*
501 those that already have the rename sem can end up causing writepage 553 * Can not grab rename sem here because various ops, including those
502 to get called and if the server was down that means we end up here, 554 * that already have the rename sem can end up causing writepage to get
503 and we can never tell if the caller already has the rename_sem */ 555 * called and if the server was down that means we end up here, and we
504 full_path = build_path_from_dentry(pCifsFile->dentry); 556 * can never tell if the caller already has the rename_sem.
557 */
558 full_path = build_path_from_dentry(cfile->dentry);
505 if (full_path == NULL) { 559 if (full_path == NULL) {
506 rc = -ENOMEM; 560 rc = -ENOMEM;
507 mutex_unlock(&pCifsFile->fh_mutex); 561 mutex_unlock(&cfile->fh_mutex);
508 free_xid(xid); 562 free_xid(xid);
509 return rc; 563 return rc;
510 } 564 }
511 565
512 cFYI(1, "inode = 0x%p file flags 0x%x for %s", 566 cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
513 inode, pCifsFile->f_flags, full_path); 567 full_path);
514 568
515 if (tcon->ses->server->oplocks) 569 if (tcon->ses->server->oplocks)
516 oplock = REQ_OPLOCK; 570 oplock = REQ_OPLOCK;
@@ -524,69 +578,72 @@ static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
524 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the 578 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
525 * original open. Must mask them off for a reopen. 579 * original open. Must mask them off for a reopen.
526 */ 580 */
527 unsigned int oflags = pCifsFile->f_flags & 581 unsigned int oflags = cfile->f_flags &
528 ~(O_CREAT | O_EXCL | O_TRUNC); 582 ~(O_CREAT | O_EXCL | O_TRUNC);
529 583
530 rc = cifs_posix_open(full_path, NULL, inode->i_sb, 584 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
531 cifs_sb->mnt_file_mode /* ignored */, 585 cifs_sb->mnt_file_mode /* ignored */,
532 oflags, &oplock, &netfid, xid); 586 oflags, &oplock, &fid.netfid, xid);
533 if (rc == 0) { 587 if (rc == 0) {
534 cFYI(1, "posix reopen succeeded"); 588 cFYI(1, "posix reopen succeeded");
535 goto reopen_success; 589 goto reopen_success;
536 } 590 }
537 /* fallthrough to retry open the old way on errors, especially 591 /*
538 in the reconnect path it is important to retry hard */ 592 * fallthrough to retry open the old way on errors, especially
593 * in the reconnect path it is important to retry hard
594 */
539 } 595 }
540 596
541 desiredAccess = cifs_convert_flags(pCifsFile->f_flags); 597 desired_access = cifs_convert_flags(cfile->f_flags);
542 598
543 if (backup_cred(cifs_sb)) 599 if (backup_cred(cifs_sb))
544 create_options |= CREATE_OPEN_BACKUP_INTENT; 600 create_options |= CREATE_OPEN_BACKUP_INTENT;
545 601
546 /* Can not refresh inode by passing in file_info buf to be returned 602 if (server->ops->get_lease_key)
547 by SMBOpen and then calling get_inode_info with returned buf 603 server->ops->get_lease_key(inode, &fid);
548 since file might have write behind data that needs to be flushed
549 and server version of file size can be stale. If we knew for sure
550 that inode was not dirty locally we could do this */
551 604
552 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, 605 /*
553 create_options, &netfid, &oplock, NULL, 606 * Can not refresh inode by passing in file_info buf to be returned by
554 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 607 * CIFSSMBOpen and then calling get_inode_info with returned buf since
555 CIFS_MOUNT_MAP_SPECIAL_CHR); 608 * file might have write behind data that needs to be flushed and server
609 * version of file size can be stale. If we knew for sure that inode was
610 * not dirty locally we could do this.
611 */
612 rc = server->ops->open(xid, tcon, full_path, disposition,
613 desired_access, create_options, &fid, &oplock,
614 NULL, cifs_sb);
556 if (rc) { 615 if (rc) {
557 mutex_unlock(&pCifsFile->fh_mutex); 616 mutex_unlock(&cfile->fh_mutex);
558 cFYI(1, "cifs_open returned 0x%x", rc); 617 cFYI(1, "cifs_reopen returned 0x%x", rc);
559 cFYI(1, "oplock: %d", oplock); 618 cFYI(1, "oplock: %d", oplock);
560 goto reopen_error_exit; 619 goto reopen_error_exit;
561 } 620 }
562 621
563reopen_success: 622reopen_success:
564 pCifsFile->netfid = netfid; 623 cfile->invalidHandle = false;
565 pCifsFile->invalidHandle = false; 624 mutex_unlock(&cfile->fh_mutex);
566 mutex_unlock(&pCifsFile->fh_mutex); 625 cinode = CIFS_I(inode);
567 pCifsInode = CIFS_I(inode);
568 626
569 if (can_flush) { 627 if (can_flush) {
570 rc = filemap_write_and_wait(inode->i_mapping); 628 rc = filemap_write_and_wait(inode->i_mapping);
571 mapping_set_error(inode->i_mapping, rc); 629 mapping_set_error(inode->i_mapping, rc);
572 630
573 if (tcon->unix_ext) 631 if (tcon->unix_ext)
574 rc = cifs_get_inode_info_unix(&inode, 632 rc = cifs_get_inode_info_unix(&inode, full_path,
575 full_path, inode->i_sb, xid); 633 inode->i_sb, xid);
576 else 634 else
577 rc = cifs_get_inode_info(&inode, 635 rc = cifs_get_inode_info(&inode, full_path, NULL,
578 full_path, NULL, inode->i_sb, 636 inode->i_sb, xid, NULL);
579 xid, NULL); 637 }
580 } /* else we are writing out data to server already 638 /*
581 and could deadlock if we tried to flush data, and 639 * Else we are writing out data to server already and could deadlock if
582 since we do not know if we have data that would 640 * we tried to flush data, and since we do not know if we have data that
583 invalidate the current end of file on the server 641 * would invalidate the current end of file on the server we can not go
584 we can not go to the server to get the new inod 642 * to the server to get the new inode info.
585 info */ 643 */
586
587 cifs_set_oplock_level(pCifsInode, oplock);
588 644
589 cifs_relock_file(pCifsFile); 645 server->ops->set_fid(cfile, &fid, oplock);
646 cifs_relock_file(cfile);
590 647
591reopen_error_exit: 648reopen_error_exit:
592 kfree(full_path); 649 kfree(full_path);
@@ -609,42 +666,48 @@ int cifs_closedir(struct inode *inode, struct file *file)
609{ 666{
610 int rc = 0; 667 int rc = 0;
611 unsigned int xid; 668 unsigned int xid;
612 struct cifsFileInfo *pCFileStruct = file->private_data; 669 struct cifsFileInfo *cfile = file->private_data;
613 char *ptmp; 670 struct cifs_tcon *tcon;
671 struct TCP_Server_Info *server;
672 char *buf;
614 673
615 cFYI(1, "Closedir inode = 0x%p", inode); 674 cFYI(1, "Closedir inode = 0x%p", inode);
616 675
676 if (cfile == NULL)
677 return rc;
678
617 xid = get_xid(); 679 xid = get_xid();
680 tcon = tlink_tcon(cfile->tlink);
681 server = tcon->ses->server;
618 682
619 if (pCFileStruct) { 683 cFYI(1, "Freeing private data in close dir");
620 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink); 684 spin_lock(&cifs_file_list_lock);
685 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
686 cfile->invalidHandle = true;
687 spin_unlock(&cifs_file_list_lock);
688 if (server->ops->close_dir)
689 rc = server->ops->close_dir(xid, tcon, &cfile->fid);
690 else
691 rc = -ENOSYS;
692 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
693 /* not much we can do if it fails anyway, ignore rc */
694 rc = 0;
695 } else
696 spin_unlock(&cifs_file_list_lock);
621 697
622 cFYI(1, "Freeing private data in close dir"); 698 buf = cfile->srch_inf.ntwrk_buf_start;
623 spin_lock(&cifs_file_list_lock); 699 if (buf) {
624 if (!pCFileStruct->srch_inf.endOfSearch && 700 cFYI(1, "closedir free smb buf in srch struct");
625 !pCFileStruct->invalidHandle) { 701 cfile->srch_inf.ntwrk_buf_start = NULL;
626 pCFileStruct->invalidHandle = true; 702 if (cfile->srch_inf.smallBuf)
627 spin_unlock(&cifs_file_list_lock); 703 cifs_small_buf_release(buf);
628 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); 704 else
629 cFYI(1, "Closing uncompleted readdir with rc %d", 705 cifs_buf_release(buf);
630 rc);
631 /* not much we can do if it fails anyway, ignore rc */
632 rc = 0;
633 } else
634 spin_unlock(&cifs_file_list_lock);
635 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
636 if (ptmp) {
637 cFYI(1, "closedir free smb buf in srch struct");
638 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
639 if (pCFileStruct->srch_inf.smallBuf)
640 cifs_small_buf_release(ptmp);
641 else
642 cifs_buf_release(ptmp);
643 }
644 cifs_put_tlink(pCFileStruct->tlink);
645 kfree(file->private_data);
646 file->private_data = NULL;
647 } 706 }
707
708 cifs_put_tlink(cfile->tlink);
709 kfree(file->private_data);
710 file->private_data = NULL;
648 /* BB can we lock the filestruct while this is going on? */ 711 /* BB can we lock the filestruct while this is going on? */
649 free_xid(xid); 712 free_xid(xid);
650 return rc; 713 return rc;
@@ -666,7 +729,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type)
666 return lock; 729 return lock;
667} 730}
668 731
669static void 732void
670cifs_del_lock_waiters(struct cifsLockInfo *lock) 733cifs_del_lock_waiters(struct cifsLockInfo *lock)
671{ 734{
672 struct cifsLockInfo *li, *tmp; 735 struct cifsLockInfo *li, *tmp;
@@ -677,45 +740,47 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
677} 740}
678 741
679static bool 742static bool
680cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, 743cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
681 __u64 length, __u8 type, struct cifsFileInfo *cur, 744 __u64 length, __u8 type, struct cifsFileInfo *cfile,
682 struct cifsLockInfo **conf_lock) 745 struct cifsLockInfo **conf_lock, bool rw_check)
683{ 746{
684 struct cifsLockInfo *li; 747 struct cifsLockInfo *li;
748 struct cifsFileInfo *cur_cfile = fdlocks->cfile;
685 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 749 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
686 750
687 list_for_each_entry(li, &cfile->llist, llist) { 751 list_for_each_entry(li, &fdlocks->locks, llist) {
688 if (offset + length <= li->offset || 752 if (offset + length <= li->offset ||
689 offset >= li->offset + li->length) 753 offset >= li->offset + li->length)
690 continue; 754 continue;
691 else if ((type & server->vals->shared_lock_type) && 755 if (rw_check && server->ops->compare_fids(cfile, cur_cfile) &&
692 ((server->ops->compare_fids(cur, cfile) && 756 current->tgid == li->pid)
693 current->tgid == li->pid) || type == li->type))
694 continue; 757 continue;
695 else { 758 if ((type & server->vals->shared_lock_type) &&
759 ((server->ops->compare_fids(cfile, cur_cfile) &&
760 current->tgid == li->pid) || type == li->type))
761 continue;
762 if (conf_lock)
696 *conf_lock = li; 763 *conf_lock = li;
697 return true; 764 return true;
698 }
699 } 765 }
700 return false; 766 return false;
701} 767}
702 768
703static bool 769bool
704cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, 770cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
705 __u8 type, struct cifsLockInfo **conf_lock) 771 __u8 type, struct cifsLockInfo **conf_lock,
772 bool rw_check)
706{ 773{
707 bool rc = false; 774 bool rc = false;
708 struct cifsFileInfo *fid, *tmp; 775 struct cifs_fid_locks *cur;
709 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 776 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
710 777
711 spin_lock(&cifs_file_list_lock); 778 list_for_each_entry(cur, &cinode->llist, llist) {
712 list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) { 779 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
713 rc = cifs_find_fid_lock_conflict(fid, offset, length, type, 780 cfile, conf_lock, rw_check);
714 cfile, conf_lock);
715 if (rc) 781 if (rc)
716 break; 782 break;
717 } 783 }
718 spin_unlock(&cifs_file_list_lock);
719 784
720 return rc; 785 return rc;
721} 786}
@@ -737,10 +802,10 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
737 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 802 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
738 bool exist; 803 bool exist;
739 804
740 mutex_lock(&cinode->lock_mutex); 805 down_read(&cinode->lock_sem);
741 806
742 exist = cifs_find_lock_conflict(cfile, offset, length, type, 807 exist = cifs_find_lock_conflict(cfile, offset, length, type,
743 &conf_lock); 808 &conf_lock, false);
744 if (exist) { 809 if (exist) {
745 flock->fl_start = conf_lock->offset; 810 flock->fl_start = conf_lock->offset;
746 flock->fl_end = conf_lock->offset + conf_lock->length - 1; 811 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
@@ -754,7 +819,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
754 else 819 else
755 flock->fl_type = F_UNLCK; 820 flock->fl_type = F_UNLCK;
756 821
757 mutex_unlock(&cinode->lock_mutex); 822 up_read(&cinode->lock_sem);
758 return rc; 823 return rc;
759} 824}
760 825
@@ -762,9 +827,9 @@ static void
762cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) 827cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
763{ 828{
764 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); 829 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
765 mutex_lock(&cinode->lock_mutex); 830 down_write(&cinode->lock_sem);
766 list_add_tail(&lock->llist, &cfile->llist); 831 list_add_tail(&lock->llist, &cfile->llist->locks);
767 mutex_unlock(&cinode->lock_mutex); 832 up_write(&cinode->lock_sem);
768} 833}
769 834
770/* 835/*
@@ -784,13 +849,13 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
784 849
785try_again: 850try_again:
786 exist = false; 851 exist = false;
787 mutex_lock(&cinode->lock_mutex); 852 down_write(&cinode->lock_sem);
788 853
789 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, 854 exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
790 lock->type, &conf_lock); 855 lock->type, &conf_lock, false);
791 if (!exist && cinode->can_cache_brlcks) { 856 if (!exist && cinode->can_cache_brlcks) {
792 list_add_tail(&lock->llist, &cfile->llist); 857 list_add_tail(&lock->llist, &cfile->llist->locks);
793 mutex_unlock(&cinode->lock_mutex); 858 up_write(&cinode->lock_sem);
794 return rc; 859 return rc;
795 } 860 }
796 861
@@ -800,17 +865,17 @@ try_again:
800 rc = -EACCES; 865 rc = -EACCES;
801 else { 866 else {
802 list_add_tail(&lock->blist, &conf_lock->blist); 867 list_add_tail(&lock->blist, &conf_lock->blist);
803 mutex_unlock(&cinode->lock_mutex); 868 up_write(&cinode->lock_sem);
804 rc = wait_event_interruptible(lock->block_q, 869 rc = wait_event_interruptible(lock->block_q,
805 (lock->blist.prev == &lock->blist) && 870 (lock->blist.prev == &lock->blist) &&
806 (lock->blist.next == &lock->blist)); 871 (lock->blist.next == &lock->blist));
807 if (!rc) 872 if (!rc)
808 goto try_again; 873 goto try_again;
809 mutex_lock(&cinode->lock_mutex); 874 down_write(&cinode->lock_sem);
810 list_del_init(&lock->blist); 875 list_del_init(&lock->blist);
811 } 876 }
812 877
813 mutex_unlock(&cinode->lock_mutex); 878 up_write(&cinode->lock_sem);
814 return rc; 879 return rc;
815} 880}
816 881
@@ -831,7 +896,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
831 if ((flock->fl_flags & FL_POSIX) == 0) 896 if ((flock->fl_flags & FL_POSIX) == 0)
832 return 1; 897 return 1;
833 898
834 mutex_lock(&cinode->lock_mutex); 899 down_read(&cinode->lock_sem);
835 posix_test_lock(file, flock); 900 posix_test_lock(file, flock);
836 901
837 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { 902 if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
@@ -839,7 +904,7 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
839 rc = 1; 904 rc = 1;
840 } 905 }
841 906
842 mutex_unlock(&cinode->lock_mutex); 907 up_read(&cinode->lock_sem);
843 return rc; 908 return rc;
844} 909}
845 910
@@ -859,14 +924,14 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
859 return rc; 924 return rc;
860 925
861try_again: 926try_again:
862 mutex_lock(&cinode->lock_mutex); 927 down_write(&cinode->lock_sem);
863 if (!cinode->can_cache_brlcks) { 928 if (!cinode->can_cache_brlcks) {
864 mutex_unlock(&cinode->lock_mutex); 929 up_write(&cinode->lock_sem);
865 return rc; 930 return rc;
866 } 931 }
867 932
868 rc = posix_lock_file(file, flock, NULL); 933 rc = posix_lock_file(file, flock, NULL);
869 mutex_unlock(&cinode->lock_mutex); 934 up_write(&cinode->lock_sem);
870 if (rc == FILE_LOCK_DEFERRED) { 935 if (rc == FILE_LOCK_DEFERRED) {
871 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); 936 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
872 if (!rc) 937 if (!rc)
@@ -876,7 +941,7 @@ try_again:
876 return rc; 941 return rc;
877} 942}
878 943
879static int 944int
880cifs_push_mandatory_locks(struct cifsFileInfo *cfile) 945cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
881{ 946{
882 unsigned int xid; 947 unsigned int xid;
@@ -893,9 +958,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
893 xid = get_xid(); 958 xid = get_xid();
894 tcon = tlink_tcon(cfile->tlink); 959 tcon = tlink_tcon(cfile->tlink);
895 960
896 mutex_lock(&cinode->lock_mutex); 961 /* we are going to update can_cache_brlcks here - need a write access */
962 down_write(&cinode->lock_sem);
897 if (!cinode->can_cache_brlcks) { 963 if (!cinode->can_cache_brlcks) {
898 mutex_unlock(&cinode->lock_mutex); 964 up_write(&cinode->lock_sem);
899 free_xid(xid); 965 free_xid(xid);
900 return rc; 966 return rc;
901 } 967 }
@@ -906,7 +972,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
906 */ 972 */
907 max_buf = tcon->ses->server->maxBuf; 973 max_buf = tcon->ses->server->maxBuf;
908 if (!max_buf) { 974 if (!max_buf) {
909 mutex_unlock(&cinode->lock_mutex); 975 up_write(&cinode->lock_sem);
910 free_xid(xid); 976 free_xid(xid);
911 return -EINVAL; 977 return -EINVAL;
912 } 978 }
@@ -915,15 +981,15 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
915 sizeof(LOCKING_ANDX_RANGE); 981 sizeof(LOCKING_ANDX_RANGE);
916 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 982 buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
917 if (!buf) { 983 if (!buf) {
918 mutex_unlock(&cinode->lock_mutex); 984 up_write(&cinode->lock_sem);
919 free_xid(xid); 985 free_xid(xid);
920 return rc; 986 return -ENOMEM;
921 } 987 }
922 988
923 for (i = 0; i < 2; i++) { 989 for (i = 0; i < 2; i++) {
924 cur = buf; 990 cur = buf;
925 num = 0; 991 num = 0;
926 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { 992 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
927 if (li->type != types[i]) 993 if (li->type != types[i])
928 continue; 994 continue;
929 cur->Pid = cpu_to_le16(li->pid); 995 cur->Pid = cpu_to_le16(li->pid);
@@ -932,7 +998,8 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
932 cur->OffsetLow = cpu_to_le32((u32)li->offset); 998 cur->OffsetLow = cpu_to_le32((u32)li->offset);
933 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); 999 cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
934 if (++num == max_num) { 1000 if (++num == max_num) {
935 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1001 stored_rc = cifs_lockv(xid, tcon,
1002 cfile->fid.netfid,
936 (__u8)li->type, 0, num, 1003 (__u8)li->type, 0, num,
937 buf); 1004 buf);
938 if (stored_rc) 1005 if (stored_rc)
@@ -944,7 +1011,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
944 } 1011 }
945 1012
946 if (num) { 1013 if (num) {
947 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1014 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
948 (__u8)types[i], 0, num, buf); 1015 (__u8)types[i], 0, num, buf);
949 if (stored_rc) 1016 if (stored_rc)
950 rc = stored_rc; 1017 rc = stored_rc;
@@ -952,7 +1019,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
952 } 1019 }
953 1020
954 cinode->can_cache_brlcks = false; 1021 cinode->can_cache_brlcks = false;
955 mutex_unlock(&cinode->lock_mutex); 1022 up_write(&cinode->lock_sem);
956 1023
957 kfree(buf); 1024 kfree(buf);
958 free_xid(xid); 1025 free_xid(xid);
@@ -987,9 +1054,10 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
987 1054
988 xid = get_xid(); 1055 xid = get_xid();
989 1056
990 mutex_lock(&cinode->lock_mutex); 1057 /* we are going to update can_cache_brlcks here - need a write access */
1058 down_write(&cinode->lock_sem);
991 if (!cinode->can_cache_brlcks) { 1059 if (!cinode->can_cache_brlcks) {
992 mutex_unlock(&cinode->lock_mutex); 1060 up_write(&cinode->lock_sem);
993 free_xid(xid); 1061 free_xid(xid);
994 return rc; 1062 return rc;
995 } 1063 }
@@ -1005,7 +1073,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1005 1073
1006 /* 1074 /*
1007 * Allocating count locks is enough because no FL_POSIX locks can be 1075 * Allocating count locks is enough because no FL_POSIX locks can be
1008 * added to the list while we are holding cinode->lock_mutex that 1076 * added to the list while we are holding cinode->lock_sem that
1009 * protects locking operations of this inode. 1077 * protects locking operations of this inode.
1010 */ 1078 */
1011 for (; i < count; i++) { 1079 for (; i < count; i++) {
@@ -1038,7 +1106,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1038 type = CIFS_WRLCK; 1106 type = CIFS_WRLCK;
1039 lck = list_entry(el, struct lock_to_push, llist); 1107 lck = list_entry(el, struct lock_to_push, llist);
1040 lck->pid = flock->fl_pid; 1108 lck->pid = flock->fl_pid;
1041 lck->netfid = cfile->netfid; 1109 lck->netfid = cfile->fid.netfid;
1042 lck->length = length; 1110 lck->length = length;
1043 lck->type = type; 1111 lck->type = type;
1044 lck->offset = flock->fl_start; 1112 lck->offset = flock->fl_start;
@@ -1060,7 +1128,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
1060 1128
1061out: 1129out:
1062 cinode->can_cache_brlcks = false; 1130 cinode->can_cache_brlcks = false;
1063 mutex_unlock(&cinode->lock_mutex); 1131 up_write(&cinode->lock_sem);
1064 1132
1065 free_xid(xid); 1133 free_xid(xid);
1066 return rc; 1134 return rc;
@@ -1083,7 +1151,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
1083 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) 1151 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1084 return cifs_push_posix_locks(cfile); 1152 return cifs_push_posix_locks(cfile);
1085 1153
1086 return cifs_push_mandatory_locks(cfile); 1154 return tcon->ses->server->ops->push_mand_locks(cfile);
1087} 1155}
1088 1156
1089static void 1157static void
@@ -1104,7 +1172,8 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1104 if (flock->fl_flags & FL_LEASE) 1172 if (flock->fl_flags & FL_LEASE)
1105 cFYI(1, "Lease on file - not implemented yet"); 1173 cFYI(1, "Lease on file - not implemented yet");
1106 if (flock->fl_flags & 1174 if (flock->fl_flags &
1107 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) 1175 (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1176 FL_ACCESS | FL_LEASE | FL_CLOSE)))
1108 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags); 1177 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1109 1178
1110 *type = server->vals->large_lock_type; 1179 *type = server->vals->large_lock_type;
@@ -1134,15 +1203,6 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1134} 1203}
1135 1204
1136static int 1205static int
1137cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
1138 __u64 length, __u32 type, int lock, int unlock, bool wait)
1139{
1140 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->netfid,
1141 current->tgid, length, offset, unlock, lock,
1142 (__u8)type, wait, 0);
1143}
1144
1145static int
1146cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, 1206cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1147 bool wait_flag, bool posix_lck, unsigned int xid) 1207 bool wait_flag, bool posix_lck, unsigned int xid)
1148{ 1208{
@@ -1151,7 +1211,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1151 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1211 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1152 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1212 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1153 struct TCP_Server_Info *server = tcon->ses->server; 1213 struct TCP_Server_Info *server = tcon->ses->server;
1154 __u16 netfid = cfile->netfid; 1214 __u16 netfid = cfile->fid.netfid;
1155 1215
1156 if (posix_lck) { 1216 if (posix_lck) {
1157 int posix_lock_type; 1217 int posix_lock_type;
@@ -1175,11 +1235,11 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1175 return rc; 1235 return rc;
1176 1236
1177 /* BB we could chain these into one lock request BB */ 1237 /* BB we could chain these into one lock request BB */
1178 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type, 1238 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1179 1, 0, false); 1239 1, 0, false);
1180 if (rc == 0) { 1240 if (rc == 0) {
1181 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, 1241 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1182 type, 0, 1, false); 1242 type, 0, 1, false);
1183 flock->fl_type = F_UNLCK; 1243 flock->fl_type = F_UNLCK;
1184 if (rc != 0) 1244 if (rc != 0)
1185 cERROR(1, "Error unlocking previously locked " 1245 cERROR(1, "Error unlocking previously locked "
@@ -1192,13 +1252,14 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1192 return 0; 1252 return 0;
1193 } 1253 }
1194 1254
1195 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, 1255 type &= ~server->vals->exclusive_lock_type;
1196 type | server->vals->shared_lock_type, 1, 0, 1256
1197 false); 1257 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1258 type | server->vals->shared_lock_type,
1259 1, 0, false);
1198 if (rc == 0) { 1260 if (rc == 0) {
1199 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, 1261 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1200 type | server->vals->shared_lock_type, 1262 type | server->vals->shared_lock_type, 0, 1, false);
1201 0, 1, false);
1202 flock->fl_type = F_RDLCK; 1263 flock->fl_type = F_RDLCK;
1203 if (rc != 0) 1264 if (rc != 0)
1204 cERROR(1, "Error unlocking previously locked " 1265 cERROR(1, "Error unlocking previously locked "
@@ -1209,7 +1270,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1209 return 0; 1270 return 0;
1210} 1271}
1211 1272
1212static void 1273void
1213cifs_move_llist(struct list_head *source, struct list_head *dest) 1274cifs_move_llist(struct list_head *source, struct list_head *dest)
1214{ 1275{
1215 struct list_head *li, *tmp; 1276 struct list_head *li, *tmp;
@@ -1217,7 +1278,7 @@ cifs_move_llist(struct list_head *source, struct list_head *dest)
1217 list_move(li, dest); 1278 list_move(li, dest);
1218} 1279}
1219 1280
1220static void 1281void
1221cifs_free_llist(struct list_head *llist) 1282cifs_free_llist(struct list_head *llist)
1222{ 1283{
1223 struct cifsLockInfo *li, *tmp; 1284 struct cifsLockInfo *li, *tmp;
@@ -1228,7 +1289,7 @@ cifs_free_llist(struct list_head *llist)
1228 } 1289 }
1229} 1290}
1230 1291
1231static int 1292int
1232cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, 1293cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1233 unsigned int xid) 1294 unsigned int xid)
1234{ 1295{
@@ -1260,11 +1321,11 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1260 if (!buf) 1321 if (!buf)
1261 return -ENOMEM; 1322 return -ENOMEM;
1262 1323
1263 mutex_lock(&cinode->lock_mutex); 1324 down_write(&cinode->lock_sem);
1264 for (i = 0; i < 2; i++) { 1325 for (i = 0; i < 2; i++) {
1265 cur = buf; 1326 cur = buf;
1266 num = 0; 1327 num = 0;
1267 list_for_each_entry_safe(li, tmp, &cfile->llist, llist) { 1328 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1268 if (flock->fl_start > li->offset || 1329 if (flock->fl_start > li->offset ||
1269 (flock->fl_start + length) < 1330 (flock->fl_start + length) <
1270 (li->offset + li->length)) 1331 (li->offset + li->length))
@@ -1295,7 +1356,8 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1295 */ 1356 */
1296 list_move(&li->llist, &tmp_llist); 1357 list_move(&li->llist, &tmp_llist);
1297 if (++num == max_num) { 1358 if (++num == max_num) {
1298 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1359 stored_rc = cifs_lockv(xid, tcon,
1360 cfile->fid.netfid,
1299 li->type, num, 0, buf); 1361 li->type, num, 0, buf);
1300 if (stored_rc) { 1362 if (stored_rc) {
1301 /* 1363 /*
@@ -1304,7 +1366,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1304 * list to the head of the file's list. 1366 * list to the head of the file's list.
1305 */ 1367 */
1306 cifs_move_llist(&tmp_llist, 1368 cifs_move_llist(&tmp_llist,
1307 &cfile->llist); 1369 &cfile->llist->locks);
1308 rc = stored_rc; 1370 rc = stored_rc;
1309 } else 1371 } else
1310 /* 1372 /*
@@ -1318,23 +1380,24 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1318 cur++; 1380 cur++;
1319 } 1381 }
1320 if (num) { 1382 if (num) {
1321 stored_rc = cifs_lockv(xid, tcon, cfile->netfid, 1383 stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1322 types[i], num, 0, buf); 1384 types[i], num, 0, buf);
1323 if (stored_rc) { 1385 if (stored_rc) {
1324 cifs_move_llist(&tmp_llist, &cfile->llist); 1386 cifs_move_llist(&tmp_llist,
1387 &cfile->llist->locks);
1325 rc = stored_rc; 1388 rc = stored_rc;
1326 } else 1389 } else
1327 cifs_free_llist(&tmp_llist); 1390 cifs_free_llist(&tmp_llist);
1328 } 1391 }
1329 } 1392 }
1330 1393
1331 mutex_unlock(&cinode->lock_mutex); 1394 up_write(&cinode->lock_sem);
1332 kfree(buf); 1395 kfree(buf);
1333 return rc; 1396 return rc;
1334} 1397}
1335 1398
1336static int 1399static int
1337cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, 1400cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1338 bool wait_flag, bool posix_lck, int lock, int unlock, 1401 bool wait_flag, bool posix_lck, int lock, int unlock,
1339 unsigned int xid) 1402 unsigned int xid)
1340{ 1403{
@@ -1343,7 +1406,6 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1343 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; 1406 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1344 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 1407 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1345 struct TCP_Server_Info *server = tcon->ses->server; 1408 struct TCP_Server_Info *server = tcon->ses->server;
1346 __u16 netfid = cfile->netfid;
1347 1409
1348 if (posix_lck) { 1410 if (posix_lck) {
1349 int posix_lock_type; 1411 int posix_lock_type;
@@ -1360,9 +1422,9 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1360 if (unlock == 1) 1422 if (unlock == 1)
1361 posix_lock_type = CIFS_UNLCK; 1423 posix_lock_type = CIFS_UNLCK;
1362 1424
1363 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid, 1425 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1364 flock->fl_start, length, NULL, 1426 current->tgid, flock->fl_start, length,
1365 posix_lock_type, wait_flag); 1427 NULL, posix_lock_type, wait_flag);
1366 goto out; 1428 goto out;
1367 } 1429 }
1368 1430
@@ -1379,8 +1441,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1379 if (rc <= 0) 1441 if (rc <= 0)
1380 goto out; 1442 goto out;
1381 1443
1382 rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, 1444 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1383 type, 1, 0, wait_flag); 1445 type, 1, 0, wait_flag);
1384 if (rc) { 1446 if (rc) {
1385 kfree(lock); 1447 kfree(lock);
1386 goto out; 1448 goto out;
@@ -1388,7 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1388 1450
1389 cifs_lock_add(cfile, lock); 1451 cifs_lock_add(cfile, lock);
1390 } else if (unlock) 1452 } else if (unlock)
1391 rc = cifs_unlock_range(cfile, flock, xid); 1453 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1392 1454
1393out: 1455out:
1394 if (flock->fl_flags & FL_POSIX) 1456 if (flock->fl_flags & FL_POSIX)
@@ -1423,7 +1485,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1423 tcon->ses->server); 1485 tcon->ses->server);
1424 1486
1425 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1487 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1426 netfid = cfile->netfid; 1488 netfid = cfile->fid.netfid;
1427 cinode = CIFS_I(file->f_path.dentry->d_inode); 1489 cinode = CIFS_I(file->f_path.dentry->d_inode);
1428 1490
1429 if (cap_unix(tcon->ses) && 1491 if (cap_unix(tcon->ses) &&
@@ -1469,15 +1531,16 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1469 cifsi->server_eof = end_of_write; 1531 cifsi->server_eof = end_of_write;
1470} 1532}
1471 1533
1472static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid, 1534static ssize_t
1473 const char *write_data, size_t write_size, 1535cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1474 loff_t *poffset) 1536 size_t write_size, loff_t *offset)
1475{ 1537{
1476 int rc = 0; 1538 int rc = 0;
1477 unsigned int bytes_written = 0; 1539 unsigned int bytes_written = 0;
1478 unsigned int total_written; 1540 unsigned int total_written;
1479 struct cifs_sb_info *cifs_sb; 1541 struct cifs_sb_info *cifs_sb;
1480 struct cifs_tcon *pTcon; 1542 struct cifs_tcon *tcon;
1543 struct TCP_Server_Info *server;
1481 unsigned int xid; 1544 unsigned int xid;
1482 struct dentry *dentry = open_file->dentry; 1545 struct dentry *dentry = open_file->dentry;
1483 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); 1546 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
@@ -1486,9 +1549,13 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1486 cifs_sb = CIFS_SB(dentry->d_sb); 1549 cifs_sb = CIFS_SB(dentry->d_sb);
1487 1550
1488 cFYI(1, "write %zd bytes to offset %lld of %s", write_size, 1551 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1489 *poffset, dentry->d_name.name); 1552 *offset, dentry->d_name.name);
1490 1553
1491 pTcon = tlink_tcon(open_file->tlink); 1554 tcon = tlink_tcon(open_file->tlink);
1555 server = tcon->ses->server;
1556
1557 if (!server->ops->sync_write)
1558 return -ENOSYS;
1492 1559
1493 xid = get_xid(); 1560 xid = get_xid();
1494 1561
@@ -1514,13 +1581,12 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1514 /* iov[0] is reserved for smb header */ 1581 /* iov[0] is reserved for smb header */
1515 iov[1].iov_base = (char *)write_data + total_written; 1582 iov[1].iov_base = (char *)write_data + total_written;
1516 iov[1].iov_len = len; 1583 iov[1].iov_len = len;
1517 io_parms.netfid = open_file->netfid;
1518 io_parms.pid = pid; 1584 io_parms.pid = pid;
1519 io_parms.tcon = pTcon; 1585 io_parms.tcon = tcon;
1520 io_parms.offset = *poffset; 1586 io_parms.offset = *offset;
1521 io_parms.length = len; 1587 io_parms.length = len;
1522 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov, 1588 rc = server->ops->sync_write(xid, open_file, &io_parms,
1523 1, 0); 1589 &bytes_written, iov, 1);
1524 } 1590 }
1525 if (rc || (bytes_written == 0)) { 1591 if (rc || (bytes_written == 0)) {
1526 if (total_written) 1592 if (total_written)
@@ -1531,18 +1597,18 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
1531 } 1597 }
1532 } else { 1598 } else {
1533 spin_lock(&dentry->d_inode->i_lock); 1599 spin_lock(&dentry->d_inode->i_lock);
1534 cifs_update_eof(cifsi, *poffset, bytes_written); 1600 cifs_update_eof(cifsi, *offset, bytes_written);
1535 spin_unlock(&dentry->d_inode->i_lock); 1601 spin_unlock(&dentry->d_inode->i_lock);
1536 *poffset += bytes_written; 1602 *offset += bytes_written;
1537 } 1603 }
1538 } 1604 }
1539 1605
1540 cifs_stats_bytes_written(pTcon, total_written); 1606 cifs_stats_bytes_written(tcon, total_written);
1541 1607
1542 if (total_written > 0) { 1608 if (total_written > 0) {
1543 spin_lock(&dentry->d_inode->i_lock); 1609 spin_lock(&dentry->d_inode->i_lock);
1544 if (*poffset > dentry->d_inode->i_size) 1610 if (*offset > dentry->d_inode->i_size)
1545 i_size_write(dentry->d_inode, *poffset); 1611 i_size_write(dentry->d_inode, *offset);
1546 spin_unlock(&dentry->d_inode->i_lock); 1612 spin_unlock(&dentry->d_inode->i_lock);
1547 } 1613 }
1548 mark_inode_dirty_sync(dentry->d_inode); 1614 mark_inode_dirty_sync(dentry->d_inode);
@@ -1718,27 +1784,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1718 return rc; 1784 return rc;
1719} 1785}
1720 1786
1721/*
1722 * Marshal up the iov array, reserving the first one for the header. Also,
1723 * set wdata->bytes.
1724 */
1725static void
1726cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
1727{
1728 int i;
1729 struct inode *inode = wdata->cfile->dentry->d_inode;
1730 loff_t size = i_size_read(inode);
1731
1732 /* marshal up the pages into iov array */
1733 wdata->bytes = 0;
1734 for (i = 0; i < wdata->nr_pages; i++) {
1735 iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
1736 (loff_t)PAGE_CACHE_SIZE);
1737 iov[i + 1].iov_base = kmap(wdata->pages[i]);
1738 wdata->bytes += iov[i + 1].iov_len;
1739 }
1740}
1741
1742static int cifs_writepages(struct address_space *mapping, 1787static int cifs_writepages(struct address_space *mapping,
1743 struct writeback_control *wbc) 1788 struct writeback_control *wbc)
1744{ 1789{
@@ -1746,8 +1791,10 @@ static int cifs_writepages(struct address_space *mapping,
1746 bool done = false, scanned = false, range_whole = false; 1791 bool done = false, scanned = false, range_whole = false;
1747 pgoff_t end, index; 1792 pgoff_t end, index;
1748 struct cifs_writedata *wdata; 1793 struct cifs_writedata *wdata;
1794 struct TCP_Server_Info *server;
1749 struct page *page; 1795 struct page *page;
1750 int rc = 0; 1796 int rc = 0;
1797 loff_t isize = i_size_read(mapping->host);
1751 1798
1752 /* 1799 /*
1753 * If wsize is smaller than the page cache size, default to writing 1800 * If wsize is smaller than the page cache size, default to writing
@@ -1852,7 +1899,7 @@ retry:
1852 */ 1899 */
1853 set_page_writeback(page); 1900 set_page_writeback(page);
1854 1901
1855 if (page_offset(page) >= mapping->host->i_size) { 1902 if (page_offset(page) >= isize) {
1856 done = true; 1903 done = true;
1857 unlock_page(page); 1904 unlock_page(page);
1858 end_page_writeback(page); 1905 end_page_writeback(page);
@@ -1883,7 +1930,12 @@ retry:
1883 wdata->sync_mode = wbc->sync_mode; 1930 wdata->sync_mode = wbc->sync_mode;
1884 wdata->nr_pages = nr_pages; 1931 wdata->nr_pages = nr_pages;
1885 wdata->offset = page_offset(wdata->pages[0]); 1932 wdata->offset = page_offset(wdata->pages[0]);
1886 wdata->marshal_iov = cifs_writepages_marshal_iov; 1933 wdata->pagesz = PAGE_CACHE_SIZE;
1934 wdata->tailsz =
1935 min(isize - page_offset(wdata->pages[nr_pages - 1]),
1936 (loff_t)PAGE_CACHE_SIZE);
1937 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
1938 wdata->tailsz;
1887 1939
1888 do { 1940 do {
1889 if (wdata->cfile != NULL) 1941 if (wdata->cfile != NULL)
@@ -1896,7 +1948,8 @@ retry:
1896 break; 1948 break;
1897 } 1949 }
1898 wdata->pid = wdata->cfile->pid; 1950 wdata->pid = wdata->cfile->pid;
1899 rc = cifs_async_writev(wdata); 1951 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1952 rc = server->ops->async_writev(wdata);
1900 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); 1953 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1901 1954
1902 for (i = 0; i < nr_pages; ++i) 1955 for (i = 0; i < nr_pages; ++i)
@@ -2054,6 +2107,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2054 unsigned int xid; 2107 unsigned int xid;
2055 int rc = 0; 2108 int rc = 0;
2056 struct cifs_tcon *tcon; 2109 struct cifs_tcon *tcon;
2110 struct TCP_Server_Info *server;
2057 struct cifsFileInfo *smbfile = file->private_data; 2111 struct cifsFileInfo *smbfile = file->private_data;
2058 struct inode *inode = file->f_path.dentry->d_inode; 2112 struct inode *inode = file->f_path.dentry->d_inode;
2059 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 2113 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -2077,8 +2131,13 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2077 } 2131 }
2078 2132
2079 tcon = tlink_tcon(smbfile->tlink); 2133 tcon = tlink_tcon(smbfile->tlink);
2080 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) 2134 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2081 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 2135 server = tcon->ses->server;
2136 if (server->ops->flush)
2137 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2138 else
2139 rc = -ENOSYS;
2140 }
2082 2141
2083 free_xid(xid); 2142 free_xid(xid);
2084 mutex_unlock(&inode->i_mutex); 2143 mutex_unlock(&inode->i_mutex);
@@ -2090,6 +2149,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2090 unsigned int xid; 2149 unsigned int xid;
2091 int rc = 0; 2150 int rc = 0;
2092 struct cifs_tcon *tcon; 2151 struct cifs_tcon *tcon;
2152 struct TCP_Server_Info *server;
2093 struct cifsFileInfo *smbfile = file->private_data; 2153 struct cifsFileInfo *smbfile = file->private_data;
2094 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2154 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2095 struct inode *inode = file->f_mapping->host; 2155 struct inode *inode = file->f_mapping->host;
@@ -2105,8 +2165,13 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2105 file->f_path.dentry->d_name.name, datasync); 2165 file->f_path.dentry->d_name.name, datasync);
2106 2166
2107 tcon = tlink_tcon(smbfile->tlink); 2167 tcon = tlink_tcon(smbfile->tlink);
2108 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) 2168 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2109 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 2169 server = tcon->ses->server;
2170 if (server->ops->flush)
2171 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2172 else
2173 rc = -ENOSYS;
2174 }
2110 2175
2111 free_xid(xid); 2176 free_xid(xid);
2112 mutex_unlock(&inode->i_mutex); 2177 mutex_unlock(&inode->i_mutex);
@@ -2172,20 +2237,6 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2172} 2237}
2173 2238
2174static void 2239static void
2175cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
2176{
2177 int i;
2178 size_t bytes = wdata->bytes;
2179
2180 /* marshal up the pages into iov array */
2181 for (i = 0; i < wdata->nr_pages; i++) {
2182 iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
2183 iov[i + 1].iov_base = kmap(wdata->pages[i]);
2184 bytes -= iov[i + 1].iov_len;
2185 }
2186}
2187
2188static void
2189cifs_uncached_writev_complete(struct work_struct *work) 2240cifs_uncached_writev_complete(struct work_struct *work)
2190{ 2241{
2191 int i; 2242 int i;
@@ -2215,6 +2266,9 @@ static int
2215cifs_uncached_retry_writev(struct cifs_writedata *wdata) 2266cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2216{ 2267{
2217 int rc; 2268 int rc;
2269 struct TCP_Server_Info *server;
2270
2271 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2218 2272
2219 do { 2273 do {
2220 if (wdata->cfile->invalidHandle) { 2274 if (wdata->cfile->invalidHandle) {
@@ -2222,7 +2276,7 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2222 if (rc != 0) 2276 if (rc != 0)
2223 continue; 2277 continue;
2224 } 2278 }
2225 rc = cifs_async_writev(wdata); 2279 rc = server->ops->async_writev(wdata);
2226 } while (rc == -EAGAIN); 2280 } while (rc == -EAGAIN);
2227 2281
2228 return rc; 2282 return rc;
@@ -2257,6 +2311,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
2257 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2311 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2258 open_file = file->private_data; 2312 open_file = file->private_data;
2259 tcon = tlink_tcon(open_file->tlink); 2313 tcon = tlink_tcon(open_file->tlink);
2314
2315 if (!tcon->ses->server->ops->async_writev)
2316 return -ENOSYS;
2317
2260 offset = *poffset; 2318 offset = *poffset;
2261 2319
2262 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2320 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
@@ -2298,7 +2356,8 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
2298 wdata->cfile = cifsFileInfo_get(open_file); 2356 wdata->cfile = cifsFileInfo_get(open_file);
2299 wdata->pid = pid; 2357 wdata->pid = pid;
2300 wdata->bytes = cur_len; 2358 wdata->bytes = cur_len;
2301 wdata->marshal_iov = cifs_uncached_marshal_iov; 2359 wdata->pagesz = PAGE_SIZE;
2360 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2302 rc = cifs_uncached_retry_writev(wdata); 2361 rc = cifs_uncached_retry_writev(wdata);
2303 if (rc) { 2362 if (rc) {
2304 kref_put(&wdata->refcount, cifs_writedata_release); 2363 kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2376,40 +2435,110 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2376 return written; 2435 return written;
2377} 2436}
2378 2437
2379ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 2438static ssize_t
2380 unsigned long nr_segs, loff_t pos) 2439cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2440 unsigned long nr_segs, loff_t pos)
2381{ 2441{
2382 struct inode *inode; 2442 struct file *file = iocb->ki_filp;
2443 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2444 struct inode *inode = file->f_mapping->host;
2445 struct cifsInodeInfo *cinode = CIFS_I(inode);
2446 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2447 ssize_t rc = -EACCES;
2383 2448
2384 inode = iocb->ki_filp->f_path.dentry->d_inode; 2449 BUG_ON(iocb->ki_pos != pos);
2385 2450
2386 if (CIFS_I(inode)->clientCanCacheAll) 2451 sb_start_write(inode->i_sb);
2387 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2388 2452
2389 /* 2453 /*
2390 * In strict cache mode we need to write the data to the server exactly 2454 * We need to hold the sem to be sure nobody modifies lock list
2391 * from the pos to pos+len-1 rather than flush all affected pages 2455 * with a brlock that prevents writing.
2392 * because it may cause a error with mandatory locks on these pages but
2393 * not on the region from pos to ppos+len-1.
2394 */ 2456 */
2457 down_read(&cinode->lock_sem);
2458 if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2459 server->vals->exclusive_lock_type, NULL,
2460 true)) {
2461 mutex_lock(&inode->i_mutex);
2462 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2463 &iocb->ki_pos);
2464 mutex_unlock(&inode->i_mutex);
2465 }
2395 2466
2396 return cifs_user_writev(iocb, iov, nr_segs, pos); 2467 if (rc > 0 || rc == -EIOCBQUEUED) {
2468 ssize_t err;
2469
2470 err = generic_write_sync(file, pos, rc);
2471 if (err < 0 && rc > 0)
2472 rc = err;
2473 }
2474
2475 up_read(&cinode->lock_sem);
2476 sb_end_write(inode->i_sb);
2477 return rc;
2478}
2479
2480ssize_t
2481cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2482 unsigned long nr_segs, loff_t pos)
2483{
2484 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2485 struct cifsInodeInfo *cinode = CIFS_I(inode);
2486 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2487 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2488 iocb->ki_filp->private_data;
2489 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2490
2491#ifdef CONFIG_CIFS_SMB2
2492 /*
2493 * If we have an oplock for read and want to write a data to the file
2494 * we need to store it in the page cache and then push it to the server
2495 * to be sure the next read will get a valid data.
2496 */
2497 if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) {
2498 ssize_t written;
2499 int rc;
2500
2501 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
2502 rc = filemap_fdatawrite(inode->i_mapping);
2503 if (rc)
2504 return (ssize_t)rc;
2505
2506 return written;
2507 }
2508#endif
2509
2510 /*
2511 * For non-oplocked files in strict cache mode we need to write the data
2512 * to the server exactly from the pos to pos+len-1 rather than flush all
2513 * affected pages because it may cause a error with mandatory locks on
2514 * these pages but not on the region from pos to ppos+len-1.
2515 */
2516
2517 if (!cinode->clientCanCacheAll)
2518 return cifs_user_writev(iocb, iov, nr_segs, pos);
2519
2520 if (cap_unix(tcon->ses) &&
2521 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2522 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2523 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2524
2525 return cifs_writev(iocb, iov, nr_segs, pos);
2397} 2526}
2398 2527
2399static struct cifs_readdata * 2528static struct cifs_readdata *
2400cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete) 2529cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2401{ 2530{
2402 struct cifs_readdata *rdata; 2531 struct cifs_readdata *rdata;
2403 2532
2404 rdata = kzalloc(sizeof(*rdata) + 2533 rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2405 sizeof(struct kvec) * nr_vecs, GFP_KERNEL); 2534 GFP_KERNEL);
2406 if (rdata != NULL) { 2535 if (rdata != NULL) {
2407 kref_init(&rdata->refcount); 2536 kref_init(&rdata->refcount);
2408 INIT_LIST_HEAD(&rdata->list); 2537 INIT_LIST_HEAD(&rdata->list);
2409 init_completion(&rdata->done); 2538 init_completion(&rdata->done);
2410 INIT_WORK(&rdata->work, complete); 2539 INIT_WORK(&rdata->work, complete);
2411 INIT_LIST_HEAD(&rdata->pages);
2412 } 2540 }
2541
2413 return rdata; 2542 return rdata;
2414} 2543}
2415 2544
@@ -2426,25 +2555,25 @@ cifs_readdata_release(struct kref *refcount)
2426} 2555}
2427 2556
2428static int 2557static int
2429cifs_read_allocate_pages(struct list_head *list, unsigned int npages) 2558cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2430{ 2559{
2431 int rc = 0; 2560 int rc = 0;
2432 struct page *page, *tpage; 2561 struct page *page;
2433 unsigned int i; 2562 unsigned int i;
2434 2563
2435 for (i = 0; i < npages; i++) { 2564 for (i = 0; i < nr_pages; i++) {
2436 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); 2565 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2437 if (!page) { 2566 if (!page) {
2438 rc = -ENOMEM; 2567 rc = -ENOMEM;
2439 break; 2568 break;
2440 } 2569 }
2441 list_add(&page->lru, list); 2570 rdata->pages[i] = page;
2442 } 2571 }
2443 2572
2444 if (rc) { 2573 if (rc) {
2445 list_for_each_entry_safe(page, tpage, list, lru) { 2574 for (i = 0; i < nr_pages; i++) {
2446 list_del(&page->lru); 2575 put_page(rdata->pages[i]);
2447 put_page(page); 2576 rdata->pages[i] = NULL;
2448 } 2577 }
2449 } 2578 }
2450 return rc; 2579 return rc;
@@ -2453,13 +2582,13 @@ cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
2453static void 2582static void
2454cifs_uncached_readdata_release(struct kref *refcount) 2583cifs_uncached_readdata_release(struct kref *refcount)
2455{ 2584{
2456 struct page *page, *tpage;
2457 struct cifs_readdata *rdata = container_of(refcount, 2585 struct cifs_readdata *rdata = container_of(refcount,
2458 struct cifs_readdata, refcount); 2586 struct cifs_readdata, refcount);
2587 unsigned int i;
2459 2588
2460 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { 2589 for (i = 0; i < rdata->nr_pages; i++) {
2461 list_del(&page->lru); 2590 put_page(rdata->pages[i]);
2462 put_page(page); 2591 rdata->pages[i] = NULL;
2463 } 2592 }
2464 cifs_readdata_release(refcount); 2593 cifs_readdata_release(refcount);
2465} 2594}
@@ -2468,6 +2597,9 @@ static int
2468cifs_retry_async_readv(struct cifs_readdata *rdata) 2597cifs_retry_async_readv(struct cifs_readdata *rdata)
2469{ 2598{
2470 int rc; 2599 int rc;
2600 struct TCP_Server_Info *server;
2601
2602 server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2471 2603
2472 do { 2604 do {
2473 if (rdata->cfile->invalidHandle) { 2605 if (rdata->cfile->invalidHandle) {
@@ -2475,7 +2607,7 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
2475 if (rc != 0) 2607 if (rc != 0)
2476 continue; 2608 continue;
2477 } 2609 }
2478 rc = cifs_async_readv(rdata); 2610 rc = server->ops->async_readv(rdata);
2479 } while (rc == -EAGAIN); 2611 } while (rc == -EAGAIN);
2480 2612
2481 return rc; 2613 return rc;
@@ -2500,17 +2632,18 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2500 int rc = 0; 2632 int rc = 0;
2501 struct iov_iter ii; 2633 struct iov_iter ii;
2502 size_t pos = rdata->offset - offset; 2634 size_t pos = rdata->offset - offset;
2503 struct page *page, *tpage;
2504 ssize_t remaining = rdata->bytes; 2635 ssize_t remaining = rdata->bytes;
2505 unsigned char *pdata; 2636 unsigned char *pdata;
2637 unsigned int i;
2506 2638
2507 /* set up iov_iter and advance to the correct offset */ 2639 /* set up iov_iter and advance to the correct offset */
2508 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); 2640 iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2509 iov_iter_advance(&ii, pos); 2641 iov_iter_advance(&ii, pos);
2510 2642
2511 *copied = 0; 2643 *copied = 0;
2512 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { 2644 for (i = 0; i < rdata->nr_pages; i++) {
2513 ssize_t copy; 2645 ssize_t copy;
2646 struct page *page = rdata->pages[i];
2514 2647
2515 /* copy a whole page or whatever's left */ 2648 /* copy a whole page or whatever's left */
2516 copy = min_t(ssize_t, remaining, PAGE_SIZE); 2649 copy = min_t(ssize_t, remaining, PAGE_SIZE);
@@ -2530,9 +2663,6 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2530 iov_iter_advance(&ii, copy); 2663 iov_iter_advance(&ii, copy);
2531 } 2664 }
2532 } 2665 }
2533
2534 list_del(&page->lru);
2535 put_page(page);
2536 } 2666 }
2537 2667
2538 return rc; 2668 return rc;
@@ -2544,59 +2674,56 @@ cifs_uncached_readv_complete(struct work_struct *work)
2544 struct cifs_readdata *rdata = container_of(work, 2674 struct cifs_readdata *rdata = container_of(work,
2545 struct cifs_readdata, work); 2675 struct cifs_readdata, work);
2546 2676
2547 /* if the result is non-zero then the pages weren't kmapped */
2548 if (rdata->result == 0) {
2549 struct page *page;
2550
2551 list_for_each_entry(page, &rdata->pages, lru)
2552 kunmap(page);
2553 }
2554
2555 complete(&rdata->done); 2677 complete(&rdata->done);
2556 kref_put(&rdata->refcount, cifs_uncached_readdata_release); 2678 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2557} 2679}
2558 2680
2559static int 2681static int
2560cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata, 2682cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2561 unsigned int remaining) 2683 struct cifs_readdata *rdata, unsigned int len)
2562{ 2684{
2563 int len = 0; 2685 int total_read = 0, result = 0;
2564 struct page *page, *tpage; 2686 unsigned int i;
2687 unsigned int nr_pages = rdata->nr_pages;
2688 struct kvec iov;
2689
2690 rdata->tailsz = PAGE_SIZE;
2691 for (i = 0; i < nr_pages; i++) {
2692 struct page *page = rdata->pages[i];
2565 2693
2566 rdata->nr_iov = 1; 2694 if (len >= PAGE_SIZE) {
2567 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
2568 if (remaining >= PAGE_SIZE) {
2569 /* enough data to fill the page */ 2695 /* enough data to fill the page */
2570 rdata->iov[rdata->nr_iov].iov_base = kmap(page); 2696 iov.iov_base = kmap(page);
2571 rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE; 2697 iov.iov_len = PAGE_SIZE;
2572 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", 2698 cFYI(1, "%u: iov_base=%p iov_len=%zu",
2573 rdata->nr_iov, page->index, 2699 i, iov.iov_base, iov.iov_len);
2574 rdata->iov[rdata->nr_iov].iov_base, 2700 len -= PAGE_SIZE;
2575 rdata->iov[rdata->nr_iov].iov_len); 2701 } else if (len > 0) {
2576 ++rdata->nr_iov;
2577 len += PAGE_SIZE;
2578 remaining -= PAGE_SIZE;
2579 } else if (remaining > 0) {
2580 /* enough for partial page, fill and zero the rest */ 2702 /* enough for partial page, fill and zero the rest */
2581 rdata->iov[rdata->nr_iov].iov_base = kmap(page); 2703 iov.iov_base = kmap(page);
2582 rdata->iov[rdata->nr_iov].iov_len = remaining; 2704 iov.iov_len = len;
2583 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", 2705 cFYI(1, "%u: iov_base=%p iov_len=%zu",
2584 rdata->nr_iov, page->index, 2706 i, iov.iov_base, iov.iov_len);
2585 rdata->iov[rdata->nr_iov].iov_base, 2707 memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2586 rdata->iov[rdata->nr_iov].iov_len); 2708 rdata->tailsz = len;
2587 memset(rdata->iov[rdata->nr_iov].iov_base + remaining, 2709 len = 0;
2588 '\0', PAGE_SIZE - remaining);
2589 ++rdata->nr_iov;
2590 len += remaining;
2591 remaining = 0;
2592 } else { 2710 } else {
2593 /* no need to hold page hostage */ 2711 /* no need to hold page hostage */
2594 list_del(&page->lru); 2712 rdata->pages[i] = NULL;
2713 rdata->nr_pages--;
2595 put_page(page); 2714 put_page(page);
2715 continue;
2596 } 2716 }
2717
2718 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2719 kunmap(page);
2720 if (result < 0)
2721 break;
2722
2723 total_read += result;
2597 } 2724 }
2598 2725
2599 return len; 2726 return total_read > 0 ? total_read : result;
2600} 2727}
2601 2728
2602static ssize_t 2729static ssize_t
@@ -2627,6 +2754,9 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
2627 open_file = file->private_data; 2754 open_file = file->private_data;
2628 tcon = tlink_tcon(open_file->tlink); 2755 tcon = tlink_tcon(open_file->tlink);
2629 2756
2757 if (!tcon->ses->server->ops->async_readv)
2758 return -ENOSYS;
2759
2630 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2760 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2631 pid = open_file->pid; 2761 pid = open_file->pid;
2632 else 2762 else
@@ -2647,15 +2777,17 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
2647 goto error; 2777 goto error;
2648 } 2778 }
2649 2779
2650 rc = cifs_read_allocate_pages(&rdata->pages, npages); 2780 rc = cifs_read_allocate_pages(rdata, npages);
2651 if (rc) 2781 if (rc)
2652 goto error; 2782 goto error;
2653 2783
2654 rdata->cfile = cifsFileInfo_get(open_file); 2784 rdata->cfile = cifsFileInfo_get(open_file);
2785 rdata->nr_pages = npages;
2655 rdata->offset = offset; 2786 rdata->offset = offset;
2656 rdata->bytes = cur_len; 2787 rdata->bytes = cur_len;
2657 rdata->pid = pid; 2788 rdata->pid = pid;
2658 rdata->marshal_iov = cifs_uncached_read_marshal_iov; 2789 rdata->pagesz = PAGE_SIZE;
2790 rdata->read_into_pages = cifs_uncached_read_into_pages;
2659 2791
2660 rc = cifs_retry_async_readv(rdata); 2792 rc = cifs_retry_async_readv(rdata);
2661error: 2793error:
@@ -2706,6 +2838,10 @@ restart_loop:
2706 cifs_stats_bytes_read(tcon, total_read); 2838 cifs_stats_bytes_read(tcon, total_read);
2707 *poffset += total_read; 2839 *poffset += total_read;
2708 2840
2841 /* mask nodata case */
2842 if (rc == -ENODATA)
2843 rc = 0;
2844
2709 return total_read ? total_read : rc; 2845 return total_read ? total_read : rc;
2710} 2846}
2711 2847
@@ -2721,15 +2857,17 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2721 return read; 2857 return read;
2722} 2858}
2723 2859
2724ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 2860ssize_t
2725 unsigned long nr_segs, loff_t pos) 2861cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2862 unsigned long nr_segs, loff_t pos)
2726{ 2863{
2727 struct inode *inode; 2864 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2728 2865 struct cifsInodeInfo *cinode = CIFS_I(inode);
2729 inode = iocb->ki_filp->f_path.dentry->d_inode; 2866 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2730 2867 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2731 if (CIFS_I(inode)->clientCanCacheRead) 2868 iocb->ki_filp->private_data;
2732 return generic_file_aio_read(iocb, iov, nr_segs, pos); 2869 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2870 int rc = -EACCES;
2733 2871
2734 /* 2872 /*
2735 * In strict cache mode we need to read from the server all the time 2873 * In strict cache mode we need to read from the server all the time
@@ -2739,12 +2877,29 @@ ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2739 * on pages affected by this read but not on the region from pos to 2877 * on pages affected by this read but not on the region from pos to
2740 * pos+len-1. 2878 * pos+len-1.
2741 */ 2879 */
2880 if (!cinode->clientCanCacheRead)
2881 return cifs_user_readv(iocb, iov, nr_segs, pos);
2882
2883 if (cap_unix(tcon->ses) &&
2884 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2885 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2886 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2742 2887
2743 return cifs_user_readv(iocb, iov, nr_segs, pos); 2888 /*
2889 * We need to hold the sem to be sure nobody modifies lock list
2890 * with a brlock that prevents reading.
2891 */
2892 down_read(&cinode->lock_sem);
2893 if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2894 tcon->ses->server->vals->shared_lock_type,
2895 NULL, true))
2896 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2897 up_read(&cinode->lock_sem);
2898 return rc;
2744} 2899}
2745 2900
2746static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, 2901static ssize_t
2747 loff_t *poffset) 2902cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2748{ 2903{
2749 int rc = -EACCES; 2904 int rc = -EACCES;
2750 unsigned int bytes_read = 0; 2905 unsigned int bytes_read = 0;
@@ -2753,8 +2908,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2753 unsigned int rsize; 2908 unsigned int rsize;
2754 struct cifs_sb_info *cifs_sb; 2909 struct cifs_sb_info *cifs_sb;
2755 struct cifs_tcon *tcon; 2910 struct cifs_tcon *tcon;
2911 struct TCP_Server_Info *server;
2756 unsigned int xid; 2912 unsigned int xid;
2757 char *current_offset; 2913 char *cur_offset;
2758 struct cifsFileInfo *open_file; 2914 struct cifsFileInfo *open_file;
2759 struct cifs_io_parms io_parms; 2915 struct cifs_io_parms io_parms;
2760 int buf_type = CIFS_NO_BUFFER; 2916 int buf_type = CIFS_NO_BUFFER;
@@ -2773,6 +2929,12 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2773 } 2929 }
2774 open_file = file->private_data; 2930 open_file = file->private_data;
2775 tcon = tlink_tcon(open_file->tlink); 2931 tcon = tlink_tcon(open_file->tlink);
2932 server = tcon->ses->server;
2933
2934 if (!server->ops->sync_read) {
2935 free_xid(xid);
2936 return -ENOSYS;
2937 }
2776 2938
2777 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2939 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2778 pid = open_file->pid; 2940 pid = open_file->pid;
@@ -2782,9 +2944,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2782 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 2944 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2783 cFYI(1, "attempting read on write only file instance"); 2945 cFYI(1, "attempting read on write only file instance");
2784 2946
2785 for (total_read = 0, current_offset = read_data; 2947 for (total_read = 0, cur_offset = read_data; read_size > total_read;
2786 read_size > total_read; 2948 total_read += bytes_read, cur_offset += bytes_read) {
2787 total_read += bytes_read, current_offset += bytes_read) {
2788 current_read_size = min_t(uint, read_size - total_read, rsize); 2949 current_read_size = min_t(uint, read_size - total_read, rsize);
2789 /* 2950 /*
2790 * For windows me and 9x we do not want to request more than it 2951 * For windows me and 9x we do not want to request more than it
@@ -2802,13 +2963,13 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2802 if (rc != 0) 2963 if (rc != 0)
2803 break; 2964 break;
2804 } 2965 }
2805 io_parms.netfid = open_file->netfid;
2806 io_parms.pid = pid; 2966 io_parms.pid = pid;
2807 io_parms.tcon = tcon; 2967 io_parms.tcon = tcon;
2808 io_parms.offset = *poffset; 2968 io_parms.offset = *offset;
2809 io_parms.length = current_read_size; 2969 io_parms.length = current_read_size;
2810 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, 2970 rc = server->ops->sync_read(xid, open_file, &io_parms,
2811 &current_offset, &buf_type); 2971 &bytes_read, &cur_offset,
2972 &buf_type);
2812 } 2973 }
2813 if (rc || (bytes_read == 0)) { 2974 if (rc || (bytes_read == 0)) {
2814 if (total_read) { 2975 if (total_read) {
@@ -2819,7 +2980,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
2819 } 2980 }
2820 } else { 2981 } else {
2821 cifs_stats_bytes_read(tcon, total_read); 2982 cifs_stats_bytes_read(tcon, total_read);
2822 *poffset += bytes_read; 2983 *offset += bytes_read;
2823 } 2984 }
2824 } 2985 }
2825 free_xid(xid); 2986 free_xid(xid);
@@ -2842,6 +3003,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
2842static struct vm_operations_struct cifs_file_vm_ops = { 3003static struct vm_operations_struct cifs_file_vm_ops = {
2843 .fault = filemap_fault, 3004 .fault = filemap_fault,
2844 .page_mkwrite = cifs_page_mkwrite, 3005 .page_mkwrite = cifs_page_mkwrite,
3006 .remap_pages = generic_file_remap_pages,
2845}; 3007};
2846 3008
2847int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 3009int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
@@ -2885,16 +3047,16 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2885static void 3047static void
2886cifs_readv_complete(struct work_struct *work) 3048cifs_readv_complete(struct work_struct *work)
2887{ 3049{
3050 unsigned int i;
2888 struct cifs_readdata *rdata = container_of(work, 3051 struct cifs_readdata *rdata = container_of(work,
2889 struct cifs_readdata, work); 3052 struct cifs_readdata, work);
2890 struct page *page, *tpage;
2891 3053
2892 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { 3054 for (i = 0; i < rdata->nr_pages; i++) {
2893 list_del(&page->lru); 3055 struct page *page = rdata->pages[i];
3056
2894 lru_cache_add_file(page); 3057 lru_cache_add_file(page);
2895 3058
2896 if (rdata->result == 0) { 3059 if (rdata->result == 0) {
2897 kunmap(page);
2898 flush_dcache_page(page); 3060 flush_dcache_page(page);
2899 SetPageUptodate(page); 3061 SetPageUptodate(page);
2900 } 3062 }
@@ -2905,49 +3067,48 @@ cifs_readv_complete(struct work_struct *work)
2905 cifs_readpage_to_fscache(rdata->mapping->host, page); 3067 cifs_readpage_to_fscache(rdata->mapping->host, page);
2906 3068
2907 page_cache_release(page); 3069 page_cache_release(page);
3070 rdata->pages[i] = NULL;
2908 } 3071 }
2909 kref_put(&rdata->refcount, cifs_readdata_release); 3072 kref_put(&rdata->refcount, cifs_readdata_release);
2910} 3073}
2911 3074
2912static int 3075static int
2913cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining) 3076cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3077 struct cifs_readdata *rdata, unsigned int len)
2914{ 3078{
2915 int len = 0; 3079 int total_read = 0, result = 0;
2916 struct page *page, *tpage; 3080 unsigned int i;
2917 u64 eof; 3081 u64 eof;
2918 pgoff_t eof_index; 3082 pgoff_t eof_index;
3083 unsigned int nr_pages = rdata->nr_pages;
3084 struct kvec iov;
2919 3085
2920 /* determine the eof that the server (probably) has */ 3086 /* determine the eof that the server (probably) has */
2921 eof = CIFS_I(rdata->mapping->host)->server_eof; 3087 eof = CIFS_I(rdata->mapping->host)->server_eof;
2922 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; 3088 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
2923 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index); 3089 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
2924 3090
2925 rdata->nr_iov = 1; 3091 rdata->tailsz = PAGE_CACHE_SIZE;
2926 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) { 3092 for (i = 0; i < nr_pages; i++) {
2927 if (remaining >= PAGE_CACHE_SIZE) { 3093 struct page *page = rdata->pages[i];
3094
3095 if (len >= PAGE_CACHE_SIZE) {
2928 /* enough data to fill the page */ 3096 /* enough data to fill the page */
2929 rdata->iov[rdata->nr_iov].iov_base = kmap(page); 3097 iov.iov_base = kmap(page);
2930 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE; 3098 iov.iov_len = PAGE_CACHE_SIZE;
2931 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", 3099 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2932 rdata->nr_iov, page->index, 3100 i, page->index, iov.iov_base, iov.iov_len);
2933 rdata->iov[rdata->nr_iov].iov_base, 3101 len -= PAGE_CACHE_SIZE;
2934 rdata->iov[rdata->nr_iov].iov_len); 3102 } else if (len > 0) {
2935 ++rdata->nr_iov;
2936 len += PAGE_CACHE_SIZE;
2937 remaining -= PAGE_CACHE_SIZE;
2938 } else if (remaining > 0) {
2939 /* enough for partial page, fill and zero the rest */ 3103 /* enough for partial page, fill and zero the rest */
2940 rdata->iov[rdata->nr_iov].iov_base = kmap(page); 3104 iov.iov_base = kmap(page);
2941 rdata->iov[rdata->nr_iov].iov_len = remaining; 3105 iov.iov_len = len;
2942 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu", 3106 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
2943 rdata->nr_iov, page->index, 3107 i, page->index, iov.iov_base, iov.iov_len);
2944 rdata->iov[rdata->nr_iov].iov_base, 3108 memset(iov.iov_base + len,
2945 rdata->iov[rdata->nr_iov].iov_len); 3109 '\0', PAGE_CACHE_SIZE - len);
2946 memset(rdata->iov[rdata->nr_iov].iov_base + remaining, 3110 rdata->tailsz = len;
2947 '\0', PAGE_CACHE_SIZE - remaining); 3111 len = 0;
2948 ++rdata->nr_iov;
2949 len += remaining;
2950 remaining = 0;
2951 } else if (page->index > eof_index) { 3112 } else if (page->index > eof_index) {
2952 /* 3113 /*
2953 * The VFS will not try to do readahead past the 3114 * The VFS will not try to do readahead past the
@@ -2958,22 +3119,33 @@ cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
2958 * fill them until the writes are flushed. 3119 * fill them until the writes are flushed.
2959 */ 3120 */
2960 zero_user(page, 0, PAGE_CACHE_SIZE); 3121 zero_user(page, 0, PAGE_CACHE_SIZE);
2961 list_del(&page->lru);
2962 lru_cache_add_file(page); 3122 lru_cache_add_file(page);
2963 flush_dcache_page(page); 3123 flush_dcache_page(page);
2964 SetPageUptodate(page); 3124 SetPageUptodate(page);
2965 unlock_page(page); 3125 unlock_page(page);
2966 page_cache_release(page); 3126 page_cache_release(page);
3127 rdata->pages[i] = NULL;
3128 rdata->nr_pages--;
3129 continue;
2967 } else { 3130 } else {
2968 /* no need to hold page hostage */ 3131 /* no need to hold page hostage */
2969 list_del(&page->lru);
2970 lru_cache_add_file(page); 3132 lru_cache_add_file(page);
2971 unlock_page(page); 3133 unlock_page(page);
2972 page_cache_release(page); 3134 page_cache_release(page);
3135 rdata->pages[i] = NULL;
3136 rdata->nr_pages--;
3137 continue;
2973 } 3138 }
3139
3140 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3141 kunmap(page);
3142 if (result < 0)
3143 break;
3144
3145 total_read += result;
2974 } 3146 }
2975 3147
2976 return len; 3148 return total_read > 0 ? total_read : result;
2977} 3149}
2978 3150
2979static int cifs_readpages(struct file *file, struct address_space *mapping, 3151static int cifs_readpages(struct file *file, struct address_space *mapping,
@@ -3027,6 +3199,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3027 * the rdata->pages, then we want them in increasing order. 3199 * the rdata->pages, then we want them in increasing order.
3028 */ 3200 */
3029 while (!list_empty(page_list)) { 3201 while (!list_empty(page_list)) {
3202 unsigned int i;
3030 unsigned int bytes = PAGE_CACHE_SIZE; 3203 unsigned int bytes = PAGE_CACHE_SIZE;
3031 unsigned int expected_index; 3204 unsigned int expected_index;
3032 unsigned int nr_pages = 1; 3205 unsigned int nr_pages = 1;
@@ -3096,14 +3269,18 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
3096 rdata->offset = offset; 3269 rdata->offset = offset;
3097 rdata->bytes = bytes; 3270 rdata->bytes = bytes;
3098 rdata->pid = pid; 3271 rdata->pid = pid;
3099 rdata->marshal_iov = cifs_readpages_marshal_iov; 3272 rdata->pagesz = PAGE_CACHE_SIZE;
3100 list_splice_init(&tmplist, &rdata->pages); 3273 rdata->read_into_pages = cifs_readpages_read_into_pages;
3274
3275 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3276 list_del(&page->lru);
3277 rdata->pages[rdata->nr_pages++] = page;
3278 }
3101 3279
3102 rc = cifs_retry_async_readv(rdata); 3280 rc = cifs_retry_async_readv(rdata);
3103 if (rc != 0) { 3281 if (rc != 0) {
3104 list_for_each_entry_safe(page, tpage, &rdata->pages, 3282 for (i = 0; i < rdata->nr_pages; i++) {
3105 lru) { 3283 page = rdata->pages[i];
3106 list_del(&page->lru);
3107 lru_cache_add_file(page); 3284 lru_cache_add_file(page);
3108 unlock_page(page); 3285 unlock_page(page);
3109 page_cache_release(page); 3286 page_cache_release(page);
@@ -3347,6 +3524,7 @@ void cifs_oplock_break(struct work_struct *work)
3347 oplock_break); 3524 oplock_break);
3348 struct inode *inode = cfile->dentry->d_inode; 3525 struct inode *inode = cfile->dentry->d_inode;
3349 struct cifsInodeInfo *cinode = CIFS_I(inode); 3526 struct cifsInodeInfo *cinode = CIFS_I(inode);
3527 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3350 int rc = 0; 3528 int rc = 0;
3351 3529
3352 if (inode && S_ISREG(inode->i_mode)) { 3530 if (inode && S_ISREG(inode->i_mode)) {
@@ -3374,10 +3552,8 @@ void cifs_oplock_break(struct work_struct *work)
3374 * disconnected since oplock already released by the server 3552 * disconnected since oplock already released by the server
3375 */ 3553 */
3376 if (!cfile->oplock_break_cancelled) { 3554 if (!cfile->oplock_break_cancelled) {
3377 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 3555 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3378 current->tgid, 0, 0, 0, 0, 3556 cinode);
3379 LOCKING_ANDX_OPLOCK_RELEASE, false,
3380 cinode->clientCanCacheRead ? 1 : 0);
3381 cFYI(1, "Oplock release rc = %d", rc); 3557 cFYI(1, "Oplock release rc = %d", rc);
3382 } 3558 }
3383} 3559}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7354877fa3bd..afdff79651f1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -124,10 +124,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
124{ 124{
125 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 125 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
126 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 126 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
127 unsigned long oldtime = cifs_i->time;
128 127
129 cifs_revalidate_cache(inode, fattr); 128 cifs_revalidate_cache(inode, fattr);
130 129
130 spin_lock(&inode->i_lock);
131 inode->i_atime = fattr->cf_atime; 131 inode->i_atime = fattr->cf_atime;
132 inode->i_mtime = fattr->cf_mtime; 132 inode->i_mtime = fattr->cf_mtime;
133 inode->i_ctime = fattr->cf_ctime; 133 inode->i_ctime = fattr->cf_ctime;
@@ -148,9 +148,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
148 else 148 else
149 cifs_i->time = jiffies; 149 cifs_i->time = jiffies;
150 150
151 cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode,
152 oldtime, cifs_i->time);
153
154 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; 151 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
155 152
156 cifs_i->server_eof = fattr->cf_eof; 153 cifs_i->server_eof = fattr->cf_eof;
@@ -158,7 +155,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
158 * Can't safely change the file size here if the client is writing to 155 * Can't safely change the file size here if the client is writing to
159 * it due to potential races. 156 * it due to potential races.
160 */ 157 */
161 spin_lock(&inode->i_lock);
162 if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) { 158 if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
163 i_size_write(inode, fattr->cf_eof); 159 i_size_write(inode, fattr->cf_eof);
164 160
@@ -286,7 +282,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
286 fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL; 282 fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
287} 283}
288 284
289int cifs_get_file_info_unix(struct file *filp) 285static int
286cifs_get_file_info_unix(struct file *filp)
290{ 287{
291 int rc; 288 int rc;
292 unsigned int xid; 289 unsigned int xid;
@@ -298,7 +295,7 @@ int cifs_get_file_info_unix(struct file *filp)
298 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 295 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
299 296
300 xid = get_xid(); 297 xid = get_xid();
301 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); 298 rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->fid.netfid, &find_data);
302 if (!rc) { 299 if (!rc) {
303 cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); 300 cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
304 } else if (rc == -EREMOTE) { 301 } else if (rc == -EREMOTE) {
@@ -554,7 +551,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
554 fattr->cf_gid = cifs_sb->mnt_gid; 551 fattr->cf_gid = cifs_sb->mnt_gid;
555} 552}
556 553
557int cifs_get_file_info(struct file *filp) 554static int
555cifs_get_file_info(struct file *filp)
558{ 556{
559 int rc; 557 int rc;
560 unsigned int xid; 558 unsigned int xid;
@@ -564,9 +562,13 @@ int cifs_get_file_info(struct file *filp)
564 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 562 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
565 struct cifsFileInfo *cfile = filp->private_data; 563 struct cifsFileInfo *cfile = filp->private_data;
566 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); 564 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
565 struct TCP_Server_Info *server = tcon->ses->server;
566
567 if (!server->ops->query_file_info)
568 return -ENOSYS;
567 569
568 xid = get_xid(); 570 xid = get_xid();
569 rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); 571 rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data);
570 switch (rc) { 572 switch (rc) {
571 case 0: 573 case 0:
572 cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); 574 cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false);
@@ -605,7 +607,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
605 FILE_ALL_INFO *data, struct super_block *sb, int xid, 607 FILE_ALL_INFO *data, struct super_block *sb, int xid,
606 const __u16 *fid) 608 const __u16 *fid)
607{ 609{
608 int rc = 0, tmprc; 610 bool validinum = false;
611 __u16 srchflgs;
612 int rc = 0, tmprc = ENOSYS;
609 struct cifs_tcon *tcon; 613 struct cifs_tcon *tcon;
610 struct TCP_Server_Info *server; 614 struct TCP_Server_Info *server;
611 struct tcon_link *tlink; 615 struct tcon_link *tlink;
@@ -613,6 +617,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
613 char *buf = NULL; 617 char *buf = NULL;
614 bool adjust_tz = false; 618 bool adjust_tz = false;
615 struct cifs_fattr fattr; 619 struct cifs_fattr fattr;
620 struct cifs_search_info *srchinf = NULL;
616 621
617 tlink = cifs_sb_tlink(cifs_sb); 622 tlink = cifs_sb_tlink(cifs_sb);
618 if (IS_ERR(tlink)) 623 if (IS_ERR(tlink))
@@ -651,9 +656,38 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
651 } else if (rc == -EREMOTE) { 656 } else if (rc == -EREMOTE) {
652 cifs_create_dfs_fattr(&fattr, sb); 657 cifs_create_dfs_fattr(&fattr, sb);
653 rc = 0; 658 rc = 0;
654 } else { 659 } else if (rc == -EACCES && backup_cred(cifs_sb)) {
660 srchinf = kzalloc(sizeof(struct cifs_search_info),
661 GFP_KERNEL);
662 if (srchinf == NULL) {
663 rc = -ENOMEM;
664 goto cgii_exit;
665 }
666
667 srchinf->endOfSearch = false;
668 srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
669
670 srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
671 CIFS_SEARCH_CLOSE_AT_END |
672 CIFS_SEARCH_BACKUP_SEARCH;
673
674 rc = CIFSFindFirst(xid, tcon, full_path,
675 cifs_sb, NULL, srchflgs, srchinf, false);
676 if (!rc) {
677 data =
678 (FILE_ALL_INFO *)srchinf->srch_entries_start;
679
680 cifs_dir_info_to_fattr(&fattr,
681 (FILE_DIRECTORY_INFO *)data, cifs_sb);
682 fattr.cf_uniqueid = le64_to_cpu(
683 ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
684 validinum = true;
685
686 cifs_buf_release(srchinf->ntwrk_buf_start);
687 }
688 kfree(srchinf);
689 } else
655 goto cgii_exit; 690 goto cgii_exit;
656 }
657 691
658 /* 692 /*
659 * If an inode wasn't passed in, then get the inode number 693 * If an inode wasn't passed in, then get the inode number
@@ -664,23 +698,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
664 */ 698 */
665 if (*inode == NULL) { 699 if (*inode == NULL) {
666 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 700 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
667 if (server->ops->get_srv_inum) 701 if (validinum == false) {
668 tmprc = server->ops->get_srv_inum(xid, tcon, 702 if (server->ops->get_srv_inum)
669 cifs_sb, full_path, &fattr.cf_uniqueid, 703 tmprc = server->ops->get_srv_inum(xid,
670 data); 704 tcon, cifs_sb, full_path,
671 else 705 &fattr.cf_uniqueid, data);
672 tmprc = -ENOSYS; 706 if (tmprc) {
673 if (tmprc || !fattr.cf_uniqueid) { 707 cFYI(1, "GetSrvInodeNum rc %d", tmprc);
674 cFYI(1, "GetSrvInodeNum rc %d", tmprc); 708 fattr.cf_uniqueid = iunique(sb, ROOT_I);
675 fattr.cf_uniqueid = iunique(sb, ROOT_I); 709 cifs_autodisable_serverino(cifs_sb);
676 cifs_autodisable_serverino(cifs_sb); 710 }
677 } 711 }
678 } else { 712 } else
679 fattr.cf_uniqueid = iunique(sb, ROOT_I); 713 fattr.cf_uniqueid = iunique(sb, ROOT_I);
680 } 714 } else
681 } else {
682 fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; 715 fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
683 }
684 716
685 /* query for SFU type info if supported and needed */ 717 /* query for SFU type info if supported and needed */
686 if (fattr.cf_cifsattrs & ATTR_SYSTEM && 718 if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
@@ -859,12 +891,14 @@ struct inode *cifs_root_iget(struct super_block *sb)
859 891
860 if (rc && tcon->ipc) { 892 if (rc && tcon->ipc) {
861 cFYI(1, "ipc connection - fake read inode"); 893 cFYI(1, "ipc connection - fake read inode");
894 spin_lock(&inode->i_lock);
862 inode->i_mode |= S_IFDIR; 895 inode->i_mode |= S_IFDIR;
863 set_nlink(inode, 2); 896 set_nlink(inode, 2);
864 inode->i_op = &cifs_ipc_inode_ops; 897 inode->i_op = &cifs_ipc_inode_ops;
865 inode->i_fop = &simple_dir_operations; 898 inode->i_fop = &simple_dir_operations;
866 inode->i_uid = cifs_sb->mnt_uid; 899 inode->i_uid = cifs_sb->mnt_uid;
867 inode->i_gid = cifs_sb->mnt_gid; 900 inode->i_gid = cifs_sb->mnt_gid;
901 spin_unlock(&inode->i_lock);
868 } else if (rc) { 902 } else if (rc) {
869 iget_failed(inode); 903 iget_failed(inode);
870 inode = ERR_PTR(rc); 904 inode = ERR_PTR(rc);
@@ -878,25 +912,22 @@ out:
878 return inode; 912 return inode;
879} 913}
880 914
881static int 915int
882cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid, 916cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
883 char *full_path, __u32 dosattr) 917 char *full_path, __u32 dosattr)
884{ 918{
885 int rc;
886 int oplock = 0;
887 __u16 netfid;
888 __u32 netpid;
889 bool set_time = false; 919 bool set_time = false;
890 struct cifsFileInfo *open_file;
891 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
892 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 920 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
893 struct tcon_link *tlink = NULL; 921 struct TCP_Server_Info *server;
894 struct cifs_tcon *pTcon;
895 FILE_BASIC_INFO info_buf; 922 FILE_BASIC_INFO info_buf;
896 923
897 if (attrs == NULL) 924 if (attrs == NULL)
898 return -EINVAL; 925 return -EINVAL;
899 926
927 server = cifs_sb_master_tcon(cifs_sb)->ses->server;
928 if (!server->ops->set_file_info)
929 return -ENOSYS;
930
900 if (attrs->ia_valid & ATTR_ATIME) { 931 if (attrs->ia_valid & ATTR_ATIME) {
901 set_time = true; 932 set_time = true;
902 info_buf.LastAccessTime = 933 info_buf.LastAccessTime =
@@ -927,81 +958,17 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
927 info_buf.CreationTime = 0; /* don't change */ 958 info_buf.CreationTime = 0; /* don't change */
928 info_buf.Attributes = cpu_to_le32(dosattr); 959 info_buf.Attributes = cpu_to_le32(dosattr);
929 960
930 /* 961 return server->ops->set_file_info(inode, full_path, &info_buf, xid);
931 * If the file is already open for write, just use that fileid
932 */
933 open_file = find_writable_file(cifsInode, true);
934 if (open_file) {
935 netfid = open_file->netfid;
936 netpid = open_file->pid;
937 pTcon = tlink_tcon(open_file->tlink);
938 goto set_via_filehandle;
939 }
940
941 tlink = cifs_sb_tlink(cifs_sb);
942 if (IS_ERR(tlink)) {
943 rc = PTR_ERR(tlink);
944 tlink = NULL;
945 goto out;
946 }
947 pTcon = tlink_tcon(tlink);
948
949 /*
950 * NT4 apparently returns success on this call, but it doesn't
951 * really work.
952 */
953 if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
954 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
955 &info_buf, cifs_sb->local_nls,
956 cifs_sb->mnt_cifs_flags &
957 CIFS_MOUNT_MAP_SPECIAL_CHR);
958 if (rc == 0) {
959 cifsInode->cifsAttrs = dosattr;
960 goto out;
961 } else if (rc != -EOPNOTSUPP && rc != -EINVAL)
962 goto out;
963 }
964
965 cFYI(1, "calling SetFileInfo since SetPathInfo for "
966 "times not supported by this server");
967 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
968 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
969 CREATE_NOT_DIR, &netfid, &oplock,
970 NULL, cifs_sb->local_nls,
971 cifs_sb->mnt_cifs_flags &
972 CIFS_MOUNT_MAP_SPECIAL_CHR);
973
974 if (rc != 0) {
975 if (rc == -EIO)
976 rc = -EINVAL;
977 goto out;
978 }
979
980 netpid = current->tgid;
981
982set_via_filehandle:
983 rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
984 if (!rc)
985 cifsInode->cifsAttrs = dosattr;
986
987 if (open_file == NULL)
988 CIFSSMBClose(xid, pTcon, netfid);
989 else
990 cifsFileInfo_put(open_file);
991out:
992 if (tlink != NULL)
993 cifs_put_tlink(tlink);
994 return rc;
995} 962}
996 963
997/* 964/*
998 * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit 965 * Open the given file (if it isn't already), set the DELETE_ON_CLOSE bit
999 * and rename it to a random name that hopefully won't conflict with 966 * and rename it to a random name that hopefully won't conflict with
1000 * anything else. 967 * anything else.
1001 */ 968 */
1002static int 969int
1003cifs_rename_pending_delete(char *full_path, struct dentry *dentry, 970cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
1004 unsigned int xid) 971 const unsigned int xid)
1005{ 972{
1006 int oplock = 0; 973 int oplock = 0;
1007 int rc; 974 int rc;
@@ -1110,6 +1077,15 @@ undo_setattr:
1110 goto out_close; 1077 goto out_close;
1111} 1078}
1112 1079
1080/* copied from fs/nfs/dir.c with small changes */
1081static void
1082cifs_drop_nlink(struct inode *inode)
1083{
1084 spin_lock(&inode->i_lock);
1085 if (inode->i_nlink > 0)
1086 drop_nlink(inode);
1087 spin_unlock(&inode->i_lock);
1088}
1113 1089
1114/* 1090/*
1115 * If dentry->d_inode is null (usually meaning the cached dentry 1091 * If dentry->d_inode is null (usually meaning the cached dentry
@@ -1129,6 +1105,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1129 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 1105 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
1130 struct tcon_link *tlink; 1106 struct tcon_link *tlink;
1131 struct cifs_tcon *tcon; 1107 struct cifs_tcon *tcon;
1108 struct TCP_Server_Info *server;
1132 struct iattr *attrs = NULL; 1109 struct iattr *attrs = NULL;
1133 __u32 dosattr = 0, origattr = 0; 1110 __u32 dosattr = 0, origattr = 0;
1134 1111
@@ -1138,6 +1115,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1138 if (IS_ERR(tlink)) 1115 if (IS_ERR(tlink))
1139 return PTR_ERR(tlink); 1116 return PTR_ERR(tlink);
1140 tcon = tlink_tcon(tlink); 1117 tcon = tlink_tcon(tlink);
1118 server = tcon->ses->server;
1141 1119
1142 xid = get_xid(); 1120 xid = get_xid();
1143 1121
@@ -1160,19 +1138,28 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1160 } 1138 }
1161 1139
1162retry_std_delete: 1140retry_std_delete:
1163 rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, 1141 if (!server->ops->unlink) {
1164 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 1142 rc = -ENOSYS;
1143 goto psx_del_no_retry;
1144 }
1145
1146 rc = server->ops->unlink(xid, tcon, full_path, cifs_sb);
1165 1147
1166psx_del_no_retry: 1148psx_del_no_retry:
1167 if (!rc) { 1149 if (!rc) {
1168 if (inode) 1150 if (inode)
1169 drop_nlink(inode); 1151 cifs_drop_nlink(inode);
1170 } else if (rc == -ENOENT) { 1152 } else if (rc == -ENOENT) {
1171 d_drop(dentry); 1153 d_drop(dentry);
1172 } else if (rc == -ETXTBSY) { 1154 } else if (rc == -ETXTBSY) {
1173 rc = cifs_rename_pending_delete(full_path, dentry, xid); 1155 if (server->ops->rename_pending_delete) {
1174 if (rc == 0) 1156 rc = server->ops->rename_pending_delete(full_path,
1175 drop_nlink(inode); 1157 dentry, xid);
1158 if (rc == 0)
1159 cifs_drop_nlink(inode);
1160 }
1161 if (rc == -ETXTBSY)
1162 rc = -EBUSY;
1176 } else if ((rc == -EACCES) && (dosattr == 0) && inode) { 1163 } else if ((rc == -EACCES) && (dosattr == 0) && inode) {
1177 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); 1164 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
1178 if (attrs == NULL) { 1165 if (attrs == NULL) {
@@ -1220,33 +1207,33 @@ unlink_out:
1220} 1207}
1221 1208
1222static int 1209static int
1223cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode, 1210cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
1224 const char *full_path, struct cifs_sb_info *cifs_sb, 1211 const char *full_path, struct cifs_sb_info *cifs_sb,
1225 struct cifs_tcon *tcon, const unsigned int xid) 1212 struct cifs_tcon *tcon, const unsigned int xid)
1226{ 1213{
1227 int rc = 0; 1214 int rc = 0;
1228 struct inode *newinode = NULL; 1215 struct inode *inode = NULL;
1229 1216
1230 if (tcon->unix_ext) 1217 if (tcon->unix_ext)
1231 rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, 1218 rc = cifs_get_inode_info_unix(&inode, full_path, parent->i_sb,
1232 xid); 1219 xid);
1233 else 1220 else
1234 rc = cifs_get_inode_info(&newinode, full_path, NULL, 1221 rc = cifs_get_inode_info(&inode, full_path, NULL, parent->i_sb,
1235 inode->i_sb, xid, NULL); 1222 xid, NULL);
1223
1236 if (rc) 1224 if (rc)
1237 return rc; 1225 return rc;
1238 1226
1239 d_instantiate(dentry, newinode);
1240 /* 1227 /*
1241 * setting nlink not necessary except in cases where we failed to get it 1228 * setting nlink not necessary except in cases where we failed to get it
1242 * from the server or was set bogus 1229 * from the server or was set bogus. Also, since this is a brand new
1230 * inode, no need to grab the i_lock before setting the i_nlink.
1243 */ 1231 */
1244 if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2)) 1232 if (inode->i_nlink < 2)
1245 set_nlink(dentry->d_inode, 2); 1233 set_nlink(inode, 2);
1246
1247 mode &= ~current_umask(); 1234 mode &= ~current_umask();
1248 /* must turn on setgid bit if parent dir has it */ 1235 /* must turn on setgid bit if parent dir has it */
1249 if (inode->i_mode & S_ISGID) 1236 if (parent->i_mode & S_ISGID)
1250 mode |= S_ISGID; 1237 mode |= S_ISGID;
1251 1238
1252 if (tcon->unix_ext) { 1239 if (tcon->unix_ext) {
@@ -1259,8 +1246,8 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
1259 }; 1246 };
1260 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1247 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
1261 args.uid = (__u64)current_fsuid(); 1248 args.uid = (__u64)current_fsuid();
1262 if (inode->i_mode & S_ISGID) 1249 if (parent->i_mode & S_ISGID)
1263 args.gid = (__u64)inode->i_gid; 1250 args.gid = (__u64)parent->i_gid;
1264 else 1251 else
1265 args.gid = (__u64)current_fsgid(); 1252 args.gid = (__u64)current_fsgid();
1266 } else { 1253 } else {
@@ -1275,22 +1262,20 @@ cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
1275 struct TCP_Server_Info *server = tcon->ses->server; 1262 struct TCP_Server_Info *server = tcon->ses->server;
1276 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && 1263 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
1277 (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo) 1264 (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo)
1278 server->ops->mkdir_setinfo(newinode, full_path, cifs_sb, 1265 server->ops->mkdir_setinfo(inode, full_path, cifs_sb,
1279 tcon, xid); 1266 tcon, xid);
1280 if (dentry->d_inode) { 1267 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
1281 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 1268 inode->i_mode = (mode | S_IFDIR);
1282 dentry->d_inode->i_mode = (mode | S_IFDIR); 1269
1283 1270 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
1284 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1271 inode->i_uid = current_fsuid();
1285 dentry->d_inode->i_uid = current_fsuid(); 1272 if (inode->i_mode & S_ISGID)
1286 if (inode->i_mode & S_ISGID) 1273 inode->i_gid = parent->i_gid;
1287 dentry->d_inode->i_gid = inode->i_gid; 1274 else
1288 else 1275 inode->i_gid = current_fsgid();
1289 dentry->d_inode->i_gid =
1290 current_fsgid();
1291 }
1292 } 1276 }
1293 } 1277 }
1278 d_instantiate(dentry, inode);
1294 return rc; 1279 return rc;
1295} 1280}
1296 1281
@@ -1487,29 +1472,32 @@ rmdir_exit:
1487} 1472}
1488 1473
1489static int 1474static int
1490cifs_do_rename(unsigned int xid, struct dentry *from_dentry, 1475cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
1491 const char *fromPath, struct dentry *to_dentry, 1476 const char *from_path, struct dentry *to_dentry,
1492 const char *toPath) 1477 const char *to_path)
1493{ 1478{
1494 struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); 1479 struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb);
1495 struct tcon_link *tlink; 1480 struct tcon_link *tlink;
1496 struct cifs_tcon *pTcon; 1481 struct cifs_tcon *tcon;
1482 struct TCP_Server_Info *server;
1497 __u16 srcfid; 1483 __u16 srcfid;
1498 int oplock, rc; 1484 int oplock, rc;
1499 1485
1500 tlink = cifs_sb_tlink(cifs_sb); 1486 tlink = cifs_sb_tlink(cifs_sb);
1501 if (IS_ERR(tlink)) 1487 if (IS_ERR(tlink))
1502 return PTR_ERR(tlink); 1488 return PTR_ERR(tlink);
1503 pTcon = tlink_tcon(tlink); 1489 tcon = tlink_tcon(tlink);
1490 server = tcon->ses->server;
1491
1492 if (!server->ops->rename)
1493 return -ENOSYS;
1504 1494
1505 /* try path-based rename first */ 1495 /* try path-based rename first */
1506 rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, 1496 rc = server->ops->rename(xid, tcon, from_path, to_path, cifs_sb);
1507 cifs_sb->mnt_cifs_flags &
1508 CIFS_MOUNT_MAP_SPECIAL_CHR);
1509 1497
1510 /* 1498 /*
1511 * don't bother with rename by filehandle unless file is busy and 1499 * Don't bother with rename by filehandle unless file is busy and
1512 * source Note that cross directory moves do not work with 1500 * source. Note that cross directory moves do not work with
1513 * rename by filehandle to various Windows servers. 1501 * rename by filehandle to various Windows servers.
1514 */ 1502 */
1515 if (rc == 0 || rc != -ETXTBSY) 1503 if (rc == 0 || rc != -ETXTBSY)
@@ -1520,29 +1508,28 @@ cifs_do_rename(unsigned int xid, struct dentry *from_dentry,
1520 goto do_rename_exit; 1508 goto do_rename_exit;
1521 1509
1522 /* open the file to be renamed -- we need DELETE perms */ 1510 /* open the file to be renamed -- we need DELETE perms */
1523 rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, 1511 rc = CIFSSMBOpen(xid, tcon, from_path, FILE_OPEN, DELETE,
1524 CREATE_NOT_DIR, &srcfid, &oplock, NULL, 1512 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
1525 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 1513 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
1526 CIFS_MOUNT_MAP_SPECIAL_CHR); 1514 CIFS_MOUNT_MAP_SPECIAL_CHR);
1527
1528 if (rc == 0) { 1515 if (rc == 0) {
1529 rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, 1516 rc = CIFSSMBRenameOpenFile(xid, tcon, srcfid,
1530 (const char *) to_dentry->d_name.name, 1517 (const char *) to_dentry->d_name.name,
1531 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 1518 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
1532 CIFS_MOUNT_MAP_SPECIAL_CHR); 1519 CIFS_MOUNT_MAP_SPECIAL_CHR);
1533 1520 CIFSSMBClose(xid, tcon, srcfid);
1534 CIFSSMBClose(xid, pTcon, srcfid);
1535 } 1521 }
1536do_rename_exit: 1522do_rename_exit:
1537 cifs_put_tlink(tlink); 1523 cifs_put_tlink(tlink);
1538 return rc; 1524 return rc;
1539} 1525}
1540 1526
1541int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, 1527int
1542 struct inode *target_dir, struct dentry *target_dentry) 1528cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1529 struct inode *target_dir, struct dentry *target_dentry)
1543{ 1530{
1544 char *fromName = NULL; 1531 char *from_name = NULL;
1545 char *toName = NULL; 1532 char *to_name = NULL;
1546 struct cifs_sb_info *cifs_sb; 1533 struct cifs_sb_info *cifs_sb;
1547 struct tcon_link *tlink; 1534 struct tcon_link *tlink;
1548 struct cifs_tcon *tcon; 1535 struct cifs_tcon *tcon;
@@ -1563,25 +1550,25 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1563 * we already have the rename sem so we do not need to 1550 * we already have the rename sem so we do not need to
1564 * grab it again here to protect the path integrity 1551 * grab it again here to protect the path integrity
1565 */ 1552 */
1566 fromName = build_path_from_dentry(source_dentry); 1553 from_name = build_path_from_dentry(source_dentry);
1567 if (fromName == NULL) { 1554 if (from_name == NULL) {
1568 rc = -ENOMEM; 1555 rc = -ENOMEM;
1569 goto cifs_rename_exit; 1556 goto cifs_rename_exit;
1570 } 1557 }
1571 1558
1572 toName = build_path_from_dentry(target_dentry); 1559 to_name = build_path_from_dentry(target_dentry);
1573 if (toName == NULL) { 1560 if (to_name == NULL) {
1574 rc = -ENOMEM; 1561 rc = -ENOMEM;
1575 goto cifs_rename_exit; 1562 goto cifs_rename_exit;
1576 } 1563 }
1577 1564
1578 rc = cifs_do_rename(xid, source_dentry, fromName, 1565 rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
1579 target_dentry, toName); 1566 to_name);
1580 1567
1581 if (rc == -EEXIST && tcon->unix_ext) { 1568 if (rc == -EEXIST && tcon->unix_ext) {
1582 /* 1569 /*
1583 * Are src and dst hardlinks of same inode? We can 1570 * Are src and dst hardlinks of same inode? We can only tell
1584 * only tell with unix extensions enabled 1571 * with unix extensions enabled.
1585 */ 1572 */
1586 info_buf_source = 1573 info_buf_source =
1587 kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), 1574 kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO),
@@ -1592,19 +1579,19 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1592 } 1579 }
1593 1580
1594 info_buf_target = info_buf_source + 1; 1581 info_buf_target = info_buf_source + 1;
1595 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, 1582 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, from_name,
1596 info_buf_source, 1583 info_buf_source,
1597 cifs_sb->local_nls, 1584 cifs_sb->local_nls,
1598 cifs_sb->mnt_cifs_flags & 1585 cifs_sb->mnt_cifs_flags &
1599 CIFS_MOUNT_MAP_SPECIAL_CHR); 1586 CIFS_MOUNT_MAP_SPECIAL_CHR);
1600 if (tmprc != 0) 1587 if (tmprc != 0)
1601 goto unlink_target; 1588 goto unlink_target;
1602 1589
1603 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, 1590 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, to_name,
1604 info_buf_target, 1591 info_buf_target,
1605 cifs_sb->local_nls, 1592 cifs_sb->local_nls,
1606 cifs_sb->mnt_cifs_flags & 1593 cifs_sb->mnt_cifs_flags &
1607 CIFS_MOUNT_MAP_SPECIAL_CHR); 1594 CIFS_MOUNT_MAP_SPECIAL_CHR);
1608 1595
1609 if (tmprc == 0 && (info_buf_source->UniqueId == 1596 if (tmprc == 0 && (info_buf_source->UniqueId ==
1610 info_buf_target->UniqueId)) { 1597 info_buf_target->UniqueId)) {
@@ -1612,8 +1599,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1612 rc = 0; 1599 rc = 0;
1613 goto cifs_rename_exit; 1600 goto cifs_rename_exit;
1614 } 1601 }
1615 } /* else ... BB we could add the same check for Windows by 1602 }
1616 checking the UniqueId via FILE_INTERNAL_INFO */ 1603 /*
1604 * else ... BB we could add the same check for Windows by
1605 * checking the UniqueId via FILE_INTERNAL_INFO
1606 */
1617 1607
1618unlink_target: 1608unlink_target:
1619 /* Try unlinking the target dentry if it's not negative */ 1609 /* Try unlinking the target dentry if it's not negative */
@@ -1621,15 +1611,14 @@ unlink_target:
1621 tmprc = cifs_unlink(target_dir, target_dentry); 1611 tmprc = cifs_unlink(target_dir, target_dentry);
1622 if (tmprc) 1612 if (tmprc)
1623 goto cifs_rename_exit; 1613 goto cifs_rename_exit;
1624 1614 rc = cifs_do_rename(xid, source_dentry, from_name,
1625 rc = cifs_do_rename(xid, source_dentry, fromName, 1615 target_dentry, to_name);
1626 target_dentry, toName);
1627 } 1616 }
1628 1617
1629cifs_rename_exit: 1618cifs_rename_exit:
1630 kfree(info_buf_source); 1619 kfree(info_buf_source);
1631 kfree(fromName); 1620 kfree(from_name);
1632 kfree(toName); 1621 kfree(to_name);
1633 free_xid(xid); 1622 free_xid(xid);
1634 cifs_put_tlink(tlink); 1623 cifs_put_tlink(tlink);
1635 return rc; 1624 return rc;
@@ -1854,7 +1843,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1854 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 1843 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1855 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1844 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1856 struct tcon_link *tlink = NULL; 1845 struct tcon_link *tlink = NULL;
1857 struct cifs_tcon *pTcon = NULL; 1846 struct cifs_tcon *tcon = NULL;
1847 struct TCP_Server_Info *server;
1858 struct cifs_io_parms io_parms; 1848 struct cifs_io_parms io_parms;
1859 1849
1860 /* 1850 /*
@@ -1868,19 +1858,21 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1868 */ 1858 */
1869 open_file = find_writable_file(cifsInode, true); 1859 open_file = find_writable_file(cifsInode, true);
1870 if (open_file) { 1860 if (open_file) {
1871 __u16 nfid = open_file->netfid; 1861 tcon = tlink_tcon(open_file->tlink);
1872 __u32 npid = open_file->pid; 1862 server = tcon->ses->server;
1873 pTcon = tlink_tcon(open_file->tlink); 1863 if (server->ops->set_file_size)
1874 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, 1864 rc = server->ops->set_file_size(xid, tcon, open_file,
1875 npid, false); 1865 attrs->ia_size, false);
1866 else
1867 rc = -ENOSYS;
1876 cifsFileInfo_put(open_file); 1868 cifsFileInfo_put(open_file);
1877 cFYI(1, "SetFSize for attrs rc = %d", rc); 1869 cFYI(1, "SetFSize for attrs rc = %d", rc);
1878 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1870 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1879 unsigned int bytes_written; 1871 unsigned int bytes_written;
1880 1872
1881 io_parms.netfid = nfid; 1873 io_parms.netfid = open_file->fid.netfid;
1882 io_parms.pid = npid; 1874 io_parms.pid = open_file->pid;
1883 io_parms.tcon = pTcon; 1875 io_parms.tcon = tcon;
1884 io_parms.offset = 0; 1876 io_parms.offset = 0;
1885 io_parms.length = attrs->ia_size; 1877 io_parms.length = attrs->ia_size;
1886 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, 1878 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
@@ -1890,52 +1882,55 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1890 } else 1882 } else
1891 rc = -EINVAL; 1883 rc = -EINVAL;
1892 1884
1893 if (rc != 0) { 1885 if (!rc)
1894 if (pTcon == NULL) { 1886 goto set_size_out;
1895 tlink = cifs_sb_tlink(cifs_sb);
1896 if (IS_ERR(tlink))
1897 return PTR_ERR(tlink);
1898 pTcon = tlink_tcon(tlink);
1899 }
1900 1887
1901 /* Set file size by pathname rather than by handle 1888 if (tcon == NULL) {
1902 either because no valid, writeable file handle for 1889 tlink = cifs_sb_tlink(cifs_sb);
1903 it was found or because there was an error setting 1890 if (IS_ERR(tlink))
1904 it by handle */ 1891 return PTR_ERR(tlink);
1905 rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, 1892 tcon = tlink_tcon(tlink);
1906 false, cifs_sb->local_nls, 1893 server = tcon->ses->server;
1894 }
1895
1896 /*
1897 * Set file size by pathname rather than by handle either because no
1898 * valid, writeable file handle for it was found or because there was
1899 * an error setting it by handle.
1900 */
1901 if (server->ops->set_path_size)
1902 rc = server->ops->set_path_size(xid, tcon, full_path,
1903 attrs->ia_size, cifs_sb, false);
1904 else
1905 rc = -ENOSYS;
1906 cFYI(1, "SetEOF by path (setattrs) rc = %d", rc);
1907 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1908 __u16 netfid;
1909 int oplock = 0;
1910
1911 rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN,
1912 GENERIC_WRITE, CREATE_NOT_DIR, &netfid,
1913 &oplock, NULL, cifs_sb->local_nls,
1907 cifs_sb->mnt_cifs_flags & 1914 cifs_sb->mnt_cifs_flags &
1908 CIFS_MOUNT_MAP_SPECIAL_CHR); 1915 CIFS_MOUNT_MAP_SPECIAL_CHR);
1909 cFYI(1, "SetEOF by path (setattrs) rc = %d", rc); 1916 if (rc == 0) {
1910 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1917 unsigned int bytes_written;
1911 __u16 netfid; 1918
1912 int oplock = 0; 1919 io_parms.netfid = netfid;
1913 1920 io_parms.pid = current->tgid;
1914 rc = SMBLegacyOpen(xid, pTcon, full_path, 1921 io_parms.tcon = tcon;
1915 FILE_OPEN, GENERIC_WRITE, 1922 io_parms.offset = 0;
1916 CREATE_NOT_DIR, &netfid, &oplock, NULL, 1923 io_parms.length = attrs->ia_size;
1917 cifs_sb->local_nls, 1924 rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL,
1918 cifs_sb->mnt_cifs_flags & 1925 NULL, 1);
1919 CIFS_MOUNT_MAP_SPECIAL_CHR); 1926 cFYI(1, "wrt seteof rc %d", rc);
1920 if (rc == 0) { 1927 CIFSSMBClose(xid, tcon, netfid);
1921 unsigned int bytes_written;
1922
1923 io_parms.netfid = netfid;
1924 io_parms.pid = current->tgid;
1925 io_parms.tcon = pTcon;
1926 io_parms.offset = 0;
1927 io_parms.length = attrs->ia_size;
1928 rc = CIFSSMBWrite(xid, &io_parms,
1929 &bytes_written,
1930 NULL, NULL, 1);
1931 cFYI(1, "wrt seteof rc %d", rc);
1932 CIFSSMBClose(xid, pTcon, netfid);
1933 }
1934 } 1928 }
1935 if (tlink)
1936 cifs_put_tlink(tlink);
1937 } 1929 }
1930 if (tlink)
1931 cifs_put_tlink(tlink);
1938 1932
1933set_size_out:
1939 if (rc == 0) { 1934 if (rc == 0) {
1940 cifsInode->server_eof = attrs->ia_size; 1935 cifsInode->server_eof = attrs->ia_size;
1941 cifs_setsize(inode, attrs->ia_size); 1936 cifs_setsize(inode, attrs->ia_size);
@@ -2042,7 +2037,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
2042 args->device = 0; 2037 args->device = 0;
2043 open_file = find_writable_file(cifsInode, true); 2038 open_file = find_writable_file(cifsInode, true);
2044 if (open_file) { 2039 if (open_file) {
2045 u16 nfid = open_file->netfid; 2040 u16 nfid = open_file->fid.netfid;
2046 u32 npid = open_file->pid; 2041 u32 npid = open_file->pid;
2047 pTcon = tlink_tcon(open_file->tlink); 2042 pTcon = tlink_tcon(open_file->tlink);
2048 rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); 2043 rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index ae082a66de2f..fd5009d56f9f 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -28,8 +28,6 @@
28#include "cifs_debug.h" 28#include "cifs_debug.h"
29#include "cifsfs.h" 29#include "cifsfs.h"
30 30
31#define CIFS_IOC_CHECKUMOUNT _IO(0xCF, 2)
32
33long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) 31long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
34{ 32{
35 struct inode *inode = filep->f_dentry->d_inode; 33 struct inode *inode = filep->f_dentry->d_inode;
@@ -51,23 +49,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
51 cifs_sb = CIFS_SB(inode->i_sb); 49 cifs_sb = CIFS_SB(inode->i_sb);
52 50
53 switch (command) { 51 switch (command) {
54 static bool warned = false;
55 case CIFS_IOC_CHECKUMOUNT:
56 if (!warned) {
57 warned = true;
58 cERROR(1, "the CIFS_IOC_CHECKMOUNT ioctl will "
59 "be deprecated in 3.7. Please "
60 "migrate away from the use of "
61 "umount.cifs");
62 }
63 cFYI(1, "User unmount attempted");
64 if (cifs_sb->mnt_uid == current_uid())
65 rc = 0;
66 else {
67 rc = -EACCES;
68 cFYI(1, "uids do not match");
69 }
70 break;
71#ifdef CONFIG_CIFS_POSIX 52#ifdef CONFIG_CIFS_POSIX
72 case FS_IOC_GETFLAGS: 53 case FS_IOC_GETFLAGS:
73 if (pSMBFile == NULL) 54 if (pSMBFile == NULL)
@@ -75,8 +56,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
75 tcon = tlink_tcon(pSMBFile->tlink); 56 tcon = tlink_tcon(pSMBFile->tlink);
76 caps = le64_to_cpu(tcon->fsUnixInfo.Capability); 57 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
77 if (CIFS_UNIX_EXTATTR_CAP & caps) { 58 if (CIFS_UNIX_EXTATTR_CAP & caps) {
78 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, 59 rc = CIFSGetExtAttr(xid, tcon,
79 &ExtAttrBits, &ExtAttrMask); 60 pSMBFile->fid.netfid,
61 &ExtAttrBits, &ExtAttrMask);
80 if (rc == 0) 62 if (rc == 0)
81 rc = put_user(ExtAttrBits & 63 rc = put_user(ExtAttrBits &
82 FS_FL_USER_VISIBLE, 64 FS_FL_USER_VISIBLE,
@@ -94,8 +76,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
94 rc = -EFAULT; 76 rc = -EFAULT;
95 break; 77 break;
96 } 78 }
97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, 79 /*
98 extAttrBits, &ExtAttrMask);*/ 80 * rc = CIFSGetExtAttr(xid, tcon,
81 * pSMBFile->fid.netfid,
82 * extAttrBits,
83 * &ExtAttrMask);
84 */
99 } 85 }
100 cFYI(1, "set flags not implemented yet"); 86 cFYI(1, "set flags not implemented yet");
101 break; 87 break;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 09e4b3ae4564..51dc2fb6e854 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -391,70 +391,86 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
391{ 391{
392 int rc = -EACCES; 392 int rc = -EACCES;
393 unsigned int xid; 393 unsigned int xid;
394 char *fromName = NULL; 394 char *from_name = NULL;
395 char *toName = NULL; 395 char *to_name = NULL;
396 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 396 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
397 struct tcon_link *tlink; 397 struct tcon_link *tlink;
398 struct cifs_tcon *pTcon; 398 struct cifs_tcon *tcon;
399 struct TCP_Server_Info *server;
399 struct cifsInodeInfo *cifsInode; 400 struct cifsInodeInfo *cifsInode;
400 401
401 tlink = cifs_sb_tlink(cifs_sb); 402 tlink = cifs_sb_tlink(cifs_sb);
402 if (IS_ERR(tlink)) 403 if (IS_ERR(tlink))
403 return PTR_ERR(tlink); 404 return PTR_ERR(tlink);
404 pTcon = tlink_tcon(tlink); 405 tcon = tlink_tcon(tlink);
405 406
406 xid = get_xid(); 407 xid = get_xid();
407 408
408 fromName = build_path_from_dentry(old_file); 409 from_name = build_path_from_dentry(old_file);
409 toName = build_path_from_dentry(direntry); 410 to_name = build_path_from_dentry(direntry);
410 if ((fromName == NULL) || (toName == NULL)) { 411 if ((from_name == NULL) || (to_name == NULL)) {
411 rc = -ENOMEM; 412 rc = -ENOMEM;
412 goto cifs_hl_exit; 413 goto cifs_hl_exit;
413 } 414 }
414 415
415 if (pTcon->unix_ext) 416 if (tcon->unix_ext)
416 rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, 417 rc = CIFSUnixCreateHardLink(xid, tcon, from_name, to_name,
417 cifs_sb->local_nls, 418 cifs_sb->local_nls,
418 cifs_sb->mnt_cifs_flags & 419 cifs_sb->mnt_cifs_flags &
419 CIFS_MOUNT_MAP_SPECIAL_CHR); 420 CIFS_MOUNT_MAP_SPECIAL_CHR);
420 else { 421 else {
421 rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, 422 server = tcon->ses->server;
422 cifs_sb->local_nls, 423 if (!server->ops->create_hardlink)
423 cifs_sb->mnt_cifs_flags & 424 return -ENOSYS;
424 CIFS_MOUNT_MAP_SPECIAL_CHR); 425 rc = server->ops->create_hardlink(xid, tcon, from_name, to_name,
426 cifs_sb);
425 if ((rc == -EIO) || (rc == -EINVAL)) 427 if ((rc == -EIO) || (rc == -EINVAL))
426 rc = -EOPNOTSUPP; 428 rc = -EOPNOTSUPP;
427 } 429 }
428 430
429 d_drop(direntry); /* force new lookup from server of target */ 431 d_drop(direntry); /* force new lookup from server of target */
430 432
431 /* if source file is cached (oplocked) revalidate will not go to server 433 /*
432 until the file is closed or oplock broken so update nlinks locally */ 434 * if source file is cached (oplocked) revalidate will not go to server
435 * until the file is closed or oplock broken so update nlinks locally
436 */
433 if (old_file->d_inode) { 437 if (old_file->d_inode) {
434 cifsInode = CIFS_I(old_file->d_inode); 438 cifsInode = CIFS_I(old_file->d_inode);
435 if (rc == 0) { 439 if (rc == 0) {
440 spin_lock(&old_file->d_inode->i_lock);
436 inc_nlink(old_file->d_inode); 441 inc_nlink(old_file->d_inode);
437/* BB should we make this contingent on superblock flag NOATIME? */ 442 spin_unlock(&old_file->d_inode->i_lock);
438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ 443 /*
439 /* parent dir timestamps will update from srv 444 * BB should we make this contingent on superblock flag
440 within a second, would it really be worth it 445 * NOATIME?
441 to set the parent dir cifs inode time to zero 446 */
442 to force revalidate (faster) for it too? */ 447 /* old_file->d_inode->i_ctime = CURRENT_TIME; */
448 /*
449 * parent dir timestamps will update from srv within a
450 * second, would it really be worth it to set the parent
451 * dir cifs inode time to zero to force revalidate
452 * (faster) for it too?
453 */
443 } 454 }
444 /* if not oplocked will force revalidate to get info 455 /*
445 on source file from srv */ 456 * if not oplocked will force revalidate to get info on source
457 * file from srv
458 */
446 cifsInode->time = 0; 459 cifsInode->time = 0;
447 460
448 /* Will update parent dir timestamps from srv within a second. 461 /*
449 Would it really be worth it to set the parent dir (cifs 462 * Will update parent dir timestamps from srv within a second.
450 inode) time field to zero to force revalidate on parent 463 * Would it really be worth it to set the parent dir (cifs
451 directory faster ie 464 * inode) time field to zero to force revalidate on parent
452 CIFS_I(inode)->time = 0; */ 465 * directory faster ie
466 *
467 * CIFS_I(inode)->time = 0;
468 */
453 } 469 }
454 470
455cifs_hl_exit: 471cifs_hl_exit:
456 kfree(fromName); 472 kfree(from_name);
457 kfree(toName); 473 kfree(to_name);
458 free_xid(xid); 474 free_xid(xid);
459 cifs_put_tlink(tlink); 475 cifs_put_tlink(tlink);
460 return rc; 476 return rc;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index ce41fee07e5b..3a00c0d0cead 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -466,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
466 list_for_each(tmp2, &tcon->openFileList) { 466 list_for_each(tmp2, &tcon->openFileList) {
467 netfile = list_entry(tmp2, struct cifsFileInfo, 467 netfile = list_entry(tmp2, struct cifsFileInfo,
468 tlist); 468 tlist);
469 if (pSMB->Fid != netfile->netfid) 469 if (pSMB->Fid != netfile->fid.netfid)
470 continue; 470 continue;
471 471
472 cFYI(1, "file id match, oplock break"); 472 cFYI(1, "file id match, oplock break");
@@ -579,3 +579,33 @@ backup_cred(struct cifs_sb_info *cifs_sb)
579 579
580 return false; 580 return false;
581} 581}
582
583void
584cifs_del_pending_open(struct cifs_pending_open *open)
585{
586 spin_lock(&cifs_file_list_lock);
587 list_del(&open->olist);
588 spin_unlock(&cifs_file_list_lock);
589}
590
591void
592cifs_add_pending_open_locked(struct cifs_fid *fid, struct tcon_link *tlink,
593 struct cifs_pending_open *open)
594{
595#ifdef CONFIG_CIFS_SMB2
596 memcpy(open->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE);
597#endif
598 open->oplock = CIFS_OPLOCK_NO_CHANGE;
599 open->tlink = tlink;
600 fid->pending_open = open;
601 list_add_tail(&open->olist, &tlink_tcon(tlink)->pending_opens);
602}
603
604void
605cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
606 struct cifs_pending_open *open)
607{
608 spin_lock(&cifs_file_list_lock);
609 cifs_add_pending_open_locked(fid, tlink, open);
610 spin_unlock(&cifs_file_list_lock);
611}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 581c225f7f50..d5ce9e26696c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -110,7 +110,7 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
110 {ERRnoroom, -ENOSPC}, 110 {ERRnoroom, -ENOSPC},
111 {ERRrmuns, -EUSERS}, 111 {ERRrmuns, -EUSERS},
112 {ERRtimeout, -ETIME}, 112 {ERRtimeout, -ETIME},
113 {ERRnoresource, -ENOBUFS}, 113 {ERRnoresource, -EREMOTEIO},
114 {ERRtoomanyuids, -EUSERS}, 114 {ERRtoomanyuids, -EUSERS},
115 {ERRbaduid, -EACCES}, 115 {ERRbaduid, -EACCES},
116 {ERRusempx, -EIO}, 116 {ERRusempx, -EIO},
@@ -412,7 +412,7 @@ static const struct {
412 from NT_STATUS_INSUFFICIENT_RESOURCES to 412 from NT_STATUS_INSUFFICIENT_RESOURCES to
413 NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */ 413 NT_STATUS_INSUFF_SERVER_RESOURCES during the session setup } */
414 { 414 {
415 ERRDOS, ERRnomem, NT_STATUS_INSUFFICIENT_RESOURCES}, { 415 ERRDOS, ERRnoresource, NT_STATUS_INSUFFICIENT_RESOURCES}, {
416 ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, { 416 ERRDOS, ERRbadpath, NT_STATUS_DFS_EXIT_PATH_FOUND}, {
417 ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, { 417 ERRDOS, 23, NT_STATUS_DEVICE_DATA_ERROR}, {
418 ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, { 418 ERRHRD, ERRgeneral, NT_STATUS_DEVICE_NOT_CONNECTED}, {
@@ -682,7 +682,7 @@ static const struct {
682 ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, { 682 ERRHRD, ERRgeneral, NT_STATUS_NO_USER_SESSION_KEY}, {
683 ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, { 683 ERRDOS, 59, NT_STATUS_USER_SESSION_DELETED}, {
684 ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, { 684 ERRHRD, ERRgeneral, NT_STATUS_RESOURCE_LANG_NOT_FOUND}, {
685 ERRDOS, ERRnomem, NT_STATUS_INSUFF_SERVER_RESOURCES}, { 685 ERRDOS, ERRnoresource, NT_STATUS_INSUFF_SERVER_RESOURCES}, {
686 ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, { 686 ERRHRD, ERRgeneral, NT_STATUS_INVALID_BUFFER_SIZE}, {
687 ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, { 687 ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_COMPONENT}, {
688 ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, { 688 ERRHRD, ERRgeneral, NT_STATUS_INVALID_ADDRESS_WILDCARD}, {
@@ -913,8 +913,9 @@ map_smb_to_linux_error(char *buf, bool logErr)
913 * portion, the number of word parameters and the data portion of the message 913 * portion, the number of word parameters and the data portion of the message
914 */ 914 */
915unsigned int 915unsigned int
916smbCalcSize(struct smb_hdr *ptr) 916smbCalcSize(void *buf)
917{ 917{
918 struct smb_hdr *ptr = (struct smb_hdr *)buf;
918 return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) + 919 return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
919 2 /* size of the bcc field */ + get_bcc(ptr)); 920 2 /* size of the bcc field */ + get_bcc(ptr));
920} 921}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index d87f82678bc7..f9b5d3d6cf33 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -151,7 +151,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
151 } 151 }
152} 152}
153 153
154static void 154void
155cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, 155cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
156 struct cifs_sb_info *cifs_sb) 156 struct cifs_sb_info *cifs_sb)
157{ 157{
@@ -220,7 +220,8 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
220} 220}
221 */ 221 */
222 222
223static int initiate_cifs_search(const unsigned int xid, struct file *file) 223static int
224initiate_cifs_search(const unsigned int xid, struct file *file)
224{ 225{
225 __u16 search_flags; 226 __u16 search_flags;
226 int rc = 0; 227 int rc = 0;
@@ -229,6 +230,7 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file)
229 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 230 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
230 struct tcon_link *tlink = NULL; 231 struct tcon_link *tlink = NULL;
231 struct cifs_tcon *tcon; 232 struct cifs_tcon *tcon;
233 struct TCP_Server_Info *server;
232 234
233 if (file->private_data == NULL) { 235 if (file->private_data == NULL) {
234 tlink = cifs_sb_tlink(cifs_sb); 236 tlink = cifs_sb_tlink(cifs_sb);
@@ -248,6 +250,13 @@ static int initiate_cifs_search(const unsigned int xid, struct file *file)
248 tcon = tlink_tcon(cifsFile->tlink); 250 tcon = tlink_tcon(cifsFile->tlink);
249 } 251 }
250 252
253 server = tcon->ses->server;
254
255 if (!server->ops->query_dir_first) {
256 rc = -ENOSYS;
257 goto error_exit;
258 }
259
251 cifsFile->invalidHandle = true; 260 cifsFile->invalidHandle = true;
252 cifsFile->srch_inf.endOfSearch = false; 261 cifsFile->srch_inf.endOfSearch = false;
253 262
@@ -278,10 +287,10 @@ ffirst_retry:
278 if (backup_cred(cifs_sb)) 287 if (backup_cred(cifs_sb))
279 search_flags |= CIFS_SEARCH_BACKUP_SEARCH; 288 search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
280 289
281 rc = CIFSFindFirst(xid, tcon, full_path, cifs_sb->local_nls, 290 rc = server->ops->query_dir_first(xid, tcon, full_path, cifs_sb,
282 &cifsFile->netfid, search_flags, &cifsFile->srch_inf, 291 &cifsFile->fid, search_flags,
283 cifs_sb->mnt_cifs_flags & 292 &cifsFile->srch_inf);
284 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 293
285 if (rc == 0) 294 if (rc == 0)
286 cifsFile->invalidHandle = false; 295 cifsFile->invalidHandle = false;
287 /* BB add following call to handle readdir on new NTFS symlink errors 296 /* BB add following call to handle readdir on new NTFS symlink errors
@@ -501,62 +510,67 @@ static int cifs_save_resume_key(const char *current_entry,
501 return rc; 510 return rc;
502} 511}
503 512
504/* find the corresponding entry in the search */ 513/*
505/* Note that the SMB server returns search entries for . and .. which 514 * Find the corresponding entry in the search. Note that the SMB server returns
506 complicates logic here if we choose to parse for them and we do not 515 * search entries for . and .. which complicates logic here if we choose to
507 assume that they are located in the findfirst return buffer.*/ 516 * parse for them and we do not assume that they are located in the findfirst
508/* We start counting in the buffer with entry 2 and increment for every 517 * return buffer. We start counting in the buffer with entry 2 and increment for
509 entry (do not increment for . or .. entry) */ 518 * every entry (do not increment for . or .. entry).
510static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon, 519 */
511 struct file *file, char **ppCurrentEntry, int *num_to_ret) 520static int
521find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon,
522 struct file *file, char **current_entry, int *num_to_ret)
512{ 523{
513 __u16 search_flags; 524 __u16 search_flags;
514 int rc = 0; 525 int rc = 0;
515 int pos_in_buf = 0; 526 int pos_in_buf = 0;
516 loff_t first_entry_in_buffer; 527 loff_t first_entry_in_buffer;
517 loff_t index_to_find = file->f_pos; 528 loff_t index_to_find = file->f_pos;
518 struct cifsFileInfo *cifsFile = file->private_data; 529 struct cifsFileInfo *cfile = file->private_data;
519 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 530 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
531 struct TCP_Server_Info *server = tcon->ses->server;
520 /* check if index in the buffer */ 532 /* check if index in the buffer */
521 533
522 if ((cifsFile == NULL) || (ppCurrentEntry == NULL) || 534 if (!server->ops->query_dir_first || !server->ops->query_dir_next)
523 (num_to_ret == NULL)) 535 return -ENOSYS;
536
537 if ((cfile == NULL) || (current_entry == NULL) || (num_to_ret == NULL))
524 return -ENOENT; 538 return -ENOENT;
525 539
526 *ppCurrentEntry = NULL; 540 *current_entry = NULL;
527 first_entry_in_buffer = 541 first_entry_in_buffer = cfile->srch_inf.index_of_last_entry -
528 cifsFile->srch_inf.index_of_last_entry - 542 cfile->srch_inf.entries_in_buffer;
529 cifsFile->srch_inf.entries_in_buffer;
530 543
531 /* if first entry in buf is zero then is first buffer 544 /*
532 in search response data which means it is likely . and .. 545 * If first entry in buf is zero then is first buffer
533 will be in this buffer, although some servers do not return 546 * in search response data which means it is likely . and ..
534 . and .. for the root of a drive and for those we need 547 * will be in this buffer, although some servers do not return
535 to start two entries earlier */ 548 * . and .. for the root of a drive and for those we need
549 * to start two entries earlier.
550 */
536 551
537 dump_cifs_file_struct(file, "In fce "); 552 dump_cifs_file_struct(file, "In fce ");
538 if (((index_to_find < cifsFile->srch_inf.index_of_last_entry) && 553 if (((index_to_find < cfile->srch_inf.index_of_last_entry) &&
539 is_dir_changed(file)) || 554 is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
540 (index_to_find < first_entry_in_buffer)) {
541 /* close and restart search */ 555 /* close and restart search */
542 cFYI(1, "search backing up - close and restart search"); 556 cFYI(1, "search backing up - close and restart search");
543 spin_lock(&cifs_file_list_lock); 557 spin_lock(&cifs_file_list_lock);
544 if (!cifsFile->srch_inf.endOfSearch && 558 if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
545 !cifsFile->invalidHandle) { 559 cfile->invalidHandle = true;
546 cifsFile->invalidHandle = true;
547 spin_unlock(&cifs_file_list_lock); 560 spin_unlock(&cifs_file_list_lock);
548 CIFSFindClose(xid, pTcon, cifsFile->netfid); 561 if (server->ops->close)
562 server->ops->close(xid, tcon, &cfile->fid);
549 } else 563 } else
550 spin_unlock(&cifs_file_list_lock); 564 spin_unlock(&cifs_file_list_lock);
551 if (cifsFile->srch_inf.ntwrk_buf_start) { 565 if (cfile->srch_inf.ntwrk_buf_start) {
552 cFYI(1, "freeing SMB ff cache buf on search rewind"); 566 cFYI(1, "freeing SMB ff cache buf on search rewind");
553 if (cifsFile->srch_inf.smallBuf) 567 if (cfile->srch_inf.smallBuf)
554 cifs_small_buf_release(cifsFile->srch_inf. 568 cifs_small_buf_release(cfile->srch_inf.
555 ntwrk_buf_start); 569 ntwrk_buf_start);
556 else 570 else
557 cifs_buf_release(cifsFile->srch_inf. 571 cifs_buf_release(cfile->srch_inf.
558 ntwrk_buf_start); 572 ntwrk_buf_start);
559 cifsFile->srch_inf.ntwrk_buf_start = NULL; 573 cfile->srch_inf.ntwrk_buf_start = NULL;
560 } 574 }
561 rc = initiate_cifs_search(xid, file); 575 rc = initiate_cifs_search(xid, file);
562 if (rc) { 576 if (rc) {
@@ -565,65 +579,64 @@ static int find_cifs_entry(const unsigned int xid, struct cifs_tcon *pTcon,
565 return rc; 579 return rc;
566 } 580 }
567 /* FindFirst/Next set last_entry to NULL on malformed reply */ 581 /* FindFirst/Next set last_entry to NULL on malformed reply */
568 if (cifsFile->srch_inf.last_entry) 582 if (cfile->srch_inf.last_entry)
569 cifs_save_resume_key(cifsFile->srch_inf.last_entry, 583 cifs_save_resume_key(cfile->srch_inf.last_entry, cfile);
570 cifsFile);
571 } 584 }
572 585
573 search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; 586 search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME;
574 if (backup_cred(cifs_sb)) 587 if (backup_cred(cifs_sb))
575 search_flags |= CIFS_SEARCH_BACKUP_SEARCH; 588 search_flags |= CIFS_SEARCH_BACKUP_SEARCH;
576 589
577 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 590 while ((index_to_find >= cfile->srch_inf.index_of_last_entry) &&
578 (rc == 0) && !cifsFile->srch_inf.endOfSearch) { 591 (rc == 0) && !cfile->srch_inf.endOfSearch) {
579 cFYI(1, "calling findnext2"); 592 cFYI(1, "calling findnext2");
580 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, search_flags, 593 rc = server->ops->query_dir_next(xid, tcon, &cfile->fid,
581 &cifsFile->srch_inf); 594 search_flags,
595 &cfile->srch_inf);
582 /* FindFirst/Next set last_entry to NULL on malformed reply */ 596 /* FindFirst/Next set last_entry to NULL on malformed reply */
583 if (cifsFile->srch_inf.last_entry) 597 if (cfile->srch_inf.last_entry)
584 cifs_save_resume_key(cifsFile->srch_inf.last_entry, 598 cifs_save_resume_key(cfile->srch_inf.last_entry, cfile);
585 cifsFile);
586 if (rc) 599 if (rc)
587 return -ENOENT; 600 return -ENOENT;
588 } 601 }
589 if (index_to_find < cifsFile->srch_inf.index_of_last_entry) { 602 if (index_to_find < cfile->srch_inf.index_of_last_entry) {
590 /* we found the buffer that contains the entry */ 603 /* we found the buffer that contains the entry */
591 /* scan and find it */ 604 /* scan and find it */
592 int i; 605 int i;
593 char *current_entry; 606 char *cur_ent;
594 char *end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + 607 char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
595 smbCalcSize((struct smb_hdr *) 608 server->ops->calc_smb_size(
596 cifsFile->srch_inf.ntwrk_buf_start); 609 cfile->srch_inf.ntwrk_buf_start);
597 610
598 current_entry = cifsFile->srch_inf.srch_entries_start; 611 cur_ent = cfile->srch_inf.srch_entries_start;
599 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 612 first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
600 - cifsFile->srch_inf.entries_in_buffer; 613 - cfile->srch_inf.entries_in_buffer;
601 pos_in_buf = index_to_find - first_entry_in_buffer; 614 pos_in_buf = index_to_find - first_entry_in_buffer;
602 cFYI(1, "found entry - pos_in_buf %d", pos_in_buf); 615 cFYI(1, "found entry - pos_in_buf %d", pos_in_buf);
603 616
604 for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { 617 for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) {
605 /* go entry by entry figuring out which is first */ 618 /* go entry by entry figuring out which is first */
606 current_entry = nxt_dir_entry(current_entry, end_of_smb, 619 cur_ent = nxt_dir_entry(cur_ent, end_of_smb,
607 cifsFile->srch_inf.info_level); 620 cfile->srch_inf.info_level);
608 } 621 }
609 if ((current_entry == NULL) && (i < pos_in_buf)) { 622 if ((cur_ent == NULL) && (i < pos_in_buf)) {
610 /* BB fixme - check if we should flag this error */ 623 /* BB fixme - check if we should flag this error */
611 cERROR(1, "reached end of buf searching for pos in buf" 624 cERROR(1, "reached end of buf searching for pos in buf"
612 " %d index to find %lld rc %d", 625 " %d index to find %lld rc %d", pos_in_buf,
613 pos_in_buf, index_to_find, rc); 626 index_to_find, rc);
614 } 627 }
615 rc = 0; 628 rc = 0;
616 *ppCurrentEntry = current_entry; 629 *current_entry = cur_ent;
617 } else { 630 } else {
618 cFYI(1, "index not in buffer - could not findnext into it"); 631 cFYI(1, "index not in buffer - could not findnext into it");
619 return 0; 632 return 0;
620 } 633 }
621 634
622 if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { 635 if (pos_in_buf >= cfile->srch_inf.entries_in_buffer) {
623 cFYI(1, "can not return entries pos_in_buf beyond last"); 636 cFYI(1, "can not return entries pos_in_buf beyond last");
624 *num_to_ret = 0; 637 *num_to_ret = 0;
625 } else 638 } else
626 *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; 639 *num_to_ret = cfile->srch_inf.entries_in_buffer - pos_in_buf;
627 640
628 return rc; 641 return rc;
629} 642}
@@ -723,7 +736,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
723 int rc = 0; 736 int rc = 0;
724 unsigned int xid; 737 unsigned int xid;
725 int i; 738 int i;
726 struct cifs_tcon *pTcon; 739 struct cifs_tcon *tcon;
727 struct cifsFileInfo *cifsFile = NULL; 740 struct cifsFileInfo *cifsFile = NULL;
728 char *current_entry; 741 char *current_entry;
729 int num_to_fill = 0; 742 int num_to_fill = 0;
@@ -781,12 +794,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
781 } 794 }
782 } /* else { 795 } /* else {
783 cifsFile->invalidHandle = true; 796 cifsFile->invalidHandle = true;
784 CIFSFindClose(xid, pTcon, cifsFile->netfid); 797 tcon->ses->server->close(xid, tcon, &cifsFile->fid);
785 } */ 798 } */
786 799
787 pTcon = tlink_tcon(cifsFile->tlink); 800 tcon = tlink_tcon(cifsFile->tlink);
788 rc = find_cifs_entry(xid, pTcon, file, 801 rc = find_cifs_entry(xid, tcon, file, &current_entry,
789 &current_entry, &num_to_fill); 802 &num_to_fill);
790 if (rc) { 803 if (rc) {
791 cFYI(1, "fce error %d", rc); 804 cFYI(1, "fce error %d", rc);
792 goto rddir2_exit; 805 goto rddir2_exit;
@@ -798,7 +811,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
798 } 811 }
799 cFYI(1, "loop through %d times filling dir for net buf %p", 812 cFYI(1, "loop through %d times filling dir for net buf %p",
800 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); 813 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
801 max_len = smbCalcSize((struct smb_hdr *) 814 max_len = tcon->ses->server->ops->calc_smb_size(
802 cifsFile->srch_inf.ntwrk_buf_start); 815 cifsFile->srch_inf.ntwrk_buf_start);
803 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; 816 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
804 817
@@ -815,10 +828,12 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
815 num_to_fill, i); 828 num_to_fill, i);
816 break; 829 break;
817 } 830 }
818 /* if buggy server returns . and .. late do 831 /*
819 we want to check for that here? */ 832 * if buggy server returns . and .. late do we want to
820 rc = cifs_filldir(current_entry, file, 833 * check for that here?
821 filldir, direntry, tmp_buf, max_len); 834 */
835 rc = cifs_filldir(current_entry, file, filldir,
836 direntry, tmp_buf, max_len);
822 if (rc == -EOVERFLOW) { 837 if (rc == -EOVERFLOW) {
823 rc = 0; 838 rc = 0;
824 break; 839 break;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 382c06d01b38..76809f4d3428 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -876,7 +876,8 @@ ssetup_ntlmssp_authenticate:
876 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 876 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
877 smb_buf = (struct smb_hdr *)iov[0].iov_base; 877 smb_buf = (struct smb_hdr *)iov[0].iov_base;
878 878
879 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == 879 if ((type == RawNTLMSSP) && (resp_buf_type != CIFS_NO_BUFFER) &&
880 (smb_buf->Status.CifsError ==
880 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { 881 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
881 if (phase != NtLmNegotiate) { 882 if (phase != NtLmNegotiate) {
882 cERROR(1, "Unexpected more processing error"); 883 cERROR(1, "Unexpected more processing error");
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3129ac74b819..56cc4be87807 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -17,6 +17,8 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/pagemap.h>
21#include <linux/vfs.h>
20#include "cifsglob.h" 22#include "cifsglob.h"
21#include "cifsproto.h" 23#include "cifsproto.h"
22#include "cifs_debug.h" 24#include "cifs_debug.h"
@@ -63,7 +65,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf,
63static bool 65static bool
64cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2) 66cifs_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
65{ 67{
66 return ob1->netfid == ob2->netfid; 68 return ob1->fid.netfid == ob2->fid.netfid;
67} 69}
68 70
69static unsigned int 71static unsigned int
@@ -410,6 +412,83 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses)
410 return rc; 412 return rc;
411} 413}
412 414
415static unsigned int
416cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
417{
418 __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
419 struct TCP_Server_Info *server = tcon->ses->server;
420 unsigned int wsize;
421
422 /* start with specified wsize, or default */
423 if (volume_info->wsize)
424 wsize = volume_info->wsize;
425 else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
426 wsize = CIFS_DEFAULT_IOSIZE;
427 else
428 wsize = CIFS_DEFAULT_NON_POSIX_WSIZE;
429
430 /* can server support 24-bit write sizes? (via UNIX extensions) */
431 if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
432 wsize = min_t(unsigned int, wsize, CIFS_MAX_RFC1002_WSIZE);
433
434 /*
435 * no CAP_LARGE_WRITE_X or is signing enabled without CAP_UNIX set?
436 * Limit it to max buffer offered by the server, minus the size of the
437 * WRITEX header, not including the 4 byte RFC1001 length.
438 */
439 if (!(server->capabilities & CAP_LARGE_WRITE_X) ||
440 (!(server->capabilities & CAP_UNIX) &&
441 (server->sec_mode & (SECMODE_SIGN_ENABLED|SECMODE_SIGN_REQUIRED))))
442 wsize = min_t(unsigned int, wsize,
443 server->maxBuf - sizeof(WRITE_REQ) + 4);
444
445 /* hard limit of CIFS_MAX_WSIZE */
446 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
447
448 return wsize;
449}
450
451static unsigned int
452cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
453{
454 __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
455 struct TCP_Server_Info *server = tcon->ses->server;
456 unsigned int rsize, defsize;
457
458 /*
459 * Set default value...
460 *
461 * HACK alert! Ancient servers have very small buffers. Even though
462 * MS-CIFS indicates that servers are only limited by the client's
463 * bufsize for reads, testing against win98se shows that it throws
464 * INVALID_PARAMETER errors if you try to request too large a read.
465 * OS/2 just sends back short reads.
466 *
467 * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
468 * it can't handle a read request larger than its MaxBufferSize either.
469 */
470 if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
471 defsize = CIFS_DEFAULT_IOSIZE;
472 else if (server->capabilities & CAP_LARGE_READ_X)
473 defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
474 else
475 defsize = server->maxBuf - sizeof(READ_RSP);
476
477 rsize = volume_info->rsize ? volume_info->rsize : defsize;
478
479 /*
480 * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
481 * the client's MaxBufferSize.
482 */
483 if (!(server->capabilities & CAP_LARGE_READ_X))
484 rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
485
486 /* hard limit of CIFS_MAX_RSIZE */
487 rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
488
489 return rsize;
490}
491
413static void 492static void
414cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) 493cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
415{ 494{
@@ -489,6 +568,13 @@ cifs_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
489 CIFS_MOUNT_MAP_SPECIAL_CHR); 568 CIFS_MOUNT_MAP_SPECIAL_CHR);
490} 569}
491 570
571static int
572cifs_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
573 struct cifs_fid *fid, FILE_ALL_INFO *data)
574{
575 return CIFSSMBQFileInfo(xid, tcon, fid->netfid, data);
576}
577
492static char * 578static char *
493cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, 579cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
494 struct cifs_tcon *tcon) 580 struct cifs_tcon *tcon)
@@ -607,6 +693,219 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path,
607 cifsInode->cifsAttrs = dosattrs; 693 cifsInode->cifsAttrs = dosattrs;
608} 694}
609 695
696static int
697cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
698 int disposition, int desired_access, int create_options,
699 struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf,
700 struct cifs_sb_info *cifs_sb)
701{
702 if (!(tcon->ses->capabilities & CAP_NT_SMBS))
703 return SMBLegacyOpen(xid, tcon, path, disposition,
704 desired_access, create_options,
705 &fid->netfid, oplock, buf,
706 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
707 & CIFS_MOUNT_MAP_SPECIAL_CHR);
708 return CIFSSMBOpen(xid, tcon, path, disposition, desired_access,
709 create_options, &fid->netfid, oplock, buf,
710 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
711 CIFS_MOUNT_MAP_SPECIAL_CHR);
712}
713
714static void
715cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
716{
717 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
718 cfile->fid.netfid = fid->netfid;
719 cifs_set_oplock_level(cinode, oplock);
720 cinode->can_cache_brlcks = cinode->clientCanCacheAll;
721}
722
723static void
724cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
725 struct cifs_fid *fid)
726{
727 CIFSSMBClose(xid, tcon, fid->netfid);
728}
729
730static int
731cifs_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
732 struct cifs_fid *fid)
733{
734 return CIFSSMBFlush(xid, tcon, fid->netfid);
735}
736
737static int
738cifs_sync_read(const unsigned int xid, struct cifsFileInfo *cfile,
739 struct cifs_io_parms *parms, unsigned int *bytes_read,
740 char **buf, int *buf_type)
741{
742 parms->netfid = cfile->fid.netfid;
743 return CIFSSMBRead(xid, parms, bytes_read, buf, buf_type);
744}
745
746static int
747cifs_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
748 struct cifs_io_parms *parms, unsigned int *written,
749 struct kvec *iov, unsigned long nr_segs)
750{
751
752 parms->netfid = cfile->fid.netfid;
753 return CIFSSMBWrite2(xid, parms, written, iov, nr_segs);
754}
755
756static int
757smb_set_file_info(struct inode *inode, const char *full_path,
758 FILE_BASIC_INFO *buf, const unsigned int xid)
759{
760 int oplock = 0;
761 int rc;
762 __u16 netfid;
763 __u32 netpid;
764 struct cifsFileInfo *open_file;
765 struct cifsInodeInfo *cinode = CIFS_I(inode);
766 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
767 struct tcon_link *tlink = NULL;
768 struct cifs_tcon *tcon;
769 FILE_BASIC_INFO info_buf;
770
771 /* if the file is already open for write, just use that fileid */
772 open_file = find_writable_file(cinode, true);
773 if (open_file) {
774 netfid = open_file->fid.netfid;
775 netpid = open_file->pid;
776 tcon = tlink_tcon(open_file->tlink);
777 goto set_via_filehandle;
778 }
779
780 tlink = cifs_sb_tlink(cifs_sb);
781 if (IS_ERR(tlink)) {
782 rc = PTR_ERR(tlink);
783 tlink = NULL;
784 goto out;
785 }
786 tcon = tlink_tcon(tlink);
787
788 /*
789 * NT4 apparently returns success on this call, but it doesn't really
790 * work.
791 */
792 if (!(tcon->ses->flags & CIFS_SES_NT4)) {
793 rc = CIFSSMBSetPathInfo(xid, tcon, full_path, buf,
794 cifs_sb->local_nls,
795 cifs_sb->mnt_cifs_flags &
796 CIFS_MOUNT_MAP_SPECIAL_CHR);
797 if (rc == 0) {
798 cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
799 goto out;
800 } else if (rc != -EOPNOTSUPP && rc != -EINVAL)
801 goto out;
802 }
803
804 cFYI(1, "calling SetFileInfo since SetPathInfo for times not supported "
805 "by this server");
806 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
807 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
808 &netfid, &oplock, NULL, cifs_sb->local_nls,
809 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
810
811 if (rc != 0) {
812 if (rc == -EIO)
813 rc = -EINVAL;
814 goto out;
815 }
816
817 netpid = current->tgid;
818
819set_via_filehandle:
820 rc = CIFSSMBSetFileInfo(xid, tcon, &info_buf, netfid, netpid);
821 if (!rc)
822 cinode->cifsAttrs = le32_to_cpu(buf->Attributes);
823
824 if (open_file == NULL)
825 CIFSSMBClose(xid, tcon, netfid);
826 else
827 cifsFileInfo_put(open_file);
828out:
829 if (tlink != NULL)
830 cifs_put_tlink(tlink);
831 return rc;
832}
833
834static int
835cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
836 const char *path, struct cifs_sb_info *cifs_sb,
837 struct cifs_fid *fid, __u16 search_flags,
838 struct cifs_search_info *srch_inf)
839{
840 return CIFSFindFirst(xid, tcon, path, cifs_sb,
841 &fid->netfid, search_flags, srch_inf, true);
842}
843
844static int
845cifs_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon,
846 struct cifs_fid *fid, __u16 search_flags,
847 struct cifs_search_info *srch_inf)
848{
849 return CIFSFindNext(xid, tcon, fid->netfid, search_flags, srch_inf);
850}
851
852static int
853cifs_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
854 struct cifs_fid *fid)
855{
856 return CIFSFindClose(xid, tcon, fid->netfid);
857}
858
859static int
860cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
861 struct cifsInodeInfo *cinode)
862{
863 return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
864 LOCKING_ANDX_OPLOCK_RELEASE, false,
865 cinode->clientCanCacheRead ? 1 : 0);
866}
867
868static int
869cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
870 struct kstatfs *buf)
871{
872 int rc = -EOPNOTSUPP;
873
874 buf->f_type = CIFS_MAGIC_NUMBER;
875
876 /*
877 * We could add a second check for a QFS Unix capability bit
878 */
879 if ((tcon->ses->capabilities & CAP_UNIX) &&
880 (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability)))
881 rc = CIFSSMBQFSPosixInfo(xid, tcon, buf);
882
883 /*
884 * Only need to call the old QFSInfo if failed on newer one,
885 * e.g. by OS/2.
886 **/
887 if (rc && (tcon->ses->capabilities & CAP_NT_SMBS))
888 rc = CIFSSMBQFSInfo(xid, tcon, buf);
889
890 /*
891 * Some old Windows servers also do not support level 103, retry with
892 * older level one if old server failed the previous call or we
893 * bypassed it because we detected that this was an older LANMAN sess
894 */
895 if (rc)
896 rc = SMBOldQFSInfo(xid, tcon, buf);
897 return rc;
898}
899
900static int
901cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
902 __u64 length, __u32 type, int lock, int unlock, bool wait)
903{
904 return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
905 current->tgid, length, offset, unlock, lock,
906 (__u8)type, wait, 0);
907}
908
610struct smb_version_operations smb1_operations = { 909struct smb_version_operations smb1_operations = {
611 .send_cancel = send_nt_cancel, 910 .send_cancel = send_nt_cancel,
612 .compare_fids = cifs_compare_fids, 911 .compare_fids = cifs_compare_fids,
@@ -630,6 +929,8 @@ struct smb_version_operations smb1_operations = {
630 .check_trans2 = cifs_check_trans2, 929 .check_trans2 = cifs_check_trans2,
631 .need_neg = cifs_need_neg, 930 .need_neg = cifs_need_neg,
632 .negotiate = cifs_negotiate, 931 .negotiate = cifs_negotiate,
932 .negotiate_wsize = cifs_negotiate_wsize,
933 .negotiate_rsize = cifs_negotiate_rsize,
633 .sess_setup = CIFS_SessSetup, 934 .sess_setup = CIFS_SessSetup,
634 .logoff = CIFSSMBLogoff, 935 .logoff = CIFSSMBLogoff,
635 .tree_connect = CIFSTCon, 936 .tree_connect = CIFSTCon,
@@ -638,12 +939,37 @@ struct smb_version_operations smb1_operations = {
638 .qfs_tcon = cifs_qfs_tcon, 939 .qfs_tcon = cifs_qfs_tcon,
639 .is_path_accessible = cifs_is_path_accessible, 940 .is_path_accessible = cifs_is_path_accessible,
640 .query_path_info = cifs_query_path_info, 941 .query_path_info = cifs_query_path_info,
942 .query_file_info = cifs_query_file_info,
641 .get_srv_inum = cifs_get_srv_inum, 943 .get_srv_inum = cifs_get_srv_inum,
944 .set_path_size = CIFSSMBSetEOF,
945 .set_file_size = CIFSSMBSetFileSize,
946 .set_file_info = smb_set_file_info,
642 .build_path_to_root = cifs_build_path_to_root, 947 .build_path_to_root = cifs_build_path_to_root,
643 .echo = CIFSSMBEcho, 948 .echo = CIFSSMBEcho,
644 .mkdir = CIFSSMBMkDir, 949 .mkdir = CIFSSMBMkDir,
645 .mkdir_setinfo = cifs_mkdir_setinfo, 950 .mkdir_setinfo = cifs_mkdir_setinfo,
646 .rmdir = CIFSSMBRmDir, 951 .rmdir = CIFSSMBRmDir,
952 .unlink = CIFSSMBDelFile,
953 .rename_pending_delete = cifs_rename_pending_delete,
954 .rename = CIFSSMBRename,
955 .create_hardlink = CIFSCreateHardLink,
956 .open = cifs_open_file,
957 .set_fid = cifs_set_fid,
958 .close = cifs_close_file,
959 .flush = cifs_flush_file,
960 .async_readv = cifs_async_readv,
961 .async_writev = cifs_async_writev,
962 .sync_read = cifs_sync_read,
963 .sync_write = cifs_sync_write,
964 .query_dir_first = cifs_query_dir_first,
965 .query_dir_next = cifs_query_dir_next,
966 .close_dir = cifs_close_dir,
967 .calc_smb_size = smbCalcSize,
968 .oplock_response = cifs_oplock_response,
969 .queryfs = cifs_queryfs,
970 .mand_lock = cifs_mand_lock,
971 .mand_unlock_range = cifs_unlock_range,
972 .push_mand_locks = cifs_push_mandatory_locks,
647}; 973};
648 974
649struct smb_version_values smb1_values = { 975struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
new file mode 100644
index 000000000000..a93eec30a50d
--- /dev/null
+++ b/fs/cifs/smb2file.c
@@ -0,0 +1,302 @@
1/*
2 * fs/cifs/smb2file.c
3 *
4 * Copyright (C) International Business Machines Corp., 2002, 2011
5 * Author(s): Steve French (sfrench@us.ibm.com),
6 * Pavel Shilovsky (pshilovsky@samba.org) 2012
7 *
8 * This library is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU Lesser General Public License as published
10 * by the Free Software Foundation; either version 2.1 of the License, or
11 * (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
16 * the GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22#include <linux/fs.h>
23#include <linux/stat.h>
24#include <linux/slab.h>
25#include <linux/pagemap.h>
26#include <asm/div64.h>
27#include "cifsfs.h"
28#include "cifspdu.h"
29#include "cifsglob.h"
30#include "cifsproto.h"
31#include "cifs_debug.h"
32#include "cifs_fs_sb.h"
33#include "cifs_unicode.h"
34#include "fscache.h"
35#include "smb2proto.h"
36
37void
38smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
39{
40 oplock &= 0xFF;
41 if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
42 return;
43 if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
44 cinode->clientCanCacheAll = true;
45 cinode->clientCanCacheRead = true;
46 cFYI(1, "Exclusive Oplock granted on inode %p",
47 &cinode->vfs_inode);
48 } else if (oplock == SMB2_OPLOCK_LEVEL_II) {
49 cinode->clientCanCacheAll = false;
50 cinode->clientCanCacheRead = true;
51 cFYI(1, "Level II Oplock granted on inode %p",
52 &cinode->vfs_inode);
53 } else {
54 cinode->clientCanCacheAll = false;
55 cinode->clientCanCacheRead = false;
56 }
57}
58
59int
60smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
61 int disposition, int desired_access, int create_options,
62 struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf,
63 struct cifs_sb_info *cifs_sb)
64{
65 int rc;
66 __le16 *smb2_path;
67 struct smb2_file_all_info *smb2_data = NULL;
68 __u8 smb2_oplock[17];
69
70 smb2_path = cifs_convert_path_to_utf16(path, cifs_sb);
71 if (smb2_path == NULL) {
72 rc = -ENOMEM;
73 goto out;
74 }
75
76 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
77 GFP_KERNEL);
78 if (smb2_data == NULL) {
79 rc = -ENOMEM;
80 goto out;
81 }
82
83 desired_access |= FILE_READ_ATTRIBUTES;
84 *smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE;
85
86 if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
87 memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE);
88
89 rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid,
90 &fid->volatile_fid, desired_access, disposition,
91 0, 0, smb2_oplock, smb2_data);
92 if (rc)
93 goto out;
94
95 if (buf) {
96 /* open response does not have IndexNumber field - get it */
97 rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid,
98 fid->volatile_fid,
99 &smb2_data->IndexNumber);
100 if (rc) {
101 /* let get_inode_info disable server inode numbers */
102 smb2_data->IndexNumber = 0;
103 rc = 0;
104 }
105 move_smb2_info_to_cifs(buf, smb2_data);
106 }
107
108 *oplock = *smb2_oplock;
109out:
110 kfree(smb2_data);
111 kfree(smb2_path);
112 return rc;
113}
114
/*
 * Remove the byte-range locks of the calling process (matched by
 * current->tgid) that lie entirely inside the range described by @flock.
 *
 * While the inode can cache byte-range locks (can_cache_brlcks), matching
 * locks are simply dropped from the file's list. Otherwise unlock elements
 * are collected in a buffer sized from the server's maxBuf and sent with
 * smb2_lockv() in batches of at most max_num elements. Locks belonging to a
 * failed batch are put back on the file's list.
 *
 * Runs with cinode->lock_sem held for writing.
 *
 * Returns 0 on success, -EINVAL if maxBuf is zero, -ENOMEM if the buffer
 * cannot be allocated, or the error of the last failed smb2_lockv() call.
 */
115int
116smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
117 const unsigned int xid)
118{
119 int rc = 0, stored_rc;
120 unsigned int max_num, num = 0, max_buf;
121 struct smb2_lock_element *buf, *cur;
122 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
123 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
124 struct cifsLockInfo *li, *tmp;
125 __u64 length = 1 + flock->fl_end - flock->fl_start;
126 struct list_head tmp_llist;
127
128 INIT_LIST_HEAD(&tmp_llist);
129
130 /*
131 * Accessing maxBuf is racy with cifs_reconnect - need to store value
132 * and check it for zero before using.
133 */
134 max_buf = tcon->ses->server->maxBuf;
135 if (!max_buf)
136 return -EINVAL;
137
/* number of lock elements that fit into one request buffer */
138 max_num = max_buf / sizeof(struct smb2_lock_element);
139 buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
140 if (!buf)
141 return -ENOMEM;
142
143 cur = buf;
144
145 down_write(&cinode->lock_sem);
146 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
/* skip locks that are not fully contained in the requested range */
147 if (flock->fl_start > li->offset ||
148 (flock->fl_start + length) <
149 (li->offset + li->length))
150 continue;
/* skip locks owned by another process */
151 if (current->tgid != li->pid)
152 continue;
153 if (cinode->can_cache_brlcks) {
154 /*
155 * We can cache brlock requests - simply remove a lock
156 * from the file's list.
157 */
158 list_del(&li->llist);
159 cifs_del_lock_waiters(li);
160 kfree(li);
161 continue;
162 }
163 cur->Length = cpu_to_le64(li->length);
164 cur->Offset = cpu_to_le64(li->offset);
165 cur->Flags = cpu_to_le32(SMB2_LOCKFLAG_UNLOCK);
166 /*
167 * We need to save a lock here to let us add it again to the
168 * file's list if the unlock range request fails on the server.
169 */
170 list_move(&li->llist, &tmp_llist);
/* buffer full: send this batch now and start refilling from the front */
171 if (++num == max_num) {
172 stored_rc = smb2_lockv(xid, tcon,
173 cfile->fid.persistent_fid,
174 cfile->fid.volatile_fid,
175 current->tgid, num, buf);
176 if (stored_rc) {
177 /*
178 * We failed on the unlock range request - add
179 * all locks from the tmp list to the head of
180 * the file's list.
181 */
182 cifs_move_llist(&tmp_llist,
183 &cfile->llist->locks);
184 rc = stored_rc;
185 } else
186 /*
187 * The unlock range request succeeded - free the
188 * tmp list.
189 */
190 cifs_free_llist(&tmp_llist);
191 cur = buf;
192 num = 0;
193 } else
194 cur++;
195 }
/* send whatever is left in a final, partially filled batch */
196 if (num) {
197 stored_rc = smb2_lockv(xid, tcon, cfile->fid.persistent_fid,
198 cfile->fid.volatile_fid, current->tgid,
199 num, buf);
200 if (stored_rc) {
201 cifs_move_llist(&tmp_llist, &cfile->llist->locks);
202 rc = stored_rc;
203 } else
204 cifs_free_llist(&tmp_llist);
205 }
206 up_write(&cinode->lock_sem);
207
208 kfree(buf);
209 return rc;
210}
211
212static int
213smb2_push_mand_fdlocks(struct cifs_fid_locks *fdlocks, const unsigned int xid,
214 struct smb2_lock_element *buf, unsigned int max_num)
215{
216 int rc = 0, stored_rc;
217 struct cifsFileInfo *cfile = fdlocks->cfile;
218 struct cifsLockInfo *li;
219 unsigned int num = 0;
220 struct smb2_lock_element *cur = buf;
221 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
222
223 list_for_each_entry(li, &fdlocks->locks, llist) {
224 cur->Length = cpu_to_le64(li->length);
225 cur->Offset = cpu_to_le64(li->offset);
226 cur->Flags = cpu_to_le32(li->type |
227 SMB2_LOCKFLAG_FAIL_IMMEDIATELY);
228 if (++num == max_num) {
229 stored_rc = smb2_lockv(xid, tcon,
230 cfile->fid.persistent_fid,
231 cfile->fid.volatile_fid,
232 current->tgid, num, buf);
233 if (stored_rc)
234 rc = stored_rc;
235 cur = buf;
236 num = 0;
237 } else
238 cur++;
239 }
240 if (num) {
241 stored_rc = smb2_lockv(xid, tcon,
242 cfile->fid.persistent_fid,
243 cfile->fid.volatile_fid,
244 current->tgid, num, buf);
245 if (stored_rc)
246 rc = stored_rc;
247 }
248
249 return rc;
250}
251
252int
253smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
254{
255 int rc = 0, stored_rc;
256 unsigned int xid;
257 unsigned int max_num, max_buf;
258 struct smb2_lock_element *buf;
259 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
260 struct cifs_fid_locks *fdlocks;
261
262 xid = get_xid();
263 /* we are going to update can_cache_brlcks here - need a write access */
264 down_write(&cinode->lock_sem);
265 if (!cinode->can_cache_brlcks) {
266 up_write(&cinode->lock_sem);
267 free_xid(xid);
268 return rc;
269 }
270
271 /*
272 * Accessing maxBuf is racy with cifs_reconnect - need to store value
273 * and check it for zero before using.
274 */
275 max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf;
276 if (!max_buf) {
277 up_write(&cinode->lock_sem);
278 free_xid(xid);
279 return -EINVAL;
280 }
281
282 max_num = max_buf / sizeof(struct smb2_lock_element);
283 buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
284 if (!buf) {
285 up_write(&cinode->lock_sem);
286 free_xid(xid);
287 return -ENOMEM;
288 }
289
290 list_for_each_entry(fdlocks, &cinode->llist, llist) {
291 stored_rc = smb2_push_mand_fdlocks(fdlocks, xid, buf, max_num);
292 if (stored_rc)
293 rc = stored_rc;
294 }
295
296 cinode->can_cache_brlcks = false;
297 kfree(buf);
298
299 up_write(&cinode->lock_sem);
300 free_xid(xid);
301 return rc;
302}
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 33c1d89090c0..7c0e2143e775 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -23,6 +23,8 @@
23#ifndef _SMB2_GLOB_H 23#ifndef _SMB2_GLOB_H
24#define _SMB2_GLOB_H 24#define _SMB2_GLOB_H
25 25
26#define SMB2_MAGIC_NUMBER 0xFE534D42
27
26/* 28/*
27 ***************************************************************** 29 *****************************************************************
28 * Constants go here 30 * Constants go here
@@ -40,5 +42,17 @@
40#define SMB2_OP_MKDIR 5 42#define SMB2_OP_MKDIR 5
41#define SMB2_OP_RENAME 6 43#define SMB2_OP_RENAME 6
42#define SMB2_OP_DELETE 7 44#define SMB2_OP_DELETE 7
45#define SMB2_OP_HARDLINK 8
46#define SMB2_OP_SET_EOF 9
47
48/* Used when constructing chained read requests. */
49#define CHAINED_REQUEST 1
50#define START_OF_CHAIN 2
51#define END_OF_CHAIN 4
52#define RELATED_REQUEST 8
53
54#define SMB2_SIGNATURE_SIZE (16)
55#define SMB2_NTLMV2_SESSKEY_SIZE (16)
56#define SMB2_HMACSHA256_SIZE (32)
43 57
44#endif /* _SMB2_GLOB_H */ 58#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 2aa5cb08c526..706482452df4 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -47,6 +47,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
47 int rc, tmprc = 0; 47 int rc, tmprc = 0;
48 u64 persistent_fid, volatile_fid; 48 u64 persistent_fid, volatile_fid;
49 __le16 *utf16_path; 49 __le16 *utf16_path;
50 __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
50 51
51 utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); 52 utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
52 if (!utf16_path) 53 if (!utf16_path)
@@ -54,7 +55,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
54 55
55 rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, 56 rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
56 desired_access, create_disposition, file_attributes, 57 desired_access, create_disposition, file_attributes,
57 create_options); 58 create_options, &oplock, NULL);
58 if (rc) { 59 if (rc) {
59 kfree(utf16_path); 60 kfree(utf16_path);
60 return rc; 61 return rc;
@@ -74,6 +75,22 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
74 * SMB2_open() call. 75 * SMB2_open() call.
75 */ 76 */
76 break; 77 break;
78 case SMB2_OP_RENAME:
79 tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid,
80 (__le16 *)data);
81 break;
82 case SMB2_OP_HARDLINK:
83 tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid,
84 volatile_fid, (__le16 *)data);
85 break;
86 case SMB2_OP_SET_EOF:
87 tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid,
88 current->tgid, (__le64 *)data);
89 break;
90 case SMB2_OP_SET_INFO:
91 tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid,
92 (FILE_BASIC_INFO *)data);
93 break;
77 default: 94 default:
78 cERROR(1, "Invalid command"); 95 cERROR(1, "Invalid command");
79 break; 96 break;
@@ -86,7 +103,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
86 return rc; 103 return rc;
87} 104}
88 105
89static void 106void
90move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) 107move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src)
91{ 108{
92 memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src); 109 memcpy(dst, src, (size_t)(&src->CurrentByteOffset) - (size_t)src);
@@ -161,3 +178,80 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
161 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, 178 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE,
162 NULL, SMB2_OP_DELETE); 179 NULL, SMB2_OP_DELETE);
163} 180}
181
182int
183smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
184 struct cifs_sb_info *cifs_sb)
185{
186 return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
187 0, CREATE_DELETE_ON_CLOSE, NULL,
188 SMB2_OP_DELETE);
189}
190
191static int
192smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
193 const char *from_name, const char *to_name,
194 struct cifs_sb_info *cifs_sb, __u32 access, int command)
195{
196 __le16 *smb2_to_name = NULL;
197 int rc;
198
199 smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb);
200 if (smb2_to_name == NULL) {
201 rc = -ENOMEM;
202 goto smb2_rename_path;
203 }
204
205 rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access,
206 FILE_OPEN, 0, 0, smb2_to_name, command);
207smb2_rename_path:
208 kfree(smb2_to_name);
209 return rc;
210}
211
212int
213smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
214 const char *from_name, const char *to_name,
215 struct cifs_sb_info *cifs_sb)
216{
217 return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb,
218 DELETE, SMB2_OP_RENAME);
219}
220
221int
222smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
223 const char *from_name, const char *to_name,
224 struct cifs_sb_info *cifs_sb)
225{
226 return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb,
227 FILE_READ_ATTRIBUTES, SMB2_OP_HARDLINK);
228}
229
230int
231smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
232 const char *full_path, __u64 size,
233 struct cifs_sb_info *cifs_sb, bool set_alloc)
234{
235 __le64 eof = cpu_to_le64(size);
236 return smb2_open_op_close(xid, tcon, cifs_sb, full_path,
237 FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof,
238 SMB2_OP_SET_EOF);
239}
240
241int
242smb2_set_file_info(struct inode *inode, const char *full_path,
243 FILE_BASIC_INFO *buf, const unsigned int xid)
244{
245 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
246 struct tcon_link *tlink;
247 int rc;
248
249 tlink = cifs_sb_tlink(cifs_sb);
250 if (IS_ERR(tlink))
251 return PTR_ERR(tlink);
252 rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
253 FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf,
254 SMB2_OP_SET_INFO);
255 cifs_put_tlink(tlink);
256 return rc;
257}
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index be41478acc05..494c912c76fe 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -453,7 +453,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
453 {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"}, 453 {STATUS_FILE_INVALID, -EIO, "STATUS_FILE_INVALID"},
454 {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO, 454 {STATUS_ALLOTTED_SPACE_EXCEEDED, -EIO,
455 "STATUS_ALLOTTED_SPACE_EXCEEDED"}, 455 "STATUS_ALLOTTED_SPACE_EXCEEDED"},
456 {STATUS_INSUFFICIENT_RESOURCES, -EIO, "STATUS_INSUFFICIENT_RESOURCES"}, 456 {STATUS_INSUFFICIENT_RESOURCES, -EREMOTEIO,
457 "STATUS_INSUFFICIENT_RESOURCES"},
457 {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"}, 458 {STATUS_DFS_EXIT_PATH_FOUND, -EIO, "STATUS_DFS_EXIT_PATH_FOUND"},
458 {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"}, 459 {STATUS_DEVICE_DATA_ERROR, -EIO, "STATUS_DEVICE_DATA_ERROR"},
459 {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"}, 460 {STATUS_DEVICE_NOT_CONNECTED, -EIO, "STATUS_DEVICE_NOT_CONNECTED"},
@@ -2455,7 +2456,8 @@ map_smb2_to_linux_error(char *buf, bool log_err)
2455 return 0; 2456 return 0;
2456 2457
2457 /* mask facility */ 2458 /* mask facility */
2458 if (log_err && (smb2err != (STATUS_MORE_PROCESSING_REQUIRED))) 2459 if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) &&
2460 (smb2err != STATUS_END_OF_FILE))
2459 smb2_print_status(smb2err); 2461 smb2_print_status(smb2err);
2460 else if (cifsFYI & CIFS_RC) 2462 else if (cifsFYI & CIFS_RC)
2461 smb2_print_status(smb2err); 2463 smb2_print_status(smb2err);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index a4ff5d547554..7b1c5e3287fb 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -52,7 +52,8 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
52 cERROR(1, "Bad protocol string signature header %x", 52 cERROR(1, "Bad protocol string signature header %x",
53 *(unsigned int *) hdr->ProtocolId); 53 *(unsigned int *) hdr->ProtocolId);
54 if (mid != hdr->MessageId) 54 if (mid != hdr->MessageId)
55 cERROR(1, "Mids do not match"); 55 cERROR(1, "Mids do not match: %llu and %llu", mid,
56 hdr->MessageId);
56 } 57 }
57 cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId); 58 cERROR(1, "Bad SMB detected. The Mid=%llu", hdr->MessageId);
58 return 1; 59 return 1;
@@ -107,7 +108,7 @@ smb2_check_message(char *buf, unsigned int length)
107 * ie Validate the wct via smb2_struct_sizes table above 108 * ie Validate the wct via smb2_struct_sizes table above
108 */ 109 */
109 110
110 if (length < 2 + sizeof(struct smb2_hdr)) { 111 if (length < sizeof(struct smb2_pdu)) {
111 if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) { 112 if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
112 pdu->StructureSize2 = 0; 113 pdu->StructureSize2 = 0;
113 /* 114 /*
@@ -121,15 +122,15 @@ smb2_check_message(char *buf, unsigned int length)
121 return 1; 122 return 1;
122 } 123 }
123 if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) { 124 if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) {
124 cERROR(1, "SMB length greater than maximum, mid=%lld", mid); 125 cERROR(1, "SMB length greater than maximum, mid=%llu", mid);
125 return 1; 126 return 1;
126 } 127 }
127 128
128 if (check_smb2_hdr(hdr, mid)) 129 if (check_smb2_hdr(hdr, mid))
129 return 1; 130 return 1;
130 131
131 if (hdr->StructureSize != SMB2_HEADER_SIZE) { 132 if (hdr->StructureSize != SMB2_HEADER_STRUCTURE_SIZE) {
132 cERROR(1, "Illegal structure size %d", 133 cERROR(1, "Illegal structure size %u",
133 le16_to_cpu(hdr->StructureSize)); 134 le16_to_cpu(hdr->StructureSize));
134 return 1; 135 return 1;
135 } 136 }
@@ -141,12 +142,19 @@ smb2_check_message(char *buf, unsigned int length)
141 } 142 }
142 143
143 if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) { 144 if (smb2_rsp_struct_sizes[command] != pdu->StructureSize2) {
144 if (hdr->Status == 0 || 145 if (command != SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0 ||
145 pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2) { 146 pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2)) {
146 /* error packets have 9 byte structure size */ 147 /* error packets have 9 byte structure size */
147 cERROR(1, "Illegal response size %u for command %d", 148 cERROR(1, "Illegal response size %u for command %d",
148 le16_to_cpu(pdu->StructureSize2), command); 149 le16_to_cpu(pdu->StructureSize2), command);
149 return 1; 150 return 1;
151 } else if (command == SMB2_OPLOCK_BREAK_HE && (hdr->Status == 0)
152 && (le16_to_cpu(pdu->StructureSize2) != 44)
153 && (le16_to_cpu(pdu->StructureSize2) != 36)) {
154 /* special case for SMB2.1 lease break message */
155 cERROR(1, "Illegal response size %d for oplock break",
156 le16_to_cpu(pdu->StructureSize2));
157 return 1;
150 } 158 }
151 } 159 }
152 160
@@ -161,8 +169,12 @@ smb2_check_message(char *buf, unsigned int length)
161 if (4 + len != clc_len) { 169 if (4 + len != clc_len) {
162 cFYI(1, "Calculated size %u length %u mismatch mid %llu", 170 cFYI(1, "Calculated size %u length %u mismatch mid %llu",
163 clc_len, 4 + len, mid); 171 clc_len, 4 + len, mid);
164 if (clc_len == 4 + len + 1) /* BB FIXME (fix samba) */ 172 /* Windows 7 server returns 24 bytes more */
165 return 0; /* BB workaround Samba 3 bug SessSetup rsp */ 173 if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
174 return 0;
175 /* server can return one byte more */
176 if (clc_len == 4 + len + 1)
177 return 0;
166 return 1; 178 return 1;
167 } 179 }
168 return 0; 180 return 0;
@@ -242,7 +254,15 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
242 ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength); 254 ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength);
243 break; 255 break;
244 case SMB2_READ: 256 case SMB2_READ:
257 *off = ((struct smb2_read_rsp *)hdr)->DataOffset;
258 *len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength);
259 break;
245 case SMB2_QUERY_DIRECTORY: 260 case SMB2_QUERY_DIRECTORY:
261 *off = le16_to_cpu(
262 ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset);
263 *len = le32_to_cpu(
264 ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength);
265 break;
246 case SMB2_IOCTL: 266 case SMB2_IOCTL:
247 case SMB2_CHANGE_NOTIFY: 267 case SMB2_CHANGE_NOTIFY:
248 default: 268 default:
@@ -285,8 +305,9 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
285 * portion, the number of word parameters and the data portion of the message. 305 * portion, the number of word parameters and the data portion of the message.
286 */ 306 */
287unsigned int 307unsigned int
288smb2_calc_size(struct smb2_hdr *hdr) 308smb2_calc_size(void *buf)
289{ 309{
310 struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
290 struct smb2_pdu *pdu = (struct smb2_pdu *)hdr; 311 struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
291 int offset; /* the offset from the beginning of SMB to data area */ 312 int offset; /* the offset from the beginning of SMB to data area */
292 int data_length; /* the length of the variable length data area */ 313 int data_length; /* the length of the variable length data area */
@@ -345,3 +366,218 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
345 CIFS_MOUNT_MAP_SPECIAL_CHR); 366 CIFS_MOUNT_MAP_SPECIAL_CHR);
346 return to; 367 return to;
347} 368}
369
370__le32
371smb2_get_lease_state(struct cifsInodeInfo *cinode)
372{
373 if (cinode->clientCanCacheAll)
374 return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING;
375 else if (cinode->clientCanCacheRead)
376 return SMB2_LEASE_READ_CACHING;
377 return 0;
378}
379
380__u8 smb2_map_lease_to_oplock(__le32 lease_state)
381{
382 if (lease_state & SMB2_LEASE_WRITE_CACHING) {
383 if (lease_state & SMB2_LEASE_HANDLE_CACHING)
384 return SMB2_OPLOCK_LEVEL_BATCH;
385 else
386 return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
387 } else if (lease_state & SMB2_LEASE_READ_CACHING)
388 return SMB2_OPLOCK_LEVEL_II;
389 return 0;
390}
391
392struct smb2_lease_break_work {
393 struct work_struct lease_break;
394 struct tcon_link *tlink;
395 __u8 lease_key[16];
396 __le32 lease_state;
397};
398
399static void
400cifs_ses_oplock_break(struct work_struct *work)
401{
402 struct smb2_lease_break_work *lw = container_of(work,
403 struct smb2_lease_break_work, lease_break);
404 int rc;
405
406 rc = SMB2_lease_break(0, tlink_tcon(lw->tlink), lw->lease_key,
407 lw->lease_state);
408 cFYI(1, "Lease release rc %d", rc);
409 cifs_put_tlink(lw->tlink);
410 kfree(lw);
411}
412
413static bool
414smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
415{
416 struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
417 struct list_head *tmp, *tmp1, *tmp2;
418 struct cifs_ses *ses;
419 struct cifs_tcon *tcon;
420 struct cifsInodeInfo *cinode;
421 struct cifsFileInfo *cfile;
422 struct cifs_pending_open *open;
423 struct smb2_lease_break_work *lw;
424 bool found;
425 int ack_req = le32_to_cpu(rsp->Flags &
426 SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED);
427
428 lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
429 if (!lw) {
430 cERROR(1, "Memory allocation failed during lease break check");
431 return false;
432 }
433
434 INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
435 lw->lease_state = rsp->NewLeaseState;
436
437 cFYI(1, "Checking for lease break");
438
439 /* look up tcon based on tid & uid */
440 spin_lock(&cifs_tcp_ses_lock);
441 list_for_each(tmp, &server->smb_ses_list) {
442 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
443
444 spin_lock(&cifs_file_list_lock);
445 list_for_each(tmp1, &ses->tcon_list) {
446 tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
447
448 cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
449 list_for_each(tmp2, &tcon->openFileList) {
450 cfile = list_entry(tmp2, struct cifsFileInfo,
451 tlist);
452 cinode = CIFS_I(cfile->dentry->d_inode);
453
454 if (memcmp(cinode->lease_key, rsp->LeaseKey,
455 SMB2_LEASE_KEY_SIZE))
456 continue;
457
458 cFYI(1, "found in the open list");
459 cFYI(1, "lease key match, lease break 0x%d",
460 le32_to_cpu(rsp->NewLeaseState));
461
462 smb2_set_oplock_level(cinode,
463 smb2_map_lease_to_oplock(rsp->NewLeaseState));
464
465 if (ack_req)
466 cfile->oplock_break_cancelled = false;
467 else
468 cfile->oplock_break_cancelled = true;
469
470 queue_work(cifsiod_wq, &cfile->oplock_break);
471
472 spin_unlock(&cifs_file_list_lock);
473 spin_unlock(&cifs_tcp_ses_lock);
474 return true;
475 }
476
477 found = false;
478 list_for_each_entry(open, &tcon->pending_opens, olist) {
479 if (memcmp(open->lease_key, rsp->LeaseKey,
480 SMB2_LEASE_KEY_SIZE))
481 continue;
482
483 if (!found && ack_req) {
484 found = true;
485 memcpy(lw->lease_key, open->lease_key,
486 SMB2_LEASE_KEY_SIZE);
487 lw->tlink = cifs_get_tlink(open->tlink);
488 queue_work(cifsiod_wq,
489 &lw->lease_break);
490 }
491
492 cFYI(1, "found in the pending open list");
493 cFYI(1, "lease key match, lease break 0x%d",
494 le32_to_cpu(rsp->NewLeaseState));
495
496 open->oplock =
497 smb2_map_lease_to_oplock(rsp->NewLeaseState);
498 }
499 if (found) {
500 spin_unlock(&cifs_file_list_lock);
501 spin_unlock(&cifs_tcp_ses_lock);
502 return true;
503 }
504 }
505 spin_unlock(&cifs_file_list_lock);
506 }
507 spin_unlock(&cifs_tcp_ses_lock);
508 kfree(lw);
509 cFYI(1, "Can not process lease break - no lease matched");
510 return false;
511}
512
513bool
514smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
515{
516 struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
517 struct list_head *tmp, *tmp1, *tmp2;
518 struct cifs_ses *ses;
519 struct cifs_tcon *tcon;
520 struct cifsInodeInfo *cinode;
521 struct cifsFileInfo *cfile;
522
523 cFYI(1, "Checking for oplock break");
524
525 if (rsp->hdr.Command != SMB2_OPLOCK_BREAK)
526 return false;
527
528 if (rsp->StructureSize !=
529 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
530 if (le16_to_cpu(rsp->StructureSize) == 44)
531 return smb2_is_valid_lease_break(buffer, server);
532 else
533 return false;
534 }
535
536 cFYI(1, "oplock level 0x%d", rsp->OplockLevel);
537
538 /* look up tcon based on tid & uid */
539 spin_lock(&cifs_tcp_ses_lock);
540 list_for_each(tmp, &server->smb_ses_list) {
541 ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
542 list_for_each(tmp1, &ses->tcon_list) {
543 tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
544
545 cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
546 spin_lock(&cifs_file_list_lock);
547 list_for_each(tmp2, &tcon->openFileList) {
548 cfile = list_entry(tmp2, struct cifsFileInfo,
549 tlist);
550 if (rsp->PersistentFid !=
551 cfile->fid.persistent_fid ||
552 rsp->VolatileFid !=
553 cfile->fid.volatile_fid)
554 continue;
555
556 cFYI(1, "file id match, oplock break");
557 cinode = CIFS_I(cfile->dentry->d_inode);
558
559 if (!cinode->clientCanCacheAll &&
560 rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
561 cfile->oplock_break_cancelled = true;
562 else
563 cfile->oplock_break_cancelled = false;
564
565 smb2_set_oplock_level(cinode,
566 rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0);
567
568 queue_work(cifsiod_wq, &cfile->oplock_break);
569
570 spin_unlock(&cifs_file_list_lock);
571 spin_unlock(&cifs_tcp_ses_lock);
572 return true;
573 }
574 spin_unlock(&cifs_file_list_lock);
575 spin_unlock(&cifs_tcp_ses_lock);
576 cFYI(1, "No matching file for oplock break");
577 return true;
578 }
579 }
580 spin_unlock(&cifs_tcp_ses_lock);
581 cFYI(1, "Can not process oplock break for non-existent connection");
582 return false;
583}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 826209bf3684..4d9dbe0b7385 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -17,11 +17,15 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/pagemap.h>
21#include <linux/vfs.h>
20#include "cifsglob.h" 22#include "cifsglob.h"
21#include "smb2pdu.h" 23#include "smb2pdu.h"
22#include "smb2proto.h" 24#include "smb2proto.h"
23#include "cifsproto.h" 25#include "cifsproto.h"
24#include "cifs_debug.h" 26#include "cifs_debug.h"
27#include "smb2status.h"
28#include "smb2glob.h"
25 29
26static int 30static int
27change_conf(struct TCP_Server_Info *server) 31change_conf(struct TCP_Server_Info *server)
@@ -63,6 +67,17 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
63 server->in_flight--; 67 server->in_flight--;
64 if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP) 68 if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
65 rc = change_conf(server); 69 rc = change_conf(server);
70 /*
71 * Sometimes server returns 0 credits on oplock break ack - we need to
72 * rebalance credits in this case.
73 */
74 else if (server->in_flight > 0 && server->oplock_credits == 0 &&
75 server->oplocks) {
76 if (server->credits > 1) {
77 server->credits--;
78 server->oplock_credits++;
79 }
80 }
66 spin_unlock(&server->req_lock); 81 spin_unlock(&server->req_lock);
67 wake_up(&server->request_q); 82 wake_up(&server->request_q);
68 if (rc) 83 if (rc)
@@ -157,6 +172,42 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
157 return rc; 172 return rc;
158} 173}
159 174
175static unsigned int
176smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
177{
178 struct TCP_Server_Info *server = tcon->ses->server;
179 unsigned int wsize;
180
181 /* start with specified wsize, or default */
182 wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
183 wsize = min_t(unsigned int, wsize, server->max_write);
184 /*
185 * limit write size to 2 ** 16, because we don't support multicredit
186 * requests now.
187 */
188 wsize = min_t(unsigned int, wsize, 2 << 15);
189
190 return wsize;
191}
192
193static unsigned int
194smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
195{
196 struct TCP_Server_Info *server = tcon->ses->server;
197 unsigned int rsize;
198
199 /* start with specified rsize, or default */
200 rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
201 rsize = min_t(unsigned int, rsize, server->max_read);
202 /*
203 * limit write size to 2 ** 16, because we don't support multicredit
204 * requests now.
205 */
206 rsize = min_t(unsigned int, rsize, 2 << 15);
207
208 return rsize;
209}
210
160static int 211static int
161smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, 212smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
162 struct cifs_sb_info *cifs_sb, const char *full_path) 213 struct cifs_sb_info *cifs_sb, const char *full_path)
@@ -164,13 +215,14 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
164 int rc; 215 int rc;
165 __u64 persistent_fid, volatile_fid; 216 __u64 persistent_fid, volatile_fid;
166 __le16 *utf16_path; 217 __le16 *utf16_path;
218 __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
167 219
168 utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); 220 utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
169 if (!utf16_path) 221 if (!utf16_path)
170 return -ENOMEM; 222 return -ENOMEM;
171 223
172 rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, 224 rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
173 FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0); 225 FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL);
174 if (rc) { 226 if (rc) {
175 kfree(utf16_path); 227 kfree(utf16_path);
176 return rc; 228 return rc;
@@ -190,6 +242,26 @@ smb2_get_srv_inum(const unsigned int xid, struct cifs_tcon *tcon,
190 return 0; 242 return 0;
191} 243}
192 244
245static int
246smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
247 struct cifs_fid *fid, FILE_ALL_INFO *data)
248{
249 int rc;
250 struct smb2_file_all_info *smb2_data;
251
252 smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
253 GFP_KERNEL);
254 if (smb2_data == NULL)
255 return -ENOMEM;
256
257 rc = SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid,
258 smb2_data);
259 if (!rc)
260 move_smb2_info_to_cifs(data, smb2_data);
261 kfree(smb2_data);
262 return rc;
263}
264
193static char * 265static char *
194smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, 266smb2_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
195 struct cifs_tcon *tcon) 267 struct cifs_tcon *tcon)
@@ -292,7 +364,221 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
292#endif 364#endif
293} 365}
294 366
367static void
368smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
369{
370 struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
371 cfile->fid.persistent_fid = fid->persistent_fid;
372 cfile->fid.volatile_fid = fid->volatile_fid;
373 smb2_set_oplock_level(cinode, oplock);
374 cinode->can_cache_brlcks = cinode->clientCanCacheAll;
375}
376
377static void
378smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
379 struct cifs_fid *fid)
380{
381 SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
382}
383
384static int
385smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon,
386 struct cifs_fid *fid)
387{
388 return SMB2_flush(xid, tcon, fid->persistent_fid, fid->volatile_fid);
389}
390
391static unsigned int
392smb2_read_data_offset(char *buf)
393{
394 struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf;
395 return rsp->DataOffset;
396}
397
398static unsigned int
399smb2_read_data_length(char *buf)
400{
401 struct smb2_read_rsp *rsp = (struct smb2_read_rsp *)buf;
402 return le32_to_cpu(rsp->DataLength);
403}
404
405
406static int
407smb2_sync_read(const unsigned int xid, struct cifsFileInfo *cfile,
408 struct cifs_io_parms *parms, unsigned int *bytes_read,
409 char **buf, int *buf_type)
410{
411 parms->persistent_fid = cfile->fid.persistent_fid;
412 parms->volatile_fid = cfile->fid.volatile_fid;
413 return SMB2_read(xid, parms, bytes_read, buf, buf_type);
414}
415
416static int
417smb2_sync_write(const unsigned int xid, struct cifsFileInfo *cfile,
418 struct cifs_io_parms *parms, unsigned int *written,
419 struct kvec *iov, unsigned long nr_segs)
420{
421
422 parms->persistent_fid = cfile->fid.persistent_fid;
423 parms->volatile_fid = cfile->fid.volatile_fid;
424 return SMB2_write(xid, parms, written, iov, nr_segs);
425}
426
427static int
428smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
429 struct cifsFileInfo *cfile, __u64 size, bool set_alloc)
430{
431 __le64 eof = cpu_to_le64(size);
432 return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
433 cfile->fid.volatile_fid, cfile->pid, &eof);
434}
435
436static int
437smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
438 const char *path, struct cifs_sb_info *cifs_sb,
439 struct cifs_fid *fid, __u16 search_flags,
440 struct cifs_search_info *srch_inf)
441{
442 __le16 *utf16_path;
443 int rc;
444 __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
445 __u64 persistent_fid, volatile_fid;
446
447 utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
448 if (!utf16_path)
449 return -ENOMEM;
450
451 rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid,
452 FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0,
453 &oplock, NULL);
454 kfree(utf16_path);
455 if (rc) {
456 cERROR(1, "open dir failed");
457 return rc;
458 }
459
460 srch_inf->entries_in_buffer = 0;
461 srch_inf->index_of_last_entry = 0;
462 fid->persistent_fid = persistent_fid;
463 fid->volatile_fid = volatile_fid;
464
465 rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0,
466 srch_inf);
467 if (rc) {
468 cERROR(1, "query directory failed");
469 SMB2_close(xid, tcon, persistent_fid, volatile_fid);
470 }
471 return rc;
472}
473
474static int
475smb2_query_dir_next(const unsigned int xid, struct cifs_tcon *tcon,
476 struct cifs_fid *fid, __u16 search_flags,
477 struct cifs_search_info *srch_inf)
478{
479 return SMB2_query_directory(xid, tcon, fid->persistent_fid,
480 fid->volatile_fid, 0, srch_inf);
481}
482
483static int
484smb2_close_dir(const unsigned int xid, struct cifs_tcon *tcon,
485 struct cifs_fid *fid)
486{
487 return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
488}
489
490/*
491* If we negotiate SMB2 protocol and get STATUS_PENDING - update
492* the number of credits and return true. Otherwise - return false.
493*/
494static bool
495smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
496{
497 struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
498
499 if (hdr->Status != STATUS_PENDING)
500 return false;
501
502 if (!length) {
503 spin_lock(&server->req_lock);
504 server->credits += le16_to_cpu(hdr->CreditRequest);
505 spin_unlock(&server->req_lock);
506 wake_up(&server->request_q);
507 }
508
509 return true;
510}
511
512static int
513smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
514 struct cifsInodeInfo *cinode)
515{
516 if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
517 return SMB2_lease_break(0, tcon, cinode->lease_key,
518 smb2_get_lease_state(cinode));
519
520 return SMB2_oplock_break(0, tcon, fid->persistent_fid,
521 fid->volatile_fid,
522 cinode->clientCanCacheRead ? 1 : 0);
523}
524
525static int
526smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
527 struct kstatfs *buf)
528{
529 int rc;
530 u64 persistent_fid, volatile_fid;
531 __le16 srch_path = 0; /* Null - open root of share */
532 u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
533
534 rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid,
535 FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL);
536 if (rc)
537 return rc;
538 buf->f_type = SMB2_MAGIC_NUMBER;
539 rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf);
540 SMB2_close(xid, tcon, persistent_fid, volatile_fid);
541 return rc;
542}
543
544static bool
545smb2_compare_fids(struct cifsFileInfo *ob1, struct cifsFileInfo *ob2)
546{
547 return ob1->fid.persistent_fid == ob2->fid.persistent_fid &&
548 ob1->fid.volatile_fid == ob2->fid.volatile_fid;
549}
550
551static int
552smb2_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
553 __u64 length, __u32 type, int lock, int unlock, bool wait)
554{
555 if (unlock && !lock)
556 type = SMB2_LOCKFLAG_UNLOCK;
557 return SMB2_lock(xid, tlink_tcon(cfile->tlink),
558 cfile->fid.persistent_fid, cfile->fid.volatile_fid,
559 current->tgid, length, offset, type, wait);
560}
561
562static void
563smb2_get_lease_key(struct inode *inode, struct cifs_fid *fid)
564{
565 memcpy(fid->lease_key, CIFS_I(inode)->lease_key, SMB2_LEASE_KEY_SIZE);
566}
567
568static void
569smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid)
570{
571 memcpy(CIFS_I(inode)->lease_key, fid->lease_key, SMB2_LEASE_KEY_SIZE);
572}
573
574static void
575smb2_new_lease_key(struct cifs_fid *fid)
576{
577 get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
578}
579
295struct smb_version_operations smb21_operations = { 580struct smb_version_operations smb21_operations = {
581 .compare_fids = smb2_compare_fids,
296 .setup_request = smb2_setup_request, 582 .setup_request = smb2_setup_request,
297 .setup_async_request = smb2_setup_async_request, 583 .setup_async_request = smb2_setup_async_request,
298 .check_receive = smb2_check_receive, 584 .check_receive = smb2_check_receive,
@@ -301,13 +587,19 @@ struct smb_version_operations smb21_operations = {
301 .get_credits_field = smb2_get_credits_field, 587 .get_credits_field = smb2_get_credits_field,
302 .get_credits = smb2_get_credits, 588 .get_credits = smb2_get_credits,
303 .get_next_mid = smb2_get_next_mid, 589 .get_next_mid = smb2_get_next_mid,
590 .read_data_offset = smb2_read_data_offset,
591 .read_data_length = smb2_read_data_length,
592 .map_error = map_smb2_to_linux_error,
304 .find_mid = smb2_find_mid, 593 .find_mid = smb2_find_mid,
305 .check_message = smb2_check_message, 594 .check_message = smb2_check_message,
306 .dump_detail = smb2_dump_detail, 595 .dump_detail = smb2_dump_detail,
307 .clear_stats = smb2_clear_stats, 596 .clear_stats = smb2_clear_stats,
308 .print_stats = smb2_print_stats, 597 .print_stats = smb2_print_stats,
598 .is_oplock_break = smb2_is_valid_oplock_break,
309 .need_neg = smb2_need_neg, 599 .need_neg = smb2_need_neg,
310 .negotiate = smb2_negotiate, 600 .negotiate = smb2_negotiate,
601 .negotiate_wsize = smb2_negotiate_wsize,
602 .negotiate_rsize = smb2_negotiate_rsize,
311 .sess_setup = SMB2_sess_setup, 603 .sess_setup = SMB2_sess_setup,
312 .logoff = SMB2_logoff, 604 .logoff = SMB2_logoff,
313 .tree_connect = SMB2_tcon, 605 .tree_connect = SMB2_tcon,
@@ -317,16 +609,68 @@ struct smb_version_operations smb21_operations = {
317 .echo = SMB2_echo, 609 .echo = SMB2_echo,
318 .query_path_info = smb2_query_path_info, 610 .query_path_info = smb2_query_path_info,
319 .get_srv_inum = smb2_get_srv_inum, 611 .get_srv_inum = smb2_get_srv_inum,
612 .query_file_info = smb2_query_file_info,
613 .set_path_size = smb2_set_path_size,
614 .set_file_size = smb2_set_file_size,
615 .set_file_info = smb2_set_file_info,
320 .build_path_to_root = smb2_build_path_to_root, 616 .build_path_to_root = smb2_build_path_to_root,
321 .mkdir = smb2_mkdir, 617 .mkdir = smb2_mkdir,
322 .mkdir_setinfo = smb2_mkdir_setinfo, 618 .mkdir_setinfo = smb2_mkdir_setinfo,
323 .rmdir = smb2_rmdir, 619 .rmdir = smb2_rmdir,
620 .unlink = smb2_unlink,
621 .rename = smb2_rename_path,
622 .create_hardlink = smb2_create_hardlink,
623 .open = smb2_open_file,
624 .set_fid = smb2_set_fid,
625 .close = smb2_close_file,
626 .flush = smb2_flush_file,
627 .async_readv = smb2_async_readv,
628 .async_writev = smb2_async_writev,
629 .sync_read = smb2_sync_read,
630 .sync_write = smb2_sync_write,
631 .query_dir_first = smb2_query_dir_first,
632 .query_dir_next = smb2_query_dir_next,
633 .close_dir = smb2_close_dir,
634 .calc_smb_size = smb2_calc_size,
635 .is_status_pending = smb2_is_status_pending,
636 .oplock_response = smb2_oplock_response,
637 .queryfs = smb2_queryfs,
638 .mand_lock = smb2_mand_lock,
639 .mand_unlock_range = smb2_unlock_range,
640 .push_mand_locks = smb2_push_mandatory_locks,
641 .get_lease_key = smb2_get_lease_key,
642 .set_lease_key = smb2_set_lease_key,
643 .new_lease_key = smb2_new_lease_key,
324}; 644};
325 645
326struct smb_version_values smb21_values = { 646struct smb_version_values smb21_values = {
327 .version_string = SMB21_VERSION_STRING, 647 .version_string = SMB21_VERSION_STRING,
648 .protocol_id = SMB21_PROT_ID,
649 .req_capabilities = 0, /* MBZ on negotiate req until SMB3 dialect */
650 .large_lock_type = 0,
651 .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
652 .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
653 .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
654 .header_size = sizeof(struct smb2_hdr),
655 .max_header_size = MAX_SMB2_HDR_SIZE,
656 .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
657 .lock_cmd = SMB2_LOCK,
658 .cap_unix = 0,
659 .cap_nt_find = SMB2_NT_FIND,
660 .cap_large_files = SMB2_LARGE_FILES,
661};
662
663struct smb_version_values smb30_values = {
664 .version_string = SMB30_VERSION_STRING,
665 .protocol_id = SMB30_PROT_ID,
666 .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU,
667 .large_lock_type = 0,
668 .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
669 .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
670 .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
328 .header_size = sizeof(struct smb2_hdr), 671 .header_size = sizeof(struct smb2_hdr),
329 .max_header_size = MAX_SMB2_HDR_SIZE, 672 .max_header_size = MAX_SMB2_HDR_SIZE,
673 .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
330 .lock_cmd = SMB2_LOCK, 674 .lock_cmd = SMB2_LOCK,
331 .cap_unix = 0, 675 .cap_unix = 0,
332 .cap_nt_find = SMB2_NT_FIND, 676 .cap_nt_find = SMB2_NT_FIND,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 62b3f17d0613..cf33622cdac8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/smb2pdu.c 2 * fs/cifs/smb2pdu.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2009, 2011 4 * Copyright (C) International Business Machines Corp., 2009, 2012
5 * Etersoft, 2012 5 * Etersoft, 2012
6 * Author(s): Steve French (sfrench@us.ibm.com) 6 * Author(s): Steve French (sfrench@us.ibm.com)
7 * Pavel Shilovsky (pshilovsky@samba.org) 2012 7 * Pavel Shilovsky (pshilovsky@samba.org) 2012
@@ -31,7 +31,9 @@
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kernel.h> 32#include <linux/kernel.h>
33#include <linux/vfs.h> 33#include <linux/vfs.h>
34#include <linux/task_io_accounting_ops.h>
34#include <linux/uaccess.h> 35#include <linux/uaccess.h>
36#include <linux/pagemap.h>
35#include <linux/xattr.h> 37#include <linux/xattr.h>
36#include "smb2pdu.h" 38#include "smb2pdu.h"
37#include "cifsglob.h" 39#include "cifsglob.h"
@@ -42,6 +44,8 @@
42#include "cifs_debug.h" 44#include "cifs_debug.h"
43#include "ntlmssp.h" 45#include "ntlmssp.h"
44#include "smb2status.h" 46#include "smb2status.h"
47#include "smb2glob.h"
48#include "cifspdu.h"
45 49
46/* 50/*
47 * The following table defines the expected "StructureSize" of SMB2 requests 51 * The following table defines the expected "StructureSize" of SMB2 requests
@@ -115,9 +119,9 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
115 /* BB how does SMB2 do case sensitive? */ 119 /* BB how does SMB2 do case sensitive? */
116 /* if (tcon->nocase) 120 /* if (tcon->nocase)
117 hdr->Flags |= SMBFLG_CASELESS; */ 121 hdr->Flags |= SMBFLG_CASELESS; */
118 /* if (tcon->ses && tcon->ses->server && 122 if (tcon->ses && tcon->ses->server &&
119 (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED)) 123 (tcon->ses->server->sec_mode & SECMODE_SIGN_REQUIRED))
120 hdr->Flags |= SMB2_FLAGS_SIGNED; */ 124 hdr->Flags |= SMB2_FLAGS_SIGNED;
121out: 125out:
122 pdu->StructureSize2 = cpu_to_le16(parmsize); 126 pdu->StructureSize2 = cpu_to_le16(parmsize);
123 return; 127 return;
@@ -300,24 +304,6 @@ free_rsp_buf(int resp_buftype, void *rsp)
300 cifs_buf_release(rsp); 304 cifs_buf_release(rsp);
301} 305}
302 306
303#define SMB2_NUM_PROT 1
304
305#define SMB2_PROT 0
306#define SMB21_PROT 1
307#define BAD_PROT 0xFFFF
308
309#define SMB2_PROT_ID 0x0202
310#define SMB21_PROT_ID 0x0210
311#define BAD_PROT_ID 0xFFFF
312
313static struct {
314 int index;
315 __le16 name;
316} smb2protocols[] = {
317 {SMB2_PROT, cpu_to_le16(SMB2_PROT_ID)},
318 {SMB21_PROT, cpu_to_le16(SMB21_PROT_ID)},
319 {BAD_PROT, cpu_to_le16(BAD_PROT_ID)}
320};
321 307
322/* 308/*
323 * 309 *
@@ -344,7 +330,6 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
344 int resp_buftype; 330 int resp_buftype;
345 struct TCP_Server_Info *server; 331 struct TCP_Server_Info *server;
346 unsigned int sec_flags; 332 unsigned int sec_flags;
347 u16 i;
348 u16 temp = 0; 333 u16 temp = 0;
349 int blob_offset, blob_length; 334 int blob_offset, blob_length;
350 char *security_blob; 335 char *security_blob;
@@ -373,11 +358,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
373 358
374 req->hdr.SessionId = 0; 359 req->hdr.SessionId = 0;
375 360
376 for (i = 0; i < SMB2_NUM_PROT; i++) 361 req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
377 req->Dialects[i] = smb2protocols[i].name;
378 362
379 req->DialectCount = cpu_to_le16(i); 363 req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */
380 inc_rfc1001_len(req, i * 2); 364 inc_rfc1001_len(req, 2);
381 365
382 /* only one of SMB2 signing flags may be set in SMB2 request */ 366 /* only one of SMB2 signing flags may be set in SMB2 request */
383 if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) 367 if ((sec_flags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN)
@@ -387,7 +371,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
387 371
388 req->SecurityMode = cpu_to_le16(temp); 372 req->SecurityMode = cpu_to_le16(temp);
389 373
390 req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); 374 req->Capabilities = cpu_to_le32(ses->server->vals->req_capabilities);
375
376 memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE);
391 377
392 iov[0].iov_base = (char *)req; 378 iov[0].iov_base = (char *)req;
393 /* 4 for rfc1002 length field */ 379 /* 4 for rfc1002 length field */
@@ -403,17 +389,16 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
403 if (rc != 0) 389 if (rc != 0)
404 goto neg_exit; 390 goto neg_exit;
405 391
406 if (rsp == NULL) {
407 rc = -EIO;
408 goto neg_exit;
409 }
410
411 cFYI(1, "mode 0x%x", rsp->SecurityMode); 392 cFYI(1, "mode 0x%x", rsp->SecurityMode);
412 393
413 if (rsp->DialectRevision == smb2protocols[SMB21_PROT].name) 394 /* BB we may eventually want to match the negotiated vs. requested
395 dialect, even though we are only requesting one at a time */
396 if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
397 cFYI(1, "negotiated smb2.0 dialect");
398 else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
414 cFYI(1, "negotiated smb2.1 dialect"); 399 cFYI(1, "negotiated smb2.1 dialect");
415 else if (rsp->DialectRevision == smb2protocols[SMB2_PROT].name) 400 else if (rsp->DialectRevision == cpu_to_le16(SMB30_PROT_ID))
416 cFYI(1, "negotiated smb2 dialect"); 401 cFYI(1, "negotiated smb3.0 dialect");
417 else { 402 else {
418 cERROR(1, "Illegal dialect returned by server %d", 403 cERROR(1, "Illegal dialect returned by server %d",
419 le16_to_cpu(rsp->DialectRevision)); 404 le16_to_cpu(rsp->DialectRevision));
@@ -438,6 +423,38 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
438 rc = -EIO; 423 rc = -EIO;
439 goto neg_exit; 424 goto neg_exit;
440 } 425 }
426
427 cFYI(1, "sec_flags 0x%x", sec_flags);
428 if (sec_flags & CIFSSEC_MUST_SIGN) {
429 cFYI(1, "Signing required");
430 if (!(server->sec_mode & (SMB2_NEGOTIATE_SIGNING_REQUIRED |
431 SMB2_NEGOTIATE_SIGNING_ENABLED))) {
432 cERROR(1, "signing required but server lacks support");
433 rc = -EOPNOTSUPP;
434 goto neg_exit;
435 }
436 server->sec_mode |= SECMODE_SIGN_REQUIRED;
437 } else if (sec_flags & CIFSSEC_MAY_SIGN) {
438 cFYI(1, "Signing optional");
439 if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
440 cFYI(1, "Server requires signing");
441 server->sec_mode |= SECMODE_SIGN_REQUIRED;
442 } else {
443 server->sec_mode &=
444 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
445 }
446 } else {
447 cFYI(1, "Signing disabled");
448 if (server->sec_mode & SMB2_NEGOTIATE_SIGNING_REQUIRED) {
449 cERROR(1, "Server requires packet signing to be enabled"
450 " in /proc/fs/cifs/SecurityFlags.");
451 rc = -EOPNOTSUPP;
452 goto neg_exit;
453 }
454 server->sec_mode &=
455 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
456 }
457
441#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */ 458#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */
442 rc = decode_neg_token_init(security_blob, blob_length, 459 rc = decode_neg_token_init(security_blob, blob_length,
443 &server->sec_type); 460 &server->sec_type);
@@ -599,13 +616,14 @@ ssetup_ntlmssp_authenticate:
599 616
600 kfree(security_blob); 617 kfree(security_blob);
601 rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base; 618 rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
602 if (rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) { 619 if (resp_buftype != CIFS_NO_BUFFER &&
620 rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
603 if (phase != NtLmNegotiate) { 621 if (phase != NtLmNegotiate) {
604 cERROR(1, "Unexpected more processing error"); 622 cERROR(1, "Unexpected more processing error");
605 goto ssetup_exit; 623 goto ssetup_exit;
606 } 624 }
607 if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 != 625 if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
608 le16_to_cpu(rsp->SecurityBufferOffset)) { 626 le16_to_cpu(rsp->SecurityBufferOffset)) {
609 cERROR(1, "Invalid security buffer offset %d", 627 cERROR(1, "Invalid security buffer offset %d",
610 le16_to_cpu(rsp->SecurityBufferOffset)); 628 le16_to_cpu(rsp->SecurityBufferOffset));
611 rc = -EIO; 629 rc = -EIO;
@@ -631,11 +649,6 @@ ssetup_ntlmssp_authenticate:
631 if (rc != 0) 649 if (rc != 0)
632 goto ssetup_exit; 650 goto ssetup_exit;
633 651
634 if (rsp == NULL) {
635 rc = -EIO;
636 goto ssetup_exit;
637 }
638
639 ses->session_flags = le16_to_cpu(rsp->SessionFlags); 652 ses->session_flags = le16_to_cpu(rsp->SessionFlags);
640ssetup_exit: 653ssetup_exit:
641 free_rsp_buf(resp_buftype, rsp); 654 free_rsp_buf(resp_buftype, rsp);
@@ -666,6 +679,8 @@ SMB2_logoff(const unsigned int xid, struct cifs_ses *ses)
666 679
667 /* since no tcon, smb2_init can not do this, so do here */ 680 /* since no tcon, smb2_init can not do this, so do here */
668 req->hdr.SessionId = ses->Suid; 681 req->hdr.SessionId = ses->Suid;
682 if (server->sec_mode & SECMODE_SIGN_REQUIRED)
683 req->hdr.Flags |= SMB2_FLAGS_SIGNED;
669 684
670 rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0); 685 rc = SendReceiveNoRsp(xid, ses, (char *) &req->hdr, 0);
671 /* 686 /*
@@ -753,11 +768,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
753 goto tcon_error_exit; 768 goto tcon_error_exit;
754 } 769 }
755 770
756 if (rsp == NULL) {
757 rc = -EIO;
758 goto tcon_exit;
759 }
760
761 if (tcon == NULL) { 771 if (tcon == NULL) {
762 ses->ipc_tid = rsp->hdr.TreeId; 772 ses->ipc_tid = rsp->hdr.TreeId;
763 goto tcon_exit; 773 goto tcon_exit;
@@ -830,18 +840,87 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
830 return rc; 840 return rc;
831} 841}
832 842
843static struct create_lease *
844create_lease_buf(u8 *lease_key, u8 oplock)
845{
846 struct create_lease *buf;
847
848 buf = kmalloc(sizeof(struct create_lease), GFP_KERNEL);
849 if (!buf)
850 return NULL;
851
852 memset(buf, 0, sizeof(struct create_lease));
853
854 buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
855 buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
856 if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
857 buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING |
858 SMB2_LEASE_READ_CACHING;
859 else if (oplock == SMB2_OPLOCK_LEVEL_II)
860 buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING;
861 else if (oplock == SMB2_OPLOCK_LEVEL_BATCH)
862 buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING |
863 SMB2_LEASE_READ_CACHING |
864 SMB2_LEASE_WRITE_CACHING;
865
866 buf->ccontext.DataOffset = cpu_to_le16(offsetof
867 (struct create_lease, lcontext));
868 buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
869 buf->ccontext.NameOffset = cpu_to_le16(offsetof
870 (struct create_lease, Name));
871 buf->ccontext.NameLength = cpu_to_le16(4);
872 buf->Name[0] = 'R';
873 buf->Name[1] = 'q';
874 buf->Name[2] = 'L';
875 buf->Name[3] = 's';
876 return buf;
877}
878
879static __u8
880parse_lease_state(struct smb2_create_rsp *rsp)
881{
882 char *data_offset;
883 struct create_lease *lc;
884 bool found = false;
885
886 data_offset = (char *)rsp;
887 data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset);
888 lc = (struct create_lease *)data_offset;
889 do {
890 char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc;
891 if (le16_to_cpu(lc->ccontext.NameLength) != 4 ||
892 strncmp(name, "RqLs", 4)) {
893 lc = (struct create_lease *)((char *)lc
894 + le32_to_cpu(lc->ccontext.Next));
895 continue;
896 }
897 if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
898 return SMB2_OPLOCK_LEVEL_NOCHANGE;
899 found = true;
900 break;
901 } while (le32_to_cpu(lc->ccontext.Next) != 0);
902
903 if (!found)
904 return 0;
905
906 return smb2_map_lease_to_oplock(lc->lcontext.LeaseState);
907}
908
833int 909int
834SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, 910SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
835 u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, 911 u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access,
836 __u32 create_disposition, __u32 file_attributes, __u32 create_options) 912 __u32 create_disposition, __u32 file_attributes, __u32 create_options,
913 __u8 *oplock, struct smb2_file_all_info *buf)
837{ 914{
838 struct smb2_create_req *req; 915 struct smb2_create_req *req;
839 struct smb2_create_rsp *rsp; 916 struct smb2_create_rsp *rsp;
840 struct TCP_Server_Info *server; 917 struct TCP_Server_Info *server;
841 struct cifs_ses *ses = tcon->ses; 918 struct cifs_ses *ses = tcon->ses;
842 struct kvec iov[2]; 919 struct kvec iov[3];
843 int resp_buftype; 920 int resp_buftype;
844 int uni_path_len; 921 int uni_path_len;
922 __le16 *copy_path = NULL;
923 int copy_size;
845 int rc = 0; 924 int rc = 0;
846 int num_iovecs = 2; 925 int num_iovecs = 2;
847 926
@@ -856,10 +935,6 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
856 if (rc) 935 if (rc)
857 return rc; 936 return rc;
858 937
859 if (enable_oplocks)
860 req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_BATCH;
861 else
862 req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_NONE;
863 req->ImpersonationLevel = IL_IMPERSONATION; 938 req->ImpersonationLevel = IL_IMPERSONATION;
864 req->DesiredAccess = cpu_to_le32(desired_access); 939 req->DesiredAccess = cpu_to_le32(desired_access);
865 /* File attributes ignored on open (used in create though) */ 940 /* File attributes ignored on open (used in create though) */
@@ -869,7 +944,7 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
869 req->CreateOptions = cpu_to_le32(create_options); 944 req->CreateOptions = cpu_to_le32(create_options);
870 uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; 945 uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2;
871 req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) 946 req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req)
872 - 1 /* pad */ - 4 /* do not count rfc1001 len field */); 947 - 8 /* pad */ - 4 /* do not count rfc1001 len field */);
873 948
874 iov[0].iov_base = (char *)req; 949 iov[0].iov_base = (char *)req;
875 /* 4 for rfc1002 length field */ 950 /* 4 for rfc1002 length field */
@@ -880,6 +955,20 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
880 req->NameLength = cpu_to_le16(uni_path_len - 2); 955 req->NameLength = cpu_to_le16(uni_path_len - 2);
881 /* -1 since last byte is buf[0] which is sent below (path) */ 956 /* -1 since last byte is buf[0] which is sent below (path) */
882 iov[0].iov_len--; 957 iov[0].iov_len--;
958 if (uni_path_len % 8 != 0) {
959 copy_size = uni_path_len / 8 * 8;
960 if (copy_size < uni_path_len)
961 copy_size += 8;
962
963 copy_path = kzalloc(copy_size, GFP_KERNEL);
964 if (!copy_path)
965 return -ENOMEM;
966 memcpy((char *)copy_path, (const char *)path,
967 uni_path_len);
968 uni_path_len = copy_size;
969 path = copy_path;
970 }
971
883 iov[1].iov_len = uni_path_len; 972 iov[1].iov_len = uni_path_len;
884 iov[1].iov_base = path; 973 iov[1].iov_base = path;
885 /* 974 /*
@@ -888,10 +977,37 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
888 */ 977 */
889 inc_rfc1001_len(req, uni_path_len - 1); 978 inc_rfc1001_len(req, uni_path_len - 1);
890 } else { 979 } else {
980 iov[0].iov_len += 7;
981 req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu(
982 req->hdr.smb2_buf_length) + 8 - 1);
891 num_iovecs = 1; 983 num_iovecs = 1;
892 req->NameLength = 0; 984 req->NameLength = 0;
893 } 985 }
894 986
987 if (!server->oplocks)
988 *oplock = SMB2_OPLOCK_LEVEL_NONE;
989
990 if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
991 *oplock == SMB2_OPLOCK_LEVEL_NONE)
992 req->RequestedOplockLevel = *oplock;
993 else {
994 iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock);
995 if (iov[num_iovecs].iov_base == NULL) {
996 cifs_small_buf_release(req);
997 kfree(copy_path);
998 return -ENOMEM;
999 }
1000 iov[num_iovecs].iov_len = sizeof(struct create_lease);
1001 req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
1002 req->CreateContextsOffset = cpu_to_le32(
1003 sizeof(struct smb2_create_req) - 4 - 8 +
1004 iov[num_iovecs-1].iov_len);
1005 req->CreateContextsLength = cpu_to_le32(
1006 sizeof(struct create_lease));
1007 inc_rfc1001_len(&req->hdr, sizeof(struct create_lease));
1008 num_iovecs++;
1009 }
1010
895 rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); 1011 rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0);
896 rsp = (struct smb2_create_rsp *)iov[0].iov_base; 1012 rsp = (struct smb2_create_rsp *)iov[0].iov_base;
897 1013
@@ -900,13 +1016,24 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path,
900 goto creat_exit; 1016 goto creat_exit;
901 } 1017 }
902 1018
903 if (rsp == NULL) {
904 rc = -EIO;
905 goto creat_exit;
906 }
907 *persistent_fid = rsp->PersistentFileId; 1019 *persistent_fid = rsp->PersistentFileId;
908 *volatile_fid = rsp->VolatileFileId; 1020 *volatile_fid = rsp->VolatileFileId;
1021
1022 if (buf) {
1023 memcpy(buf, &rsp->CreationTime, 32);
1024 buf->AllocationSize = rsp->AllocationSize;
1025 buf->EndOfFile = rsp->EndofFile;
1026 buf->Attributes = rsp->FileAttributes;
1027 buf->NumberOfLinks = cpu_to_le32(1);
1028 buf->DeletePending = 0;
1029 }
1030
1031 if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
1032 *oplock = parse_lease_state(rsp);
1033 else
1034 *oplock = rsp->OplockLevel;
909creat_exit: 1035creat_exit:
1036 kfree(copy_path);
910 free_rsp_buf(resp_buftype, rsp); 1037 free_rsp_buf(resp_buftype, rsp);
911 return rc; 1038 return rc;
912} 1039}
@@ -950,11 +1077,6 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
950 goto close_exit; 1077 goto close_exit;
951 } 1078 }
952 1079
953 if (rsp == NULL) {
954 rc = -EIO;
955 goto close_exit;
956 }
957
958 /* BB FIXME - decode close response, update inode for caching */ 1080 /* BB FIXME - decode close response, update inode for caching */
959 1081
960close_exit: 1082close_exit:
@@ -1019,10 +1141,10 @@ validate_and_copy_buf(unsigned int offset, unsigned int buffer_length,
1019 return 0; 1141 return 0;
1020} 1142}
1021 1143
1022int 1144static int
1023SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, 1145query_info(const unsigned int xid, struct cifs_tcon *tcon,
1024 u64 persistent_fid, u64 volatile_fid, 1146 u64 persistent_fid, u64 volatile_fid, u8 info_class,
1025 struct smb2_file_all_info *data) 1147 size_t output_len, size_t min_len, void *data)
1026{ 1148{
1027 struct smb2_query_info_req *req; 1149 struct smb2_query_info_req *req;
1028 struct smb2_query_info_rsp *rsp = NULL; 1150 struct smb2_query_info_rsp *rsp = NULL;
@@ -1044,37 +1166,56 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
1044 return rc; 1166 return rc;
1045 1167
1046 req->InfoType = SMB2_O_INFO_FILE; 1168 req->InfoType = SMB2_O_INFO_FILE;
1047 req->FileInfoClass = FILE_ALL_INFORMATION; 1169 req->FileInfoClass = info_class;
1048 req->PersistentFileId = persistent_fid; 1170 req->PersistentFileId = persistent_fid;
1049 req->VolatileFileId = volatile_fid; 1171 req->VolatileFileId = volatile_fid;
1050 /* 4 for rfc1002 length field and 1 for Buffer */ 1172 /* 4 for rfc1002 length field and 1 for Buffer */
1051 req->InputBufferOffset = 1173 req->InputBufferOffset =
1052 cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4); 1174 cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
1053 req->OutputBufferLength = 1175 req->OutputBufferLength = cpu_to_le32(output_len);
1054 cpu_to_le32(sizeof(struct smb2_file_all_info) + MAX_NAME * 2);
1055 1176
1056 iov[0].iov_base = (char *)req; 1177 iov[0].iov_base = (char *)req;
1057 /* 4 for rfc1002 length field */ 1178 /* 4 for rfc1002 length field */
1058 iov[0].iov_len = get_rfc1002_length(req) + 4; 1179 iov[0].iov_len = get_rfc1002_length(req) + 4;
1059 1180
1060 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); 1181 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
1182 rsp = (struct smb2_query_info_rsp *)iov[0].iov_base;
1183
1061 if (rc) { 1184 if (rc) {
1062 cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); 1185 cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
1063 goto qinf_exit; 1186 goto qinf_exit;
1064 } 1187 }
1065 1188
1066 rsp = (struct smb2_query_info_rsp *)iov[0].iov_base;
1067
1068 rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset), 1189 rc = validate_and_copy_buf(le16_to_cpu(rsp->OutputBufferOffset),
1069 le32_to_cpu(rsp->OutputBufferLength), 1190 le32_to_cpu(rsp->OutputBufferLength),
1070 &rsp->hdr, sizeof(struct smb2_file_all_info), 1191 &rsp->hdr, min_len, data);
1071 (char *)data);
1072 1192
1073qinf_exit: 1193qinf_exit:
1074 free_rsp_buf(resp_buftype, rsp); 1194 free_rsp_buf(resp_buftype, rsp);
1075 return rc; 1195 return rc;
1076} 1196}
1077 1197
1198int
1199SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
1200 u64 persistent_fid, u64 volatile_fid,
1201 struct smb2_file_all_info *data)
1202{
1203 return query_info(xid, tcon, persistent_fid, volatile_fid,
1204 FILE_ALL_INFORMATION,
1205 sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
1206 sizeof(struct smb2_file_all_info), data);
1207}
1208
1209int
1210SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
1211 u64 persistent_fid, u64 volatile_fid, __le64 *uniqueid)
1212{
1213 return query_info(xid, tcon, persistent_fid, volatile_fid,
1214 FILE_INTERNAL_INFORMATION,
1215 sizeof(struct smb2_file_internal_info),
1216 sizeof(struct smb2_file_internal_info), uniqueid);
1217}
1218
1078/* 1219/*
1079 * This is a no-op for now. We're not really interested in the reply, but 1220 * This is a no-op for now. We're not really interested in the reply, but
1080 * rather in the fact that the server sent one and that server->lstrp 1221 * rather in the fact that the server sent one and that server->lstrp
@@ -1102,6 +1243,8 @@ SMB2_echo(struct TCP_Server_Info *server)
1102 struct smb2_echo_req *req; 1243 struct smb2_echo_req *req;
1103 int rc = 0; 1244 int rc = 0;
1104 struct kvec iov; 1245 struct kvec iov;
1246 struct smb_rqst rqst = { .rq_iov = &iov,
1247 .rq_nvec = 1 };
1105 1248
1106 cFYI(1, "In echo request"); 1249 cFYI(1, "In echo request");
1107 1250
@@ -1115,7 +1258,7 @@ SMB2_echo(struct TCP_Server_Info *server)
1115 /* 4 for rfc1002 length field */ 1258 /* 4 for rfc1002 length field */
1116 iov.iov_len = get_rfc1002_length(req) + 4; 1259 iov.iov_len = get_rfc1002_length(req) + 4;
1117 1260
1118 rc = cifs_call_async(server, &iov, 1, NULL, smb2_echo_callback, server, 1261 rc = cifs_call_async(server, &rqst, NULL, smb2_echo_callback, server,
1119 CIFS_ECHO_OP); 1262 CIFS_ECHO_OP);
1120 if (rc) 1263 if (rc)
1121 cFYI(1, "Echo request failed: %d", rc); 1264 cFYI(1, "Echo request failed: %d", rc);
@@ -1123,3 +1266,945 @@ SMB2_echo(struct TCP_Server_Info *server)
1123 cifs_small_buf_release(req); 1266 cifs_small_buf_release(req);
1124 return rc; 1267 return rc;
1125} 1268}
1269
1270int
1271SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1272 u64 volatile_fid)
1273{
1274 struct smb2_flush_req *req;
1275 struct TCP_Server_Info *server;
1276 struct cifs_ses *ses = tcon->ses;
1277 struct kvec iov[1];
1278 int resp_buftype;
1279 int rc = 0;
1280
1281 cFYI(1, "Flush");
1282
1283 if (ses && (ses->server))
1284 server = ses->server;
1285 else
1286 return -EIO;
1287
1288 rc = small_smb2_init(SMB2_FLUSH, tcon, (void **) &req);
1289 if (rc)
1290 return rc;
1291
1292 req->PersistentFileId = persistent_fid;
1293 req->VolatileFileId = volatile_fid;
1294
1295 iov[0].iov_base = (char *)req;
1296 /* 4 for rfc1002 length field */
1297 iov[0].iov_len = get_rfc1002_length(req) + 4;
1298
1299 rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
1300
1301 if ((rc != 0) && tcon)
1302 cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
1303
1304 free_rsp_buf(resp_buftype, iov[0].iov_base);
1305 return rc;
1306}
1307
1308/*
1309 * To form a chain of read requests, any read requests after the first should
1310 * have the end_of_chain boolean set to true.
1311 */
1312static int
1313smb2_new_read_req(struct kvec *iov, struct cifs_io_parms *io_parms,
1314 unsigned int remaining_bytes, int request_type)
1315{
1316 int rc = -EACCES;
1317 struct smb2_read_req *req = NULL;
1318
1319 rc = small_smb2_init(SMB2_READ, io_parms->tcon, (void **) &req);
1320 if (rc)
1321 return rc;
1322 if (io_parms->tcon->ses->server == NULL)
1323 return -ECONNABORTED;
1324
1325 req->hdr.ProcessId = cpu_to_le32(io_parms->pid);
1326
1327 req->PersistentFileId = io_parms->persistent_fid;
1328 req->VolatileFileId = io_parms->volatile_fid;
1329 req->ReadChannelInfoOffset = 0; /* reserved */
1330 req->ReadChannelInfoLength = 0; /* reserved */
1331 req->Channel = 0; /* reserved */
1332 req->MinimumCount = 0;
1333 req->Length = cpu_to_le32(io_parms->length);
1334 req->Offset = cpu_to_le64(io_parms->offset);
1335
1336 if (request_type & CHAINED_REQUEST) {
1337 if (!(request_type & END_OF_CHAIN)) {
1338 /* 4 for rfc1002 length field */
1339 req->hdr.NextCommand =
1340 cpu_to_le32(get_rfc1002_length(req) + 4);
1341 } else /* END_OF_CHAIN */
1342 req->hdr.NextCommand = 0;
1343 if (request_type & RELATED_REQUEST) {
1344 req->hdr.Flags |= SMB2_FLAGS_RELATED_OPERATIONS;
1345 /*
1346 * Related requests use info from previous read request
1347 * in chain.
1348 */
1349 req->hdr.SessionId = 0xFFFFFFFF;
1350 req->hdr.TreeId = 0xFFFFFFFF;
1351 req->PersistentFileId = 0xFFFFFFFF;
1352 req->VolatileFileId = 0xFFFFFFFF;
1353 }
1354 }
1355 if (remaining_bytes > io_parms->length)
1356 req->RemainingBytes = cpu_to_le32(remaining_bytes);
1357 else
1358 req->RemainingBytes = 0;
1359
1360 iov[0].iov_base = (char *)req;
1361 /* 4 for rfc1002 length field */
1362 iov[0].iov_len = get_rfc1002_length(req) + 4;
1363 return rc;
1364}
1365
1366static void
1367smb2_readv_callback(struct mid_q_entry *mid)
1368{
1369 struct cifs_readdata *rdata = mid->callback_data;
1370 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
1371 struct TCP_Server_Info *server = tcon->ses->server;
1372 struct smb2_hdr *buf = (struct smb2_hdr *)rdata->iov.iov_base;
1373 unsigned int credits_received = 1;
1374 struct smb_rqst rqst = { .rq_iov = &rdata->iov,
1375 .rq_nvec = 1,
1376 .rq_pages = rdata->pages,
1377 .rq_npages = rdata->nr_pages,
1378 .rq_pagesz = rdata->pagesz,
1379 .rq_tailsz = rdata->tailsz };
1380
1381 cFYI(1, "%s: mid=%llu state=%d result=%d bytes=%u", __func__,
1382 mid->mid, mid->mid_state, rdata->result, rdata->bytes);
1383
1384 switch (mid->mid_state) {
1385 case MID_RESPONSE_RECEIVED:
1386 credits_received = le16_to_cpu(buf->CreditRequest);
1387 /* result already set, check signature */
1388 if (server->sec_mode &
1389 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1390 int rc;
1391
1392 rc = smb2_verify_signature(&rqst, server);
1393 if (rc)
1394 cERROR(1, "SMB signature verification returned "
1395 "error = %d", rc);
1396 }
1397 /* FIXME: should this be counted toward the initiating task? */
1398 task_io_account_read(rdata->bytes);
1399 cifs_stats_bytes_read(tcon, rdata->bytes);
1400 break;
1401 case MID_REQUEST_SUBMITTED:
1402 case MID_RETRY_NEEDED:
1403 rdata->result = -EAGAIN;
1404 break;
1405 default:
1406 if (rdata->result != -ENODATA)
1407 rdata->result = -EIO;
1408 }
1409
1410 if (rdata->result)
1411 cifs_stats_fail_inc(tcon, SMB2_READ_HE);
1412
1413 queue_work(cifsiod_wq, &rdata->work);
1414 DeleteMidQEntry(mid);
1415 add_credits(server, credits_received, 0);
1416}
1417
1418/* smb2_async_readv - send an async write, and set up mid to handle result */
1419int
1420smb2_async_readv(struct cifs_readdata *rdata)
1421{
1422 int rc;
1423 struct smb2_hdr *buf;
1424 struct cifs_io_parms io_parms;
1425 struct smb_rqst rqst = { .rq_iov = &rdata->iov,
1426 .rq_nvec = 1 };
1427
1428 cFYI(1, "%s: offset=%llu bytes=%u", __func__,
1429 rdata->offset, rdata->bytes);
1430
1431 io_parms.tcon = tlink_tcon(rdata->cfile->tlink);
1432 io_parms.offset = rdata->offset;
1433 io_parms.length = rdata->bytes;
1434 io_parms.persistent_fid = rdata->cfile->fid.persistent_fid;
1435 io_parms.volatile_fid = rdata->cfile->fid.volatile_fid;
1436 io_parms.pid = rdata->pid;
1437 rc = smb2_new_read_req(&rdata->iov, &io_parms, 0, 0);
1438 if (rc)
1439 return rc;
1440
1441 buf = (struct smb2_hdr *)rdata->iov.iov_base;
1442 /* 4 for rfc1002 length field */
1443 rdata->iov.iov_len = get_rfc1002_length(rdata->iov.iov_base) + 4;
1444
1445 kref_get(&rdata->refcount);
1446 rc = cifs_call_async(io_parms.tcon->ses->server, &rqst,
1447 cifs_readv_receive, smb2_readv_callback,
1448 rdata, 0);
1449 if (rc) {
1450 kref_put(&rdata->refcount, cifs_readdata_release);
1451 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
1452 }
1453
1454 cifs_small_buf_release(buf);
1455 return rc;
1456}
1457
1458int
1459SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
1460 unsigned int *nbytes, char **buf, int *buf_type)
1461{
1462 int resp_buftype, rc = -EACCES;
1463 struct smb2_read_rsp *rsp = NULL;
1464 struct kvec iov[1];
1465
1466 *nbytes = 0;
1467 rc = smb2_new_read_req(iov, io_parms, 0, 0);
1468 if (rc)
1469 return rc;
1470
1471 rc = SendReceive2(xid, io_parms->tcon->ses, iov, 1,
1472 &resp_buftype, CIFS_LOG_ERROR);
1473
1474 rsp = (struct smb2_read_rsp *)iov[0].iov_base;
1475
1476 if (rsp->hdr.Status == STATUS_END_OF_FILE) {
1477 free_rsp_buf(resp_buftype, iov[0].iov_base);
1478 return 0;
1479 }
1480
1481 if (rc) {
1482 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
1483 cERROR(1, "Send error in read = %d", rc);
1484 } else {
1485 *nbytes = le32_to_cpu(rsp->DataLength);
1486 if ((*nbytes > CIFS_MAX_MSGSIZE) ||
1487 (*nbytes > io_parms->length)) {
1488 cFYI(1, "bad length %d for count %d", *nbytes,
1489 io_parms->length);
1490 rc = -EIO;
1491 *nbytes = 0;
1492 }
1493 }
1494
1495 if (*buf) {
1496 memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
1497 *nbytes);
1498 free_rsp_buf(resp_buftype, iov[0].iov_base);
1499 } else if (resp_buftype != CIFS_NO_BUFFER) {
1500 *buf = iov[0].iov_base;
1501 if (resp_buftype == CIFS_SMALL_BUFFER)
1502 *buf_type = CIFS_SMALL_BUFFER;
1503 else if (resp_buftype == CIFS_LARGE_BUFFER)
1504 *buf_type = CIFS_LARGE_BUFFER;
1505 }
1506 return rc;
1507}
1508
1509/*
1510 * Check the mid_state and signature on received buffer (if any), and queue the
1511 * workqueue completion task.
1512 */
1513static void
1514smb2_writev_callback(struct mid_q_entry *mid)
1515{
1516 struct cifs_writedata *wdata = mid->callback_data;
1517 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
1518 unsigned int written;
1519 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
1520 unsigned int credits_received = 1;
1521
1522 switch (mid->mid_state) {
1523 case MID_RESPONSE_RECEIVED:
1524 credits_received = le16_to_cpu(rsp->hdr.CreditRequest);
1525 wdata->result = smb2_check_receive(mid, tcon->ses->server, 0);
1526 if (wdata->result != 0)
1527 break;
1528
1529 written = le32_to_cpu(rsp->DataLength);
1530 /*
1531 * Mask off high 16 bits when bytes written as returned
1532 * by the server is greater than bytes requested by the
1533 * client. OS/2 servers are known to set incorrect
1534 * CountHigh values.
1535 */
1536 if (written > wdata->bytes)
1537 written &= 0xFFFF;
1538
1539 if (written < wdata->bytes)
1540 wdata->result = -ENOSPC;
1541 else
1542 wdata->bytes = written;
1543 break;
1544 case MID_REQUEST_SUBMITTED:
1545 case MID_RETRY_NEEDED:
1546 wdata->result = -EAGAIN;
1547 break;
1548 default:
1549 wdata->result = -EIO;
1550 break;
1551 }
1552
1553 if (wdata->result)
1554 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
1555
1556 queue_work(cifsiod_wq, &wdata->work);
1557 DeleteMidQEntry(mid);
1558 add_credits(tcon->ses->server, credits_received, 0);
1559}
1560
1561/* smb2_async_writev - send an async write, and set up mid to handle result */
1562int
1563smb2_async_writev(struct cifs_writedata *wdata)
1564{
1565 int rc = -EACCES;
1566 struct smb2_write_req *req = NULL;
1567 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
1568 struct kvec iov;
1569 struct smb_rqst rqst;
1570
1571 rc = small_smb2_init(SMB2_WRITE, tcon, (void **) &req);
1572 if (rc)
1573 goto async_writev_out;
1574
1575 req->hdr.ProcessId = cpu_to_le32(wdata->cfile->pid);
1576
1577 req->PersistentFileId = wdata->cfile->fid.persistent_fid;
1578 req->VolatileFileId = wdata->cfile->fid.volatile_fid;
1579 req->WriteChannelInfoOffset = 0;
1580 req->WriteChannelInfoLength = 0;
1581 req->Channel = 0;
1582 req->Offset = cpu_to_le64(wdata->offset);
1583 /* 4 for rfc1002 length field */
1584 req->DataOffset = cpu_to_le16(
1585 offsetof(struct smb2_write_req, Buffer) - 4);
1586 req->RemainingBytes = 0;
1587
1588 /* 4 for rfc1002 length field and 1 for Buffer */
1589 iov.iov_len = get_rfc1002_length(req) + 4 - 1;
1590 iov.iov_base = req;
1591
1592 rqst.rq_iov = &iov;
1593 rqst.rq_nvec = 1;
1594 rqst.rq_pages = wdata->pages;
1595 rqst.rq_npages = wdata->nr_pages;
1596 rqst.rq_pagesz = wdata->pagesz;
1597 rqst.rq_tailsz = wdata->tailsz;
1598
1599 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
1600
1601 req->Length = cpu_to_le32(wdata->bytes);
1602
1603 inc_rfc1001_len(&req->hdr, wdata->bytes - 1 /* Buffer */);
1604
1605 kref_get(&wdata->refcount);
1606 rc = cifs_call_async(tcon->ses->server, &rqst, NULL,
1607 smb2_writev_callback, wdata, 0);
1608
1609 if (rc) {
1610 kref_put(&wdata->refcount, cifs_writedata_release);
1611 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
1612 }
1613
1614async_writev_out:
1615 cifs_small_buf_release(req);
1616 return rc;
1617}
1618
1619/*
1620 * SMB2_write function gets iov pointer to kvec array with n_vec as a length.
1621 * The length field from io_parms must be at least 1 and indicates a number of
1622 * elements with data to write that begins with position 1 in iov array. All
1623 * data length is specified by count.
1624 */
1625int
1626SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
1627 unsigned int *nbytes, struct kvec *iov, int n_vec)
1628{
1629 int rc = 0;
1630 struct smb2_write_req *req = NULL;
1631 struct smb2_write_rsp *rsp = NULL;
1632 int resp_buftype;
1633 *nbytes = 0;
1634
1635 if (n_vec < 1)
1636 return rc;
1637
1638 rc = small_smb2_init(SMB2_WRITE, io_parms->tcon, (void **) &req);
1639 if (rc)
1640 return rc;
1641
1642 if (io_parms->tcon->ses->server == NULL)
1643 return -ECONNABORTED;
1644
1645 req->hdr.ProcessId = cpu_to_le32(io_parms->pid);
1646
1647 req->PersistentFileId = io_parms->persistent_fid;
1648 req->VolatileFileId = io_parms->volatile_fid;
1649 req->WriteChannelInfoOffset = 0;
1650 req->WriteChannelInfoLength = 0;
1651 req->Channel = 0;
1652 req->Length = cpu_to_le32(io_parms->length);
1653 req->Offset = cpu_to_le64(io_parms->offset);
1654 /* 4 for rfc1002 length field */
1655 req->DataOffset = cpu_to_le16(
1656 offsetof(struct smb2_write_req, Buffer) - 4);
1657 req->RemainingBytes = 0;
1658
1659 iov[0].iov_base = (char *)req;
1660 /* 4 for rfc1002 length field and 1 for Buffer */
1661 iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
1662
1663 /* length of entire message including data to be written */
1664 inc_rfc1001_len(req, io_parms->length - 1 /* Buffer */);
1665
1666 rc = SendReceive2(xid, io_parms->tcon->ses, iov, n_vec + 1,
1667 &resp_buftype, 0);
1668 rsp = (struct smb2_write_rsp *)iov[0].iov_base;
1669
1670 if (rc) {
1671 cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
1672 cERROR(1, "Send error in write = %d", rc);
1673 } else
1674 *nbytes = le32_to_cpu(rsp->DataLength);
1675
1676 free_rsp_buf(resp_buftype, rsp);
1677 return rc;
1678}
1679
1680static unsigned int
1681num_entries(char *bufstart, char *end_of_buf, char **lastentry, size_t size)
1682{
1683 int len;
1684 unsigned int entrycount = 0;
1685 unsigned int next_offset = 0;
1686 FILE_DIRECTORY_INFO *entryptr;
1687
1688 if (bufstart == NULL)
1689 return 0;
1690
1691 entryptr = (FILE_DIRECTORY_INFO *)bufstart;
1692
1693 while (1) {
1694 entryptr = (FILE_DIRECTORY_INFO *)
1695 ((char *)entryptr + next_offset);
1696
1697 if ((char *)entryptr + size > end_of_buf) {
1698 cERROR(1, "malformed search entry would overflow");
1699 break;
1700 }
1701
1702 len = le32_to_cpu(entryptr->FileNameLength);
1703 if ((char *)entryptr + len + size > end_of_buf) {
1704 cERROR(1, "directory entry name would overflow frame "
1705 "end of buf %p", end_of_buf);
1706 break;
1707 }
1708
1709 *lastentry = (char *)entryptr;
1710 entrycount++;
1711
1712 next_offset = le32_to_cpu(entryptr->NextEntryOffset);
1713 if (!next_offset)
1714 break;
1715 }
1716
1717 return entrycount;
1718}
1719
1720/*
1721 * Readdir/FindFirst
1722 */
1723int
1724SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
1725 u64 persistent_fid, u64 volatile_fid, int index,
1726 struct cifs_search_info *srch_inf)
1727{
1728 struct smb2_query_directory_req *req;
1729 struct smb2_query_directory_rsp *rsp = NULL;
1730 struct kvec iov[2];
1731 int rc = 0;
1732 int len;
1733 int resp_buftype;
1734 unsigned char *bufptr;
1735 struct TCP_Server_Info *server;
1736 struct cifs_ses *ses = tcon->ses;
1737 __le16 asteriks = cpu_to_le16('*');
1738 char *end_of_smb;
1739 unsigned int output_size = CIFSMaxBufSize;
1740 size_t info_buf_size;
1741
1742 if (ses && (ses->server))
1743 server = ses->server;
1744 else
1745 return -EIO;
1746
1747 rc = small_smb2_init(SMB2_QUERY_DIRECTORY, tcon, (void **) &req);
1748 if (rc)
1749 return rc;
1750
1751 switch (srch_inf->info_level) {
1752 case SMB_FIND_FILE_DIRECTORY_INFO:
1753 req->FileInformationClass = FILE_DIRECTORY_INFORMATION;
1754 info_buf_size = sizeof(FILE_DIRECTORY_INFO) - 1;
1755 break;
1756 case SMB_FIND_FILE_ID_FULL_DIR_INFO:
1757 req->FileInformationClass = FILEID_FULL_DIRECTORY_INFORMATION;
1758 info_buf_size = sizeof(SEARCH_ID_FULL_DIR_INFO) - 1;
1759 break;
1760 default:
1761 cERROR(1, "info level %u isn't supported",
1762 srch_inf->info_level);
1763 rc = -EINVAL;
1764 goto qdir_exit;
1765 }
1766
1767 req->FileIndex = cpu_to_le32(index);
1768 req->PersistentFileId = persistent_fid;
1769 req->VolatileFileId = volatile_fid;
1770
1771 len = 0x2;
1772 bufptr = req->Buffer;
1773 memcpy(bufptr, &asteriks, len);
1774
1775 req->FileNameOffset =
1776 cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1 - 4);
1777 req->FileNameLength = cpu_to_le16(len);
1778 /*
1779 * BB could be 30 bytes or so longer if we used SMB2 specific
1780 * buffer lengths, but this is safe and close enough.
1781 */
1782 output_size = min_t(unsigned int, output_size, server->maxBuf);
1783 output_size = min_t(unsigned int, output_size, 2 << 15);
1784 req->OutputBufferLength = cpu_to_le32(output_size);
1785
1786 iov[0].iov_base = (char *)req;
1787 /* 4 for RFC1001 length and 1 for Buffer */
1788 iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
1789
1790 iov[1].iov_base = (char *)(req->Buffer);
1791 iov[1].iov_len = len;
1792
1793 inc_rfc1001_len(req, len - 1 /* Buffer */);
1794
1795 rc = SendReceive2(xid, ses, iov, 2, &resp_buftype, 0);
1796 rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
1797
1798 if (rc) {
1799 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
1800 goto qdir_exit;
1801 }
1802
1803 rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset),
1804 le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr,
1805 info_buf_size);
1806 if (rc)
1807 goto qdir_exit;
1808
1809 srch_inf->unicode = true;
1810
1811 if (srch_inf->ntwrk_buf_start) {
1812 if (srch_inf->smallBuf)
1813 cifs_small_buf_release(srch_inf->ntwrk_buf_start);
1814 else
1815 cifs_buf_release(srch_inf->ntwrk_buf_start);
1816 }
1817 srch_inf->ntwrk_buf_start = (char *)rsp;
1818 srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ +
1819 (char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset);
1820 /* 4 for rfc1002 length field */
1821 end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr;
1822 srch_inf->entries_in_buffer =
1823 num_entries(srch_inf->srch_entries_start, end_of_smb,
1824 &srch_inf->last_entry, info_buf_size);
1825 srch_inf->index_of_last_entry += srch_inf->entries_in_buffer;
1826 cFYI(1, "num entries %d last_index %lld srch start %p srch end %p",
1827 srch_inf->entries_in_buffer, srch_inf->index_of_last_entry,
1828 srch_inf->srch_entries_start, srch_inf->last_entry);
1829 if (resp_buftype == CIFS_LARGE_BUFFER)
1830 srch_inf->smallBuf = false;
1831 else if (resp_buftype == CIFS_SMALL_BUFFER)
1832 srch_inf->smallBuf = true;
1833 else
1834 cERROR(1, "illegal search buffer type");
1835
1836 if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
1837 srch_inf->endOfSearch = 1;
1838 else
1839 srch_inf->endOfSearch = 0;
1840
1841 return rc;
1842
1843qdir_exit:
1844 free_rsp_buf(resp_buftype, rsp);
1845 return rc;
1846}
1847
1848static int
1849send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
1850 u64 persistent_fid, u64 volatile_fid, u32 pid, int info_class,
1851 unsigned int num, void **data, unsigned int *size)
1852{
1853 struct smb2_set_info_req *req;
1854 struct smb2_set_info_rsp *rsp = NULL;
1855 struct kvec *iov;
1856 int rc = 0;
1857 int resp_buftype;
1858 unsigned int i;
1859 struct TCP_Server_Info *server;
1860 struct cifs_ses *ses = tcon->ses;
1861
1862 if (ses && (ses->server))
1863 server = ses->server;
1864 else
1865 return -EIO;
1866
1867 if (!num)
1868 return -EINVAL;
1869
1870 iov = kmalloc(sizeof(struct kvec) * num, GFP_KERNEL);
1871 if (!iov)
1872 return -ENOMEM;
1873
1874 rc = small_smb2_init(SMB2_SET_INFO, tcon, (void **) &req);
1875 if (rc) {
1876 kfree(iov);
1877 return rc;
1878 }
1879
1880 req->hdr.ProcessId = cpu_to_le32(pid);
1881
1882 req->InfoType = SMB2_O_INFO_FILE;
1883 req->FileInfoClass = info_class;
1884 req->PersistentFileId = persistent_fid;
1885 req->VolatileFileId = volatile_fid;
1886
1887 /* 4 for RFC1001 length and 1 for Buffer */
1888 req->BufferOffset =
1889 cpu_to_le16(sizeof(struct smb2_set_info_req) - 1 - 4);
1890 req->BufferLength = cpu_to_le32(*size);
1891
1892 inc_rfc1001_len(req, *size - 1 /* Buffer */);
1893
1894 memcpy(req->Buffer, *data, *size);
1895
1896 iov[0].iov_base = (char *)req;
1897 /* 4 for RFC1001 length */
1898 iov[0].iov_len = get_rfc1002_length(req) + 4;
1899
1900 for (i = 1; i < num; i++) {
1901 inc_rfc1001_len(req, size[i]);
1902 le32_add_cpu(&req->BufferLength, size[i]);
1903 iov[i].iov_base = (char *)data[i];
1904 iov[i].iov_len = size[i];
1905 }
1906
1907 rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0);
1908 rsp = (struct smb2_set_info_rsp *)iov[0].iov_base;
1909
1910 if (rc != 0) {
1911 cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
1912 goto out;
1913 }
1914out:
1915 free_rsp_buf(resp_buftype, rsp);
1916 kfree(iov);
1917 return rc;
1918}
1919
1920int
1921SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
1922 u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
1923{
1924 struct smb2_file_rename_info info;
1925 void **data;
1926 unsigned int size[2];
1927 int rc;
1928 int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
1929
1930 data = kmalloc(sizeof(void *) * 2, GFP_KERNEL);
1931 if (!data)
1932 return -ENOMEM;
1933
1934 info.ReplaceIfExists = 1; /* 1 = replace existing target with new */
1935 /* 0 = fail if target already exists */
1936 info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */
1937 info.FileNameLength = cpu_to_le32(len);
1938
1939 data[0] = &info;
1940 size[0] = sizeof(struct smb2_file_rename_info);
1941
1942 data[1] = target_file;
1943 size[1] = len + 2 /* null */;
1944
1945 rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
1946 current->tgid, FILE_RENAME_INFORMATION, 2, data,
1947 size);
1948 kfree(data);
1949 return rc;
1950}
1951
1952int
1953SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
1954 u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
1955{
1956 struct smb2_file_link_info info;
1957 void **data;
1958 unsigned int size[2];
1959 int rc;
1960 int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
1961
1962 data = kmalloc(sizeof(void *) * 2, GFP_KERNEL);
1963 if (!data)
1964 return -ENOMEM;
1965
1966 info.ReplaceIfExists = 0; /* 1 = replace existing link with new */
1967 /* 0 = fail if link already exists */
1968 info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */
1969 info.FileNameLength = cpu_to_le32(len);
1970
1971 data[0] = &info;
1972 size[0] = sizeof(struct smb2_file_link_info);
1973
1974 data[1] = target_file;
1975 size[1] = len + 2 /* null */;
1976
1977 rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
1978 current->tgid, FILE_LINK_INFORMATION, 2, data, size);
1979 kfree(data);
1980 return rc;
1981}
1982
1983int
1984SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
1985 u64 volatile_fid, u32 pid, __le64 *eof)
1986{
1987 struct smb2_file_eof_info info;
1988 void *data;
1989 unsigned int size;
1990
1991 info.EndOfFile = *eof;
1992
1993 data = &info;
1994 size = sizeof(struct smb2_file_eof_info);
1995
1996 return send_set_info(xid, tcon, persistent_fid, volatile_fid, pid,
1997 FILE_END_OF_FILE_INFORMATION, 1, &data, &size);
1998}
1999
2000int
2001SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
2002 u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf)
2003{
2004 unsigned int size;
2005 size = sizeof(FILE_BASIC_INFO);
2006 return send_set_info(xid, tcon, persistent_fid, volatile_fid,
2007 current->tgid, FILE_BASIC_INFORMATION, 1,
2008 (void **)&buf, &size);
2009}
2010
2011int
2012SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
2013 const u64 persistent_fid, const u64 volatile_fid,
2014 __u8 oplock_level)
2015{
2016 int rc;
2017 struct smb2_oplock_break *req = NULL;
2018
2019 cFYI(1, "SMB2_oplock_break");
2020 rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
2021
2022 if (rc)
2023 return rc;
2024
2025 req->VolatileFid = volatile_fid;
2026 req->PersistentFid = persistent_fid;
2027 req->OplockLevel = oplock_level;
2028 req->hdr.CreditRequest = cpu_to_le16(1);
2029
2030 rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP);
2031 /* SMB2 buffer freed by function above */
2032
2033 if (rc) {
2034 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
2035 cFYI(1, "Send error in Oplock Break = %d", rc);
2036 }
2037
2038 return rc;
2039}
2040
2041static void
2042copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
2043 struct kstatfs *kst)
2044{
2045 kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
2046 le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
2047 kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits);
2048 kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits);
2049 kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
2050 return;
2051}
2052
2053static int
2054build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
2055 int outbuf_len, u64 persistent_fid, u64 volatile_fid)
2056{
2057 int rc;
2058 struct smb2_query_info_req *req;
2059
2060 cFYI(1, "Query FSInfo level %d", level);
2061
2062 if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
2063 return -EIO;
2064
2065 rc = small_smb2_init(SMB2_QUERY_INFO, tcon, (void **) &req);
2066 if (rc)
2067 return rc;
2068
2069 req->InfoType = SMB2_O_INFO_FILESYSTEM;
2070 req->FileInfoClass = level;
2071 req->PersistentFileId = persistent_fid;
2072 req->VolatileFileId = volatile_fid;
2073 /* 4 for rfc1002 length field and 1 for pad */
2074 req->InputBufferOffset =
2075 cpu_to_le16(sizeof(struct smb2_query_info_req) - 1 - 4);
2076 req->OutputBufferLength = cpu_to_le32(
2077 outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4);
2078
2079 iov->iov_base = (char *)req;
2080 /* 4 for rfc1002 length field */
2081 iov->iov_len = get_rfc1002_length(req) + 4;
2082 return 0;
2083}
2084
2085int
2086SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
2087 u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
2088{
2089 struct smb2_query_info_rsp *rsp = NULL;
2090 struct kvec iov;
2091 int rc = 0;
2092 int resp_buftype;
2093 struct cifs_ses *ses = tcon->ses;
2094 struct smb2_fs_full_size_info *info = NULL;
2095
2096 rc = build_qfs_info_req(&iov, tcon, FS_FULL_SIZE_INFORMATION,
2097 sizeof(struct smb2_fs_full_size_info),
2098 persistent_fid, volatile_fid);
2099 if (rc)
2100 return rc;
2101
2102 rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0);
2103 if (rc) {
2104 cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
2105 goto qinf_exit;
2106 }
2107 rsp = (struct smb2_query_info_rsp *)iov.iov_base;
2108
2109 info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ +
2110 le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
2111 rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset),
2112 le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr,
2113 sizeof(struct smb2_fs_full_size_info));
2114 if (!rc)
2115 copy_fs_info_to_kstatfs(info, fsdata);
2116
2117qinf_exit:
2118 free_rsp_buf(resp_buftype, iov.iov_base);
2119 return rc;
2120}
2121
2122int
2123smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
2124 const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid,
2125 const __u32 num_lock, struct smb2_lock_element *buf)
2126{
2127 int rc = 0;
2128 struct smb2_lock_req *req = NULL;
2129 struct kvec iov[2];
2130 int resp_buf_type;
2131 unsigned int count;
2132
2133 cFYI(1, "smb2_lockv num lock %d", num_lock);
2134
2135 rc = small_smb2_init(SMB2_LOCK, tcon, (void **) &req);
2136 if (rc)
2137 return rc;
2138
2139 req->hdr.ProcessId = cpu_to_le32(pid);
2140 req->LockCount = cpu_to_le16(num_lock);
2141
2142 req->PersistentFileId = persist_fid;
2143 req->VolatileFileId = volatile_fid;
2144
2145 count = num_lock * sizeof(struct smb2_lock_element);
2146 inc_rfc1001_len(req, count - sizeof(struct smb2_lock_element));
2147
2148 iov[0].iov_base = (char *)req;
2149 /* 4 for rfc1002 length field and count for all locks */
2150 iov[0].iov_len = get_rfc1002_length(req) + 4 - count;
2151 iov[1].iov_base = (char *)buf;
2152 iov[1].iov_len = count;
2153
2154 cifs_stats_inc(&tcon->stats.cifs_stats.num_locks);
2155 rc = SendReceive2(xid, tcon->ses, iov, 2, &resp_buf_type, CIFS_NO_RESP);
2156 if (rc) {
2157 cFYI(1, "Send error in smb2_lockv = %d", rc);
2158 cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
2159 }
2160
2161 return rc;
2162}
2163
2164int
2165SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
2166 const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid,
2167 const __u64 length, const __u64 offset, const __u32 lock_flags,
2168 const bool wait)
2169{
2170 struct smb2_lock_element lock;
2171
2172 lock.Offset = cpu_to_le64(offset);
2173 lock.Length = cpu_to_le64(length);
2174 lock.Flags = cpu_to_le32(lock_flags);
2175 if (!wait && lock_flags != SMB2_LOCKFLAG_UNLOCK)
2176 lock.Flags |= cpu_to_le32(SMB2_LOCKFLAG_FAIL_IMMEDIATELY);
2177
2178 return smb2_lockv(xid, tcon, persist_fid, volatile_fid, pid, 1, &lock);
2179}
2180
2181int
2182SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
2183 __u8 *lease_key, const __le32 lease_state)
2184{
2185 int rc;
2186 struct smb2_lease_ack *req = NULL;
2187
2188 cFYI(1, "SMB2_lease_break");
2189 rc = small_smb2_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req);
2190
2191 if (rc)
2192 return rc;
2193
2194 req->hdr.CreditRequest = cpu_to_le16(1);
2195 req->StructureSize = cpu_to_le16(36);
2196 inc_rfc1001_len(req, 12);
2197
2198 memcpy(req->LeaseKey, lease_key, 16);
2199 req->LeaseState = lease_state;
2200
2201 rc = SendReceiveNoRsp(xid, tcon->ses, (char *) req, CIFS_OBREAK_OP);
2202 /* SMB2 buffer freed by function above */
2203
2204 if (rc) {
2205 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
2206 cFYI(1, "Send error in Lease Break = %d", rc);
2207 }
2208
2209 return rc;
2210}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f37a1b41b402..4cb4ced258cb 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -87,10 +87,6 @@
87 87
88#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe) 88#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
89 89
90#define SMB2_HEADER_SIZE __constant_le16_to_cpu(64)
91
92#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_le16_to_cpu(9)
93
94/* 90/*
95 * SMB2 Header Definition 91 * SMB2 Header Definition
96 * 92 *
@@ -99,6 +95,9 @@
99 * "PDU" : "Protocol Data Unit" (ie a network "frame") 95 * "PDU" : "Protocol Data Unit" (ie a network "frame")
100 * 96 *
101 */ 97 */
98
99#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
100
102struct smb2_hdr { 101struct smb2_hdr {
103 __be32 smb2_buf_length; /* big endian on wire */ 102 __be32 smb2_buf_length; /* big endian on wire */
104 /* length is only two or three bytes - with 103 /* length is only two or three bytes - with
@@ -140,6 +139,9 @@ struct smb2_pdu {
140 * command code name for the struct. Note that structures must be packed. 139 * command code name for the struct. Note that structures must be packed.
141 * 140 *
142 */ 141 */
142
143#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
144
143struct smb2_err_rsp { 145struct smb2_err_rsp {
144 struct smb2_hdr hdr; 146 struct smb2_hdr hdr;
145 __le16 StructureSize; 147 __le16 StructureSize;
@@ -148,6 +150,10 @@ struct smb2_err_rsp {
148 __u8 ErrorData[1]; /* variable length */ 150 __u8 ErrorData[1]; /* variable length */
149} __packed; 151} __packed;
150 152
153#define SMB2_CLIENT_GUID_SIZE 16
154
155extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
156
151struct smb2_negotiate_req { 157struct smb2_negotiate_req {
152 struct smb2_hdr hdr; 158 struct smb2_hdr hdr;
153 __le16 StructureSize; /* Must be 36 */ 159 __le16 StructureSize; /* Must be 36 */
@@ -155,11 +161,17 @@ struct smb2_negotiate_req {
155 __le16 SecurityMode; 161 __le16 SecurityMode;
156 __le16 Reserved; /* MBZ */ 162 __le16 Reserved; /* MBZ */
157 __le32 Capabilities; 163 __le32 Capabilities;
158 __u8 ClientGUID[16]; /* MBZ */ 164 __u8 ClientGUID[SMB2_CLIENT_GUID_SIZE];
159 __le64 ClientStartTime; /* MBZ */ 165 __le64 ClientStartTime; /* MBZ */
160 __le16 Dialects[2]; /* variable length */ 166 __le16 Dialects[1]; /* One dialect (vers=) at a time for now */
161} __packed; 167} __packed;
162 168
169/* Dialects */
170#define SMB20_PROT_ID 0x0202
171#define SMB21_PROT_ID 0x0210
172#define SMB30_PROT_ID 0x0300
173#define BAD_PROT_ID 0xFFFF
174
163/* SecurityMode flags */ 175/* SecurityMode flags */
164#define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001 176#define SMB2_NEGOTIATE_SIGNING_ENABLED 0x0001
165#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002 177#define SMB2_NEGOTIATE_SIGNING_REQUIRED 0x0002
@@ -167,6 +179,10 @@ struct smb2_negotiate_req {
167#define SMB2_GLOBAL_CAP_DFS 0x00000001 179#define SMB2_GLOBAL_CAP_DFS 0x00000001
168#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */ 180#define SMB2_GLOBAL_CAP_LEASING 0x00000002 /* Resp only New to SMB2.1 */
169#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */ 181#define SMB2_GLOBAL_CAP_LARGE_MTU 0X00000004 /* Resp only New to SMB2.1 */
182#define SMB2_GLOBAL_CAP_MULTI_CHANNEL 0x00000008 /* New to SMB3 */
183#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
184#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING 0x00000020 /* New to SMB3 */
185#define SMB2_GLOBAL_CAP_ENCRYPTION 0x00000040 /* New to SMB3 */
170/* Internal types */ 186/* Internal types */
171#define SMB2_NT_FIND 0x00100000 187#define SMB2_NT_FIND 0x00100000
172#define SMB2_LARGE_FILES 0x00200000 188#define SMB2_LARGE_FILES 0x00200000
@@ -305,6 +321,8 @@ struct smb2_tree_disconnect_rsp {
305#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08 321#define SMB2_OPLOCK_LEVEL_EXCLUSIVE 0x08
306#define SMB2_OPLOCK_LEVEL_BATCH 0x09 322#define SMB2_OPLOCK_LEVEL_BATCH 0x09
307#define SMB2_OPLOCK_LEVEL_LEASE 0xFF 323#define SMB2_OPLOCK_LEVEL_LEASE 0xFF
324/* Non-spec internal type */
325#define SMB2_OPLOCK_LEVEL_NOCHANGE 0x99
308 326
309/* Desired Access Flags */ 327/* Desired Access Flags */
310#define FILE_READ_DATA_LE cpu_to_le32(0x00000001) 328#define FILE_READ_DATA_LE cpu_to_le32(0x00000001)
@@ -402,7 +420,7 @@ struct smb2_create_req {
402 __le16 NameLength; 420 __le16 NameLength;
403 __le32 CreateContextsOffset; 421 __le32 CreateContextsOffset;
404 __le32 CreateContextsLength; 422 __le32 CreateContextsLength;
405 __u8 Buffer[1]; 423 __u8 Buffer[8];
406} __packed; 424} __packed;
407 425
408struct smb2_create_rsp { 426struct smb2_create_rsp {
@@ -426,6 +444,39 @@ struct smb2_create_rsp {
426 __u8 Buffer[1]; 444 __u8 Buffer[1];
427} __packed; 445} __packed;
428 446
447struct create_context {
448 __le32 Next;
449 __le16 NameOffset;
450 __le16 NameLength;
451 __le16 Reserved;
452 __le16 DataOffset;
453 __le32 DataLength;
454 __u8 Buffer[0];
455} __packed;
456
457#define SMB2_LEASE_NONE __constant_cpu_to_le32(0x00)
458#define SMB2_LEASE_READ_CACHING __constant_cpu_to_le32(0x01)
459#define SMB2_LEASE_HANDLE_CACHING __constant_cpu_to_le32(0x02)
460#define SMB2_LEASE_WRITE_CACHING __constant_cpu_to_le32(0x04)
461
462#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
463
464#define SMB2_LEASE_KEY_SIZE 16
465
466struct lease_context {
467 __le64 LeaseKeyLow;
468 __le64 LeaseKeyHigh;
469 __le32 LeaseState;
470 __le32 LeaseFlags;
471 __le64 LeaseDuration;
472} __packed;
473
474struct create_lease {
475 struct create_context ccontext;
476 __u8 Name[8];
477 struct lease_context lcontext;
478} __packed;
479
429/* Currently defined values for close flags */ 480/* Currently defined values for close flags */
430#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001) 481#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB cpu_to_le16(0x0001)
431struct smb2_close_req { 482struct smb2_close_req {
@@ -451,6 +502,108 @@ struct smb2_close_rsp {
451 __le32 Attributes; 502 __le32 Attributes;
452} __packed; 503} __packed;
453 504
505struct smb2_flush_req {
506 struct smb2_hdr hdr;
507 __le16 StructureSize; /* Must be 24 */
508 __le16 Reserved1;
509 __le32 Reserved2;
510 __u64 PersistentFileId; /* opaque endianness */
511 __u64 VolatileFileId; /* opaque endianness */
512} __packed;
513
514struct smb2_flush_rsp {
515 struct smb2_hdr hdr;
516 __le16 StructureSize;
517 __le16 Reserved;
518} __packed;
519
520struct smb2_read_req {
521 struct smb2_hdr hdr;
522 __le16 StructureSize; /* Must be 49 */
523 __u8 Padding; /* offset from start of SMB2 header to place read */
524 __u8 Reserved;
525 __le32 Length;
526 __le64 Offset;
527 __u64 PersistentFileId; /* opaque endianness */
528 __u64 VolatileFileId; /* opaque endianness */
529 __le32 MinimumCount;
530 __le32 Channel; /* Reserved MBZ */
531 __le32 RemainingBytes;
532 __le16 ReadChannelInfoOffset; /* Reserved MBZ */
533 __le16 ReadChannelInfoLength; /* Reserved MBZ */
534 __u8 Buffer[1];
535} __packed;
536
537struct smb2_read_rsp {
538 struct smb2_hdr hdr;
539 __le16 StructureSize; /* Must be 17 */
540 __u8 DataOffset;
541 __u8 Reserved;
542 __le32 DataLength;
543 __le32 DataRemaining;
544 __u32 Reserved2;
545 __u8 Buffer[1];
546} __packed;
547
548/* For write request Flags field below the following flag is defined: */
549#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
550
551struct smb2_write_req {
552 struct smb2_hdr hdr;
553 __le16 StructureSize; /* Must be 49 */
554 __le16 DataOffset; /* offset from start of SMB2 header to write data */
555 __le32 Length;
556 __le64 Offset;
557 __u64 PersistentFileId; /* opaque endianness */
558 __u64 VolatileFileId; /* opaque endianness */
559 __le32 Channel; /* Reserved MBZ */
560 __le32 RemainingBytes;
561 __le16 WriteChannelInfoOffset; /* Reserved MBZ */
562 __le16 WriteChannelInfoLength; /* Reserved MBZ */
563 __le32 Flags;
564 __u8 Buffer[1];
565} __packed;
566
567struct smb2_write_rsp {
568 struct smb2_hdr hdr;
569 __le16 StructureSize; /* Must be 17 */
570 __u8 DataOffset;
571 __u8 Reserved;
572 __le32 DataLength;
573 __le32 DataRemaining;
574 __u32 Reserved2;
575 __u8 Buffer[1];
576} __packed;
577
578#define SMB2_LOCKFLAG_SHARED_LOCK 0x0001
579#define SMB2_LOCKFLAG_EXCLUSIVE_LOCK 0x0002
580#define SMB2_LOCKFLAG_UNLOCK 0x0004
581#define SMB2_LOCKFLAG_FAIL_IMMEDIATELY 0x0010
582
583struct smb2_lock_element {
584 __le64 Offset;
585 __le64 Length;
586 __le32 Flags;
587 __le32 Reserved;
588} __packed;
589
590struct smb2_lock_req {
591 struct smb2_hdr hdr;
592 __le16 StructureSize; /* Must be 48 */
593 __le16 LockCount;
594 __le32 Reserved;
595 __u64 PersistentFileId; /* opaque endianness */
596 __u64 VolatileFileId; /* opaque endianness */
597 /* Followed by at least one */
598 struct smb2_lock_element locks[1];
599} __packed;
600
601struct smb2_lock_rsp {
602 struct smb2_hdr hdr;
603 __le16 StructureSize; /* Must be 4 */
604 __le16 Reserved;
605} __packed;
606
454struct smb2_echo_req { 607struct smb2_echo_req {
455 struct smb2_hdr hdr; 608 struct smb2_hdr hdr;
456 __le16 StructureSize; /* Must be 4 */ 609 __le16 StructureSize; /* Must be 4 */
@@ -463,6 +616,34 @@ struct smb2_echo_rsp {
463 __u16 Reserved; 616 __u16 Reserved;
464} __packed; 617} __packed;
465 618
619/* search (query_directory) Flags field */
620#define SMB2_RESTART_SCANS 0x01
621#define SMB2_RETURN_SINGLE_ENTRY 0x02
622#define SMB2_INDEX_SPECIFIED 0x04
623#define SMB2_REOPEN 0x10
624
625struct smb2_query_directory_req {
626 struct smb2_hdr hdr;
627 __le16 StructureSize; /* Must be 33 */
628 __u8 FileInformationClass;
629 __u8 Flags;
630 __le32 FileIndex;
631 __u64 PersistentFileId; /* opaque endianness */
632 __u64 VolatileFileId; /* opaque endianness */
633 __le16 FileNameOffset;
634 __le16 FileNameLength;
635 __le32 OutputBufferLength;
636 __u8 Buffer[1];
637} __packed;
638
639struct smb2_query_directory_rsp {
640 struct smb2_hdr hdr;
641 __le16 StructureSize; /* Must be 9 */
642 __le16 OutputBufferOffset;
643 __le32 OutputBufferLength;
644 __u8 Buffer[1];
645} __packed;
646
466/* Possible InfoType values */ 647/* Possible InfoType values */
467#define SMB2_O_INFO_FILE 0x01 648#define SMB2_O_INFO_FILE 0x01
468#define SMB2_O_INFO_FILESYSTEM 0x02 649#define SMB2_O_INFO_FILESYSTEM 0x02
@@ -493,11 +674,84 @@ struct smb2_query_info_rsp {
493 __u8 Buffer[1]; 674 __u8 Buffer[1];
494} __packed; 675} __packed;
495 676
677struct smb2_set_info_req {
678 struct smb2_hdr hdr;
679 __le16 StructureSize; /* Must be 33 */
680 __u8 InfoType;
681 __u8 FileInfoClass;
682 __le32 BufferLength;
683 __le16 BufferOffset;
684 __u16 Reserved;
685 __le32 AdditionalInformation;
686 __u64 PersistentFileId; /* opaque endianness */
687 __u64 VolatileFileId; /* opaque endianness */
688 __u8 Buffer[1];
689} __packed;
690
691struct smb2_set_info_rsp {
692 struct smb2_hdr hdr;
693 __le16 StructureSize; /* Must be 2 */
694} __packed;
695
696struct smb2_oplock_break {
697 struct smb2_hdr hdr;
698 __le16 StructureSize; /* Must be 24 */
699 __u8 OplockLevel;
700 __u8 Reserved;
701 __le32 Reserved2;
702 __u64 PersistentFid;
703 __u64 VolatileFid;
704} __packed;
705
706#define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
707
708struct smb2_lease_break {
709 struct smb2_hdr hdr;
710 __le16 StructureSize; /* Must be 44 */
711 __le16 Reserved;
712 __le32 Flags;
713 __u8 LeaseKey[16];
714 __le32 CurrentLeaseState;
715 __le32 NewLeaseState;
716 __le32 BreakReason;
717 __le32 AccessMaskHint;
718 __le32 ShareMaskHint;
719} __packed;
720
721struct smb2_lease_ack {
722 struct smb2_hdr hdr;
723 __le16 StructureSize; /* Must be 36 */
724 __le16 Reserved;
725 __le32 Flags;
726 __u8 LeaseKey[16];
727 __le32 LeaseState;
728 __le64 LeaseDuration;
729} __packed;
730
496/* 731/*
497 * PDU infolevel structure definitions 732 * PDU infolevel structure definitions
498 * BB consider moving to a different header 733 * BB consider moving to a different header
499 */ 734 */
500 735
736/* File System Information Classes */
737#define FS_VOLUME_INFORMATION 1 /* Query */
738#define FS_LABEL_INFORMATION 2 /* Set */
739#define FS_SIZE_INFORMATION 3 /* Query */
740#define FS_DEVICE_INFORMATION 4 /* Query */
741#define FS_ATTRIBUTE_INFORMATION 5 /* Query */
742#define FS_CONTROL_INFORMATION 6 /* Query, Set */
743#define FS_FULL_SIZE_INFORMATION 7 /* Query */
744#define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */
745#define FS_DRIVER_PATH_INFORMATION 9 /* Query */
746
747struct smb2_fs_full_size_info {
748 __le64 TotalAllocationUnits;
749 __le64 CallerAvailableAllocationUnits;
750 __le64 ActualAvailableAllocationUnits;
751 __le32 SectorsPerAllocationUnit;
752 __le32 BytesPerSector;
753} __packed;
754
501/* partial list of QUERY INFO levels */ 755/* partial list of QUERY INFO levels */
502#define FILE_DIRECTORY_INFORMATION 1 756#define FILE_DIRECTORY_INFORMATION 1
503#define FILE_FULL_DIRECTORY_INFORMATION 2 757#define FILE_FULL_DIRECTORY_INFORMATION 2
@@ -546,6 +800,28 @@ struct smb2_query_info_rsp {
546#define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 800#define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50
547#define FILE_STANDARD_LINK_INFORMATION 54 801#define FILE_STANDARD_LINK_INFORMATION 54
548 802
803struct smb2_file_internal_info {
804 __le64 IndexNumber;
805} __packed; /* level 6 Query */
806
807struct smb2_file_rename_info { /* encoding of request for level 10 */
808 __u8 ReplaceIfExists; /* 1 = replace existing target with new */
809 /* 0 = fail if target already exists */
810 __u8 Reserved[7];
811 __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
812 __le32 FileNameLength;
813 char FileName[0]; /* New name to be assigned */
814} __packed; /* level 10 Set */
815
816struct smb2_file_link_info { /* encoding of request for level 11 */
817 __u8 ReplaceIfExists; /* 1 = replace existing link with new */
818 /* 0 = fail if link already exists */
819 __u8 Reserved[7];
820 __u64 RootDirectory; /* MBZ for network operations (why says spec?) */
821 __le32 FileNameLength;
822 char FileName[0]; /* Name to be assigned to new link */
823} __packed; /* level 11 Set */
824
549/* 825/*
550 * This level 18, although with struct with same name is different from cifs 826 * This level 18, although with struct with same name is different from cifs
551 * level 0x107. Level 0x107 has an extra u64 between AccessFlags and 827 * level 0x107. Level 0x107 has an extra u64 between AccessFlags and
@@ -574,4 +850,8 @@ struct smb2_file_all_info { /* data block encoding of response to level 18 */
574 char FileName[1]; 850 char FileName[1];
575} __packed; /* level 18 Query */ 851} __packed; /* level 18 Query */
576 852
853struct smb2_file_eof_info { /* encoding of request for level 20 */
854 __le64 EndOfFile; /* new end of file value */
855} __packed; /* level 20 Set */
856
577#endif /* _SMB2PDU_H */ 857#endif /* _SMB2PDU_H */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index bfaa7b148afd..7d25f8b14f93 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -26,6 +26,7 @@
26#include <linux/key-type.h> 26#include <linux/key-type.h>
27 27
28struct statfs; 28struct statfs;
29struct smb_rqst;
29 30
30/* 31/*
31 ***************************************************************** 32 *****************************************************************
@@ -34,24 +35,35 @@ struct statfs;
34 */ 35 */
35extern int map_smb2_to_linux_error(char *buf, bool log_err); 36extern int map_smb2_to_linux_error(char *buf, bool log_err);
36extern int smb2_check_message(char *buf, unsigned int length); 37extern int smb2_check_message(char *buf, unsigned int length);
37extern unsigned int smb2_calc_size(struct smb2_hdr *hdr); 38extern unsigned int smb2_calc_size(void *buf);
38extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr); 39extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
39extern __le16 *cifs_convert_path_to_utf16(const char *from, 40extern __le16 *cifs_convert_path_to_utf16(const char *from,
40 struct cifs_sb_info *cifs_sb); 41 struct cifs_sb_info *cifs_sb);
41 42
43extern int smb2_verify_signature(struct smb_rqst *, struct TCP_Server_Info *);
42extern int smb2_check_receive(struct mid_q_entry *mid, 44extern int smb2_check_receive(struct mid_q_entry *mid,
43 struct TCP_Server_Info *server, bool log_error); 45 struct TCP_Server_Info *server, bool log_error);
44extern int smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, 46extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
45 unsigned int nvec, struct mid_q_entry **ret_mid); 47 struct smb_rqst *rqst);
46extern int smb2_setup_async_request(struct TCP_Server_Info *server, 48extern struct mid_q_entry *smb2_setup_async_request(
47 struct kvec *iov, unsigned int nvec, 49 struct TCP_Server_Info *server, struct smb_rqst *rqst);
48 struct mid_q_entry **ret_mid);
49extern void smb2_echo_request(struct work_struct *work); 50extern void smb2_echo_request(struct work_struct *work);
51extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
52extern __u8 smb2_map_lease_to_oplock(__le32 lease_state);
53extern bool smb2_is_valid_oplock_break(char *buffer,
54 struct TCP_Server_Info *srv);
50 55
56extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
57 struct smb2_file_all_info *src);
51extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, 58extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
52 struct cifs_sb_info *cifs_sb, 59 struct cifs_sb_info *cifs_sb,
53 const char *full_path, FILE_ALL_INFO *data, 60 const char *full_path, FILE_ALL_INFO *data,
54 bool *adjust_tz); 61 bool *adjust_tz);
62extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
63 const char *full_path, __u64 size,
64 struct cifs_sb_info *cifs_sb, bool set_alloc);
65extern int smb2_set_file_info(struct inode *inode, const char *full_path,
66 FILE_BASIC_INFO *buf, const unsigned int xid);
55extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, 67extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon,
56 const char *name, struct cifs_sb_info *cifs_sb); 68 const char *name, struct cifs_sb_info *cifs_sb);
57extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path, 69extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
@@ -59,6 +71,24 @@ extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
59 struct cifs_tcon *tcon, const unsigned int xid); 71 struct cifs_tcon *tcon, const unsigned int xid);
60extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, 72extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
61 const char *name, struct cifs_sb_info *cifs_sb); 73 const char *name, struct cifs_sb_info *cifs_sb);
74extern int smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon,
75 const char *name, struct cifs_sb_info *cifs_sb);
76extern int smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon,
77 const char *from_name, const char *to_name,
78 struct cifs_sb_info *cifs_sb);
79extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
80 const char *from_name, const char *to_name,
81 struct cifs_sb_info *cifs_sb);
82
83extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon,
84 const char *full_path, int disposition,
85 int desired_access, int create_options,
86 struct cifs_fid *fid, __u32 *oplock,
87 FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb);
88extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
89extern int smb2_unlock_range(struct cifsFileInfo *cfile,
90 struct file_lock *flock, const unsigned int xid);
91extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
62 92
63/* 93/*
64 * SMB2 Worker functions - most of protocol specific implementation details 94 * SMB2 Worker functions - most of protocol specific implementation details
@@ -75,12 +105,55 @@ extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon);
75extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, 105extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon,
76 __le16 *path, u64 *persistent_fid, u64 *volatile_fid, 106 __le16 *path, u64 *persistent_fid, u64 *volatile_fid,
77 __u32 desired_access, __u32 create_disposition, 107 __u32 desired_access, __u32 create_disposition,
78 __u32 file_attributes, __u32 create_options); 108 __u32 file_attributes, __u32 create_options,
109 __u8 *oplock, struct smb2_file_all_info *buf);
79extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, 110extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
80 u64 persistent_file_id, u64 volatile_file_id); 111 u64 persistent_file_id, u64 volatile_file_id);
112extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
113 u64 persistent_file_id, u64 volatile_file_id);
81extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, 114extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
82 u64 persistent_file_id, u64 volatile_file_id, 115 u64 persistent_file_id, u64 volatile_file_id,
83 struct smb2_file_all_info *data); 116 struct smb2_file_all_info *data);
117extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
118 u64 persistent_fid, u64 volatile_fid,
119 __le64 *uniqueid);
120extern int smb2_async_readv(struct cifs_readdata *rdata);
121extern int SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
122 unsigned int *nbytes, char **buf, int *buf_type);
123extern int smb2_async_writev(struct cifs_writedata *wdata);
124extern int SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
125 unsigned int *nbytes, struct kvec *iov, int n_vec);
84extern int SMB2_echo(struct TCP_Server_Info *server); 126extern int SMB2_echo(struct TCP_Server_Info *server);
127extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
128 u64 persistent_fid, u64 volatile_fid, int index,
129 struct cifs_search_info *srch_inf);
130extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
131 u64 persistent_fid, u64 volatile_fid,
132 __le16 *target_file);
133extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
134 u64 persistent_fid, u64 volatile_fid,
135 __le16 *target_file);
136extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
137 u64 persistent_fid, u64 volatile_fid, u32 pid,
138 __le64 *eof);
139extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
140 u64 persistent_fid, u64 volatile_fid,
141 FILE_BASIC_INFO *buf);
142extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
143 const u64 persistent_fid, const u64 volatile_fid,
144 const __u8 oplock_level);
145extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
146 u64 persistent_file_id, u64 volatile_file_id,
147 struct kstatfs *FSData);
148extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon,
149 const __u64 persist_fid, const __u64 volatile_fid,
150 const __u32 pid, const __u64 length, const __u64 offset,
151 const __u32 lockFlags, const bool wait);
152extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon,
153 const __u64 persist_fid, const __u64 volatile_fid,
154 const __u32 pid, const __u32 num_lock,
155 struct smb2_lock_element *buf);
156extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
157 __u8 *lease_key, const __le32 lease_state);
85 158
86#endif /* _SMB2PROTO_H */ 159#endif /* _SMB2PROTO_H */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 31f5d420b3ea..2a5fdf26f79f 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -30,12 +30,156 @@
30#include <linux/uaccess.h> 30#include <linux/uaccess.h>
31#include <asm/processor.h> 31#include <asm/processor.h>
32#include <linux/mempool.h> 32#include <linux/mempool.h>
33#include <linux/highmem.h>
33#include "smb2pdu.h" 34#include "smb2pdu.h"
34#include "cifsglob.h" 35#include "cifsglob.h"
35#include "cifsproto.h" 36#include "cifsproto.h"
36#include "smb2proto.h" 37#include "smb2proto.h"
37#include "cifs_debug.h" 38#include "cifs_debug.h"
38#include "smb2status.h" 39#include "smb2status.h"
40#include "smb2glob.h"
41
42static int
43smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
44{
45 int i, rc;
46 unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
47 unsigned char *sigptr = smb2_signature;
48 struct kvec *iov = rqst->rq_iov;
49 int n_vec = rqst->rq_nvec;
50 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
51
52 memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
53 memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
54
55 rc = crypto_shash_setkey(server->secmech.hmacsha256,
56 server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
57 if (rc) {
58 cERROR(1, "%s: Could not update with response\n", __func__);
59 return rc;
60 }
61
62 rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
63 if (rc) {
64 cERROR(1, "%s: Could not init md5\n", __func__);
65 return rc;
66 }
67
68 for (i = 0; i < n_vec; i++) {
69 if (iov[i].iov_len == 0)
70 continue;
71 if (iov[i].iov_base == NULL) {
72 cERROR(1, "null iovec entry");
73 return -EIO;
74 }
75 /*
76 * The first entry includes a length field (which does not get
77 * signed that occupies the first 4 bytes before the header).
78 */
79 if (i == 0) {
80 if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
81 break; /* nothing to sign or corrupt header */
82 rc =
83 crypto_shash_update(
84 &server->secmech.sdeschmacsha256->shash,
85 iov[i].iov_base + 4, iov[i].iov_len - 4);
86 } else {
87 rc =
88 crypto_shash_update(
89 &server->secmech.sdeschmacsha256->shash,
90 iov[i].iov_base, iov[i].iov_len);
91 }
92 if (rc) {
93 cERROR(1, "%s: Could not update with payload\n",
94 __func__);
95 return rc;
96 }
97 }
98
99 /* now hash over the rq_pages array */
100 for (i = 0; i < rqst->rq_npages; i++) {
101 struct kvec p_iov;
102
103 cifs_rqst_page_to_kvec(rqst, i, &p_iov);
104 crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
105 p_iov.iov_base, p_iov.iov_len);
106 kunmap(rqst->rq_pages[i]);
107 }
108
109 rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
110 sigptr);
111 if (rc)
112 cERROR(1, "%s: Could not generate sha256 hash\n", __func__);
113
114 memcpy(smb2_pdu->Signature, sigptr, SMB2_SIGNATURE_SIZE);
115
116 return rc;
117}
118
119/* must be called with server->srv_mutex held */
120static int
121smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
122{
123 int rc = 0;
124 struct smb2_hdr *smb2_pdu = rqst->rq_iov[0].iov_base;
125
126 if (!(smb2_pdu->Flags & SMB2_FLAGS_SIGNED) ||
127 server->tcpStatus == CifsNeedNegotiate)
128 return rc;
129
130 if (!server->session_estab) {
131 strncpy(smb2_pdu->Signature, "BSRSPYL", 8);
132 return rc;
133 }
134
135 rc = smb2_calc_signature(rqst, server);
136
137 return rc;
138}
139
140int
141smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
142{
143 unsigned int rc;
144 char server_response_sig[16];
145 struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
146
147 if ((smb2_pdu->Command == SMB2_NEGOTIATE) ||
148 (smb2_pdu->Command == SMB2_OPLOCK_BREAK) ||
149 (!server->session_estab))
150 return 0;
151
152 /*
153 * BB what if signatures are supposed to be on for session but
154 * server does not send one? BB
155 */
156
157 /* Do not need to verify session setups with signature "BSRSPYL " */
158 if (memcmp(smb2_pdu->Signature, "BSRSPYL ", 8) == 0)
159 cFYI(1, "dummy signature received for smb command 0x%x",
160 smb2_pdu->Command);
161
162 /*
163 * Save off the origiginal signature so we can modify the smb and check
164 * our calculated signature against what the server sent.
165 */
166 memcpy(server_response_sig, smb2_pdu->Signature, SMB2_SIGNATURE_SIZE);
167
168 memset(smb2_pdu->Signature, 0, SMB2_SIGNATURE_SIZE);
169
170 mutex_lock(&server->srv_mutex);
171 rc = smb2_calc_signature(rqst, server);
172 mutex_unlock(&server->srv_mutex);
173
174 if (rc)
175 return rc;
176
177 if (memcmp(server_response_sig, smb2_pdu->Signature,
178 SMB2_SIGNATURE_SIZE))
179 return -EACCES;
180 else
181 return 0;
182}
39 183
40/* 184/*
41 * Set message id for the request. Should be called after wait_for_free_request 185 * Set message id for the request. Should be called after wait_for_free_request
@@ -115,58 +259,66 @@ smb2_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
115 bool log_error) 259 bool log_error)
116{ 260{
117 unsigned int len = get_rfc1002_length(mid->resp_buf); 261 unsigned int len = get_rfc1002_length(mid->resp_buf);
262 struct kvec iov;
263 struct smb_rqst rqst = { .rq_iov = &iov,
264 .rq_nvec = 1 };
265
266 iov.iov_base = (char *)mid->resp_buf;
267 iov.iov_len = get_rfc1002_length(mid->resp_buf) + 4;
118 268
119 dump_smb(mid->resp_buf, min_t(u32, 80, len)); 269 dump_smb(mid->resp_buf, min_t(u32, 80, len));
120 /* convert the length into a more usable form */ 270 /* convert the length into a more usable form */
121 /* BB - uncomment with SMB2 signing implementation */ 271 if ((len > 24) &&
122 /* if ((len > 24) &&
123 (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) { 272 (server->sec_mode & (SECMODE_SIGN_REQUIRED|SECMODE_SIGN_ENABLED))) {
124 if (smb2_verify_signature(mid->resp_buf, server)) 273 int rc;
125 cERROR(1, "Unexpected SMB signature"); 274
126 } */ 275 rc = smb2_verify_signature(&rqst, server);
276 if (rc)
277 cERROR(1, "SMB signature verification returned error = "
278 "%d", rc);
279 }
127 280
128 return map_smb2_to_linux_error(mid->resp_buf, log_error); 281 return map_smb2_to_linux_error(mid->resp_buf, log_error);
129} 282}
130 283
131int 284struct mid_q_entry *
132smb2_setup_request(struct cifs_ses *ses, struct kvec *iov, 285smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
133 unsigned int nvec, struct mid_q_entry **ret_mid)
134{ 286{
135 int rc; 287 int rc;
136 struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; 288 struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
137 struct mid_q_entry *mid; 289 struct mid_q_entry *mid;
138 290
139 smb2_seq_num_into_buf(ses->server, hdr); 291 smb2_seq_num_into_buf(ses->server, hdr);
140 292
141 rc = smb2_get_mid_entry(ses, hdr, &mid); 293 rc = smb2_get_mid_entry(ses, hdr, &mid);
142 if (rc) 294 if (rc)
143 return rc; 295 return ERR_PTR(rc);
144 /* rc = smb2_sign_smb2(iov, nvec, ses->server); 296 rc = smb2_sign_rqst(rqst, ses->server);
145 if (rc) 297 if (rc) {
146 delete_mid(mid); */ 298 cifs_delete_mid(mid);
147 *ret_mid = mid; 299 return ERR_PTR(rc);
148 return rc; 300 }
301 return mid;
149} 302}
150 303
151int 304struct mid_q_entry *
152smb2_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, 305smb2_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
153 unsigned int nvec, struct mid_q_entry **ret_mid)
154{ 306{
155 int rc = 0; 307 int rc;
156 struct smb2_hdr *hdr = (struct smb2_hdr *)iov[0].iov_base; 308 struct smb2_hdr *hdr = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
157 struct mid_q_entry *mid; 309 struct mid_q_entry *mid;
158 310
159 smb2_seq_num_into_buf(server, hdr); 311 smb2_seq_num_into_buf(server, hdr);
160 312
161 mid = smb2_mid_entry_alloc(hdr, server); 313 mid = smb2_mid_entry_alloc(hdr, server);
162 if (mid == NULL) 314 if (mid == NULL)
163 return -ENOMEM; 315 return ERR_PTR(-ENOMEM);
164 316
165 /* rc = smb2_sign_smb2(iov, nvec, server); 317 rc = smb2_sign_rqst(rqst, server);
166 if (rc) { 318 if (rc) {
167 DeleteMidQEntry(mid); 319 DeleteMidQEntry(mid);
168 return rc; 320 return ERR_PTR(rc);
169 }*/ 321 }
170 *ret_mid = mid; 322
171 return rc; 323 return mid;
172} 324}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 83867ef348df..2126ab185045 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -27,6 +27,8 @@
27#include <linux/net.h> 27#include <linux/net.h>
28#include <linux/delay.h> 28#include <linux/delay.h>
29#include <linux/freezer.h> 29#include <linux/freezer.h>
30#include <linux/tcp.h>
31#include <linux/highmem.h>
30#include <asm/uaccess.h> 32#include <asm/uaccess.h>
31#include <asm/processor.h> 33#include <asm/processor.h>
32#include <linux/mempool.h> 34#include <linux/mempool.h>
@@ -109,8 +111,8 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
109 mempool_free(midEntry, cifs_mid_poolp); 111 mempool_free(midEntry, cifs_mid_poolp);
110} 112}
111 113
112static void 114void
113delete_mid(struct mid_q_entry *mid) 115cifs_delete_mid(struct mid_q_entry *mid)
114{ 116{
115 spin_lock(&GlobalMid_Lock); 117 spin_lock(&GlobalMid_Lock);
116 list_del(&mid->qhead); 118 list_del(&mid->qhead);
@@ -119,18 +121,29 @@ delete_mid(struct mid_q_entry *mid)
119 DeleteMidQEntry(mid); 121 DeleteMidQEntry(mid);
120} 122}
121 123
124/*
125 * smb_send_kvec - send an array of kvecs to the server
126 * @server: Server to send the data to
127 * @iov: Pointer to array of kvecs
128 * @n_vec: length of kvec array
129 * @sent: amount of data sent on socket is stored here
130 *
131 * Our basic "send data to server" function. Should be called with srv_mutex
132 * held. The caller is responsible for handling the results.
133 */
122static int 134static int
123smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec) 135smb_send_kvec(struct TCP_Server_Info *server, struct kvec *iov, size_t n_vec,
136 size_t *sent)
124{ 137{
125 int rc = 0; 138 int rc = 0;
126 int i = 0; 139 int i = 0;
127 struct msghdr smb_msg; 140 struct msghdr smb_msg;
128 unsigned int len = iov[0].iov_len; 141 unsigned int remaining;
129 unsigned int total_len; 142 size_t first_vec = 0;
130 int first_vec = 0;
131 unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
132 struct socket *ssocket = server->ssocket; 143 struct socket *ssocket = server->ssocket;
133 144
145 *sent = 0;
146
134 if (ssocket == NULL) 147 if (ssocket == NULL)
135 return -ENOTSOCK; /* BB eventually add reconnect code here */ 148 return -ENOTSOCK; /* BB eventually add reconnect code here */
136 149
@@ -143,56 +156,60 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
143 else 156 else
144 smb_msg.msg_flags = MSG_NOSIGNAL; 157 smb_msg.msg_flags = MSG_NOSIGNAL;
145 158
146 total_len = 0; 159 remaining = 0;
147 for (i = 0; i < n_vec; i++) 160 for (i = 0; i < n_vec; i++)
148 total_len += iov[i].iov_len; 161 remaining += iov[i].iov_len;
149
150 cFYI(1, "Sending smb: total_len %d", total_len);
151 dump_smb(iov[0].iov_base, len);
152 162
153 i = 0; 163 i = 0;
154 while (total_len) { 164 while (remaining) {
165 /*
166 * If blocking send, we try 3 times, since each can block
167 * for 5 seconds. For nonblocking we have to try more
168 * but wait increasing amounts of time allowing time for
169 * socket to clear. The overall time we wait in either
170 * case to send on the socket is about 15 seconds.
171 * Similarly we wait for 15 seconds for a response from
172 * the server in SendReceive[2] for the server to send
173 * a response back for most types of requests (except
174 * SMB Write past end of file which can be slow, and
175 * blocking lock operations). NFS waits slightly longer
176 * than CIFS, but this can make it take longer for
177 * nonresponsive servers to be detected and 15 seconds
178 * is more than enough time for modern networks to
179 * send a packet. In most cases if we fail to send
180 * after the retries we will kill the socket and
181 * reconnect which may clear the network problem.
182 */
155 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], 183 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
156 n_vec - first_vec, total_len); 184 n_vec - first_vec, remaining);
157 if ((rc == -ENOSPC) || (rc == -EAGAIN)) { 185 if (rc == -ENOSPC || rc == -EAGAIN) {
158 i++; 186 i++;
159 /* 187 if (i >= 14 || (!server->noblocksnd && (i > 2))) {
160 * If blocking send we try 3 times, since each can block 188 cERROR(1, "sends on sock %p stuck for 15 "
161 * for 5 seconds. For nonblocking we have to try more 189 "seconds", ssocket);
162 * but wait increasing amounts of time allowing time for
163 * socket to clear. The overall time we wait in either
164 * case to send on the socket is about 15 seconds.
165 * Similarly we wait for 15 seconds for a response from
166 * the server in SendReceive[2] for the server to send
167 * a response back for most types of requests (except
168 * SMB Write past end of file which can be slow, and
169 * blocking lock operations). NFS waits slightly longer
170 * than CIFS, but this can make it take longer for
171 * nonresponsive servers to be detected and 15 seconds
172 * is more than enough time for modern networks to
173 * send a packet. In most cases if we fail to send
174 * after the retries we will kill the socket and
175 * reconnect which may clear the network problem.
176 */
177 if ((i >= 14) || (!server->noblocksnd && (i > 2))) {
178 cERROR(1, "sends on sock %p stuck for 15 seconds",
179 ssocket);
180 rc = -EAGAIN; 190 rc = -EAGAIN;
181 break; 191 break;
182 } 192 }
183 msleep(1 << i); 193 msleep(1 << i);
184 continue; 194 continue;
185 } 195 }
196
186 if (rc < 0) 197 if (rc < 0)
187 break; 198 break;
188 199
189 if (rc == total_len) { 200 /* send was at least partially successful */
190 total_len = 0; 201 *sent += rc;
202
203 if (rc == remaining) {
204 remaining = 0;
191 break; 205 break;
192 } else if (rc > total_len) { 206 }
193 cERROR(1, "sent %d requested %d", rc, total_len); 207
208 if (rc > remaining) {
209 cERROR(1, "sent %d requested %d", rc, remaining);
194 break; 210 break;
195 } 211 }
212
196 if (rc == 0) { 213 if (rc == 0) {
197 /* should never happen, letting socket clear before 214 /* should never happen, letting socket clear before
198 retrying is our only obvious option here */ 215 retrying is our only obvious option here */
@@ -200,7 +217,9 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
200 msleep(500); 217 msleep(500);
201 continue; 218 continue;
202 } 219 }
203 total_len -= rc; 220
221 remaining -= rc;
222
204 /* the line below resets i */ 223 /* the line below resets i */
205 for (i = first_vec; i < n_vec; i++) { 224 for (i = first_vec; i < n_vec; i++) {
206 if (iov[i].iov_len) { 225 if (iov[i].iov_len) {
@@ -215,16 +234,97 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
215 } 234 }
216 } 235 }
217 } 236 }
237
218 i = 0; /* in case we get ENOSPC on the next send */ 238 i = 0; /* in case we get ENOSPC on the next send */
239 rc = 0;
219 } 240 }
241 return rc;
242}
243
244/**
245 * rqst_page_to_kvec - Turn a slot in the smb_rqst page array into a kvec
246 * @rqst: pointer to smb_rqst
247 * @idx: index into the array of the page
248 * @iov: pointer to struct kvec that will hold the result
249 *
250 * Helper function to convert a slot in the rqst->rq_pages array into a kvec.
251 * The page will be kmapped and the address placed into iov_base. The length
252 * will then be adjusted according to the ptailoff.
253 */
254void
255cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
256 struct kvec *iov)
257{
258 /*
259 * FIXME: We could avoid this kmap altogether if we used
260 * kernel_sendpage instead of kernel_sendmsg. That will only
261 * work if signing is disabled though as sendpage inlines the
262 * page directly into the fraglist. If userspace modifies the
263 * page after we calculate the signature, then the server will
264 * reject it and may break the connection. kernel_sendmsg does
265 * an extra copy of the data and avoids that issue.
266 */
267 iov->iov_base = kmap(rqst->rq_pages[idx]);
268
269 /* if last page, don't send beyond this offset into page */
270 if (idx == (rqst->rq_npages - 1))
271 iov->iov_len = rqst->rq_tailsz;
272 else
273 iov->iov_len = rqst->rq_pagesz;
274}
275
276static int
277smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
278{
279 int rc;
280 struct kvec *iov = rqst->rq_iov;
281 int n_vec = rqst->rq_nvec;
282 unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
283 unsigned int i;
284 size_t total_len = 0, sent;
285 struct socket *ssocket = server->ssocket;
286 int val = 1;
287
288 cFYI(1, "Sending smb: smb_len=%u", smb_buf_length);
289 dump_smb(iov[0].iov_base, iov[0].iov_len);
290
291 /* cork the socket */
292 kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
293 (char *)&val, sizeof(val));
294
295 rc = smb_send_kvec(server, iov, n_vec, &sent);
296 if (rc < 0)
297 goto uncork;
298
299 total_len += sent;
300
301 /* now walk the page array and send each page in it */
302 for (i = 0; i < rqst->rq_npages; i++) {
303 struct kvec p_iov;
304
305 cifs_rqst_page_to_kvec(rqst, i, &p_iov);
306 rc = smb_send_kvec(server, &p_iov, 1, &sent);
307 kunmap(rqst->rq_pages[i]);
308 if (rc < 0)
309 break;
310
311 total_len += sent;
312 }
313
314uncork:
315 /* uncork it */
316 val = 0;
317 kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
318 (char *)&val, sizeof(val));
220 319
221 if ((total_len > 0) && (total_len != smb_buf_length + 4)) { 320 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
222 cFYI(1, "partial send (%d remaining), terminating session", 321 cFYI(1, "partial send (wanted=%u sent=%zu): terminating "
223 total_len); 322 "session", smb_buf_length + 4, total_len);
224 /* If we have only sent part of an SMB then the next SMB 323 /*
225 could be taken as the remainder of this one. We need 324 * If we have only sent part of an SMB then the next SMB could
226 to kill the socket so the server throws away the partial 325 * be taken as the remainder of this one. We need to kill the
227 SMB */ 326 * socket so the server throws away the partial SMB
327 */
228 server->tcpStatus = CifsNeedReconnect; 328 server->tcpStatus = CifsNeedReconnect;
229 } 329 }
230 330
@@ -236,6 +336,15 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
236 return rc; 336 return rc;
237} 337}
238 338
339static int
340smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
341{
342 struct smb_rqst rqst = { .rq_iov = iov,
343 .rq_nvec = n_vec };
344
345 return smb_send_rqst(server, &rqst);
346}
347
239int 348int
240smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, 349smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer,
241 unsigned int smb_buf_length) 350 unsigned int smb_buf_length)
@@ -345,12 +454,11 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
345 return 0; 454 return 0;
346} 455}
347 456
348int 457struct mid_q_entry *
349cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, 458cifs_setup_async_request(struct TCP_Server_Info *server, struct smb_rqst *rqst)
350 unsigned int nvec, struct mid_q_entry **ret_mid)
351{ 459{
352 int rc; 460 int rc;
353 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; 461 struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
354 struct mid_q_entry *mid; 462 struct mid_q_entry *mid;
355 463
356 /* enable signing if server requires it */ 464 /* enable signing if server requires it */
@@ -359,16 +467,15 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
359 467
360 mid = AllocMidQEntry(hdr, server); 468 mid = AllocMidQEntry(hdr, server);
361 if (mid == NULL) 469 if (mid == NULL)
362 return -ENOMEM; 470 return ERR_PTR(-ENOMEM);
363 471
364 rc = cifs_sign_smbv(iov, nvec, server, &mid->sequence_number); 472 rc = cifs_sign_rqst(rqst, server, &mid->sequence_number);
365 if (rc) { 473 if (rc) {
366 DeleteMidQEntry(mid); 474 DeleteMidQEntry(mid);
367 return rc; 475 return ERR_PTR(rc);
368 } 476 }
369 477
370 *ret_mid = mid; 478 return mid;
371 return 0;
372} 479}
373 480
374/* 481/*
@@ -376,9 +483,9 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
376 * the result. Caller is responsible for dealing with timeouts. 483 * the result. Caller is responsible for dealing with timeouts.
377 */ 484 */
378int 485int
379cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, 486cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
380 unsigned int nvec, mid_receive_t *receive, 487 mid_receive_t *receive, mid_callback_t *callback,
381 mid_callback_t *callback, void *cbdata, const int flags) 488 void *cbdata, const int flags)
382{ 489{
383 int rc, timeout, optype; 490 int rc, timeout, optype;
384 struct mid_q_entry *mid; 491 struct mid_q_entry *mid;
@@ -391,12 +498,12 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
391 return rc; 498 return rc;
392 499
393 mutex_lock(&server->srv_mutex); 500 mutex_lock(&server->srv_mutex);
394 rc = server->ops->setup_async_request(server, iov, nvec, &mid); 501 mid = server->ops->setup_async_request(server, rqst);
395 if (rc) { 502 if (IS_ERR(mid)) {
396 mutex_unlock(&server->srv_mutex); 503 mutex_unlock(&server->srv_mutex);
397 add_credits(server, 1, optype); 504 add_credits(server, 1, optype);
398 wake_up(&server->request_q); 505 wake_up(&server->request_q);
399 return rc; 506 return PTR_ERR(mid);
400 } 507 }
401 508
402 mid->receive = receive; 509 mid->receive = receive;
@@ -411,7 +518,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
411 518
412 519
413 cifs_in_send_inc(server); 520 cifs_in_send_inc(server);
414 rc = smb_sendv(server, iov, nvec); 521 rc = smb_send_rqst(server, rqst);
415 cifs_in_send_dec(server); 522 cifs_in_send_dec(server);
416 cifs_save_when_sent(mid); 523 cifs_save_when_sent(mid);
417 mutex_unlock(&server->srv_mutex); 524 mutex_unlock(&server->srv_mutex);
@@ -419,7 +526,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
419 if (rc == 0) 526 if (rc == 0)
420 return 0; 527 return 0;
421 528
422 delete_mid(mid); 529 cifs_delete_mid(mid);
423 add_credits(server, 1, optype); 530 add_credits(server, 1, optype);
424 wake_up(&server->request_q); 531 wake_up(&server->request_q);
425 return rc; 532 return rc;
@@ -503,35 +610,40 @@ cifs_check_receive(struct mid_q_entry *mid, struct TCP_Server_Info *server,
503 /* convert the length into a more usable form */ 610 /* convert the length into a more usable form */
504 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { 611 if (server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
505 struct kvec iov; 612 struct kvec iov;
613 int rc = 0;
614 struct smb_rqst rqst = { .rq_iov = &iov,
615 .rq_nvec = 1 };
506 616
507 iov.iov_base = mid->resp_buf; 617 iov.iov_base = mid->resp_buf;
508 iov.iov_len = len; 618 iov.iov_len = len;
509 /* FIXME: add code to kill session */ 619 /* FIXME: add code to kill session */
510 if (cifs_verify_signature(&iov, 1, server, 620 rc = cifs_verify_signature(&rqst, server,
511 mid->sequence_number + 1) != 0) 621 mid->sequence_number + 1);
512 cERROR(1, "Unexpected SMB signature"); 622 if (rc)
623 cERROR(1, "SMB signature verification returned error = "
624 "%d", rc);
513 } 625 }
514 626
515 /* BB special case reconnect tid and uid here? */ 627 /* BB special case reconnect tid and uid here? */
516 return map_smb_to_linux_error(mid->resp_buf, log_error); 628 return map_smb_to_linux_error(mid->resp_buf, log_error);
517} 629}
518 630
519int 631struct mid_q_entry *
520cifs_setup_request(struct cifs_ses *ses, struct kvec *iov, 632cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
521 unsigned int nvec, struct mid_q_entry **ret_mid)
522{ 633{
523 int rc; 634 int rc;
524 struct smb_hdr *hdr = (struct smb_hdr *)iov[0].iov_base; 635 struct smb_hdr *hdr = (struct smb_hdr *)rqst->rq_iov[0].iov_base;
525 struct mid_q_entry *mid; 636 struct mid_q_entry *mid;
526 637
527 rc = allocate_mid(ses, hdr, &mid); 638 rc = allocate_mid(ses, hdr, &mid);
528 if (rc) 639 if (rc)
529 return rc; 640 return ERR_PTR(rc);
530 rc = cifs_sign_smbv(iov, nvec, ses->server, &mid->sequence_number); 641 rc = cifs_sign_rqst(rqst, ses->server, &mid->sequence_number);
531 if (rc) 642 if (rc) {
532 delete_mid(mid); 643 cifs_delete_mid(mid);
533 *ret_mid = mid; 644 return ERR_PTR(rc);
534 return rc; 645 }
646 return mid;
535} 647}
536 648
537int 649int
@@ -544,6 +656,8 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
544 struct mid_q_entry *midQ; 656 struct mid_q_entry *midQ;
545 char *buf = iov[0].iov_base; 657 char *buf = iov[0].iov_base;
546 unsigned int credits = 1; 658 unsigned int credits = 1;
659 struct smb_rqst rqst = { .rq_iov = iov,
660 .rq_nvec = n_vec };
547 661
548 timeout = flags & CIFS_TIMEOUT_MASK; 662 timeout = flags & CIFS_TIMEOUT_MASK;
549 optype = flags & CIFS_OP_MASK; 663 optype = flags & CIFS_OP_MASK;
@@ -581,13 +695,13 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
581 695
582 mutex_lock(&ses->server->srv_mutex); 696 mutex_lock(&ses->server->srv_mutex);
583 697
584 rc = ses->server->ops->setup_request(ses, iov, n_vec, &midQ); 698 midQ = ses->server->ops->setup_request(ses, &rqst);
585 if (rc) { 699 if (IS_ERR(midQ)) {
586 mutex_unlock(&ses->server->srv_mutex); 700 mutex_unlock(&ses->server->srv_mutex);
587 cifs_small_buf_release(buf); 701 cifs_small_buf_release(buf);
588 /* Update # of requests on wire to server */ 702 /* Update # of requests on wire to server */
589 add_credits(ses->server, 1, optype); 703 add_credits(ses->server, 1, optype);
590 return rc; 704 return PTR_ERR(midQ);
591 } 705 }
592 706
593 midQ->mid_state = MID_REQUEST_SUBMITTED; 707 midQ->mid_state = MID_REQUEST_SUBMITTED;
@@ -649,11 +763,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
649 rc = ses->server->ops->check_receive(midQ, ses->server, 763 rc = ses->server->ops->check_receive(midQ, ses->server,
650 flags & CIFS_LOG_ERROR); 764 flags & CIFS_LOG_ERROR);
651 765
652 /* mark it so buf will not be freed by delete_mid */ 766 /* mark it so buf will not be freed by cifs_delete_mid */
653 if ((flags & CIFS_NO_RESP) == 0) 767 if ((flags & CIFS_NO_RESP) == 0)
654 midQ->resp_buf = NULL; 768 midQ->resp_buf = NULL;
655out: 769out:
656 delete_mid(midQ); 770 cifs_delete_mid(midQ);
657 add_credits(ses->server, credits, optype); 771 add_credits(ses->server, credits, optype);
658 772
659 return rc; 773 return rc;
@@ -759,7 +873,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
759 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); 873 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
760 rc = cifs_check_receive(midQ, ses->server, 0); 874 rc = cifs_check_receive(midQ, ses->server, 0);
761out: 875out:
762 delete_mid(midQ); 876 cifs_delete_mid(midQ);
763 add_credits(ses->server, 1, 0); 877 add_credits(ses->server, 1, 0);
764 878
765 return rc; 879 return rc;
@@ -843,7 +957,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
843 957
844 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); 958 rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number);
845 if (rc) { 959 if (rc) {
846 delete_mid(midQ); 960 cifs_delete_mid(midQ);
847 mutex_unlock(&ses->server->srv_mutex); 961 mutex_unlock(&ses->server->srv_mutex);
848 return rc; 962 return rc;
849 } 963 }
@@ -856,7 +970,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
856 mutex_unlock(&ses->server->srv_mutex); 970 mutex_unlock(&ses->server->srv_mutex);
857 971
858 if (rc < 0) { 972 if (rc < 0) {
859 delete_mid(midQ); 973 cifs_delete_mid(midQ);
860 return rc; 974 return rc;
861 } 975 }
862 976
@@ -877,7 +991,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
877 blocking lock to return. */ 991 blocking lock to return. */
878 rc = send_cancel(ses->server, in_buf, midQ); 992 rc = send_cancel(ses->server, in_buf, midQ);
879 if (rc) { 993 if (rc) {
880 delete_mid(midQ); 994 cifs_delete_mid(midQ);
881 return rc; 995 return rc;
882 } 996 }
883 } else { 997 } else {
@@ -889,7 +1003,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
889 /* If we get -ENOLCK back the lock may have 1003 /* If we get -ENOLCK back the lock may have
890 already been removed. Don't exit in this case. */ 1004 already been removed. Don't exit in this case. */
891 if (rc && rc != -ENOLCK) { 1005 if (rc && rc != -ENOLCK) {
892 delete_mid(midQ); 1006 cifs_delete_mid(midQ);
893 return rc; 1007 return rc;
894 } 1008 }
895 } 1009 }
@@ -926,7 +1040,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
926 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4); 1040 memcpy(out_buf, midQ->resp_buf, *pbytes_returned + 4);
927 rc = cifs_check_receive(midQ, ses->server, 0); 1041 rc = cifs_check_receive(midQ, ses->server, 0);
928out: 1042out:
929 delete_mid(midQ); 1043 cifs_delete_mid(midQ);
930 if (rstart && rc == -EACCES) 1044 if (rstart && rc == -EACCES)
931 return -ERESTARTSYS; 1045 return -ERESTARTSYS;
932 return rc; 1046 return rc;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index f1813120d753..be2aa4909487 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -85,6 +85,11 @@ int coda_init_inodecache(void)
85 85
86void coda_destroy_inodecache(void) 86void coda_destroy_inodecache(void)
87{ 87{
88 /*
89 * Make sure all delayed rcu free inodes are flushed before we
90 * destroy cache.
91 */
92 rcu_barrier();
88 kmem_cache_destroy(coda_inode_cachep); 93 kmem_cache_destroy(coda_inode_cachep);
89} 94}
90 95
@@ -107,43 +112,41 @@ static const struct super_operations coda_super_operations =
107 112
108static int get_device_index(struct coda_mount_data *data) 113static int get_device_index(struct coda_mount_data *data)
109{ 114{
110 struct file *file; 115 struct fd f;
111 struct inode *inode; 116 struct inode *inode;
112 int idx; 117 int idx;
113 118
114 if(data == NULL) { 119 if (data == NULL) {
115 printk("coda_read_super: Bad mount data\n"); 120 printk("coda_read_super: Bad mount data\n");
116 return -1; 121 return -1;
117 } 122 }
118 123
119 if(data->version != CODA_MOUNT_VERSION) { 124 if (data->version != CODA_MOUNT_VERSION) {
120 printk("coda_read_super: Bad mount version\n"); 125 printk("coda_read_super: Bad mount version\n");
121 return -1; 126 return -1;
122 } 127 }
123 128
124 file = fget(data->fd); 129 f = fdget(data->fd);
125 inode = NULL; 130 if (!f.file)
126 if(file) 131 goto Ebadf;
127 inode = file->f_path.dentry->d_inode; 132 inode = f.file->f_path.dentry->d_inode;
128 133 if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
129 if(!inode || !S_ISCHR(inode->i_mode) || 134 fdput(f);
130 imajor(inode) != CODA_PSDEV_MAJOR) { 135 goto Ebadf;
131 if(file)
132 fput(file);
133
134 printk("coda_read_super: Bad file\n");
135 return -1;
136 } 136 }
137 137
138 idx = iminor(inode); 138 idx = iminor(inode);
139 fput(file); 139 fdput(f);
140 140
141 if(idx < 0 || idx >= MAX_CODADEVS) { 141 if (idx < 0 || idx >= MAX_CODADEVS) {
142 printk("coda_read_super: Bad minor number\n"); 142 printk("coda_read_super: Bad minor number\n");
143 return -1; 143 return -1;
144 } 144 }
145 145
146 return idx; 146 return idx;
147Ebadf:
148 printk("coda_read_super: Bad file\n");
149 return -1;
147} 150}
148 151
149static int coda_fill_super(struct super_block *sb, void *data, int silent) 152static int coda_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/compat.c b/fs/compat.c
index 6161255fac45..b7a24d0ca30d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -870,22 +870,20 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
870 struct compat_old_linux_dirent __user *dirent, unsigned int count) 870 struct compat_old_linux_dirent __user *dirent, unsigned int count)
871{ 871{
872 int error; 872 int error;
873 struct file *file; 873 struct fd f = fdget(fd);
874 int fput_needed;
875 struct compat_readdir_callback buf; 874 struct compat_readdir_callback buf;
876 875
877 file = fget_light(fd, &fput_needed); 876 if (!f.file)
878 if (!file)
879 return -EBADF; 877 return -EBADF;
880 878
881 buf.result = 0; 879 buf.result = 0;
882 buf.dirent = dirent; 880 buf.dirent = dirent;
883 881
884 error = vfs_readdir(file, compat_fillonedir, &buf); 882 error = vfs_readdir(f.file, compat_fillonedir, &buf);
885 if (buf.result) 883 if (buf.result)
886 error = buf.result; 884 error = buf.result;
887 885
888 fput_light(file, fput_needed); 886 fdput(f);
889 return error; 887 return error;
890} 888}
891 889
@@ -949,17 +947,16 @@ efault:
949asmlinkage long compat_sys_getdents(unsigned int fd, 947asmlinkage long compat_sys_getdents(unsigned int fd,
950 struct compat_linux_dirent __user *dirent, unsigned int count) 948 struct compat_linux_dirent __user *dirent, unsigned int count)
951{ 949{
952 struct file * file; 950 struct fd f;
953 struct compat_linux_dirent __user * lastdirent; 951 struct compat_linux_dirent __user * lastdirent;
954 struct compat_getdents_callback buf; 952 struct compat_getdents_callback buf;
955 int fput_needed;
956 int error; 953 int error;
957 954
958 if (!access_ok(VERIFY_WRITE, dirent, count)) 955 if (!access_ok(VERIFY_WRITE, dirent, count))
959 return -EFAULT; 956 return -EFAULT;
960 957
961 file = fget_light(fd, &fput_needed); 958 f = fdget(fd);
962 if (!file) 959 if (!f.file)
963 return -EBADF; 960 return -EBADF;
964 961
965 buf.current_dir = dirent; 962 buf.current_dir = dirent;
@@ -967,17 +964,17 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
967 buf.count = count; 964 buf.count = count;
968 buf.error = 0; 965 buf.error = 0;
969 966
970 error = vfs_readdir(file, compat_filldir, &buf); 967 error = vfs_readdir(f.file, compat_filldir, &buf);
971 if (error >= 0) 968 if (error >= 0)
972 error = buf.error; 969 error = buf.error;
973 lastdirent = buf.previous; 970 lastdirent = buf.previous;
974 if (lastdirent) { 971 if (lastdirent) {
975 if (put_user(file->f_pos, &lastdirent->d_off)) 972 if (put_user(f.file->f_pos, &lastdirent->d_off))
976 error = -EFAULT; 973 error = -EFAULT;
977 else 974 else
978 error = count - buf.count; 975 error = count - buf.count;
979 } 976 }
980 fput_light(file, fput_needed); 977 fdput(f);
981 return error; 978 return error;
982} 979}
983 980
@@ -1035,17 +1032,16 @@ efault:
1035asmlinkage long compat_sys_getdents64(unsigned int fd, 1032asmlinkage long compat_sys_getdents64(unsigned int fd,
1036 struct linux_dirent64 __user * dirent, unsigned int count) 1033 struct linux_dirent64 __user * dirent, unsigned int count)
1037{ 1034{
1038 struct file * file; 1035 struct fd f;
1039 struct linux_dirent64 __user * lastdirent; 1036 struct linux_dirent64 __user * lastdirent;
1040 struct compat_getdents_callback64 buf; 1037 struct compat_getdents_callback64 buf;
1041 int fput_needed;
1042 int error; 1038 int error;
1043 1039
1044 if (!access_ok(VERIFY_WRITE, dirent, count)) 1040 if (!access_ok(VERIFY_WRITE, dirent, count))
1045 return -EFAULT; 1041 return -EFAULT;
1046 1042
1047 file = fget_light(fd, &fput_needed); 1043 f = fdget(fd);
1048 if (!file) 1044 if (!f.file)
1049 return -EBADF; 1045 return -EBADF;
1050 1046
1051 buf.current_dir = dirent; 1047 buf.current_dir = dirent;
@@ -1053,18 +1049,18 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
1053 buf.count = count; 1049 buf.count = count;
1054 buf.error = 0; 1050 buf.error = 0;
1055 1051
1056 error = vfs_readdir(file, compat_filldir64, &buf); 1052 error = vfs_readdir(f.file, compat_filldir64, &buf);
1057 if (error >= 0) 1053 if (error >= 0)
1058 error = buf.error; 1054 error = buf.error;
1059 lastdirent = buf.previous; 1055 lastdirent = buf.previous;
1060 if (lastdirent) { 1056 if (lastdirent) {
1061 typeof(lastdirent->d_off) d_off = file->f_pos; 1057 typeof(lastdirent->d_off) d_off = f.file->f_pos;
1062 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1058 if (__put_user_unaligned(d_off, &lastdirent->d_off))
1063 error = -EFAULT; 1059 error = -EFAULT;
1064 else 1060 else
1065 error = count - buf.count; 1061 error = count - buf.count;
1066 } 1062 }
1067 fput_light(file, fput_needed); 1063 fdput(f);
1068 return error; 1064 return error;
1069} 1065}
1070#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ 1066#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
@@ -1152,15 +1148,16 @@ asmlinkage ssize_t
1152compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, 1148compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1153 unsigned long vlen) 1149 unsigned long vlen)
1154{ 1150{
1155 struct file *file; 1151 struct fd f = fdget(fd);
1156 int fput_needed;
1157 ssize_t ret; 1152 ssize_t ret;
1153 loff_t pos;
1158 1154
1159 file = fget_light(fd, &fput_needed); 1155 if (!f.file)
1160 if (!file)
1161 return -EBADF; 1156 return -EBADF;
1162 ret = compat_readv(file, vec, vlen, &file->f_pos); 1157 pos = f.file->f_pos;
1163 fput_light(file, fput_needed); 1158 ret = compat_readv(f.file, vec, vlen, &pos);
1159 f.file->f_pos = pos;
1160 fdput(f);
1164 return ret; 1161 return ret;
1165} 1162}
1166 1163
@@ -1168,19 +1165,18 @@ asmlinkage ssize_t
1168compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, 1165compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
1169 unsigned long vlen, loff_t pos) 1166 unsigned long vlen, loff_t pos)
1170{ 1167{
1171 struct file *file; 1168 struct fd f;
1172 int fput_needed;
1173 ssize_t ret; 1169 ssize_t ret;
1174 1170
1175 if (pos < 0) 1171 if (pos < 0)
1176 return -EINVAL; 1172 return -EINVAL;
1177 file = fget_light(fd, &fput_needed); 1173 f = fdget(fd);
1178 if (!file) 1174 if (!f.file)
1179 return -EBADF; 1175 return -EBADF;
1180 ret = -ESPIPE; 1176 ret = -ESPIPE;
1181 if (file->f_mode & FMODE_PREAD) 1177 if (f.file->f_mode & FMODE_PREAD)
1182 ret = compat_readv(file, vec, vlen, &pos); 1178 ret = compat_readv(f.file, vec, vlen, &pos);
1183 fput_light(file, fput_needed); 1179 fdput(f);
1184 return ret; 1180 return ret;
1185} 1181}
1186 1182
@@ -1218,15 +1214,16 @@ asmlinkage ssize_t
1218compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, 1214compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1219 unsigned long vlen) 1215 unsigned long vlen)
1220{ 1216{
1221 struct file *file; 1217 struct fd f = fdget(fd);
1222 int fput_needed;
1223 ssize_t ret; 1218 ssize_t ret;
1219 loff_t pos;
1224 1220
1225 file = fget_light(fd, &fput_needed); 1221 if (!f.file)
1226 if (!file)
1227 return -EBADF; 1222 return -EBADF;
1228 ret = compat_writev(file, vec, vlen, &file->f_pos); 1223 pos = f.file->f_pos;
1229 fput_light(file, fput_needed); 1224 ret = compat_writev(f.file, vec, vlen, &pos);
1225 f.file->f_pos = pos;
1226 fdput(f);
1230 return ret; 1227 return ret;
1231} 1228}
1232 1229
@@ -1234,19 +1231,18 @@ asmlinkage ssize_t
1234compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, 1231compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
1235 unsigned long vlen, loff_t pos) 1232 unsigned long vlen, loff_t pos)
1236{ 1233{
1237 struct file *file; 1234 struct fd f;
1238 int fput_needed;
1239 ssize_t ret; 1235 ssize_t ret;
1240 1236
1241 if (pos < 0) 1237 if (pos < 0)
1242 return -EINVAL; 1238 return -EINVAL;
1243 file = fget_light(fd, &fput_needed); 1239 f = fdget(fd);
1244 if (!file) 1240 if (!f.file)
1245 return -EBADF; 1241 return -EBADF;
1246 ret = -ESPIPE; 1242 ret = -ESPIPE;
1247 if (file->f_mode & FMODE_PWRITE) 1243 if (f.file->f_mode & FMODE_PWRITE)
1248 ret = compat_writev(file, vec, vlen, &pos); 1244 ret = compat_writev(f.file, vec, vlen, &pos);
1249 fput_light(file, fput_needed); 1245 fdput(f);
1250 return ret; 1246 return ret;
1251} 1247}
1252 1248
@@ -1796,3 +1792,25 @@ compat_sys_open_by_handle_at(int mountdirfd,
1796 return do_handle_open(mountdirfd, handle, flags); 1792 return do_handle_open(mountdirfd, handle, flags);
1797} 1793}
1798#endif 1794#endif
1795
1796#ifdef __ARCH_WANT_COMPAT_SYS_SENDFILE
1797asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
1798 compat_off_t __user *offset, compat_size_t count)
1799{
1800 loff_t pos;
1801 off_t off;
1802 ssize_t ret;
1803
1804 if (offset) {
1805 if (unlikely(get_user(off, offset)))
1806 return -EFAULT;
1807 pos = off;
1808 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
1809 if (unlikely(put_user(pos, offset)))
1810 return -EFAULT;
1811 return ret;
1812 }
1813
1814 return do_sendfile(out_fd, in_fd, NULL, count, 0);
1815}
1816#endif /* __ARCH_WANT_COMPAT_SYS_SENDFILE */
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 112e45a17e99..a81147e2e4ef 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -38,6 +38,13 @@
38#define elf_addr_t Elf32_Addr 38#define elf_addr_t Elf32_Addr
39 39
40/* 40/*
41 * Some data types as stored in coredump.
42 */
43#define user_long_t compat_long_t
44#define user_siginfo_t compat_siginfo_t
45#define copy_siginfo_to_user copy_siginfo_to_user32
46
47/*
41 * The machine-dependent core note format types are defined in elfcore-compat.h, 48 * The machine-dependent core note format types are defined in elfcore-compat.h,
42 * which requires asm/elf.h to define compat_elf_gregset_t et al. 49 * which requires asm/elf.h to define compat_elf_gregset_t et al.
43 */ 50 */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index debdfe0fc809..f5054025f9da 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -866,6 +866,12 @@ COMPATIBLE_IOCTL(TIOCGPTN)
866COMPATIBLE_IOCTL(TIOCSPTLCK) 866COMPATIBLE_IOCTL(TIOCSPTLCK)
867COMPATIBLE_IOCTL(TIOCSERGETLSR) 867COMPATIBLE_IOCTL(TIOCSERGETLSR)
868COMPATIBLE_IOCTL(TIOCSIG) 868COMPATIBLE_IOCTL(TIOCSIG)
869#ifdef TIOCSRS485
870COMPATIBLE_IOCTL(TIOCSRS485)
871#endif
872#ifdef TIOCGRS485
873COMPATIBLE_IOCTL(TIOCGRS485)
874#endif
869#ifdef TCGETS2 875#ifdef TCGETS2
870COMPATIBLE_IOCTL(TCGETS2) 876COMPATIBLE_IOCTL(TCGETS2)
871COMPATIBLE_IOCTL(TCSETS2) 877COMPATIBLE_IOCTL(TCSETS2)
@@ -897,6 +903,8 @@ COMPATIBLE_IOCTL(KDGKBSENT)
897COMPATIBLE_IOCTL(KDSKBSENT) 903COMPATIBLE_IOCTL(KDSKBSENT)
898COMPATIBLE_IOCTL(KDGKBDIACR) 904COMPATIBLE_IOCTL(KDGKBDIACR)
899COMPATIBLE_IOCTL(KDSKBDIACR) 905COMPATIBLE_IOCTL(KDSKBDIACR)
906COMPATIBLE_IOCTL(KDGKBDIACRUC)
907COMPATIBLE_IOCTL(KDSKBDIACRUC)
900COMPATIBLE_IOCTL(KDKBDREP) 908COMPATIBLE_IOCTL(KDKBDREP)
901COMPATIBLE_IOCTL(KDGKBLED) 909COMPATIBLE_IOCTL(KDGKBLED)
902COMPATIBLE_IOCTL(KDGETLED) 910COMPATIBLE_IOCTL(KDGETLED)
@@ -1531,16 +1539,13 @@ static int compat_ioctl_check_table(unsigned int xcmd)
1531asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1539asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1532 unsigned long arg) 1540 unsigned long arg)
1533{ 1541{
1534 struct file *filp; 1542 struct fd f = fdget(fd);
1535 int error = -EBADF; 1543 int error = -EBADF;
1536 int fput_needed; 1544 if (!f.file)
1537
1538 filp = fget_light(fd, &fput_needed);
1539 if (!filp)
1540 goto out; 1545 goto out;
1541 1546
1542 /* RED-PEN how should LSM module know it's handling 32bit? */ 1547 /* RED-PEN how should LSM module know it's handling 32bit? */
1543 error = security_file_ioctl(filp, cmd, arg); 1548 error = security_file_ioctl(f.file, cmd, arg);
1544 if (error) 1549 if (error)
1545 goto out_fput; 1550 goto out_fput;
1546 1551
@@ -1560,30 +1565,30 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1560#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1565#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
1561 case FS_IOC_RESVSP_32: 1566 case FS_IOC_RESVSP_32:
1562 case FS_IOC_RESVSP64_32: 1567 case FS_IOC_RESVSP64_32:
1563 error = compat_ioctl_preallocate(filp, compat_ptr(arg)); 1568 error = compat_ioctl_preallocate(f.file, compat_ptr(arg));
1564 goto out_fput; 1569 goto out_fput;
1565#else 1570#else
1566 case FS_IOC_RESVSP: 1571 case FS_IOC_RESVSP:
1567 case FS_IOC_RESVSP64: 1572 case FS_IOC_RESVSP64:
1568 error = ioctl_preallocate(filp, compat_ptr(arg)); 1573 error = ioctl_preallocate(f.file, compat_ptr(arg));
1569 goto out_fput; 1574 goto out_fput;
1570#endif 1575#endif
1571 1576
1572 case FIBMAP: 1577 case FIBMAP:
1573 case FIGETBSZ: 1578 case FIGETBSZ:
1574 case FIONREAD: 1579 case FIONREAD:
1575 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 1580 if (S_ISREG(f.file->f_path.dentry->d_inode->i_mode))
1576 break; 1581 break;
1577 /*FALL THROUGH*/ 1582 /*FALL THROUGH*/
1578 1583
1579 default: 1584 default:
1580 if (filp->f_op && filp->f_op->compat_ioctl) { 1585 if (f.file->f_op && f.file->f_op->compat_ioctl) {
1581 error = filp->f_op->compat_ioctl(filp, cmd, arg); 1586 error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
1582 if (error != -ENOIOCTLCMD) 1587 if (error != -ENOIOCTLCMD)
1583 goto out_fput; 1588 goto out_fput;
1584 } 1589 }
1585 1590
1586 if (!filp->f_op || !filp->f_op->unlocked_ioctl) 1591 if (!f.file->f_op || !f.file->f_op->unlocked_ioctl)
1587 goto do_ioctl; 1592 goto do_ioctl;
1588 break; 1593 break;
1589 } 1594 }
@@ -1591,7 +1596,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1591 if (compat_ioctl_check_table(XFORM(cmd))) 1596 if (compat_ioctl_check_table(XFORM(cmd)))
1592 goto found_handler; 1597 goto found_handler;
1593 1598
1594 error = do_ioctl_trans(fd, cmd, arg, filp); 1599 error = do_ioctl_trans(fd, cmd, arg, f.file);
1595 if (error == -ENOIOCTLCMD) 1600 if (error == -ENOIOCTLCMD)
1596 error = -ENOTTY; 1601 error = -ENOTTY;
1597 1602
@@ -1600,9 +1605,9 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1600 found_handler: 1605 found_handler:
1601 arg = (unsigned long)compat_ptr(arg); 1606 arg = (unsigned long)compat_ptr(arg);
1602 do_ioctl: 1607 do_ioctl:
1603 error = do_vfs_ioctl(filp, fd, cmd, arg); 1608 error = do_vfs_ioctl(f.file, fd, cmd, arg);
1604 out_fput: 1609 out_fput:
1605 fput_light(filp, fput_needed); 1610 fdput(f);
1606 out: 1611 out:
1607 return error; 1612 return error;
1608} 1613}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 0074362d9f7f..a9d35b0e06cf 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -79,8 +79,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
79 return -ENOMEM; 79 return -ENOMEM;
80 /* assign default attributes */ 80 /* assign default attributes */
81 sd_iattr->ia_mode = sd->s_mode; 81 sd_iattr->ia_mode = sd->s_mode;
82 sd_iattr->ia_uid = 0; 82 sd_iattr->ia_uid = GLOBAL_ROOT_UID;
83 sd_iattr->ia_gid = 0; 83 sd_iattr->ia_gid = GLOBAL_ROOT_GID;
84 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; 84 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
85 sd->s_iattr = sd_iattr; 85 sd->s_iattr = sd_iattr;
86 } 86 }
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 000000000000..fd37facac8dc
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,692 @@
1#include <linux/slab.h>
2#include <linux/file.h>
3#include <linux/fdtable.h>
4#include <linux/mm.h>
5#include <linux/stat.h>
6#include <linux/fcntl.h>
7#include <linux/swap.h>
8#include <linux/string.h>
9#include <linux/init.h>
10#include <linux/pagemap.h>
11#include <linux/perf_event.h>
12#include <linux/highmem.h>
13#include <linux/spinlock.h>
14#include <linux/key.h>
15#include <linux/personality.h>
16#include <linux/binfmts.h>
17#include <linux/coredump.h>
18#include <linux/utsname.h>
19#include <linux/pid_namespace.h>
20#include <linux/module.h>
21#include <linux/namei.h>
22#include <linux/mount.h>
23#include <linux/security.h>
24#include <linux/syscalls.h>
25#include <linux/tsacct_kern.h>
26#include <linux/cn_proc.h>
27#include <linux/audit.h>
28#include <linux/tracehook.h>
29#include <linux/kmod.h>
30#include <linux/fsnotify.h>
31#include <linux/fs_struct.h>
32#include <linux/pipe_fs_i.h>
33#include <linux/oom.h>
34#include <linux/compat.h>
35
36#include <asm/uaccess.h>
37#include <asm/mmu_context.h>
38#include <asm/tlb.h>
39#include <asm/exec.h>
40
41#include <trace/events/task.h>
42#include "internal.h"
43#include "coredump.h"
44
45#include <trace/events/sched.h>
46
47int core_uses_pid;
48char core_pattern[CORENAME_MAX_SIZE] = "core";
49unsigned int core_pipe_limit;
50
51struct core_name {
52 char *corename;
53 int used, size;
54};
55static atomic_t call_count = ATOMIC_INIT(1);
56
57/* The maximal length of core_pattern is also specified in sysctl.c */
58
59static int expand_corename(struct core_name *cn)
60{
61 char *old_corename = cn->corename;
62
63 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
64 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
65
66 if (!cn->corename) {
67 kfree(old_corename);
68 return -ENOMEM;
69 }
70
71 return 0;
72}
73
74static int cn_printf(struct core_name *cn, const char *fmt, ...)
75{
76 char *cur;
77 int need;
78 int ret;
79 va_list arg;
80
81 va_start(arg, fmt);
82 need = vsnprintf(NULL, 0, fmt, arg);
83 va_end(arg);
84
85 if (likely(need < cn->size - cn->used - 1))
86 goto out_printf;
87
88 ret = expand_corename(cn);
89 if (ret)
90 goto expand_fail;
91
92out_printf:
93 cur = cn->corename + cn->used;
94 va_start(arg, fmt);
95 vsnprintf(cur, need + 1, fmt, arg);
96 va_end(arg);
97 cn->used += need;
98 return 0;
99
100expand_fail:
101 return ret;
102}
103
104static void cn_escape(char *str)
105{
106 for (; *str; str++)
107 if (*str == '/')
108 *str = '!';
109}
110
111static int cn_print_exe_file(struct core_name *cn)
112{
113 struct file *exe_file;
114 char *pathbuf, *path;
115 int ret;
116
117 exe_file = get_mm_exe_file(current->mm);
118 if (!exe_file) {
119 char *commstart = cn->corename + cn->used;
120 ret = cn_printf(cn, "%s (path unknown)", current->comm);
121 cn_escape(commstart);
122 return ret;
123 }
124
125 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
126 if (!pathbuf) {
127 ret = -ENOMEM;
128 goto put_exe_file;
129 }
130
131 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
132 if (IS_ERR(path)) {
133 ret = PTR_ERR(path);
134 goto free_buf;
135 }
136
137 cn_escape(path);
138
139 ret = cn_printf(cn, "%s", path);
140
141free_buf:
142 kfree(pathbuf);
143put_exe_file:
144 fput(exe_file);
145 return ret;
146}
147
148/* format_corename will inspect the pattern parameter, and output a
149 * name into corename, which must have space for at least
150 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
151 */
152static int format_corename(struct core_name *cn, struct coredump_params *cprm)
153{
154 const struct cred *cred = current_cred();
155 const char *pat_ptr = core_pattern;
156 int ispipe = (*pat_ptr == '|');
157 int pid_in_pattern = 0;
158 int err = 0;
159
160 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
161 cn->corename = kmalloc(cn->size, GFP_KERNEL);
162 cn->used = 0;
163
164 if (!cn->corename)
165 return -ENOMEM;
166
167 /* Repeat as long as we have more pattern to process and more output
168 space */
169 while (*pat_ptr) {
170 if (*pat_ptr != '%') {
171 if (*pat_ptr == 0)
172 goto out;
173 err = cn_printf(cn, "%c", *pat_ptr++);
174 } else {
175 switch (*++pat_ptr) {
176 /* single % at the end, drop that */
177 case 0:
178 goto out;
179 /* Double percent, output one percent */
180 case '%':
181 err = cn_printf(cn, "%c", '%');
182 break;
183 /* pid */
184 case 'p':
185 pid_in_pattern = 1;
186 err = cn_printf(cn, "%d",
187 task_tgid_vnr(current));
188 break;
189 /* uid */
190 case 'u':
191 err = cn_printf(cn, "%d", cred->uid);
192 break;
193 /* gid */
194 case 'g':
195 err = cn_printf(cn, "%d", cred->gid);
196 break;
197 case 'd':
198 err = cn_printf(cn, "%d",
199 __get_dumpable(cprm->mm_flags));
200 break;
201 /* signal that caused the coredump */
202 case 's':
203 err = cn_printf(cn, "%ld", cprm->siginfo->si_signo);
204 break;
205 /* UNIX time of coredump */
206 case 't': {
207 struct timeval tv;
208 do_gettimeofday(&tv);
209 err = cn_printf(cn, "%lu", tv.tv_sec);
210 break;
211 }
212 /* hostname */
213 case 'h': {
214 char *namestart = cn->corename + cn->used;
215 down_read(&uts_sem);
216 err = cn_printf(cn, "%s",
217 utsname()->nodename);
218 up_read(&uts_sem);
219 cn_escape(namestart);
220 break;
221 }
222 /* executable */
223 case 'e': {
224 char *commstart = cn->corename + cn->used;
225 err = cn_printf(cn, "%s", current->comm);
226 cn_escape(commstart);
227 break;
228 }
229 case 'E':
230 err = cn_print_exe_file(cn);
231 break;
232 /* core limit size */
233 case 'c':
234 err = cn_printf(cn, "%lu",
235 rlimit(RLIMIT_CORE));
236 break;
237 default:
238 break;
239 }
240 ++pat_ptr;
241 }
242
243 if (err)
244 return err;
245 }
246
247 /* Backward compatibility with core_uses_pid:
248 *
249 * If core_pattern does not include a %p (as is the default)
250 * and core_uses_pid is set, then .%pid will be appended to
251 * the filename. Do not do this for piped commands. */
252 if (!ispipe && !pid_in_pattern && core_uses_pid) {
253 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
254 if (err)
255 return err;
256 }
257out:
258 return ispipe;
259}
260
261static int zap_process(struct task_struct *start, int exit_code)
262{
263 struct task_struct *t;
264 int nr = 0;
265
266 start->signal->flags = SIGNAL_GROUP_EXIT;
267 start->signal->group_exit_code = exit_code;
268 start->signal->group_stop_count = 0;
269
270 t = start;
271 do {
272 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
273 if (t != current && t->mm) {
274 sigaddset(&t->pending.signal, SIGKILL);
275 signal_wake_up(t, 1);
276 nr++;
277 }
278 } while_each_thread(start, t);
279
280 return nr;
281}
282
283static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
284 struct core_state *core_state, int exit_code)
285{
286 struct task_struct *g, *p;
287 unsigned long flags;
288 int nr = -EAGAIN;
289
290 spin_lock_irq(&tsk->sighand->siglock);
291 if (!signal_group_exit(tsk->signal)) {
292 mm->core_state = core_state;
293 nr = zap_process(tsk, exit_code);
294 }
295 spin_unlock_irq(&tsk->sighand->siglock);
296 if (unlikely(nr < 0))
297 return nr;
298
299 if (atomic_read(&mm->mm_users) == nr + 1)
300 goto done;
301 /*
302 * We should find and kill all tasks which use this mm, and we should
303 * count them correctly into ->nr_threads. We don't take tasklist
304 * lock, but this is safe wrt:
305 *
306 * fork:
307 * None of sub-threads can fork after zap_process(leader). All
308 * processes which were created before this point should be
309 * visible to zap_threads() because copy_process() adds the new
310 * process to the tail of init_task.tasks list, and lock/unlock
311 * of ->siglock provides a memory barrier.
312 *
313 * do_exit:
314 * The caller holds mm->mmap_sem. This means that the task which
315 * uses this mm can't pass exit_mm(), so it can't exit or clear
316 * its ->mm.
317 *
318 * de_thread:
319 * It does list_replace_rcu(&leader->tasks, &current->tasks),
320 * we must see either old or new leader, this does not matter.
321 * However, it can change p->sighand, so lock_task_sighand(p)
322 * must be used. Since p->mm != NULL and we hold ->mmap_sem
323 * it can't fail.
324 *
325 * Note also that "g" can be the old leader with ->mm == NULL
326 * and already unhashed and thus removed from ->thread_group.
327 * This is OK, __unhash_process()->list_del_rcu() does not
328 * clear the ->next pointer, we will find the new leader via
329 * next_thread().
330 */
331 rcu_read_lock();
332 for_each_process(g) {
333 if (g == tsk->group_leader)
334 continue;
335 if (g->flags & PF_KTHREAD)
336 continue;
337 p = g;
338 do {
339 if (p->mm) {
340 if (unlikely(p->mm == mm)) {
341 lock_task_sighand(p, &flags);
342 nr += zap_process(p, exit_code);
343 unlock_task_sighand(p, &flags);
344 }
345 break;
346 }
347 } while_each_thread(g, p);
348 }
349 rcu_read_unlock();
350done:
351 atomic_set(&core_state->nr_threads, nr);
352 return nr;
353}
354
355static int coredump_wait(int exit_code, struct core_state *core_state)
356{
357 struct task_struct *tsk = current;
358 struct mm_struct *mm = tsk->mm;
359 int core_waiters = -EBUSY;
360
361 init_completion(&core_state->startup);
362 core_state->dumper.task = tsk;
363 core_state->dumper.next = NULL;
364
365 down_write(&mm->mmap_sem);
366 if (!mm->core_state)
367 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
368 up_write(&mm->mmap_sem);
369
370 if (core_waiters > 0) {
371 struct core_thread *ptr;
372
373 wait_for_completion(&core_state->startup);
374 /*
375 * Wait for all the threads to become inactive, so that
376 * all the thread context (extended register state, like
377 * fpu etc) gets copied to the memory.
378 */
379 ptr = core_state->dumper.next;
380 while (ptr != NULL) {
381 wait_task_inactive(ptr->task, 0);
382 ptr = ptr->next;
383 }
384 }
385
386 return core_waiters;
387}
388
389static void coredump_finish(struct mm_struct *mm)
390{
391 struct core_thread *curr, *next;
392 struct task_struct *task;
393
394 next = mm->core_state->dumper.next;
395 while ((curr = next) != NULL) {
396 next = curr->next;
397 task = curr->task;
398 /*
399 * see exit_mm(), curr->task must not see
400 * ->task == NULL before we read ->next.
401 */
402 smp_mb();
403 curr->task = NULL;
404 wake_up_process(task);
405 }
406
407 mm->core_state = NULL;
408}
409
410static void wait_for_dump_helpers(struct file *file)
411{
412 struct pipe_inode_info *pipe;
413
414 pipe = file->f_path.dentry->d_inode->i_pipe;
415
416 pipe_lock(pipe);
417 pipe->readers++;
418 pipe->writers--;
419
420 while ((pipe->readers > 1) && (!signal_pending(current))) {
421 wake_up_interruptible_sync(&pipe->wait);
422 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
423 pipe_wait(pipe);
424 }
425
426 pipe->readers--;
427 pipe->writers++;
428 pipe_unlock(pipe);
429
430}
431
432/*
433 * umh_pipe_setup
434 * helper function to customize the process used
435 * to collect the core in userspace. Specifically
436 * it sets up a pipe and installs it as fd 0 (stdin)
437 * for the process. Returns 0 on success, or
438 * PTR_ERR on failure.
439 * Note that it also sets the core limit to 1. This
440 * is a special value that we use to trap recursive
441 * core dumps
442 */
443static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
444{
445 struct file *files[2];
446 struct coredump_params *cp = (struct coredump_params *)info->data;
447 int err = create_pipe_files(files, 0);
448 if (err)
449 return err;
450
451 cp->file = files[1];
452
453 replace_fd(0, files[0], 0);
454 /* and disallow core files too */
455 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
456
457 return 0;
458}
459
460void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
461{
462 struct core_state core_state;
463 struct core_name cn;
464 struct mm_struct *mm = current->mm;
465 struct linux_binfmt * binfmt;
466 const struct cred *old_cred;
467 struct cred *cred;
468 int retval = 0;
469 int flag = 0;
470 int ispipe;
471 struct files_struct *displaced;
472 bool need_nonrelative = false;
473 static atomic_t core_dump_count = ATOMIC_INIT(0);
474 struct coredump_params cprm = {
475 .siginfo = siginfo,
476 .regs = regs,
477 .limit = rlimit(RLIMIT_CORE),
478 /*
479 * We must use the same mm->flags while dumping core to avoid
480 * inconsistency of bit flags, since this flag is not protected
481 * by any locks.
482 */
483 .mm_flags = mm->flags,
484 };
485
486 audit_core_dumps(siginfo->si_signo);
487
488 binfmt = mm->binfmt;
489 if (!binfmt || !binfmt->core_dump)
490 goto fail;
491 if (!__get_dumpable(cprm.mm_flags))
492 goto fail;
493
494 cred = prepare_creds();
495 if (!cred)
496 goto fail;
497 /*
498 * We cannot trust fsuid as being the "true" uid of the process
499 * nor do we know its entire history. We only know it was tainted
500 * so we dump it as root in mode 2, and only into a controlled
501 * environment (pipe handler or fully qualified path).
502 */
503 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
504 /* Setuid core dump mode */
505 flag = O_EXCL; /* Stop rewrite attacks */
506 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
507 need_nonrelative = true;
508 }
509
510 retval = coredump_wait(siginfo->si_signo, &core_state);
511 if (retval < 0)
512 goto fail_creds;
513
514 old_cred = override_creds(cred);
515
516 /*
517 * Clear any false indication of pending signals that might
518 * be seen by the filesystem code called to write the core file.
519 */
520 clear_thread_flag(TIF_SIGPENDING);
521
522 ispipe = format_corename(&cn, &cprm);
523
524 if (ispipe) {
525 int dump_count;
526 char **helper_argv;
527
528 if (ispipe < 0) {
529 printk(KERN_WARNING "format_corename failed\n");
530 printk(KERN_WARNING "Aborting core\n");
531 goto fail_corename;
532 }
533
534 if (cprm.limit == 1) {
535 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
536 *
537 * Normally core limits are irrelevant to pipes, since
538 * we're not writing to the file system, but we use
539 * cprm.limit of 1 here as a special value, this is a
540 * consistent way to catch recursive crashes.
541 * We can still crash if the core_pattern binary sets
542 * RLIMIT_CORE to something other than 1, but it runs as root,
543 * and can do lots of stupid things.
544 *
545 * Note that we use task_tgid_vnr here to grab the pid
546 * of the thread group leader. That way we get the
547 * right pid if a thread in a multi-threaded
548 * core_pattern process dies.
549 */
550 printk(KERN_WARNING
551 "Process %d(%s) has RLIMIT_CORE set to 1\n",
552 task_tgid_vnr(current), current->comm);
553 printk(KERN_WARNING "Aborting core\n");
554 goto fail_unlock;
555 }
556 cprm.limit = RLIM_INFINITY;
557
558 dump_count = atomic_inc_return(&core_dump_count);
559 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
560 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
561 task_tgid_vnr(current), current->comm);
562 printk(KERN_WARNING "Skipping core dump\n");
563 goto fail_dropcount;
564 }
565
566 helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
567 if (!helper_argv) {
568 printk(KERN_WARNING "%s failed to allocate memory\n",
569 __func__);
570 goto fail_dropcount;
571 }
572
573 retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
574 NULL, UMH_WAIT_EXEC, umh_pipe_setup,
575 NULL, &cprm);
576 argv_free(helper_argv);
577 if (retval) {
578 printk(KERN_INFO "Core dump to %s pipe failed\n",
579 cn.corename);
580 goto close_fail;
581 }
582 } else {
583 struct inode *inode;
584
585 if (cprm.limit < binfmt->min_coredump)
586 goto fail_unlock;
587
588 if (need_nonrelative && cn.corename[0] != '/') {
589 printk(KERN_WARNING "Pid %d(%s) can only dump core "\
590 "to fully qualified path!\n",
591 task_tgid_vnr(current), current->comm);
592 printk(KERN_WARNING "Skipping core dump\n");
593 goto fail_unlock;
594 }
595
596 cprm.file = filp_open(cn.corename,
597 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
598 0600);
599 if (IS_ERR(cprm.file))
600 goto fail_unlock;
601
602 inode = cprm.file->f_path.dentry->d_inode;
603 if (inode->i_nlink > 1)
604 goto close_fail;
605 if (d_unhashed(cprm.file->f_path.dentry))
606 goto close_fail;
607 /*
608 * AK: actually I see no reason not to allow this for named
609 * pipes etc, but keep the previous behaviour for now.
610 */
611 if (!S_ISREG(inode->i_mode))
612 goto close_fail;
613 /*
614 * Don't let local users get cute and trick others to coredump
615 * into their pre-created files.
616 */
617 if (!uid_eq(inode->i_uid, current_fsuid()))
618 goto close_fail;
619 if (!cprm.file->f_op || !cprm.file->f_op->write)
620 goto close_fail;
621 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
622 goto close_fail;
623 }
624
625 /* get us an unshared descriptor table; almost always a no-op */
626 retval = unshare_files(&displaced);
627 if (retval)
628 goto close_fail;
629 if (displaced)
630 put_files_struct(displaced);
631 retval = binfmt->core_dump(&cprm);
632 if (retval)
633 current->signal->group_exit_code |= 0x80;
634
635 if (ispipe && core_pipe_limit)
636 wait_for_dump_helpers(cprm.file);
637close_fail:
638 if (cprm.file)
639 filp_close(cprm.file, NULL);
640fail_dropcount:
641 if (ispipe)
642 atomic_dec(&core_dump_count);
643fail_unlock:
644 kfree(cn.corename);
645fail_corename:
646 coredump_finish(mm);
647 revert_creds(old_cred);
648fail_creds:
649 put_cred(cred);
650fail:
651 return;
652}
653
654/*
655 * Core dumping helper functions. These are the only things you should
656 * do on a core-file: use only these functions to write out all the
657 * necessary info.
658 */
659int dump_write(struct file *file, const void *addr, int nr)
660{
661 return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
662}
663EXPORT_SYMBOL(dump_write);
664
665int dump_seek(struct file *file, loff_t off)
666{
667 int ret = 1;
668
669 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
670 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
671 return 0;
672 } else {
673 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
674
675 if (!buf)
676 return 0;
677 while (off > 0) {
678 unsigned long n = off;
679
680 if (n > PAGE_SIZE)
681 n = PAGE_SIZE;
682 if (!dump_write(file, buf, n)) {
683 ret = 0;
684 break;
685 }
686 off -= n;
687 }
688 free_page((unsigned long)buf);
689 }
690 return ret;
691}
692EXPORT_SYMBOL(dump_seek);
diff --git a/fs/coredump.h b/fs/coredump.h
new file mode 100644
index 000000000000..e39ff072110d
--- /dev/null
+++ b/fs/coredump.h
@@ -0,0 +1,6 @@
1#ifndef _FS_COREDUMP_H
2#define _FS_COREDUMP_H
3
4extern int __get_dumpable(unsigned long mm_flags);
5
6#endif
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 28cca01ca9c9..c6c3f91ecf06 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -90,8 +90,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
90 } 90 }
91 91
92 inode->i_mode = cramfs_inode->mode; 92 inode->i_mode = cramfs_inode->mode;
93 inode->i_uid = cramfs_inode->uid; 93 i_uid_write(inode, cramfs_inode->uid);
94 inode->i_gid = cramfs_inode->gid; 94 i_gid_write(inode, cramfs_inode->gid);
95 95
96 /* if the lower 2 bits are zero, the inode contains data */ 96 /* if the lower 2 bits are zero, the inode contains data */
97 if (!(inode->i_ino & 3)) { 97 if (!(inode->i_ino & 3)) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 8086636bf796..3a463d0c4fe8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
389 * Inform try_to_ascend() that we are no longer attached to the 389 * Inform try_to_ascend() that we are no longer attached to the
390 * dentry tree 390 * dentry tree
391 */ 391 */
392 dentry->d_flags |= DCACHE_DISCONNECTED; 392 dentry->d_flags |= DCACHE_DENTRY_KILLED;
393 if (parent) 393 if (parent)
394 spin_unlock(&parent->d_lock); 394 spin_unlock(&parent->d_lock);
395 dentry_iput(dentry); 395 dentry_iput(dentry);
@@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
1048 * or deletion 1048 * or deletion
1049 */ 1049 */
1050 if (new != old->d_parent || 1050 if (new != old->d_parent ||
1051 (old->d_flags & DCACHE_DISCONNECTED) || 1051 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1052 (!locked && read_seqretry(&rename_lock, seq))) { 1052 (!locked && read_seqretry(&rename_lock, seq))) {
1053 spin_unlock(&new->d_lock); 1053 spin_unlock(&new->d_lock);
1054 new = NULL; 1054 new = NULL;
@@ -1134,6 +1134,8 @@ positive:
1134 return 1; 1134 return 1;
1135 1135
1136rename_retry: 1136rename_retry:
1137 if (locked)
1138 goto again;
1137 locked = 1; 1139 locked = 1;
1138 write_seqlock(&rename_lock); 1140 write_seqlock(&rename_lock);
1139 goto again; 1141 goto again;
@@ -1141,7 +1143,7 @@ rename_retry:
1141EXPORT_SYMBOL(have_submounts); 1143EXPORT_SYMBOL(have_submounts);
1142 1144
1143/* 1145/*
1144 * Search the dentry child list for the specified parent, 1146 * Search the dentry child list of the specified parent,
1145 * and move any unused dentries to the end of the unused 1147 * and move any unused dentries to the end of the unused
1146 * list for prune_dcache(). We descend to the next level 1148 * list for prune_dcache(). We descend to the next level
1147 * whenever the d_subdirs list is non-empty and continue 1149 * whenever the d_subdirs list is non-empty and continue
@@ -1236,6 +1238,8 @@ out:
1236rename_retry: 1238rename_retry:
1237 if (found) 1239 if (found)
1238 return found; 1240 return found;
1241 if (locked)
1242 goto again;
1239 locked = 1; 1243 locked = 1;
1240 write_seqlock(&rename_lock); 1244 write_seqlock(&rename_lock);
1241 goto again; 1245 goto again;
@@ -2109,7 +2113,7 @@ again:
2109 inode = dentry->d_inode; 2113 inode = dentry->d_inode;
2110 isdir = S_ISDIR(inode->i_mode); 2114 isdir = S_ISDIR(inode->i_mode);
2111 if (dentry->d_count == 1) { 2115 if (dentry->d_count == 1) {
2112 if (inode && !spin_trylock(&inode->i_lock)) { 2116 if (!spin_trylock(&inode->i_lock)) {
2113 spin_unlock(&dentry->d_lock); 2117 spin_unlock(&dentry->d_lock);
2114 cpu_relax(); 2118 cpu_relax();
2115 goto again; 2119 goto again;
@@ -3035,6 +3039,8 @@ resume:
3035 return; 3039 return;
3036 3040
3037rename_retry: 3041rename_retry:
3042 if (locked)
3043 goto again;
3038 locked = 1; 3044 locked = 1;
3039 write_seqlock(&rename_lock); 3045 write_seqlock(&rename_lock);
3040 goto again; 3046 goto again;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 2340f6978d6e..c5ca6ae5a30c 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -526,73 +526,51 @@ struct array_data {
526 u32 elements; 526 u32 elements;
527}; 527};
528 528
529static int u32_array_open(struct inode *inode, struct file *file) 529static size_t u32_format_array(char *buf, size_t bufsize,
530{ 530 u32 *array, int array_size)
531 file->private_data = NULL;
532 return nonseekable_open(inode, file);
533}
534
535static size_t format_array(char *buf, size_t bufsize, const char *fmt,
536 u32 *array, u32 array_size)
537{ 531{
538 size_t ret = 0; 532 size_t ret = 0;
539 u32 i;
540 533
541 for (i = 0; i < array_size; i++) { 534 while (--array_size >= 0) {
542 size_t len; 535 size_t len;
536 char term = array_size ? ' ' : '\n';
543 537
544 len = snprintf(buf, bufsize, fmt, array[i]); 538 len = snprintf(buf, bufsize, "%u%c", *array++, term);
545 len++; /* ' ' or '\n' */
546 ret += len; 539 ret += len;
547 540
548 if (buf) { 541 buf += len;
549 buf += len; 542 bufsize -= len;
550 bufsize -= len;
551 buf[-1] = (i == array_size-1) ? '\n' : ' ';
552 }
553 } 543 }
554
555 ret++; /* \0 */
556 if (buf)
557 *buf = '\0';
558
559 return ret; 544 return ret;
560} 545}
561 546
562static char *format_array_alloc(const char *fmt, u32 *array, 547static int u32_array_open(struct inode *inode, struct file *file)
563 u32 array_size)
564{ 548{
565 size_t len = format_array(NULL, 0, fmt, array, array_size); 549 struct array_data *data = inode->i_private;
566 char *ret; 550 int size, elements = data->elements;
567 551 char *buf;
568 ret = kmalloc(len, GFP_KERNEL); 552
569 if (ret == NULL) 553 /*
570 return NULL; 554 * Max size:
555 * - 10 digits + ' '/'\n' = 11 bytes per number
556 * - terminating NUL character
557 */
558 size = elements*11;
559 buf = kmalloc(size+1, GFP_KERNEL);
560 if (!buf)
561 return -ENOMEM;
562 buf[size] = 0;
563
564 file->private_data = buf;
565 u32_format_array(buf, size, data->array, data->elements);
571 566
572 format_array(ret, len, fmt, array, array_size); 567 return nonseekable_open(inode, file);
573 return ret;
574} 568}
575 569
576static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, 570static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
577 loff_t *ppos) 571 loff_t *ppos)
578{ 572{
579 struct inode *inode = file->f_path.dentry->d_inode; 573 size_t size = strlen(file->private_data);
580 struct array_data *data = inode->i_private;
581 size_t size;
582
583 if (*ppos == 0) {
584 if (file->private_data) {
585 kfree(file->private_data);
586 file->private_data = NULL;
587 }
588
589 file->private_data = format_array_alloc("%u", data->array,
590 data->elements);
591 }
592
593 size = 0;
594 if (file->private_data)
595 size = strlen(file->private_data);
596 574
597 return simple_read_from_buffer(buf, len, ppos, 575 return simple_read_from_buffer(buf, len, ppos,
598 file->private_data, size); 576 file->private_data, size);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4733eab34a23..b607d92cdf24 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -28,7 +28,7 @@
28#include <linux/magic.h> 28#include <linux/magic.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30 30
31#define DEBUGFS_DEFAULT_MODE 0755 31#define DEBUGFS_DEFAULT_MODE 0700
32 32
33static struct vfsmount *debugfs_mount; 33static struct vfsmount *debugfs_mount;
34static int debugfs_mount_count; 34static int debugfs_mount_count;
@@ -128,8 +128,8 @@ static inline int debugfs_positive(struct dentry *dentry)
128} 128}
129 129
130struct debugfs_mount_opts { 130struct debugfs_mount_opts {
131 uid_t uid; 131 kuid_t uid;
132 gid_t gid; 132 kgid_t gid;
133 umode_t mode; 133 umode_t mode;
134}; 134};
135 135
@@ -156,6 +156,8 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
156 substring_t args[MAX_OPT_ARGS]; 156 substring_t args[MAX_OPT_ARGS];
157 int option; 157 int option;
158 int token; 158 int token;
159 kuid_t uid;
160 kgid_t gid;
159 char *p; 161 char *p;
160 162
161 opts->mode = DEBUGFS_DEFAULT_MODE; 163 opts->mode = DEBUGFS_DEFAULT_MODE;
@@ -169,12 +171,18 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
169 case Opt_uid: 171 case Opt_uid:
170 if (match_int(&args[0], &option)) 172 if (match_int(&args[0], &option))
171 return -EINVAL; 173 return -EINVAL;
172 opts->uid = option; 174 uid = make_kuid(current_user_ns(), option);
175 if (!uid_valid(uid))
176 return -EINVAL;
177 opts->uid = uid;
173 break; 178 break;
174 case Opt_gid: 179 case Opt_gid:
175 if (match_octal(&args[0], &option)) 180 if (match_octal(&args[0], &option))
176 return -EINVAL; 181 return -EINVAL;
177 opts->gid = option; 182 gid = make_kgid(current_user_ns(), option);
183 if (!gid_valid(gid))
184 return -EINVAL;
185 opts->gid = gid;
178 break; 186 break;
179 case Opt_mode: 187 case Opt_mode:
180 if (match_octal(&args[0], &option)) 188 if (match_octal(&args[0], &option))
@@ -226,10 +234,12 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
226 struct debugfs_fs_info *fsi = root->d_sb->s_fs_info; 234 struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
227 struct debugfs_mount_opts *opts = &fsi->mount_opts; 235 struct debugfs_mount_opts *opts = &fsi->mount_opts;
228 236
229 if (opts->uid != 0) 237 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
230 seq_printf(m, ",uid=%u", opts->uid); 238 seq_printf(m, ",uid=%u",
231 if (opts->gid != 0) 239 from_kuid_munged(&init_user_ns, opts->uid));
232 seq_printf(m, ",gid=%u", opts->gid); 240 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
241 seq_printf(m, ",gid=%u",
242 from_kgid_munged(&init_user_ns, opts->gid));
233 if (opts->mode != DEBUGFS_DEFAULT_MODE) 243 if (opts->mode != DEBUGFS_DEFAULT_MODE)
234 seq_printf(m, ",mode=%o", opts->mode); 244 seq_printf(m, ",mode=%o", opts->mode);
235 245
@@ -291,9 +301,9 @@ static struct file_system_type debug_fs_type = {
291 .kill_sb = kill_litter_super, 301 .kill_sb = kill_litter_super,
292}; 302};
293 303
294struct dentry *__create_file(const char *name, umode_t mode, 304static struct dentry *__create_file(const char *name, umode_t mode,
295 struct dentry *parent, void *data, 305 struct dentry *parent, void *data,
296 const struct file_operations *fops) 306 const struct file_operations *fops)
297{ 307{
298 struct dentry *dentry = NULL; 308 struct dentry *dentry = NULL;
299 int error; 309 int error;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1faf4cb56f39..f86c720dba0e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1062 unsigned long user_addr; 1062 unsigned long user_addr;
1063 size_t bytes; 1063 size_t bytes;
1064 struct buffer_head map_bh = { 0, }; 1064 struct buffer_head map_bh = { 0, };
1065 struct blk_plug plug;
1065 1066
1066 if (rw & WRITE) 1067 if (rw & WRITE)
1067 rw = WRITE_ODIRECT; 1068 rw = WRITE_ODIRECT;
@@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1177 PAGE_SIZE - user_addr / PAGE_SIZE); 1178 PAGE_SIZE - user_addr / PAGE_SIZE);
1178 } 1179 }
1179 1180
1181 blk_start_plug(&plug);
1182
1180 for (seg = 0; seg < nr_segs; seg++) { 1183 for (seg = 0; seg < nr_segs; seg++) {
1181 user_addr = (unsigned long)iov[seg].iov_base; 1184 user_addr = (unsigned long)iov[seg].iov_base;
1182 sdio.size += bytes = iov[seg].iov_len; 1185 sdio.size += bytes = iov[seg].iov_len;
@@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1235 if (sdio.bio) 1238 if (sdio.bio)
1236 dio_bio_submit(dio, &sdio); 1239 dio_bio_submit(dio, &sdio);
1237 1240
1241 blk_finish_plug(&plug);
1242
1238 /* 1243 /*
1239 * It is possible that, we return short IO due to end of file. 1244 * It is possible that, we return short IO due to end of file.
1240 * In that case, we need to release all the pages we got hold on. 1245 * In that case, we need to release all the pages we got hold on.
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 63dc19c54d5a..27a6ba9aaeec 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -15,8 +15,8 @@
15#include "lock.h" 15#include "lock.h"
16#include "user.h" 16#include "user.h"
17 17
18static uint64_t dlm_cb_seq; 18static uint64_t dlm_cb_seq;
19static spinlock_t dlm_cb_seq_spin; 19static DEFINE_SPINLOCK(dlm_cb_seq_spin);
20 20
21static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) 21static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
22{ 22{
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 9ccf7346834a..a0387dd8b1f0 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -750,6 +750,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
750static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) 750static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
751{ 751{
752 struct sockaddr_storage *addr; 752 struct sockaddr_storage *addr;
753 int rv;
753 754
754 if (len != sizeof(struct sockaddr_storage)) 755 if (len != sizeof(struct sockaddr_storage))
755 return -EINVAL; 756 return -EINVAL;
@@ -762,6 +763,13 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
762 return -ENOMEM; 763 return -ENOMEM;
763 764
764 memcpy(addr, buf, len); 765 memcpy(addr, buf, len);
766
767 rv = dlm_lowcomms_addr(cm->nodeid, addr, len);
768 if (rv) {
769 kfree(addr);
770 return rv;
771 }
772
765 cm->addr[cm->addr_count++] = addr; 773 cm->addr[cm->addr_count++] = addr;
766 return len; 774 return len;
767} 775}
@@ -878,34 +886,7 @@ static void put_space(struct dlm_space *sp)
878 config_item_put(&sp->group.cg_item); 886 config_item_put(&sp->group.cg_item);
879} 887}
880 888
881static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) 889static struct dlm_comm *get_comm(int nodeid)
882{
883 switch (x->ss_family) {
884 case AF_INET: {
885 struct sockaddr_in *sinx = (struct sockaddr_in *)x;
886 struct sockaddr_in *siny = (struct sockaddr_in *)y;
887 if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
888 return 0;
889 if (sinx->sin_port != siny->sin_port)
890 return 0;
891 break;
892 }
893 case AF_INET6: {
894 struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
895 struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
896 if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
897 return 0;
898 if (sinx->sin6_port != siny->sin6_port)
899 return 0;
900 break;
901 }
902 default:
903 return 0;
904 }
905 return 1;
906}
907
908static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
909{ 890{
910 struct config_item *i; 891 struct config_item *i;
911 struct dlm_comm *cm = NULL; 892 struct dlm_comm *cm = NULL;
@@ -919,19 +900,11 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
919 list_for_each_entry(i, &comm_list->cg_children, ci_entry) { 900 list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
920 cm = config_item_to_comm(i); 901 cm = config_item_to_comm(i);
921 902
922 if (nodeid) { 903 if (cm->nodeid != nodeid)
923 if (cm->nodeid != nodeid) 904 continue;
924 continue; 905 found = 1;
925 found = 1; 906 config_item_get(i);
926 config_item_get(i); 907 break;
927 break;
928 } else {
929 if (!cm->addr_count || !addr_compare(cm->addr[0], addr))
930 continue;
931 found = 1;
932 config_item_get(i);
933 break;
934 }
935 } 908 }
936 mutex_unlock(&clusters_root.subsys.su_mutex); 909 mutex_unlock(&clusters_root.subsys.su_mutex);
937 910
@@ -995,7 +968,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
995 968
996int dlm_comm_seq(int nodeid, uint32_t *seq) 969int dlm_comm_seq(int nodeid, uint32_t *seq)
997{ 970{
998 struct dlm_comm *cm = get_comm(nodeid, NULL); 971 struct dlm_comm *cm = get_comm(nodeid);
999 if (!cm) 972 if (!cm)
1000 return -EEXIST; 973 return -EEXIST;
1001 *seq = cm->seq; 974 *seq = cm->seq;
@@ -1003,28 +976,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
1003 return 0; 976 return 0;
1004} 977}
1005 978
1006int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
1007{
1008 struct dlm_comm *cm = get_comm(nodeid, NULL);
1009 if (!cm)
1010 return -EEXIST;
1011 if (!cm->addr_count)
1012 return -ENOENT;
1013 memcpy(addr, cm->addr[0], sizeof(*addr));
1014 put_comm(cm);
1015 return 0;
1016}
1017
1018int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
1019{
1020 struct dlm_comm *cm = get_comm(0, addr);
1021 if (!cm)
1022 return -EEXIST;
1023 *nodeid = cm->nodeid;
1024 put_comm(cm);
1025 return 0;
1026}
1027
1028int dlm_our_nodeid(void) 979int dlm_our_nodeid(void)
1029{ 980{
1030 return local_comm ? local_comm->nodeid : 0; 981 return local_comm ? local_comm->nodeid : 0;
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index dbd35a08f3a5..f30697bc2780 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -46,8 +46,6 @@ void dlm_config_exit(void);
46int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, 46int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
47 int *count_out); 47 int *count_out);
48int dlm_comm_seq(int nodeid, uint32_t *seq); 48int dlm_comm_seq(int nodeid, uint32_t *seq);
49int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
50int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
51int dlm_our_nodeid(void); 49int dlm_our_nodeid(void);
52int dlm_our_addr(struct sockaddr_storage *addr, int num); 50int dlm_our_addr(struct sockaddr_storage *addr, int num);
53 51
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 9d3e485f88c8..871c1abf6029 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -604,6 +604,7 @@ struct dlm_ls {
604 struct idr ls_recover_idr; 604 struct idr ls_recover_idr;
605 spinlock_t ls_recover_idr_lock; 605 spinlock_t ls_recover_idr_lock;
606 wait_queue_head_t ls_wait_general; 606 wait_queue_head_t ls_wait_general;
607 wait_queue_head_t ls_recover_lock_wait;
607 struct mutex ls_clear_proc_locks; 608 struct mutex ls_clear_proc_locks;
608 609
609 struct list_head ls_root_list; /* root resources */ 610 struct list_head ls_root_list; /* root resources */
@@ -616,15 +617,40 @@ struct dlm_ls {
616 char ls_name[1]; 617 char ls_name[1];
617}; 618};
618 619
619#define LSFL_WORK 0 620/*
620#define LSFL_RUNNING 1 621 * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
621#define LSFL_RECOVERY_STOP 2 622 * that they should abort what they're doing so new recovery can be started.
622#define LSFL_RCOM_READY 3 623 *
623#define LSFL_RCOM_WAIT 4 624 * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
624#define LSFL_UEVENT_WAIT 5 625 * should do down_write() on the in_recovery rw_semaphore. (doing down_write
625#define LSFL_TIMEWARN 6 626 * within dlm_ls_stop causes complaints about the lock acquired/released
626#define LSFL_CB_DELAY 7 627 * in different contexts.)
627#define LSFL_NODIR 8 628 *
629 * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
630 * It sets this after it is done with down_write() on the in_recovery
631 * rw_semaphore and clears it after it has released the rw_semaphore.
632 *
633 * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
634 * should begin recovery of the lockspace.
635 *
636 * LSFL_RUNNING - set when normal locking activity is enabled.
637 * dlm_ls_stop() clears this to tell dlm locking routines that they should
638 * quit what they are doing so recovery can run. dlm_recoverd sets
639 * this after recovery is finished.
640 */
641
642#define LSFL_RECOVER_STOP 0
643#define LSFL_RECOVER_DOWN 1
644#define LSFL_RECOVER_LOCK 2
645#define LSFL_RECOVER_WORK 3
646#define LSFL_RUNNING 4
647
648#define LSFL_RCOM_READY 5
649#define LSFL_RCOM_WAIT 6
650#define LSFL_UEVENT_WAIT 7
651#define LSFL_TIMEWARN 8
652#define LSFL_CB_DELAY 9
653#define LSFL_NODIR 10
628 654
629/* much of this is just saving user space pointers associated with the 655/* much of this is just saving user space pointers associated with the
630 lock that we pass back to the user lib with an ast */ 656 lock that we pass back to the user lib with an ast */
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
667 693
668static inline int dlm_recovery_stopped(struct dlm_ls *ls) 694static inline int dlm_recovery_stopped(struct dlm_ls *ls)
669{ 695{
670 return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 696 return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
671} 697}
672 698
673static inline int dlm_no_directory(struct dlm_ls *ls) 699static inline int dlm_no_directory(struct dlm_ls *ls)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 952557d00ccd..2e99fb0c9737 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
582 INIT_LIST_HEAD(&ls->ls_root_list); 582 INIT_LIST_HEAD(&ls->ls_root_list);
583 init_rwsem(&ls->ls_root_sem); 583 init_rwsem(&ls->ls_root_sem);
584 584
585 down_write(&ls->ls_in_recovery);
586
587 spin_lock(&lslist_lock); 585 spin_lock(&lslist_lock);
588 ls->ls_create_count = 1; 586 ls->ls_create_count = 1;
589 list_add(&ls->ls_list, &lslist); 587 list_add(&ls->ls_list, &lslist);
@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
597 } 595 }
598 } 596 }
599 597
600 /* needs to find ls in lslist */ 598 init_waitqueue_head(&ls->ls_recover_lock_wait);
599
600 /*
601 * Once started, dlm_recoverd first looks for ls in lslist, then
602 * initializes ls_in_recovery as locked in "down" mode. We need
603 * to wait for the wakeup from dlm_recoverd because in_recovery
604 * has to start out in down mode.
605 */
606
601 error = dlm_recoverd_start(ls); 607 error = dlm_recoverd_start(ls);
602 if (error) { 608 if (error) {
603 log_error(ls, "can't start dlm_recoverd %d", error); 609 log_error(ls, "can't start dlm_recoverd %d", error);
604 goto out_callback; 610 goto out_callback;
605 } 611 }
606 612
613 wait_event(ls->ls_recover_lock_wait,
614 test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
615
607 ls->ls_kobj.kset = dlm_kset; 616 ls->ls_kobj.kset = dlm_kset;
608 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, 617 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
609 "%s", ls->ls_name); 618 "%s", ls->ls_name);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5c1b0e38c7a4..331ea4f94efd 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -140,6 +140,16 @@ struct writequeue_entry {
140 struct connection *con; 140 struct connection *con;
141}; 141};
142 142
143struct dlm_node_addr {
144 struct list_head list;
145 int nodeid;
146 int addr_count;
147 struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
148};
149
150static LIST_HEAD(dlm_node_addrs);
151static DEFINE_SPINLOCK(dlm_node_addrs_spin);
152
143static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; 153static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
144static int dlm_local_count; 154static int dlm_local_count;
145static int dlm_allow_conn; 155static int dlm_allow_conn;
@@ -264,31 +274,146 @@ static struct connection *assoc2con(int assoc_id)
264 return NULL; 274 return NULL;
265} 275}
266 276
267static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) 277static struct dlm_node_addr *find_node_addr(int nodeid)
278{
279 struct dlm_node_addr *na;
280
281 list_for_each_entry(na, &dlm_node_addrs, list) {
282 if (na->nodeid == nodeid)
283 return na;
284 }
285 return NULL;
286}
287
288static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
289{
290 switch (x->ss_family) {
291 case AF_INET: {
292 struct sockaddr_in *sinx = (struct sockaddr_in *)x;
293 struct sockaddr_in *siny = (struct sockaddr_in *)y;
294 if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
295 return 0;
296 if (sinx->sin_port != siny->sin_port)
297 return 0;
298 break;
299 }
300 case AF_INET6: {
301 struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
302 struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
303 if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
304 return 0;
305 if (sinx->sin6_port != siny->sin6_port)
306 return 0;
307 break;
308 }
309 default:
310 return 0;
311 }
312 return 1;
313}
314
315static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
316 struct sockaddr *sa_out)
268{ 317{
269 struct sockaddr_storage addr; 318 struct sockaddr_storage sas;
270 int error; 319 struct dlm_node_addr *na;
271 320
272 if (!dlm_local_count) 321 if (!dlm_local_count)
273 return -1; 322 return -1;
274 323
275 error = dlm_nodeid_to_addr(nodeid, &addr); 324 spin_lock(&dlm_node_addrs_spin);
276 if (error) 325 na = find_node_addr(nodeid);
277 return error; 326 if (na && na->addr_count)
327 memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage));
328 spin_unlock(&dlm_node_addrs_spin);
329
330 if (!na)
331 return -EEXIST;
332
333 if (!na->addr_count)
334 return -ENOENT;
335
336 if (sas_out)
337 memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));
338
339 if (!sa_out)
340 return 0;
278 341
279 if (dlm_local_addr[0]->ss_family == AF_INET) { 342 if (dlm_local_addr[0]->ss_family == AF_INET) {
280 struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; 343 struct sockaddr_in *in4 = (struct sockaddr_in *) &sas;
281 struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; 344 struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out;
282 ret4->sin_addr.s_addr = in4->sin_addr.s_addr; 345 ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
283 } else { 346 } else {
284 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; 347 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas;
285 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; 348 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out;
286 ret6->sin6_addr = in6->sin6_addr; 349 ret6->sin6_addr = in6->sin6_addr;
287 } 350 }
288 351
289 return 0; 352 return 0;
290} 353}
291 354
355static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
356{
357 struct dlm_node_addr *na;
358 int rv = -EEXIST;
359
360 spin_lock(&dlm_node_addrs_spin);
361 list_for_each_entry(na, &dlm_node_addrs, list) {
362 if (!na->addr_count)
363 continue;
364
365 if (!addr_compare(na->addr[0], addr))
366 continue;
367
368 *nodeid = na->nodeid;
369 rv = 0;
370 break;
371 }
372 spin_unlock(&dlm_node_addrs_spin);
373 return rv;
374}
375
376int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
377{
378 struct sockaddr_storage *new_addr;
379 struct dlm_node_addr *new_node, *na;
380
381 new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS);
382 if (!new_node)
383 return -ENOMEM;
384
385 new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS);
386 if (!new_addr) {
387 kfree(new_node);
388 return -ENOMEM;
389 }
390
391 memcpy(new_addr, addr, len);
392
393 spin_lock(&dlm_node_addrs_spin);
394 na = find_node_addr(nodeid);
395 if (!na) {
396 new_node->nodeid = nodeid;
397 new_node->addr[0] = new_addr;
398 new_node->addr_count = 1;
399 list_add(&new_node->list, &dlm_node_addrs);
400 spin_unlock(&dlm_node_addrs_spin);
401 return 0;
402 }
403
404 if (na->addr_count >= DLM_MAX_ADDR_COUNT) {
405 spin_unlock(&dlm_node_addrs_spin);
406 kfree(new_addr);
407 kfree(new_node);
408 return -ENOSPC;
409 }
410
411 na->addr[na->addr_count++] = new_addr;
412 spin_unlock(&dlm_node_addrs_spin);
413 kfree(new_node);
414 return 0;
415}
416
292/* Data available on socket or listen socket received a connect */ 417/* Data available on socket or listen socket received a connect */
293static void lowcomms_data_ready(struct sock *sk, int count_unused) 418static void lowcomms_data_ready(struct sock *sk, int count_unused)
294{ 419{
@@ -348,7 +473,7 @@ int dlm_lowcomms_connect_node(int nodeid)
348} 473}
349 474
350/* Make a socket active */ 475/* Make a socket active */
351static int add_sock(struct socket *sock, struct connection *con) 476static void add_sock(struct socket *sock, struct connection *con)
352{ 477{
353 con->sock = sock; 478 con->sock = sock;
354 479
@@ -358,7 +483,6 @@ static int add_sock(struct socket *sock, struct connection *con)
358 con->sock->sk->sk_state_change = lowcomms_state_change; 483 con->sock->sk->sk_state_change = lowcomms_state_change;
359 con->sock->sk->sk_user_data = con; 484 con->sock->sk->sk_user_data = con;
360 con->sock->sk->sk_allocation = GFP_NOFS; 485 con->sock->sk->sk_allocation = GFP_NOFS;
361 return 0;
362} 486}
363 487
364/* Add the port number to an IPv6 or 4 sockaddr and return the address 488/* Add the port number to an IPv6 or 4 sockaddr and return the address
@@ -510,7 +634,7 @@ static void process_sctp_notification(struct connection *con,
510 return; 634 return;
511 } 635 }
512 make_sockaddr(&prim.ssp_addr, 0, &addr_len); 636 make_sockaddr(&prim.ssp_addr, 0, &addr_len);
513 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 637 if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
514 unsigned char *b=(unsigned char *)&prim.ssp_addr; 638 unsigned char *b=(unsigned char *)&prim.ssp_addr;
515 log_print("reject connect from unknown addr"); 639 log_print("reject connect from unknown addr");
516 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 640 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
@@ -747,7 +871,7 @@ static int tcp_accept_from_sock(struct connection *con)
747 871
748 /* Get the new node's NODEID */ 872 /* Get the new node's NODEID */
749 make_sockaddr(&peeraddr, 0, &len); 873 make_sockaddr(&peeraddr, 0, &len);
750 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 874 if (addr_to_nodeid(&peeraddr, &nodeid)) {
751 unsigned char *b=(unsigned char *)&peeraddr; 875 unsigned char *b=(unsigned char *)&peeraddr;
752 log_print("connect from non cluster node"); 876 log_print("connect from non cluster node");
753 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 877 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
@@ -862,7 +986,7 @@ static void sctp_init_assoc(struct connection *con)
862 if (con->retries++ > MAX_CONNECT_RETRIES) 986 if (con->retries++ > MAX_CONNECT_RETRIES)
863 return; 987 return;
864 988
865 if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { 989 if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) {
866 log_print("no address for nodeid %d", con->nodeid); 990 log_print("no address for nodeid %d", con->nodeid);
867 return; 991 return;
868 } 992 }
@@ -928,11 +1052,11 @@ static void sctp_init_assoc(struct connection *con)
928/* Connect a new socket to its peer */ 1052/* Connect a new socket to its peer */
929static void tcp_connect_to_sock(struct connection *con) 1053static void tcp_connect_to_sock(struct connection *con)
930{ 1054{
931 int result = -EHOSTUNREACH;
932 struct sockaddr_storage saddr, src_addr; 1055 struct sockaddr_storage saddr, src_addr;
933 int addr_len; 1056 int addr_len;
934 struct socket *sock = NULL; 1057 struct socket *sock = NULL;
935 int one = 1; 1058 int one = 1;
1059 int result;
936 1060
937 if (con->nodeid == 0) { 1061 if (con->nodeid == 0) {
938 log_print("attempt to connect sock 0 foiled"); 1062 log_print("attempt to connect sock 0 foiled");
@@ -944,10 +1068,8 @@ static void tcp_connect_to_sock(struct connection *con)
944 goto out; 1068 goto out;
945 1069
946 /* Some odd races can cause double-connects, ignore them */ 1070 /* Some odd races can cause double-connects, ignore them */
947 if (con->sock) { 1071 if (con->sock)
948 result = 0;
949 goto out; 1072 goto out;
950 }
951 1073
952 /* Create a socket to communicate with */ 1074 /* Create a socket to communicate with */
953 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, 1075 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
@@ -956,8 +1078,11 @@ static void tcp_connect_to_sock(struct connection *con)
956 goto out_err; 1078 goto out_err;
957 1079
958 memset(&saddr, 0, sizeof(saddr)); 1080 memset(&saddr, 0, sizeof(saddr));
959 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) 1081 result = nodeid_to_addr(con->nodeid, &saddr, NULL);
1082 if (result < 0) {
1083 log_print("no address for nodeid %d", con->nodeid);
960 goto out_err; 1084 goto out_err;
1085 }
961 1086
962 sock->sk->sk_user_data = con; 1087 sock->sk->sk_user_data = con;
963 con->rx_action = receive_from_sock; 1088 con->rx_action = receive_from_sock;
@@ -983,8 +1108,7 @@ static void tcp_connect_to_sock(struct connection *con)
983 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, 1108 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
984 sizeof(one)); 1109 sizeof(one));
985 1110
986 result = 1111 result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
987 sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
988 O_NONBLOCK); 1112 O_NONBLOCK);
989 if (result == -EINPROGRESS) 1113 if (result == -EINPROGRESS)
990 result = 0; 1114 result = 0;
@@ -1002,11 +1126,17 @@ out_err:
1002 * Some errors are fatal and this list might need adjusting. For other 1126 * Some errors are fatal and this list might need adjusting. For other
1003 * errors we try again until the max number of retries is reached. 1127 * errors we try again until the max number of retries is reached.
1004 */ 1128 */
1005 if (result != -EHOSTUNREACH && result != -ENETUNREACH && 1129 if (result != -EHOSTUNREACH &&
1006 result != -ENETDOWN && result != -EINVAL 1130 result != -ENETUNREACH &&
1007 && result != -EPROTONOSUPPORT) { 1131 result != -ENETDOWN &&
1132 result != -EINVAL &&
1133 result != -EPROTONOSUPPORT) {
1134 log_print("connect %d try %d error %d", con->nodeid,
1135 con->retries, result);
1136 mutex_unlock(&con->sock_mutex);
1137 msleep(1000);
1008 lowcomms_connect_sock(con); 1138 lowcomms_connect_sock(con);
1009 result = 0; 1139 return;
1010 } 1140 }
1011out: 1141out:
1012 mutex_unlock(&con->sock_mutex); 1142 mutex_unlock(&con->sock_mutex);
@@ -1044,10 +1174,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
1044 if (result < 0) { 1174 if (result < 0) {
1045 log_print("Failed to set SO_REUSEADDR on socket: %d", result); 1175 log_print("Failed to set SO_REUSEADDR on socket: %d", result);
1046 } 1176 }
1047 sock->sk->sk_user_data = con;
1048 con->rx_action = tcp_accept_from_sock; 1177 con->rx_action = tcp_accept_from_sock;
1049 con->connect_action = tcp_connect_to_sock; 1178 con->connect_action = tcp_connect_to_sock;
1050 con->sock = sock;
1051 1179
1052 /* Bind to our port */ 1180 /* Bind to our port */
1053 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); 1181 make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
@@ -1358,8 +1486,7 @@ static void send_to_sock(struct connection *con)
1358 } 1486 }
1359 cond_resched(); 1487 cond_resched();
1360 goto out; 1488 goto out;
1361 } 1489 } else if (ret < 0)
1362 if (ret <= 0)
1363 goto send_error; 1490 goto send_error;
1364 } 1491 }
1365 1492
@@ -1376,7 +1503,6 @@ static void send_to_sock(struct connection *con)
1376 if (e->len == 0 && e->users == 0) { 1503 if (e->len == 0 && e->users == 0) {
1377 list_del(&e->list); 1504 list_del(&e->list);
1378 free_entry(e); 1505 free_entry(e);
1379 continue;
1380 } 1506 }
1381 } 1507 }
1382 spin_unlock(&con->writequeue_lock); 1508 spin_unlock(&con->writequeue_lock);
@@ -1394,7 +1520,6 @@ out_connect:
1394 mutex_unlock(&con->sock_mutex); 1520 mutex_unlock(&con->sock_mutex);
1395 if (!test_bit(CF_INIT_PENDING, &con->flags)) 1521 if (!test_bit(CF_INIT_PENDING, &con->flags))
1396 lowcomms_connect_sock(con); 1522 lowcomms_connect_sock(con);
1397 return;
1398} 1523}
1399 1524
1400static void clean_one_writequeue(struct connection *con) 1525static void clean_one_writequeue(struct connection *con)
@@ -1414,6 +1539,7 @@ static void clean_one_writequeue(struct connection *con)
1414int dlm_lowcomms_close(int nodeid) 1539int dlm_lowcomms_close(int nodeid)
1415{ 1540{
1416 struct connection *con; 1541 struct connection *con;
1542 struct dlm_node_addr *na;
1417 1543
1418 log_print("closing connection to node %d", nodeid); 1544 log_print("closing connection to node %d", nodeid);
1419 con = nodeid2con(nodeid, 0); 1545 con = nodeid2con(nodeid, 0);
@@ -1428,6 +1554,17 @@ int dlm_lowcomms_close(int nodeid)
1428 clean_one_writequeue(con); 1554 clean_one_writequeue(con);
1429 close_connection(con, true); 1555 close_connection(con, true);
1430 } 1556 }
1557
1558 spin_lock(&dlm_node_addrs_spin);
1559 na = find_node_addr(nodeid);
1560 if (na) {
1561 list_del(&na->list);
1562 while (na->addr_count--)
1563 kfree(na->addr[na->addr_count]);
1564 kfree(na);
1565 }
1566 spin_unlock(&dlm_node_addrs_spin);
1567
1431 return 0; 1568 return 0;
1432} 1569}
1433 1570
@@ -1577,3 +1714,17 @@ fail_destroy:
1577fail: 1714fail:
1578 return error; 1715 return error;
1579} 1716}
1717
1718void dlm_lowcomms_exit(void)
1719{
1720 struct dlm_node_addr *na, *safe;
1721
1722 spin_lock(&dlm_node_addrs_spin);
1723 list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) {
1724 list_del(&na->list);
1725 while (na->addr_count--)
1726 kfree(na->addr[na->addr_count]);
1727 kfree(na);
1728 }
1729 spin_unlock(&dlm_node_addrs_spin);
1730}
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 1311e6426287..67462e54fc2f 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -16,10 +16,12 @@
16 16
17int dlm_lowcomms_start(void); 17int dlm_lowcomms_start(void);
18void dlm_lowcomms_stop(void); 18void dlm_lowcomms_stop(void);
19void dlm_lowcomms_exit(void);
19int dlm_lowcomms_close(int nodeid); 20int dlm_lowcomms_close(int nodeid);
20void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); 21void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
21void dlm_lowcomms_commit_buffer(void *mh); 22void dlm_lowcomms_commit_buffer(void *mh);
22int dlm_lowcomms_connect_node(int nodeid); 23int dlm_lowcomms_connect_node(int nodeid);
24int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
23 25
24#endif /* __LOWCOMMS_DOT_H__ */ 26#endif /* __LOWCOMMS_DOT_H__ */
25 27
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 5a59efa0bb46..079c0bd71ab7 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -17,6 +17,7 @@
17#include "user.h" 17#include "user.h"
18#include "memory.h" 18#include "memory.h"
19#include "config.h" 19#include "config.h"
20#include "lowcomms.h"
20 21
21static int __init init_dlm(void) 22static int __init init_dlm(void)
22{ 23{
@@ -78,6 +79,7 @@ static void __exit exit_dlm(void)
78 dlm_config_exit(); 79 dlm_config_exit();
79 dlm_memory_exit(); 80 dlm_memory_exit();
80 dlm_lockspace_exit(); 81 dlm_lockspace_exit();
82 dlm_lowcomms_exit();
81 dlm_unregister_debugfs(); 83 dlm_unregister_debugfs();
82} 84}
83 85
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 862640a36d5c..476557b54921 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
616 down_write(&ls->ls_recv_active); 616 down_write(&ls->ls_recv_active);
617 617
618 /* 618 /*
619 * Abort any recovery that's in progress (see RECOVERY_STOP, 619 * Abort any recovery that's in progress (see RECOVER_STOP,
620 * dlm_recovery_stopped()) and tell any other threads running in the 620 * dlm_recovery_stopped()) and tell any other threads running in the
621 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). 621 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
622 */ 622 */
623 623
624 spin_lock(&ls->ls_recover_lock); 624 spin_lock(&ls->ls_recover_lock);
625 set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 625 set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
626 new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); 626 new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
627 ls->ls_recover_seq++; 627 ls->ls_recover_seq++;
628 spin_unlock(&ls->ls_recover_lock); 628 spin_unlock(&ls->ls_recover_lock);
@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
642 * when recovery is complete. 642 * when recovery is complete.
643 */ 643 */
644 644
645 if (new) 645 if (new) {
646 down_write(&ls->ls_in_recovery); 646 set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
647 wake_up_process(ls->ls_recoverd_task);
648 wait_event(ls->ls_recover_lock_wait,
649 test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
650 }
647 651
648 /* 652 /*
649 * The recoverd suspend/resume makes sure that dlm_recoverd (if 653 * The recoverd suspend/resume makes sure that dlm_recoverd (if
650 * running) has noticed RECOVERY_STOP above and quit processing the 654 * running) has noticed RECOVER_STOP above and quit processing the
651 * previous recovery. 655 * previous recovery.
652 */ 656 */
653 657
@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
709 kfree(rv_old); 713 kfree(rv_old);
710 } 714 }
711 715
712 dlm_recoverd_kick(ls); 716 set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
717 wake_up_process(ls->ls_recoverd_task);
713 return 0; 718 return 0;
714 719
715 fail: 720 fail:
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index ef17e0169da1..60a327863b11 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -14,7 +14,7 @@
14#include "dlm_internal.h" 14#include "dlm_internal.h"
15 15
16static uint32_t dlm_nl_seqnum; 16static uint32_t dlm_nl_seqnum;
17static uint32_t listener_nlpid; 17static uint32_t listener_nlportid;
18 18
19static struct genl_family family = { 19static struct genl_family family = {
20 .id = GENL_ID_GENERATE, 20 .id = GENL_ID_GENERATE,
@@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb)
64 return rv; 64 return rv;
65 } 65 }
66 66
67 return genlmsg_unicast(&init_net, skb, listener_nlpid); 67 return genlmsg_unicast(&init_net, skb, listener_nlportid);
68} 68}
69 69
70static int user_cmd(struct sk_buff *skb, struct genl_info *info) 70static int user_cmd(struct sk_buff *skb, struct genl_info *info)
71{ 71{
72 listener_nlpid = info->snd_pid; 72 listener_nlportid = info->snd_portid;
73 printk("user_cmd nlpid %u\n", listener_nlpid); 73 printk("user_cmd nlpid %u\n", listener_nlportid);
74 return 0; 74 return 0;
75} 75}
76 76
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 87f1a56eab32..9d61947d473a 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
581 581
582 spin_lock(&ls->ls_recover_lock); 582 spin_lock(&ls->ls_recover_lock);
583 status = ls->ls_recover_status; 583 status = ls->ls_recover_status;
584 stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 584 stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
585 seq = ls->ls_recover_seq; 585 seq = ls->ls_recover_seq;
586 spin_unlock(&ls->ls_recover_lock); 586 spin_unlock(&ls->ls_recover_lock);
587 587
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 88ce65ff021e..32f9f8926ec3 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
41 set_bit(LSFL_RUNNING, &ls->ls_flags); 41 set_bit(LSFL_RUNNING, &ls->ls_flags);
42 /* unblocks processes waiting to enter the dlm */ 42 /* unblocks processes waiting to enter the dlm */
43 up_write(&ls->ls_in_recovery); 43 up_write(&ls->ls_in_recovery);
44 clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
44 error = 0; 45 error = 0;
45 } 46 }
46 spin_unlock(&ls->ls_recover_lock); 47 spin_unlock(&ls->ls_recover_lock);
@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
262 rv = ls->ls_recover_args; 263 rv = ls->ls_recover_args;
263 ls->ls_recover_args = NULL; 264 ls->ls_recover_args = NULL;
264 if (rv && ls->ls_recover_seq == rv->seq) 265 if (rv && ls->ls_recover_seq == rv->seq)
265 clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); 266 clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
266 spin_unlock(&ls->ls_recover_lock); 267 spin_unlock(&ls->ls_recover_lock);
267 268
268 if (rv) { 269 if (rv) {
@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
282 return -1; 283 return -1;
283 } 284 }
284 285
286 down_write(&ls->ls_in_recovery);
287 set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
288 wake_up(&ls->ls_recover_lock_wait);
289
285 while (!kthread_should_stop()) { 290 while (!kthread_should_stop()) {
286 set_current_state(TASK_INTERRUPTIBLE); 291 set_current_state(TASK_INTERRUPTIBLE);
287 if (!test_bit(LSFL_WORK, &ls->ls_flags)) 292 if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
293 !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
288 schedule(); 294 schedule();
289 set_current_state(TASK_RUNNING); 295 set_current_state(TASK_RUNNING);
290 296
291 if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) 297 if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
298 down_write(&ls->ls_in_recovery);
299 set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
300 wake_up(&ls->ls_recover_lock_wait);
301 }
302
303 if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
292 do_ls_recovery(ls); 304 do_ls_recovery(ls);
293 } 305 }
294 306
307 if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
308 up_write(&ls->ls_in_recovery);
309
295 dlm_put_lockspace(ls); 310 dlm_put_lockspace(ls);
296 return 0; 311 return 0;
297} 312}
298 313
299void dlm_recoverd_kick(struct dlm_ls *ls)
300{
301 set_bit(LSFL_WORK, &ls->ls_flags);
302 wake_up_process(ls->ls_recoverd_task);
303}
304
305int dlm_recoverd_start(struct dlm_ls *ls) 314int dlm_recoverd_start(struct dlm_ls *ls)
306{ 315{
307 struct task_struct *p; 316 struct task_struct *p;
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h
index 866657c5d69d..8856079733fa 100644
--- a/fs/dlm/recoverd.h
+++ b/fs/dlm/recoverd.h
@@ -14,7 +14,6 @@
14#ifndef __RECOVERD_DOT_H__ 14#ifndef __RECOVERD_DOT_H__
15#define __RECOVERD_DOT_H__ 15#define __RECOVERD_DOT_H__
16 16
17void dlm_recoverd_kick(struct dlm_ls *ls);
18void dlm_recoverd_stop(struct dlm_ls *ls); 17void dlm_recoverd_stop(struct dlm_ls *ls);
19int dlm_recoverd_start(struct dlm_ls *ls); 18int dlm_recoverd_start(struct dlm_ls *ls);
20void dlm_recoverd_suspend(struct dlm_ls *ls); 19void dlm_recoverd_suspend(struct dlm_ls *ls);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index eb4ed9ba3098..7ff49852b0cb 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,6 +503,13 @@ static ssize_t device_write(struct file *file, const char __user *buf,
503#endif 503#endif
504 return -EINVAL; 504 return -EINVAL;
505 505
506#ifdef CONFIG_COMPAT
507 if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN)
508#else
509 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
510#endif
511 return -EINVAL;
512
506 kbuf = kzalloc(count + 1, GFP_NOFS); 513 kbuf = kzalloc(count + 1, GFP_NOFS);
507 if (!kbuf) 514 if (!kbuf)
508 return -ENOMEM; 515 return -ENOMEM;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 44ce5c6a541d..d45ba4568128 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -275,8 +275,14 @@ out:
275 275
276static int ecryptfs_flush(struct file *file, fl_owner_t td) 276static int ecryptfs_flush(struct file *file, fl_owner_t td)
277{ 277{
278 return file->f_mode & FMODE_WRITE 278 struct file *lower_file = ecryptfs_file_to_lower(file);
279 ? filemap_write_and_wait(file->f_mapping) : 0; 279
280 if (lower_file->f_op && lower_file->f_op->flush) {
281 filemap_write_and_wait(file->f_mapping);
282 return lower_file->f_op->flush(lower_file, td);
283 }
284
285 return 0;
280} 286}
281 287
282static int ecryptfs_release(struct inode *inode, struct file *file) 288static int ecryptfs_release(struct inode *inode, struct file *file)
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 534b129ea676..cc7709e7c508 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -619,6 +619,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
619 struct dentry *lower_old_dir_dentry; 619 struct dentry *lower_old_dir_dentry;
620 struct dentry *lower_new_dir_dentry; 620 struct dentry *lower_new_dir_dentry;
621 struct dentry *trap = NULL; 621 struct dentry *trap = NULL;
622 struct inode *target_inode;
622 623
623 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 624 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
624 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 625 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -626,6 +627,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
626 dget(lower_new_dentry); 627 dget(lower_new_dentry);
627 lower_old_dir_dentry = dget_parent(lower_old_dentry); 628 lower_old_dir_dentry = dget_parent(lower_old_dentry);
628 lower_new_dir_dentry = dget_parent(lower_new_dentry); 629 lower_new_dir_dentry = dget_parent(lower_new_dentry);
630 target_inode = new_dentry->d_inode;
629 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 631 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
630 /* source should not be ancestor of target */ 632 /* source should not be ancestor of target */
631 if (trap == lower_old_dentry) { 633 if (trap == lower_old_dentry) {
@@ -641,6 +643,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
641 lower_new_dir_dentry->d_inode, lower_new_dentry); 643 lower_new_dir_dentry->d_inode, lower_new_dentry);
642 if (rc) 644 if (rc)
643 goto out_lock; 645 goto out_lock;
646 if (target_inode)
647 fsstack_copy_attr_all(target_inode,
648 ecryptfs_inode_to_lower(target_inode));
644 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); 649 fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
645 if (new_dir != old_dir) 650 if (new_dir != old_dir)
646 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); 651 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 2768138eefee..4e0886c9e5c4 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode)
162 inode_info = ecryptfs_inode_to_private(inode); 162 inode_info = ecryptfs_inode_to_private(inode);
163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, 163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
164 &inode_info->lower_file_mutex)) { 164 &inode_info->lower_file_mutex)) {
165 filemap_write_and_wait(inode->i_mapping);
165 fput(inode_info->lower_file); 166 fput(inode_info->lower_file);
166 inode_info->lower_file = NULL; 167 inode_info->lower_file = NULL;
167 mutex_unlock(&inode_info->lower_file_mutex); 168 mutex_unlock(&inode_info->lower_file_mutex);
@@ -544,11 +545,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
544 goto out_free; 545 goto out_free;
545 } 546 }
546 547
547 if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) { 548 if (check_ruid && !uid_eq(path.dentry->d_inode->i_uid, current_uid())) {
548 rc = -EPERM; 549 rc = -EPERM;
549 printk(KERN_ERR "Mount of device (uid: %d) not owned by " 550 printk(KERN_ERR "Mount of device (uid: %d) not owned by "
550 "requested user (uid: %d)\n", 551 "requested user (uid: %d)\n",
551 path.dentry->d_inode->i_uid, current_uid()); 552 i_uid_read(path.dentry->d_inode),
553 from_kuid(&init_user_ns, current_uid()));
552 goto out_free; 554 goto out_free;
553 } 555 }
554 556
@@ -709,6 +711,12 @@ static void ecryptfs_free_kmem_caches(void)
709{ 711{
710 int i; 712 int i;
711 713
714 /*
715 * Make sure all delayed rcu free inodes are flushed before we
716 * destroy cache.
717 */
718 rcu_barrier();
719
712 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) { 720 for (i = 0; i < ARRAY_SIZE(ecryptfs_cache_infos); i++) {
713 struct ecryptfs_cache_info *info; 721 struct ecryptfs_cache_info *info;
714 722
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index b29bb8bfa8d9..5fa2471796c2 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -33,7 +33,7 @@ static struct hlist_head *ecryptfs_daemon_hash;
33struct mutex ecryptfs_daemon_hash_mux; 33struct mutex ecryptfs_daemon_hash_mux;
34static int ecryptfs_hash_bits; 34static int ecryptfs_hash_bits;
35#define ecryptfs_current_euid_hash(uid) \ 35#define ecryptfs_current_euid_hash(uid) \
36 hash_long((unsigned long)current_euid(), ecryptfs_hash_bits) 36 hash_long((unsigned long)from_kuid(&init_user_ns, current_euid()), ecryptfs_hash_bits)
37 37
38static u32 ecryptfs_msg_counter; 38static u32 ecryptfs_msg_counter;
39static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 39static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -121,8 +121,7 @@ int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
121 hlist_for_each_entry(*daemon, elem, 121 hlist_for_each_entry(*daemon, elem,
122 &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()], 122 &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
123 euid_chain) { 123 euid_chain) {
124 if ((*daemon)->file->f_cred->euid == current_euid() && 124 if (uid_eq((*daemon)->file->f_cred->euid, current_euid())) {
125 (*daemon)->file->f_cred->user_ns == current_user_ns()) {
126 rc = 0; 125 rc = 0;
127 goto out; 126 goto out;
128 } 127 }
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index bc84f365d75c..f3913eb2c474 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -97,8 +97,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
97 97
98 inode->i_mode = be16_to_cpu(efs_inode->di_mode); 98 inode->i_mode = be16_to_cpu(efs_inode->di_mode);
99 set_nlink(inode, be16_to_cpu(efs_inode->di_nlink)); 99 set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid); 100 i_uid_write(inode, (uid_t)be16_to_cpu(efs_inode->di_uid));
101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid); 101 i_gid_write(inode, (gid_t)be16_to_cpu(efs_inode->di_gid));
102 inode->i_size = be32_to_cpu(efs_inode->di_size); 102 inode->i_size = be32_to_cpu(efs_inode->di_size);
103 inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime); 103 inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime);
104 inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime); 104 inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index e755ec746c69..2002431ef9a0 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -96,6 +96,11 @@ static int init_inodecache(void)
96 96
97static void destroy_inodecache(void) 97static void destroy_inodecache(void)
98{ 98{
99 /*
100 * Make sure all delayed rcu free inodes are flushed before we
101 * destroy cache.
102 */
103 rcu_barrier();
99 kmem_cache_destroy(efs_inode_cachep); 104 kmem_cache_destroy(efs_inode_cachep);
100} 105}
101 106
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1c8b55670804..da72250ddc1c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
346/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ 346/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
347static inline int ep_op_has_event(int op) 347static inline int ep_op_has_event(int op)
348{ 348{
349 return op != EPOLL_CTL_DEL; 349 return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
350} 350}
351 351
352/* Initialize the poll safe wake up structure */ 352/* Initialize the poll safe wake up structure */
@@ -676,6 +676,34 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
676 return 0; 676 return 0;
677} 677}
678 678
679/*
680 * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
681 * had no event flags set, indicating that another thread may be currently
682 * handling that item's events (in the case that EPOLLONESHOT was being
683 * used). Otherwise a zero result indicates that the item has been disabled
684 * from receiving events. A disabled item may be re-enabled via
685 * EPOLL_CTL_MOD. Must be called with "mtx" held.
686 */
687static int ep_disable(struct eventpoll *ep, struct epitem *epi)
688{
689 int result = 0;
690 unsigned long flags;
691
692 spin_lock_irqsave(&ep->lock, flags);
693 if (epi->event.events & ~EP_PRIVATE_BITS) {
694 if (ep_is_linked(&epi->rdllink))
695 list_del_init(&epi->rdllink);
696 /* Ensure ep_poll_callback will not add epi back onto ready
697 list: */
698 epi->event.events &= EP_PRIVATE_BITS;
699 }
700 else
701 result = -EBUSY;
702 spin_unlock_irqrestore(&ep->lock, flags);
703
704 return result;
705}
706
679static void ep_free(struct eventpoll *ep) 707static void ep_free(struct eventpoll *ep)
680{ 708{
681 struct rb_node *rbp; 709 struct rb_node *rbp;
@@ -1020,8 +1048,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
1020 rb_insert_color(&epi->rbn, &ep->rbr); 1048 rb_insert_color(&epi->rbn, &ep->rbr);
1021} 1049}
1022 1050
1023
1024
1025#define PATH_ARR_SIZE 5 1051#define PATH_ARR_SIZE 5
1026/* 1052/*
1027 * These are the number paths of length 1 to 5, that we are allowing to emanate 1053 * These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1654,8 +1680,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
1654 error = PTR_ERR(file); 1680 error = PTR_ERR(file);
1655 goto out_free_fd; 1681 goto out_free_fd;
1656 } 1682 }
1657 fd_install(fd, file);
1658 ep->file = file; 1683 ep->file = file;
1684 fd_install(fd, file);
1659 return fd; 1685 return fd;
1660 1686
1661out_free_fd: 1687out_free_fd:
@@ -1787,6 +1813,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1787 } else 1813 } else
1788 error = -ENOENT; 1814 error = -ENOENT;
1789 break; 1815 break;
1816 case EPOLL_CTL_DISABLE:
1817 if (epi)
1818 error = ep_disable(ep, epi);
1819 else
1820 error = -ENOENT;
1821 break;
1790 } 1822 }
1791 mutex_unlock(&ep->mtx); 1823 mutex_unlock(&ep->mtx);
1792 1824
@@ -1810,7 +1842,7 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1810 int, maxevents, int, timeout) 1842 int, maxevents, int, timeout)
1811{ 1843{
1812 int error; 1844 int error;
1813 struct file *file; 1845 struct fd f;
1814 struct eventpoll *ep; 1846 struct eventpoll *ep;
1815 1847
1816 /* The maximum number of event must be greater than zero */ 1848 /* The maximum number of event must be greater than zero */
@@ -1818,38 +1850,33 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1818 return -EINVAL; 1850 return -EINVAL;
1819 1851
1820 /* Verify that the area passed by the user is writeable */ 1852 /* Verify that the area passed by the user is writeable */
1821 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event))) { 1853 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event)))
1822 error = -EFAULT; 1854 return -EFAULT;
1823 goto error_return;
1824 }
1825 1855
1826 /* Get the "struct file *" for the eventpoll file */ 1856 /* Get the "struct file *" for the eventpoll file */
1827 error = -EBADF; 1857 f = fdget(epfd);
1828 file = fget(epfd); 1858 if (!f.file)
1829 if (!file) 1859 return -EBADF;
1830 goto error_return;
1831 1860
1832 /* 1861 /*
1833 * We have to check that the file structure underneath the fd 1862 * We have to check that the file structure underneath the fd
1834 * the user passed to us _is_ an eventpoll file. 1863 * the user passed to us _is_ an eventpoll file.
1835 */ 1864 */
1836 error = -EINVAL; 1865 error = -EINVAL;
1837 if (!is_file_epoll(file)) 1866 if (!is_file_epoll(f.file))
1838 goto error_fput; 1867 goto error_fput;
1839 1868
1840 /* 1869 /*
1841 * At this point it is safe to assume that the "private_data" contains 1870 * At this point it is safe to assume that the "private_data" contains
1842 * our own data structure. 1871 * our own data structure.
1843 */ 1872 */
1844 ep = file->private_data; 1873 ep = f.file->private_data;
1845 1874
1846 /* Time to fish for events ... */ 1875 /* Time to fish for events ... */
1847 error = ep_poll(ep, events, maxevents, timeout); 1876 error = ep_poll(ep, events, maxevents, timeout);
1848 1877
1849error_fput: 1878error_fput:
1850 fput(file); 1879 fdput(f);
1851error_return:
1852
1853 return error; 1880 return error;
1854} 1881}
1855 1882
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4de4ec3..4f2bebc276c5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,22 +63,12 @@
63 63
64#include <trace/events/task.h> 64#include <trace/events/task.h>
65#include "internal.h" 65#include "internal.h"
66#include "coredump.h"
66 67
67#include <trace/events/sched.h> 68#include <trace/events/sched.h>
68 69
69int core_uses_pid;
70char core_pattern[CORENAME_MAX_SIZE] = "core";
71unsigned int core_pipe_limit;
72int suid_dumpable = 0; 70int suid_dumpable = 0;
73 71
74struct core_name {
75 char *corename;
76 int used, size;
77};
78static atomic_t call_count = ATOMIC_INIT(1);
79
80/* The maximal length of core_pattern is also specified in sysctl.c */
81
82static LIST_HEAD(formats); 72static LIST_HEAD(formats);
83static DEFINE_RWLOCK(binfmt_lock); 73static DEFINE_RWLOCK(binfmt_lock);
84 74
@@ -613,7 +603,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
613 * process cleanup to remove whatever mess we made. 603 * process cleanup to remove whatever mess we made.
614 */ 604 */
615 if (length != move_page_tables(vma, old_start, 605 if (length != move_page_tables(vma, old_start,
616 vma, new_start, length)) 606 vma, new_start, length, false))
617 return -ENOMEM; 607 return -ENOMEM;
618 608
619 lru_add_drain(); 609 lru_add_drain();
@@ -888,9 +878,11 @@ static int de_thread(struct task_struct *tsk)
888 sig->notify_count--; 878 sig->notify_count--;
889 879
890 while (sig->notify_count) { 880 while (sig->notify_count) {
891 __set_current_state(TASK_UNINTERRUPTIBLE); 881 __set_current_state(TASK_KILLABLE);
892 spin_unlock_irq(lock); 882 spin_unlock_irq(lock);
893 schedule(); 883 schedule();
884 if (unlikely(__fatal_signal_pending(tsk)))
885 goto killed;
894 spin_lock_irq(lock); 886 spin_lock_irq(lock);
895 } 887 }
896 spin_unlock_irq(lock); 888 spin_unlock_irq(lock);
@@ -908,9 +900,11 @@ static int de_thread(struct task_struct *tsk)
908 write_lock_irq(&tasklist_lock); 900 write_lock_irq(&tasklist_lock);
909 if (likely(leader->exit_state)) 901 if (likely(leader->exit_state))
910 break; 902 break;
911 __set_current_state(TASK_UNINTERRUPTIBLE); 903 __set_current_state(TASK_KILLABLE);
912 write_unlock_irq(&tasklist_lock); 904 write_unlock_irq(&tasklist_lock);
913 schedule(); 905 schedule();
906 if (unlikely(__fatal_signal_pending(tsk)))
907 goto killed;
914 } 908 }
915 909
916 /* 910 /*
@@ -1004,40 +998,14 @@ no_thread_group:
1004 998
1005 BUG_ON(!thread_group_leader(tsk)); 999 BUG_ON(!thread_group_leader(tsk));
1006 return 0; 1000 return 0;
1007}
1008
1009/*
1010 * These functions flushes out all traces of the currently running executable
1011 * so that a new one can be started
1012 */
1013static void flush_old_files(struct files_struct * files)
1014{
1015 long j = -1;
1016 struct fdtable *fdt;
1017
1018 spin_lock(&files->file_lock);
1019 for (;;) {
1020 unsigned long set, i;
1021 1001
1022 j++; 1002killed:
1023 i = j * BITS_PER_LONG; 1003 /* protects against exit_notify() and __exit_signal() */
1024 fdt = files_fdtable(files); 1004 read_lock(&tasklist_lock);
1025 if (i >= fdt->max_fds) 1005 sig->group_exit_task = NULL;
1026 break; 1006 sig->notify_count = 0;
1027 set = fdt->close_on_exec[j]; 1007 read_unlock(&tasklist_lock);
1028 if (!set) 1008 return -EAGAIN;
1029 continue;
1030 fdt->close_on_exec[j] = 0;
1031 spin_unlock(&files->file_lock);
1032 for ( ; set ; i++,set >>= 1) {
1033 if (set & 1) {
1034 sys_close(i);
1035 }
1036 }
1037 spin_lock(&files->file_lock);
1038
1039 }
1040 spin_unlock(&files->file_lock);
1041} 1009}
1042 1010
1043char *get_task_comm(char *buf, struct task_struct *tsk) 1011char *get_task_comm(char *buf, struct task_struct *tsk)
@@ -1050,6 +1018,11 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
1050} 1018}
1051EXPORT_SYMBOL_GPL(get_task_comm); 1019EXPORT_SYMBOL_GPL(get_task_comm);
1052 1020
1021/*
1022 * These functions flushes out all traces of the currently running executable
1023 * so that a new one can be started
1024 */
1025
1053void set_task_comm(struct task_struct *tsk, char *buf) 1026void set_task_comm(struct task_struct *tsk, char *buf)
1054{ 1027{
1055 task_lock(tsk); 1028 task_lock(tsk);
@@ -1136,7 +1109,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1136 current->sas_ss_sp = current->sas_ss_size = 0; 1109 current->sas_ss_sp = current->sas_ss_size = 0;
1137 1110
1138 if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid())) 1111 if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
1139 set_dumpable(current->mm, 1); 1112 set_dumpable(current->mm, SUID_DUMPABLE_ENABLED);
1140 else 1113 else
1141 set_dumpable(current->mm, suid_dumpable); 1114 set_dumpable(current->mm, suid_dumpable);
1142 1115
@@ -1171,7 +1144,7 @@ void setup_new_exec(struct linux_binprm * bprm)
1171 current->self_exec_id++; 1144 current->self_exec_id++;
1172 1145
1173 flush_signal_handlers(current, 0); 1146 flush_signal_handlers(current, 0);
1174 flush_old_files(current->files); 1147 do_close_on_exec(current->files);
1175} 1148}
1176EXPORT_SYMBOL(setup_new_exec); 1149EXPORT_SYMBOL(setup_new_exec);
1177 1150
@@ -1632,353 +1605,6 @@ void set_binfmt(struct linux_binfmt *new)
1632 1605
1633EXPORT_SYMBOL(set_binfmt); 1606EXPORT_SYMBOL(set_binfmt);
1634 1607
1635static int expand_corename(struct core_name *cn)
1636{
1637 char *old_corename = cn->corename;
1638
1639 cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
1640 cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
1641
1642 if (!cn->corename) {
1643 kfree(old_corename);
1644 return -ENOMEM;
1645 }
1646
1647 return 0;
1648}
1649
1650static int cn_printf(struct core_name *cn, const char *fmt, ...)
1651{
1652 char *cur;
1653 int need;
1654 int ret;
1655 va_list arg;
1656
1657 va_start(arg, fmt);
1658 need = vsnprintf(NULL, 0, fmt, arg);
1659 va_end(arg);
1660
1661 if (likely(need < cn->size - cn->used - 1))
1662 goto out_printf;
1663
1664 ret = expand_corename(cn);
1665 if (ret)
1666 goto expand_fail;
1667
1668out_printf:
1669 cur = cn->corename + cn->used;
1670 va_start(arg, fmt);
1671 vsnprintf(cur, need + 1, fmt, arg);
1672 va_end(arg);
1673 cn->used += need;
1674 return 0;
1675
1676expand_fail:
1677 return ret;
1678}
1679
1680static void cn_escape(char *str)
1681{
1682 for (; *str; str++)
1683 if (*str == '/')
1684 *str = '!';
1685}
1686
1687static int cn_print_exe_file(struct core_name *cn)
1688{
1689 struct file *exe_file;
1690 char *pathbuf, *path;
1691 int ret;
1692
1693 exe_file = get_mm_exe_file(current->mm);
1694 if (!exe_file) {
1695 char *commstart = cn->corename + cn->used;
1696 ret = cn_printf(cn, "%s (path unknown)", current->comm);
1697 cn_escape(commstart);
1698 return ret;
1699 }
1700
1701 pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
1702 if (!pathbuf) {
1703 ret = -ENOMEM;
1704 goto put_exe_file;
1705 }
1706
1707 path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
1708 if (IS_ERR(path)) {
1709 ret = PTR_ERR(path);
1710 goto free_buf;
1711 }
1712
1713 cn_escape(path);
1714
1715 ret = cn_printf(cn, "%s", path);
1716
1717free_buf:
1718 kfree(pathbuf);
1719put_exe_file:
1720 fput(exe_file);
1721 return ret;
1722}
1723
1724/* format_corename will inspect the pattern parameter, and output a
1725 * name into corename, which must have space for at least
1726 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1727 */
1728static int format_corename(struct core_name *cn, long signr)
1729{
1730 const struct cred *cred = current_cred();
1731 const char *pat_ptr = core_pattern;
1732 int ispipe = (*pat_ptr == '|');
1733 int pid_in_pattern = 0;
1734 int err = 0;
1735
1736 cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
1737 cn->corename = kmalloc(cn->size, GFP_KERNEL);
1738 cn->used = 0;
1739
1740 if (!cn->corename)
1741 return -ENOMEM;
1742
1743 /* Repeat as long as we have more pattern to process and more output
1744 space */
1745 while (*pat_ptr) {
1746 if (*pat_ptr != '%') {
1747 if (*pat_ptr == 0)
1748 goto out;
1749 err = cn_printf(cn, "%c", *pat_ptr++);
1750 } else {
1751 switch (*++pat_ptr) {
1752 /* single % at the end, drop that */
1753 case 0:
1754 goto out;
1755 /* Double percent, output one percent */
1756 case '%':
1757 err = cn_printf(cn, "%c", '%');
1758 break;
1759 /* pid */
1760 case 'p':
1761 pid_in_pattern = 1;
1762 err = cn_printf(cn, "%d",
1763 task_tgid_vnr(current));
1764 break;
1765 /* uid */
1766 case 'u':
1767 err = cn_printf(cn, "%d", cred->uid);
1768 break;
1769 /* gid */
1770 case 'g':
1771 err = cn_printf(cn, "%d", cred->gid);
1772 break;
1773 /* signal that caused the coredump */
1774 case 's':
1775 err = cn_printf(cn, "%ld", signr);
1776 break;
1777 /* UNIX time of coredump */
1778 case 't': {
1779 struct timeval tv;
1780 do_gettimeofday(&tv);
1781 err = cn_printf(cn, "%lu", tv.tv_sec);
1782 break;
1783 }
1784 /* hostname */
1785 case 'h': {
1786 char *namestart = cn->corename + cn->used;
1787 down_read(&uts_sem);
1788 err = cn_printf(cn, "%s",
1789 utsname()->nodename);
1790 up_read(&uts_sem);
1791 cn_escape(namestart);
1792 break;
1793 }
1794 /* executable */
1795 case 'e': {
1796 char *commstart = cn->corename + cn->used;
1797 err = cn_printf(cn, "%s", current->comm);
1798 cn_escape(commstart);
1799 break;
1800 }
1801 case 'E':
1802 err = cn_print_exe_file(cn);
1803 break;
1804 /* core limit size */
1805 case 'c':
1806 err = cn_printf(cn, "%lu",
1807 rlimit(RLIMIT_CORE));
1808 break;
1809 default:
1810 break;
1811 }
1812 ++pat_ptr;
1813 }
1814
1815 if (err)
1816 return err;
1817 }
1818
1819 /* Backward compatibility with core_uses_pid:
1820 *
1821 * If core_pattern does not include a %p (as is the default)
1822 * and core_uses_pid is set, then .%pid will be appended to
1823 * the filename. Do not do this for piped commands. */
1824 if (!ispipe && !pid_in_pattern && core_uses_pid) {
1825 err = cn_printf(cn, ".%d", task_tgid_vnr(current));
1826 if (err)
1827 return err;
1828 }
1829out:
1830 return ispipe;
1831}
1832
1833static int zap_process(struct task_struct *start, int exit_code)
1834{
1835 struct task_struct *t;
1836 int nr = 0;
1837
1838 start->signal->flags = SIGNAL_GROUP_EXIT;
1839 start->signal->group_exit_code = exit_code;
1840 start->signal->group_stop_count = 0;
1841
1842 t = start;
1843 do {
1844 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
1845 if (t != current && t->mm) {
1846 sigaddset(&t->pending.signal, SIGKILL);
1847 signal_wake_up(t, 1);
1848 nr++;
1849 }
1850 } while_each_thread(start, t);
1851
1852 return nr;
1853}
1854
1855static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1856 struct core_state *core_state, int exit_code)
1857{
1858 struct task_struct *g, *p;
1859 unsigned long flags;
1860 int nr = -EAGAIN;
1861
1862 spin_lock_irq(&tsk->sighand->siglock);
1863 if (!signal_group_exit(tsk->signal)) {
1864 mm->core_state = core_state;
1865 nr = zap_process(tsk, exit_code);
1866 }
1867 spin_unlock_irq(&tsk->sighand->siglock);
1868 if (unlikely(nr < 0))
1869 return nr;
1870
1871 if (atomic_read(&mm->mm_users) == nr + 1)
1872 goto done;
1873 /*
1874 * We should find and kill all tasks which use this mm, and we should
1875 * count them correctly into ->nr_threads. We don't take tasklist
1876 * lock, but this is safe wrt:
1877 *
1878 * fork:
1879 * None of sub-threads can fork after zap_process(leader). All
1880 * processes which were created before this point should be
1881 * visible to zap_threads() because copy_process() adds the new
1882 * process to the tail of init_task.tasks list, and lock/unlock
1883 * of ->siglock provides a memory barrier.
1884 *
1885 * do_exit:
1886 * The caller holds mm->mmap_sem. This means that the task which
1887 * uses this mm can't pass exit_mm(), so it can't exit or clear
1888 * its ->mm.
1889 *
1890 * de_thread:
1891 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1892 * we must see either old or new leader, this does not matter.
1893 * However, it can change p->sighand, so lock_task_sighand(p)
1894 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1895 * it can't fail.
1896 *
1897 * Note also that "g" can be the old leader with ->mm == NULL
1898 * and already unhashed and thus removed from ->thread_group.
1899 * This is OK, __unhash_process()->list_del_rcu() does not
1900 * clear the ->next pointer, we will find the new leader via
1901 * next_thread().
1902 */
1903 rcu_read_lock();
1904 for_each_process(g) {
1905 if (g == tsk->group_leader)
1906 continue;
1907 if (g->flags & PF_KTHREAD)
1908 continue;
1909 p = g;
1910 do {
1911 if (p->mm) {
1912 if (unlikely(p->mm == mm)) {
1913 lock_task_sighand(p, &flags);
1914 nr += zap_process(p, exit_code);
1915 unlock_task_sighand(p, &flags);
1916 }
1917 break;
1918 }
1919 } while_each_thread(g, p);
1920 }
1921 rcu_read_unlock();
1922done:
1923 atomic_set(&core_state->nr_threads, nr);
1924 return nr;
1925}
1926
1927static int coredump_wait(int exit_code, struct core_state *core_state)
1928{
1929 struct task_struct *tsk = current;
1930 struct mm_struct *mm = tsk->mm;
1931 int core_waiters = -EBUSY;
1932
1933 init_completion(&core_state->startup);
1934 core_state->dumper.task = tsk;
1935 core_state->dumper.next = NULL;
1936
1937 down_write(&mm->mmap_sem);
1938 if (!mm->core_state)
1939 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1940 up_write(&mm->mmap_sem);
1941
1942 if (core_waiters > 0) {
1943 struct core_thread *ptr;
1944
1945 wait_for_completion(&core_state->startup);
1946 /*
1947 * Wait for all the threads to become inactive, so that
1948 * all the thread context (extended register state, like
1949 * fpu etc) gets copied to the memory.
1950 */
1951 ptr = core_state->dumper.next;
1952 while (ptr != NULL) {
1953 wait_task_inactive(ptr->task, 0);
1954 ptr = ptr->next;
1955 }
1956 }
1957
1958 return core_waiters;
1959}
1960
1961static void coredump_finish(struct mm_struct *mm)
1962{
1963 struct core_thread *curr, *next;
1964 struct task_struct *task;
1965
1966 next = mm->core_state->dumper.next;
1967 while ((curr = next) != NULL) {
1968 next = curr->next;
1969 task = curr->task;
1970 /*
1971 * see exit_mm(), curr->task must not see
1972 * ->task == NULL before we read ->next.
1973 */
1974 smp_mb();
1975 curr->task = NULL;
1976 wake_up_process(task);
1977 }
1978
1979 mm->core_state = NULL;
1980}
1981
1982/* 1608/*
1983 * set_dumpable converts traditional three-value dumpable to two flags and 1609 * set_dumpable converts traditional three-value dumpable to two flags and
1984 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1610 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1646,7 @@ void set_dumpable(struct mm_struct *mm, int value)
2020 } 1646 }
2021} 1647}
2022 1648
2023static int __get_dumpable(unsigned long mm_flags) 1649int __get_dumpable(unsigned long mm_flags)
2024{ 1650{
2025 int ret; 1651 int ret;
2026 1652
@@ -2032,290 +1658,3 @@ int get_dumpable(struct mm_struct *mm)
2032{ 1658{
2033 return __get_dumpable(mm->flags); 1659 return __get_dumpable(mm->flags);
2034} 1660}
2035
2036static void wait_for_dump_helpers(struct file *file)
2037{
2038 struct pipe_inode_info *pipe;
2039
2040 pipe = file->f_path.dentry->d_inode->i_pipe;
2041
2042 pipe_lock(pipe);
2043 pipe->readers++;
2044 pipe->writers--;
2045
2046 while ((pipe->readers > 1) && (!signal_pending(current))) {
2047 wake_up_interruptible_sync(&pipe->wait);
2048 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
2049 pipe_wait(pipe);
2050 }
2051
2052 pipe->readers--;
2053 pipe->writers++;
2054 pipe_unlock(pipe);
2055
2056}
2057
2058
2059/*
2060 * umh_pipe_setup
2061 * helper function to customize the process used
2062 * to collect the core in userspace. Specifically
2063 * it sets up a pipe and installs it as fd 0 (stdin)
2064 * for the process. Returns 0 on success, or
2065 * PTR_ERR on failure.
2066 * Note that it also sets the core limit to 1. This
2067 * is a special value that we use to trap recursive
2068 * core dumps
2069 */
2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2071{
2072 struct file *files[2];
2073 struct fdtable *fdt;
2074 struct coredump_params *cp = (struct coredump_params *)info->data;
2075 struct files_struct *cf = current->files;
2076 int err = create_pipe_files(files, 0);
2077 if (err)
2078 return err;
2079
2080 cp->file = files[1];
2081
2082 sys_close(0);
2083 fd_install(0, files[0]);
2084 spin_lock(&cf->file_lock);
2085 fdt = files_fdtable(cf);
2086 __set_open_fd(0, fdt);
2087 __clear_close_on_exec(0, fdt);
2088 spin_unlock(&cf->file_lock);
2089
2090 /* and disallow core files too */
2091 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
2092
2093 return 0;
2094}
2095
2096void do_coredump(long signr, int exit_code, struct pt_regs *regs)
2097{
2098 struct core_state core_state;
2099 struct core_name cn;
2100 struct mm_struct *mm = current->mm;
2101 struct linux_binfmt * binfmt;
2102 const struct cred *old_cred;
2103 struct cred *cred;
2104 int retval = 0;
2105 int flag = 0;
2106 int ispipe;
2107 bool need_nonrelative = false;
2108 static atomic_t core_dump_count = ATOMIC_INIT(0);
2109 struct coredump_params cprm = {
2110 .signr = signr,
2111 .regs = regs,
2112 .limit = rlimit(RLIMIT_CORE),
2113 /*
2114 * We must use the same mm->flags while dumping core to avoid
2115 * inconsistency of bit flags, since this flag is not protected
2116 * by any locks.
2117 */
2118 .mm_flags = mm->flags,
2119 };
2120
2121 audit_core_dumps(signr);
2122
2123 binfmt = mm->binfmt;
2124 if (!binfmt || !binfmt->core_dump)
2125 goto fail;
2126 if (!__get_dumpable(cprm.mm_flags))
2127 goto fail;
2128
2129 cred = prepare_creds();
2130 if (!cred)
2131 goto fail;
2132 /*
2133 * We cannot trust fsuid as being the "true" uid of the process
2134 * nor do we know its entire history. We only know it was tainted
2135 * so we dump it as root in mode 2, and only into a controlled
2136 * environment (pipe handler or fully qualified path).
2137 */
2138 if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
2139 /* Setuid core dump mode */
2140 flag = O_EXCL; /* Stop rewrite attacks */
2141 cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
2142 need_nonrelative = true;
2143 }
2144
2145 retval = coredump_wait(exit_code, &core_state);
2146 if (retval < 0)
2147 goto fail_creds;
2148
2149 old_cred = override_creds(cred);
2150
2151 /*
2152 * Clear any false indication of pending signals that might
2153 * be seen by the filesystem code called to write the core file.
2154 */
2155 clear_thread_flag(TIF_SIGPENDING);
2156
2157 ispipe = format_corename(&cn, signr);
2158
2159 if (ispipe) {
2160 int dump_count;
2161 char **helper_argv;
2162
2163 if (ispipe < 0) {
2164 printk(KERN_WARNING "format_corename failed\n");
2165 printk(KERN_WARNING "Aborting core\n");
2166 goto fail_corename;
2167 }
2168
2169 if (cprm.limit == 1) {
2170 /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
2171 *
2172 * Normally core limits are irrelevant to pipes, since
2173 * we're not writing to the file system, but we use
2174 * cprm.limit of 1 here as a speacial value, this is a
2175 * consistent way to catch recursive crashes.
2176 * We can still crash if the core_pattern binary sets
2177 * RLIM_CORE = !1, but it runs as root, and can do
2178 * lots of stupid things.
2179 *
2180 * Note that we use task_tgid_vnr here to grab the pid
2181 * of the process group leader. That way we get the
2182 * right pid if a thread in a multi-threaded
2183 * core_pattern process dies.
2184 */
2185 printk(KERN_WARNING
2186 "Process %d(%s) has RLIMIT_CORE set to 1\n",
2187 task_tgid_vnr(current), current->comm);
2188 printk(KERN_WARNING "Aborting core\n");
2189 goto fail_unlock;
2190 }
2191 cprm.limit = RLIM_INFINITY;
2192
2193 dump_count = atomic_inc_return(&core_dump_count);
2194 if (core_pipe_limit && (core_pipe_limit < dump_count)) {
2195 printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
2196 task_tgid_vnr(current), current->comm);
2197 printk(KERN_WARNING "Skipping core dump\n");
2198 goto fail_dropcount;
2199 }
2200
2201 helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
2202 if (!helper_argv) {
2203 printk(KERN_WARNING "%s failed to allocate memory\n",
2204 __func__);
2205 goto fail_dropcount;
2206 }
2207
2208 retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
2209 NULL, UMH_WAIT_EXEC, umh_pipe_setup,
2210 NULL, &cprm);
2211 argv_free(helper_argv);
2212 if (retval) {
2213 printk(KERN_INFO "Core dump to %s pipe failed\n",
2214 cn.corename);
2215 goto close_fail;
2216 }
2217 } else {
2218 struct inode *inode;
2219
2220 if (cprm.limit < binfmt->min_coredump)
2221 goto fail_unlock;
2222
2223 if (need_nonrelative && cn.corename[0] != '/') {
2224 printk(KERN_WARNING "Pid %d(%s) can only dump core "\
2225 "to fully qualified path!\n",
2226 task_tgid_vnr(current), current->comm);
2227 printk(KERN_WARNING "Skipping core dump\n");
2228 goto fail_unlock;
2229 }
2230
2231 cprm.file = filp_open(cn.corename,
2232 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
2233 0600);
2234 if (IS_ERR(cprm.file))
2235 goto fail_unlock;
2236
2237 inode = cprm.file->f_path.dentry->d_inode;
2238 if (inode->i_nlink > 1)
2239 goto close_fail;
2240 if (d_unhashed(cprm.file->f_path.dentry))
2241 goto close_fail;
2242 /*
2243 * AK: actually i see no reason to not allow this for named
2244 * pipes etc, but keep the previous behaviour for now.
2245 */
2246 if (!S_ISREG(inode->i_mode))
2247 goto close_fail;
2248 /*
2249 * Dont allow local users get cute and trick others to coredump
2250 * into their pre-created files.
2251 */
2252 if (!uid_eq(inode->i_uid, current_fsuid()))
2253 goto close_fail;
2254 if (!cprm.file->f_op || !cprm.file->f_op->write)
2255 goto close_fail;
2256 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
2257 goto close_fail;
2258 }
2259
2260 retval = binfmt->core_dump(&cprm);
2261 if (retval)
2262 current->signal->group_exit_code |= 0x80;
2263
2264 if (ispipe && core_pipe_limit)
2265 wait_for_dump_helpers(cprm.file);
2266close_fail:
2267 if (cprm.file)
2268 filp_close(cprm.file, NULL);
2269fail_dropcount:
2270 if (ispipe)
2271 atomic_dec(&core_dump_count);
2272fail_unlock:
2273 kfree(cn.corename);
2274fail_corename:
2275 coredump_finish(mm);
2276 revert_creds(old_cred);
2277fail_creds:
2278 put_cred(cred);
2279fail:
2280 return;
2281}
2282
2283/*
2284 * Core dumping helper functions. These are the only things you should
2285 * do on a core-file: use only these functions to write out all the
2286 * necessary info.
2287 */
2288int dump_write(struct file *file, const void *addr, int nr)
2289{
2290 return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
2291}
2292EXPORT_SYMBOL(dump_write);
2293
2294int dump_seek(struct file *file, loff_t off)
2295{
2296 int ret = 1;
2297
2298 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
2299 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
2300 return 0;
2301 } else {
2302 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
2303
2304 if (!buf)
2305 return 0;
2306 while (off > 0) {
2307 unsigned long n = off;
2308
2309 if (n > PAGE_SIZE)
2310 n = PAGE_SIZE;
2311 if (!dump_write(file, buf, n)) {
2312 ret = 0;
2313 break;
2314 }
2315 off -= n;
2316 }
2317 free_page((unsigned long)buf);
2318 }
2319 return ret;
2320}
2321EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 1562c27a2fab..b56181047751 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1172,8 +1172,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1172 1172
1173 /* copy stuff from on-disk struct to in-memory struct */ 1173 /* copy stuff from on-disk struct to in-memory struct */
1174 inode->i_mode = le16_to_cpu(fcb.i_mode); 1174 inode->i_mode = le16_to_cpu(fcb.i_mode);
1175 inode->i_uid = le32_to_cpu(fcb.i_uid); 1175 i_uid_write(inode, le32_to_cpu(fcb.i_uid));
1176 inode->i_gid = le32_to_cpu(fcb.i_gid); 1176 i_gid_write(inode, le32_to_cpu(fcb.i_gid));
1177 set_nlink(inode, le16_to_cpu(fcb.i_links_count)); 1177 set_nlink(inode, le16_to_cpu(fcb.i_links_count));
1178 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1178 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
1179 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1179 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
@@ -1385,8 +1385,8 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1385 fcb = &args->fcb; 1385 fcb = &args->fcb;
1386 1386
1387 fcb->i_mode = cpu_to_le16(inode->i_mode); 1387 fcb->i_mode = cpu_to_le16(inode->i_mode);
1388 fcb->i_uid = cpu_to_le32(inode->i_uid); 1388 fcb->i_uid = cpu_to_le32(i_uid_read(inode));
1389 fcb->i_gid = cpu_to_le32(inode->i_gid); 1389 fcb->i_gid = cpu_to_le32(i_gid_read(inode));
1390 fcb->i_links_count = cpu_to_le16(inode->i_nlink); 1390 fcb->i_links_count = cpu_to_le16(inode->i_nlink);
1391 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 1391 fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
1392 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 1392 fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 5f376d14fdcc..b963f38ac298 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -203,7 +203,7 @@ static unsigned _sp2d_min_pg(struct __stripe_pages_2d *sp2d)
203 203
204static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d) 204static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d)
205{ 205{
206 unsigned p; 206 int p;
207 207
208 for (p = sp2d->pages_in_unit - 1; p >= 0; --p) { 208 for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
209 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 209 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index dde41a75c7c8..59e3bbfac0b1 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
206 */ 206 */
207static void destroy_inodecache(void) 207static void destroy_inodecache(void)
208{ 208{
209 /*
210 * Make sure all delayed rcu free inodes are flushed before we
211 * destroy cache.
212 */
213 rcu_barrier();
209 kmem_cache_destroy(exofs_inode_cachep); 214 kmem_cache_destroy(exofs_inode_cachep);
210} 215}
211 216
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index 5a7b691e748b..1b4f2f95fc37 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -80,8 +80,13 @@ static ssize_t uri_show(struct exofs_dev *edp, char *buf)
80 80
81static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len) 81static ssize_t uri_store(struct exofs_dev *edp, const char *buf, size_t len)
82{ 82{
83 uint8_t *new_uri;
84
83 edp->urilen = strlen(buf) + 1; 85 edp->urilen = strlen(buf) + 1;
84 edp->uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL); 86 new_uri = krealloc(edp->uri, edp->urilen, GFP_KERNEL);
87 if (new_uri == NULL)
88 return -ENOMEM;
89 edp->uri = new_uri;
85 strncpy(edp->uri, buf, edp->urilen); 90 strncpy(edp->uri, buf, edp->urilen);
86 return edp->urilen; 91 return edp->urilen;
87} 92}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 35d6a3cfd9ff..110b6b371a4e 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -53,16 +53,23 @@ ext2_acl_from_disk(const void *value, size_t size)
53 case ACL_OTHER: 53 case ACL_OTHER:
54 value = (char *)value + 54 value = (char *)value +
55 sizeof(ext2_acl_entry_short); 55 sizeof(ext2_acl_entry_short);
56 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
57 break; 56 break;
58 57
59 case ACL_USER: 58 case ACL_USER:
59 value = (char *)value + sizeof(ext2_acl_entry);
60 if ((char *)value > end)
61 goto fail;
62 acl->a_entries[n].e_uid =
63 make_kuid(&init_user_ns,
64 le32_to_cpu(entry->e_id));
65 break;
60 case ACL_GROUP: 66 case ACL_GROUP:
61 value = (char *)value + sizeof(ext2_acl_entry); 67 value = (char *)value + sizeof(ext2_acl_entry);
62 if ((char *)value > end) 68 if ((char *)value > end)
63 goto fail; 69 goto fail;
64 acl->a_entries[n].e_id = 70 acl->a_entries[n].e_gid =
65 le32_to_cpu(entry->e_id); 71 make_kgid(&init_user_ns,
72 le32_to_cpu(entry->e_id));
66 break; 73 break;
67 74
68 default: 75 default:
@@ -96,14 +103,19 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size)
96 ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); 103 ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION);
97 e = (char *)ext_acl + sizeof(ext2_acl_header); 104 e = (char *)ext_acl + sizeof(ext2_acl_header);
98 for (n=0; n < acl->a_count; n++) { 105 for (n=0; n < acl->a_count; n++) {
106 const struct posix_acl_entry *acl_e = &acl->a_entries[n];
99 ext2_acl_entry *entry = (ext2_acl_entry *)e; 107 ext2_acl_entry *entry = (ext2_acl_entry *)e;
100 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 108 entry->e_tag = cpu_to_le16(acl_e->e_tag);
101 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 109 entry->e_perm = cpu_to_le16(acl_e->e_perm);
102 switch(acl->a_entries[n].e_tag) { 110 switch(acl_e->e_tag) {
103 case ACL_USER: 111 case ACL_USER:
112 entry->e_id = cpu_to_le32(
113 from_kuid(&init_user_ns, acl_e->e_uid));
114 e += sizeof(ext2_acl_entry);
115 break;
104 case ACL_GROUP: 116 case ACL_GROUP:
105 entry->e_id = 117 entry->e_id = cpu_to_le32(
106 cpu_to_le32(acl->a_entries[n].e_id); 118 from_kgid(&init_user_ns, acl_e->e_gid));
107 e += sizeof(ext2_acl_entry); 119 e += sizeof(ext2_acl_entry);
108 break; 120 break;
109 121
@@ -350,7 +362,7 @@ ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
350 return PTR_ERR(acl); 362 return PTR_ERR(acl);
351 if (acl == NULL) 363 if (acl == NULL)
352 return -ENODATA; 364 return -ENODATA;
353 error = posix_acl_to_xattr(acl, buffer, size); 365 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
354 posix_acl_release(acl); 366 posix_acl_release(acl);
355 367
356 return error; 368 return error;
@@ -371,7 +383,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
371 return -EPERM; 383 return -EPERM;
372 384
373 if (value) { 385 if (value) {
374 acl = posix_acl_from_xattr(value, size); 386 acl = posix_acl_from_xattr(&init_user_ns, value, size);
375 if (IS_ERR(acl)) 387 if (IS_ERR(acl))
376 return PTR_ERR(acl); 388 return PTR_ERR(acl);
377 else if (acl) { 389 else if (acl) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 376aa77f3ca7..2616d0ea5c5c 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -479,7 +479,7 @@ void ext2_discard_reservation(struct inode *inode)
479/** 479/**
480 * ext2_free_blocks() -- Free given blocks and update quota and i_blocks 480 * ext2_free_blocks() -- Free given blocks and update quota and i_blocks
481 * @inode: inode 481 * @inode: inode
482 * @block: start physcial block to free 482 * @block: start physical block to free
483 * @count: number of blocks to free 483 * @count: number of blocks to free
484 */ 484 */
485void ext2_free_blocks (struct inode * inode, unsigned long block, 485void ext2_free_blocks (struct inode * inode, unsigned long block,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index af74d9e27b71..6c205d0c565b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -206,6 +206,11 @@ static int init_inodecache(void)
206 206
207static void destroy_inodecache(void) 207static void destroy_inodecache(void)
208{ 208{
209 /*
210 * Make sure all delayed rcu free inodes are flushed before we
211 * destroy cache.
212 */
213 rcu_barrier();
209 kmem_cache_destroy(ext2_inode_cachep); 214 kmem_cache_destroy(ext2_inode_cachep);
210} 215}
211 216
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c76832c8d192..dbb5ad59a7fc 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -48,16 +48,23 @@ ext3_acl_from_disk(const void *value, size_t size)
48 case ACL_OTHER: 48 case ACL_OTHER:
49 value = (char *)value + 49 value = (char *)value +
50 sizeof(ext3_acl_entry_short); 50 sizeof(ext3_acl_entry_short);
51 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
52 break; 51 break;
53 52
54 case ACL_USER: 53 case ACL_USER:
54 value = (char *)value + sizeof(ext3_acl_entry);
55 if ((char *)value > end)
56 goto fail;
57 acl->a_entries[n].e_uid =
58 make_kuid(&init_user_ns,
59 le32_to_cpu(entry->e_id));
60 break;
55 case ACL_GROUP: 61 case ACL_GROUP:
56 value = (char *)value + sizeof(ext3_acl_entry); 62 value = (char *)value + sizeof(ext3_acl_entry);
57 if ((char *)value > end) 63 if ((char *)value > end)
58 goto fail; 64 goto fail;
59 acl->a_entries[n].e_id = 65 acl->a_entries[n].e_gid =
60 le32_to_cpu(entry->e_id); 66 make_kgid(&init_user_ns,
67 le32_to_cpu(entry->e_id));
61 break; 68 break;
62 69
63 default: 70 default:
@@ -91,14 +98,19 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); 98 ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
92 e = (char *)ext_acl + sizeof(ext3_acl_header); 99 e = (char *)ext_acl + sizeof(ext3_acl_header);
93 for (n=0; n < acl->a_count; n++) { 100 for (n=0; n < acl->a_count; n++) {
101 const struct posix_acl_entry *acl_e = &acl->a_entries[n];
94 ext3_acl_entry *entry = (ext3_acl_entry *)e; 102 ext3_acl_entry *entry = (ext3_acl_entry *)e;
95 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 103 entry->e_tag = cpu_to_le16(acl_e->e_tag);
96 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 104 entry->e_perm = cpu_to_le16(acl_e->e_perm);
97 switch(acl->a_entries[n].e_tag) { 105 switch(acl_e->e_tag) {
98 case ACL_USER: 106 case ACL_USER:
107 entry->e_id = cpu_to_le32(
108 from_kuid(&init_user_ns, acl_e->e_uid));
109 e += sizeof(ext3_acl_entry);
110 break;
99 case ACL_GROUP: 111 case ACL_GROUP:
100 entry->e_id = 112 entry->e_id = cpu_to_le32(
101 cpu_to_le32(acl->a_entries[n].e_id); 113 from_kgid(&init_user_ns, acl_e->e_gid));
102 e += sizeof(ext3_acl_entry); 114 e += sizeof(ext3_acl_entry);
103 break; 115 break;
104 116
@@ -369,7 +381,7 @@ ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
369 return PTR_ERR(acl); 381 return PTR_ERR(acl);
370 if (acl == NULL) 382 if (acl == NULL)
371 return -ENODATA; 383 return -ENODATA;
372 error = posix_acl_to_xattr(acl, buffer, size); 384 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
373 posix_acl_release(acl); 385 posix_acl_release(acl);
374 386
375 return error; 387 return error;
@@ -392,7 +404,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
392 return -EPERM; 404 return -EPERM;
393 405
394 if (value) { 406 if (value) {
395 acl = posix_acl_from_xattr(value, size); 407 acl = posix_acl_from_xattr(&init_user_ns, value, size);
396 if (IS_ERR(acl)) 408 if (IS_ERR(acl))
397 return PTR_ERR(acl); 409 return PTR_ERR(acl);
398 else if (acl) { 410 else if (acl) {
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 90d901f0486b..7320a66e958f 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -483,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode)
483 * ext3_free_blocks_sb() -- Free given blocks and update quota 483 * ext3_free_blocks_sb() -- Free given blocks and update quota
484 * @handle: handle to this transaction 484 * @handle: handle to this transaction
485 * @sb: super block 485 * @sb: super block
486 * @block: start physcial block to free 486 * @block: start physical block to free
487 * @count: number of blocks to free 487 * @count: number of blocks to free
488 * @pdquot_freed_blocks: pointer to quota 488 * @pdquot_freed_blocks: pointer to quota
489 */ 489 */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a07597307fd1..7e87e37a372a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3072,6 +3072,8 @@ static int ext3_do_update_inode(handle_t *handle,
3072 struct ext3_inode_info *ei = EXT3_I(inode); 3072 struct ext3_inode_info *ei = EXT3_I(inode);
3073 struct buffer_head *bh = iloc->bh; 3073 struct buffer_head *bh = iloc->bh;
3074 int err = 0, rc, block; 3074 int err = 0, rc, block;
3075 int need_datasync = 0;
3076 __le32 disksize;
3075 uid_t i_uid; 3077 uid_t i_uid;
3076 gid_t i_gid; 3078 gid_t i_gid;
3077 3079
@@ -3113,7 +3115,11 @@ again:
3113 raw_inode->i_gid_high = 0; 3115 raw_inode->i_gid_high = 0;
3114 } 3116 }
3115 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 3117 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
3116 raw_inode->i_size = cpu_to_le32(ei->i_disksize); 3118 disksize = cpu_to_le32(ei->i_disksize);
3119 if (disksize != raw_inode->i_size) {
3120 need_datasync = 1;
3121 raw_inode->i_size = disksize;
3122 }
3117 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); 3123 raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
3118 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); 3124 raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
3119 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); 3125 raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3129,8 +3135,11 @@ again:
3129 if (!S_ISREG(inode->i_mode)) { 3135 if (!S_ISREG(inode->i_mode)) {
3130 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); 3136 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
3131 } else { 3137 } else {
3132 raw_inode->i_size_high = 3138 disksize = cpu_to_le32(ei->i_disksize >> 32);
3133 cpu_to_le32(ei->i_disksize >> 32); 3139 if (disksize != raw_inode->i_size_high) {
3140 raw_inode->i_size_high = disksize;
3141 need_datasync = 1;
3142 }
3134 if (ei->i_disksize > 0x7fffffffULL) { 3143 if (ei->i_disksize > 0x7fffffffULL) {
3135 struct super_block *sb = inode->i_sb; 3144 struct super_block *sb = inode->i_sb;
3136 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, 3145 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3183,6 +3192,8 @@ again:
3183 ext3_clear_inode_state(inode, EXT3_STATE_NEW); 3192 ext3_clear_inode_state(inode, EXT3_STATE_NEW);
3184 3193
3185 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); 3194 atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
3195 if (need_datasync)
3196 atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
3186out_brelse: 3197out_brelse:
3187 brelse (bh); 3198 brelse (bh);
3188 ext3_std_error(inode->i_sb, err); 3199 ext3_std_error(inode->i_sb, err);
@@ -3196,7 +3207,7 @@ out_brelse:
3196 * 3207 *
3197 * - Within generic_file_write() for O_SYNC files. 3208 * - Within generic_file_write() for O_SYNC files.
3198 * Here, there will be no transaction running. We wait for any running 3209 * Here, there will be no transaction running. We wait for any running
3199 * trasnaction to commit. 3210 * transaction to commit.
3200 * 3211 *
3201 * - Within sys_sync(), kupdate and such. 3212 * - Within sys_sync(), kupdate and such.
3202 * We wait on commit, if tol to. 3213 * We wait on commit, if tol to.
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c892e93d8e7..17ae5c83d234 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -532,6 +532,11 @@ static int init_inodecache(void)
532 532
533static void destroy_inodecache(void) 533static void destroy_inodecache(void)
534{ 534{
535 /*
536 * Make sure all delayed rcu free inodes are flushed before we
537 * destroy cache.
538 */
539 rcu_barrier();
535 kmem_cache_destroy(ext3_inode_cachep); 540 kmem_cache_destroy(ext3_inode_cachep);
536} 541}
537 542
@@ -975,7 +980,7 @@ static int parse_options (char *options, struct super_block *sb,
975 * Initialize args struct so we know whether arg was 980 * Initialize args struct so we know whether arg was
976 * found; some options take optional arguments. 981 * found; some options take optional arguments.
977 */ 982 */
978 args[0].to = args[0].from = 0; 983 args[0].to = args[0].from = NULL;
979 token = match_token(p, tokens, args); 984 token = match_token(p, tokens, args);
980 switch (token) { 985 switch (token) {
981 case Opt_bsd_df: 986 case Opt_bsd_df:
@@ -1479,10 +1484,12 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1479 } 1484 }
1480 1485
1481 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1486 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1482 if (es->s_last_orphan) 1487 /* don't clear list on RO mount w/ errors */
1488 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
1483 jbd_debug(1, "Errors on filesystem, " 1489 jbd_debug(1, "Errors on filesystem, "
1484 "clearing orphan list.\n"); 1490 "clearing orphan list.\n");
1485 es->s_last_orphan = 0; 1491 es->s_last_orphan = 0;
1492 }
1486 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 1493 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1487 return; 1494 return;
1488 } 1495 }
@@ -2803,7 +2810,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2803 2810
2804static inline struct inode *dquot_to_inode(struct dquot *dquot) 2811static inline struct inode *dquot_to_inode(struct dquot *dquot)
2805{ 2812{
2806 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 2813 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
2807} 2814}
2808 2815
2809static int ext3_write_dquot(struct dquot *dquot) 2816static int ext3_write_dquot(struct dquot *dquot)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a5c29bb3b835..d3c5b88fd89f 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -55,16 +55,23 @@ ext4_acl_from_disk(const void *value, size_t size)
55 case ACL_OTHER: 55 case ACL_OTHER:
56 value = (char *)value + 56 value = (char *)value +
57 sizeof(ext4_acl_entry_short); 57 sizeof(ext4_acl_entry_short);
58 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
59 break; 58 break;
60 59
61 case ACL_USER: 60 case ACL_USER:
61 value = (char *)value + sizeof(ext4_acl_entry);
62 if ((char *)value > end)
63 goto fail;
64 acl->a_entries[n].e_uid =
65 make_kuid(&init_user_ns,
66 le32_to_cpu(entry->e_id));
67 break;
62 case ACL_GROUP: 68 case ACL_GROUP:
63 value = (char *)value + sizeof(ext4_acl_entry); 69 value = (char *)value + sizeof(ext4_acl_entry);
64 if ((char *)value > end) 70 if ((char *)value > end)
65 goto fail; 71 goto fail;
66 acl->a_entries[n].e_id = 72 acl->a_entries[n].e_gid =
67 le32_to_cpu(entry->e_id); 73 make_kgid(&init_user_ns,
74 le32_to_cpu(entry->e_id));
68 break; 75 break;
69 76
70 default: 77 default:
@@ -98,13 +105,19 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
98 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 105 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
99 e = (char *)ext_acl + sizeof(ext4_acl_header); 106 e = (char *)ext_acl + sizeof(ext4_acl_header);
100 for (n = 0; n < acl->a_count; n++) { 107 for (n = 0; n < acl->a_count; n++) {
108 const struct posix_acl_entry *acl_e = &acl->a_entries[n];
101 ext4_acl_entry *entry = (ext4_acl_entry *)e; 109 ext4_acl_entry *entry = (ext4_acl_entry *)e;
102 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 110 entry->e_tag = cpu_to_le16(acl_e->e_tag);
103 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 111 entry->e_perm = cpu_to_le16(acl_e->e_perm);
104 switch (acl->a_entries[n].e_tag) { 112 switch (acl_e->e_tag) {
105 case ACL_USER: 113 case ACL_USER:
114 entry->e_id = cpu_to_le32(
115 from_kuid(&init_user_ns, acl_e->e_uid));
116 e += sizeof(ext4_acl_entry);
117 break;
106 case ACL_GROUP: 118 case ACL_GROUP:
107 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); 119 entry->e_id = cpu_to_le32(
120 from_kgid(&init_user_ns, acl_e->e_gid));
108 e += sizeof(ext4_acl_entry); 121 e += sizeof(ext4_acl_entry);
109 break; 122 break;
110 123
@@ -374,7 +387,7 @@ ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
374 return PTR_ERR(acl); 387 return PTR_ERR(acl);
375 if (acl == NULL) 388 if (acl == NULL)
376 return -ENODATA; 389 return -ENODATA;
377 error = posix_acl_to_xattr(acl, buffer, size); 390 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
378 posix_acl_release(acl); 391 posix_acl_release(acl);
379 392
380 return error; 393 return error;
@@ -397,7 +410,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
397 return -EPERM; 410 return -EPERM;
398 411
399 if (value) { 412 if (value) {
400 acl = posix_acl_from_xattr(value, size); 413 acl = posix_acl_from_xattr(&init_user_ns, value, size);
401 if (IS_ERR(acl)) 414 if (IS_ERR(acl))
402 return PTR_ERR(acl); 415 return PTR_ERR(acl);
403 else if (acl) { 416 else if (acl) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d23b31ca9d7a..1b5089067d01 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
280 return desc; 280 return desc;
281} 281}
282 282
283static int ext4_valid_block_bitmap(struct super_block *sb, 283/*
284 struct ext4_group_desc *desc, 284 * Return the block number which was discovered to be invalid, or 0 if
285 unsigned int block_group, 285 * the block bitmap is valid.
286 struct buffer_head *bh) 286 */
287static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
288 struct ext4_group_desc *desc,
289 unsigned int block_group,
290 struct buffer_head *bh)
287{ 291{
288 ext4_grpblk_t offset; 292 ext4_grpblk_t offset;
289 ext4_grpblk_t next_zero_bit; 293 ext4_grpblk_t next_zero_bit;
290 ext4_fsblk_t bitmap_blk; 294 ext4_fsblk_t blk;
291 ext4_fsblk_t group_first_block; 295 ext4_fsblk_t group_first_block;
292 296
293 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 297 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
@@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
297 * or it has to also read the block group where the bitmaps 301 * or it has to also read the block group where the bitmaps
298 * are located to verify they are set. 302 * are located to verify they are set.
299 */ 303 */
300 return 1; 304 return 0;
301 } 305 }
302 group_first_block = ext4_group_first_block_no(sb, block_group); 306 group_first_block = ext4_group_first_block_no(sb, block_group);
303 307
304 /* check whether block bitmap block number is set */ 308 /* check whether block bitmap block number is set */
305 bitmap_blk = ext4_block_bitmap(sb, desc); 309 blk = ext4_block_bitmap(sb, desc);
306 offset = bitmap_blk - group_first_block; 310 offset = blk - group_first_block;
307 if (!ext4_test_bit(offset, bh->b_data)) 311 if (!ext4_test_bit(offset, bh->b_data))
308 /* bad block bitmap */ 312 /* bad block bitmap */
309 goto err_out; 313 return blk;
310 314
311 /* check whether the inode bitmap block number is set */ 315 /* check whether the inode bitmap block number is set */
312 bitmap_blk = ext4_inode_bitmap(sb, desc); 316 blk = ext4_inode_bitmap(sb, desc);
313 offset = bitmap_blk - group_first_block; 317 offset = blk - group_first_block;
314 if (!ext4_test_bit(offset, bh->b_data)) 318 if (!ext4_test_bit(offset, bh->b_data))
315 /* bad block bitmap */ 319 /* bad block bitmap */
316 goto err_out; 320 return blk;
317 321
318 /* check whether the inode table block number is set */ 322 /* check whether the inode table block number is set */
319 bitmap_blk = ext4_inode_table(sb, desc); 323 blk = ext4_inode_table(sb, desc);
320 offset = bitmap_blk - group_first_block; 324 offset = blk - group_first_block;
321 next_zero_bit = ext4_find_next_zero_bit(bh->b_data, 325 next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
322 offset + EXT4_SB(sb)->s_itb_per_group, 326 offset + EXT4_SB(sb)->s_itb_per_group,
323 offset); 327 offset);
324 if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) 328 if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group)
325 /* good bitmap for inode tables */ 329 /* bad bitmap for inode tables */
326 return 1; 330 return blk;
327
328err_out:
329 ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
330 block_group, bitmap_blk);
331 return 0; 331 return 0;
332} 332}
333 333
@@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb,
336 unsigned int block_group, 336 unsigned int block_group,
337 struct buffer_head *bh) 337 struct buffer_head *bh)
338{ 338{
339 ext4_fsblk_t blk;
340
339 if (buffer_verified(bh)) 341 if (buffer_verified(bh))
340 return; 342 return;
341 343
342 ext4_lock_group(sb, block_group); 344 ext4_lock_group(sb, block_group);
343 if (ext4_valid_block_bitmap(sb, desc, block_group, bh) && 345 blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
344 ext4_block_bitmap_csum_verify(sb, block_group, desc, bh, 346 if (unlikely(blk != 0)) {
345 EXT4_BLOCKS_PER_GROUP(sb) / 8)) 347 ext4_unlock_group(sb, block_group);
346 set_buffer_verified(bh); 348 ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
349 block_group, blk);
350 return;
351 }
352 if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
353 desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
354 ext4_unlock_group(sb, block_group);
355 ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
356 return;
357 }
358 set_buffer_verified(bh);
347 ext4_unlock_group(sb, block_group); 359 ext4_unlock_group(sb, block_group);
348} 360}
349 361
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index f8716eab9995..5c2d1813ebe9 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -79,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
79 if (provided == calculated) 79 if (provided == calculated)
80 return 1; 80 return 1;
81 81
82 ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group);
83 return 0; 82 return 0;
84} 83}
85 84
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c3411d4ce2da..3ab2539b7b2e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -186,7 +186,6 @@ struct mpage_da_data {
186#define EXT4_IO_END_ERROR 0x0002 186#define EXT4_IO_END_ERROR 0x0002
187#define EXT4_IO_END_QUEUED 0x0004 187#define EXT4_IO_END_QUEUED 0x0004
188#define EXT4_IO_END_DIRECT 0x0008 188#define EXT4_IO_END_DIRECT 0x0008
189#define EXT4_IO_END_IN_FSYNC 0x0010
190 189
191struct ext4_io_page { 190struct ext4_io_page {
192 struct page *p_page; 191 struct page *p_page;
@@ -912,9 +911,7 @@ struct ext4_inode_info {
912 struct list_head i_completed_io_list; 911 struct list_head i_completed_io_list;
913 spinlock_t i_completed_io_lock; 912 spinlock_t i_completed_io_lock;
914 atomic_t i_ioend_count; /* Number of outstanding io_end structs */ 913 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
915 /* current io_end structure for async DIO write*/ 914 atomic_t i_unwritten; /* Nr. of inflight conversions pending */
916 ext4_io_end_t *cur_aio_dio;
917 atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
918 915
919 spinlock_t i_block_reservation_lock; 916 spinlock_t i_block_reservation_lock;
920 917
@@ -1233,6 +1230,7 @@ struct ext4_sb_info {
1233 spinlock_t s_md_lock; 1230 spinlock_t s_md_lock;
1234 unsigned short *s_mb_offsets; 1231 unsigned short *s_mb_offsets;
1235 unsigned int *s_mb_maxs; 1232 unsigned int *s_mb_maxs;
1233 unsigned int s_group_info_size;
1236 1234
1237 /* tunables */ 1235 /* tunables */
1238 unsigned long s_stripe; 1236 unsigned long s_stripe;
@@ -1243,6 +1241,7 @@ struct ext4_sb_info {
1243 unsigned int s_mb_order2_reqs; 1241 unsigned int s_mb_order2_reqs;
1244 unsigned int s_mb_group_prealloc; 1242 unsigned int s_mb_group_prealloc;
1245 unsigned int s_max_writeback_mb_bump; 1243 unsigned int s_max_writeback_mb_bump;
1244 unsigned int s_max_dir_size_kb;
1246 /* where last allocation was done - for stream allocation */ 1245 /* where last allocation was done - for stream allocation */
1247 unsigned long s_mb_last_group; 1246 unsigned long s_mb_last_group;
1248 unsigned long s_mb_last_start; 1247 unsigned long s_mb_last_start;
@@ -1270,8 +1269,12 @@ struct ext4_sb_info {
1270 unsigned long s_sectors_written_start; 1269 unsigned long s_sectors_written_start;
1271 u64 s_kbytes_written; 1270 u64 s_kbytes_written;
1272 1271
1272 /* the size of zero-out chunk */
1273 unsigned int s_extent_max_zeroout_kb;
1274
1273 unsigned int s_log_groups_per_flex; 1275 unsigned int s_log_groups_per_flex;
1274 struct flex_groups *s_flex_groups; 1276 struct flex_groups *s_flex_groups;
1277 ext4_group_t s_flex_groups_allocated;
1275 1278
1276 /* workqueue for dio unwritten */ 1279 /* workqueue for dio unwritten */
1277 struct workqueue_struct *dio_unwritten_wq; 1280 struct workqueue_struct *dio_unwritten_wq;
@@ -1328,10 +1331,20 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
1328{ 1331{
1329 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 1332 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1330 io_end->flag |= EXT4_IO_END_UNWRITTEN; 1333 io_end->flag |= EXT4_IO_END_UNWRITTEN;
1331 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 1334 atomic_inc(&EXT4_I(inode)->i_unwritten);
1332 } 1335 }
1333} 1336}
1334 1337
1338static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
1339{
1340 return inode->i_private;
1341}
1342
1343static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
1344{
1345 inode->i_private = io;
1346}
1347
1335/* 1348/*
1336 * Inode dynamic state flags 1349 * Inode dynamic state flags
1337 */ 1350 */
@@ -1345,6 +1358,8 @@ enum {
1345 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ 1358 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1346 EXT4_STATE_NEWENTRY, /* File just added to dir */ 1359 EXT4_STATE_NEWENTRY, /* File just added to dir */
1347 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */ 1360 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
1361 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
1362 nolocking */
1348}; 1363};
1349 1364
1350#define EXT4_INODE_BIT_FNS(name, field, offset) \ 1365#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1932 1947
1933/* fsync.c */ 1948/* fsync.c */
1934extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 1949extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
1935extern int ext4_flush_completed_IO(struct inode *); 1950extern int ext4_flush_unwritten_io(struct inode *);
1936 1951
1937/* hash.c */ 1952/* hash.c */
1938extern int ext4fs_dirhash(const char *name, int len, struct 1953extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void);
1966extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1981extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1967 struct buffer_head *bh, ext4_fsblk_t block, 1982 struct buffer_head *bh, ext4_fsblk_t block,
1968 unsigned long count, int flags); 1983 unsigned long count, int flags);
1984extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
1985 ext4_group_t ngroups);
1969extern int ext4_mb_add_groupinfo(struct super_block *sb, 1986extern int ext4_mb_add_groupinfo(struct super_block *sb,
1970 ext4_group_t i, struct ext4_group_desc *desc); 1987 ext4_group_t i, struct ext4_group_desc *desc);
1971extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, 1988extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
@@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb,
2051extern void *ext4_kvmalloc(size_t size, gfp_t flags); 2068extern void *ext4_kvmalloc(size_t size, gfp_t flags);
2052extern void *ext4_kvzalloc(size_t size, gfp_t flags); 2069extern void *ext4_kvzalloc(size_t size, gfp_t flags);
2053extern void ext4_kvfree(void *ptr); 2070extern void ext4_kvfree(void *ptr);
2071extern int ext4_alloc_flex_bg_array(struct super_block *sb,
2072 ext4_group_t ngroup);
2054extern __printf(4, 5) 2073extern __printf(4, 5)
2055void __ext4_error(struct super_block *, const char *, unsigned int, 2074void __ext4_error(struct super_block *, const char *, unsigned int,
2056 const char *, ...); 2075 const char *, ...);
@@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
2352extern const struct inode_operations ext4_file_inode_operations; 2371extern const struct inode_operations ext4_file_inode_operations;
2353extern const struct file_operations ext4_file_operations; 2372extern const struct file_operations ext4_file_operations;
2354extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2373extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2374extern void ext4_unwritten_wait(struct inode *inode);
2355 2375
2356/* namei.c */ 2376/* namei.c */
2357extern const struct inode_operations ext4_dir_inode_operations; 2377extern const struct inode_operations ext4_dir_inode_operations;
@@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2400 2420
2401/* page-io.c */ 2421/* page-io.c */
2402extern int __init ext4_init_pageio(void); 2422extern int __init ext4_init_pageio(void);
2423extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2403extern void ext4_exit_pageio(void); 2424extern void ext4_exit_pageio(void);
2404extern void ext4_ioend_wait(struct inode *); 2425extern void ext4_ioend_wait(struct inode *);
2405extern void ext4_free_io_end(ext4_io_end_t *io); 2426extern void ext4_free_io_end(ext4_io_end_t *io);
2406extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2427extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2407extern int ext4_end_io_nolock(ext4_io_end_t *io);
2408extern void ext4_io_submit(struct ext4_io_submit *io); 2428extern void ext4_io_submit(struct ext4_io_submit *io);
2409extern int ext4_bio_write_page(struct ext4_io_submit *io, 2429extern int ext4_bio_write_page(struct ext4_io_submit *io,
2410 struct page *page, 2430 struct page *page,
@@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
2452 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); 2472 set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
2453} 2473}
2454 2474
2475/*
2476 * Disable DIO read nolock optimization, so new dioreaders will be forced
2477 * to grab i_mutex
2478 */
2479static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
2480{
2481 ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
2482 smp_mb();
2483}
2484static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
2485{
2486 smp_mb();
2487 ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
2488}
2489
2455#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 2490#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2456 2491
2457/* For ioend & aio unwritten conversion wait queues */ 2492/* For ioend & aio unwritten conversion wait queues */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd0c7ed06772..1c94cca35ed1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1177 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1177 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1178 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1178 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1179 1179
1180 neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1); 1180 le16_add_cpu(&neh->eh_depth, 1);
1181 ext4_mark_inode_dirty(handle, inode); 1181 ext4_mark_inode_dirty(handle, inode);
1182out: 1182out:
1183 brelse(bh); 1183 brelse(bh);
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
1656} 1656}
1657 1657
1658/* 1658/*
1659 * This function does a very simple check to see if we can collapse
1660 * an extent tree with a single extent tree leaf block into the inode.
1661 */
1662static void ext4_ext_try_to_merge_up(handle_t *handle,
1663 struct inode *inode,
1664 struct ext4_ext_path *path)
1665{
1666 size_t s;
1667 unsigned max_root = ext4_ext_space_root(inode, 0);
1668 ext4_fsblk_t blk;
1669
1670 if ((path[0].p_depth != 1) ||
1671 (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
1672 (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
1673 return;
1674
1675 /*
1676 * We need to modify the block allocation bitmap and the block
1677 * group descriptor to release the extent tree block. If we
1678 * can't get the journal credits, give up.
1679 */
1680 if (ext4_journal_extend(handle, 2))
1681 return;
1682
1683 /*
1684 * Copy the extent data up to the inode
1685 */
1686 blk = ext4_idx_pblock(path[0].p_idx);
1687 s = le16_to_cpu(path[1].p_hdr->eh_entries) *
1688 sizeof(struct ext4_extent_idx);
1689 s += sizeof(struct ext4_extent_header);
1690
1691 memcpy(path[0].p_hdr, path[1].p_hdr, s);
1692 path[0].p_depth = 0;
1693 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
1694 (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
1695 path[0].p_hdr->eh_max = cpu_to_le16(max_root);
1696
1697 brelse(path[1].p_bh);
1698 ext4_free_blocks(handle, inode, NULL, blk, 1,
1699 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1700}
1701
1702/*
1659 * This function tries to merge the @ex extent to neighbours in the tree. 1703 * This function tries to merge the @ex extent to neighbours in the tree.
1660 * return 1 if merge left else 0. 1704 * return 1 if merge left else 0.
1661 */ 1705 */
1662static int ext4_ext_try_to_merge(struct inode *inode, 1706static void ext4_ext_try_to_merge(handle_t *handle,
1707 struct inode *inode,
1663 struct ext4_ext_path *path, 1708 struct ext4_ext_path *path,
1664 struct ext4_extent *ex) { 1709 struct ext4_extent *ex) {
1665 struct ext4_extent_header *eh; 1710 struct ext4_extent_header *eh;
1666 unsigned int depth; 1711 unsigned int depth;
1667 int merge_done = 0; 1712 int merge_done = 0;
1668 int ret = 0;
1669 1713
1670 depth = ext_depth(inode); 1714 depth = ext_depth(inode);
1671 BUG_ON(path[depth].p_hdr == NULL); 1715 BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
1675 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); 1719 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1676 1720
1677 if (!merge_done) 1721 if (!merge_done)
1678 ret = ext4_ext_try_to_merge_right(inode, path, ex); 1722 (void) ext4_ext_try_to_merge_right(inode, path, ex);
1679 1723
1680 return ret; 1724 ext4_ext_try_to_merge_up(handle, inode, path);
1681} 1725}
1682 1726
1683/* 1727/*
@@ -1893,7 +1937,7 @@ has_space:
1893merge: 1937merge:
1894 /* try to merge extents */ 1938 /* try to merge extents */
1895 if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) 1939 if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
1896 ext4_ext_try_to_merge(inode, path, nearex); 1940 ext4_ext_try_to_merge(handle, inode, path, nearex);
1897 1941
1898 1942
1899 /* time to correct all indexes above */ 1943 /* time to correct all indexes above */
@@ -1901,7 +1945,7 @@ merge:
1901 if (err) 1945 if (err)
1902 goto cleanup; 1946 goto cleanup;
1903 1947
1904 err = ext4_ext_dirty(handle, inode, path + depth); 1948 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
1905 1949
1906cleanup: 1950cleanup:
1907 if (npath) { 1951 if (npath) {
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2092} 2136}
2093 2137
2094/* 2138/*
2095 * ext4_ext_check_cache() 2139 * ext4_ext_in_cache()
2096 * Checks to see if the given block is in the cache. 2140 * Checks to see if the given block is in the cache.
2097 * If it is, the cached extent is stored in the given 2141 * If it is, the cached extent is stored in the given
2098 * cache extent pointer. If the cached extent is a hole, 2142 * cache extent pointer.
2099 * this routine should be used instead of
2100 * ext4_ext_in_cache if the calling function needs to
2101 * know the size of the hole.
2102 * 2143 *
2103 * @inode: The files inode 2144 * @inode: The files inode
2104 * @block: The block to look for in the cache 2145 * @block: The block to look for in the cache
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
2107 * 2148 *
2108 * Return 0 if cache is invalid; 1 if the cache is valid 2149 * Return 0 if cache is invalid; 1 if the cache is valid
2109 */ 2150 */
2110static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, 2151static int
2111 struct ext4_ext_cache *ex){ 2152ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2153 struct ext4_extent *ex)
2154{
2112 struct ext4_ext_cache *cex; 2155 struct ext4_ext_cache *cex;
2113 struct ext4_sb_info *sbi; 2156 struct ext4_sb_info *sbi;
2114 int ret = 0; 2157 int ret = 0;
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
2125 goto errout; 2168 goto errout;
2126 2169
2127 if (in_range(block, cex->ec_block, cex->ec_len)) { 2170 if (in_range(block, cex->ec_block, cex->ec_len)) {
2128 memcpy(ex, cex, sizeof(struct ext4_ext_cache)); 2171 ex->ee_block = cpu_to_le32(cex->ec_block);
2172 ext4_ext_store_pblock(ex, cex->ec_start);
2173 ex->ee_len = cpu_to_le16(cex->ec_len);
2129 ext_debug("%u cached by %u:%u:%llu\n", 2174 ext_debug("%u cached by %u:%u:%llu\n",
2130 block, 2175 block,
2131 cex->ec_block, cex->ec_len, cex->ec_start); 2176 cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2138,37 +2183,6 @@ errout:
2138} 2183}
2139 2184
2140/* 2185/*
2141 * ext4_ext_in_cache()
2142 * Checks to see if the given block is in the cache.
2143 * If it is, the cached extent is stored in the given
2144 * extent pointer.
2145 *
2146 * @inode: The files inode
2147 * @block: The block to look for in the cache
2148 * @ex: Pointer where the cached extent will be stored
2149 * if it contains block
2150 *
2151 * Return 0 if cache is invalid; 1 if the cache is valid
2152 */
2153static int
2154ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
2155 struct ext4_extent *ex)
2156{
2157 struct ext4_ext_cache cex;
2158 int ret = 0;
2159
2160 if (ext4_ext_check_cache(inode, block, &cex)) {
2161 ex->ee_block = cpu_to_le32(cex.ec_block);
2162 ext4_ext_store_pblock(ex, cex.ec_start);
2163 ex->ee_len = cpu_to_le16(cex.ec_len);
2164 ret = 1;
2165 }
2166
2167 return ret;
2168}
2169
2170
2171/*
2172 * ext4_ext_rm_idx: 2186 * ext4_ext_rm_idx:
2173 * removes index from the index block. 2187 * removes index from the index block.
2174 */ 2188 */
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2274 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2288 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2275 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2289 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2276 ext4_fsblk_t pblk; 2290 ext4_fsblk_t pblk;
2277 int flags = EXT4_FREE_BLOCKS_FORGET; 2291 int flags = 0;
2278 2292
2279 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2293 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2280 flags |= EXT4_FREE_BLOCKS_METADATA; 2294 flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
2295 else if (ext4_should_journal_data(inode))
2296 flags |= EXT4_FREE_BLOCKS_FORGET;
2297
2281 /* 2298 /*
2282 * For bigalloc file systems, we never free a partial cluster 2299 * For bigalloc file systems, we never free a partial cluster
2283 * at the beginning of the extent. Instead, we make a note 2300 * at the beginning of the extent. Instead, we make a note
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2572 struct ext4_ext_path *path = NULL; 2589 struct ext4_ext_path *path = NULL;
2573 ext4_fsblk_t partial_cluster = 0; 2590 ext4_fsblk_t partial_cluster = 0;
2574 handle_t *handle; 2591 handle_t *handle;
2575 int i = 0, err; 2592 int i = 0, err = 0;
2576 2593
2577 ext_debug("truncate since %u to %u\n", start, end); 2594 ext_debug("truncate since %u to %u\n", start, end);
2578 2595
@@ -2604,12 +2621,16 @@ again:
2604 return PTR_ERR(path); 2621 return PTR_ERR(path);
2605 } 2622 }
2606 depth = ext_depth(inode); 2623 depth = ext_depth(inode);
2624 /* Leaf not may not exist only if inode has no blocks at all */
2607 ex = path[depth].p_ext; 2625 ex = path[depth].p_ext;
2608 if (!ex) { 2626 if (!ex) {
2609 ext4_ext_drop_refs(path); 2627 if (depth) {
2610 kfree(path); 2628 EXT4_ERROR_INODE(inode,
2611 path = NULL; 2629 "path[%d].p_hdr == NULL",
2612 goto cont; 2630 depth);
2631 err = -EIO;
2632 }
2633 goto out;
2613 } 2634 }
2614 2635
2615 ee_block = le32_to_cpu(ex->ee_block); 2636 ee_block = le32_to_cpu(ex->ee_block);
@@ -2641,8 +2662,6 @@ again:
2641 goto out; 2662 goto out;
2642 } 2663 }
2643 } 2664 }
2644cont:
2645
2646 /* 2665 /*
2647 * We start scanning from right side, freeing all the blocks 2666 * We start scanning from right side, freeing all the blocks
2648 * after i_size and walking into the tree depth-wise. 2667 * after i_size and walking into the tree depth-wise.
@@ -2662,6 +2681,7 @@ cont:
2662 } 2681 }
2663 path[0].p_depth = depth; 2682 path[0].p_depth = depth;
2664 path[0].p_hdr = ext_inode_hdr(inode); 2683 path[0].p_hdr = ext_inode_hdr(inode);
2684 i = 0;
2665 2685
2666 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2686 if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
2667 err = -EIO; 2687 err = -EIO;
@@ -2923,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
2923 ext4_ext_mark_initialized(ex); 2943 ext4_ext_mark_initialized(ex);
2924 2944
2925 if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) 2945 if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
2926 ext4_ext_try_to_merge(inode, path, ex); 2946 ext4_ext_try_to_merge(handle, inode, path, ex);
2927 2947
2928 err = ext4_ext_dirty(handle, inode, path + depth); 2948 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2929 goto out; 2949 goto out;
2930 } 2950 }
2931 2951
@@ -2957,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
2957 goto fix_extent_len; 2977 goto fix_extent_len;
2958 /* update the extent length and mark as initialized */ 2978 /* update the extent length and mark as initialized */
2959 ex->ee_len = cpu_to_le16(ee_len); 2979 ex->ee_len = cpu_to_le16(ee_len);
2960 ext4_ext_try_to_merge(inode, path, ex); 2980 ext4_ext_try_to_merge(handle, inode, path, ex);
2961 err = ext4_ext_dirty(handle, inode, path + depth); 2981 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2962 goto out; 2982 goto out;
2963 } else if (err) 2983 } else if (err)
2964 goto fix_extent_len; 2984 goto fix_extent_len;
@@ -3040,7 +3060,6 @@ out:
3040 return err ? err : map->m_len; 3060 return err ? err : map->m_len;
3041} 3061}
3042 3062
3043#define EXT4_EXT_ZERO_LEN 7
3044/* 3063/*
3045 * This function is called by ext4_ext_map_blocks() if someone tries to write 3064 * This function is called by ext4_ext_map_blocks() if someone tries to write
3046 * to an uninitialized extent. It may result in splitting the uninitialized 3065 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3066,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3066 struct ext4_map_blocks *map, 3085 struct ext4_map_blocks *map,
3067 struct ext4_ext_path *path) 3086 struct ext4_ext_path *path)
3068{ 3087{
3088 struct ext4_sb_info *sbi;
3069 struct ext4_extent_header *eh; 3089 struct ext4_extent_header *eh;
3070 struct ext4_map_blocks split_map; 3090 struct ext4_map_blocks split_map;
3071 struct ext4_extent zero_ex; 3091 struct ext4_extent zero_ex;
3072 struct ext4_extent *ex; 3092 struct ext4_extent *ex;
3073 ext4_lblk_t ee_block, eof_block; 3093 ext4_lblk_t ee_block, eof_block;
3074 unsigned int ee_len, depth; 3094 unsigned int ee_len, depth;
3075 int allocated; 3095 int allocated, max_zeroout = 0;
3076 int err = 0; 3096 int err = 0;
3077 int split_flag = 0; 3097 int split_flag = 0;
3078 3098
@@ -3080,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3080 "block %llu, max_blocks %u\n", inode->i_ino, 3100 "block %llu, max_blocks %u\n", inode->i_ino,
3081 (unsigned long long)map->m_lblk, map->m_len); 3101 (unsigned long long)map->m_lblk, map->m_len);
3082 3102
3103 sbi = EXT4_SB(inode->i_sb);
3083 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3104 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3084 inode->i_sb->s_blocksize_bits; 3105 inode->i_sb->s_blocksize_bits;
3085 if (eof_block < map->m_lblk + map->m_len) 3106 if (eof_block < map->m_lblk + map->m_len)
@@ -3179,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3179 */ 3200 */
3180 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; 3201 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
3181 3202
3182 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ 3203 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3183 if (ee_len <= 2*EXT4_EXT_ZERO_LEN && 3204 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3184 (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 3205 inode->i_sb->s_blocksize_bits;
3206
3207 /* If extent is less than s_max_zeroout_kb, zeroout directly */
3208 if (max_zeroout && (ee_len <= max_zeroout)) {
3185 err = ext4_ext_zeroout(inode, ex); 3209 err = ext4_ext_zeroout(inode, ex);
3186 if (err) 3210 if (err)
3187 goto out; 3211 goto out;
@@ -3190,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3190 if (err) 3214 if (err)
3191 goto out; 3215 goto out;
3192 ext4_ext_mark_initialized(ex); 3216 ext4_ext_mark_initialized(ex);
3193 ext4_ext_try_to_merge(inode, path, ex); 3217 ext4_ext_try_to_merge(handle, inode, path, ex);
3194 err = ext4_ext_dirty(handle, inode, path + depth); 3218 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3195 goto out; 3219 goto out;
3196 } 3220 }
3197 3221
@@ -3205,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3205 split_map.m_lblk = map->m_lblk; 3229 split_map.m_lblk = map->m_lblk;
3206 split_map.m_len = map->m_len; 3230 split_map.m_len = map->m_len;
3207 3231
3208 if (allocated > map->m_len) { 3232 if (max_zeroout && (allocated > map->m_len)) {
3209 if (allocated <= EXT4_EXT_ZERO_LEN && 3233 if (allocated <= max_zeroout) {
3210 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3211 /* case 3 */ 3234 /* case 3 */
3212 zero_ex.ee_block = 3235 zero_ex.ee_block =
3213 cpu_to_le32(map->m_lblk); 3236 cpu_to_le32(map->m_lblk);
@@ -3219,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3219 goto out; 3242 goto out;
3220 split_map.m_lblk = map->m_lblk; 3243 split_map.m_lblk = map->m_lblk;
3221 split_map.m_len = allocated; 3244 split_map.m_len = allocated;
3222 } else if ((map->m_lblk - ee_block + map->m_len < 3245 } else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
3223 EXT4_EXT_ZERO_LEN) &&
3224 (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3225 /* case 2 */ 3246 /* case 2 */
3226 if (map->m_lblk != ee_block) { 3247 if (map->m_lblk != ee_block) {
3227 zero_ex.ee_block = ex->ee_block; 3248 zero_ex.ee_block = ex->ee_block;
@@ -3241,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3241 } 3262 }
3242 3263
3243 allocated = ext4_split_extent(handle, inode, path, 3264 allocated = ext4_split_extent(handle, inode, path,
3244 &split_map, split_flag, 0); 3265 &split_map, split_flag, 0);
3245 if (allocated < 0) 3266 if (allocated < 0)
3246 err = allocated; 3267 err = allocated;
3247 3268
@@ -3255,7 +3276,7 @@ out:
3255 * to an uninitialized extent. 3276 * to an uninitialized extent.
3256 * 3277 *
3257 * Writing to an uninitialized extent may result in splitting the uninitialized 3278 * Writing to an uninitialized extent may result in splitting the uninitialized
3258 * extent into multiple /initialized uninitialized extents (up to three) 3279 * extent into multiple initialized/uninitialized extents (up to three)
3259 * There are three possibilities: 3280 * There are three possibilities:
3260 * a> There is no split required: Entire extent should be uninitialized 3281 * a> There is no split required: Entire extent should be uninitialized
3261 * b> Splits in two extents: Write is happening at either end of the extent 3282 * b> Splits in two extents: Write is happening at either end of the extent
@@ -3332,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3332 /* note: ext4_ext_correct_indexes() isn't needed here because 3353 /* note: ext4_ext_correct_indexes() isn't needed here because
3333 * borders are not changed 3354 * borders are not changed
3334 */ 3355 */
3335 ext4_ext_try_to_merge(inode, path, ex); 3356 ext4_ext_try_to_merge(handle, inode, path, ex);
3336 3357
3337 /* Mark modified extent as dirty */ 3358 /* Mark modified extent as dirty */
3338 err = ext4_ext_dirty(handle, inode, path + depth); 3359 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3339out: 3360out:
3340 ext4_ext_show_leaf(inode, path); 3361 ext4_ext_show_leaf(inode, path);
3341 return err; 3362 return err;
@@ -3599,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3599{ 3620{
3600 int ret = 0; 3621 int ret = 0;
3601 int err = 0; 3622 int err = 0;
3602 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3623 ext4_io_end_t *io = ext4_inode_aio(inode);
3603 3624
3604 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " 3625 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
3605 "block %llu, max_blocks %u, flags %x, allocated %u\n", 3626 "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3614,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3614 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3635 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3615 ret = ext4_split_unwritten_extents(handle, inode, map, 3636 ret = ext4_split_unwritten_extents(handle, inode, map,
3616 path, flags); 3637 path, flags);
3638 if (ret <= 0)
3639 goto out;
3617 /* 3640 /*
3618 * Flag the inode(non aio case) or end_io struct (aio case) 3641 * Flag the inode(non aio case) or end_io struct (aio case)
3619 * that this IO needs to conversion to written when IO is 3642 * that this IO needs to conversion to written when IO is
@@ -3857,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3857 unsigned int allocated = 0, offset = 0; 3880 unsigned int allocated = 0, offset = 0;
3858 unsigned int allocated_clusters = 0; 3881 unsigned int allocated_clusters = 0;
3859 struct ext4_allocation_request ar; 3882 struct ext4_allocation_request ar;
3860 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3883 ext4_io_end_t *io = ext4_inode_aio(inode);
3861 ext4_lblk_t cluster_offset; 3884 ext4_lblk_t cluster_offset;
3885 int set_unwritten = 0;
3862 3886
3863 ext_debug("blocks %u/%u requested for inode %lu\n", 3887 ext_debug("blocks %u/%u requested for inode %lu\n",
3864 map->m_lblk, map->m_len, inode->i_ino); 3888 map->m_lblk, map->m_len, inode->i_ino);
@@ -4081,13 +4105,8 @@ got_allocated_blocks:
4081 * For non asycn direct IO case, flag the inode state 4105 * For non asycn direct IO case, flag the inode state
4082 * that we need to perform conversion when IO is done. 4106 * that we need to perform conversion when IO is done.
4083 */ 4107 */
4084 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4108 if ((flags & EXT4_GET_BLOCKS_PRE_IO))
4085 if (io) 4109 set_unwritten = 1;
4086 ext4_set_io_unwritten_flag(inode, io);
4087 else
4088 ext4_set_inode_state(inode,
4089 EXT4_STATE_DIO_UNWRITTEN);
4090 }
4091 if (ext4_should_dioread_nolock(inode)) 4110 if (ext4_should_dioread_nolock(inode))
4092 map->m_flags |= EXT4_MAP_UNINIT; 4111 map->m_flags |= EXT4_MAP_UNINIT;
4093 } 4112 }
@@ -4099,6 +4118,15 @@ got_allocated_blocks:
4099 if (!err) 4118 if (!err)
4100 err = ext4_ext_insert_extent(handle, inode, path, 4119 err = ext4_ext_insert_extent(handle, inode, path,
4101 &newex, flags); 4120 &newex, flags);
4121
4122 if (!err && set_unwritten) {
4123 if (io)
4124 ext4_set_io_unwritten_flag(inode, io);
4125 else
4126 ext4_set_inode_state(inode,
4127 EXT4_STATE_DIO_UNWRITTEN);
4128 }
4129
4102 if (err && free_on_err) { 4130 if (err && free_on_err) {
4103 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? 4131 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
4104 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; 4132 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4240,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
4240 * finish any pending end_io work so we won't run the risk of 4268 * finish any pending end_io work so we won't run the risk of
4241 * converting any truncated blocks to initialized later 4269 * converting any truncated blocks to initialized later
4242 */ 4270 */
4243 ext4_flush_completed_IO(inode); 4271 ext4_flush_unwritten_io(inode);
4244 4272
4245 /* 4273 /*
4246 * probably first extent we're gonna free will be last in block 4274 * probably first extent we're gonna free will be last in block
@@ -4768,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4768 loff_t first_page_offset, last_page_offset; 4796 loff_t first_page_offset, last_page_offset;
4769 int credits, err = 0; 4797 int credits, err = 0;
4770 4798
4799 /*
4800 * Write out all dirty pages to avoid race conditions
4801 * Then release them.
4802 */
4803 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4804 err = filemap_write_and_wait_range(mapping,
4805 offset, offset + length - 1);
4806
4807 if (err)
4808 return err;
4809 }
4810
4811 mutex_lock(&inode->i_mutex);
4812 /* It's not possible punch hole on append only file */
4813 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
4814 err = -EPERM;
4815 goto out_mutex;
4816 }
4817 if (IS_SWAPFILE(inode)) {
4818 err = -ETXTBSY;
4819 goto out_mutex;
4820 }
4821
4771 /* No need to punch hole beyond i_size */ 4822 /* No need to punch hole beyond i_size */
4772 if (offset >= inode->i_size) 4823 if (offset >= inode->i_size)
4773 return 0; 4824 goto out_mutex;
4774 4825
4775 /* 4826 /*
4776 * If the hole extends beyond i_size, set the hole 4827 * If the hole extends beyond i_size, set the hole
@@ -4788,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4788 first_page_offset = first_page << PAGE_CACHE_SHIFT; 4839 first_page_offset = first_page << PAGE_CACHE_SHIFT;
4789 last_page_offset = last_page << PAGE_CACHE_SHIFT; 4840 last_page_offset = last_page << PAGE_CACHE_SHIFT;
4790 4841
4791 /*
4792 * Write out all dirty pages to avoid race conditions
4793 * Then release them.
4794 */
4795 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4796 err = filemap_write_and_wait_range(mapping,
4797 offset, offset + length - 1);
4798
4799 if (err)
4800 return err;
4801 }
4802
4803 /* Now release the pages */ 4842 /* Now release the pages */
4804 if (last_page_offset > first_page_offset) { 4843 if (last_page_offset > first_page_offset) {
4805 truncate_pagecache_range(inode, first_page_offset, 4844 truncate_pagecache_range(inode, first_page_offset,
4806 last_page_offset - 1); 4845 last_page_offset - 1);
4807 } 4846 }
4808 4847
4809 /* finish any pending end_io work */ 4848 /* Wait all existing dio workers, newcomers will block on i_mutex */
4810 ext4_flush_completed_IO(inode); 4849 ext4_inode_block_unlocked_dio(inode);
4850 err = ext4_flush_unwritten_io(inode);
4851 if (err)
4852 goto out_dio;
4853 inode_dio_wait(inode);
4811 4854
4812 credits = ext4_writepage_trans_blocks(inode); 4855 credits = ext4_writepage_trans_blocks(inode);
4813 handle = ext4_journal_start(inode, credits); 4856 handle = ext4_journal_start(inode, credits);
4814 if (IS_ERR(handle)) 4857 if (IS_ERR(handle)) {
4815 return PTR_ERR(handle); 4858 err = PTR_ERR(handle);
4859 goto out_dio;
4860 }
4816 4861
4817 err = ext4_orphan_add(handle, inode);
4818 if (err)
4819 goto out;
4820 4862
4821 /* 4863 /*
4822 * Now we need to zero out the non-page-aligned data in the 4864 * Now we need to zero out the non-page-aligned data in the
@@ -4902,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4902 up_write(&EXT4_I(inode)->i_data_sem); 4944 up_write(&EXT4_I(inode)->i_data_sem);
4903 4945
4904out: 4946out:
4905 ext4_orphan_del(handle, inode);
4906 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4947 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4907 ext4_mark_inode_dirty(handle, inode); 4948 ext4_mark_inode_dirty(handle, inode);
4908 ext4_journal_stop(handle); 4949 ext4_journal_stop(handle);
4950out_dio:
4951 ext4_inode_resume_unlocked_dio(inode);
4952out_mutex:
4953 mutex_unlock(&inode->i_mutex);
4909 return err; 4954 return err;
4910} 4955}
4911int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4956int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3b0e3bdaabfc..bf3966bccd34 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_aiodio_wait(struct inode *inode) 58void ext4_unwritten_wait(struct inode *inode)
59{ 59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode); 60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61 61
62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0)); 62 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
63} 63}
64 64
65/* 65/*
@@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
116 "performance will be poor.", 116 "performance will be poor.",
117 inode->i_ino, current->comm); 117 inode->i_ino, current->comm);
118 mutex_lock(ext4_aio_mutex(inode)); 118 mutex_lock(ext4_aio_mutex(inode));
119 ext4_aiodio_wait(inode); 119 ext4_unwritten_wait(inode);
120 } 120 }
121 121
122 BUG_ON(iocb->ki_pos != pos); 122 BUG_ON(iocb->ki_pos != pos);
@@ -207,6 +207,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
207static const struct vm_operations_struct ext4_file_vm_ops = { 207static const struct vm_operations_struct ext4_file_vm_ops = {
208 .fault = filemap_fault, 208 .fault = filemap_fault,
209 .page_mkwrite = ext4_page_mkwrite, 209 .page_mkwrite = ext4_page_mkwrite,
210 .remap_pages = generic_file_remap_pages,
210}; 211};
211 212
212static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 213static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -217,7 +218,6 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
217 return -ENOEXEC; 218 return -ENOEXEC;
218 file_accessed(file); 219 file_accessed(file);
219 vma->vm_ops = &ext4_file_vm_ops; 220 vma->vm_ops = &ext4_file_vm_ops;
220 vma->vm_flags |= VM_CAN_NONLINEAR;
221 return 0; 221 return 0;
222} 222}
223 223
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a1dcea4f12e..be1d89f385b4 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,87 +34,6 @@
34 34
35#include <trace/events/ext4.h> 35#include <trace/events/ext4.h>
36 36
37static void dump_completed_IO(struct inode * inode)
38{
39#ifdef EXT4FS_DEBUG
40 struct list_head *cur, *before, *after;
41 ext4_io_end_t *io, *io0, *io1;
42 unsigned long flags;
43
44 if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
45 ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
46 return;
47 }
48
49 ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
50 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
51 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
52 cur = &io->list;
53 before = cur->prev;
54 io0 = container_of(before, ext4_io_end_t, list);
55 after = cur->next;
56 io1 = container_of(after, ext4_io_end_t, list);
57
58 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
59 io, inode->i_ino, io0, io1);
60 }
61 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
62#endif
63}
64
65/*
66 * This function is called from ext4_sync_file().
67 *
68 * When IO is completed, the work to convert unwritten extents to
69 * written is queued on workqueue but may not get immediately
70 * scheduled. When fsync is called, we need to ensure the
71 * conversion is complete before fsync returns.
72 * The inode keeps track of a list of pending/completed IO that
73 * might needs to do the conversion. This function walks through
74 * the list and convert the related unwritten extents for completed IO
75 * to written.
76 * The function return the number of pending IOs on success.
77 */
78int ext4_flush_completed_IO(struct inode *inode)
79{
80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode);
82 unsigned long flags;
83 int ret = 0;
84 int ret2 = 0;
85
86 dump_completed_IO(inode);
87 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
88 while (!list_empty(&ei->i_completed_io_list)){
89 io = list_entry(ei->i_completed_io_list.next,
90 ext4_io_end_t, list);
91 list_del_init(&io->list);
92 io->flag |= EXT4_IO_END_IN_FSYNC;
93 /*
94 * Calling ext4_end_io_nolock() to convert completed
95 * IO to written.
96 *
97 * When ext4_sync_file() is called, run_queue() may already
98 * about to flush the work corresponding to this io structure.
99 * It will be upset if it founds the io structure related
100 * to the work-to-be schedule is freed.
101 *
102 * Thus we need to keep the io structure still valid here after
103 * conversion finished. The io structure has a flag to
104 * avoid double converting from both fsync and background work
105 * queue work.
106 */
107 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
108 ret = ext4_end_io_nolock(io);
109 if (ret < 0)
110 ret2 = ret;
111 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
112 io->flag &= ~EXT4_IO_END_IN_FSYNC;
113 }
114 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
115 return (ret2 < 0) ? ret2 : 0;
116}
117
118/* 37/*
119 * If we're not journaling and this is a just-created file, we have to 38 * If we're not journaling and this is a just-created file, we have to
120 * sync our parent directory (if it was freshly created) since 39 * sync our parent directory (if it was freshly created) since
@@ -203,7 +122,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
203 struct inode *inode = file->f_mapping->host; 122 struct inode *inode = file->f_mapping->host;
204 struct ext4_inode_info *ei = EXT4_I(inode); 123 struct ext4_inode_info *ei = EXT4_I(inode);
205 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 124 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
206 int ret; 125 int ret, err;
207 tid_t commit_tid; 126 tid_t commit_tid;
208 bool needs_barrier = false; 127 bool needs_barrier = false;
209 128
@@ -219,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
219 if (inode->i_sb->s_flags & MS_RDONLY) 138 if (inode->i_sb->s_flags & MS_RDONLY)
220 goto out; 139 goto out;
221 140
222 ret = ext4_flush_completed_IO(inode); 141 ret = ext4_flush_unwritten_io(inode);
223 if (ret < 0) 142 if (ret < 0)
224 goto out; 143 goto out;
225 144
@@ -255,8 +174,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
255 needs_barrier = true; 174 needs_barrier = true;
256 jbd2_log_start_commit(journal, commit_tid); 175 jbd2_log_start_commit(journal, commit_tid);
257 ret = jbd2_log_wait_commit(journal, commit_tid); 176 ret = jbd2_log_wait_commit(journal, commit_tid);
258 if (needs_barrier) 177 if (needs_barrier) {
259 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 178 err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
179 if (!ret)
180 ret = err;
181 }
260 out: 182 out:
261 mutex_unlock(&inode->i_mutex); 183 mutex_unlock(&inode->i_mutex);
262 trace_ext4_sync_file_exit(inode, ret); 184 trace_ext4_sync_file_exit(inode, ret);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 26154b81b836..fa36372f3fdf 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -697,6 +697,15 @@ got_group:
697 if (!gdp) 697 if (!gdp)
698 goto fail; 698 goto fail;
699 699
700 /*
701 * Check free inodes count before loading bitmap.
702 */
703 if (ext4_free_inodes_count(sb, gdp) == 0) {
704 if (++group == ngroups)
705 group = 0;
706 continue;
707 }
708
700 brelse(inode_bitmap_bh); 709 brelse(inode_bitmap_bh);
701 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); 710 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
702 if (!inode_bitmap_bh) 711 if (!inode_bitmap_bh)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 830e1b2bf145..792e388e7b44 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -807,16 +807,30 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
807 807
808retry: 808retry:
809 if (rw == READ && ext4_should_dioread_nolock(inode)) { 809 if (rw == READ && ext4_should_dioread_nolock(inode)) {
810 if (unlikely(!list_empty(&ei->i_completed_io_list))) { 810 if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) {
811 mutex_lock(&inode->i_mutex); 811 mutex_lock(&inode->i_mutex);
812 ext4_flush_completed_IO(inode); 812 ext4_flush_unwritten_io(inode);
813 mutex_unlock(&inode->i_mutex); 813 mutex_unlock(&inode->i_mutex);
814 } 814 }
815 /*
816 * Nolock dioread optimization may be dynamically disabled
817 * via ext4_inode_block_unlocked_dio(). Check inode's state
818 * while holding extra i_dio_count ref.
819 */
820 atomic_inc(&inode->i_dio_count);
821 smp_mb();
822 if (unlikely(ext4_test_inode_state(inode,
823 EXT4_STATE_DIOREAD_LOCK))) {
824 inode_dio_done(inode);
825 goto locked;
826 }
815 ret = __blockdev_direct_IO(rw, iocb, inode, 827 ret = __blockdev_direct_IO(rw, iocb, inode,
816 inode->i_sb->s_bdev, iov, 828 inode->i_sb->s_bdev, iov,
817 offset, nr_segs, 829 offset, nr_segs,
818 ext4_get_block, NULL, NULL, 0); 830 ext4_get_block, NULL, NULL, 0);
831 inode_dio_done(inode);
819 } else { 832 } else {
833locked:
820 ret = blockdev_direct_IO(rw, iocb, inode, iov, 834 ret = blockdev_direct_IO(rw, iocb, inode, iov,
821 offset, nr_segs, ext4_get_block); 835 offset, nr_segs, ext4_get_block);
822 836
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dff171c3a123..b3c243b9afa5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
732 err = ext4_map_blocks(handle, inode, &map, 732 err = ext4_map_blocks(handle, inode, &map,
733 create ? EXT4_GET_BLOCKS_CREATE : 0); 733 create ? EXT4_GET_BLOCKS_CREATE : 0);
734 734
735 /* ensure we send some value back into *errp */
736 *errp = 0;
737
735 if (err < 0) 738 if (err < 0)
736 *errp = err; 739 *errp = err;
737 if (err <= 0) 740 if (err <= 0)
738 return NULL; 741 return NULL;
739 *errp = 0;
740 742
741 bh = sb_getblk(inode->i_sb, map.m_pblk); 743 bh = sb_getblk(inode->i_sb, map.m_pblk);
742 if (!bh) { 744 if (!bh) {
@@ -1954,9 +1956,6 @@ out:
1954 return ret; 1956 return ret;
1955} 1957}
1956 1958
1957static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
1958static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
1959
1960/* 1959/*
1961 * Note that we don't need to start a transaction unless we're journaling data 1960 * Note that we don't need to start a transaction unless we're journaling data
1962 * because we should have holes filled from ext4_page_mkwrite(). We even don't 1961 * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2463,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb)
2463 free_blocks = EXT4_C2B(sbi, 2462 free_blocks = EXT4_C2B(sbi,
2464 percpu_counter_read_positive(&sbi->s_freeclusters_counter)); 2463 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
2465 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); 2464 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2465 /*
2466 * Start pushing delalloc when 1/2 of free blocks are dirty.
2467 */
2468 if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
2469 !writeback_in_progress(sb->s_bdi) &&
2470 down_read_trylock(&sb->s_umount)) {
2471 writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
2472 up_read(&sb->s_umount);
2473 }
2474
2466 if (2 * free_blocks < 3 * dirty_blocks || 2475 if (2 * free_blocks < 3 * dirty_blocks ||
2467 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { 2476 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
2468 /* 2477 /*
@@ -2471,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb)
2471 */ 2480 */
2472 return 1; 2481 return 1;
2473 } 2482 }
2474 /*
2475 * Even if we don't switch but are nearing capacity,
2476 * start pushing delalloc when 1/2 of free blocks are dirty.
2477 */
2478 if (free_blocks < 2 * dirty_blocks)
2479 writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
2480
2481 return 0; 2483 return 0;
2482} 2484}
2483 2485
@@ -2879,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2879{ 2881{
2880 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 2882 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2881 ext4_io_end_t *io_end = iocb->private; 2883 ext4_io_end_t *io_end = iocb->private;
2882 struct workqueue_struct *wq;
2883 unsigned long flags;
2884 struct ext4_inode_info *ei;
2885 2884
2886 /* if not async direct IO or dio with 0 bytes write, just return */ 2885 /* if not async direct IO or dio with 0 bytes write, just return */
2887 if (!io_end || !size) 2886 if (!io_end || !size)
@@ -2910,24 +2909,14 @@ out:
2910 io_end->iocb = iocb; 2909 io_end->iocb = iocb;
2911 io_end->result = ret; 2910 io_end->result = ret;
2912 } 2911 }
2913 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
2914 2912
2915 /* Add the io_end to per-inode completed aio dio list*/ 2913 ext4_add_complete_io(io_end);
2916 ei = EXT4_I(io_end->inode);
2917 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
2918 list_add_tail(&io_end->list, &ei->i_completed_io_list);
2919 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
2920
2921 /* queue the work to convert unwritten extents to written */
2922 queue_work(wq, &io_end->work);
2923} 2914}
2924 2915
2925static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) 2916static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2926{ 2917{
2927 ext4_io_end_t *io_end = bh->b_private; 2918 ext4_io_end_t *io_end = bh->b_private;
2928 struct workqueue_struct *wq;
2929 struct inode *inode; 2919 struct inode *inode;
2930 unsigned long flags;
2931 2920
2932 if (!test_clear_buffer_uninit(bh) || !io_end) 2921 if (!test_clear_buffer_uninit(bh) || !io_end)
2933 goto out; 2922 goto out;
@@ -2946,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2946 */ 2935 */
2947 inode = io_end->inode; 2936 inode = io_end->inode;
2948 ext4_set_io_unwritten_flag(inode, io_end); 2937 ext4_set_io_unwritten_flag(inode, io_end);
2949 2938 ext4_add_complete_io(io_end);
2950 /* Add the io_end to per-inode completed io list*/
2951 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
2952 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
2953 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
2954
2955 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
2956 /* queue the work to convert unwritten extents to written */
2957 queue_work(wq, &io_end->work);
2958out: 2939out:
2959 bh->b_private = NULL; 2940 bh->b_private = NULL;
2960 bh->b_end_io = NULL; 2941 bh->b_end_io = NULL;
@@ -3029,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3029 overwrite = *((int *)iocb->private); 3010 overwrite = *((int *)iocb->private);
3030 3011
3031 if (overwrite) { 3012 if (overwrite) {
3013 atomic_inc(&inode->i_dio_count);
3032 down_read(&EXT4_I(inode)->i_data_sem); 3014 down_read(&EXT4_I(inode)->i_data_sem);
3033 mutex_unlock(&inode->i_mutex); 3015 mutex_unlock(&inode->i_mutex);
3034 } 3016 }
@@ -3054,7 +3036,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3054 * hook to the iocb. 3036 * hook to the iocb.
3055 */ 3037 */
3056 iocb->private = NULL; 3038 iocb->private = NULL;
3057 EXT4_I(inode)->cur_aio_dio = NULL; 3039 ext4_inode_aio_set(inode, NULL);
3058 if (!is_sync_kiocb(iocb)) { 3040 if (!is_sync_kiocb(iocb)) {
3059 ext4_io_end_t *io_end = 3041 ext4_io_end_t *io_end =
3060 ext4_init_io_end(inode, GFP_NOFS); 3042 ext4_init_io_end(inode, GFP_NOFS);
@@ -3071,7 +3053,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3071 * is a unwritten extents needs to be converted 3053 * is a unwritten extents needs to be converted
3072 * when IO is completed. 3054 * when IO is completed.
3073 */ 3055 */
3074 EXT4_I(inode)->cur_aio_dio = iocb->private; 3056 ext4_inode_aio_set(inode, io_end);
3075 } 3057 }
3076 3058
3077 if (overwrite) 3059 if (overwrite)
@@ -3091,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3091 NULL, 3073 NULL,
3092 DIO_LOCKING); 3074 DIO_LOCKING);
3093 if (iocb->private) 3075 if (iocb->private)
3094 EXT4_I(inode)->cur_aio_dio = NULL; 3076 ext4_inode_aio_set(inode, NULL);
3095 /* 3077 /*
3096 * The io_end structure takes a reference to the inode, 3078 * The io_end structure takes a reference to the inode,
3097 * that structure needs to be destroyed and the 3079 * that structure needs to be destroyed and the
@@ -3126,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3126 retake_lock: 3108 retake_lock:
3127 /* take i_mutex locking again if we do a ovewrite dio */ 3109 /* take i_mutex locking again if we do a ovewrite dio */
3128 if (overwrite) { 3110 if (overwrite) {
3111 inode_dio_done(inode);
3129 up_read(&EXT4_I(inode)->i_data_sem); 3112 up_read(&EXT4_I(inode)->i_data_sem);
3130 mutex_lock(&inode->i_mutex); 3113 mutex_lock(&inode->i_mutex);
3131 } 3114 }
@@ -3313,7 +3296,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3313 * handle: The journal handle 3296 * handle: The journal handle
3314 * inode: The files inode 3297 * inode: The files inode
3315 * page: A locked page that contains the offset "from" 3298 * page: A locked page that contains the offset "from"
3316 * from: The starting byte offset (from the begining of the file) 3299 * from: The starting byte offset (from the beginning of the file)
3317 * to begin discarding 3300 * to begin discarding
3318 * len: The length of bytes to discard 3301 * len: The length of bytes to discard
3319 * flags: Optional flags that may be used: 3302 * flags: Optional flags that may be used:
@@ -3321,11 +3304,11 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3321 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 3304 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
3322 * Only zero the regions of the page whose buffer heads 3305 * Only zero the regions of the page whose buffer heads
3323 * have already been unmapped. This flag is appropriate 3306 * have already been unmapped. This flag is appropriate
3324 * for updateing the contents of a page whose blocks may 3307 * for updating the contents of a page whose blocks may
3325 * have already been released, and we only want to zero 3308 * have already been released, and we only want to zero
3326 * out the regions that correspond to those released blocks. 3309 * out the regions that correspond to those released blocks.
3327 * 3310 *
3328 * Returns zero on sucess or negative on failure. 3311 * Returns zero on success or negative on failure.
3329 */ 3312 */
3330static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3313static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3331 struct inode *inode, struct page *page, loff_t from, 3314 struct inode *inode, struct page *page, loff_t from,
@@ -3486,7 +3469,7 @@ int ext4_can_truncate(struct inode *inode)
3486 * @offset: The offset where the hole will begin 3469 * @offset: The offset where the hole will begin
3487 * @len: The length of the hole 3470 * @len: The length of the hole
3488 * 3471 *
3489 * Returns: 0 on sucess or negative on failure 3472 * Returns: 0 on success or negative on failure
3490 */ 3473 */
3491 3474
3492int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 3475int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
@@ -4008,7 +3991,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
4008 3991
4009 if (i_blocks <= ~0U) { 3992 if (i_blocks <= ~0U) {
4010 /* 3993 /*
4011 * i_blocks can be represnted in a 32 bit variable 3994 * i_blocks can be represented in a 32 bit variable
4012 * as multiple of 512 bytes 3995 * as multiple of 512 bytes
4013 */ 3996 */
4014 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 3997 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
@@ -4052,6 +4035,7 @@ static int ext4_do_update_inode(handle_t *handle,
4052 struct ext4_inode_info *ei = EXT4_I(inode); 4035 struct ext4_inode_info *ei = EXT4_I(inode);
4053 struct buffer_head *bh = iloc->bh; 4036 struct buffer_head *bh = iloc->bh;
4054 int err = 0, rc, block; 4037 int err = 0, rc, block;
4038 int need_datasync = 0;
4055 uid_t i_uid; 4039 uid_t i_uid;
4056 gid_t i_gid; 4040 gid_t i_gid;
4057 4041
@@ -4102,7 +4086,10 @@ static int ext4_do_update_inode(handle_t *handle,
4102 raw_inode->i_file_acl_high = 4086 raw_inode->i_file_acl_high =
4103 cpu_to_le16(ei->i_file_acl >> 32); 4087 cpu_to_le16(ei->i_file_acl >> 32);
4104 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 4088 raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
4105 ext4_isize_set(raw_inode, ei->i_disksize); 4089 if (ei->i_disksize != ext4_isize(raw_inode)) {
4090 ext4_isize_set(raw_inode, ei->i_disksize);
4091 need_datasync = 1;
4092 }
4106 if (ei->i_disksize > 0x7fffffffULL) { 4093 if (ei->i_disksize > 0x7fffffffULL) {
4107 struct super_block *sb = inode->i_sb; 4094 struct super_block *sb = inode->i_sb;
4108 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 4095 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -4155,7 +4142,7 @@ static int ext4_do_update_inode(handle_t *handle,
4155 err = rc; 4142 err = rc;
4156 ext4_clear_inode_state(inode, EXT4_STATE_NEW); 4143 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
4157 4144
4158 ext4_update_inode_fsync_trans(handle, inode, 0); 4145 ext4_update_inode_fsync_trans(handle, inode, need_datasync);
4159out_brelse: 4146out_brelse:
4160 brelse(bh); 4147 brelse(bh);
4161 ext4_std_error(inode->i_sb, err); 4148 ext4_std_error(inode->i_sb, err);
@@ -4169,7 +4156,7 @@ out_brelse:
4169 * 4156 *
4170 * - Within generic_file_write() for O_SYNC files. 4157 * - Within generic_file_write() for O_SYNC files.
4171 * Here, there will be no transaction running. We wait for any running 4158 * Here, there will be no transaction running. We wait for any running
4172 * trasnaction to commit. 4159 * transaction to commit.
4173 * 4160 *
4174 * - Within sys_sync(), kupdate and such. 4161 * - Within sys_sync(), kupdate and such.
4175 * We wait on commit, if tol to. 4162 * We wait on commit, if tol to.
@@ -4298,7 +4285,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4298 } 4285 }
4299 4286
4300 if (attr->ia_valid & ATTR_SIZE) { 4287 if (attr->ia_valid & ATTR_SIZE) {
4301 inode_dio_wait(inode);
4302 4288
4303 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4289 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4304 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4290 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4347,8 +4333,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4347 } 4333 }
4348 4334
4349 if (attr->ia_valid & ATTR_SIZE) { 4335 if (attr->ia_valid & ATTR_SIZE) {
4350 if (attr->ia_size != i_size_read(inode)) 4336 if (attr->ia_size != i_size_read(inode)) {
4351 truncate_setsize(inode, attr->ia_size); 4337 truncate_setsize(inode, attr->ia_size);
4338 /* Inode size will be reduced, wait for dio in flight.
4339 * Temporarily disable dioread_nolock to prevent
4340 * livelock. */
4341 if (orphan) {
4342 ext4_inode_block_unlocked_dio(inode);
4343 inode_dio_wait(inode);
4344 ext4_inode_resume_unlocked_dio(inode);
4345 }
4346 }
4352 ext4_truncate(inode); 4347 ext4_truncate(inode);
4353 } 4348 }
4354 4349
@@ -4413,7 +4408,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4413 * worse case, the indexs blocks spread over different block groups 4408 * worse case, the indexs blocks spread over different block groups
4414 * 4409 *
4415 * If datablocks are discontiguous, they are possible to spread over 4410 * If datablocks are discontiguous, they are possible to spread over
4416 * different block groups too. If they are contiuguous, with flexbg, 4411 * different block groups too. If they are contiguous, with flexbg,
4417 * they could still across block group boundary. 4412 * they could still across block group boundary.
4418 * 4413 *
4419 * Also account for superblock, inode, quota and xattr blocks 4414 * Also account for superblock, inode, quota and xattr blocks
@@ -4727,6 +4722,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4727 return err; 4722 return err;
4728 } 4723 }
4729 4724
4725 /* Wait for all existing dio workers */
4726 ext4_inode_block_unlocked_dio(inode);
4727 inode_dio_wait(inode);
4728
4730 jbd2_journal_lock_updates(journal); 4729 jbd2_journal_lock_updates(journal);
4731 4730
4732 /* 4731 /*
@@ -4746,6 +4745,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4746 ext4_set_aops(inode); 4745 ext4_set_aops(inode);
4747 4746
4748 jbd2_journal_unlock_updates(journal); 4747 jbd2_journal_unlock_updates(journal);
4748 ext4_inode_resume_unlocked_dio(inode);
4749 4749
4750 /* Finally we can mark the inode as dirty. */ 4750 /* Finally we can mark the inode as dirty. */
4751 4751
@@ -4780,6 +4780,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4780 int retries = 0; 4780 int retries = 0;
4781 4781
4782 sb_start_pagefault(inode->i_sb); 4782 sb_start_pagefault(inode->i_sb);
4783 file_update_time(vma->vm_file);
4783 /* Delalloc case is easy... */ 4784 /* Delalloc case is easy... */
4784 if (test_opt(inode->i_sb, DELALLOC) && 4785 if (test_opt(inode->i_sb, DELALLOC) &&
4785 !ext4_should_journal_data(inode) && 4786 !ext4_should_journal_data(inode) &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7f7dad787603..5747f52f7c72 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -233,7 +233,7 @@ group_extend_out:
233 233
234 case EXT4_IOC_MOVE_EXT: { 234 case EXT4_IOC_MOVE_EXT: {
235 struct move_extent me; 235 struct move_extent me;
236 struct file *donor_filp; 236 struct fd donor;
237 int err; 237 int err;
238 238
239 if (!(filp->f_mode & FMODE_READ) || 239 if (!(filp->f_mode & FMODE_READ) ||
@@ -245,11 +245,11 @@ group_extend_out:
245 return -EFAULT; 245 return -EFAULT;
246 me.moved_len = 0; 246 me.moved_len = 0;
247 247
248 donor_filp = fget(me.donor_fd); 248 donor = fdget(me.donor_fd);
249 if (!donor_filp) 249 if (!donor.file)
250 return -EBADF; 250 return -EBADF;
251 251
252 if (!(donor_filp->f_mode & FMODE_WRITE)) { 252 if (!(donor.file->f_mode & FMODE_WRITE)) {
253 err = -EBADF; 253 err = -EBADF;
254 goto mext_out; 254 goto mext_out;
255 } 255 }
@@ -258,14 +258,15 @@ group_extend_out:
258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 258 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
259 ext4_msg(sb, KERN_ERR, 259 ext4_msg(sb, KERN_ERR,
260 "Online defrag not supported with bigalloc"); 260 "Online defrag not supported with bigalloc");
261 return -EOPNOTSUPP; 261 err = -EOPNOTSUPP;
262 goto mext_out;
262 } 263 }
263 264
264 err = mnt_want_write_file(filp); 265 err = mnt_want_write_file(filp);
265 if (err) 266 if (err)
266 goto mext_out; 267 goto mext_out;
267 268
268 err = ext4_move_extents(filp, donor_filp, me.orig_start, 269 err = ext4_move_extents(filp, donor.file, me.orig_start,
269 me.donor_start, me.len, &me.moved_len); 270 me.donor_start, me.len, &me.moved_len);
270 mnt_drop_write_file(filp); 271 mnt_drop_write_file(filp);
271 272
@@ -273,7 +274,7 @@ group_extend_out:
273 &me, sizeof(me))) 274 &me, sizeof(me)))
274 err = -EFAULT; 275 err = -EFAULT;
275mext_out: 276mext_out:
276 fput(donor_filp); 277 fdput(donor);
277 return err; 278 return err;
278 } 279 }
279 280
@@ -365,26 +366,11 @@ group_add_out:
365 return -EOPNOTSUPP; 366 return -EOPNOTSUPP;
366 } 367 }
367 368
368 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
369 EXT4_FEATURE_INCOMPAT_META_BG)) {
370 ext4_msg(sb, KERN_ERR,
371 "Online resizing not (yet) supported with meta_bg");
372 return -EOPNOTSUPP;
373 }
374
375 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, 369 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
376 sizeof(__u64))) { 370 sizeof(__u64))) {
377 return -EFAULT; 371 return -EFAULT;
378 } 372 }
379 373
380 if (n_blocks_count > MAX_32_NUM &&
381 !EXT4_HAS_INCOMPAT_FEATURE(sb,
382 EXT4_FEATURE_INCOMPAT_64BIT)) {
383 ext4_msg(sb, KERN_ERR,
384 "File system only supports 32-bit block numbers");
385 return -EOPNOTSUPP;
386 }
387
388 err = ext4_resize_begin(sb); 374 err = ext4_resize_begin(sb);
389 if (err) 375 if (err)
390 return err; 376 return err;
@@ -419,13 +405,6 @@ resizefs_out:
419 if (!blk_queue_discard(q)) 405 if (!blk_queue_discard(q))
420 return -EOPNOTSUPP; 406 return -EOPNOTSUPP;
421 407
422 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
423 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
424 ext4_msg(sb, KERN_ERR,
425 "FITRIM not supported with bigalloc");
426 return -EOPNOTSUPP;
427 }
428
429 if (copy_from_user(&range, (struct fstrim_range __user *)arg, 408 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
430 sizeof(range))) 409 sizeof(range)))
431 return -EFAULT; 410 return -EFAULT;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8eae94771c45..f8b27bf80aca 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -24,6 +24,7 @@
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "mballoc.h" 25#include "mballoc.h"
26#include <linux/debugfs.h> 26#include <linux/debugfs.h>
27#include <linux/log2.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28#include <trace/events/ext4.h> 29#include <trace/events/ext4.h>
29 30
@@ -1338,17 +1339,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1338 mb_check_buddy(e4b); 1339 mb_check_buddy(e4b);
1339} 1340}
1340 1341
1341static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, 1342static int mb_find_extent(struct ext4_buddy *e4b, int block,
1342 int needed, struct ext4_free_extent *ex) 1343 int needed, struct ext4_free_extent *ex)
1343{ 1344{
1344 int next = block; 1345 int next = block;
1345 int max; 1346 int max, order;
1346 void *buddy; 1347 void *buddy;
1347 1348
1348 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); 1349 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1349 BUG_ON(ex == NULL); 1350 BUG_ON(ex == NULL);
1350 1351
1351 buddy = mb_find_buddy(e4b, order, &max); 1352 buddy = mb_find_buddy(e4b, 0, &max);
1352 BUG_ON(buddy == NULL); 1353 BUG_ON(buddy == NULL);
1353 BUG_ON(block >= max); 1354 BUG_ON(block >= max);
1354 if (mb_test_bit(block, buddy)) { 1355 if (mb_test_bit(block, buddy)) {
@@ -1358,12 +1359,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1358 return 0; 1359 return 0;
1359 } 1360 }
1360 1361
1361 /* FIXME dorp order completely ? */ 1362 /* find actual order */
1362 if (likely(order == 0)) { 1363 order = mb_find_order_for_block(e4b, block);
1363 /* find actual order */ 1364 block = block >> order;
1364 order = mb_find_order_for_block(e4b, block);
1365 block = block >> order;
1366 }
1367 1365
1368 ex->fe_len = 1 << order; 1366 ex->fe_len = 1 << order;
1369 ex->fe_start = block << order; 1367 ex->fe_start = block << order;
@@ -1549,7 +1547,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1549 /* recheck chunk's availability - we don't know 1547 /* recheck chunk's availability - we don't know
1550 * when it was found (within this lock-unlock 1548 * when it was found (within this lock-unlock
1551 * period or not) */ 1549 * period or not) */
1552 max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); 1550 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1553 if (max >= gex->fe_len) { 1551 if (max >= gex->fe_len) {
1554 ext4_mb_use_best_found(ac, e4b); 1552 ext4_mb_use_best_found(ac, e4b);
1555 return; 1553 return;
@@ -1641,7 +1639,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1641 return err; 1639 return err;
1642 1640
1643 ext4_lock_group(ac->ac_sb, group); 1641 ext4_lock_group(ac->ac_sb, group);
1644 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); 1642 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1645 1643
1646 if (max > 0) { 1644 if (max > 0) {
1647 ac->ac_b_ex = ex; 1645 ac->ac_b_ex = ex;
@@ -1662,17 +1660,20 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1662 int max; 1660 int max;
1663 int err; 1661 int err;
1664 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 1662 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1663 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1665 struct ext4_free_extent ex; 1664 struct ext4_free_extent ex;
1666 1665
1667 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) 1666 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1668 return 0; 1667 return 0;
1668 if (grp->bb_free == 0)
1669 return 0;
1669 1670
1670 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); 1671 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1671 if (err) 1672 if (err)
1672 return err; 1673 return err;
1673 1674
1674 ext4_lock_group(ac->ac_sb, group); 1675 ext4_lock_group(ac->ac_sb, group);
1675 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, 1676 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1676 ac->ac_g_ex.fe_len, &ex); 1677 ac->ac_g_ex.fe_len, &ex);
1677 1678
1678 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1679 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
@@ -1788,7 +1789,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1788 break; 1789 break;
1789 } 1790 }
1790 1791
1791 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1792 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1792 BUG_ON(ex.fe_len <= 0); 1793 BUG_ON(ex.fe_len <= 0);
1793 if (free < ex.fe_len) { 1794 if (free < ex.fe_len) {
1794 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1795 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
@@ -1840,7 +1841,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1840 1841
1841 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { 1842 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1842 if (!mb_test_bit(i, bitmap)) { 1843 if (!mb_test_bit(i, bitmap)) {
1843 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); 1844 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1844 if (max >= sbi->s_stripe) { 1845 if (max >= sbi->s_stripe) {
1845 ac->ac_found++; 1846 ac->ac_found++;
1846 ac->ac_b_ex = ex; 1847 ac->ac_b_ex = ex;
@@ -1862,6 +1863,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1862 1863
1863 BUG_ON(cr < 0 || cr >= 4); 1864 BUG_ON(cr < 0 || cr >= 4);
1864 1865
1866 free = grp->bb_free;
1867 if (free == 0)
1868 return 0;
1869 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1870 return 0;
1871
1865 /* We only do this if the grp has never been initialized */ 1872 /* We only do this if the grp has never been initialized */
1866 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { 1873 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1867 int ret = ext4_mb_init_group(ac->ac_sb, group); 1874 int ret = ext4_mb_init_group(ac->ac_sb, group);
@@ -1869,10 +1876,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1869 return 0; 1876 return 0;
1870 } 1877 }
1871 1878
1872 free = grp->bb_free;
1873 fragments = grp->bb_fragments; 1879 fragments = grp->bb_fragments;
1874 if (free == 0)
1875 return 0;
1876 if (fragments == 0) 1880 if (fragments == 0)
1877 return 0; 1881 return 0;
1878 1882
@@ -2163,6 +2167,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2163 return cachep; 2167 return cachep;
2164} 2168}
2165 2169
2170/*
2171 * Allocate the top-level s_group_info array for the specified number
2172 * of groups
2173 */
2174int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2175{
2176 struct ext4_sb_info *sbi = EXT4_SB(sb);
2177 unsigned size;
2178 struct ext4_group_info ***new_groupinfo;
2179
2180 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2181 EXT4_DESC_PER_BLOCK_BITS(sb);
2182 if (size <= sbi->s_group_info_size)
2183 return 0;
2184
2185 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2186 new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
2187 if (!new_groupinfo) {
2188 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2189 return -ENOMEM;
2190 }
2191 if (sbi->s_group_info) {
2192 memcpy(new_groupinfo, sbi->s_group_info,
2193 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2194 ext4_kvfree(sbi->s_group_info);
2195 }
2196 sbi->s_group_info = new_groupinfo;
2197 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2198 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2199 sbi->s_group_info_size);
2200 return 0;
2201}
2202
2166/* Create and initialize ext4_group_info data for the given group. */ 2203/* Create and initialize ext4_group_info data for the given group. */
2167int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2204int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2168 struct ext4_group_desc *desc) 2205 struct ext4_group_desc *desc)
@@ -2195,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2195 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2232 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2196 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2233 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2197 2234
2198 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); 2235 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
2199 if (meta_group_info[i] == NULL) { 2236 if (meta_group_info[i] == NULL) {
2200 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); 2237 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2201 goto exit_group_info; 2238 goto exit_group_info;
2202 } 2239 }
2203 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2204 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2240 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2205 &(meta_group_info[i]->bb_state)); 2241 &(meta_group_info[i]->bb_state));
2206 2242
@@ -2252,49 +2288,14 @@ static int ext4_mb_init_backend(struct super_block *sb)
2252 ext4_group_t ngroups = ext4_get_groups_count(sb); 2288 ext4_group_t ngroups = ext4_get_groups_count(sb);
2253 ext4_group_t i; 2289 ext4_group_t i;
2254 struct ext4_sb_info *sbi = EXT4_SB(sb); 2290 struct ext4_sb_info *sbi = EXT4_SB(sb);
2255 struct ext4_super_block *es = sbi->s_es; 2291 int err;
2256 int num_meta_group_infos;
2257 int num_meta_group_infos_max;
2258 int array_size;
2259 struct ext4_group_desc *desc; 2292 struct ext4_group_desc *desc;
2260 struct kmem_cache *cachep; 2293 struct kmem_cache *cachep;
2261 2294
2262 /* This is the number of blocks used by GDT */ 2295 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2263 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2296 if (err)
2264 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2297 return err;
2265
2266 /*
2267 * This is the total number of blocks used by GDT including
2268 * the number of reserved blocks for GDT.
2269 * The s_group_info array is allocated with this value
2270 * to allow a clean online resize without a complex
2271 * manipulation of pointer.
2272 * The drawback is the unused memory when no resize
2273 * occurs but it's very low in terms of pages
2274 * (see comments below)
2275 * Need to handle this properly when META_BG resizing is allowed
2276 */
2277 num_meta_group_infos_max = num_meta_group_infos +
2278 le16_to_cpu(es->s_reserved_gdt_blocks);
2279 2298
2280 /*
2281 * array_size is the size of s_group_info array. We round it
2282 * to the next power of two because this approximation is done
2283 * internally by kmalloc so we can have some more memory
2284 * for free here (e.g. may be used for META_BG resize).
2285 */
2286 array_size = 1;
2287 while (array_size < sizeof(*sbi->s_group_info) *
2288 num_meta_group_infos_max)
2289 array_size = array_size << 1;
2290 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2291 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2292 * So a two level scheme suffices for now. */
2293 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2294 if (sbi->s_group_info == NULL) {
2295 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2296 return -ENOMEM;
2297 }
2298 sbi->s_buddy_cache = new_inode(sb); 2299 sbi->s_buddy_cache = new_inode(sb);
2299 if (sbi->s_buddy_cache == NULL) { 2300 if (sbi->s_buddy_cache == NULL) {
2300 ext4_msg(sb, KERN_ERR, "can't get new inode"); 2301 ext4_msg(sb, KERN_ERR, "can't get new inode");
@@ -2322,7 +2323,7 @@ err_freebuddy:
2322 cachep = get_groupinfo_cache(sb->s_blocksize_bits); 2323 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2323 while (i-- > 0) 2324 while (i-- > 0)
2324 kmem_cache_free(cachep, ext4_get_group_info(sb, i)); 2325 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2325 i = num_meta_group_infos; 2326 i = sbi->s_group_info_size;
2326 while (i-- > 0) 2327 while (i-- > 0)
2327 kfree(sbi->s_group_info[i]); 2328 kfree(sbi->s_group_info[i]);
2328 iput(sbi->s_buddy_cache); 2329 iput(sbi->s_buddy_cache);
@@ -4008,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4008 ext4_get_group_no_and_offset(sb, goal, &group, &block); 4009 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4009 4010
4010 /* set up allocation goals */ 4011 /* set up allocation goals */
4011 memset(ac, 0, sizeof(struct ext4_allocation_context));
4012 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); 4012 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4013 ac->ac_status = AC_STATUS_CONTINUE; 4013 ac->ac_status = AC_STATUS_CONTINUE;
4014 ac->ac_sb = sb; 4014 ac->ac_sb = sb;
@@ -4291,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4291 } 4291 }
4292 } 4292 }
4293 4293
4294 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4294 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4295 if (!ac) { 4295 if (!ac) {
4296 ar->len = 0; 4296 ar->len = 0;
4297 *errp = -ENOMEM; 4297 *errp = -ENOMEM;
@@ -4657,6 +4657,8 @@ do_more:
4657 * with group lock held. generate_buddy look at 4657 * with group lock held. generate_buddy look at
4658 * them with group lock_held 4658 * them with group lock_held
4659 */ 4659 */
4660 if (test_opt(sb, DISCARD))
4661 ext4_issue_discard(sb, block_group, bit, count);
4660 ext4_lock_group(sb, block_group); 4662 ext4_lock_group(sb, block_group);
4661 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 4663 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4662 mb_free_blocks(inode, &e4b, bit, count_clusters); 4664 mb_free_blocks(inode, &e4b, bit, count_clusters);
@@ -4709,7 +4711,7 @@ error_return:
4709 * ext4_group_add_blocks() -- Add given blocks to an existing group 4711 * ext4_group_add_blocks() -- Add given blocks to an existing group
4710 * @handle: handle to this transaction 4712 * @handle: handle to this transaction
4711 * @sb: super block 4713 * @sb: super block
4712 * @block: start physcial block to add to the block group 4714 * @block: start physical block to add to the block group
4713 * @count: number of blocks to free 4715 * @count: number of blocks to free
4714 * 4716 *
4715 * This marks the blocks as free in the bitmap and buddy. 4717 * This marks the blocks as free in the bitmap and buddy.
@@ -4988,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4988 4990
4989 start = range->start >> sb->s_blocksize_bits; 4991 start = range->start >> sb->s_blocksize_bits;
4990 end = start + (range->len >> sb->s_blocksize_bits) - 1; 4992 end = start + (range->len >> sb->s_blocksize_bits) - 1;
4991 minlen = range->minlen >> sb->s_blocksize_bits; 4993 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
4994 range->minlen >> sb->s_blocksize_bits);
4992 4995
4993 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || 4996 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
4994 unlikely(start >= max_blks)) 4997 unlikely(start >= max_blks))
@@ -5048,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5048 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); 5051 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5049 5052
5050out: 5053out:
5051 range->len = trimmed * sb->s_blocksize; 5054 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
5052 return ret; 5055 return ret;
5053} 5056}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index c070618c21ce..3ccd889ba953 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -65,11 +65,6 @@ extern u8 mb_enable_debug;
65#define MB_DEFAULT_MIN_TO_SCAN 10 65#define MB_DEFAULT_MIN_TO_SCAN 10
66 66
67/* 67/*
68 * How many groups mballoc will scan looking for the best chunk
69 */
70#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
71
72/*
73 * with 'ext4_mb_stats' allocator will collect stats that will be 68 * with 'ext4_mb_stats' allocator will collect stats that will be
74 * shown at umount. The collecting costs though! 69 * shown at umount. The collecting costs though!
75 */ 70 */
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c5826c623e7a..292daeeed455 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
141} 141}
142 142
143/** 143/**
144 * mext_check_null_inode - NULL check for two inodes
145 *
146 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
147 */
148static int
149mext_check_null_inode(struct inode *inode1, struct inode *inode2,
150 const char *function, unsigned int line)
151{
152 int ret = 0;
153
154 if (inode1 == NULL) {
155 __ext4_error(inode2->i_sb, function, line,
156 "Both inodes should not be NULL: "
157 "inode1 NULL inode2 %lu", inode2->i_ino);
158 ret = -EIO;
159 } else if (inode2 == NULL) {
160 __ext4_error(inode1->i_sb, function, line,
161 "Both inodes should not be NULL: "
162 "inode1 %lu inode2 NULL", inode1->i_ino);
163 ret = -EIO;
164 }
165 return ret;
166}
167
168/**
169 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem 144 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
170 * 145 *
171 * @orig_inode: original inode structure 146 * Acquire write lock of i_data_sem of the two inodes
172 * @donor_inode: donor inode structure
173 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
174 * i_ino order.
175 */ 147 */
176static void 148static void
177double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) 149double_down_write_data_sem(struct inode *first, struct inode *second)
178{ 150{
179 struct inode *first = orig_inode, *second = donor_inode; 151 if (first < second) {
152 down_write(&EXT4_I(first)->i_data_sem);
153 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
154 } else {
155 down_write(&EXT4_I(second)->i_data_sem);
156 down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
180 157
181 /*
182 * Use the inode number to provide the stable locking order instead
183 * of its address, because the C language doesn't guarantee you can
184 * compare pointers that don't come from the same array.
185 */
186 if (donor_inode->i_ino < orig_inode->i_ino) {
187 first = donor_inode;
188 second = orig_inode;
189 } 158 }
190
191 down_write(&EXT4_I(first)->i_data_sem);
192 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
193} 159}
194 160
195/** 161/**
@@ -604,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
604 diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 570 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
605 571
606 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); 572 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
607 tmp_dext->ee_block = 573 le32_add_cpu(&tmp_dext->ee_block, diff);
608 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); 574 le16_add_cpu(&tmp_dext->ee_len, -diff);
609 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
610 575
611 if (max_count < ext4_ext_get_actual_len(tmp_dext)) 576 if (max_count < ext4_ext_get_actual_len(tmp_dext))
612 tmp_dext->ee_len = cpu_to_le16(max_count); 577 tmp_dext->ee_len = cpu_to_le16(max_count);
@@ -629,6 +594,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
629} 594}
630 595
631/** 596/**
597 * mext_check_coverage - Check that all extents in range have the same type
598 *
599 * @inode: inode in question
600 * @from: block offset of inode
601 * @count: block count to be checked
602 * @uninit: extents expected to be uninitialized
603 * @err: pointer to save error value
604 *
605 * Return 1 if all extents in range have the expected type, and zero otherwise.
606 */
607static int
608mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
609 int uninit, int *err)
610{
611 struct ext4_ext_path *path = NULL;
612 struct ext4_extent *ext;
613 ext4_lblk_t last = from + count;
614 while (from < last) {
615 *err = get_ext_path(inode, from, &path);
616 if (*err)
617 return 0;
618 ext = path[ext_depth(inode)].p_ext;
619 if (!ext) {
620 ext4_ext_drop_refs(path);
621 return 0;
622 }
623 if (uninit != ext4_ext_is_uninitialized(ext)) {
624 ext4_ext_drop_refs(path);
625 return 0;
626 }
627 from += ext4_ext_get_actual_len(ext);
628 ext4_ext_drop_refs(path);
629 }
630 return 1;
631}
632
633/**
632 * mext_replace_branches - Replace original extents with new extents 634 * mext_replace_branches - Replace original extents with new extents
633 * 635 *
634 * @handle: journal handle 636 * @handle: journal handle
@@ -663,9 +665,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
663 int replaced_count = 0; 665 int replaced_count = 0;
664 int dext_alen; 666 int dext_alen;
665 667
666 /* Protect extent trees against block allocations via delalloc */
667 double_down_write_data_sem(orig_inode, donor_inode);
668
669 /* Get the original extent for the block "orig_off" */ 668 /* Get the original extent for the block "orig_off" */
670 *err = get_ext_path(orig_inode, orig_off, &orig_path); 669 *err = get_ext_path(orig_inode, orig_off, &orig_path);
671 if (*err) 670 if (*err)
@@ -764,12 +763,122 @@ out:
764 ext4_ext_invalidate_cache(orig_inode); 763 ext4_ext_invalidate_cache(orig_inode);
765 ext4_ext_invalidate_cache(donor_inode); 764 ext4_ext_invalidate_cache(donor_inode);
766 765
767 double_up_write_data_sem(orig_inode, donor_inode);
768
769 return replaced_count; 766 return replaced_count;
770} 767}
771 768
772/** 769/**
770 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
771 *
772 * @inode1: the inode structure
773 * @inode2: the inode structure
774 * @index: page index
775 * @page: result page vector
776 *
777 * Grab two locked pages for the inodes, in inode order
778 */
779static int
780mext_page_double_lock(struct inode *inode1, struct inode *inode2,
781 pgoff_t index, struct page *page[2])
782{
783 struct address_space *mapping[2];
784 unsigned fl = AOP_FLAG_NOFS;
785
786 BUG_ON(!inode1 || !inode2);
787 if (inode1 < inode2) {
788 mapping[0] = inode1->i_mapping;
789 mapping[1] = inode2->i_mapping;
790 } else {
791 mapping[0] = inode2->i_mapping;
792 mapping[1] = inode1->i_mapping;
793 }
794
795 page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
796 if (!page[0])
797 return -ENOMEM;
798
799 page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
800 if (!page[1]) {
801 unlock_page(page[0]);
802 page_cache_release(page[0]);
803 return -ENOMEM;
804 }
805
806 if (inode1 > inode2) {
807 struct page *tmp;
808 tmp = page[0];
809 page[0] = page[1];
810 page[1] = tmp;
811 }
812 return 0;
813}
814
815/* Force page buffers uptodate w/o dropping page's lock */
816static int
817mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
818{
819 struct inode *inode = page->mapping->host;
820 sector_t block;
821 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
822 unsigned int blocksize, block_start, block_end;
823 int i, err, nr = 0, partial = 0;
824 BUG_ON(!PageLocked(page));
825 BUG_ON(PageWriteback(page));
826
827 if (PageUptodate(page))
828 return 0;
829
830 blocksize = 1 << inode->i_blkbits;
831 if (!page_has_buffers(page))
832 create_empty_buffers(page, blocksize, 0);
833
834 head = page_buffers(page);
835 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
836 for (bh = head, block_start = 0; bh != head || !block_start;
837 block++, block_start = block_end, bh = bh->b_this_page) {
838 block_end = block_start + blocksize;
839 if (block_end <= from || block_start >= to) {
840 if (!buffer_uptodate(bh))
841 partial = 1;
842 continue;
843 }
844 if (buffer_uptodate(bh))
845 continue;
846 if (!buffer_mapped(bh)) {
847 int err = 0;
848 err = ext4_get_block(inode, block, bh, 0);
849 if (err) {
850 SetPageError(page);
851 return err;
852 }
853 if (!buffer_mapped(bh)) {
854 zero_user(page, block_start, blocksize);
855 if (!err)
856 set_buffer_uptodate(bh);
857 continue;
858 }
859 }
860 BUG_ON(nr >= MAX_BUF_PER_PAGE);
861 arr[nr++] = bh;
862 }
863 /* No io required */
864 if (!nr)
865 goto out;
866
867 for (i = 0; i < nr; i++) {
868 bh = arr[i];
869 if (!bh_uptodate_or_lock(bh)) {
870 err = bh_submit_read(bh);
871 if (err)
872 return err;
873 }
874 }
875out:
876 if (!partial)
877 SetPageUptodate(page);
878 return 0;
879}
880
881/**
773 * move_extent_per_page - Move extent data per page 882 * move_extent_per_page - Move extent data per page
774 * 883 *
775 * @o_filp: file structure of original file 884 * @o_filp: file structure of original file
@@ -791,26 +900,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
791 int block_len_in_page, int uninit, int *err) 900 int block_len_in_page, int uninit, int *err)
792{ 901{
793 struct inode *orig_inode = o_filp->f_dentry->d_inode; 902 struct inode *orig_inode = o_filp->f_dentry->d_inode;
794 struct address_space *mapping = orig_inode->i_mapping; 903 struct page *pagep[2] = {NULL, NULL};
795 struct buffer_head *bh;
796 struct page *page = NULL;
797 const struct address_space_operations *a_ops = mapping->a_ops;
798 handle_t *handle; 904 handle_t *handle;
799 ext4_lblk_t orig_blk_offset; 905 ext4_lblk_t orig_blk_offset;
800 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 906 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
801 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 907 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
802 unsigned int w_flags = 0; 908 unsigned int w_flags = 0;
803 unsigned int tmp_data_size, data_size, replaced_size; 909 unsigned int tmp_data_size, data_size, replaced_size;
804 void *fsdata; 910 int err2, jblocks, retries = 0;
805 int i, jblocks;
806 int err2 = 0;
807 int replaced_count = 0; 911 int replaced_count = 0;
912 int from = data_offset_in_page << orig_inode->i_blkbits;
808 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 913 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
809 914
810 /* 915 /*
811 * It needs twice the amount of ordinary journal buffers because 916 * It needs twice the amount of ordinary journal buffers because
812 * inode and donor_inode may each change different metadata blocks. 917 * inode and donor_inode may each change different metadata blocks.
813 */ 918 */
919again:
920 *err = 0;
814 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 921 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
815 handle = ext4_journal_start(orig_inode, jblocks); 922 handle = ext4_journal_start(orig_inode, jblocks);
816 if (IS_ERR(handle)) { 923 if (IS_ERR(handle)) {
@@ -824,19 +931,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
824 orig_blk_offset = orig_page_offset * blocks_per_page + 931 orig_blk_offset = orig_page_offset * blocks_per_page +
825 data_offset_in_page; 932 data_offset_in_page;
826 933
827 /*
828 * If orig extent is uninitialized one,
829 * it's not necessary force the page into memory
830 * and then force it to be written out again.
831 * Just swap data blocks between orig and donor.
832 */
833 if (uninit) {
834 replaced_count = mext_replace_branches(handle, orig_inode,
835 donor_inode, orig_blk_offset,
836 block_len_in_page, err);
837 goto out2;
838 }
839
840 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 934 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
841 935
842 /* Calculate data_size */ 936 /* Calculate data_size */
@@ -858,75 +952,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 952
859 replaced_size = data_size; 953 replaced_size = data_size;
860 954
861 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, 955 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
862 &page, &fsdata); 956 pagep);
863 if (unlikely(*err < 0)) 957 if (unlikely(*err < 0))
864 goto out; 958 goto stop_journal;
865
866 if (!PageUptodate(page)) {
867 mapping->a_ops->readpage(o_filp, page);
868 lock_page(page);
869 }
870
871 /* 959 /*
872 * try_to_release_page() doesn't call releasepage in writeback mode. 960 * If orig extent was uninitialized it can become initialized
873 * We should care about the order of writing to the same file 961 * at any time after i_data_sem was dropped, in order to
874 * by multiple move extent processes. 962 * serialize with delalloc we have to recheck the extent while we
875 * It needs to call wait_on_page_writeback() to wait for the 963 * hold page's lock, if it is still the case data copy is not
876 * writeback of the page. 964 * necessary, just swap data blocks between orig and donor.
877 */ 965 */
878 wait_on_page_writeback(page); 966 if (uninit) {
967 double_down_write_data_sem(orig_inode, donor_inode);
968 /* If any of the extents in range became initialized we have to
969 * fall back to data copying */
970 uninit = mext_check_coverage(orig_inode, orig_blk_offset,
971 block_len_in_page, 1, err);
972 if (*err)
973 goto drop_data_sem;
879 974
880 /* Release old bh and drop refs */ 975 uninit &= mext_check_coverage(donor_inode, orig_blk_offset,
881 try_to_release_page(page, 0); 976 block_len_in_page, 1, err);
977 if (*err)
978 goto drop_data_sem;
979
980 if (!uninit) {
981 double_up_write_data_sem(orig_inode, donor_inode);
982 goto data_copy;
983 }
984 if ((page_has_private(pagep[0]) &&
985 !try_to_release_page(pagep[0], 0)) ||
986 (page_has_private(pagep[1]) &&
987 !try_to_release_page(pagep[1], 0))) {
988 *err = -EBUSY;
989 goto drop_data_sem;
990 }
991 replaced_count = mext_replace_branches(handle, orig_inode,
992 donor_inode, orig_blk_offset,
993 block_len_in_page, err);
994 drop_data_sem:
995 double_up_write_data_sem(orig_inode, donor_inode);
996 goto unlock_pages;
997 }
998data_copy:
999 *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
1000 if (*err)
1001 goto unlock_pages;
1002
1003 /* At this point all buffers in range are uptodate, old mapping layout
1004 * is no longer required, try to drop it now. */
1005 if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
1006 (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
1007 *err = -EBUSY;
1008 goto unlock_pages;
1009 }
882 1010
883 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1011 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
884 orig_blk_offset, block_len_in_page, 1012 orig_blk_offset,
885 &err2); 1013 block_len_in_page, err);
886 if (err2) { 1014 if (*err) {
887 if (replaced_count) { 1015 if (replaced_count) {
888 block_len_in_page = replaced_count; 1016 block_len_in_page = replaced_count;
889 replaced_size = 1017 replaced_size =
890 block_len_in_page << orig_inode->i_blkbits; 1018 block_len_in_page << orig_inode->i_blkbits;
891 } else 1019 } else
892 goto out; 1020 goto unlock_pages;
893 } 1021 }
1022 /* Perform all necessary steps similar to write_begin()/write_end()
1023 * but keeping in mind that i_size will not change */
1024 *err = __block_write_begin(pagep[0], from, from + replaced_size,
1025 ext4_get_block);
1026 if (!*err)
1027 *err = block_commit_write(pagep[0], from, from + replaced_size);
894 1028
895 if (!page_has_buffers(page)) 1029 if (unlikely(*err < 0))
896 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 1030 goto repair_branches;
897 1031
898 bh = page_buffers(page); 1032 /* Even in case of data=writeback it is reasonable to pin
899 for (i = 0; i < data_offset_in_page; i++) 1033 * inode to transaction, to prevent unexpected data loss */
900 bh = bh->b_this_page; 1034 *err = ext4_jbd2_file_inode(handle, orig_inode);
901 1035
902 for (i = 0; i < block_len_in_page; i++) { 1036unlock_pages:
903 *err = ext4_get_block(orig_inode, 1037 unlock_page(pagep[0]);
904 (sector_t)(orig_blk_offset + i), bh, 0); 1038 page_cache_release(pagep[0]);
905 if (*err < 0) 1039 unlock_page(pagep[1]);
906 goto out; 1040 page_cache_release(pagep[1]);
907 1041stop_journal:
908 if (bh->b_this_page != NULL)
909 bh = bh->b_this_page;
910 }
911
912 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
913 page, fsdata);
914 page = NULL;
915
916out:
917 if (unlikely(page)) {
918 if (PageLocked(page))
919 unlock_page(page);
920 page_cache_release(page);
921 ext4_journal_stop(handle);
922 }
923out2:
924 ext4_journal_stop(handle); 1042 ext4_journal_stop(handle);
925 1043 /* Buffer was busy, probably because it is pinned to a journal transaction;
926 if (err2) 1044 * forcing a transaction commit may help to free it. */
927 *err = err2; 1045 if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
928 1046 &retries))
1047 goto again;
929 return replaced_count; 1048 return replaced_count;
1049
1050repair_branches:
1051 /*
1052 * This should never ever happen!
1053 * Extents are swapped already, but we are not able to copy data.
1054 * Try to swap extents back to their original places
1055 */
1056 double_down_write_data_sem(orig_inode, donor_inode);
1057 replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
1058 orig_blk_offset,
1059 block_len_in_page, &err2);
1060 double_up_write_data_sem(orig_inode, donor_inode);
1061 if (replaced_count != block_len_in_page) {
1062 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
1063 "Unable to copy data block,"
1064 " data will be lost.");
1065 *err = -EIO;
1066 }
1067 replaced_count = 0;
1068 goto unlock_pages;
930} 1069}
931 1070
932/** 1071/**
@@ -969,14 +1108,6 @@ mext_check_arguments(struct inode *orig_inode,
969 return -EINVAL; 1108 return -EINVAL;
970 } 1109 }
971 1110
972 /* Files should be in the same ext4 FS */
973 if (orig_inode->i_sb != donor_inode->i_sb) {
974 ext4_debug("ext4 move extent: The argument files "
975 "should be in same FS [ino:orig %lu, donor %lu]\n",
976 orig_inode->i_ino, donor_inode->i_ino);
977 return -EINVAL;
978 }
979
980 /* Ext4 move extent supports only extent based file */ 1111 /* Ext4 move extent supports only extent based file */
981 if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { 1112 if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
982 ext4_debug("ext4 move extent: orig file is not extents " 1113 ext4_debug("ext4 move extent: orig file is not extents "
@@ -1002,7 +1133,6 @@ mext_check_arguments(struct inode *orig_inode,
1002 } 1133 }
1003 1134
1004 if ((orig_start >= EXT_MAX_BLOCKS) || 1135 if ((orig_start >= EXT_MAX_BLOCKS) ||
1005 (donor_start >= EXT_MAX_BLOCKS) ||
1006 (*len > EXT_MAX_BLOCKS) || 1136 (*len > EXT_MAX_BLOCKS) ||
1007 (orig_start + *len >= EXT_MAX_BLOCKS)) { 1137 (orig_start + *len >= EXT_MAX_BLOCKS)) {
1008 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 1138 ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
@@ -1072,35 +1202,19 @@ mext_check_arguments(struct inode *orig_inode,
1072 * @inode1: the inode structure 1202 * @inode1: the inode structure
1073 * @inode2: the inode structure 1203 * @inode2: the inode structure
1074 * 1204 *
1075 * Lock two inodes' i_mutex by i_ino order. 1205 * Lock two inodes' i_mutex
1076 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1077 */ 1206 */
1078static int 1207static void
1079mext_inode_double_lock(struct inode *inode1, struct inode *inode2) 1208mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1080{ 1209{
1081 int ret = 0; 1210 BUG_ON(inode1 == inode2);
1082 1211 if (inode1 < inode2) {
1083 BUG_ON(inode1 == NULL && inode2 == NULL);
1084
1085 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1086 if (ret < 0)
1087 goto out;
1088
1089 if (inode1 == inode2) {
1090 mutex_lock(&inode1->i_mutex);
1091 goto out;
1092 }
1093
1094 if (inode1->i_ino < inode2->i_ino) {
1095 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 1212 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1096 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 1213 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1097 } else { 1214 } else {
1098 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); 1215 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1099 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); 1216 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1100 } 1217 }
1101
1102out:
1103 return ret;
1104} 1218}
1105 1219
1106/** 1220/**
@@ -1109,28 +1223,13 @@ out:
1109 * @inode1: the inode that is released first 1223 * @inode1: the inode that is released first
1110 * @inode2: the inode that is released second 1224 * @inode2: the inode that is released second
1111 * 1225 *
1112 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1113 */ 1226 */
1114 1227
1115static int 1228static void
1116mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) 1229mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1117{ 1230{
1118 int ret = 0; 1231 mutex_unlock(&inode1->i_mutex);
1119 1232 mutex_unlock(&inode2->i_mutex);
1120 BUG_ON(inode1 == NULL && inode2 == NULL);
1121
1122 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1123 if (ret < 0)
1124 goto out;
1125
1126 if (inode1)
1127 mutex_unlock(&inode1->i_mutex);
1128
1129 if (inode2 && inode2 != inode1)
1130 mutex_unlock(&inode2->i_mutex);
1131
1132out:
1133 return ret;
1134} 1233}
1135 1234
1136/** 1235/**
@@ -1187,16 +1286,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1187 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; 1286 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1188 ext4_lblk_t rest_blocks; 1287 ext4_lblk_t rest_blocks;
1189 pgoff_t orig_page_offset = 0, seq_end_page; 1288 pgoff_t orig_page_offset = 0, seq_end_page;
1190 int ret1, ret2, depth, last_extent = 0; 1289 int ret, depth, last_extent = 0;
1191 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 1290 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1192 int data_offset_in_page; 1291 int data_offset_in_page;
1193 int block_len_in_page; 1292 int block_len_in_page;
1194 int uninit; 1293 int uninit;
1195 1294
1196 /* orig and donor should be different file */ 1295 if (orig_inode->i_sb != donor_inode->i_sb) {
1197 if (orig_inode->i_ino == donor_inode->i_ino) { 1296 ext4_debug("ext4 move extent: The argument files "
1297 "should be in same FS [ino:orig %lu, donor %lu]\n",
1298 orig_inode->i_ino, donor_inode->i_ino);
1299 return -EINVAL;
1300 }
1301
1302 /* orig and donor should be different inodes */
1303 if (orig_inode == donor_inode) {
1198 ext4_debug("ext4 move extent: The argument files should not " 1304 ext4_debug("ext4 move extent: The argument files should not "
1199 "be same file [ino:orig %lu, donor %lu]\n", 1305 "be same inode [ino:orig %lu, donor %lu]\n",
1200 orig_inode->i_ino, donor_inode->i_ino); 1306 orig_inode->i_ino, donor_inode->i_ino);
1201 return -EINVAL; 1307 return -EINVAL;
1202 } 1308 }
@@ -1208,18 +1314,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1208 orig_inode->i_ino, donor_inode->i_ino); 1314 orig_inode->i_ino, donor_inode->i_ino);
1209 return -EINVAL; 1315 return -EINVAL;
1210 } 1316 }
1211 1317 /* TODO: It is a non-obvious task to swap blocks for inodes with full
1318 journaling enabled */
1319 if (ext4_should_journal_data(orig_inode) ||
1320 ext4_should_journal_data(donor_inode)) {
1321 return -EINVAL;
1322 }
1212 /* Protect orig and donor inodes against a truncate */ 1323 /* Protect orig and donor inodes against a truncate */
1213 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1324 mext_inode_double_lock(orig_inode, donor_inode);
1214 if (ret1 < 0) 1325
1215 return ret1; 1326 /* Wait for all existing dio workers */
1327 ext4_inode_block_unlocked_dio(orig_inode);
1328 ext4_inode_block_unlocked_dio(donor_inode);
1329 inode_dio_wait(orig_inode);
1330 inode_dio_wait(donor_inode);
1216 1331
1217 /* Protect extent tree against block allocations via delalloc */ 1332 /* Protect extent tree against block allocations via delalloc */
1218 double_down_write_data_sem(orig_inode, donor_inode); 1333 double_down_write_data_sem(orig_inode, donor_inode);
1219 /* Check the filesystem environment whether move_extent can be done */ 1334 /* Check the filesystem environment whether move_extent can be done */
1220 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1335 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1221 donor_start, &len); 1336 donor_start, &len);
1222 if (ret1) 1337 if (ret)
1223 goto out; 1338 goto out;
1224 1339
1225 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; 1340 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
@@ -1227,13 +1342,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1227 if (file_end < block_end) 1342 if (file_end < block_end)
1228 len -= block_end - file_end; 1343 len -= block_end - file_end;
1229 1344
1230 ret1 = get_ext_path(orig_inode, block_start, &orig_path); 1345 ret = get_ext_path(orig_inode, block_start, &orig_path);
1231 if (ret1) 1346 if (ret)
1232 goto out; 1347 goto out;
1233 1348
1234 /* Get path structure to check the hole */ 1349 /* Get path structure to check the hole */
1235 ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); 1350 ret = get_ext_path(orig_inode, block_start, &holecheck_path);
1236 if (ret1) 1351 if (ret)
1237 goto out; 1352 goto out;
1238 1353
1239 depth = ext_depth(orig_inode); 1354 depth = ext_depth(orig_inode);
@@ -1252,13 +1367,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1252 last_extent = mext_next_extent(orig_inode, 1367 last_extent = mext_next_extent(orig_inode,
1253 holecheck_path, &ext_cur); 1368 holecheck_path, &ext_cur);
1254 if (last_extent < 0) { 1369 if (last_extent < 0) {
1255 ret1 = last_extent; 1370 ret = last_extent;
1256 goto out; 1371 goto out;
1257 } 1372 }
1258 last_extent = mext_next_extent(orig_inode, orig_path, 1373 last_extent = mext_next_extent(orig_inode, orig_path,
1259 &ext_dummy); 1374 &ext_dummy);
1260 if (last_extent < 0) { 1375 if (last_extent < 0) {
1261 ret1 = last_extent; 1376 ret = last_extent;
1262 goto out; 1377 goto out;
1263 } 1378 }
1264 seq_start = le32_to_cpu(ext_cur->ee_block); 1379 seq_start = le32_to_cpu(ext_cur->ee_block);
@@ -1272,7 +1387,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1272 if (le32_to_cpu(ext_cur->ee_block) > block_end) { 1387 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1273 ext4_debug("ext4 move extent: The specified range of file " 1388 ext4_debug("ext4 move extent: The specified range of file "
1274 "may be the hole\n"); 1389 "may be the hole\n");
1275 ret1 = -EINVAL; 1390 ret = -EINVAL;
1276 goto out; 1391 goto out;
1277 } 1392 }
1278 1393
@@ -1292,7 +1407,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1292 last_extent = mext_next_extent(orig_inode, holecheck_path, 1407 last_extent = mext_next_extent(orig_inode, holecheck_path,
1293 &ext_cur); 1408 &ext_cur);
1294 if (last_extent < 0) { 1409 if (last_extent < 0) {
1295 ret1 = last_extent; 1410 ret = last_extent;
1296 break; 1411 break;
1297 } 1412 }
1298 add_blocks = ext4_ext_get_actual_len(ext_cur); 1413 add_blocks = ext4_ext_get_actual_len(ext_cur);
@@ -1349,18 +1464,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1349 orig_page_offset, 1464 orig_page_offset,
1350 data_offset_in_page, 1465 data_offset_in_page,
1351 block_len_in_page, uninit, 1466 block_len_in_page, uninit,
1352 &ret1); 1467 &ret);
1353 1468
1354 /* Count how many blocks we have exchanged */ 1469 /* Count how many blocks we have exchanged */
1355 *moved_len += block_len_in_page; 1470 *moved_len += block_len_in_page;
1356 if (ret1 < 0) 1471 if (ret < 0)
1357 break; 1472 break;
1358 if (*moved_len > len) { 1473 if (*moved_len > len) {
1359 EXT4_ERROR_INODE(orig_inode, 1474 EXT4_ERROR_INODE(orig_inode,
1360 "We replaced blocks too much! " 1475 "We replaced blocks too much! "
1361 "sum of replaced: %llu requested: %llu", 1476 "sum of replaced: %llu requested: %llu",
1362 *moved_len, len); 1477 *moved_len, len);
1363 ret1 = -EIO; 1478 ret = -EIO;
1364 break; 1479 break;
1365 } 1480 }
1366 1481
@@ -1374,22 +1489,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1374 } 1489 }
1375 1490
1376 double_down_write_data_sem(orig_inode, donor_inode); 1491 double_down_write_data_sem(orig_inode, donor_inode);
1377 if (ret1 < 0) 1492 if (ret < 0)
1378 break; 1493 break;
1379 1494
1380 /* Decrease buffer counter */ 1495 /* Decrease buffer counter */
1381 if (holecheck_path) 1496 if (holecheck_path)
1382 ext4_ext_drop_refs(holecheck_path); 1497 ext4_ext_drop_refs(holecheck_path);
1383 ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); 1498 ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
1384 if (ret1) 1499 if (ret)
1385 break; 1500 break;
1386 depth = holecheck_path->p_depth; 1501 depth = holecheck_path->p_depth;
1387 1502
1388 /* Decrease buffer counter */ 1503 /* Decrease buffer counter */
1389 if (orig_path) 1504 if (orig_path)
1390 ext4_ext_drop_refs(orig_path); 1505 ext4_ext_drop_refs(orig_path);
1391 ret1 = get_ext_path(orig_inode, seq_start, &orig_path); 1506 ret = get_ext_path(orig_inode, seq_start, &orig_path);
1392 if (ret1) 1507 if (ret)
1393 break; 1508 break;
1394 1509
1395 ext_cur = holecheck_path[depth].p_ext; 1510 ext_cur = holecheck_path[depth].p_ext;
@@ -1412,12 +1527,9 @@ out:
1412 kfree(holecheck_path); 1527 kfree(holecheck_path);
1413 } 1528 }
1414 double_up_write_data_sem(orig_inode, donor_inode); 1529 double_up_write_data_sem(orig_inode, donor_inode);
1415 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1530 ext4_inode_resume_unlocked_dio(orig_inode);
1416 1531 ext4_inode_resume_unlocked_dio(donor_inode);
1417 if (ret1) 1532 mext_inode_double_unlock(orig_inode, donor_inode);
1418 return ret1;
1419 else if (ret2)
1420 return ret2;
1421 1533
1422 return 0; 1534 return ret;
1423} 1535}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a42cc04466f..6d600a69fc9d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle,
55{ 55{
56 struct buffer_head *bh; 56 struct buffer_head *bh;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >=
60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
61 *err = -ENOSPC;
62 return NULL;
63 }
64
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 65 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 66
60 bh = ext4_bread(handle, inode, *block, 1, err); 67 bh = ext4_bread(handle, inode, *block, 1, err);
@@ -67,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle,
67 bh = NULL; 74 bh = NULL;
68 } 75 }
69 } 76 }
77 if (!bh && !(*err)) {
78 *err = -EIO;
79 ext4_error(inode->i_sb,
80 "Directory hole detected on inode %lu\n",
81 inode->i_ino);
82 }
70 return bh; 83 return bh;
71} 84}
72 85
@@ -594,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
594 u32 hash; 607 u32 hash;
595 608
596 frame->bh = NULL; 609 frame->bh = NULL;
597 if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) 610 if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
611 if (*err == 0)
612 *err = ERR_BAD_DX_DIR;
598 goto fail; 613 goto fail;
614 }
599 root = (struct dx_root *) bh->b_data; 615 root = (struct dx_root *) bh->b_data;
600 if (root->info.hash_version != DX_HASH_TEA && 616 if (root->info.hash_version != DX_HASH_TEA &&
601 root->info.hash_version != DX_HASH_HALF_MD4 && 617 root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -696,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
696 frame->entries = entries; 712 frame->entries = entries;
697 frame->at = at; 713 frame->at = at;
698 if (!indirect--) return frame; 714 if (!indirect--) return frame;
699 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) 715 if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
716 if (!(*err))
717 *err = ERR_BAD_DX_DIR;
700 goto fail2; 718 goto fail2;
719 }
701 at = entries = ((struct dx_node *) bh->b_data)->entries; 720 at = entries = ((struct dx_node *) bh->b_data)->entries;
702 721
703 if (!buffer_verified(bh) && 722 if (!buffer_verified(bh) &&
@@ -807,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
807 */ 826 */
808 while (num_frames--) { 827 while (num_frames--) {
809 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 828 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
810 0, &err))) 829 0, &err))) {
830 if (!err) {
831 ext4_error(dir->i_sb,
832 "Directory hole detected on inode %lu\n",
833 dir->i_ino);
834 return -EIO;
835 }
811 return err; /* Failure */ 836 return err; /* Failure */
837 }
812 838
813 if (!buffer_verified(bh) && 839 if (!buffer_verified(bh) &&
814 !ext4_dx_csum_verify(dir, 840 !ext4_dx_csum_verify(dir,
@@ -839,12 +865,19 @@ static int htree_dirblock_to_tree(struct file *dir_file,
839{ 865{
840 struct buffer_head *bh; 866 struct buffer_head *bh;
841 struct ext4_dir_entry_2 *de, *top; 867 struct ext4_dir_entry_2 *de, *top;
842 int err, count = 0; 868 int err = 0, count = 0;
843 869
844 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 870 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
845 (unsigned long)block)); 871 (unsigned long)block));
846 if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) 872 if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
873 if (!err) {
874 err = -EIO;
875 ext4_error(dir->i_sb,
876 "Directory hole detected on inode %lu\n",
877 dir->i_ino);
878 }
847 return err; 879 return err;
880 }
848 881
849 if (!buffer_verified(bh) && 882 if (!buffer_verified(bh) &&
850 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) 883 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
@@ -1267,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1267 return NULL; 1300 return NULL;
1268 do { 1301 do {
1269 block = dx_get_block(frame->at); 1302 block = dx_get_block(frame->at);
1270 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) 1303 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
1304 if (!(*err)) {
1305 *err = -EIO;
1306 ext4_error(dir->i_sb,
1307 "Directory hole detected on inode %lu\n",
1308 dir->i_ino);
1309 }
1271 goto errout; 1310 goto errout;
1311 }
1272 1312
1273 if (!buffer_verified(bh) && 1313 if (!buffer_verified(bh) &&
1274 !ext4_dirent_csum_verify(dir, 1314 !ext4_dirent_csum_verify(dir,
@@ -1801,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1801 } 1841 }
1802 blocks = dir->i_size >> sb->s_blocksize_bits; 1842 blocks = dir->i_size >> sb->s_blocksize_bits;
1803 for (block = 0; block < blocks; block++) { 1843 for (block = 0; block < blocks; block++) {
1804 bh = ext4_bread(handle, dir, block, 0, &retval); 1844 if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
1805 if(!bh) 1845 if (!retval) {
1846 retval = -EIO;
1847 ext4_error(inode->i_sb,
1848 "Directory hole detected on inode %lu\n",
1849 inode->i_ino);
1850 }
1806 return retval; 1851 return retval;
1852 }
1807 if (!buffer_verified(bh) && 1853 if (!buffer_verified(bh) &&
1808 !ext4_dirent_csum_verify(dir, 1854 !ext4_dirent_csum_verify(dir,
1809 (struct ext4_dir_entry *)bh->b_data)) 1855 (struct ext4_dir_entry *)bh->b_data))
@@ -1860,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1860 entries = frame->entries; 1906 entries = frame->entries;
1861 at = frame->at; 1907 at = frame->at;
1862 1908
1863 if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) 1909 if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
1910 if (!err) {
1911 err = -EIO;
1912 ext4_error(dir->i_sb,
1913 "Directory hole detected on inode %lu\n",
1914 dir->i_ino);
1915 }
1864 goto cleanup; 1916 goto cleanup;
1917 }
1865 1918
1866 if (!buffer_verified(bh) && 1919 if (!buffer_verified(bh) &&
1867 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) 1920 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
@@ -2149,9 +2202,7 @@ retry:
2149 err = PTR_ERR(inode); 2202 err = PTR_ERR(inode);
2150 if (!IS_ERR(inode)) { 2203 if (!IS_ERR(inode)) {
2151 init_special_inode(inode, inode->i_mode, rdev); 2204 init_special_inode(inode, inode->i_mode, rdev);
2152#ifdef CONFIG_EXT4_FS_XATTR
2153 inode->i_op = &ext4_special_inode_operations; 2205 inode->i_op = &ext4_special_inode_operations;
2154#endif
2155 err = ext4_add_nondir(handle, dentry, inode); 2206 err = ext4_add_nondir(handle, dentry, inode);
2156 } 2207 }
2157 ext4_journal_stop(handle); 2208 ext4_journal_stop(handle);
@@ -2199,9 +2250,15 @@ retry:
2199 inode->i_op = &ext4_dir_inode_operations; 2250 inode->i_op = &ext4_dir_inode_operations;
2200 inode->i_fop = &ext4_dir_operations; 2251 inode->i_fop = &ext4_dir_operations;
2201 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 2252 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
2202 dir_block = ext4_bread(handle, inode, 0, 1, &err); 2253 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
2203 if (!dir_block) 2254 if (!err) {
2255 err = -EIO;
2256 ext4_error(inode->i_sb,
2257 "Directory hole detected on inode %lu\n",
2258 inode->i_ino);
2259 }
2204 goto out_clear_inode; 2260 goto out_clear_inode;
2261 }
2205 BUFFER_TRACE(dir_block, "get_write_access"); 2262 BUFFER_TRACE(dir_block, "get_write_access");
2206 err = ext4_journal_get_write_access(handle, dir_block); 2263 err = ext4_journal_get_write_access(handle, dir_block);
2207 if (err) 2264 if (err)
@@ -2318,6 +2375,11 @@ static int empty_dir(struct inode *inode)
2318 EXT4_ERROR_INODE(inode, 2375 EXT4_ERROR_INODE(inode,
2319 "error %d reading directory " 2376 "error %d reading directory "
2320 "lblock %u", err, lblock); 2377 "lblock %u", err, lblock);
2378 else
2379 ext4_warning(inode->i_sb,
2380 "bad directory (dir #%lu) - no data block",
2381 inode->i_ino);
2382
2321 offset += sb->s_blocksize; 2383 offset += sb->s_blocksize;
2322 continue; 2384 continue;
2323 } 2385 }
@@ -2362,7 +2424,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2362 struct ext4_iloc iloc; 2424 struct ext4_iloc iloc;
2363 int err = 0, rc; 2425 int err = 0, rc;
2364 2426
2365 if (!ext4_handle_valid(handle)) 2427 if (!EXT4_SB(sb)->s_journal)
2366 return 0; 2428 return 0;
2367 2429
2368 mutex_lock(&EXT4_SB(sb)->s_orphan_lock); 2430 mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
@@ -2436,8 +2498,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2436 struct ext4_iloc iloc; 2498 struct ext4_iloc iloc;
2437 int err = 0; 2499 int err = 0;
2438 2500
2439 /* ext4_handle_valid() assumes a valid handle_t pointer */ 2501 if (!EXT4_SB(inode->i_sb)->s_journal)
2440 if (handle && !ext4_handle_valid(handle))
2441 return 0; 2502 return 0;
2442 2503
2443 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); 2504 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
@@ -2456,7 +2517,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2456 * transaction handle with which to update the orphan list on 2517 * transaction handle with which to update the orphan list on
2457 * disk, but we still need to remove the inode from the linked 2518 * disk, but we still need to remove the inode from the linked
2458 * list in memory. */ 2519 * list in memory. */
2459 if (sbi->s_journal && !handle) 2520 if (!handle)
2460 goto out; 2521 goto out;
2461 2522
2462 err = ext4_reserve_inode_write(handle, inode, &iloc); 2523 err = ext4_reserve_inode_write(handle, inode, &iloc);
@@ -2826,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2826 goto end_rename; 2887 goto end_rename;
2827 } 2888 }
2828 retval = -EIO; 2889 retval = -EIO;
2829 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); 2890 if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) {
2830 if (!dir_bh) 2891 if (!retval) {
2892 retval = -EIO;
2893 ext4_error(old_inode->i_sb,
2894 "Directory hole detected on inode %lu\n",
2895 old_inode->i_ino);
2896 }
2831 goto end_rename; 2897 goto end_rename;
2898 }
2832 if (!buffer_verified(dir_bh) && 2899 if (!buffer_verified(dir_bh) &&
2833 !ext4_dirent_csum_verify(old_inode, 2900 !ext4_dirent_csum_verify(old_inode,
2834 (struct ext4_dir_entry *)dir_bh->b_data)) 2901 (struct ext4_dir_entry *)dir_bh->b_data))
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index dcdeef169a69..68e896e12a67 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -71,6 +71,9 @@ void ext4_free_io_end(ext4_io_end_t *io)
71 int i; 71 int i;
72 72
73 BUG_ON(!io); 73 BUG_ON(!io);
74 BUG_ON(!list_empty(&io->list));
75 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
76
74 if (io->page) 77 if (io->page)
75 put_page(io->page); 78 put_page(io->page);
76 for (i = 0; i < io->num_io_pages; i++) 79 for (i = 0; i < io->num_io_pages; i++)
@@ -81,13 +84,8 @@ void ext4_free_io_end(ext4_io_end_t *io)
81 kmem_cache_free(io_end_cachep, io); 84 kmem_cache_free(io_end_cachep, io);
82} 85}
83 86
84/* 87/* check a range of space and convert unwritten extents to written. */
85 * check a range of space and convert unwritten extents to written. 88static int ext4_end_io(ext4_io_end_t *io)
86 *
87 * Called with inode->i_mutex; we depend on this when we manipulate
88 * io->flag, since we could otherwise race with ext4_flush_completed_IO()
89 */
90int ext4_end_io_nolock(ext4_io_end_t *io)
91{ 89{
92 struct inode *inode = io->inode; 90 struct inode *inode = io->inode;
93 loff_t offset = io->offset; 91 loff_t offset = io->offset;
@@ -106,63 +104,136 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
106 "(inode %lu, offset %llu, size %zd, error %d)", 104 "(inode %lu, offset %llu, size %zd, error %d)",
107 inode->i_ino, offset, size, ret); 105 inode->i_ino, offset, size, ret);
108 } 106 }
109
110 if (io->iocb) 107 if (io->iocb)
111 aio_complete(io->iocb, io->result, 0); 108 aio_complete(io->iocb, io->result, 0);
112 109
113 if (io->flag & EXT4_IO_END_DIRECT) 110 if (io->flag & EXT4_IO_END_DIRECT)
114 inode_dio_done(inode); 111 inode_dio_done(inode);
115 /* Wake up anyone waiting on unwritten extent conversion */ 112 /* Wake up anyone waiting on unwritten extent conversion */
116 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) 113 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
117 wake_up_all(ext4_ioend_wq(io->inode)); 114 wake_up_all(ext4_ioend_wq(io->inode));
118 return ret; 115 return ret;
119} 116}
120 117
121/* 118static void dump_completed_IO(struct inode *inode)
122 * work on completed aio dio IO, to convert unwritten extents to extents 119{
123 */ 120#ifdef EXT4FS_DEBUG
124static void ext4_end_io_work(struct work_struct *work) 121 struct list_head *cur, *before, *after;
122 ext4_io_end_t *io, *io0, *io1;
123 unsigned long flags;
124
125 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
126 ext4_debug("inode %lu completed_io list is empty\n",
127 inode->i_ino);
128 return;
129 }
130
131 ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino);
132 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
133 cur = &io->list;
134 before = cur->prev;
135 io0 = container_of(before, ext4_io_end_t, list);
136 after = cur->next;
137 io1 = container_of(after, ext4_io_end_t, list);
138
139 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
140 io, inode->i_ino, io0, io1);
141 }
142#endif
143}
144
145/* Add the io_end to per-inode completed end_io list. */
146void ext4_add_complete_io(ext4_io_end_t *io_end)
125{ 147{
126 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 148 struct ext4_inode_info *ei = EXT4_I(io_end->inode);
127 struct inode *inode = io->inode; 149 struct workqueue_struct *wq;
128 struct ext4_inode_info *ei = EXT4_I(inode); 150 unsigned long flags;
129 unsigned long flags; 151
152 BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
153 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
130 154
131 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 155 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
132 if (io->flag & EXT4_IO_END_IN_FSYNC) 156 if (list_empty(&ei->i_completed_io_list)) {
133 goto requeue; 157 io_end->flag |= EXT4_IO_END_QUEUED;
134 if (list_empty(&io->list)) { 158 queue_work(wq, &io_end->work);
135 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
136 goto free;
137 } 159 }
160 list_add_tail(&io_end->list, &ei->i_completed_io_list);
161 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
162}
138 163
139 if (!mutex_trylock(&inode->i_mutex)) { 164static int ext4_do_flush_completed_IO(struct inode *inode,
140 bool was_queued; 165 ext4_io_end_t *work_io)
141requeue: 166{
142 was_queued = !!(io->flag & EXT4_IO_END_QUEUED); 167 ext4_io_end_t *io;
143 io->flag |= EXT4_IO_END_QUEUED; 168 struct list_head unwritten, complete, to_free;
144 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 169 unsigned long flags;
145 /* 170 struct ext4_inode_info *ei = EXT4_I(inode);
146 * Requeue the work instead of waiting so that the work 171 int err, ret = 0;
147 * items queued after this can be processed. 172
148 */ 173 INIT_LIST_HEAD(&complete);
149 queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); 174 INIT_LIST_HEAD(&to_free);
150 /* 175
151 * To prevent the ext4-dio-unwritten thread from keeping 176 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
152 * requeueing end_io requests and occupying cpu for too long, 177 dump_completed_IO(inode);
153 * yield the cpu if it sees an end_io request that has already 178 list_replace_init(&ei->i_completed_io_list, &unwritten);
154 * been requeued. 179 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
155 */ 180
156 if (was_queued) 181 while (!list_empty(&unwritten)) {
157 yield(); 182 io = list_entry(unwritten.next, ext4_io_end_t, list);
158 return; 183 BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
184 list_del_init(&io->list);
185
186 err = ext4_end_io(io);
187 if (unlikely(!ret && err))
188 ret = err;
189
190 list_add_tail(&io->list, &complete);
191 }
192 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
193 while (!list_empty(&complete)) {
194 io = list_entry(complete.next, ext4_io_end_t, list);
195 io->flag &= ~EXT4_IO_END_UNWRITTEN;
196 /* end_io context can not be destroyed now because it still
197 * used by queued worker. Worker thread will destroy it later */
198 if (io->flag & EXT4_IO_END_QUEUED)
199 list_del_init(&io->list);
200 else
201 list_move(&io->list, &to_free);
202 }
203 /* If we are called from worker context, it is time to clear queued
204 * flag, and destroy it's end_io if it was converted already */
205 if (work_io) {
206 work_io->flag &= ~EXT4_IO_END_QUEUED;
207 if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
208 list_add_tail(&work_io->list, &to_free);
159 } 209 }
160 list_del_init(&io->list);
161 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 210 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
162 (void) ext4_end_io_nolock(io); 211
163 mutex_unlock(&inode->i_mutex); 212 while (!list_empty(&to_free)) {
164free: 213 io = list_entry(to_free.next, ext4_io_end_t, list);
165 ext4_free_io_end(io); 214 list_del_init(&io->list);
215 ext4_free_io_end(io);
216 }
217 return ret;
218}
219
220/*
221 * work on completed aio dio IO, to convert unwritten extents to extents
222 */
223static void ext4_end_io_work(struct work_struct *work)
224{
225 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
226 ext4_do_flush_completed_IO(io->inode, io);
227}
228
229int ext4_flush_unwritten_io(struct inode *inode)
230{
231 int ret;
232 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
233 !(inode->i_state & I_FREEING));
234 ret = ext4_do_flush_completed_IO(inode, NULL);
235 ext4_unwritten_wait(inode);
236 return ret;
166} 237}
167 238
168ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 239ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
@@ -195,9 +266,7 @@ static void buffer_io_error(struct buffer_head *bh)
195static void ext4_end_bio(struct bio *bio, int error) 266static void ext4_end_bio(struct bio *bio, int error)
196{ 267{
197 ext4_io_end_t *io_end = bio->bi_private; 268 ext4_io_end_t *io_end = bio->bi_private;
198 struct workqueue_struct *wq;
199 struct inode *inode; 269 struct inode *inode;
200 unsigned long flags;
201 int i; 270 int i;
202 sector_t bi_sector = bio->bi_sector; 271 sector_t bi_sector = bio->bi_sector;
203 272
@@ -255,14 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
255 return; 324 return;
256 } 325 }
257 326
258 /* Add the io_end to per-inode completed io list*/ 327 ext4_add_complete_io(io_end);
259 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
260 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
261 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
262
263 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
264 /* queue the work to convert unwritten extents to written */
265 queue_work(wq, &io_end->work);
266} 328}
267 329
268void ext4_io_submit(struct ext4_io_submit *io) 330void ext4_io_submit(struct ext4_io_submit *io)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 41f6ef68e2e1..7a75e1086961 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb)
45 smp_mb__after_clear_bit(); 45 smp_mb__after_clear_bit();
46} 46}
47 47
48static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
49 ext4_group_t group) {
50 return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
51 EXT4_DESC_PER_BLOCK_BITS(sb);
52}
53
54static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
55 ext4_group_t group) {
56 group = ext4_meta_bg_first_group(sb, group);
57 return ext4_group_first_block_no(sb, group);
58}
59
60static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
61 ext4_group_t group) {
62 ext4_grpblk_t overhead;
63 overhead = ext4_bg_num_gdb(sb, group);
64 if (ext4_bg_has_super(sb, group))
65 overhead += 1 +
66 le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
67 return overhead;
68}
69
48#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 70#define outside(b, first, last) ((b) < (first) || (b) >= (last))
49#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 71#define inside(b, first, last) ((b) >= (first) && (b) < (last))
50 72
@@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb,
57 ext4_fsblk_t end = start + input->blocks_count; 79 ext4_fsblk_t end = start + input->blocks_count;
58 ext4_group_t group = input->group; 80 ext4_group_t group = input->group;
59 ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; 81 ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
60 unsigned overhead = ext4_bg_has_super(sb, group) ? 82 unsigned overhead = ext4_group_overhead_blocks(sb, group);
61 (1 + ext4_bg_num_gdb(sb, group) +
62 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
63 ext4_fsblk_t metaend = start + overhead; 83 ext4_fsblk_t metaend = start + overhead;
64 struct buffer_head *bh = NULL; 84 struct buffer_head *bh = NULL;
65 ext4_grpblk_t free_blocks_count, offset; 85 ext4_grpblk_t free_blocks_count, offset;
@@ -200,13 +220,15 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
200 * be a partial of a flex group. 220 * be a partial of a flex group.
201 * 221 *
202 * @sb: super block of fs to which the groups belongs 222 * @sb: super block of fs to which the groups belongs
223 *
224 * Returns 0 on a successful allocation of the metadata blocks in the
225 * block group.
203 */ 226 */
204static void ext4_alloc_group_tables(struct super_block *sb, 227static int ext4_alloc_group_tables(struct super_block *sb,
205 struct ext4_new_flex_group_data *flex_gd, 228 struct ext4_new_flex_group_data *flex_gd,
206 int flexbg_size) 229 int flexbg_size)
207{ 230{
208 struct ext4_new_group_data *group_data = flex_gd->groups; 231 struct ext4_new_group_data *group_data = flex_gd->groups;
209 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
210 ext4_fsblk_t start_blk; 232 ext4_fsblk_t start_blk;
211 ext4_fsblk_t last_blk; 233 ext4_fsblk_t last_blk;
212 ext4_group_t src_group; 234 ext4_group_t src_group;
@@ -226,23 +248,24 @@ static void ext4_alloc_group_tables(struct super_block *sb,
226 (last_group & ~(flexbg_size - 1)))); 248 (last_group & ~(flexbg_size - 1))));
227next_group: 249next_group:
228 group = group_data[0].group; 250 group = group_data[0].group;
251 if (src_group >= group_data[0].group + flex_gd->count)
252 return -ENOSPC;
229 start_blk = ext4_group_first_block_no(sb, src_group); 253 start_blk = ext4_group_first_block_no(sb, src_group);
230 last_blk = start_blk + group_data[src_group - group].blocks_count; 254 last_blk = start_blk + group_data[src_group - group].blocks_count;
231 255
232 overhead = ext4_bg_has_super(sb, src_group) ? 256 overhead = ext4_group_overhead_blocks(sb, src_group);
233 (1 + ext4_bg_num_gdb(sb, src_group) +
234 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
235 257
236 start_blk += overhead; 258 start_blk += overhead;
237 259
238 BUG_ON(src_group >= group_data[0].group + flex_gd->count);
239 /* We collect contiguous blocks as much as possible. */ 260 /* We collect contiguous blocks as much as possible. */
240 src_group++; 261 src_group++;
241 for (; src_group <= last_group; src_group++) 262 for (; src_group <= last_group; src_group++) {
242 if (!ext4_bg_has_super(sb, src_group)) 263 overhead = ext4_group_overhead_blocks(sb, src_group);
264 if (overhead != 0)
243 last_blk += group_data[src_group - group].blocks_count; 265 last_blk += group_data[src_group - group].blocks_count;
244 else 266 else
245 break; 267 break;
268 }
246 269
247 /* Allocate block bitmaps */ 270 /* Allocate block bitmaps */
248 for (; bb_index < flex_gd->count; bb_index++) { 271 for (; bb_index < flex_gd->count; bb_index++) {
@@ -300,6 +323,7 @@ next_group:
300 group_data[i].free_blocks_count); 323 group_data[i].free_blocks_count);
301 } 324 }
302 } 325 }
326 return 0;
303} 327}
304 328
305static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 329static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
@@ -433,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
433 ext4_group_t group, count; 457 ext4_group_t group, count;
434 struct buffer_head *bh = NULL; 458 struct buffer_head *bh = NULL;
435 int reserved_gdb, i, j, err = 0, err2; 459 int reserved_gdb, i, j, err = 0, err2;
460 int meta_bg;
436 461
437 BUG_ON(!flex_gd->count || !group_data || 462 BUG_ON(!flex_gd->count || !group_data ||
438 group_data[0].group != sbi->s_groups_count); 463 group_data[0].group != sbi->s_groups_count);
439 464
440 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); 465 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
441 467
442 /* This transaction may be extended/restarted along the way */ 468 /* This transaction may be extended/restarted along the way */
443 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 469 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
@@ -447,12 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
447 group = group_data[0].group; 473 group = group_data[0].group;
448 for (i = 0; i < flex_gd->count; i++, group++) { 474 for (i = 0; i < flex_gd->count; i++, group++) {
449 unsigned long gdblocks; 475 unsigned long gdblocks;
476 ext4_grpblk_t overhead;
450 477
451 gdblocks = ext4_bg_num_gdb(sb, group); 478 gdblocks = ext4_bg_num_gdb(sb, group);
452 start = ext4_group_first_block_no(sb, group); 479 start = ext4_group_first_block_no(sb, group);
453 480
481 if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
482 goto handle_itb;
483
484 if (meta_bg == 1) {
485 ext4_group_t first_group;
486 first_group = ext4_meta_bg_first_group(sb, group);
487 if (first_group != group + 1 &&
488 first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
489 goto handle_itb;
490 }
491
492 block = start + ext4_bg_has_super(sb, group);
454 /* Copy all of the GDT blocks into the backup in this group */ 493 /* Copy all of the GDT blocks into the backup in this group */
455 for (j = 0, block = start + 1; j < gdblocks; j++, block++) { 494 for (j = 0; j < gdblocks; j++, block++) {
456 struct buffer_head *gdb; 495 struct buffer_head *gdb;
457 496
458 ext4_debug("update backup group %#04llx\n", block); 497 ext4_debug("update backup group %#04llx\n", block);
@@ -493,6 +532,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
493 goto out; 532 goto out;
494 } 533 }
495 534
535handle_itb:
496 /* Initialize group tables of the grop @group */ 536 /* Initialize group tables of the grop @group */
497 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) 537 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
498 goto handle_bb; 538 goto handle_bb;
@@ -521,11 +561,11 @@ handle_bb:
521 err = PTR_ERR(bh); 561 err = PTR_ERR(bh);
522 goto out; 562 goto out;
523 } 563 }
524 if (ext4_bg_has_super(sb, group)) { 564 overhead = ext4_group_overhead_blocks(sb, group);
565 if (overhead != 0) {
525 ext4_debug("mark backup superblock %#04llx (+0)\n", 566 ext4_debug("mark backup superblock %#04llx (+0)\n",
526 start); 567 start);
527 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 568 ext4_set_bits(bh->b_data, 0, overhead);
528 1);
529 } 569 }
530 ext4_mark_bitmap_end(group_data[i].blocks_count, 570 ext4_mark_bitmap_end(group_data[i].blocks_count,
531 sb->s_blocksize * 8, bh->b_data); 571 sb->s_blocksize * 8, bh->b_data);
@@ -822,6 +862,45 @@ exit_bh:
822} 862}
823 863
824/* 864/*
865 * add_new_gdb_meta_bg is the sister of add_new_gdb.
866 */
867static int add_new_gdb_meta_bg(struct super_block *sb,
868 handle_t *handle, ext4_group_t group) {
869 ext4_fsblk_t gdblock;
870 struct buffer_head *gdb_bh;
871 struct buffer_head **o_group_desc, **n_group_desc;
872 unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
873 int err;
874
875 gdblock = ext4_meta_bg_first_block_no(sb, group) +
876 ext4_bg_has_super(sb, group);
877 gdb_bh = sb_bread(sb, gdblock);
878 if (!gdb_bh)
879 return -EIO;
880 n_group_desc = ext4_kvmalloc((gdb_num + 1) *
881 sizeof(struct buffer_head *),
882 GFP_NOFS);
883 if (!n_group_desc) {
884 err = -ENOMEM;
885 ext4_warning(sb, "not enough memory for %lu groups",
886 gdb_num + 1);
887 return err;
888 }
889
890 o_group_desc = EXT4_SB(sb)->s_group_desc;
891 memcpy(n_group_desc, o_group_desc,
892 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
893 n_group_desc[gdb_num] = gdb_bh;
894 EXT4_SB(sb)->s_group_desc = n_group_desc;
895 EXT4_SB(sb)->s_gdb_count++;
896 ext4_kvfree(o_group_desc);
897 err = ext4_journal_get_write_access(handle, gdb_bh);
898 if (unlikely(err))
899 brelse(gdb_bh);
900 return err;
901}
902
903/*
825 * Called when we are adding a new group which has a backup copy of each of 904 * Called when we are adding a new group which has a backup copy of each of
826 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. 905 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
827 * We need to add these reserved backup GDT blocks to the resize inode, so 906 * We need to add these reserved backup GDT blocks to the resize inode, so
@@ -949,16 +1028,16 @@ exit_free:
949 * do not copy the full number of backups at this time. The resize 1028 * do not copy the full number of backups at this time. The resize
950 * which changed s_groups_count will backup again. 1029 * which changed s_groups_count will backup again.
951 */ 1030 */
952static void update_backups(struct super_block *sb, 1031static void update_backups(struct super_block *sb, int blk_off, char *data,
953 int blk_off, char *data, int size) 1032 int size, int meta_bg)
954{ 1033{
955 struct ext4_sb_info *sbi = EXT4_SB(sb); 1034 struct ext4_sb_info *sbi = EXT4_SB(sb);
956 const ext4_group_t last = sbi->s_groups_count; 1035 ext4_group_t last;
957 const int bpg = EXT4_BLOCKS_PER_GROUP(sb); 1036 const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
958 unsigned three = 1; 1037 unsigned three = 1;
959 unsigned five = 5; 1038 unsigned five = 5;
960 unsigned seven = 7; 1039 unsigned seven = 7;
961 ext4_group_t group; 1040 ext4_group_t group = 0;
962 int rest = sb->s_blocksize - size; 1041 int rest = sb->s_blocksize - size;
963 handle_t *handle; 1042 handle_t *handle;
964 int err = 0, err2; 1043 int err = 0, err2;
@@ -970,10 +1049,17 @@ static void update_backups(struct super_block *sb,
970 goto exit_err; 1049 goto exit_err;
971 } 1050 }
972 1051
973 ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); 1052 if (meta_bg == 0) {
1053 group = ext4_list_backups(sb, &three, &five, &seven);
1054 last = sbi->s_groups_count;
1055 } else {
1056 group = ext4_meta_bg_first_group(sb, group) + 1;
1057 last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
1058 }
974 1059
975 while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { 1060 while (group < sbi->s_groups_count) {
976 struct buffer_head *bh; 1061 struct buffer_head *bh;
1062 ext4_fsblk_t backup_block;
977 1063
978 /* Out of journal space, and can't get more - abort - so sad */ 1064 /* Out of journal space, and can't get more - abort - so sad */
979 if (ext4_handle_valid(handle) && 1065 if (ext4_handle_valid(handle) &&
@@ -982,13 +1068,20 @@ static void update_backups(struct super_block *sb,
982 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 1068 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
983 break; 1069 break;
984 1070
985 bh = sb_getblk(sb, group * bpg + blk_off); 1071 if (meta_bg == 0)
1072 backup_block = group * bpg + blk_off;
1073 else
1074 backup_block = (ext4_group_first_block_no(sb, group) +
1075 ext4_bg_has_super(sb, group));
1076
1077 bh = sb_getblk(sb, backup_block);
986 if (!bh) { 1078 if (!bh) {
987 err = -EIO; 1079 err = -EIO;
988 break; 1080 break;
989 } 1081 }
990 ext4_debug("update metadata backup %#04lx\n", 1082 ext4_debug("update metadata backup %llu(+%llu)\n",
991 (unsigned long)bh->b_blocknr); 1083 backup_block, backup_block -
1084 ext4_group_first_block_no(sb, group));
992 if ((err = ext4_journal_get_write_access(handle, bh))) 1085 if ((err = ext4_journal_get_write_access(handle, bh)))
993 break; 1086 break;
994 lock_buffer(bh); 1087 lock_buffer(bh);
@@ -1001,6 +1094,13 @@ static void update_backups(struct super_block *sb,
1001 if (unlikely(err)) 1094 if (unlikely(err))
1002 ext4_std_error(sb, err); 1095 ext4_std_error(sb, err);
1003 brelse(bh); 1096 brelse(bh);
1097
1098 if (meta_bg == 0)
1099 group = ext4_list_backups(sb, &three, &five, &seven);
1100 else if (group == last)
1101 break;
1102 else
1103 group = last;
1004 } 1104 }
1005 if ((err2 = ext4_journal_stop(handle)) && !err) 1105 if ((err2 = ext4_journal_stop(handle)) && !err)
1006 err = err2; 1106 err = err2;
@@ -1043,7 +1143,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1043 struct ext4_super_block *es = sbi->s_es; 1143 struct ext4_super_block *es = sbi->s_es;
1044 struct buffer_head *gdb_bh; 1144 struct buffer_head *gdb_bh;
1045 int i, gdb_off, gdb_num, err = 0; 1145 int i, gdb_off, gdb_num, err = 0;
1146 int meta_bg;
1046 1147
1148 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
1047 for (i = 0; i < count; i++, group++) { 1149 for (i = 0; i < count; i++, group++) {
1048 int reserved_gdb = ext4_bg_has_super(sb, group) ? 1150 int reserved_gdb = ext4_bg_has_super(sb, group) ?
1049 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1151 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
@@ -1063,8 +1165,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1063 1165
1064 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) 1166 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
1065 err = reserve_backup_gdb(handle, resize_inode, group); 1167 err = reserve_backup_gdb(handle, resize_inode, group);
1066 } else 1168 } else if (meta_bg != 0) {
1169 err = add_new_gdb_meta_bg(sb, handle, group);
1170 } else {
1067 err = add_new_gdb(handle, resize_inode, group); 1171 err = add_new_gdb(handle, resize_inode, group);
1172 }
1068 if (err) 1173 if (err)
1069 break; 1174 break;
1070 } 1175 }
@@ -1076,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
1076 struct buffer_head *bh = sb_getblk(sb, block); 1181 struct buffer_head *bh = sb_getblk(sb, block);
1077 if (!bh) 1182 if (!bh)
1078 return NULL; 1183 return NULL;
1079 1184 if (!bh_uptodate_or_lock(bh)) {
1080 if (bitmap_uptodate(bh)) 1185 if (bh_submit_read(bh) < 0) {
1081 return bh; 1186 brelse(bh);
1082 1187 return NULL;
1083 lock_buffer(bh); 1188 }
1084 if (bh_submit_read(bh) < 0) {
1085 unlock_buffer(bh);
1086 brelse(bh);
1087 return NULL;
1088 } 1189 }
1089 unlock_buffer(bh);
1090 1190
1091 return bh; 1191 return bh;
1092} 1192}
@@ -1161,6 +1261,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
1161 ext4_free_group_clusters_set(sb, gdp, 1261 ext4_free_group_clusters_set(sb, gdp,
1162 EXT4_B2C(sbi, group_data->free_blocks_count)); 1262 EXT4_B2C(sbi, group_data->free_blocks_count));
1163 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1263 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
1264 if (ext4_has_group_desc_csum(sb))
1265 ext4_itable_unused_set(sb, gdp,
1266 EXT4_INODES_PER_GROUP(sb));
1164 gdp->bg_flags = cpu_to_le16(*bg_flags); 1267 gdp->bg_flags = cpu_to_le16(*bg_flags);
1165 ext4_group_desc_csum_set(sb, group, gdp); 1268 ext4_group_desc_csum_set(sb, group, gdp);
1166 1269
@@ -1216,7 +1319,7 @@ static void ext4_update_super(struct super_block *sb,
1216 } 1319 }
1217 1320
1218 reserved_blocks = ext4_r_blocks_count(es) * 100; 1321 reserved_blocks = ext4_r_blocks_count(es) * 100;
1219 do_div(reserved_blocks, ext4_blocks_count(es)); 1322 reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
1220 reserved_blocks *= blocks_count; 1323 reserved_blocks *= blocks_count;
1221 do_div(reserved_blocks, 100); 1324 do_div(reserved_blocks, 100);
1222 1325
@@ -1227,6 +1330,7 @@ static void ext4_update_super(struct super_block *sb,
1227 le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1330 le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1228 flex_gd->count); 1331 flex_gd->count);
1229 1332
1333 ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
1230 /* 1334 /*
1231 * We need to protect s_groups_count against other CPUs seeing 1335 * We need to protect s_groups_count against other CPUs seeing
1232 * inconsistent state in the superblock. 1336 * inconsistent state in the superblock.
@@ -1261,6 +1365,8 @@ static void ext4_update_super(struct super_block *sb,
1261 percpu_counter_add(&sbi->s_freeinodes_counter, 1365 percpu_counter_add(&sbi->s_freeinodes_counter,
1262 EXT4_INODES_PER_GROUP(sb) * flex_gd->count); 1366 EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
1263 1367
1368 ext4_debug("free blocks count %llu",
1369 percpu_counter_read(&sbi->s_freeclusters_counter));
1264 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 1370 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1265 EXT4_FEATURE_INCOMPAT_FLEX_BG) && 1371 EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1266 sbi->s_log_groups_per_flex) { 1372 sbi->s_log_groups_per_flex) {
@@ -1349,16 +1455,24 @@ exit_journal:
1349 err = err2; 1455 err = err2;
1350 1456
1351 if (!err) { 1457 if (!err) {
1352 int i; 1458 int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1459 int gdb_num_end = ((group + flex_gd->count - 1) /
1460 EXT4_DESC_PER_BLOCK(sb));
1461 int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb,
1462 EXT4_FEATURE_INCOMPAT_META_BG);
1463 sector_t old_gdb = 0;
1464
1353 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1465 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
1354 sizeof(struct ext4_super_block)); 1466 sizeof(struct ext4_super_block), 0);
1355 for (i = 0; i < flex_gd->count; i++, group++) { 1467 for (; gdb_num <= gdb_num_end; gdb_num++) {
1356 struct buffer_head *gdb_bh; 1468 struct buffer_head *gdb_bh;
1357 int gdb_num; 1469
1358 gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
1359 gdb_bh = sbi->s_group_desc[gdb_num]; 1470 gdb_bh = sbi->s_group_desc[gdb_num];
1471 if (old_gdb == gdb_bh->b_blocknr)
1472 continue;
1360 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, 1473 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
1361 gdb_bh->b_size); 1474 gdb_bh->b_size, meta_bg);
1475 old_gdb = gdb_bh->b_blocknr;
1362 } 1476 }
1363 } 1477 }
1364exit: 1478exit:
@@ -1402,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
1402 1516
1403 group_data[i].group = group + i; 1517 group_data[i].group = group + i;
1404 group_data[i].blocks_count = blocks_per_group; 1518 group_data[i].blocks_count = blocks_per_group;
1405 overhead = ext4_bg_has_super(sb, group + i) ? 1519 overhead = ext4_group_overhead_blocks(sb, group + i);
1406 (1 + ext4_bg_num_gdb(sb, group + i) +
1407 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
1408 group_data[i].free_blocks_count = blocks_per_group - overhead; 1520 group_data[i].free_blocks_count = blocks_per_group - overhead;
1409 if (ext4_has_group_desc_csum(sb)) 1521 if (ext4_has_group_desc_csum(sb))
1410 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1522 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
@@ -1492,6 +1604,14 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1492 if (err) 1604 if (err)
1493 goto out; 1605 goto out;
1494 1606
1607 err = ext4_alloc_flex_bg_array(sb, input->group + 1);
1608 if (err)
1609 return err;
1610
1611 err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1612 if (err)
1613 goto out;
1614
1495 flex_gd.count = 1; 1615 flex_gd.count = 1;
1496 flex_gd.groups = input; 1616 flex_gd.groups = input;
1497 flex_gd.bg_flags = &bg_flags; 1617 flex_gd.bg_flags = &bg_flags;
@@ -1544,11 +1664,13 @@ errout:
1544 err = err2; 1664 err = err2;
1545 1665
1546 if (!err) { 1666 if (!err) {
1667 ext4_fsblk_t first_block;
1668 first_block = ext4_group_first_block_no(sb, 0);
1547 if (test_opt(sb, DEBUG)) 1669 if (test_opt(sb, DEBUG))
1548 printk(KERN_DEBUG "EXT4-fs: extended group to %llu " 1670 printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
1549 "blocks\n", ext4_blocks_count(es)); 1671 "blocks\n", ext4_blocks_count(es));
1550 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 1672 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block,
1551 sizeof(struct ext4_super_block)); 1673 (char *)es, sizeof(struct ext4_super_block), 0);
1552 } 1674 }
1553 return err; 1675 return err;
1554} 1676}
@@ -1631,6 +1753,94 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1631 return err; 1753 return err;
1632} /* ext4_group_extend */ 1754} /* ext4_group_extend */
1633 1755
1756
1757static int num_desc_blocks(struct super_block *sb, ext4_group_t groups)
1758{
1759 return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb);
1760}
1761
1762/*
1763 * Release the resize inode and drop the resize_inode feature if there
1764 * are no more reserved gdt blocks, and then convert the file system
1765 * to enable meta_bg
1766 */
1767static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1768{
1769 handle_t *handle;
1770 struct ext4_sb_info *sbi = EXT4_SB(sb);
1771 struct ext4_super_block *es = sbi->s_es;
1772 struct ext4_inode_info *ei = EXT4_I(inode);
1773 ext4_fsblk_t nr;
1774 int i, ret, err = 0;
1775 int credits = 1;
1776
1777 ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg");
1778 if (inode) {
1779 if (es->s_reserved_gdt_blocks) {
1780 ext4_error(sb, "Unexpected non-zero "
1781 "s_reserved_gdt_blocks");
1782 return -EPERM;
1783 }
1784
1785 /* Do a quick sanity check of the resize inode */
1786 if (inode->i_blocks != 1 << (inode->i_blkbits - 9))
1787 goto invalid_resize_inode;
1788 for (i = 0; i < EXT4_N_BLOCKS; i++) {
1789 if (i == EXT4_DIND_BLOCK) {
1790 if (ei->i_data[i])
1791 continue;
1792 else
1793 goto invalid_resize_inode;
1794 }
1795 if (ei->i_data[i])
1796 goto invalid_resize_inode;
1797 }
1798 credits += 3; /* block bitmap, bg descriptor, resize inode */
1799 }
1800
1801 handle = ext4_journal_start_sb(sb, credits);
1802 if (IS_ERR(handle))
1803 return PTR_ERR(handle);
1804
1805 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1806 if (err)
1807 goto errout;
1808
1809 EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE);
1810 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
1811 sbi->s_es->s_first_meta_bg =
1812 cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
1813
1814 err = ext4_handle_dirty_super(handle, sb);
1815 if (err) {
1816 ext4_std_error(sb, err);
1817 goto errout;
1818 }
1819
1820 if (inode) {
1821 nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]);
1822 ext4_free_blocks(handle, inode, NULL, nr, 1,
1823 EXT4_FREE_BLOCKS_METADATA |
1824 EXT4_FREE_BLOCKS_FORGET);
1825 ei->i_data[EXT4_DIND_BLOCK] = 0;
1826 inode->i_blocks = 0;
1827
1828 err = ext4_mark_inode_dirty(handle, inode);
1829 if (err)
1830 ext4_std_error(sb, err);
1831 }
1832
1833errout:
1834 ret = ext4_journal_stop(handle);
1835 if (!err)
1836 err = ret;
1837 return ret;
1838
1839invalid_resize_inode:
1840 ext4_error(sb, "corrupted/inconsistent resize inode");
1841 return -EINVAL;
1842}
1843
1634/* 1844/*
1635 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count 1845 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
1636 * 1846 *
@@ -1643,21 +1853,31 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1643 struct ext4_sb_info *sbi = EXT4_SB(sb); 1853 struct ext4_sb_info *sbi = EXT4_SB(sb);
1644 struct ext4_super_block *es = sbi->s_es; 1854 struct ext4_super_block *es = sbi->s_es;
1645 struct buffer_head *bh; 1855 struct buffer_head *bh;
1646 struct inode *resize_inode; 1856 struct inode *resize_inode = NULL;
1647 ext4_fsblk_t o_blocks_count; 1857 ext4_grpblk_t add, offset;
1648 ext4_group_t o_group;
1649 ext4_group_t n_group;
1650 ext4_grpblk_t offset, add;
1651 unsigned long n_desc_blocks; 1858 unsigned long n_desc_blocks;
1652 unsigned long o_desc_blocks; 1859 unsigned long o_desc_blocks;
1653 unsigned long desc_blocks; 1860 ext4_group_t o_group;
1654 int err = 0, flexbg_size = 1; 1861 ext4_group_t n_group;
1862 ext4_fsblk_t o_blocks_count;
1863 ext4_fsblk_t n_blocks_count_retry = 0;
1864 unsigned long last_update_time = 0;
1865 int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
1866 int meta_bg;
1655 1867
1868 /* See if the device is actually as big as what was requested */
1869 bh = sb_bread(sb, n_blocks_count - 1);
1870 if (!bh) {
1871 ext4_warning(sb, "can't read last block, resize aborted");
1872 return -ENOSPC;
1873 }
1874 brelse(bh);
1875
1876retry:
1656 o_blocks_count = ext4_blocks_count(es); 1877 o_blocks_count = ext4_blocks_count(es);
1657 1878
1658 if (test_opt(sb, DEBUG)) 1879 ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu "
1659 ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " 1880 "to %llu blocks", o_blocks_count, n_blocks_count);
1660 "to %llu blocks", o_blocks_count, n_blocks_count);
1661 1881
1662 if (n_blocks_count < o_blocks_count) { 1882 if (n_blocks_count < o_blocks_count) {
1663 /* On-line shrinking not supported */ 1883 /* On-line shrinking not supported */
@@ -1672,32 +1892,49 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1672 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); 1892 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
1673 ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); 1893 ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
1674 1894
1675 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / 1895 n_desc_blocks = num_desc_blocks(sb, n_group + 1);
1676 EXT4_DESC_PER_BLOCK(sb); 1896 o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count);
1677 o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
1678 EXT4_DESC_PER_BLOCK(sb);
1679 desc_blocks = n_desc_blocks - o_desc_blocks;
1680 1897
1681 if (desc_blocks && 1898 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
1682 (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
1683 le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
1684 ext4_warning(sb, "No reserved GDT blocks, can't resize");
1685 return -EPERM;
1686 }
1687 1899
1688 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); 1900 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) {
1689 if (IS_ERR(resize_inode)) { 1901 if (meta_bg) {
1690 ext4_warning(sb, "Error opening resize inode"); 1902 ext4_error(sb, "resize_inode and meta_bg enabled "
1691 return PTR_ERR(resize_inode); 1903 "simultaneously");
1904 return -EINVAL;
1905 }
1906 if (n_desc_blocks > o_desc_blocks +
1907 le16_to_cpu(es->s_reserved_gdt_blocks)) {
1908 n_blocks_count_retry = n_blocks_count;
1909 n_desc_blocks = o_desc_blocks +
1910 le16_to_cpu(es->s_reserved_gdt_blocks);
1911 n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
1912 n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb);
1913 n_group--; /* set to last group number */
1914 }
1915
1916 if (!resize_inode)
1917 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
1918 if (IS_ERR(resize_inode)) {
1919 ext4_warning(sb, "Error opening resize inode");
1920 return PTR_ERR(resize_inode);
1921 }
1692 } 1922 }
1693 1923
1694 /* See if the device is actually as big as what was requested */ 1924 if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) {
1695 bh = sb_bread(sb, n_blocks_count - 1); 1925 err = ext4_convert_meta_bg(sb, resize_inode);
1696 if (!bh) { 1926 if (err)
1697 ext4_warning(sb, "can't read last block, resize aborted"); 1927 goto out;
1698 return -ENOSPC; 1928 if (resize_inode) {
1929 iput(resize_inode);
1930 resize_inode = NULL;
1931 }
1932 if (n_blocks_count_retry) {
1933 n_blocks_count = n_blocks_count_retry;
1934 n_blocks_count_retry = 0;
1935 goto retry;
1936 }
1699 } 1937 }
1700 brelse(bh);
1701 1938
1702 /* extend the last group */ 1939 /* extend the last group */
1703 if (n_group == o_group) 1940 if (n_group == o_group)
@@ -1710,12 +1947,15 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1710 goto out; 1947 goto out;
1711 } 1948 }
1712 1949
1713 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 1950 if (ext4_blocks_count(es) == n_blocks_count)
1714 es->s_log_groups_per_flex) 1951 goto out;
1715 flexbg_size = 1 << es->s_log_groups_per_flex;
1716 1952
1717 o_blocks_count = ext4_blocks_count(es); 1953 err = ext4_alloc_flex_bg_array(sb, n_group + 1);
1718 if (o_blocks_count == n_blocks_count) 1954 if (err)
1955 return err;
1956
1957 err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
1958 if (err)
1719 goto out; 1959 goto out;
1720 1960
1721 flex_gd = alloc_flex_gd(flexbg_size); 1961 flex_gd = alloc_flex_gd(flexbg_size);
@@ -1729,19 +1969,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1729 */ 1969 */
1730 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, 1970 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
1731 flexbg_size)) { 1971 flexbg_size)) {
1732 ext4_alloc_group_tables(sb, flex_gd, flexbg_size); 1972 if (jiffies - last_update_time > HZ * 10) {
1973 if (last_update_time)
1974 ext4_msg(sb, KERN_INFO,
1975 "resized to %llu blocks",
1976 ext4_blocks_count(es));
1977 last_update_time = jiffies;
1978 }
1979 if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
1980 break;
1733 err = ext4_flex_group_add(sb, resize_inode, flex_gd); 1981 err = ext4_flex_group_add(sb, resize_inode, flex_gd);
1734 if (unlikely(err)) 1982 if (unlikely(err))
1735 break; 1983 break;
1736 } 1984 }
1737 1985
1986 if (!err && n_blocks_count_retry) {
1987 n_blocks_count = n_blocks_count_retry;
1988 n_blocks_count_retry = 0;
1989 free_flex_gd(flex_gd);
1990 flex_gd = NULL;
1991 goto retry;
1992 }
1993
1738out: 1994out:
1739 if (flex_gd) 1995 if (flex_gd)
1740 free_flex_gd(flex_gd); 1996 free_flex_gd(flex_gd);
1741 1997 if (resize_inode != NULL)
1742 iput(resize_inode); 1998 iput(resize_inode);
1743 if (test_opt(sb, DEBUG)) 1999 ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count);
1744 ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
1745 "upto %llu blocks", o_blocks_count, n_blocks_count);
1746 return err; 2000 return err;
1747} 2001}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3e0851e4f468..7265a0367476 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -420,7 +420,7 @@ static void __save_error_info(struct super_block *sb, const char *func,
420 */ 420 */
421 if (!es->s_error_count) 421 if (!es->s_error_count)
422 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 422 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
423 es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); 423 le32_add_cpu(&es->s_error_count, 1);
424} 424}
425 425
426static void save_error_info(struct super_block *sb, const char *func, 426static void save_error_info(struct super_block *sb, const char *func,
@@ -850,7 +850,6 @@ static void ext4_put_super(struct super_block *sb)
850 flush_workqueue(sbi->dio_unwritten_wq); 850 flush_workqueue(sbi->dio_unwritten_wq);
851 destroy_workqueue(sbi->dio_unwritten_wq); 851 destroy_workqueue(sbi->dio_unwritten_wq);
852 852
853 lock_super(sb);
854 if (sbi->s_journal) { 853 if (sbi->s_journal) {
855 err = jbd2_journal_destroy(sbi->s_journal); 854 err = jbd2_journal_destroy(sbi->s_journal);
856 sbi->s_journal = NULL; 855 sbi->s_journal = NULL;
@@ -917,7 +916,6 @@ static void ext4_put_super(struct super_block *sb)
917 * Now that we are completely done shutting down the 916 * Now that we are completely done shutting down the
918 * superblock, we need to actually destroy the kobject. 917 * superblock, we need to actually destroy the kobject.
919 */ 918 */
920 unlock_super(sb);
921 kobject_put(&sbi->s_kobj); 919 kobject_put(&sbi->s_kobj);
922 wait_for_completion(&sbi->s_kobj_unregister); 920 wait_for_completion(&sbi->s_kobj_unregister);
923 if (sbi->s_chksum_driver) 921 if (sbi->s_chksum_driver)
@@ -948,6 +946,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
948 ei->i_reserved_meta_blocks = 0; 946 ei->i_reserved_meta_blocks = 0;
949 ei->i_allocated_meta_blocks = 0; 947 ei->i_allocated_meta_blocks = 0;
950 ei->i_da_metadata_calc_len = 0; 948 ei->i_da_metadata_calc_len = 0;
949 ei->i_da_metadata_calc_last_lblock = 0;
951 spin_lock_init(&(ei->i_block_reservation_lock)); 950 spin_lock_init(&(ei->i_block_reservation_lock));
952#ifdef CONFIG_QUOTA 951#ifdef CONFIG_QUOTA
953 ei->i_reserved_quota = 0; 952 ei->i_reserved_quota = 0;
@@ -955,11 +954,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
955 ei->jinode = NULL; 954 ei->jinode = NULL;
956 INIT_LIST_HEAD(&ei->i_completed_io_list); 955 INIT_LIST_HEAD(&ei->i_completed_io_list);
957 spin_lock_init(&ei->i_completed_io_lock); 956 spin_lock_init(&ei->i_completed_io_lock);
958 ei->cur_aio_dio = NULL;
959 ei->i_sync_tid = 0; 957 ei->i_sync_tid = 0;
960 ei->i_datasync_tid = 0; 958 ei->i_datasync_tid = 0;
961 atomic_set(&ei->i_ioend_count, 0); 959 atomic_set(&ei->i_ioend_count, 0);
962 atomic_set(&ei->i_aiodio_unwritten, 0); 960 atomic_set(&ei->i_unwritten, 0);
963 961
964 return &ei->vfs_inode; 962 return &ei->vfs_inode;
965} 963}
@@ -1018,6 +1016,11 @@ static int init_inodecache(void)
1018 1016
1019static void destroy_inodecache(void) 1017static void destroy_inodecache(void)
1020{ 1018{
1019 /*
1020 * Make sure all delayed rcu free inodes are flushed before we
1021 * destroy cache.
1022 */
1023 rcu_barrier();
1021 kmem_cache_destroy(ext4_inode_cachep); 1024 kmem_cache_destroy(ext4_inode_cachep);
1022} 1025}
1023 1026
@@ -1218,6 +1221,7 @@ enum {
1218 Opt_inode_readahead_blks, Opt_journal_ioprio, 1221 Opt_inode_readahead_blks, Opt_journal_ioprio,
1219 Opt_dioread_nolock, Opt_dioread_lock, 1222 Opt_dioread_nolock, Opt_dioread_lock,
1220 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 1223 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1224 Opt_max_dir_size_kb,
1221}; 1225};
1222 1226
1223static const match_table_t tokens = { 1227static const match_table_t tokens = {
@@ -1291,6 +1295,7 @@ static const match_table_t tokens = {
1291 {Opt_init_itable, "init_itable=%u"}, 1295 {Opt_init_itable, "init_itable=%u"},
1292 {Opt_init_itable, "init_itable"}, 1296 {Opt_init_itable, "init_itable"},
1293 {Opt_noinit_itable, "noinit_itable"}, 1297 {Opt_noinit_itable, "noinit_itable"},
1298 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1294 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1299 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1295 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1300 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1296 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1301 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1471,6 +1476,7 @@ static const struct mount_opts {
1471 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, 1476 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1472 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1477 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1473 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1478 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1479 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1474 {Opt_err, 0, 0} 1480 {Opt_err, 0, 0}
1475}; 1481};
1476 1482
@@ -1586,6 +1592,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1586 if (!args->from) 1592 if (!args->from)
1587 arg = EXT4_DEF_LI_WAIT_MULT; 1593 arg = EXT4_DEF_LI_WAIT_MULT;
1588 sbi->s_li_wait_mult = arg; 1594 sbi->s_li_wait_mult = arg;
1595 } else if (token == Opt_max_dir_size_kb) {
1596 sbi->s_max_dir_size_kb = arg;
1589 } else if (token == Opt_stripe) { 1597 } else if (token == Opt_stripe) {
1590 sbi->s_stripe = arg; 1598 sbi->s_stripe = arg;
1591 } else if (m->flags & MOPT_DATAJ) { 1599 } else if (m->flags & MOPT_DATAJ) {
@@ -1658,7 +1666,7 @@ static int parse_options(char *options, struct super_block *sb,
1658 * Initialize args struct so we know whether arg was 1666 * Initialize args struct so we know whether arg was
1659 * found; some options take optional arguments. 1667 * found; some options take optional arguments.
1660 */ 1668 */
1661 args[0].to = args[0].from = 0; 1669 args[0].to = args[0].from = NULL;
1662 token = match_token(p, tokens, args); 1670 token = match_token(p, tokens, args);
1663 if (handle_mount_opt(sb, p, token, args, journal_devnum, 1671 if (handle_mount_opt(sb, p, token, args, journal_devnum,
1664 journal_ioprio, is_remount) < 0) 1672 journal_ioprio, is_remount) < 0)
@@ -1734,7 +1742,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
1734 1742
1735static const char *token2str(int token) 1743static const char *token2str(int token)
1736{ 1744{
1737 static const struct match_token *t; 1745 const struct match_token *t;
1738 1746
1739 for (t = tokens; t->token != Opt_err; t++) 1747 for (t = tokens; t->token != Opt_err; t++)
1740 if (t->token == token && !strchr(t->pattern, '=')) 1748 if (t->token == token && !strchr(t->pattern, '='))
@@ -1817,6 +1825,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1817 if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && 1825 if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1818 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) 1826 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1819 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); 1827 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1828 if (nodefs || sbi->s_max_dir_size_kb)
1829 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
1820 1830
1821 ext4_show_quota_options(seq, sb); 1831 ext4_show_quota_options(seq, sb);
1822 return 0; 1832 return 0;
@@ -1908,15 +1918,45 @@ done:
1908 return res; 1918 return res;
1909} 1919}
1910 1920
1921int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1922{
1923 struct ext4_sb_info *sbi = EXT4_SB(sb);
1924 struct flex_groups *new_groups;
1925 int size;
1926
1927 if (!sbi->s_log_groups_per_flex)
1928 return 0;
1929
1930 size = ext4_flex_group(sbi, ngroup - 1) + 1;
1931 if (size <= sbi->s_flex_groups_allocated)
1932 return 0;
1933
1934 size = roundup_pow_of_two(size * sizeof(struct flex_groups));
1935 new_groups = ext4_kvzalloc(size, GFP_KERNEL);
1936 if (!new_groups) {
1937 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
1938 size / (int) sizeof(struct flex_groups));
1939 return -ENOMEM;
1940 }
1941
1942 if (sbi->s_flex_groups) {
1943 memcpy(new_groups, sbi->s_flex_groups,
1944 (sbi->s_flex_groups_allocated *
1945 sizeof(struct flex_groups)));
1946 ext4_kvfree(sbi->s_flex_groups);
1947 }
1948 sbi->s_flex_groups = new_groups;
1949 sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
1950 return 0;
1951}
1952
1911static int ext4_fill_flex_info(struct super_block *sb) 1953static int ext4_fill_flex_info(struct super_block *sb)
1912{ 1954{
1913 struct ext4_sb_info *sbi = EXT4_SB(sb); 1955 struct ext4_sb_info *sbi = EXT4_SB(sb);
1914 struct ext4_group_desc *gdp = NULL; 1956 struct ext4_group_desc *gdp = NULL;
1915 ext4_group_t flex_group_count;
1916 ext4_group_t flex_group; 1957 ext4_group_t flex_group;
1917 unsigned int groups_per_flex = 0; 1958 unsigned int groups_per_flex = 0;
1918 size_t size; 1959 int i, err;
1919 int i;
1920 1960
1921 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1961 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1922 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { 1962 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
@@ -1925,17 +1965,9 @@ static int ext4_fill_flex_info(struct super_block *sb)
1925 } 1965 }
1926 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1966 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1927 1967
1928 /* We allocate both existing and potentially added groups */ 1968 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
1929 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1969 if (err)
1930 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1931 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1932 size = flex_group_count * sizeof(struct flex_groups);
1933 sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
1934 if (sbi->s_flex_groups == NULL) {
1935 ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
1936 flex_group_count);
1937 goto failed; 1970 goto failed;
1938 }
1939 1971
1940 for (i = 0; i < sbi->s_groups_count; i++) { 1972 for (i = 0; i < sbi->s_groups_count; i++) {
1941 gdp = ext4_get_group_desc(sb, i, NULL); 1973 gdp = ext4_get_group_desc(sb, i, NULL);
@@ -2138,10 +2170,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2138 } 2170 }
2139 2171
2140 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2172 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2141 if (es->s_last_orphan) 2173 /* don't clear list on RO mount w/ errors */
2174 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2142 jbd_debug(1, "Errors on filesystem, " 2175 jbd_debug(1, "Errors on filesystem, "
2143 "clearing orphan list.\n"); 2176 "clearing orphan list.\n");
2144 es->s_last_orphan = 0; 2177 es->s_last_orphan = 0;
2178 }
2145 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2179 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2146 return; 2180 return;
2147 } 2181 }
@@ -2522,6 +2556,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2522EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2556EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2523EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2557EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2524EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2558EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
2559EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
2525EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); 2560EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2526 2561
2527static struct attribute *ext4_attrs[] = { 2562static struct attribute *ext4_attrs[] = {
@@ -2537,6 +2572,7 @@ static struct attribute *ext4_attrs[] = {
2537 ATTR_LIST(mb_stream_req), 2572 ATTR_LIST(mb_stream_req),
2538 ATTR_LIST(mb_group_prealloc), 2573 ATTR_LIST(mb_group_prealloc),
2539 ATTR_LIST(max_writeback_mb_bump), 2574 ATTR_LIST(max_writeback_mb_bump),
2575 ATTR_LIST(extent_max_zeroout_kb),
2540 ATTR_LIST(trigger_fs_error), 2576 ATTR_LIST(trigger_fs_error),
2541 NULL, 2577 NULL,
2542}; 2578};
@@ -2544,10 +2580,12 @@ static struct attribute *ext4_attrs[] = {
2544/* Features this copy of ext4 supports */ 2580/* Features this copy of ext4 supports */
2545EXT4_INFO_ATTR(lazy_itable_init); 2581EXT4_INFO_ATTR(lazy_itable_init);
2546EXT4_INFO_ATTR(batched_discard); 2582EXT4_INFO_ATTR(batched_discard);
2583EXT4_INFO_ATTR(meta_bg_resize);
2547 2584
2548static struct attribute *ext4_feat_attrs[] = { 2585static struct attribute *ext4_feat_attrs[] = {
2549 ATTR_LIST(lazy_itable_init), 2586 ATTR_LIST(lazy_itable_init),
2550 ATTR_LIST(batched_discard), 2587 ATTR_LIST(batched_discard),
2588 ATTR_LIST(meta_bg_resize),
2551 NULL, 2589 NULL,
2552}; 2590};
2553 2591
@@ -3108,6 +3146,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
3108 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 3146 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
3109 int s, j, count = 0; 3147 int s, j, count = 0;
3110 3148
3149 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
3150 return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
3151 sbi->s_itb_per_group + 2);
3152
3111 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + 3153 first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
3112 (grp * EXT4_BLOCKS_PER_GROUP(sb)); 3154 (grp * EXT4_BLOCKS_PER_GROUP(sb));
3113 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; 3155 last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
@@ -3364,7 +3406,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3364 * enable delayed allocation by default 3406 * enable delayed allocation by default
3365 * Use -o nodelalloc to turn it off 3407 * Use -o nodelalloc to turn it off
3366 */ 3408 */
3367 if (!IS_EXT3_SB(sb) && 3409 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3368 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3410 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3369 set_opt(sb, DELALLOC); 3411 set_opt(sb, DELALLOC);
3370 3412
@@ -3733,6 +3775,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3733 3775
3734 sbi->s_stripe = ext4_get_stripe_size(sbi); 3776 sbi->s_stripe = ext4_get_stripe_size(sbi);
3735 sbi->s_max_writeback_mb_bump = 128; 3777 sbi->s_max_writeback_mb_bump = 128;
3778 sbi->s_extent_max_zeroout_kb = 32;
3736 3779
3737 /* 3780 /*
3738 * set up enough so that it can read an inode 3781 * set up enough so that it can read an inode
@@ -4419,6 +4462,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
4419 ext4_commit_super(sb, 1); 4462 ext4_commit_super(sb, 1);
4420 4463
4421 jbd2_journal_clear_err(journal); 4464 jbd2_journal_clear_err(journal);
4465 jbd2_journal_update_sb_errno(journal);
4422 } 4466 }
4423} 4467}
4424 4468
@@ -4508,11 +4552,9 @@ static int ext4_unfreeze(struct super_block *sb)
4508 if (sb->s_flags & MS_RDONLY) 4552 if (sb->s_flags & MS_RDONLY)
4509 return 0; 4553 return 0;
4510 4554
4511 lock_super(sb);
4512 /* Reset the needs_recovery flag before the fs is unlocked. */ 4555 /* Reset the needs_recovery flag before the fs is unlocked. */
4513 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4556 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4514 ext4_commit_super(sb, 1); 4557 ext4_commit_super(sb, 1);
4515 unlock_super(sb);
4516 return 0; 4558 return 0;
4517} 4559}
4518 4560
@@ -4548,7 +4590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4548 char *orig_data = kstrdup(data, GFP_KERNEL); 4590 char *orig_data = kstrdup(data, GFP_KERNEL);
4549 4591
4550 /* Store the original options */ 4592 /* Store the original options */
4551 lock_super(sb);
4552 old_sb_flags = sb->s_flags; 4593 old_sb_flags = sb->s_flags;
4553 old_opts.s_mount_opt = sbi->s_mount_opt; 4594 old_opts.s_mount_opt = sbi->s_mount_opt;
4554 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4595 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
@@ -4690,7 +4731,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4690 if (sbi->s_journal == NULL) 4731 if (sbi->s_journal == NULL)
4691 ext4_commit_super(sb, 1); 4732 ext4_commit_super(sb, 1);
4692 4733
4693 unlock_super(sb);
4694#ifdef CONFIG_QUOTA 4734#ifdef CONFIG_QUOTA
4695 /* Release old quota file names */ 4735 /* Release old quota file names */
4696 for (i = 0; i < MAXQUOTAS; i++) 4736 for (i = 0; i < MAXQUOTAS; i++)
@@ -4703,10 +4743,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4703 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4743 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4704 EXT4_FEATURE_RO_COMPAT_QUOTA)) { 4744 EXT4_FEATURE_RO_COMPAT_QUOTA)) {
4705 err = ext4_enable_quotas(sb); 4745 err = ext4_enable_quotas(sb);
4706 if (err) { 4746 if (err)
4707 lock_super(sb);
4708 goto restore_opts; 4747 goto restore_opts;
4709 }
4710 } 4748 }
4711 } 4749 }
4712#endif 4750#endif
@@ -4733,7 +4771,6 @@ restore_opts:
4733 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4771 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4734 } 4772 }
4735#endif 4773#endif
4736 unlock_super(sb);
4737 kfree(orig_data); 4774 kfree(orig_data);
4738 return err; 4775 return err;
4739} 4776}
@@ -4785,7 +4822,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4785 4822
4786static inline struct inode *dquot_to_inode(struct dquot *dquot) 4823static inline struct inode *dquot_to_inode(struct dquot *dquot)
4787{ 4824{
4788 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; 4825 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
4789} 4826}
4790 4827
4791static int ext4_write_dquot(struct dquot *dquot) 4828static int ext4_write_dquot(struct dquot *dquot)
@@ -5258,8 +5295,10 @@ static int __init ext4_init_fs(void)
5258 if (err) 5295 if (err)
5259 goto out6; 5296 goto out6;
5260 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 5297 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
5261 if (!ext4_kset) 5298 if (!ext4_kset) {
5299 err = -ENOMEM;
5262 goto out5; 5300 goto out5;
5301 }
5263 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 5302 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
5264 5303
5265 err = ext4_init_feat_adverts(); 5304 err = ext4_init_feat_adverts();
diff --git a/fs/fat/Makefile b/fs/fat/Makefile
index e06190322c1c..964b634f6667 100644
--- a/fs/fat/Makefile
+++ b/fs/fat/Makefile
@@ -6,6 +6,6 @@ obj-$(CONFIG_FAT_FS) += fat.o
6obj-$(CONFIG_VFAT_FS) += vfat.o 6obj-$(CONFIG_VFAT_FS) += vfat.o
7obj-$(CONFIG_MSDOS_FS) += msdos.o 7obj-$(CONFIG_MSDOS_FS) += msdos.o
8 8
9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o 9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o nfs.o
10vfat-y := namei_vfat.o 10vfat-y := namei_vfat.o
11msdos-y := namei_msdos.o 11msdos-y := namei_msdos.o
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 1cc7038e273d..91ad9e1c9441 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -190,7 +190,8 @@ static void __fat_cache_inval_inode(struct inode *inode)
190 struct fat_cache *cache; 190 struct fat_cache *cache;
191 191
192 while (!list_empty(&i->cache_lru)) { 192 while (!list_empty(&i->cache_lru)) {
193 cache = list_entry(i->cache_lru.next, struct fat_cache, cache_list); 193 cache = list_entry(i->cache_lru.next,
194 struct fat_cache, cache_list);
194 list_del_init(&cache->cache_list); 195 list_del_init(&cache->cache_list);
195 i->nr_caches--; 196 i->nr_caches--;
196 fat_cache_free(cache); 197 fat_cache_free(cache);
@@ -261,9 +262,10 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
261 if (nr < 0) 262 if (nr < 0)
262 goto out; 263 goto out;
263 else if (nr == FAT_ENT_FREE) { 264 else if (nr == FAT_ENT_FREE) {
264 fat_fs_error_ratelimit(sb, "%s: invalid cluster chain" 265 fat_fs_error_ratelimit(sb,
265 " (i_pos %lld)", __func__, 266 "%s: invalid cluster chain (i_pos %lld)",
266 MSDOS_I(inode)->i_pos); 267 __func__,
268 MSDOS_I(inode)->i_pos);
267 nr = -EIO; 269 nr = -EIO;
268 goto out; 270 goto out;
269 } else if (nr == FAT_ENT_EOF) { 271 } else if (nr == FAT_ENT_EOF) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index dc49ed2cbffa..bca6d0a1255e 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -18,7 +18,7 @@
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include <linux/compat.h> 20#include <linux/compat.h>
21#include <asm/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23#include "fat.h" 23#include "fat.h"
24 24
@@ -123,7 +123,8 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
123{ 123{
124 /* Fast stuff first */ 124 /* Fast stuff first */
125 if (*bh && *de && 125 if (*bh && *de &&
126 (*de - (struct msdos_dir_entry *)(*bh)->b_data) < MSDOS_SB(dir->i_sb)->dir_per_block - 1) { 126 (*de - (struct msdos_dir_entry *)(*bh)->b_data) <
127 MSDOS_SB(dir->i_sb)->dir_per_block - 1) {
127 *pos += sizeof(struct msdos_dir_entry); 128 *pos += sizeof(struct msdos_dir_entry);
128 (*de)++; 129 (*de)++;
129 return 0; 130 return 0;
@@ -155,7 +156,8 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
155 156
156 while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) { 157 while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) {
157 ec = *ip++; 158 ec = *ip++;
158 if ((charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { 159 charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE);
160 if (charlen > 0) {
159 op += charlen; 161 op += charlen;
160 len -= charlen; 162 len -= charlen;
161 } else { 163 } else {
@@ -172,12 +174,12 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
172 } 174 }
173 175
174 if (unlikely(*ip)) { 176 if (unlikely(*ip)) {
175 fat_msg(sb, KERN_WARNING, "filename was truncated while " 177 fat_msg(sb, KERN_WARNING,
176 "converting."); 178 "filename was truncated while converting.");
177 } 179 }
178 180
179 *op = 0; 181 *op = 0;
180 return (op - ascii); 182 return op - ascii;
181} 183}
182 184
183static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni, 185static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
@@ -205,7 +207,8 @@ fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
205} 207}
206 208
207static inline int 209static inline int
208fat_short2lower_uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) 210fat_short2lower_uni(struct nls_table *t, unsigned char *c,
211 int clen, wchar_t *uni)
209{ 212{
210 int charlen; 213 int charlen;
211 wchar_t wc; 214 wchar_t wc;
@@ -220,7 +223,8 @@ fat_short2lower_uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *un
220 if (!nc) 223 if (!nc)
221 nc = *c; 224 nc = *c;
222 225
223 if ( (charlen = t->char2uni(&nc, 1, uni)) < 0) { 226 charlen = t->char2uni(&nc, 1, uni);
227 if (charlen < 0) {
224 *uni = 0x003f; /* a question mark */ 228 *uni = 0x003f; /* a question mark */
225 charlen = 1; 229 charlen = 1;
226 } 230 }
@@ -537,7 +541,6 @@ end_of_dir:
537 541
538 return err; 542 return err;
539} 543}
540
541EXPORT_SYMBOL_GPL(fat_search_long); 544EXPORT_SYMBOL_GPL(fat_search_long);
542 545
543struct fat_ioctl_filldir_callback { 546struct fat_ioctl_filldir_callback {
@@ -574,7 +577,8 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
574 /* Fake . and .. for the root directory. */ 577 /* Fake . and .. for the root directory. */
575 if (inode->i_ino == MSDOS_ROOT_INO) { 578 if (inode->i_ino == MSDOS_ROOT_INO) {
576 while (cpos < 2) { 579 while (cpos < 2) {
577 if (filldir(dirent, "..", cpos+1, cpos, MSDOS_ROOT_INO, DT_DIR) < 0) 580 if (filldir(dirent, "..", cpos+1, cpos,
581 MSDOS_ROOT_INO, DT_DIR) < 0)
578 goto out; 582 goto out;
579 cpos++; 583 cpos++;
580 filp->f_pos++; 584 filp->f_pos++;
@@ -872,25 +876,26 @@ static int fat_get_short_entry(struct inode *dir, loff_t *pos,
872} 876}
873 877
874/* 878/*
875 * The ".." entry can not provide the "struct fat_slot_info" informations 879 * The ".." entry can not provide the "struct fat_slot_info" information
876 * for inode. So, this function provide the some informations only. 880 * for inode, nor a usable i_pos. So, this function provides some information
881 * only.
882 *
883 * Since this function walks through the on-disk inodes within a directory,
884 * callers are responsible for taking any locks necessary to prevent the
885 * directory from changing.
877 */ 886 */
878int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, 887int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
879 struct msdos_dir_entry **de, loff_t *i_pos) 888 struct msdos_dir_entry **de)
880{ 889{
881 loff_t offset; 890 loff_t offset = 0;
882 891
883 offset = 0; 892 *de = NULL;
884 *bh = NULL;
885 while (fat_get_short_entry(dir, &offset, bh, de) >= 0) { 893 while (fat_get_short_entry(dir, &offset, bh, de) >= 0) {
886 if (!strncmp((*de)->name, MSDOS_DOTDOT, MSDOS_NAME)) { 894 if (!strncmp((*de)->name, MSDOS_DOTDOT, MSDOS_NAME))
887 *i_pos = fat_make_i_pos(dir->i_sb, *bh, *de);
888 return 0; 895 return 0;
889 }
890 } 896 }
891 return -ENOENT; 897 return -ENOENT;
892} 898}
893
894EXPORT_SYMBOL_GPL(fat_get_dotdot_entry); 899EXPORT_SYMBOL_GPL(fat_get_dotdot_entry);
895 900
896/* See if directory is empty */ 901/* See if directory is empty */
@@ -913,7 +918,6 @@ int fat_dir_empty(struct inode *dir)
913 brelse(bh); 918 brelse(bh);
914 return result; 919 return result;
915} 920}
916
917EXPORT_SYMBOL_GPL(fat_dir_empty); 921EXPORT_SYMBOL_GPL(fat_dir_empty);
918 922
919/* 923/*
@@ -959,7 +963,6 @@ int fat_scan(struct inode *dir, const unsigned char *name,
959 } 963 }
960 return -ENOENT; 964 return -ENOENT;
961} 965}
962
963EXPORT_SYMBOL_GPL(fat_scan); 966EXPORT_SYMBOL_GPL(fat_scan);
964 967
965static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) 968static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
@@ -1047,7 +1050,6 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
1047 1050
1048 return 0; 1051 return 0;
1049} 1052}
1050
1051EXPORT_SYMBOL_GPL(fat_remove_entries); 1053EXPORT_SYMBOL_GPL(fat_remove_entries);
1052 1054
1053static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, 1055static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
@@ -1141,10 +1143,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1141 de[0].ctime_cs = de[1].ctime_cs = 0; 1143 de[0].ctime_cs = de[1].ctime_cs = 0;
1142 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; 1144 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0;
1143 } 1145 }
1144 de[0].start = cpu_to_le16(cluster); 1146 fat_set_start(&de[0], cluster);
1145 de[0].starthi = cpu_to_le16(cluster >> 16); 1147 fat_set_start(&de[1], MSDOS_I(dir)->i_logstart);
1146 de[1].start = cpu_to_le16(MSDOS_I(dir)->i_logstart);
1147 de[1].starthi = cpu_to_le16(MSDOS_I(dir)->i_logstart >> 16);
1148 de[0].size = de[1].size = 0; 1148 de[0].size = de[1].size = 0;
1149 memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); 1149 memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
1150 set_buffer_uptodate(bhs[0]); 1150 set_buffer_uptodate(bhs[0]);
@@ -1161,7 +1161,6 @@ error_free:
1161error: 1161error:
1162 return err; 1162 return err;
1163} 1163}
1164
1165EXPORT_SYMBOL_GPL(fat_alloc_new_dir); 1164EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
1166 1165
1167static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, 1166static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
@@ -1377,5 +1376,4 @@ error_remove:
1377 __fat_remove_entries(dir, pos, free_slots); 1376 __fat_remove_entries(dir, pos, free_slots);
1378 return err; 1377 return err;
1379} 1378}
1380
1381EXPORT_SYMBOL_GPL(fat_add_entries); 1379EXPORT_SYMBOL_GPL(fat_add_entries);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 2deeeb86f331..ca7e8f8bad7c 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -5,6 +5,7 @@
5#include <linux/string.h> 5#include <linux/string.h>
6#include <linux/nls.h> 6#include <linux/nls.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/hash.h>
8#include <linux/mutex.h> 9#include <linux/mutex.h>
9#include <linux/ratelimit.h> 10#include <linux/ratelimit.h>
10#include <linux/msdos_fs.h> 11#include <linux/msdos_fs.h>
@@ -23,30 +24,31 @@
23#define FAT_ERRORS_RO 3 /* remount r/o on error */ 24#define FAT_ERRORS_RO 3 /* remount r/o on error */
24 25
25struct fat_mount_options { 26struct fat_mount_options {
26 uid_t fs_uid; 27 kuid_t fs_uid;
27 gid_t fs_gid; 28 kgid_t fs_gid;
28 unsigned short fs_fmask; 29 unsigned short fs_fmask;
29 unsigned short fs_dmask; 30 unsigned short fs_dmask;
30 unsigned short codepage; /* Codepage for shortname conversions */ 31 unsigned short codepage; /* Codepage for shortname conversions */
31 char *iocharset; /* Charset used for filename input/display */ 32 char *iocharset; /* Charset used for filename input/display */
32 unsigned short shortname; /* flags for shortname display/create rule */ 33 unsigned short shortname; /* flags for shortname display/create rule */
33 unsigned char name_check; /* r = relaxed, n = normal, s = strict */ 34 unsigned char name_check; /* r = relaxed, n = normal, s = strict */
34 unsigned char errors; /* On error: continue, panic, remount-ro */ 35 unsigned char errors; /* On error: continue, panic, remount-ro */
35 unsigned short allow_utime;/* permission for setting the [am]time */ 36 unsigned short allow_utime;/* permission for setting the [am]time */
36 unsigned quiet:1, /* set = fake successful chmods and chowns */ 37 unsigned quiet:1, /* set = fake successful chmods and chowns */
37 showexec:1, /* set = only set x bit for com/exe/bat */ 38 showexec:1, /* set = only set x bit for com/exe/bat */
38 sys_immutable:1, /* set = system files are immutable */ 39 sys_immutable:1, /* set = system files are immutable */
39 dotsOK:1, /* set = hidden and system files are named '.filename' */ 40 dotsOK:1, /* set = hidden and system files are named '.filename' */
40 isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ 41 isvfat:1, /* 0=no vfat long filename support, 1=vfat support */
41 utf8:1, /* Use of UTF-8 character set (Default) */ 42 utf8:1, /* Use of UTF-8 character set (Default) */
42 unicode_xlate:1, /* create escape sequences for unhandled Unicode */ 43 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
43 numtail:1, /* Does first alias have a numeric '~1' type tail? */ 44 numtail:1, /* Does first alias have a numeric '~1' type tail? */
44 flush:1, /* write things quickly */ 45 flush:1, /* write things quickly */
45 nocase:1, /* Does this need case conversion? 0=need case conversion*/ 46 nocase:1, /* Does this need case conversion? 0=need case conversion*/
46 usefree:1, /* Use free_clusters for FAT32 */ 47 usefree:1, /* Use free_clusters for FAT32 */
47 tz_utc:1, /* Filesystem timestamps are in UTC */ 48 tz_utc:1, /* Filesystem timestamps are in UTC */
48 rodir:1, /* allow ATTR_RO for directory */ 49 rodir:1, /* allow ATTR_RO for directory */
49 discard:1; /* Issue discard requests on deletions */ 50 discard:1, /* Issue discard requests on deletions */
51 nfs:1; /* Do extra work needed for NFS export */
50}; 52};
51 53
52#define FAT_HASH_BITS 8 54#define FAT_HASH_BITS 8
@@ -56,28 +58,28 @@ struct fat_mount_options {
56 * MS-DOS file system in-core superblock data 58 * MS-DOS file system in-core superblock data
57 */ 59 */
58struct msdos_sb_info { 60struct msdos_sb_info {
59 unsigned short sec_per_clus; /* sectors/cluster */ 61 unsigned short sec_per_clus; /* sectors/cluster */
60 unsigned short cluster_bits; /* log2(cluster_size) */ 62 unsigned short cluster_bits; /* log2(cluster_size) */
61 unsigned int cluster_size; /* cluster size */ 63 unsigned int cluster_size; /* cluster size */
62 unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ 64 unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */
63 unsigned short fat_start; 65 unsigned short fat_start;
64 unsigned long fat_length; /* FAT start & length (sec.) */ 66 unsigned long fat_length; /* FAT start & length (sec.) */
65 unsigned long dir_start; 67 unsigned long dir_start;
66 unsigned short dir_entries; /* root dir start & entries */ 68 unsigned short dir_entries; /* root dir start & entries */
67 unsigned long data_start; /* first data sector */ 69 unsigned long data_start; /* first data sector */
68 unsigned long max_cluster; /* maximum cluster number */ 70 unsigned long max_cluster; /* maximum cluster number */
69 unsigned long root_cluster; /* first cluster of the root directory */ 71 unsigned long root_cluster; /* first cluster of the root directory */
70 unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ 72 unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */
71 struct mutex fat_lock; 73 struct mutex fat_lock;
72 unsigned int prev_free; /* previously allocated cluster number */ 74 unsigned int prev_free; /* previously allocated cluster number */
73 unsigned int free_clusters; /* -1 if undefined */ 75 unsigned int free_clusters; /* -1 if undefined */
74 unsigned int free_clus_valid; /* is free_clusters valid? */ 76 unsigned int free_clus_valid; /* is free_clusters valid? */
75 struct fat_mount_options options; 77 struct fat_mount_options options;
76 struct nls_table *nls_disk; /* Codepage used on disk */ 78 struct nls_table *nls_disk; /* Codepage used on disk */
77 struct nls_table *nls_io; /* Charset used for input and display */ 79 struct nls_table *nls_io; /* Charset used for input and display */
78 const void *dir_ops; /* Opaque; default directory operations */ 80 const void *dir_ops; /* Opaque; default directory operations */
79 int dir_per_block; /* dir entries per block */ 81 int dir_per_block; /* dir entries per block */
80 int dir_per_block_bits; /* log2(dir_per_block) */ 82 int dir_per_block_bits; /* log2(dir_per_block) */
81 83
82 int fatent_shift; 84 int fatent_shift;
83 struct fatent_operations *fatent_ops; 85 struct fatent_operations *fatent_ops;
@@ -88,6 +90,9 @@ struct msdos_sb_info {
88 90
89 spinlock_t inode_hash_lock; 91 spinlock_t inode_hash_lock;
90 struct hlist_head inode_hashtable[FAT_HASH_SIZE]; 92 struct hlist_head inode_hashtable[FAT_HASH_SIZE];
93
94 spinlock_t dir_hash_lock;
95 struct hlist_head dir_hashtable[FAT_HASH_SIZE];
91}; 96};
92 97
93#define FAT_CACHE_VALID 0 /* special case for valid cache */ 98#define FAT_CACHE_VALID 0 /* special case for valid cache */
@@ -110,6 +115,7 @@ struct msdos_inode_info {
110 int i_attrs; /* unused attribute bits */ 115 int i_attrs; /* unused attribute bits */
111 loff_t i_pos; /* on-disk position of directory entry or 0 */ 116 loff_t i_pos; /* on-disk position of directory entry or 0 */
112 struct hlist_node i_fat_hash; /* hash by i_location */ 117 struct hlist_node i_fat_hash; /* hash by i_location */
118 struct hlist_node i_dir_hash; /* hash by i_logstart */
113 struct rw_semaphore truncate_lock; /* protect bmap against truncate */ 119 struct rw_semaphore truncate_lock; /* protect bmap against truncate */
114 struct inode vfs_inode; 120 struct inode vfs_inode;
115}; 121};
@@ -262,7 +268,7 @@ extern int fat_subdirs(struct inode *dir);
262extern int fat_scan(struct inode *dir, const unsigned char *name, 268extern int fat_scan(struct inode *dir, const unsigned char *name,
263 struct fat_slot_info *sinfo); 269 struct fat_slot_info *sinfo);
264extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, 270extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
265 struct msdos_dir_entry **de, loff_t *i_pos); 271 struct msdos_dir_entry **de);
266extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); 272extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
267extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, 273extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
268 struct fat_slot_info *sinfo); 274 struct fat_slot_info *sinfo);
@@ -322,7 +328,7 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
322 unsigned long arg); 328 unsigned long arg);
323extern const struct file_operations fat_file_operations; 329extern const struct file_operations fat_file_operations;
324extern const struct inode_operations fat_file_inode_operations; 330extern const struct inode_operations fat_file_inode_operations;
325extern int fat_setattr(struct dentry * dentry, struct iattr * attr); 331extern int fat_setattr(struct dentry *dentry, struct iattr *attr);
326extern void fat_truncate_blocks(struct inode *inode, loff_t offset); 332extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
327extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, 333extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
328 struct kstat *stat); 334 struct kstat *stat);
@@ -340,7 +346,12 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent,
340 int isvfat, void (*setup)(struct super_block *)); 346 int isvfat, void (*setup)(struct super_block *));
341 347
342extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 348extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
343 struct inode *i2); 349 struct inode *i2);
350static inline unsigned long fat_dir_hash(int logstart)
351{
352 return hash_32(logstart, FAT_HASH_BITS);
353}
354
344/* fat/misc.c */ 355/* fat/misc.c */
345extern __printf(3, 4) __cold 356extern __printf(3, 4) __cold
346void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...); 357void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...);
@@ -366,6 +377,14 @@ extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
366int fat_cache_init(void); 377int fat_cache_init(void);
367void fat_cache_destroy(void); 378void fat_cache_destroy(void);
368 379
380/* fat/nfs.c */
381struct fid;
382extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
383 int fh_len, int fh_type);
384extern struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
385 int fh_len, int fh_type);
386extern struct dentry *fat_get_parent(struct dentry *child_dir);
387
369/* helper for printk */ 388/* helper for printk */
370typedef unsigned long long llu; 389typedef unsigned long long llu;
371 390
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 31f08ab62c56..260705c58062 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -186,9 +186,6 @@ static void fat16_ent_put(struct fat_entry *fatent, int new)
186 186
187static void fat32_ent_put(struct fat_entry *fatent, int new) 187static void fat32_ent_put(struct fat_entry *fatent, int new)
188{ 188{
189 if (new == FAT_ENT_EOF)
190 new = EOF_FAT32;
191
192 WARN_ON(new & 0xf0000000); 189 WARN_ON(new & 0xf0000000);
193 new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff; 190 new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff;
194 *fatent->u.ent32_p = cpu_to_le32(new); 191 *fatent->u.ent32_p = cpu_to_le32(new);
@@ -203,15 +200,18 @@ static int fat12_ent_next(struct fat_entry *fatent)
203 200
204 fatent->entry++; 201 fatent->entry++;
205 if (fatent->nr_bhs == 1) { 202 if (fatent->nr_bhs == 1) {
206 WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 2))); 203 WARN_ON(ent12_p[0] > (u8 *)(bhs[0]->b_data +
207 WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); 204 (bhs[0]->b_size - 2)));
205 WARN_ON(ent12_p[1] > (u8 *)(bhs[0]->b_data +
206 (bhs[0]->b_size - 1)));
208 if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) { 207 if (nextp < (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))) {
209 ent12_p[0] = nextp - 1; 208 ent12_p[0] = nextp - 1;
210 ent12_p[1] = nextp; 209 ent12_p[1] = nextp;
211 return 1; 210 return 1;
212 } 211 }
213 } else { 212 } else {
214 WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data + (bhs[0]->b_size - 1))); 213 WARN_ON(ent12_p[0] != (u8 *)(bhs[0]->b_data +
214 (bhs[0]->b_size - 1)));
215 WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data); 215 WARN_ON(ent12_p[1] != (u8 *)bhs[1]->b_data);
216 ent12_p[0] = nextp - 1; 216 ent12_p[0] = nextp - 1;
217 ent12_p[1] = nextp; 217 ent12_p[1] = nextp;
@@ -631,7 +631,6 @@ error:
631 631
632 return err; 632 return err;
633} 633}
634
635EXPORT_SYMBOL_GPL(fat_free_clusters); 634EXPORT_SYMBOL_GPL(fat_free_clusters);
636 635
637/* 128kb is the whole sectors for FAT12 and FAT16 */ 636/* 128kb is the whole sectors for FAT12 and FAT16 */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index e007b8bd8e5e..a62e0ecbe2db 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -352,7 +352,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
352{ 352{
353 umode_t allow_utime = sbi->options.allow_utime; 353 umode_t allow_utime = sbi->options.allow_utime;
354 354
355 if (current_fsuid() != inode->i_uid) { 355 if (!uid_eq(current_fsuid(), inode->i_uid)) {
356 if (in_group_p(inode->i_gid)) 356 if (in_group_p(inode->i_gid))
357 allow_utime >>= 3; 357 allow_utime >>= 3;
358 if (allow_utime & MAY_WRITE) 358 if (allow_utime & MAY_WRITE)
@@ -407,9 +407,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
407 } 407 }
408 408
409 if (((attr->ia_valid & ATTR_UID) && 409 if (((attr->ia_valid & ATTR_UID) &&
410 (attr->ia_uid != sbi->options.fs_uid)) || 410 (!uid_eq(attr->ia_uid, sbi->options.fs_uid))) ||
411 ((attr->ia_valid & ATTR_GID) && 411 ((attr->ia_valid & ATTR_GID) &&
412 (attr->ia_gid != sbi->options.fs_gid)) || 412 (!gid_eq(attr->ia_gid, sbi->options.fs_gid))) ||
413 ((attr->ia_valid & ATTR_MODE) && 413 ((attr->ia_valid & ATTR_MODE) &&
414 (attr->ia_mode & ~FAT_VALID_MODE))) 414 (attr->ia_mode & ~FAT_VALID_MODE)))
415 error = -EPERM; 415 error = -EPERM;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 05e897fe9866..76f60c642c06 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -281,15 +281,42 @@ static inline unsigned long fat_hash(loff_t i_pos)
281 return hash_32(i_pos, FAT_HASH_BITS); 281 return hash_32(i_pos, FAT_HASH_BITS);
282} 282}
283 283
284static void dir_hash_init(struct super_block *sb)
285{
286 struct msdos_sb_info *sbi = MSDOS_SB(sb);
287 int i;
288
289 spin_lock_init(&sbi->dir_hash_lock);
290 for (i = 0; i < FAT_HASH_SIZE; i++)
291 INIT_HLIST_HEAD(&sbi->dir_hashtable[i]);
292}
293
284void fat_attach(struct inode *inode, loff_t i_pos) 294void fat_attach(struct inode *inode, loff_t i_pos)
285{ 295{
286 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 296 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
287 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
288 297
289 spin_lock(&sbi->inode_hash_lock); 298 if (inode->i_ino != MSDOS_ROOT_INO) {
290 MSDOS_I(inode)->i_pos = i_pos; 299 struct hlist_head *head = sbi->inode_hashtable
291 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); 300 + fat_hash(i_pos);
292 spin_unlock(&sbi->inode_hash_lock); 301
302 spin_lock(&sbi->inode_hash_lock);
303 MSDOS_I(inode)->i_pos = i_pos;
304 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
305 spin_unlock(&sbi->inode_hash_lock);
306 }
307
308 /* If NFS support is enabled, cache the mapping of start cluster
309 * to directory inode. This is used during reconnection of
310 * dentries to the filesystem root.
311 */
312 if (S_ISDIR(inode->i_mode) && sbi->options.nfs) {
313 struct hlist_head *d_head = sbi->dir_hashtable;
314 d_head += fat_dir_hash(MSDOS_I(inode)->i_logstart);
315
316 spin_lock(&sbi->dir_hash_lock);
317 hlist_add_head(&MSDOS_I(inode)->i_dir_hash, d_head);
318 spin_unlock(&sbi->dir_hash_lock);
319 }
293} 320}
294EXPORT_SYMBOL_GPL(fat_attach); 321EXPORT_SYMBOL_GPL(fat_attach);
295 322
@@ -300,6 +327,12 @@ void fat_detach(struct inode *inode)
300 MSDOS_I(inode)->i_pos = 0; 327 MSDOS_I(inode)->i_pos = 0;
301 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 328 hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
302 spin_unlock(&sbi->inode_hash_lock); 329 spin_unlock(&sbi->inode_hash_lock);
330
331 if (S_ISDIR(inode->i_mode) && sbi->options.nfs) {
332 spin_lock(&sbi->dir_hash_lock);
333 hlist_del_init(&MSDOS_I(inode)->i_dir_hash);
334 spin_unlock(&sbi->dir_hash_lock);
335 }
303} 336}
304EXPORT_SYMBOL_GPL(fat_detach); 337EXPORT_SYMBOL_GPL(fat_detach);
305 338
@@ -504,6 +537,7 @@ static void init_once(void *foo)
504 ei->cache_valid_id = FAT_CACHE_VALID + 1; 537 ei->cache_valid_id = FAT_CACHE_VALID + 1;
505 INIT_LIST_HEAD(&ei->cache_lru); 538 INIT_LIST_HEAD(&ei->cache_lru);
506 INIT_HLIST_NODE(&ei->i_fat_hash); 539 INIT_HLIST_NODE(&ei->i_fat_hash);
540 INIT_HLIST_NODE(&ei->i_dir_hash);
507 inode_init_once(&ei->vfs_inode); 541 inode_init_once(&ei->vfs_inode);
508} 542}
509 543
@@ -521,6 +555,11 @@ static int __init fat_init_inodecache(void)
521 555
522static void __exit fat_destroy_inodecache(void) 556static void __exit fat_destroy_inodecache(void)
523{ 557{
558 /*
559 * Make sure all delayed rcu free inodes are flushed before we
560 * destroy cache.
561 */
562 rcu_barrier();
524 kmem_cache_destroy(fat_inode_cachep); 563 kmem_cache_destroy(fat_inode_cachep);
525} 564}
526 565
@@ -663,125 +702,9 @@ static const struct super_operations fat_sops = {
663 .show_options = fat_show_options, 702 .show_options = fat_show_options,
664}; 703};
665 704
666/*
667 * a FAT file handle with fhtype 3 is
668 * 0/ i_ino - for fast, reliable lookup if still in the cache
669 * 1/ i_generation - to see if i_ino is still valid
670 * bit 0 == 0 iff directory
671 * 2/ i_pos(8-39) - if ino has changed, but still in cache
672 * 3/ i_pos(4-7)|i_logstart - to semi-verify inode found at i_pos
673 * 4/ i_pos(0-3)|parent->i_logstart - maybe used to hunt for the file on disc
674 *
675 * Hack for NFSv2: Maximum FAT entry number is 28bits and maximum
676 * i_pos is 40bits (blocknr(32) + dir offset(8)), so two 4bits
677 * of i_logstart is used to store the directory entry offset.
678 */
679
680static struct dentry *fat_fh_to_dentry(struct super_block *sb,
681 struct fid *fid, int fh_len, int fh_type)
682{
683 struct inode *inode = NULL;
684 u32 *fh = fid->raw;
685
686 if (fh_len < 5 || fh_type != 3)
687 return NULL;
688
689 inode = ilookup(sb, fh[0]);
690 if (!inode || inode->i_generation != fh[1]) {
691 if (inode)
692 iput(inode);
693 inode = NULL;
694 }
695 if (!inode) {
696 loff_t i_pos;
697 int i_logstart = fh[3] & 0x0fffffff;
698
699 i_pos = (loff_t)fh[2] << 8;
700 i_pos |= ((fh[3] >> 24) & 0xf0) | (fh[4] >> 28);
701
702 /* try 2 - see if i_pos is in F-d-c
703 * require i_logstart to be the same
704 * Will fail if you truncate and then re-write
705 */
706
707 inode = fat_iget(sb, i_pos);
708 if (inode && MSDOS_I(inode)->i_logstart != i_logstart) {
709 iput(inode);
710 inode = NULL;
711 }
712 }
713
714 /*
715 * For now, do nothing if the inode is not found.
716 *
717 * What we could do is:
718 *
719 * - follow the file starting at fh[4], and record the ".." entry,
720 * and the name of the fh[2] entry.
721 * - then follow the ".." file finding the next step up.
722 *
723 * This way we build a path to the root of the tree. If this works, we
724 * lookup the path and so get this inode into the cache. Finally try
725 * the fat_iget lookup again. If that fails, then we are totally out
726 * of luck. But all that is for another day
727 */
728 return d_obtain_alias(inode);
729}
730
731static int
732fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
733{
734 int len = *lenp;
735 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
736 loff_t i_pos;
737
738 if (len < 5) {
739 *lenp = 5;
740 return 255; /* no room */
741 }
742
743 i_pos = fat_i_pos_read(sbi, inode);
744 *lenp = 5;
745 fh[0] = inode->i_ino;
746 fh[1] = inode->i_generation;
747 fh[2] = i_pos >> 8;
748 fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart;
749 fh[4] = (i_pos & 0x0f) << 28;
750 if (parent)
751 fh[4] |= MSDOS_I(parent)->i_logstart;
752 return 3;
753}
754
755static struct dentry *fat_get_parent(struct dentry *child)
756{
757 struct super_block *sb = child->d_sb;
758 struct buffer_head *bh;
759 struct msdos_dir_entry *de;
760 loff_t i_pos;
761 struct dentry *parent;
762 struct inode *inode;
763 int err;
764
765 lock_super(sb);
766
767 err = fat_get_dotdot_entry(child->d_inode, &bh, &de, &i_pos);
768 if (err) {
769 parent = ERR_PTR(err);
770 goto out;
771 }
772 inode = fat_build_inode(sb, de, i_pos);
773 brelse(bh);
774
775 parent = d_obtain_alias(inode);
776out:
777 unlock_super(sb);
778
779 return parent;
780}
781
782static const struct export_operations fat_export_ops = { 705static const struct export_operations fat_export_ops = {
783 .encode_fh = fat_encode_fh,
784 .fh_to_dentry = fat_fh_to_dentry, 706 .fh_to_dentry = fat_fh_to_dentry,
707 .fh_to_parent = fat_fh_to_parent,
785 .get_parent = fat_get_parent, 708 .get_parent = fat_get_parent,
786}; 709};
787 710
@@ -791,10 +714,12 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
791 struct fat_mount_options *opts = &sbi->options; 714 struct fat_mount_options *opts = &sbi->options;
792 int isvfat = opts->isvfat; 715 int isvfat = opts->isvfat;
793 716
794 if (opts->fs_uid != 0) 717 if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID))
795 seq_printf(m, ",uid=%u", opts->fs_uid); 718 seq_printf(m, ",uid=%u",
796 if (opts->fs_gid != 0) 719 from_kuid_munged(&init_user_ns, opts->fs_uid));
797 seq_printf(m, ",gid=%u", opts->fs_gid); 720 if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID))
721 seq_printf(m, ",gid=%u",
722 from_kgid_munged(&init_user_ns, opts->fs_gid));
798 seq_printf(m, ",fmask=%04o", opts->fs_fmask); 723 seq_printf(m, ",fmask=%04o", opts->fs_fmask);
799 seq_printf(m, ",dmask=%04o", opts->fs_dmask); 724 seq_printf(m, ",dmask=%04o", opts->fs_dmask);
800 if (opts->allow_utime) 725 if (opts->allow_utime)
@@ -829,6 +754,8 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
829 seq_puts(m, ",usefree"); 754 seq_puts(m, ",usefree");
830 if (opts->quiet) 755 if (opts->quiet)
831 seq_puts(m, ",quiet"); 756 seq_puts(m, ",quiet");
757 if (opts->nfs)
758 seq_puts(m, ",nfs");
832 if (opts->showexec) 759 if (opts->showexec)
833 seq_puts(m, ",showexec"); 760 seq_puts(m, ",showexec");
834 if (opts->sys_immutable) 761 if (opts->sys_immutable)
@@ -873,7 +800,7 @@ enum {
873 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 800 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
874 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 801 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
875 Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 802 Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
876 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err, 803 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_err,
877}; 804};
878 805
879static const match_table_t fat_tokens = { 806static const match_table_t fat_tokens = {
@@ -902,6 +829,7 @@ static const match_table_t fat_tokens = {
902 {Opt_err_panic, "errors=panic"}, 829 {Opt_err_panic, "errors=panic"},
903 {Opt_err_ro, "errors=remount-ro"}, 830 {Opt_err_ro, "errors=remount-ro"},
904 {Opt_discard, "discard"}, 831 {Opt_discard, "discard"},
832 {Opt_nfs, "nfs"},
905 {Opt_obsolete, "conv=binary"}, 833 {Opt_obsolete, "conv=binary"},
906 {Opt_obsolete, "conv=text"}, 834 {Opt_obsolete, "conv=text"},
907 {Opt_obsolete, "conv=auto"}, 835 {Opt_obsolete, "conv=auto"},
@@ -982,6 +910,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
982 opts->numtail = 1; 910 opts->numtail = 1;
983 opts->usefree = opts->nocase = 0; 911 opts->usefree = opts->nocase = 0;
984 opts->tz_utc = 0; 912 opts->tz_utc = 0;
913 opts->nfs = 0;
985 opts->errors = FAT_ERRORS_RO; 914 opts->errors = FAT_ERRORS_RO;
986 *debug = 0; 915 *debug = 0;
987 916
@@ -1037,12 +966,16 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
1037 case Opt_uid: 966 case Opt_uid:
1038 if (match_int(&args[0], &option)) 967 if (match_int(&args[0], &option))
1039 return 0; 968 return 0;
1040 opts->fs_uid = option; 969 opts->fs_uid = make_kuid(current_user_ns(), option);
970 if (!uid_valid(opts->fs_uid))
971 return 0;
1041 break; 972 break;
1042 case Opt_gid: 973 case Opt_gid:
1043 if (match_int(&args[0], &option)) 974 if (match_int(&args[0], &option))
1044 return 0; 975 return 0;
1045 opts->fs_gid = option; 976 opts->fs_gid = make_kgid(current_user_ns(), option);
977 if (!gid_valid(opts->fs_gid))
978 return 0;
1046 break; 979 break;
1047 case Opt_umask: 980 case Opt_umask:
1048 if (match_octal(&args[0], &option)) 981 if (match_octal(&args[0], &option))
@@ -1142,6 +1075,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
1142 case Opt_discard: 1075 case Opt_discard:
1143 opts->discard = 1; 1076 opts->discard = 1;
1144 break; 1077 break;
1078 case Opt_nfs:
1079 opts->nfs = 1;
1080 break;
1145 1081
1146 /* obsolete mount options */ 1082 /* obsolete mount options */
1147 case Opt_obsolete: 1083 case Opt_obsolete:
@@ -1432,6 +1368,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1432 1368
1433 /* set up enough so that it can read an inode */ 1369 /* set up enough so that it can read an inode */
1434 fat_hash_init(sb); 1370 fat_hash_init(sb);
1371 dir_hash_init(sb);
1435 fat_ent_access_init(sb); 1372 fat_ent_access_init(sb);
1436 1373
1437 /* 1374 /*
@@ -1486,6 +1423,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1486 } 1423 }
1487 error = -ENOMEM; 1424 error = -ENOMEM;
1488 insert_inode_hash(root_inode); 1425 insert_inode_hash(root_inode);
1426 fat_attach(root_inode, 0);
1489 sb->s_root = d_make_root(root_inode); 1427 sb->s_root = d_make_root(root_inode);
1490 if (!sb->s_root) { 1428 if (!sb->s_root) {
1491 fat_msg(sb, KERN_ERR, "get root inode failed"); 1429 fat_msg(sb, KERN_ERR, "get root inode failed");
@@ -1525,18 +1463,14 @@ static int writeback_inode(struct inode *inode)
1525{ 1463{
1526 1464
1527 int ret; 1465 int ret;
1528 struct address_space *mapping = inode->i_mapping; 1466
1529 struct writeback_control wbc = { 1467 /* if we used wait=1, sync_inode_metadata waits for the io for the
1530 .sync_mode = WB_SYNC_NONE, 1468 * inode to finish. So wait=0 is sent down to sync_inode_metadata
1531 .nr_to_write = 0,
1532 };
1533 /* if we used WB_SYNC_ALL, sync_inode waits for the io for the
1534 * inode to finish. So WB_SYNC_NONE is sent down to sync_inode
1535 * and filemap_fdatawrite is used for the data blocks 1469 * and filemap_fdatawrite is used for the data blocks
1536 */ 1470 */
1537 ret = sync_inode(inode, &wbc); 1471 ret = sync_inode_metadata(inode, 0);
1538 if (!ret) 1472 if (!ret)
1539 ret = filemap_fdatawrite(mapping); 1473 ret = filemap_fdatawrite(inode->i_mapping);
1540 return ret; 1474 return ret;
1541} 1475}
1542 1476
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index b0e12bf9f4a1..c1055e778fff 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -407,7 +407,7 @@ out:
407static int msdos_unlink(struct inode *dir, struct dentry *dentry) 407static int msdos_unlink(struct inode *dir, struct dentry *dentry)
408{ 408{
409 struct inode *inode = dentry->d_inode; 409 struct inode *inode = dentry->d_inode;
410 struct super_block *sb= inode->i_sb; 410 struct super_block *sb = inode->i_sb;
411 struct fat_slot_info sinfo; 411 struct fat_slot_info sinfo;
412 int err; 412 int err;
413 413
@@ -440,7 +440,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
440 struct inode *old_inode, *new_inode; 440 struct inode *old_inode, *new_inode;
441 struct fat_slot_info old_sinfo, sinfo; 441 struct fat_slot_info old_sinfo, sinfo;
442 struct timespec ts; 442 struct timespec ts;
443 loff_t dotdot_i_pos, new_i_pos; 443 loff_t new_i_pos;
444 int err, old_attrs, is_dir, update_dotdot, corrupt = 0; 444 int err, old_attrs, is_dir, update_dotdot, corrupt = 0;
445 445
446 old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; 446 old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
@@ -456,8 +456,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
456 is_dir = S_ISDIR(old_inode->i_mode); 456 is_dir = S_ISDIR(old_inode->i_mode);
457 update_dotdot = (is_dir && old_dir != new_dir); 457 update_dotdot = (is_dir && old_dir != new_dir);
458 if (update_dotdot) { 458 if (update_dotdot) {
459 if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, 459 if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) {
460 &dotdot_i_pos) < 0) {
461 err = -EIO; 460 err = -EIO;
462 goto out; 461 goto out;
463 } 462 }
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6a6d8c0715a1..e535dd75b986 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -914,7 +914,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
914 struct inode *old_inode, *new_inode; 914 struct inode *old_inode, *new_inode;
915 struct fat_slot_info old_sinfo, sinfo; 915 struct fat_slot_info old_sinfo, sinfo;
916 struct timespec ts; 916 struct timespec ts;
917 loff_t dotdot_i_pos, new_i_pos; 917 loff_t new_i_pos;
918 int err, is_dir, update_dotdot, corrupt = 0; 918 int err, is_dir, update_dotdot, corrupt = 0;
919 struct super_block *sb = old_dir->i_sb; 919 struct super_block *sb = old_dir->i_sb;
920 920
@@ -929,8 +929,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
929 is_dir = S_ISDIR(old_inode->i_mode); 929 is_dir = S_ISDIR(old_inode->i_mode);
930 update_dotdot = (is_dir && old_dir != new_dir); 930 update_dotdot = (is_dir && old_dir != new_dir);
931 if (update_dotdot) { 931 if (update_dotdot) {
932 if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de, 932 if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) {
933 &dotdot_i_pos) < 0) {
934 err = -EIO; 933 err = -EIO;
935 goto out; 934 goto out;
936 } 935 }
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
new file mode 100644
index 000000000000..ef4b5faba87b
--- /dev/null
+++ b/fs/fat/nfs.c
@@ -0,0 +1,101 @@
1/* fs/fat/nfs.c
2 *
3 * This software is licensed under the terms of the GNU General Public
4 * License version 2, as published by the Free Software Foundation, and
5 * may be copied, distributed, and modified under those terms.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/exportfs.h>
15#include "fat.h"
16
17/**
18 * Look up a directory inode given its starting cluster.
19 */
20static struct inode *fat_dget(struct super_block *sb, int i_logstart)
21{
22 struct msdos_sb_info *sbi = MSDOS_SB(sb);
23 struct hlist_head *head;
24 struct hlist_node *_p;
25 struct msdos_inode_info *i;
26 struct inode *inode = NULL;
27
28 head = sbi->dir_hashtable + fat_dir_hash(i_logstart);
29 spin_lock(&sbi->dir_hash_lock);
30 hlist_for_each_entry(i, _p, head, i_dir_hash) {
31 BUG_ON(i->vfs_inode.i_sb != sb);
32 if (i->i_logstart != i_logstart)
33 continue;
34 inode = igrab(&i->vfs_inode);
35 if (inode)
36 break;
37 }
38 spin_unlock(&sbi->dir_hash_lock);
39 return inode;
40}
41
42static struct inode *fat_nfs_get_inode(struct super_block *sb,
43 u64 ino, u32 generation)
44{
45 struct inode *inode;
46
47 if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO))
48 return NULL;
49
50 inode = ilookup(sb, ino);
51 if (inode && generation && (inode->i_generation != generation)) {
52 iput(inode);
53 inode = NULL;
54 }
55
56 return inode;
57}
58
59/**
60 * Map a NFS file handle to a corresponding dentry.
61 * The dentry may or may not be connected to the filesystem root.
62 */
63struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
64 int fh_len, int fh_type)
65{
66 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
67 fat_nfs_get_inode);
68}
69
70/*
71 * Find the parent for a file specified by NFS handle.
72 * This requires that the handle contain the i_ino of the parent.
73 */
74struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
75 int fh_len, int fh_type)
76{
77 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
78 fat_nfs_get_inode);
79}
80
81/*
82 * Find the parent for a directory that is not currently connected to
83 * the filesystem root.
84 *
85 * On entry, the caller holds child_dir->d_inode->i_mutex.
86 */
87struct dentry *fat_get_parent(struct dentry *child_dir)
88{
89 struct super_block *sb = child_dir->d_sb;
90 struct buffer_head *bh = NULL;
91 struct msdos_dir_entry *de;
92 struct inode *parent_inode = NULL;
93
94 if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) {
95 int parent_logstart = fat_get_start(MSDOS_SB(sb), de);
96 parent_inode = fat_dget(sb, parent_logstart);
97 }
98 brelse(bh);
99
100 return d_obtain_alias(parent_inode);
101}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 887b5ba8c9b5..71a600a19f06 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -26,124 +26,6 @@
26#include <asm/siginfo.h> 26#include <asm/siginfo.h>
27#include <asm/uaccess.h> 27#include <asm/uaccess.h>
28 28
29void set_close_on_exec(unsigned int fd, int flag)
30{
31 struct files_struct *files = current->files;
32 struct fdtable *fdt;
33 spin_lock(&files->file_lock);
34 fdt = files_fdtable(files);
35 if (flag)
36 __set_close_on_exec(fd, fdt);
37 else
38 __clear_close_on_exec(fd, fdt);
39 spin_unlock(&files->file_lock);
40}
41
42static bool get_close_on_exec(unsigned int fd)
43{
44 struct files_struct *files = current->files;
45 struct fdtable *fdt;
46 bool res;
47 rcu_read_lock();
48 fdt = files_fdtable(files);
49 res = close_on_exec(fd, fdt);
50 rcu_read_unlock();
51 return res;
52}
53
54SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
55{
56 int err = -EBADF;
57 struct file * file, *tofree;
58 struct files_struct * files = current->files;
59 struct fdtable *fdt;
60
61 if ((flags & ~O_CLOEXEC) != 0)
62 return -EINVAL;
63
64 if (unlikely(oldfd == newfd))
65 return -EINVAL;
66
67 spin_lock(&files->file_lock);
68 err = expand_files(files, newfd);
69 file = fcheck(oldfd);
70 if (unlikely(!file))
71 goto Ebadf;
72 if (unlikely(err < 0)) {
73 if (err == -EMFILE)
74 goto Ebadf;
75 goto out_unlock;
76 }
77 /*
78 * We need to detect attempts to do dup2() over allocated but still
79 * not finished descriptor. NB: OpenBSD avoids that at the price of
80 * extra work in their equivalent of fget() - they insert struct
81 * file immediately after grabbing descriptor, mark it larval if
82 * more work (e.g. actual opening) is needed and make sure that
83 * fget() treats larval files as absent. Potentially interesting,
84 * but while extra work in fget() is trivial, locking implications
85 * and amount of surgery on open()-related paths in VFS are not.
86 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
87 * deadlocks in rather amusing ways, AFAICS. All of that is out of
88 * scope of POSIX or SUS, since neither considers shared descriptor
89 * tables and this condition does not arise without those.
90 */
91 err = -EBUSY;
92 fdt = files_fdtable(files);
93 tofree = fdt->fd[newfd];
94 if (!tofree && fd_is_open(newfd, fdt))
95 goto out_unlock;
96 get_file(file);
97 rcu_assign_pointer(fdt->fd[newfd], file);
98 __set_open_fd(newfd, fdt);
99 if (flags & O_CLOEXEC)
100 __set_close_on_exec(newfd, fdt);
101 else
102 __clear_close_on_exec(newfd, fdt);
103 spin_unlock(&files->file_lock);
104
105 if (tofree)
106 filp_close(tofree, files);
107
108 return newfd;
109
110Ebadf:
111 err = -EBADF;
112out_unlock:
113 spin_unlock(&files->file_lock);
114 return err;
115}
116
117SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
118{
119 if (unlikely(newfd == oldfd)) { /* corner case */
120 struct files_struct *files = current->files;
121 int retval = oldfd;
122
123 rcu_read_lock();
124 if (!fcheck_files(files, oldfd))
125 retval = -EBADF;
126 rcu_read_unlock();
127 return retval;
128 }
129 return sys_dup3(oldfd, newfd, 0);
130}
131
132SYSCALL_DEFINE1(dup, unsigned int, fildes)
133{
134 int ret = -EBADF;
135 struct file *file = fget_raw(fildes);
136
137 if (file) {
138 ret = get_unused_fd();
139 if (ret >= 0)
140 fd_install(ret, file);
141 else
142 fput(file);
143 }
144 return ret;
145}
146
147#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) 29#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
148 30
149static int setfl(int fd, struct file * filp, unsigned long arg) 31static int setfl(int fd, struct file * filp, unsigned long arg)
@@ -267,7 +149,7 @@ pid_t f_getown(struct file *filp)
267 149
268static int f_setown_ex(struct file *filp, unsigned long arg) 150static int f_setown_ex(struct file *filp, unsigned long arg)
269{ 151{
270 struct f_owner_ex * __user owner_p = (void * __user)arg; 152 struct f_owner_ex __user *owner_p = (void __user *)arg;
271 struct f_owner_ex owner; 153 struct f_owner_ex owner;
272 struct pid *pid; 154 struct pid *pid;
273 int type; 155 int type;
@@ -307,7 +189,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
307 189
308static int f_getown_ex(struct file *filp, unsigned long arg) 190static int f_getown_ex(struct file *filp, unsigned long arg)
309{ 191{
310 struct f_owner_ex * __user owner_p = (void * __user)arg; 192 struct f_owner_ex __user *owner_p = (void __user *)arg;
311 struct f_owner_ex owner; 193 struct f_owner_ex owner;
312 int ret = 0; 194 int ret = 0;
313 195
@@ -345,7 +227,7 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
345static int f_getowner_uids(struct file *filp, unsigned long arg) 227static int f_getowner_uids(struct file *filp, unsigned long arg)
346{ 228{
347 struct user_namespace *user_ns = current_user_ns(); 229 struct user_namespace *user_ns = current_user_ns();
348 uid_t * __user dst = (void * __user)arg; 230 uid_t __user *dst = (void __user *)arg;
349 uid_t src[2]; 231 uid_t src[2];
350 int err; 232 int err;
351 233
@@ -373,14 +255,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
373 255
374 switch (cmd) { 256 switch (cmd) {
375 case F_DUPFD: 257 case F_DUPFD:
258 err = f_dupfd(arg, filp, 0);
259 break;
376 case F_DUPFD_CLOEXEC: 260 case F_DUPFD_CLOEXEC:
377 if (arg >= rlimit(RLIMIT_NOFILE)) 261 err = f_dupfd(arg, filp, O_CLOEXEC);
378 break;
379 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
380 if (err >= 0) {
381 get_file(filp);
382 fd_install(err, filp);
383 }
384 break; 262 break;
385 case F_GETFD: 263 case F_GETFD:
386 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 264 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
@@ -470,25 +348,23 @@ static int check_fcntl_cmd(unsigned cmd)
470 348
471SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 349SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
472{ 350{
473 struct file *filp; 351 struct fd f = fdget_raw(fd);
474 int fput_needed;
475 long err = -EBADF; 352 long err = -EBADF;
476 353
477 filp = fget_raw_light(fd, &fput_needed); 354 if (!f.file)
478 if (!filp)
479 goto out; 355 goto out;
480 356
481 if (unlikely(filp->f_mode & FMODE_PATH)) { 357 if (unlikely(f.file->f_mode & FMODE_PATH)) {
482 if (!check_fcntl_cmd(cmd)) 358 if (!check_fcntl_cmd(cmd))
483 goto out1; 359 goto out1;
484 } 360 }
485 361
486 err = security_file_fcntl(filp, cmd, arg); 362 err = security_file_fcntl(f.file, cmd, arg);
487 if (!err) 363 if (!err)
488 err = do_fcntl(fd, cmd, arg, filp); 364 err = do_fcntl(fd, cmd, arg, f.file);
489 365
490out1: 366out1:
491 fput_light(filp, fput_needed); 367 fdput(f);
492out: 368out:
493 return err; 369 return err;
494} 370}
@@ -497,38 +373,36 @@ out:
497SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, 373SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
498 unsigned long, arg) 374 unsigned long, arg)
499{ 375{
500 struct file * filp; 376 struct fd f = fdget_raw(fd);
501 long err = -EBADF; 377 long err = -EBADF;
502 int fput_needed;
503 378
504 filp = fget_raw_light(fd, &fput_needed); 379 if (!f.file)
505 if (!filp)
506 goto out; 380 goto out;
507 381
508 if (unlikely(filp->f_mode & FMODE_PATH)) { 382 if (unlikely(f.file->f_mode & FMODE_PATH)) {
509 if (!check_fcntl_cmd(cmd)) 383 if (!check_fcntl_cmd(cmd))
510 goto out1; 384 goto out1;
511 } 385 }
512 386
513 err = security_file_fcntl(filp, cmd, arg); 387 err = security_file_fcntl(f.file, cmd, arg);
514 if (err) 388 if (err)
515 goto out1; 389 goto out1;
516 390
517 switch (cmd) { 391 switch (cmd) {
518 case F_GETLK64: 392 case F_GETLK64:
519 err = fcntl_getlk64(filp, (struct flock64 __user *) arg); 393 err = fcntl_getlk64(f.file, (struct flock64 __user *) arg);
520 break; 394 break;
521 case F_SETLK64: 395 case F_SETLK64:
522 case F_SETLKW64: 396 case F_SETLKW64:
523 err = fcntl_setlk64(fd, filp, cmd, 397 err = fcntl_setlk64(fd, f.file, cmd,
524 (struct flock64 __user *) arg); 398 (struct flock64 __user *) arg);
525 break; 399 break;
526 default: 400 default:
527 err = do_fcntl(fd, cmd, arg, filp); 401 err = do_fcntl(fd, cmd, arg, f.file);
528 break; 402 break;
529 } 403 }
530out1: 404out1:
531 fput_light(filp, fput_needed); 405 fdput(f);
532out: 406out:
533 return err; 407 return err;
534} 408}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index a48e4a139be1..f775bfdd6e4a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -113,24 +113,21 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
113 113
114static struct vfsmount *get_vfsmount_from_fd(int fd) 114static struct vfsmount *get_vfsmount_from_fd(int fd)
115{ 115{
116 struct path path; 116 struct vfsmount *mnt;
117 117
118 if (fd == AT_FDCWD) { 118 if (fd == AT_FDCWD) {
119 struct fs_struct *fs = current->fs; 119 struct fs_struct *fs = current->fs;
120 spin_lock(&fs->lock); 120 spin_lock(&fs->lock);
121 path = fs->pwd; 121 mnt = mntget(fs->pwd.mnt);
122 mntget(path.mnt);
123 spin_unlock(&fs->lock); 122 spin_unlock(&fs->lock);
124 } else { 123 } else {
125 int fput_needed; 124 struct fd f = fdget(fd);
126 struct file *file = fget_light(fd, &fput_needed); 125 if (!f.file)
127 if (!file)
128 return ERR_PTR(-EBADF); 126 return ERR_PTR(-EBADF);
129 path = file->f_path; 127 mnt = mntget(f.file->f_path.mnt);
130 mntget(path.mnt); 128 fdput(f);
131 fput_light(file, fput_needed);
132 } 129 }
133 return path.mnt; 130 return mnt;
134} 131}
135 132
136static int vfs_dentry_acceptable(void *context, struct dentry *dentry) 133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
diff --git a/fs/file.c b/fs/file.c
index ba3f6053025c..0f1bda4bebfa 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/syscalls.h>
9#include <linux/export.h> 10#include <linux/export.h>
10#include <linux/fs.h> 11#include <linux/fs.h>
11#include <linux/mm.h> 12#include <linux/mm.h>
@@ -84,22 +85,14 @@ static void free_fdtable_work(struct work_struct *work)
84 } 85 }
85} 86}
86 87
87void free_fdtable_rcu(struct rcu_head *rcu) 88static void free_fdtable_rcu(struct rcu_head *rcu)
88{ 89{
89 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 90 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
90 struct fdtable_defer *fddef; 91 struct fdtable_defer *fddef;
91 92
92 BUG_ON(!fdt); 93 BUG_ON(!fdt);
94 BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
93 95
94 if (fdt->max_fds <= NR_OPEN_DEFAULT) {
95 /*
96 * This fdtable is embedded in the files structure and that
97 * structure itself is getting destroyed.
98 */
99 kmem_cache_free(files_cachep,
100 container_of(fdt, struct files_struct, fdtab));
101 return;
102 }
103 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { 96 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
104 kfree(fdt->fd); 97 kfree(fdt->fd);
105 kfree(fdt->open_fds); 98 kfree(fdt->open_fds);
@@ -229,7 +222,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
229 copy_fdtable(new_fdt, cur_fdt); 222 copy_fdtable(new_fdt, cur_fdt);
230 rcu_assign_pointer(files->fdt, new_fdt); 223 rcu_assign_pointer(files->fdt, new_fdt);
231 if (cur_fdt->max_fds > NR_OPEN_DEFAULT) 224 if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
232 free_fdtable(cur_fdt); 225 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
233 } else { 226 } else {
234 /* Somebody else expanded, so undo our attempt */ 227 /* Somebody else expanded, so undo our attempt */
235 __free_fdtable(new_fdt); 228 __free_fdtable(new_fdt);
@@ -245,19 +238,12 @@ static int expand_fdtable(struct files_struct *files, int nr)
245 * expanded and execution may have blocked. 238 * expanded and execution may have blocked.
246 * The files->file_lock should be held on entry, and will be held on exit. 239 * The files->file_lock should be held on entry, and will be held on exit.
247 */ 240 */
248int expand_files(struct files_struct *files, int nr) 241static int expand_files(struct files_struct *files, int nr)
249{ 242{
250 struct fdtable *fdt; 243 struct fdtable *fdt;
251 244
252 fdt = files_fdtable(files); 245 fdt = files_fdtable(files);
253 246
254 /*
255 * N.B. For clone tasks sharing a files structure, this test
256 * will limit the total number of files that can be opened.
257 */
258 if (nr >= rlimit(RLIMIT_NOFILE))
259 return -EMFILE;
260
261 /* Do we need to expand? */ 247 /* Do we need to expand? */
262 if (nr < fdt->max_fds) 248 if (nr < fdt->max_fds)
263 return 0; 249 return 0;
@@ -270,6 +256,26 @@ int expand_files(struct files_struct *files, int nr)
270 return expand_fdtable(files, nr); 256 return expand_fdtable(files, nr);
271} 257}
272 258
259static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
260{
261 __set_bit(fd, fdt->close_on_exec);
262}
263
264static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
265{
266 __clear_bit(fd, fdt->close_on_exec);
267}
268
269static inline void __set_open_fd(int fd, struct fdtable *fdt)
270{
271 __set_bit(fd, fdt->open_fds);
272}
273
274static inline void __clear_open_fd(int fd, struct fdtable *fdt)
275{
276 __clear_bit(fd, fdt->open_fds);
277}
278
273static int count_open_files(struct fdtable *fdt) 279static int count_open_files(struct fdtable *fdt)
274{ 280{
275 int size = fdt->max_fds; 281 int size = fdt->max_fds;
@@ -395,6 +401,95 @@ out:
395 return NULL; 401 return NULL;
396} 402}
397 403
404static void close_files(struct files_struct * files)
405{
406 int i, j;
407 struct fdtable *fdt;
408
409 j = 0;
410
411 /*
412 * It is safe to dereference the fd table without RCU or
413 * ->file_lock because this is the last reference to the
414 * files structure. But use RCU to shut RCU-lockdep up.
415 */
416 rcu_read_lock();
417 fdt = files_fdtable(files);
418 rcu_read_unlock();
419 for (;;) {
420 unsigned long set;
421 i = j * BITS_PER_LONG;
422 if (i >= fdt->max_fds)
423 break;
424 set = fdt->open_fds[j++];
425 while (set) {
426 if (set & 1) {
427 struct file * file = xchg(&fdt->fd[i], NULL);
428 if (file) {
429 filp_close(file, files);
430 cond_resched();
431 }
432 }
433 i++;
434 set >>= 1;
435 }
436 }
437}
438
439struct files_struct *get_files_struct(struct task_struct *task)
440{
441 struct files_struct *files;
442
443 task_lock(task);
444 files = task->files;
445 if (files)
446 atomic_inc(&files->count);
447 task_unlock(task);
448
449 return files;
450}
451
452void put_files_struct(struct files_struct *files)
453{
454 struct fdtable *fdt;
455
456 if (atomic_dec_and_test(&files->count)) {
457 close_files(files);
458 /* not really needed, since nobody can see us */
459 rcu_read_lock();
460 fdt = files_fdtable(files);
461 rcu_read_unlock();
462 /* free the arrays if they are not embedded */
463 if (fdt != &files->fdtab)
464 __free_fdtable(fdt);
465 kmem_cache_free(files_cachep, files);
466 }
467}
468
469void reset_files_struct(struct files_struct *files)
470{
471 struct task_struct *tsk = current;
472 struct files_struct *old;
473
474 old = tsk->files;
475 task_lock(tsk);
476 tsk->files = files;
477 task_unlock(tsk);
478 put_files_struct(old);
479}
480
481void exit_files(struct task_struct *tsk)
482{
483 struct files_struct * files = tsk->files;
484
485 if (files) {
486 task_lock(tsk);
487 tsk->files = NULL;
488 task_unlock(tsk);
489 put_files_struct(files);
490 }
491}
492
398static void __devinit fdtable_defer_list_init(int cpu) 493static void __devinit fdtable_defer_list_init(int cpu)
399{ 494{
400 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); 495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
@@ -424,12 +519,18 @@ struct files_struct init_files = {
424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 519 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
425}; 520};
426 521
522void daemonize_descriptors(void)
523{
524 atomic_inc(&init_files.count);
525 reset_files_struct(&init_files);
526}
527
427/* 528/*
428 * allocate a file descriptor, mark it busy. 529 * allocate a file descriptor, mark it busy.
429 */ 530 */
430int alloc_fd(unsigned start, unsigned flags) 531int __alloc_fd(struct files_struct *files,
532 unsigned start, unsigned end, unsigned flags)
431{ 533{
432 struct files_struct *files = current->files;
433 unsigned int fd; 534 unsigned int fd;
434 int error; 535 int error;
435 struct fdtable *fdt; 536 struct fdtable *fdt;
@@ -444,6 +545,14 @@ repeat:
444 if (fd < fdt->max_fds) 545 if (fd < fdt->max_fds)
445 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); 546 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
446 547
548 /*
549 * N.B. For clone tasks sharing a files structure, this test
550 * will limit the total number of files that can be opened.
551 */
552 error = -EMFILE;
553 if (fd >= end)
554 goto out;
555
447 error = expand_files(files, fd); 556 error = expand_files(files, fd);
448 if (error < 0) 557 if (error < 0)
449 goto out; 558 goto out;
@@ -477,8 +586,424 @@ out:
477 return error; 586 return error;
478} 587}
479 588
480int get_unused_fd(void) 589static int alloc_fd(unsigned start, unsigned flags)
590{
591 return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
592}
593
/*
 * Allocate a descriptor in the caller's table, searching from 0 and
 * bounded by RLIMIT_NOFILE; @flags is passed through to the allocator.
 */
int get_unused_fd_flags(unsigned flags)
{
	return alloc_fd(0, flags);
}
EXPORT_SYMBOL(get_unused_fd_flags);
599
600static void __put_unused_fd(struct files_struct *files, unsigned int fd)
601{
602 struct fdtable *fdt = files_fdtable(files);
603 __clear_open_fd(fd, fdt);
604 if (fd < files->next_fd)
605 files->next_fd = fd;
606}
607
608void put_unused_fd(unsigned int fd)
609{
610 struct files_struct *files = current->files;
611 spin_lock(&files->file_lock);
612 __put_unused_fd(files, fd);
613 spin_unlock(&files->file_lock);
614}
615
616EXPORT_SYMBOL(put_unused_fd);
617
618/*
619 * Install a file pointer in the fd array.
620 *
621 * The VFS is full of places where we drop the files lock between
622 * setting the open_fds bitmap and installing the file in the file
623 * array. At any such point, we are vulnerable to a dup2() race
624 * installing a file in the array before us. We need to detect this and
625 * fput() the struct file we are about to overwrite in this case.
626 *
627 * It should never happen - if we allow dup2() do it, _really_ bad things
628 * will follow.
629 *
630 * NOTE: __fd_install() variant is really, really low-level; don't
631 * use it unless you are forced to by truly lousy API shoved down
632 * your throat. 'files' *MUST* be either current->files or obtained
633 * by get_files_struct(current) done by whoever had given it to you,
634 * or really bad things will happen. Normally you want to use
635 * fd_install() instead.
636 */
637
638void __fd_install(struct files_struct *files, unsigned int fd,
639 struct file *file)
640{
641 struct fdtable *fdt;
642 spin_lock(&files->file_lock);
643 fdt = files_fdtable(files);
644 BUG_ON(fdt->fd[fd] != NULL);
645 rcu_assign_pointer(fdt->fd[fd], file);
646 spin_unlock(&files->file_lock);
647}
648
649void fd_install(unsigned int fd, struct file *file)
481{ 650{
482 return alloc_fd(0, 0); 651 __fd_install(current->files, fd, file);
652}
653
654EXPORT_SYMBOL(fd_install);
655
656/*
657 * The same warnings as for __alloc_fd()/__fd_install() apply here...
658 */
659int __close_fd(struct files_struct *files, unsigned fd)
660{
661 struct file *file;
662 struct fdtable *fdt;
663
664 spin_lock(&files->file_lock);
665 fdt = files_fdtable(files);
666 if (fd >= fdt->max_fds)
667 goto out_unlock;
668 file = fdt->fd[fd];
669 if (!file)
670 goto out_unlock;
671 rcu_assign_pointer(fdt->fd[fd], NULL);
672 __clear_close_on_exec(fd, fdt);
673 __put_unused_fd(files, fd);
674 spin_unlock(&files->file_lock);
675 return filp_close(file, files);
676
677out_unlock:
678 spin_unlock(&files->file_lock);
679 return -EBADF;
680}
681
682void do_close_on_exec(struct files_struct *files)
683{
684 unsigned i;
685 struct fdtable *fdt;
686
687 /* exec unshares first */
688 BUG_ON(atomic_read(&files->count) != 1);
689 spin_lock(&files->file_lock);
690 for (i = 0; ; i++) {
691 unsigned long set;
692 unsigned fd = i * BITS_PER_LONG;
693 fdt = files_fdtable(files);
694 if (fd >= fdt->max_fds)
695 break;
696 set = fdt->close_on_exec[i];
697 if (!set)
698 continue;
699 fdt->close_on_exec[i] = 0;
700 for ( ; set ; fd++, set >>= 1) {
701 struct file *file;
702 if (!(set & 1))
703 continue;
704 file = fdt->fd[fd];
705 if (!file)
706 continue;
707 rcu_assign_pointer(fdt->fd[fd], NULL);
708 __put_unused_fd(files, fd);
709 spin_unlock(&files->file_lock);
710 filp_close(file, files);
711 cond_resched();
712 spin_lock(&files->file_lock);
713 }
714
715 }
716 spin_unlock(&files->file_lock);
717}
718
719struct file *fget(unsigned int fd)
720{
721 struct file *file;
722 struct files_struct *files = current->files;
723
724 rcu_read_lock();
725 file = fcheck_files(files, fd);
726 if (file) {
727 /* File object ref couldn't be taken */
728 if (file->f_mode & FMODE_PATH ||
729 !atomic_long_inc_not_zero(&file->f_count))
730 file = NULL;
731 }
732 rcu_read_unlock();
733
734 return file;
735}
736
737EXPORT_SYMBOL(fget);
738
739struct file *fget_raw(unsigned int fd)
740{
741 struct file *file;
742 struct files_struct *files = current->files;
743
744 rcu_read_lock();
745 file = fcheck_files(files, fd);
746 if (file) {
747 /* File object ref couldn't be taken */
748 if (!atomic_long_inc_not_zero(&file->f_count))
749 file = NULL;
750 }
751 rcu_read_unlock();
752
753 return file;
754}
755
756EXPORT_SYMBOL(fget_raw);
757
758/*
759 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
760 *
761 * You can use this instead of fget if you satisfy all of the following
762 * conditions:
763 * 1) You must call fput_light before exiting the syscall and returning control
764 * to userspace (i.e. you cannot remember the returned struct file * after
765 * returning to userspace).
766 * 2) You must not call filp_close on the returned struct file * in between
767 * calls to fget_light and fput_light.
768 * 3) You must not clone the current task in between the calls to fget_light
769 * and fput_light.
770 *
771 * The fput_needed flag returned by fget_light should be passed to the
772 * corresponding fput_light.
773 */
774struct file *fget_light(unsigned int fd, int *fput_needed)
775{
776 struct file *file;
777 struct files_struct *files = current->files;
778
779 *fput_needed = 0;
780 if (atomic_read(&files->count) == 1) {
781 file = fcheck_files(files, fd);
782 if (file && (file->f_mode & FMODE_PATH))
783 file = NULL;
784 } else {
785 rcu_read_lock();
786 file = fcheck_files(files, fd);
787 if (file) {
788 if (!(file->f_mode & FMODE_PATH) &&
789 atomic_long_inc_not_zero(&file->f_count))
790 *fput_needed = 1;
791 else
792 /* Didn't get the reference, someone's freed */
793 file = NULL;
794 }
795 rcu_read_unlock();
796 }
797
798 return file;
799}
800EXPORT_SYMBOL(fget_light);
801
802struct file *fget_raw_light(unsigned int fd, int *fput_needed)
803{
804 struct file *file;
805 struct files_struct *files = current->files;
806
807 *fput_needed = 0;
808 if (atomic_read(&files->count) == 1) {
809 file = fcheck_files(files, fd);
810 } else {
811 rcu_read_lock();
812 file = fcheck_files(files, fd);
813 if (file) {
814 if (atomic_long_inc_not_zero(&file->f_count))
815 *fput_needed = 1;
816 else
817 /* Didn't get the reference, someone's freed */
818 file = NULL;
819 }
820 rcu_read_unlock();
821 }
822
823 return file;
824}
825
826void set_close_on_exec(unsigned int fd, int flag)
827{
828 struct files_struct *files = current->files;
829 struct fdtable *fdt;
830 spin_lock(&files->file_lock);
831 fdt = files_fdtable(files);
832 if (flag)
833 __set_close_on_exec(fd, fdt);
834 else
835 __clear_close_on_exec(fd, fdt);
836 spin_unlock(&files->file_lock);
837}
838
839bool get_close_on_exec(unsigned int fd)
840{
841 struct files_struct *files = current->files;
842 struct fdtable *fdt;
843 bool res;
844 rcu_read_lock();
845 fdt = files_fdtable(files);
846 res = close_on_exec(fd, fdt);
847 rcu_read_unlock();
848 return res;
849}
850
851static int do_dup2(struct files_struct *files,
852 struct file *file, unsigned fd, unsigned flags)
853{
854 struct file *tofree;
855 struct fdtable *fdt;
856
857 /*
858 * We need to detect attempts to do dup2() over allocated but still
859 * not finished descriptor. NB: OpenBSD avoids that at the price of
860 * extra work in their equivalent of fget() - they insert struct
861 * file immediately after grabbing descriptor, mark it larval if
862 * more work (e.g. actual opening) is needed and make sure that
863 * fget() treats larval files as absent. Potentially interesting,
864 * but while extra work in fget() is trivial, locking implications
865 * and amount of surgery on open()-related paths in VFS are not.
866 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
867 * deadlocks in rather amusing ways, AFAICS. All of that is out of
868 * scope of POSIX or SUS, since neither considers shared descriptor
869 * tables and this condition does not arise without those.
870 */
871 fdt = files_fdtable(files);
872 tofree = fdt->fd[fd];
873 if (!tofree && fd_is_open(fd, fdt))
874 goto Ebusy;
875 get_file(file);
876 rcu_assign_pointer(fdt->fd[fd], file);
877 __set_open_fd(fd, fdt);
878 if (flags & O_CLOEXEC)
879 __set_close_on_exec(fd, fdt);
880 else
881 __clear_close_on_exec(fd, fdt);
882 spin_unlock(&files->file_lock);
883
884 if (tofree)
885 filp_close(tofree, files);
886
887 return fd;
888
889Ebusy:
890 spin_unlock(&files->file_lock);
891 return -EBUSY;
892}
893
894int replace_fd(unsigned fd, struct file *file, unsigned flags)
895{
896 int err;
897 struct files_struct *files = current->files;
898
899 if (!file)
900 return __close_fd(files, fd);
901
902 if (fd >= rlimit(RLIMIT_NOFILE))
903 return -EMFILE;
904
905 spin_lock(&files->file_lock);
906 err = expand_files(files, fd);
907 if (unlikely(err < 0))
908 goto out_unlock;
909 return do_dup2(files, file, fd, flags);
910
911out_unlock:
912 spin_unlock(&files->file_lock);
913 return err;
914}
915
916SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
917{
918 int err = -EBADF;
919 struct file *file;
920 struct files_struct *files = current->files;
921
922 if ((flags & ~O_CLOEXEC) != 0)
923 return -EINVAL;
924
925 if (newfd >= rlimit(RLIMIT_NOFILE))
926 return -EMFILE;
927
928 spin_lock(&files->file_lock);
929 err = expand_files(files, newfd);
930 file = fcheck(oldfd);
931 if (unlikely(!file))
932 goto Ebadf;
933 if (unlikely(err < 0)) {
934 if (err == -EMFILE)
935 goto Ebadf;
936 goto out_unlock;
937 }
938 return do_dup2(files, file, newfd, flags);
939
940Ebadf:
941 err = -EBADF;
942out_unlock:
943 spin_unlock(&files->file_lock);
944 return err;
945}
946
947SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
948{
949 if (unlikely(newfd == oldfd)) { /* corner case */
950 struct files_struct *files = current->files;
951 int retval = oldfd;
952
953 rcu_read_lock();
954 if (!fcheck_files(files, oldfd))
955 retval = -EBADF;
956 rcu_read_unlock();
957 return retval;
958 }
959 return sys_dup3(oldfd, newfd, 0);
960}
961
962SYSCALL_DEFINE1(dup, unsigned int, fildes)
963{
964 int ret = -EBADF;
965 struct file *file = fget_raw(fildes);
966
967 if (file) {
968 ret = get_unused_fd();
969 if (ret >= 0)
970 fd_install(ret, file);
971 else
972 fput(file);
973 }
974 return ret;
975}
976
977int f_dupfd(unsigned int from, struct file *file, unsigned flags)
978{
979 int err;
980 if (from >= rlimit(RLIMIT_NOFILE))
981 return -EINVAL;
982 err = alloc_fd(from, flags);
983 if (err >= 0) {
984 get_file(file);
985 fd_install(err, file);
986 }
987 return err;
988}
989
990int iterate_fd(struct files_struct *files, unsigned n,
991 int (*f)(const void *, struct file *, unsigned),
992 const void *p)
993{
994 struct fdtable *fdt;
995 struct file *file;
996 int res = 0;
997 if (!files)
998 return 0;
999 spin_lock(&files->file_lock);
1000 fdt = files_fdtable(files);
1001 while (!res && n < fdt->max_fds) {
1002 file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
1003 if (file)
1004 res = f(p, file, n);
1005 }
1006 spin_unlock(&files->file_lock);
1007 return res;
483} 1008}
484EXPORT_SYMBOL(get_unused_fd); 1009EXPORT_SYMBOL(iterate_fd);
diff --git a/fs/file_table.c b/fs/file_table.c
index 701985e4ccda..dac67923330f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -243,10 +243,10 @@ static void __fput(struct file *file)
243 if (file->f_op && file->f_op->fasync) 243 if (file->f_op && file->f_op->fasync)
244 file->f_op->fasync(-1, file, 0); 244 file->f_op->fasync(-1, file, 0);
245 } 245 }
246 ima_file_free(file);
246 if (file->f_op && file->f_op->release) 247 if (file->f_op && file->f_op->release)
247 file->f_op->release(inode, file); 248 file->f_op->release(inode, file);
248 security_file_free(file); 249 security_file_free(file);
249 ima_file_free(file);
250 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && 250 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
251 !(file->f_mode & FMODE_PATH))) { 251 !(file->f_mode & FMODE_PATH))) {
252 cdev_put(inode->i_cdev); 252 cdev_put(inode->i_cdev);
@@ -339,112 +339,6 @@ void __fput_sync(struct file *file)
339 339
340EXPORT_SYMBOL(fput); 340EXPORT_SYMBOL(fput);
341 341
342struct file *fget(unsigned int fd)
343{
344 struct file *file;
345 struct files_struct *files = current->files;
346
347 rcu_read_lock();
348 file = fcheck_files(files, fd);
349 if (file) {
350 /* File object ref couldn't be taken */
351 if (file->f_mode & FMODE_PATH ||
352 !atomic_long_inc_not_zero(&file->f_count))
353 file = NULL;
354 }
355 rcu_read_unlock();
356
357 return file;
358}
359
360EXPORT_SYMBOL(fget);
361
362struct file *fget_raw(unsigned int fd)
363{
364 struct file *file;
365 struct files_struct *files = current->files;
366
367 rcu_read_lock();
368 file = fcheck_files(files, fd);
369 if (file) {
370 /* File object ref couldn't be taken */
371 if (!atomic_long_inc_not_zero(&file->f_count))
372 file = NULL;
373 }
374 rcu_read_unlock();
375
376 return file;
377}
378
379EXPORT_SYMBOL(fget_raw);
380
381/*
382 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
383 *
384 * You can use this instead of fget if you satisfy all of the following
385 * conditions:
386 * 1) You must call fput_light before exiting the syscall and returning control
387 * to userspace (i.e. you cannot remember the returned struct file * after
388 * returning to userspace).
389 * 2) You must not call filp_close on the returned struct file * in between
390 * calls to fget_light and fput_light.
391 * 3) You must not clone the current task in between the calls to fget_light
392 * and fput_light.
393 *
394 * The fput_needed flag returned by fget_light should be passed to the
395 * corresponding fput_light.
396 */
397struct file *fget_light(unsigned int fd, int *fput_needed)
398{
399 struct file *file;
400 struct files_struct *files = current->files;
401
402 *fput_needed = 0;
403 if (atomic_read(&files->count) == 1) {
404 file = fcheck_files(files, fd);
405 if (file && (file->f_mode & FMODE_PATH))
406 file = NULL;
407 } else {
408 rcu_read_lock();
409 file = fcheck_files(files, fd);
410 if (file) {
411 if (!(file->f_mode & FMODE_PATH) &&
412 atomic_long_inc_not_zero(&file->f_count))
413 *fput_needed = 1;
414 else
415 /* Didn't get the reference, someone's freed */
416 file = NULL;
417 }
418 rcu_read_unlock();
419 }
420
421 return file;
422}
423
424struct file *fget_raw_light(unsigned int fd, int *fput_needed)
425{
426 struct file *file;
427 struct files_struct *files = current->files;
428
429 *fput_needed = 0;
430 if (atomic_read(&files->count) == 1) {
431 file = fcheck_files(files, fd);
432 } else {
433 rcu_read_lock();
434 file = fcheck_files(files, fd);
435 if (file) {
436 if (atomic_long_inc_not_zero(&file->f_count))
437 *fput_needed = 1;
438 else
439 /* Didn't get the reference, someone's freed */
440 file = NULL;
441 }
442 rcu_read_unlock();
443 }
444
445 return file;
446}
447
448void put_filp(struct file *file) 342void put_filp(struct file *file)
449{ 343{
450 if (atomic_long_dec_and_test(&file->f_count)) { 344 if (atomic_long_dec_and_test(&file->f_count)) {
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ef67c95f12d4..f47df72cef17 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -224,8 +224,8 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
224{ 224{
225 225
226 ip->i_mode = vxfs_transmod(vip); 226 ip->i_mode = vxfs_transmod(vip);
227 ip->i_uid = (uid_t)vip->vii_uid; 227 i_uid_write(ip, (uid_t)vip->vii_uid);
228 ip->i_gid = (gid_t)vip->vii_gid; 228 i_gid_write(ip, (gid_t)vip->vii_gid);
229 229
230 set_nlink(ip, vip->vii_nlink); 230 set_nlink(ip, vip->vii_nlink);
231 ip->i_size = vip->vii_size; 231 ip->i_size = vip->vii_size;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index d4fabd26084e..fed2c8afb3a9 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -279,6 +279,11 @@ static void __exit
279vxfs_cleanup(void) 279vxfs_cleanup(void)
280{ 280{
281 unregister_filesystem(&vxfs_fs_type); 281 unregister_filesystem(&vxfs_fs_type);
282 /*
283 * Make sure all delayed rcu free inodes are flushed before we
284 * destroy cache.
285 */
286 rcu_barrier();
282 kmem_cache_destroy(vxfs_inode_cachep); 287 kmem_cache_destroy(vxfs_inode_cachep);
283} 288}
284 289
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index be3efc4f64f4..401b6c6248ae 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
63{ 63{
64 return test_bit(BDI_writeback_running, &bdi->state); 64 return test_bit(BDI_writeback_running, &bdi->state);
65} 65}
66EXPORT_SYMBOL(writeback_in_progress);
66 67
67static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) 68static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
68{ 69{
@@ -438,8 +439,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
438 * setting I_SYNC flag and calling inode_sync_complete() to clear it. 439 * setting I_SYNC flag and calling inode_sync_complete() to clear it.
439 */ 440 */
440static int 441static int
441__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, 442__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
442 struct writeback_control *wbc)
443{ 443{
444 struct address_space *mapping = inode->i_mapping; 444 struct address_space *mapping = inode->i_mapping;
445 long nr_to_write = wbc->nr_to_write; 445 long nr_to_write = wbc->nr_to_write;
@@ -526,7 +526,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
526 inode->i_state |= I_SYNC; 526 inode->i_state |= I_SYNC;
527 spin_unlock(&inode->i_lock); 527 spin_unlock(&inode->i_lock);
528 528
529 ret = __writeback_single_inode(inode, wb, wbc); 529 ret = __writeback_single_inode(inode, wbc);
530 530
531 spin_lock(&wb->list_lock); 531 spin_lock(&wb->list_lock);
532 spin_lock(&inode->i_lock); 532 spin_lock(&inode->i_lock);
@@ -577,10 +577,6 @@ static long writeback_chunk_size(struct backing_dev_info *bdi,
577/* 577/*
578 * Write a portion of b_io inodes which belong to @sb. 578 * Write a portion of b_io inodes which belong to @sb.
579 * 579 *
580 * If @only_this_sb is true, then find and write all such
581 * inodes. Otherwise write only ones which go sequentially
582 * in reverse order.
583 *
584 * Return the number of pages and/or inodes written. 580 * Return the number of pages and/or inodes written.
585 */ 581 */
586static long writeback_sb_inodes(struct super_block *sb, 582static long writeback_sb_inodes(struct super_block *sb,
@@ -673,7 +669,7 @@ static long writeback_sb_inodes(struct super_block *sb,
673 * We use I_SYNC to pin the inode in memory. While it is set 669 * We use I_SYNC to pin the inode in memory. While it is set
674 * evict_inode() will wait so the inode cannot be freed. 670 * evict_inode() will wait so the inode cannot be freed.
675 */ 671 */
676 __writeback_single_inode(inode, wb, &wbc); 672 __writeback_single_inode(inode, &wbc);
677 673
678 work->nr_pages -= write_chunk - wbc.nr_to_write; 674 work->nr_pages -= write_chunk - wbc.nr_to_write;
679 wrote += write_chunk - wbc.nr_to_write; 675 wrote += write_chunk - wbc.nr_to_write;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 03ff5b1eba93..75a20c092dd4 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
117 const char __user *buf, 117 const char __user *buf,
118 size_t count, loff_t *ppos) 118 size_t count, loff_t *ppos)
119{ 119{
120 unsigned val; 120 unsigned uninitialized_var(val);
121 ssize_t ret; 121 ssize_t ret;
122 122
123 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, 123 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -154,7 +154,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
154 const char __user *buf, 154 const char __user *buf,
155 size_t count, loff_t *ppos) 155 size_t count, loff_t *ppos)
156{ 156{
157 unsigned val; 157 unsigned uninitialized_var(val);
158 ssize_t ret; 158 ssize_t ret;
159 159
160 ret = fuse_conn_limit_write(file, buf, count, ppos, &val, 160 ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 3426521f3205..ee8d55042298 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -396,7 +396,7 @@ err_device:
396err_region: 396err_region:
397 unregister_chrdev_region(devt, 1); 397 unregister_chrdev_region(devt, 1);
398err: 398err:
399 fc->conn_error = 1; 399 fuse_conn_kill(fc);
400 goto out; 400 goto out;
401} 401}
402 402
@@ -532,8 +532,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
532 cdev_del(cc->cdev); 532 cdev_del(cc->cdev);
533 } 533 }
534 534
535 /* kill connection and shutdown channel */
536 fuse_conn_kill(&cc->fc);
537 rc = fuse_dev_release(inode, file); /* puts the base reference */ 535 rc = fuse_dev_release(inode, file); /* puts the base reference */
538 536
539 return rc; 537 return rc;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e8fbe1..8c23fa7a91e6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,8 +148,7 @@ static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
148 if (ff->reserved_req) { 148 if (ff->reserved_req) {
149 req = ff->reserved_req; 149 req = ff->reserved_req;
150 ff->reserved_req = NULL; 150 ff->reserved_req = NULL;
151 get_file(file); 151 req->stolen_file = get_file(file);
152 req->stolen_file = file;
153 } 152 }
154 spin_unlock(&fc->lock); 153 spin_unlock(&fc->lock);
155 } while (!req); 154 } while (!req);
@@ -1576,6 +1575,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1576 req->pages[req->num_pages] = page; 1575 req->pages[req->num_pages] = page;
1577 req->num_pages++; 1576 req->num_pages++;
1578 1577
1578 offset = 0;
1579 num -= this_num; 1579 num -= this_num;
1580 total_len += this_num; 1580 total_len += this_num;
1581 index++; 1581 index++;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8964cf3999b2..324bc0850534 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
383 struct fuse_entry_out outentry; 383 struct fuse_entry_out outentry;
384 struct fuse_file *ff; 384 struct fuse_file *ff;
385 385
386 /* Userspace expects S_IFREG in create mode */
387 BUG_ON((mode & S_IFMT) != S_IFREG);
388
386 forget = fuse_alloc_forget(); 389 forget = fuse_alloc_forget();
387 err = -ENOMEM; 390 err = -ENOMEM;
388 if (!forget) 391 if (!forget)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index aba15f1b7ad2..78d2837bc940 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1379,6 +1379,7 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
1379 .close = fuse_vma_close, 1379 .close = fuse_vma_close,
1380 .fault = filemap_fault, 1380 .fault = filemap_fault,
1381 .page_mkwrite = fuse_page_mkwrite, 1381 .page_mkwrite = fuse_page_mkwrite,
1382 .remap_pages = generic_file_remap_pages,
1382}; 1383};
1383 1384
1384static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1385static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ce0a2838ccd0..f0eda124cffb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -367,11 +367,6 @@ void fuse_conn_kill(struct fuse_conn *fc)
367 wake_up_all(&fc->waitq); 367 wake_up_all(&fc->waitq);
368 wake_up_all(&fc->blocked_waitq); 368 wake_up_all(&fc->blocked_waitq);
369 wake_up_all(&fc->reserved_req_waitq); 369 wake_up_all(&fc->reserved_req_waitq);
370 mutex_lock(&fuse_mutex);
371 list_del(&fc->entry);
372 fuse_ctl_remove_conn(fc);
373 mutex_unlock(&fuse_mutex);
374 fuse_bdi_destroy(fc);
375} 370}
376EXPORT_SYMBOL_GPL(fuse_conn_kill); 371EXPORT_SYMBOL_GPL(fuse_conn_kill);
377 372
@@ -380,7 +375,14 @@ static void fuse_put_super(struct super_block *sb)
380 struct fuse_conn *fc = get_fuse_conn_super(sb); 375 struct fuse_conn *fc = get_fuse_conn_super(sb);
381 376
382 fuse_send_destroy(fc); 377 fuse_send_destroy(fc);
378
383 fuse_conn_kill(fc); 379 fuse_conn_kill(fc);
380 mutex_lock(&fuse_mutex);
381 list_del(&fc->entry);
382 fuse_ctl_remove_conn(fc);
383 mutex_unlock(&fuse_mutex);
384 fuse_bdi_destroy(fc);
385
384 fuse_conn_put(fc); 386 fuse_conn_put(fc);
385} 387}
386 388
@@ -1195,6 +1197,12 @@ static void fuse_fs_cleanup(void)
1195{ 1197{
1196 unregister_filesystem(&fuse_fs_type); 1198 unregister_filesystem(&fuse_fs_type);
1197 unregister_fuseblk(); 1199 unregister_fuseblk();
1200
1201 /*
1202 * Make sure all delayed rcu free inodes are flushed before we
1203 * destroy cache.
1204 */
1205 rcu_barrier();
1198 kmem_cache_destroy(fuse_inode_cachep); 1206 kmem_cache_destroy(fuse_inode_cachep);
1199} 1207}
1200 1208
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index d0dddaceac59..b3f3676796d3 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -56,7 +56,7 @@ generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
56 acl = get_cached_acl(dentry->d_inode, type); 56 acl = get_cached_acl(dentry->d_inode, type);
57 if (!acl) 57 if (!acl)
58 return -ENODATA; 58 return -ENODATA;
59 error = posix_acl_to_xattr(acl, buffer, size); 59 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
60 posix_acl_release(acl); 60 posix_acl_release(acl);
61 61
62 return error; 62 return error;
@@ -77,7 +77,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
77 if (!inode_owner_or_capable(inode)) 77 if (!inode_owner_or_capable(inode))
78 return -EPERM; 78 return -EPERM;
79 if (value) { 79 if (value) {
80 acl = posix_acl_from_xattr(value, size); 80 acl = posix_acl_from_xattr(&init_user_ns, value, size);
81 if (IS_ERR(acl)) 81 if (IS_ERR(acl))
82 return PTR_ERR(acl); 82 return PTR_ERR(acl);
83 } 83 }
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index bd4a5892c93c..f850020ad906 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -63,7 +63,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
63 if (len == 0) 63 if (len == 0)
64 return NULL; 64 return NULL;
65 65
66 acl = posix_acl_from_xattr(data, len); 66 acl = posix_acl_from_xattr(&init_user_ns, data, len);
67 kfree(data); 67 kfree(data);
68 return acl; 68 return acl;
69} 69}
@@ -88,13 +88,13 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
88 const char *name = gfs2_acl_name(type); 88 const char *name = gfs2_acl_name(type);
89 89
90 BUG_ON(name == NULL); 90 BUG_ON(name == NULL);
91 len = posix_acl_to_xattr(acl, NULL, 0); 91 len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
92 if (len == 0) 92 if (len == 0)
93 return 0; 93 return 0;
94 data = kmalloc(len, GFP_NOFS); 94 data = kmalloc(len, GFP_NOFS);
95 if (data == NULL) 95 if (data == NULL)
96 return -ENOMEM; 96 return -ENOMEM;
97 error = posix_acl_to_xattr(acl, data, len); 97 error = posix_acl_to_xattr(&init_user_ns, acl, data, len);
98 if (error < 0) 98 if (error < 0)
99 goto out; 99 goto out;
100 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); 100 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
@@ -166,12 +166,12 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
166 if (error) 166 if (error)
167 return error; 167 return error;
168 168
169 len = posix_acl_to_xattr(acl, NULL, 0); 169 len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0);
170 data = kmalloc(len, GFP_NOFS); 170 data = kmalloc(len, GFP_NOFS);
171 error = -ENOMEM; 171 error = -ENOMEM;
172 if (data == NULL) 172 if (data == NULL)
173 goto out; 173 goto out;
174 posix_acl_to_xattr(acl, data, len); 174 posix_acl_to_xattr(&init_user_ns, acl, data, len);
175 error = gfs2_xattr_acl_chmod(ip, attr, data); 175 error = gfs2_xattr_acl_chmod(ip, attr, data);
176 kfree(data); 176 kfree(data);
177 set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); 177 set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl);
@@ -212,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
212 if (acl == NULL) 212 if (acl == NULL)
213 return -ENODATA; 213 return -ENODATA;
214 214
215 error = posix_acl_to_xattr(acl, buffer, size); 215 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
216 posix_acl_release(acl); 216 posix_acl_release(acl);
217 217
218 return error; 218 return error;
@@ -245,7 +245,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
245 if (!value) 245 if (!value)
246 goto set_acl; 246 goto set_acl;
247 247
248 acl = posix_acl_from_xattr(value, size); 248 acl = posix_acl_from_xattr(&init_user_ns, value, size);
249 if (!acl) { 249 if (!acl) {
250 /* 250 /*
251 * acl_set_file(3) may request that we set default ACLs with 251 * acl_set_file(3) may request that we set default ACLs with
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d6526347d386..01c4975da4bc 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
612 struct gfs2_sbd *sdp = GFS2_SB(mapping->host); 612 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
613 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 613 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
615 unsigned requested = 0;
615 int alloc_required; 616 int alloc_required;
616 int error = 0; 617 int error = 0;
617 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 618 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
@@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
641 if (error) 642 if (error)
642 goto out_unlock; 643 goto out_unlock;
643 644
644 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 645 requested = data_blocks + ind_blocks;
646 error = gfs2_inplace_reserve(ip, requested);
645 if (error) 647 if (error)
646 goto out_qunlock; 648 goto out_qunlock;
647 } 649 }
@@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
654 if (&ip->i_inode == sdp->sd_rindex) 656 if (&ip->i_inode == sdp->sd_rindex)
655 rblocks += 2 * RES_STATFS; 657 rblocks += 2 * RES_STATFS;
656 if (alloc_required) 658 if (alloc_required)
657 rblocks += gfs2_rg_blocks(ip); 659 rblocks += gfs2_rg_blocks(ip, requested);
658 660
659 error = gfs2_trans_begin(sdp, rblocks, 661 error = gfs2_trans_begin(sdp, rblocks,
660 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); 662 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
868 brelse(dibh); 870 brelse(dibh);
869failed: 871failed:
870 gfs2_trans_end(sdp); 872 gfs2_trans_end(sdp);
871 if (gfs2_mb_reserved(ip)) 873 gfs2_inplace_release(ip);
872 gfs2_inplace_release(ip);
873 if (ip->i_res->rs_qa_qd_num) 874 if (ip->i_res->rs_qa_qd_num)
874 gfs2_quota_unlock(ip); 875 gfs2_quota_unlock(ip);
875 if (inode == sdp->sd_rindex) { 876 if (inode == sdp->sd_rindex) {
@@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1023 offset, nr_segs, gfs2_get_block_direct, 1024 offset, nr_segs, gfs2_get_block_direct,
1024 NULL, NULL, 0); 1025 NULL, NULL, 0);
1025out: 1026out:
1026 gfs2_glock_dq_m(1, &gh); 1027 gfs2_glock_dq(&gh);
1027 gfs2_holder_uninit(&gh); 1028 gfs2_holder_uninit(&gh);
1028 return rv; 1029 return rv;
1029} 1030}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 49cd7dd4a9fa..1fd3ae237bdd 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
786 goto out_rlist; 786 goto out_rlist;
787 787
788 if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ 788 if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
789 gfs2_rs_deltree(ip->i_res); 789 gfs2_rs_deltree(ip, ip->i_res);
790 790
791 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + 791 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
792 RES_INDIRECT + RES_STATFS + RES_QUOTA, 792 RES_INDIRECT + RES_STATFS + RES_QUOTA,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d1d791ef38de..0def0504afc1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -323,6 +323,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
323} 323}
324 324
325/** 325/**
326 * gfs2_size_hint - Give a hint to the size of a write request
327 * @file: The struct file
328 * @offset: The file offset of the write
329 * @size: The length of the write
330 *
331 * When we are about to do a write, this function records the total
332 * write size in order to provide a suitable hint to the lower layers
333 * about how many blocks will be required.
334 *
335 */
336
337static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
338{
339 struct inode *inode = filep->f_dentry->d_inode;
340 struct gfs2_sbd *sdp = GFS2_SB(inode);
341 struct gfs2_inode *ip = GFS2_I(inode);
342 size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
343 int hint = min_t(size_t, INT_MAX, blks);
344
345 atomic_set(&ip->i_res->rs_sizehint, hint);
346}
347
348/**
326 * gfs2_allocate_page_backing - Use bmap to allocate blocks 349 * gfs2_allocate_page_backing - Use bmap to allocate blocks
327 * @page: The (locked) page to allocate backing for 350 * @page: The (locked) page to allocate backing for
328 * 351 *
@@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
382 if (ret) 405 if (ret)
383 return ret; 406 return ret;
384 407
385 atomic_set(&ip->i_res->rs_sizehint, 408 gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
386 PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
387 409
388 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 410 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
389 ret = gfs2_glock_nq(&gh); 411 ret = gfs2_glock_nq(&gh);
@@ -419,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
419 rblocks += data_blocks ? data_blocks : 1; 441 rblocks += data_blocks ? data_blocks : 1;
420 if (ind_blocks || data_blocks) { 442 if (ind_blocks || data_blocks) {
421 rblocks += RES_STATFS + RES_QUOTA; 443 rblocks += RES_STATFS + RES_QUOTA;
422 rblocks += gfs2_rg_blocks(ip); 444 rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
423 } 445 }
424 ret = gfs2_trans_begin(sdp, rblocks, 0); 446 ret = gfs2_trans_begin(sdp, rblocks, 0);
425 if (ret) 447 if (ret)
@@ -470,6 +492,7 @@ out:
470static const struct vm_operations_struct gfs2_vm_ops = { 492static const struct vm_operations_struct gfs2_vm_ops = {
471 .fault = filemap_fault, 493 .fault = filemap_fault,
472 .page_mkwrite = gfs2_page_mkwrite, 494 .page_mkwrite = gfs2_page_mkwrite,
495 .remap_pages = generic_file_remap_pages,
473}; 496};
474 497
475/** 498/**
@@ -504,7 +527,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
504 return error; 527 return error;
505 } 528 }
506 vma->vm_ops = &gfs2_vm_ops; 529 vma->vm_ops = &gfs2_vm_ops;
507 vma->vm_flags |= VM_CAN_NONLINEAR;
508 530
509 return 0; 531 return 0;
510} 532}
@@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
663 if (ret) 685 if (ret)
664 return ret; 686 return ret;
665 687
666 atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); 688 gfs2_size_hint(file, pos, writesize);
689
667 if (file->f_flags & O_APPEND) { 690 if (file->f_flags & O_APPEND) {
668 struct gfs2_holder gh; 691 struct gfs2_holder gh;
669 692
@@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
789 if (unlikely(error)) 812 if (unlikely(error))
790 goto out_uninit; 813 goto out_uninit;
791 814
792 atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); 815 gfs2_size_hint(file, offset, len);
793 816
794 while (len > 0) { 817 while (len > 0) {
795 if (len < bytes) 818 if (len < bytes)
@@ -822,7 +845,7 @@ retry:
822 &max_bytes, &data_blocks, &ind_blocks); 845 &max_bytes, &data_blocks, &ind_blocks);
823 846
824 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 847 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
825 RES_RG_HDR + gfs2_rg_blocks(ip); 848 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
826 if (gfs2_is_jdata(ip)) 849 if (gfs2_is_jdata(ip))
827 rblocks += data_blocks ? data_blocks : 1; 850 rblocks += data_blocks ? data_blocks : 1;
828 851
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1ed81f40da0d..e6c2fd53cab2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
186} 186}
187 187
188/** 188/**
189 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
190 * @gl: the glock
191 *
192 * If the glock is demotable, then we add it (or move it) to the end
193 * of the glock LRU list.
194 */
195
196static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
197{
198 if (demote_ok(gl))
199 gfs2_glock_add_to_lru(gl);
200}
201
202/**
203 * gfs2_glock_put_nolock() - Decrement reference count on glock 189 * gfs2_glock_put_nolock() - Decrement reference count on glock
204 * @gl: The glock to put 190 * @gl: The glock to put
205 * 191 *
@@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word)
883 return 0; 869 return 0;
884} 870}
885 871
886static void wait_on_holder(struct gfs2_holder *gh) 872/**
873 * gfs2_glock_wait - wait on a glock acquisition
874 * @gh: the glock holder
875 *
876 * Returns: 0 on success
877 */
878
879int gfs2_glock_wait(struct gfs2_holder *gh)
887{ 880{
888 unsigned long time1 = jiffies; 881 unsigned long time1 = jiffies;
889 882
@@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh)
894 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + 887 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
895 GL_GLOCK_HOLD_INCR, 888 GL_GLOCK_HOLD_INCR,
896 GL_GLOCK_MAX_HOLD); 889 GL_GLOCK_MAX_HOLD);
897} 890 return gh->gh_error;
898
899static void wait_on_demote(struct gfs2_glock *gl)
900{
901 might_sleep();
902 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
903} 891}
904 892
905/** 893/**
@@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
929 trace_gfs2_demote_rq(gl); 917 trace_gfs2_demote_rq(gl);
930} 918}
931 919
932/**
933 * gfs2_glock_wait - wait on a glock acquisition
934 * @gh: the glock holder
935 *
936 * Returns: 0 on success
937 */
938
939int gfs2_glock_wait(struct gfs2_holder *gh)
940{
941 wait_on_holder(gh);
942 return gh->gh_error;
943}
944
945void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 920void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
946{ 921{
947 struct va_format vaf; 922 struct va_format vaf;
@@ -979,7 +954,7 @@ __acquires(&gl->gl_spin)
979 struct gfs2_sbd *sdp = gl->gl_sbd; 954 struct gfs2_sbd *sdp = gl->gl_sbd;
980 struct list_head *insert_pt = NULL; 955 struct list_head *insert_pt = NULL;
981 struct gfs2_holder *gh2; 956 struct gfs2_holder *gh2;
982 int try_lock = 0; 957 int try_futile = 0;
983 958
984 BUG_ON(gh->gh_owner_pid == NULL); 959 BUG_ON(gh->gh_owner_pid == NULL);
985 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 960 if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
@@ -987,7 +962,7 @@ __acquires(&gl->gl_spin)
987 962
988 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { 963 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
989 if (test_bit(GLF_LOCK, &gl->gl_flags)) 964 if (test_bit(GLF_LOCK, &gl->gl_flags))
990 try_lock = 1; 965 try_futile = !may_grant(gl, gh);
991 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) 966 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
992 goto fail; 967 goto fail;
993 } 968 }
@@ -996,9 +971,8 @@ __acquires(&gl->gl_spin)
996 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && 971 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
997 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) 972 (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
998 goto trap_recursive; 973 goto trap_recursive;
999 if (try_lock && 974 if (try_futile &&
1000 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && 975 !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
1001 !may_grant(gl, gh)) {
1002fail: 976fail:
1003 gh->gh_error = GLR_TRYFAILED; 977 gh->gh_error = GLR_TRYFAILED;
1004 gfs2_holder_wake(gh); 978 gfs2_holder_wake(gh);
@@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1121 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1095 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1122 fast_path = 1; 1096 fast_path = 1;
1123 } 1097 }
1124 if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) 1098 if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
1125 __gfs2_glock_schedule_for_reclaim(gl); 1099 gfs2_glock_add_to_lru(gl);
1100
1126 trace_gfs2_glock_queue(gh, 0); 1101 trace_gfs2_glock_queue(gh, 0);
1127 spin_unlock(&gl->gl_spin); 1102 spin_unlock(&gl->gl_spin);
1128 if (likely(fast_path)) 1103 if (likely(fast_path))
@@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1141{ 1116{
1142 struct gfs2_glock *gl = gh->gh_gl; 1117 struct gfs2_glock *gl = gh->gh_gl;
1143 gfs2_glock_dq(gh); 1118 gfs2_glock_dq(gh);
1144 wait_on_demote(gl); 1119 might_sleep();
1120 wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
1145} 1121}
1146 1122
1147/** 1123/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4bdcf3784187..32cc4fde975c 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
94 /* A shortened, inline version of gfs2_trans_begin() */ 94 /* A shortened, inline version of gfs2_trans_begin() */
95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); 95 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
96 tr.tr_ip = (unsigned long)__builtin_return_address(0); 96 tr.tr_ip = (unsigned long)__builtin_return_address(0);
97 sb_start_intwrite(sdp->sd_vfs);
97 gfs2_log_reserve(sdp, tr.tr_reserved); 98 gfs2_log_reserve(sdp, tr.tr_reserved);
98 BUG_ON(current->journal_info); 99 BUG_ON(current->journal_info);
99 current->journal_info = &tr; 100 current->journal_info = &tr;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index aaecc8085fc5..3d469d37345e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -99,9 +99,26 @@ struct gfs2_rgrpd {
99#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ 99#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
100 spinlock_t rd_rsspin; /* protects reservation related vars */ 100 spinlock_t rd_rsspin; /* protects reservation related vars */
101 struct rb_root rd_rstree; /* multi-block reservation tree */ 101 struct rb_root rd_rstree; /* multi-block reservation tree */
102 u32 rd_rs_cnt; /* count of current reservations */
103}; 102};
104 103
104struct gfs2_rbm {
105 struct gfs2_rgrpd *rgd;
106 struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */
107 u32 offset; /* The offset is bitmap relative */
108};
109
110static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
111{
112 return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset;
113}
114
115static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
116 const struct gfs2_rbm *rbm2)
117{
118 return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) &&
119 (rbm1->offset == rbm2->offset);
120}
121
105enum gfs2_state_bits { 122enum gfs2_state_bits {
106 BH_Pinned = BH_PrivateStart, 123 BH_Pinned = BH_PrivateStart,
107 BH_Escaped = BH_PrivateStart + 1, 124 BH_Escaped = BH_PrivateStart + 1,
@@ -250,18 +267,11 @@ struct gfs2_blkreserv {
250 /* components used during write (step 1): */ 267 /* components used during write (step 1): */
251 atomic_t rs_sizehint; /* hint of the write size */ 268 atomic_t rs_sizehint; /* hint of the write size */
252 269
253 /* components used during inplace_reserve (step 2): */
254 u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
255
256 /* components used during get_local_rgrp (step 3): */
257 struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */
258 struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ 270 struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
259 struct rb_node rs_node; /* link to other block reservations */ 271 struct rb_node rs_node; /* link to other block reservations */
260 272 struct gfs2_rbm rs_rbm; /* Start of reservation */
261 /* components used during block searches and assignments (step 4): */
262 struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */
263 u32 rs_biblk; /* start block relative to the bi */
264 u32 rs_free; /* how many blocks are still free */ 273 u32 rs_free; /* how many blocks are still free */
274 u64 rs_inum; /* Inode number for reservation */
265 275
266 /* ancillary quota stuff */ 276 /* ancillary quota stuff */
267 struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; 277 struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4ce22e547308..381893ceefa4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
712 if (error) 712 if (error)
713 goto fail_gunlock2; 713 goto fail_gunlock2;
714 714
715 /* The newly created inode needs a reservation so it can allocate 715 error = gfs2_rs_alloc(ip);
716 xattrs. At the same time, we want new blocks allocated to the new 716 if (error)
717 dinode to be as contiguous as possible. Since we allocated the 717 goto fail_gunlock2;
718 dinode block under the directory's reservation, we transfer
719 ownership of that reservation to the new inode. The directory
720 doesn't need a reservation unless it needs a new allocation. */
721 ip->i_res = dip->i_res;
722 dip->i_res = NULL;
723 718
724 error = gfs2_acl_create(dip, inode); 719 error = gfs2_acl_create(dip, inode);
725 if (error) 720 if (error)
@@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
737 brelse(bh); 732 brelse(bh);
738 733
739 gfs2_trans_end(sdp); 734 gfs2_trans_end(sdp);
740 /* Check if we reserved space in the rgrp. Function link_dinode may 735 gfs2_inplace_release(dip);
741 not, depending on whether alloc is required. */
742 if (gfs2_mb_reserved(dip))
743 gfs2_inplace_release(dip);
744 gfs2_quota_unlock(dip); 736 gfs2_quota_unlock(dip);
745 mark_inode_dirty(inode); 737 mark_inode_dirty(inode);
746 gfs2_glock_dq_uninit_m(2, ghs); 738 gfs2_glock_dq_uninit_m(2, ghs);
@@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
897 goto out_gunlock_q; 889 goto out_gunlock_q;
898 890
899 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 891 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
900 gfs2_rg_blocks(dip) + 892 gfs2_rg_blocks(dip, sdp->sd_max_dirres) +
901 2 * RES_DINODE + RES_STATFS + 893 2 * RES_DINODE + RES_STATFS +
902 RES_QUOTA, 0); 894 RES_QUOTA, 0);
903 if (error) 895 if (error)
@@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1378 goto out_gunlock_q; 1370 goto out_gunlock_q;
1379 1371
1380 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + 1372 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1381 gfs2_rg_blocks(ndip) + 1373 gfs2_rg_blocks(ndip, sdp->sd_max_dirres) +
1382 4 * RES_DINODE + 4 * RES_LEAF + 1374 4 * RES_DINODE + 4 * RES_LEAF +
1383 RES_STATFS + RES_QUOTA + 4, 0); 1375 RES_STATFS + RES_QUOTA + 4, 0);
1384 if (error) 1376 if (error)
@@ -1722,7 +1714,9 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
1722 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1714 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1723 ret = gfs2_glock_nq(&gh); 1715 ret = gfs2_glock_nq(&gh);
1724 if (ret == 0) { 1716 if (ret == 0) {
1725 ret = generic_setxattr(dentry, name, data, size, flags); 1717 ret = gfs2_rs_alloc(ip);
1718 if (ret == 0)
1719 ret = generic_setxattr(dentry, name, data, size, flags);
1726 gfs2_glock_dq(&gh); 1720 gfs2_glock_dq(&gh);
1727 } 1721 }
1728 gfs2_holder_uninit(&gh); 1722 gfs2_holder_uninit(&gh);
@@ -1757,7 +1751,9 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1757 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1751 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1758 ret = gfs2_glock_nq(&gh); 1752 ret = gfs2_glock_nq(&gh);
1759 if (ret == 0) { 1753 if (ret == 0) {
1760 ret = generic_removexattr(dentry, name); 1754 ret = gfs2_rs_alloc(ip);
1755 if (ret == 0)
1756 ret = generic_removexattr(dentry, name);
1761 gfs2_glock_dq(&gh); 1757 gfs2_glock_dq(&gh);
1762 } 1758 }
1763 gfs2_holder_uninit(&gh); 1759 gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 4a38db739ca0..0fb6539b0c8c 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1289,7 +1289,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
1289 spin_lock(&ls->ls_recover_spin); 1289 spin_lock(&ls->ls_recover_spin);
1290 set_bit(DFL_UNMOUNT, &ls->ls_recover_flags); 1290 set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
1291 spin_unlock(&ls->ls_recover_spin); 1291 spin_unlock(&ls->ls_recover_spin);
1292 flush_delayed_work_sync(&sdp->sd_control_work); 1292 flush_delayed_work(&sdp->sd_control_work);
1293 1293
1294 /* mounted_lock and control_lock will be purged in dlm recovery */ 1294 /* mounted_lock and control_lock will be purged in dlm recovery */
1295release: 1295release:
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e5af9dc420ef..e443966c8106 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -19,6 +19,7 @@
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/gfs2_ondisk.h> 20#include <linux/gfs2_ondisk.h>
21#include <linux/quotaops.h> 21#include <linux/quotaops.h>
22#include <linux/lockdep.h>
22 23
23#include "gfs2.h" 24#include "gfs2.h"
24#include "incore.h" 25#include "incore.h"
@@ -766,6 +767,7 @@ fail:
766 return error; 767 return error;
767} 768}
768 769
770static struct lock_class_key gfs2_quota_imutex_key;
769 771
770static int init_inodes(struct gfs2_sbd *sdp, int undo) 772static int init_inodes(struct gfs2_sbd *sdp, int undo)
771{ 773{
@@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
803 fs_err(sdp, "can't get quota file inode: %d\n", error); 805 fs_err(sdp, "can't get quota file inode: %d\n", error);
804 goto fail_rindex; 806 goto fail_rindex;
805 } 807 }
808 /*
809 * i_mutex on quota files is special. Since this inode is hidden system
810 * file, we are safe to define locking ourselves.
811 */
812 lockdep_set_class(&sdp->sd_quota_inode->i_mutex,
813 &gfs2_quota_imutex_key);
806 814
807 error = gfs2_rindex_update(sdp); 815 error = gfs2_rindex_update(sdp);
808 if (error) 816 if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a3bde91645c2..40c4b0d42fa8 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
765 struct gfs2_holder *ghs, i_gh; 765 struct gfs2_holder *ghs, i_gh;
766 unsigned int qx, x; 766 unsigned int qx, x;
767 struct gfs2_quota_data *qd; 767 struct gfs2_quota_data *qd;
768 unsigned reserved;
768 loff_t offset; 769 loff_t offset;
769 unsigned int nalloc = 0, blocks; 770 unsigned int nalloc = 0, blocks;
770 int error; 771 int error;
@@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
781 return -ENOMEM; 782 return -ENOMEM;
782 783
783 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); 784 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
784 mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); 785 mutex_lock(&ip->i_inode.i_mutex);
785 for (qx = 0; qx < num_qd; qx++) { 786 for (qx = 0; qx < num_qd; qx++) {
786 error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, 787 error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
787 GL_NOCACHE, &ghs[qx]); 788 GL_NOCACHE, &ghs[qx]);
@@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
811 * two blocks need to be updated instead of 1 */ 812 * two blocks need to be updated instead of 1 */
812 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 813 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
813 814
814 error = gfs2_inplace_reserve(ip, 1 + 815 reserved = 1 + (nalloc * (data_blocks + ind_blocks));
815 (nalloc * (data_blocks + ind_blocks))); 816 error = gfs2_inplace_reserve(ip, reserved);
816 if (error) 817 if (error)
817 goto out_alloc; 818 goto out_alloc;
818 819
819 if (nalloc) 820 if (nalloc)
820 blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; 821 blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS;
821 822
822 error = gfs2_trans_begin(sdp, blocks, 0); 823 error = gfs2_trans_begin(sdp, blocks, 0);
823 if (error) 824 if (error)
@@ -1070,8 +1071,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1070 1071
1071 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1072 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
1072 print_message(qd, "exceeded"); 1073 print_message(qd, "exceeded");
1073 quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? 1074 quota_send_warning(make_kqid(&init_user_ns,
1074 USRQUOTA : GRPQUOTA, qd->qd_id, 1075 test_bit(QDF_USER, &qd->qd_flags) ?
1076 USRQUOTA : GRPQUOTA,
1077 qd->qd_id),
1075 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1078 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
1076 1079
1077 error = -EDQUOT; 1080 error = -EDQUOT;
@@ -1081,8 +1084,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1081 time_after_eq(jiffies, qd->qd_last_warn + 1084 time_after_eq(jiffies, qd->qd_last_warn +
1082 gfs2_tune_get(sdp, 1085 gfs2_tune_get(sdp,
1083 gt_quota_warn_period) * HZ)) { 1086 gt_quota_warn_period) * HZ)) {
1084 quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? 1087 quota_send_warning(make_kqid(&init_user_ns,
1085 USRQUOTA : GRPQUOTA, qd->qd_id, 1088 test_bit(QDF_USER, &qd->qd_flags) ?
1089 USRQUOTA : GRPQUOTA,
1090 qd->qd_id),
1086 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1091 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1087 error = print_message(qd, "warning"); 1092 error = print_message(qd, "warning");
1088 qd->qd_last_warn = jiffies; 1093 qd->qd_last_warn = jiffies;
@@ -1469,7 +1474,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1469 return 0; 1474 return 0;
1470} 1475}
1471 1476
1472static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, 1477static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid,
1473 struct fs_disk_quota *fdq) 1478 struct fs_disk_quota *fdq)
1474{ 1479{
1475 struct gfs2_sbd *sdp = sb->s_fs_info; 1480 struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1477,20 +1482,21 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1477 struct gfs2_quota_data *qd; 1482 struct gfs2_quota_data *qd;
1478 struct gfs2_holder q_gh; 1483 struct gfs2_holder q_gh;
1479 int error; 1484 int error;
1485 int type;
1480 1486
1481 memset(fdq, 0, sizeof(struct fs_disk_quota)); 1487 memset(fdq, 0, sizeof(struct fs_disk_quota));
1482 1488
1483 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1489 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1484 return -ESRCH; /* Crazy XFS error code */ 1490 return -ESRCH; /* Crazy XFS error code */
1485 1491
1486 if (type == USRQUOTA) 1492 if (qid.type == USRQUOTA)
1487 type = QUOTA_USER; 1493 type = QUOTA_USER;
1488 else if (type == GRPQUOTA) 1494 else if (qid.type == GRPQUOTA)
1489 type = QUOTA_GROUP; 1495 type = QUOTA_GROUP;
1490 else 1496 else
1491 return -EINVAL; 1497 return -EINVAL;
1492 1498
1493 error = qd_get(sdp, type, id, &qd); 1499 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd);
1494 if (error) 1500 if (error)
1495 return error; 1501 return error;
1496 error = do_glock(qd, FORCE, &q_gh); 1502 error = do_glock(qd, FORCE, &q_gh);
@@ -1500,7 +1506,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1500 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 1506 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
1501 fdq->d_version = FS_DQUOT_VERSION; 1507 fdq->d_version = FS_DQUOT_VERSION;
1502 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1508 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1503 fdq->d_id = id; 1509 fdq->d_id = from_kqid(&init_user_ns, qid);
1504 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; 1510 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1505 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; 1511 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1506 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; 1512 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
@@ -1514,7 +1520,7 @@ out:
1514/* GFS2 only supports a subset of the XFS fields */ 1520/* GFS2 only supports a subset of the XFS fields */
1515#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) 1521#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
1516 1522
1517static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, 1523static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
1518 struct fs_disk_quota *fdq) 1524 struct fs_disk_quota *fdq)
1519{ 1525{
1520 struct gfs2_sbd *sdp = sb->s_fs_info; 1526 struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1526,11 +1532,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1526 int alloc_required; 1532 int alloc_required;
1527 loff_t offset; 1533 loff_t offset;
1528 int error; 1534 int error;
1535 int type;
1529 1536
1530 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 1537 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1531 return -ESRCH; /* Crazy XFS error code */ 1538 return -ESRCH; /* Crazy XFS error code */
1532 1539
1533 switch(type) { 1540 switch(qid.type) {
1534 case USRQUOTA: 1541 case USRQUOTA:
1535 type = QUOTA_USER; 1542 type = QUOTA_USER;
1536 if (fdq->d_flags != FS_USER_QUOTA) 1543 if (fdq->d_flags != FS_USER_QUOTA)
@@ -1547,10 +1554,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1547 1554
1548 if (fdq->d_fieldmask & ~GFS2_FIELDMASK) 1555 if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
1549 return -EINVAL; 1556 return -EINVAL;
1550 if (fdq->d_id != id) 1557 if (fdq->d_id != from_kqid(&init_user_ns, qid))
1551 return -EINVAL; 1558 return -EINVAL;
1552 1559
1553 error = qd_get(sdp, type, id, &qd); 1560 error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd);
1554 if (error) 1561 if (error)
1555 return error; 1562 return error;
1556 1563
@@ -1598,7 +1605,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1598 error = gfs2_inplace_reserve(ip, blocks); 1605 error = gfs2_inplace_reserve(ip, blocks);
1599 if (error) 1606 if (error)
1600 goto out_i; 1607 goto out_i;
1601 blocks += gfs2_rg_blocks(ip); 1608 blocks += gfs2_rg_blocks(ip, blocks);
1602 } 1609 }
1603 1610
1604 /* Some quotas span block boundaries and can update two blocks, 1611 /* Some quotas span block boundaries and can update two blocks,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4d34887a601d..3cc402ce6fea 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,9 +35,6 @@
35#define BFITNOENT ((u32)~0) 35#define BFITNOENT ((u32)~0)
36#define NO_BLOCK ((u64)~0) 36#define NO_BLOCK ((u64)~0)
37 37
38#define RSRV_CONTENTION_FACTOR 4
39#define RGRP_RSRV_MAX_CONTENDERS 2
40
41#if BITS_PER_LONG == 32 38#if BITS_PER_LONG == 32
42#define LBITMASK (0x55555555UL) 39#define LBITMASK (0x55555555UL)
43#define LBITSKIP55 (0x55555555UL) 40#define LBITSKIP55 (0x55555555UL)
@@ -67,53 +64,48 @@ static const char valid_change[16] = {
67 1, 0, 0, 0 64 1, 0, 0, 0
68}; 65};
69 66
70static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 67static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
71 unsigned char old_state, 68 const struct gfs2_inode *ip, bool nowrap);
72 struct gfs2_bitmap **rbi); 69
73 70
74/** 71/**
75 * gfs2_setbit - Set a bit in the bitmaps 72 * gfs2_setbit - Set a bit in the bitmaps
76 * @rgd: the resource group descriptor 73 * @rbm: The position of the bit to set
77 * @buf2: the clone buffer that holds the bitmaps 74 * @do_clone: Also set the clone bitmap, if it exists
78 * @bi: the bitmap structure
79 * @block: the block to set
80 * @new_state: the new state of the block 75 * @new_state: the new state of the block
81 * 76 *
82 */ 77 */
83 78
84static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, 79static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
85 struct gfs2_bitmap *bi, u32 block,
86 unsigned char new_state) 80 unsigned char new_state)
87{ 81{
88 unsigned char *byte1, *byte2, *end, cur_state; 82 unsigned char *byte1, *byte2, *end, cur_state;
89 unsigned int buflen = bi->bi_len; 83 unsigned int buflen = rbm->bi->bi_len;
90 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 84 const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
91 85
92 byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); 86 byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
93 end = bi->bi_bh->b_data + bi->bi_offset + buflen; 87 end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
94 88
95 BUG_ON(byte1 >= end); 89 BUG_ON(byte1 >= end);
96 90
97 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 91 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
98 92
99 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 93 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
100 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " 94 printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
101 "new_state=%d\n", 95 "new_state=%d\n", rbm->offset, cur_state, new_state);
102 (unsigned long long)block, cur_state, new_state); 96 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
103 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", 97 (unsigned long long)rbm->rgd->rd_addr,
104 (unsigned long long)rgd->rd_addr, 98 rbm->bi->bi_start);
105 (unsigned long)bi->bi_start); 99 printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
106 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", 100 rbm->bi->bi_offset, rbm->bi->bi_len);
107 (unsigned long)bi->bi_offset,
108 (unsigned long)bi->bi_len);
109 dump_stack(); 101 dump_stack();
110 gfs2_consist_rgrpd(rgd); 102 gfs2_consist_rgrpd(rbm->rgd);
111 return; 103 return;
112 } 104 }
113 *byte1 ^= (cur_state ^ new_state) << bit; 105 *byte1 ^= (cur_state ^ new_state) << bit;
114 106
115 if (buf2) { 107 if (do_clone && rbm->bi->bi_clone) {
116 byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); 108 byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
117 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; 109 cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
118 *byte2 ^= (cur_state ^ new_state) << bit; 110 *byte2 ^= (cur_state ^ new_state) << bit;
119 } 111 }
@@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
121 113
122/** 114/**
123 * gfs2_testbit - test a bit in the bitmaps 115 * gfs2_testbit - test a bit in the bitmaps
124 * @rgd: the resource group descriptor 116 * @rbm: The bit to test
125 * @buffer: the buffer that holds the bitmaps
126 * @buflen: the length (in bytes) of the buffer
127 * @block: the block to read
128 * 117 *
118 * Returns: The two bit block state of the requested bit
129 */ 119 */
130 120
131static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, 121static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
132 const unsigned char *buffer,
133 unsigned int buflen, u32 block)
134{ 122{
135 const unsigned char *byte, *end; 123 const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
136 unsigned char cur_state; 124 const u8 *byte;
137 unsigned int bit; 125 unsigned int bit;
138 126
139 byte = buffer + (block / GFS2_NBBY); 127 byte = buffer + (rbm->offset / GFS2_NBBY);
140 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 128 bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
141 end = buffer + buflen;
142
143 gfs2_assert(rgd->rd_sbd, byte < end);
144 129
145 cur_state = (*byte >> bit) & GFS2_BIT_MASK; 130 return (*byte >> bit) & GFS2_BIT_MASK;
146
147 return cur_state;
148} 131}
149 132
150/** 133/**
@@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
192 */ 175 */
193static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) 176static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
194{ 177{
195 u64 startblk = gfs2_rs_startblk(rs); 178 u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
196 179
197 if (blk >= startblk + rs->rs_free) 180 if (blk >= startblk + rs->rs_free)
198 return 1; 181 return 1;
@@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
202} 185}
203 186
204/** 187/**
205 * rs_find - Find a rgrp multi-block reservation that contains a given block
206 * @rgd: The rgrp
207 * @rgblk: The block we're looking for, relative to the rgrp
208 */
209static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
210{
211 struct rb_node **newn;
212 int rc;
213 u64 fsblk = rgblk + rgd->rd_data0;
214
215 spin_lock(&rgd->rd_rsspin);
216 newn = &rgd->rd_rstree.rb_node;
217 while (*newn) {
218 struct gfs2_blkreserv *cur =
219 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
220 rc = rs_cmp(fsblk, 1, cur);
221 if (rc < 0)
222 newn = &((*newn)->rb_left);
223 else if (rc > 0)
224 newn = &((*newn)->rb_right);
225 else {
226 spin_unlock(&rgd->rd_rsspin);
227 return cur;
228 }
229 }
230 spin_unlock(&rgd->rd_rsspin);
231 return NULL;
232}
233
234/**
235 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 188 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
236 * a block in a given allocation state. 189 * a block in a given allocation state.
237 * @buf: the buffer that holds the bitmaps 190 * @buf: the buffer that holds the bitmaps
@@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
262 u64 mask = 0x5555555555555555ULL; 215 u64 mask = 0x5555555555555555ULL;
263 u32 bit; 216 u32 bit;
264 217
265 BUG_ON(state > 3);
266
267 /* Mask off bits we don't care about at the start of the search */ 218 /* Mask off bits we don't care about at the start of the search */
268 mask <<= spoint; 219 mask <<= spoint;
269 tmp = gfs2_bit_search(ptr, mask, state); 220 tmp = gfs2_bit_search(ptr, mask, state);
@@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
285} 236}
286 237
287/** 238/**
239 * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
240 * @rbm: The rbm with rgd already set correctly
241 * @block: The block number (filesystem relative)
242 *
243 * This sets the bi and offset members of an rbm based on a
244 * resource group and a filesystem relative block number. The
245 * resource group must be set in the rbm on entry, the bi and
246 * offset members will be set by this function.
247 *
248 * Returns: 0 on success, or an error code
249 */
250
251static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
252{
253 u64 rblock = block - rbm->rgd->rd_data0;
254 u32 goal = (u32)rblock;
255 int x;
256
257 if (WARN_ON_ONCE(rblock > UINT_MAX))
258 return -EINVAL;
259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
260 return -E2BIG;
261
262 for (x = 0; x < rbm->rgd->rd_length; x++) {
263 rbm->bi = rbm->rgd->rd_bits + x;
264 if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
265 rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
266 break;
267 }
268 }
269
270 return 0;
271}
272
273/**
274 * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
275 * @rbm: Position to search (value/result)
276 * @n_unaligned: Number of unaligned blocks to check
277 * @len: Decremented for each block found (terminate on zero)
278 *
279 * Returns: true if a non-free block is encountered
280 */
281
282static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
283{
284 u64 block;
285 u32 n;
286 u8 res;
287
288 for (n = 0; n < n_unaligned; n++) {
289 res = gfs2_testbit(rbm);
290 if (res != GFS2_BLKST_FREE)
291 return true;
292 (*len)--;
293 if (*len == 0)
294 return true;
295 block = gfs2_rbm_to_block(rbm);
296 if (gfs2_rbm_from_block(rbm, block + 1))
297 return true;
298 }
299
300 return false;
301}
302
303/**
304 * gfs2_free_extlen - Return extent length of free blocks
305 * @rbm: Starting position
306 * @len: Max length to check
307 *
308 * Starting at the block specified by the rbm, see how many free blocks
309 * there are, not reading more than len blocks ahead. This can be done
310 * using memchr_inv when the blocks are byte aligned, but has to be done
311 * on a block by block basis in case of unaligned blocks. Also this
312 * function can cope with bitmap boundaries (although it must stop on
313 * a resource group boundary)
314 *
315 * Returns: Number of free blocks in the extent
316 */
317
318static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
319{
320 struct gfs2_rbm rbm = *rrbm;
321 u32 n_unaligned = rbm.offset & 3;
322 u32 size = len;
323 u32 bytes;
324 u32 chunk_size;
325 u8 *ptr, *start, *end;
326 u64 block;
327
328 if (n_unaligned &&
329 gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
330 goto out;
331
332 n_unaligned = len & 3;
333 /* Start is now byte aligned */
334 while (len > 3) {
335 start = rbm.bi->bi_bh->b_data;
336 if (rbm.bi->bi_clone)
337 start = rbm.bi->bi_clone;
338 end = start + rbm.bi->bi_bh->b_size;
339 start += rbm.bi->bi_offset;
340 BUG_ON(rbm.offset & 3);
341 start += (rbm.offset / GFS2_NBBY);
342 bytes = min_t(u32, len / GFS2_NBBY, (end - start));
343 ptr = memchr_inv(start, 0, bytes);
344 chunk_size = ((ptr == NULL) ? bytes : (ptr - start));
345 chunk_size *= GFS2_NBBY;
346 BUG_ON(len < chunk_size);
347 len -= chunk_size;
348 block = gfs2_rbm_to_block(&rbm);
349 gfs2_rbm_from_block(&rbm, block + chunk_size);
350 n_unaligned = 3;
351 if (ptr)
352 break;
353 n_unaligned = len & 3;
354 }
355
356 /* Deal with any bits left over at the end */
357 if (n_unaligned)
358 gfs2_unaligned_extlen(&rbm, n_unaligned, &len);
359out:
360 return size - len;
361}
362
363/**
288 * gfs2_bitcount - count the number of bits in a certain state 364 * gfs2_bitcount - count the number of bits in a certain state
289 * @rgd: the resource group descriptor 365 * @rgd: the resource group descriptor
290 * @buffer: the buffer that holds the bitmaps 366 * @buffer: the buffer that holds the bitmaps
@@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
487 if (!res) 563 if (!res)
488 error = -ENOMEM; 564 error = -ENOMEM;
489 565
566 RB_CLEAR_NODE(&res->rs_node);
567
490 down_write(&ip->i_rw_mutex); 568 down_write(&ip->i_rw_mutex);
491 if (ip->i_res) 569 if (ip->i_res)
492 kmem_cache_free(gfs2_rsrv_cachep, res); 570 kmem_cache_free(gfs2_rsrv_cachep, res);
@@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
496 return error; 574 return error;
497} 575}
498 576
499static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) 577static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
500{ 578{
501 gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", 579 gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
502 rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, 580 (unsigned long long)rs->rs_inum,
503 rs->rs_free); 581 (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
582 rs->rs_rbm.offset, rs->rs_free);
504} 583}
505 584
506/** 585/**
@@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
508 * @rs: The reservation to remove 587 * @rs: The reservation to remove
509 * 588 *
510 */ 589 */
511static void __rs_deltree(struct gfs2_blkreserv *rs) 590static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
512{ 591{
513 struct gfs2_rgrpd *rgd; 592 struct gfs2_rgrpd *rgd;
514 593
515 if (!gfs2_rs_active(rs)) 594 if (!gfs2_rs_active(rs))
516 return; 595 return;
517 596
518 rgd = rs->rs_rgd; 597 rgd = rs->rs_rbm.rgd;
519 /* We can't do this: The reason is that when the rgrp is invalidated, 598 trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
520 it's in the "middle" of acquiring the glock, but the HOLDER bit 599 rb_erase(&rs->rs_node, &rgd->rd_rstree);
521 isn't set yet: 600 RB_CLEAR_NODE(&rs->rs_node);
522 BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
523 trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
524
525 if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
526 rb_erase(&rs->rs_node, &rgd->rd_rstree);
527 BUG_ON(!rgd->rd_rs_cnt);
528 rgd->rd_rs_cnt--;
529 601
530 if (rs->rs_free) { 602 if (rs->rs_free) {
531 /* return reserved blocks to the rgrp and the ip */ 603 /* return reserved blocks to the rgrp and the ip */
532 BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); 604 BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
533 rs->rs_rgd->rd_reserved -= rs->rs_free; 605 rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
534 rs->rs_free = 0; 606 rs->rs_free = 0;
535 clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); 607 clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
536 smp_mb__after_clear_bit(); 608 smp_mb__after_clear_bit();
537 } 609 }
538 /* We can't change any of the step 1 or step 2 components of the rs.
539 E.g. We can't set rs_rgd to NULL because the rgd glock is held and
540 dequeued through this pointer.
541 Can't: atomic_set(&rs->rs_sizehint, 0);
542 Can't: rs->rs_requested = 0;
543 Can't: rs->rs_rgd = NULL;*/
544 rs->rs_bi = NULL;
545 rs->rs_biblk = 0;
546} 610}
547 611
548/** 612/**
@@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
550 * @rs: The reservation to remove 614 * @rs: The reservation to remove
551 * 615 *
552 */ 616 */
553void gfs2_rs_deltree(struct gfs2_blkreserv *rs) 617void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
554{ 618{
555 struct gfs2_rgrpd *rgd; 619 struct gfs2_rgrpd *rgd;
556 620
557 if (!gfs2_rs_active(rs)) 621 rgd = rs->rs_rbm.rgd;
558 return; 622 if (rgd) {
559 623 spin_lock(&rgd->rd_rsspin);
560 rgd = rs->rs_rgd; 624 __rs_deltree(ip, rs);
561 spin_lock(&rgd->rd_rsspin); 625 spin_unlock(&rgd->rd_rsspin);
562 __rs_deltree(rs); 626 }
563 spin_unlock(&rgd->rd_rsspin);
564} 627}
565 628
566/** 629/**
@@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
572{ 635{
573 down_write(&ip->i_rw_mutex); 636 down_write(&ip->i_rw_mutex);
574 if (ip->i_res) { 637 if (ip->i_res) {
575 gfs2_rs_deltree(ip->i_res); 638 gfs2_rs_deltree(ip, ip->i_res);
576 trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
577 BUG_ON(ip->i_res->rs_free); 639 BUG_ON(ip->i_res->rs_free);
578 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 640 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
579 ip->i_res = NULL; 641 ip->i_res = NULL;
@@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
597 spin_lock(&rgd->rd_rsspin); 659 spin_lock(&rgd->rd_rsspin);
598 while ((n = rb_first(&rgd->rd_rstree))) { 660 while ((n = rb_first(&rgd->rd_rstree))) {
599 rs = rb_entry(n, struct gfs2_blkreserv, rs_node); 661 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
600 __rs_deltree(rs); 662 __rs_deltree(NULL, rs);
601 } 663 }
602 spin_unlock(&rgd->rd_rsspin); 664 spin_unlock(&rgd->rd_rsspin);
603} 665}
@@ -1270,211 +1332,276 @@ out:
1270 1332
1271/** 1333/**
1272 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree 1334 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
1273 * @bi: the bitmap with the blocks
1274 * @ip: the inode structure 1335 * @ip: the inode structure
1275 * @biblk: the 32-bit block number relative to the start of the bitmap
1276 * @amount: the number of blocks to reserve
1277 * 1336 *
1278 * Returns: NULL - reservation was already taken, so not inserted
1279 * pointer to the inserted reservation
1280 */ 1337 */
1281static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, 1338static void rs_insert(struct gfs2_inode *ip)
1282 struct gfs2_inode *ip, u32 biblk,
1283 int amount)
1284{ 1339{
1285 struct rb_node **newn, *parent = NULL; 1340 struct rb_node **newn, *parent = NULL;
1286 int rc; 1341 int rc;
1287 struct gfs2_blkreserv *rs = ip->i_res; 1342 struct gfs2_blkreserv *rs = ip->i_res;
1288 struct gfs2_rgrpd *rgd = rs->rs_rgd; 1343 struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
1289 u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; 1344 u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
1345
1346 BUG_ON(gfs2_rs_active(rs));
1290 1347
1291 spin_lock(&rgd->rd_rsspin); 1348 spin_lock(&rgd->rd_rsspin);
1292 newn = &rgd->rd_rstree.rb_node; 1349 newn = &rgd->rd_rstree.rb_node;
1293 BUG_ON(!ip->i_res);
1294 BUG_ON(gfs2_rs_active(rs));
1295 /* Figure out where to put new node */
1296 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1297 while (*newn) { 1350 while (*newn) {
1298 struct gfs2_blkreserv *cur = 1351 struct gfs2_blkreserv *cur =
1299 rb_entry(*newn, struct gfs2_blkreserv, rs_node); 1352 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1300 1353
1301 parent = *newn; 1354 parent = *newn;
1302 rc = rs_cmp(fsblock, amount, cur); 1355 rc = rs_cmp(fsblock, rs->rs_free, cur);
1303 if (rc > 0) 1356 if (rc > 0)
1304 newn = &((*newn)->rb_right); 1357 newn = &((*newn)->rb_right);
1305 else if (rc < 0) 1358 else if (rc < 0)
1306 newn = &((*newn)->rb_left); 1359 newn = &((*newn)->rb_left);
1307 else { 1360 else {
1308 spin_unlock(&rgd->rd_rsspin); 1361 spin_unlock(&rgd->rd_rsspin);
1309 return NULL; /* reservation already in use */ 1362 WARN_ON(1);
1363 return;
1310 } 1364 }
1311 } 1365 }
1312 1366
1313 /* Do our reservation work */
1314 rs = ip->i_res;
1315 rs->rs_free = amount;
1316 rs->rs_biblk = biblk;
1317 rs->rs_bi = bi;
1318 rb_link_node(&rs->rs_node, parent, newn); 1367 rb_link_node(&rs->rs_node, parent, newn);
1319 rb_insert_color(&rs->rs_node, &rgd->rd_rstree); 1368 rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1320 1369
1321 /* Do our inode accounting for the reservation */
1322 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1323
1324 /* Do our rgrp accounting for the reservation */ 1370 /* Do our rgrp accounting for the reservation */
1325 rgd->rd_reserved += amount; /* blocks reserved */ 1371 rgd->rd_reserved += rs->rs_free; /* blocks reserved */
1326 rgd->rd_rs_cnt++; /* number of in-tree reservations */
1327 spin_unlock(&rgd->rd_rsspin); 1372 spin_unlock(&rgd->rd_rsspin);
1328 trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); 1373 trace_gfs2_rs(rs, TRACE_RS_INSERT);
1329 return rs;
1330} 1374}
1331 1375
1332/** 1376/**
1333 * unclaimed_blocks - return number of blocks that aren't spoken for 1377 * rg_mblk_search - find a group of multiple free blocks to form a reservation
1334 */
1335static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
1336{
1337 return rgd->rd_free_clone - rgd->rd_reserved;
1338}
1339
1340/**
1341 * rg_mblk_search - find a group of multiple free blocks
1342 * @rgd: the resource group descriptor 1378 * @rgd: the resource group descriptor
1343 * @rs: the block reservation
1344 * @ip: pointer to the inode for which we're reserving blocks 1379 * @ip: pointer to the inode for which we're reserving blocks
1380 * @requested: number of blocks required for this allocation
1345 * 1381 *
1346 * This is very similar to rgblk_search, except we're looking for whole
1347 * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
1348 * on aligned dwords for speed's sake.
1349 *
1350 * Returns: 0 if successful or BFITNOENT if there isn't enough free space
1351 */ 1382 */
1352 1383
1353static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1384static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
1385 unsigned requested)
1354{ 1386{
1355 struct gfs2_bitmap *bi = rgd->rd_bits; 1387 struct gfs2_rbm rbm = { .rgd = rgd, };
1356 const u32 length = rgd->rd_length; 1388 u64 goal;
1357 u32 blk; 1389 struct gfs2_blkreserv *rs = ip->i_res;
1358 unsigned int buf, x, search_bytes; 1390 u32 extlen;
1359 u8 *buffer = NULL; 1391 u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
1360 u8 *ptr, *end, *nonzero; 1392 int ret;
1361 u32 goal, rsv_bytes; 1393
1362 struct gfs2_blkreserv *rs; 1394 extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
1363 u32 best_rs_bytes, unclaimed; 1395 extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
1364 int best_rs_blocks; 1396 if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
1397 return;
1365 1398
1366 /* Find bitmap block that contains bits for goal block */ 1399 /* Find bitmap block that contains bits for goal block */
1367 if (rgrp_contains_block(rgd, ip->i_goal)) 1400 if (rgrp_contains_block(rgd, ip->i_goal))
1368 goal = ip->i_goal - rgd->rd_data0; 1401 goal = ip->i_goal;
1369 else 1402 else
1370 goal = rgd->rd_last_alloc; 1403 goal = rgd->rd_last_alloc + rgd->rd_data0;
1371 for (buf = 0; buf < length; buf++) { 1404
1372 bi = rgd->rd_bits + buf; 1405 if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
1373 /* Convert scope of "goal" from rgrp-wide to within 1406 return;
1374 found bit block */ 1407
1375 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { 1408 ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
1376 goal -= bi->bi_start * GFS2_NBBY; 1409 if (ret == 0) {
1377 goto do_search; 1410 rs->rs_rbm = rbm;
1378 } 1411 rs->rs_free = extlen;
1412 rs->rs_inum = ip->i_no_addr;
1413 rs_insert(ip);
1379 } 1414 }
1380 buf = 0; 1415}
1381 goal = 0;
1382
1383do_search:
1384 best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
1385 (RGRP_RSRV_MINBLKS * rgd->rd_length));
1386 best_rs_bytes = (best_rs_blocks *
1387 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
1388 GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
1389 best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
1390 unclaimed = unclaimed_blocks(rgd);
1391 if (best_rs_bytes * GFS2_NBBY > unclaimed)
1392 best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
1393
1394 for (x = 0; x <= length; x++) {
1395 bi = rgd->rd_bits + buf;
1396 1416
1397 if (test_bit(GBF_FULL, &bi->bi_flags)) 1417/**
1398 goto skip; 1418 * gfs2_next_unreserved_block - Return next block that is not reserved
1419 * @rgd: The resource group
1420 * @block: The starting block
1421 * @length: The required length
1422 * @ip: Ignore any reservations for this inode
1423 *
1424 * If the block does not appear in any reservation, then return the
1425 * block number unchanged. If it does appear in the reservation, then
1426 * keep looking through the tree of reservations in order to find the
1427 * first block number which is not reserved.
1428 */
1399 1429
1400 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1430static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
1401 if (bi->bi_clone) 1431 u32 length,
1402 buffer = bi->bi_clone + bi->bi_offset; 1432 const struct gfs2_inode *ip)
1433{
1434 struct gfs2_blkreserv *rs;
1435 struct rb_node *n;
1436 int rc;
1437
1438 spin_lock(&rgd->rd_rsspin);
1439 n = rgd->rd_rstree.rb_node;
1440 while (n) {
1441 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1442 rc = rs_cmp(block, length, rs);
1443 if (rc < 0)
1444 n = n->rb_left;
1445 else if (rc > 0)
1446 n = n->rb_right;
1403 else 1447 else
1404 buffer = bi->bi_bh->b_data + bi->bi_offset; 1448 break;
1405 1449 }
1406 /* We have to keep the reservations aligned on u64 boundaries 1450
1407 otherwise we could get situations where a byte can't be 1451 if (n) {
1408 used because it's after a reservation, but a free bit still 1452 while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
1409 is within the reservation's area. */ 1453 block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
1410 ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); 1454 n = n->rb_right;
1411 end = (buffer + bi->bi_len); 1455 if (n == NULL)
1412 while (ptr < end) { 1456 break;
1413 rsv_bytes = 0; 1457 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1414 if ((ptr + best_rs_bytes) <= end)
1415 search_bytes = best_rs_bytes;
1416 else
1417 search_bytes = end - ptr;
1418 BUG_ON(!search_bytes);
1419 nonzero = memchr_inv(ptr, 0, search_bytes);
1420 /* If the lot is all zeroes, reserve the whole size. If
1421 there's enough zeroes to satisfy the request, use
1422 what we can. If there's not enough, keep looking. */
1423 if (nonzero == NULL)
1424 rsv_bytes = search_bytes;
1425 else if ((nonzero - ptr) * GFS2_NBBY >=
1426 ip->i_res->rs_requested)
1427 rsv_bytes = (nonzero - ptr);
1428
1429 if (rsv_bytes) {
1430 blk = ((ptr - buffer) * GFS2_NBBY);
1431 BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
1432 rs = rs_insert(bi, ip, blk,
1433 rsv_bytes * GFS2_NBBY);
1434 if (IS_ERR(rs))
1435 return PTR_ERR(rs);
1436 if (rs)
1437 return 0;
1438 }
1439 ptr += ALIGN(search_bytes, sizeof(u64));
1440 } 1458 }
1441skip:
1442 /* Try next bitmap block (wrap back to rgrp header
1443 if at end) */
1444 buf++;
1445 buf %= length;
1446 goal = 0;
1447 } 1459 }
1448 1460
1449 return BFITNOENT; 1461 spin_unlock(&rgd->rd_rsspin);
1462 return block;
1450} 1463}
1451 1464
1452/** 1465/**
1453 * try_rgrp_fit - See if a given reservation will fit in a given RG 1466 * gfs2_reservation_check_and_update - Check for reservations during block alloc
1454 * @rgd: the RG data 1467 * @rbm: The current position in the resource group
1455 * @ip: the inode 1468 * @ip: The inode for which we are searching for blocks
1469 * @minext: The minimum extent length
1456 * 1470 *
1457 * If there's room for the requested blocks to be allocated from the RG: 1471 * This checks the current position in the rgrp to see whether there is
1458 * This will try to get a multi-block reservation first, and if that doesn't 1472 * a reservation covering this block. If not then this function is a
1459 * fit, it will take what it can. 1473 * no-op. If there is, then the position is moved to the end of the
1474 * contiguous reservation(s) so that we are pointing at the first
1475 * non-reserved block.
1460 * 1476 *
1461 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 1477 * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error
1462 */ 1478 */
1463 1479
1464static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) 1480static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
1481 const struct gfs2_inode *ip,
1482 u32 minext)
1465{ 1483{
1466 struct gfs2_blkreserv *rs = ip->i_res; 1484 u64 block = gfs2_rbm_to_block(rbm);
1485 u32 extlen = 1;
1486 u64 nblock;
1487 int ret;
1467 1488
1468 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1489 /*
1490 * If we have a minimum extent length, then skip over any extent
1491 * which is less than the min extent length in size.
1492 */
1493 if (minext) {
1494 extlen = gfs2_free_extlen(rbm, minext);
1495 nblock = block + extlen;
1496 if (extlen < minext)
1497 goto fail;
1498 }
1499
1500 /*
1501 * Check the extent which has been found against the reservations
1502 * and skip if parts of it are already reserved
1503 */
1504 nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
1505 if (nblock == block)
1469 return 0; 1506 return 0;
1470 /* Look for a multi-block reservation. */ 1507fail:
1471 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && 1508 ret = gfs2_rbm_from_block(rbm, nblock);
1472 rg_mblk_search(rgd, ip) != BFITNOENT) 1509 if (ret < 0)
1473 return 1; 1510 return ret;
1474 if (unclaimed_blocks(rgd) >= rs->rs_requested) 1511 return 1;
1475 return 1; 1512}
1476 1513
1477 return 0; 1514/**
1515 * gfs2_rbm_find - Look for blocks of a particular state
1516 * @rbm: Value/result starting position and final position
1517 * @state: The state which we want to find
1518 * @minext: The requested extent length (0 for a single block)
1519 * @ip: If set, check for reservations
1520 * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
1521 * around until we've reached the starting point.
1522 *
1523 * Side effects:
1524 * - If looking for free blocks, we set GBF_FULL on each bitmap which
1525 * has no free blocks in it.
1526 *
1527 * Returns: 0 on success, -ENOSPC if there is no block of the requested state
1528 */
1529
1530static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
1531 const struct gfs2_inode *ip, bool nowrap)
1532{
1533 struct buffer_head *bh;
1534 struct gfs2_bitmap *initial_bi;
1535 u32 initial_offset;
1536 u32 offset;
1537 u8 *buffer;
1538 int index;
1539 int n = 0;
1540 int iters = rbm->rgd->rd_length;
1541 int ret;
1542
1543 /* If we are not starting at the beginning of a bitmap, then we
1544 * need to add one to the bitmap count to ensure that we search
1545 * the starting bitmap twice.
1546 */
1547 if (rbm->offset != 0)
1548 iters++;
1549
1550 while(1) {
1551 if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
1552 (state == GFS2_BLKST_FREE))
1553 goto next_bitmap;
1554
1555 bh = rbm->bi->bi_bh;
1556 buffer = bh->b_data + rbm->bi->bi_offset;
1557 WARN_ON(!buffer_uptodate(bh));
1558 if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
1559 buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
1560 initial_offset = rbm->offset;
1561 offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
1562 if (offset == BFITNOENT)
1563 goto bitmap_full;
1564 rbm->offset = offset;
1565 if (ip == NULL)
1566 return 0;
1567
1568 initial_bi = rbm->bi;
1569 ret = gfs2_reservation_check_and_update(rbm, ip, minext);
1570 if (ret == 0)
1571 return 0;
1572 if (ret > 0) {
1573 n += (rbm->bi - initial_bi);
1574 goto next_iter;
1575 }
1576 if (ret == -E2BIG) {
1577 index = 0;
1578 rbm->offset = 0;
1579 n += (rbm->bi - initial_bi);
1580 goto res_covered_end_of_rgrp;
1581 }
1582 return ret;
1583
1584bitmap_full: /* Mark bitmap as full and fall through */
1585 if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
1586 set_bit(GBF_FULL, &rbm->bi->bi_flags);
1587
1588next_bitmap: /* Find next bitmap in the rgrp */
1589 rbm->offset = 0;
1590 index = rbm->bi - rbm->rgd->rd_bits;
1591 index++;
1592 if (index == rbm->rgd->rd_length)
1593 index = 0;
1594res_covered_end_of_rgrp:
1595 rbm->bi = &rbm->rgd->rd_bits[index];
1596 if ((index == 0) && nowrap)
1597 break;
1598 n++;
1599next_iter:
1600 if (n >= iters)
1601 break;
1602 }
1603
1604 return -ENOSPC;
1478} 1605}
1479 1606
1480/** 1607/**
@@ -1489,34 +1616,33 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1489 1616
1490static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) 1617static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
1491{ 1618{
1492 u32 goal = 0, block; 1619 u64 block;
1493 u64 no_addr;
1494 struct gfs2_sbd *sdp = rgd->rd_sbd; 1620 struct gfs2_sbd *sdp = rgd->rd_sbd;
1495 struct gfs2_glock *gl; 1621 struct gfs2_glock *gl;
1496 struct gfs2_inode *ip; 1622 struct gfs2_inode *ip;
1497 int error; 1623 int error;
1498 int found = 0; 1624 int found = 0;
1499 struct gfs2_bitmap *bi; 1625 struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
1500 1626
1501 while (goal < rgd->rd_data) { 1627 while (1) {
1502 down_write(&sdp->sd_log_flush_lock); 1628 down_write(&sdp->sd_log_flush_lock);
1503 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi); 1629 error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
1504 up_write(&sdp->sd_log_flush_lock); 1630 up_write(&sdp->sd_log_flush_lock);
1505 if (block == BFITNOENT) 1631 if (error == -ENOSPC)
1632 break;
1633 if (WARN_ON_ONCE(error))
1506 break; 1634 break;
1507 1635
1508 block = gfs2_bi2rgd_blk(bi, block); 1636 block = gfs2_rbm_to_block(&rbm);
1509 /* rgblk_search can return a block < goal, so we need to 1637 if (gfs2_rbm_from_block(&rbm, block + 1))
1510 keep it marching forward. */ 1638 break;
1511 no_addr = block + rgd->rd_data0; 1639 if (*last_unlinked != NO_BLOCK && block <= *last_unlinked)
1512 goal = max(block + 1, goal + 1);
1513 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
1514 continue; 1640 continue;
1515 if (no_addr == skip) 1641 if (block == skip)
1516 continue; 1642 continue;
1517 *last_unlinked = no_addr; 1643 *last_unlinked = block;
1518 1644
1519 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); 1645 error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
1520 if (error) 1646 if (error)
1521 continue; 1647 continue;
1522 1648
@@ -1543,6 +1669,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1543 return; 1669 return;
1544} 1670}
1545 1671
1672static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1673{
1674 struct gfs2_rgrpd *rgd = *pos;
1675
1676 rgd = gfs2_rgrpd_get_next(rgd);
1677 if (rgd == NULL)
1678 rgd = gfs2_rgrpd_get_next(NULL);
1679 *pos = rgd;
1680 if (rgd != begin) /* If we didn't wrap */
1681 return true;
1682 return false;
1683}
1684
1546/** 1685/**
1547 * gfs2_inplace_reserve - Reserve space in the filesystem 1686 * gfs2_inplace_reserve - Reserve space in the filesystem
1548 * @ip: the inode to reserve space for 1687 * @ip: the inode to reserve space for
@@ -1562,103 +1701,96 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1562 1701
1563 if (sdp->sd_args.ar_rgrplvb) 1702 if (sdp->sd_args.ar_rgrplvb)
1564 flags |= GL_SKIP; 1703 flags |= GL_SKIP;
1565 rs->rs_requested = requested; 1704 if (gfs2_assert_warn(sdp, requested))
1566 if (gfs2_assert_warn(sdp, requested)) { 1705 return -EINVAL;
1567 error = -EINVAL;
1568 goto out;
1569 }
1570 if (gfs2_rs_active(rs)) { 1706 if (gfs2_rs_active(rs)) {
1571 begin = rs->rs_rgd; 1707 begin = rs->rs_rbm.rgd;
1572 flags = 0; /* Yoda: Do or do not. There is no try */ 1708 flags = 0; /* Yoda: Do or do not. There is no try */
1573 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { 1709 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1574 rs->rs_rgd = begin = ip->i_rgd; 1710 rs->rs_rbm.rgd = begin = ip->i_rgd;
1575 } else { 1711 } else {
1576 rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1712 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1577 } 1713 }
1578 if (rs->rs_rgd == NULL) 1714 if (rs->rs_rbm.rgd == NULL)
1579 return -EBADSLT; 1715 return -EBADSLT;
1580 1716
1581 while (loops < 3) { 1717 while (loops < 3) {
1582 rg_locked = 0; 1718 rg_locked = 1;
1583 1719
1584 if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { 1720 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1585 rg_locked = 1; 1721 rg_locked = 0;
1586 error = 0; 1722 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1587 } else if (!loops && !gfs2_rs_active(rs) &&
1588 rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
1589 /* If the rgrp already is maxed out for contenders,
1590 we can eliminate it as a "first pass" without even
1591 requesting the rgrp glock. */
1592 error = GLR_TRYFAILED;
1593 } else {
1594 error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
1595 LM_ST_EXCLUSIVE, flags, 1723 LM_ST_EXCLUSIVE, flags,
1596 &rs->rs_rgd_gh); 1724 &rs->rs_rgd_gh);
1597 if (!error && sdp->sd_args.ar_rgrplvb) { 1725 if (error == GLR_TRYFAILED)
1598 error = update_rgrp_lvb(rs->rs_rgd); 1726 goto next_rgrp;
1599 if (error) { 1727 if (unlikely(error))
1728 return error;
1729 if (sdp->sd_args.ar_rgrplvb) {
1730 error = update_rgrp_lvb(rs->rs_rbm.rgd);
1731 if (unlikely(error)) {
1600 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1732 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1601 return error; 1733 return error;
1602 } 1734 }
1603 } 1735 }
1604 } 1736 }
1605 switch (error) {
1606 case 0:
1607 if (gfs2_rs_active(rs)) {
1608 if (unclaimed_blocks(rs->rs_rgd) +
1609 rs->rs_free >= rs->rs_requested) {
1610 ip->i_rgd = rs->rs_rgd;
1611 return 0;
1612 }
1613 /* We have a multi-block reservation, but the
1614 rgrp doesn't have enough free blocks to
1615 satisfy the request. Free the reservation
1616 and look for a suitable rgrp. */
1617 gfs2_rs_deltree(rs);
1618 }
1619 if (try_rgrp_fit(rs->rs_rgd, ip)) {
1620 if (sdp->sd_args.ar_rgrplvb)
1621 gfs2_rgrp_bh_get(rs->rs_rgd);
1622 ip->i_rgd = rs->rs_rgd;
1623 return 0;
1624 }
1625 if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
1626 if (sdp->sd_args.ar_rgrplvb)
1627 gfs2_rgrp_bh_get(rs->rs_rgd);
1628 try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
1629 ip->i_no_addr);
1630 }
1631 if (!rg_locked)
1632 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1633 /* fall through */
1634 case GLR_TRYFAILED:
1635 rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
1636 rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
1637 if (rs->rs_rgd != begin) /* If we didn't wrap */
1638 break;
1639 1737
1640 flags &= ~LM_FLAG_TRY; 1738 /* Skip unuseable resource groups */
1641 loops++; 1739 if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
1642 /* Check that fs hasn't grown if writing to rindex */ 1740 goto skip_rgrp;
1643 if (ip == GFS2_I(sdp->sd_rindex) && 1741
1644 !sdp->sd_rindex_uptodate) { 1742 if (sdp->sd_args.ar_rgrplvb)
1645 error = gfs2_ri_update(ip); 1743 gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
1646 if (error) 1744
1647 goto out; 1745 /* Get a reservation if we don't already have one */
1648 } else if (loops == 2) 1746 if (!gfs2_rs_active(rs))
1649 /* Flushing the log may release space */ 1747 rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
1650 gfs2_log_flush(sdp, NULL); 1748
1651 break; 1749 /* Skip rgrps when we can't get a reservation on first pass */
1652 default: 1750 if (!gfs2_rs_active(rs) && (loops < 1))
1653 goto out; 1751 goto check_rgrp;
1752
1753 /* If rgrp has enough free space, use it */
1754 if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
1755 ip->i_rgd = rs->rs_rbm.rgd;
1756 return 0;
1757 }
1758
1759 /* Drop reservation, if we couldn't use reserved rgrp */
1760 if (gfs2_rs_active(rs))
1761 gfs2_rs_deltree(ip, rs);
1762check_rgrp:
1763 /* Check for unlinked inodes which can be reclaimed */
1764 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
1765 try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
1766 ip->i_no_addr);
1767skip_rgrp:
1768 /* Unlock rgrp if required */
1769 if (!rg_locked)
1770 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1771next_rgrp:
1772 /* Find the next rgrp, and continue looking */
1773 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1774 continue;
1775
1776 /* If we've scanned all the rgrps, but found no free blocks
1777 * then this checks for some less likely conditions before
1778 * trying again.
1779 */
1780 flags &= ~LM_FLAG_TRY;
1781 loops++;
1782 /* Check that fs hasn't grown if writing to rindex */
1783 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1784 error = gfs2_ri_update(ip);
1785 if (error)
1786 return error;
1654 } 1787 }
1788 /* Flushing the log may release space */
1789 if (loops == 2)
1790 gfs2_log_flush(sdp, NULL);
1655 } 1791 }
1656 error = -ENOSPC;
1657 1792
1658out: 1793 return -ENOSPC;
1659 if (error)
1660 rs->rs_requested = 0;
1661 return error;
1662} 1794}
1663 1795
1664/** 1796/**
@@ -1672,15 +1804,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1672{ 1804{
1673 struct gfs2_blkreserv *rs = ip->i_res; 1805 struct gfs2_blkreserv *rs = ip->i_res;
1674 1806
1675 if (!rs)
1676 return;
1677
1678 if (!rs->rs_free)
1679 gfs2_rs_deltree(rs);
1680
1681 if (rs->rs_rgd_gh.gh_gl) 1807 if (rs->rs_rgd_gh.gh_gl)
1682 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1808 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1683 rs->rs_requested = 0;
1684} 1809}
1685 1810
1686/** 1811/**
@@ -1693,173 +1818,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1693 1818
1694static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1819static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1695{ 1820{
1696 struct gfs2_bitmap *bi = NULL; 1821 struct gfs2_rbm rbm = { .rgd = rgd, };
1697 u32 length, rgrp_block, buf_block; 1822 int ret;
1698 unsigned int buf;
1699 unsigned char type;
1700
1701 length = rgd->rd_length;
1702 rgrp_block = block - rgd->rd_data0;
1703
1704 for (buf = 0; buf < length; buf++) {
1705 bi = rgd->rd_bits + buf;
1706 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1707 break;
1708 }
1709 1823
1710 gfs2_assert(rgd->rd_sbd, buf < length); 1824 ret = gfs2_rbm_from_block(&rbm, block);
1711 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; 1825 WARN_ON_ONCE(ret != 0);
1712 1826
1713 type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset, 1827 return gfs2_testbit(&rbm);
1714 bi->bi_len, buf_block);
1715
1716 return type;
1717} 1828}
1718 1829
1719/**
1720 * rgblk_search - find a block in @state
1721 * @rgd: the resource group descriptor
1722 * @goal: the goal block within the RG (start here to search for avail block)
1723 * @state: GFS2_BLKST_XXX the before-allocation state to find
1724 * @rbi: address of the pointer to the bitmap containing the block found
1725 *
1726 * Walk rgrp's bitmap to find bits that represent a block in @state.
1727 *
1728 * This function never fails, because we wouldn't call it unless we
1729 * know (from reservation results, etc.) that a block is available.
1730 *
1731 * Scope of @goal is just within rgrp, not the whole filesystem.
1732 * Scope of @returned block is just within bitmap, not the whole filesystem.
1733 *
1734 * Returns: the block number found relative to the bitmap rbi
1735 */
1736
1737static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
1738 struct gfs2_bitmap **rbi)
1739{
1740 struct gfs2_bitmap *bi = NULL;
1741 const u32 length = rgd->rd_length;
1742 u32 biblk = BFITNOENT;
1743 unsigned int buf, x;
1744 const u8 *buffer = NULL;
1745
1746 *rbi = NULL;
1747 /* Find bitmap block that contains bits for goal block */
1748 for (buf = 0; buf < length; buf++) {
1749 bi = rgd->rd_bits + buf;
1750 /* Convert scope of "goal" from rgrp-wide to within found bit block */
1751 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
1752 goal -= bi->bi_start * GFS2_NBBY;
1753 goto do_search;
1754 }
1755 }
1756 buf = 0;
1757 goal = 0;
1758
1759do_search:
1760 /* Search (up to entire) bitmap in this rgrp for allocatable block.
1761 "x <= length", instead of "x < length", because we typically start
1762 the search in the middle of a bit block, but if we can't find an
1763 allocatable block anywhere else, we want to be able wrap around and
1764 search in the first part of our first-searched bit block. */
1765 for (x = 0; x <= length; x++) {
1766 bi = rgd->rd_bits + buf;
1767
1768 if (test_bit(GBF_FULL, &bi->bi_flags) &&
1769 (state == GFS2_BLKST_FREE))
1770 goto skip;
1771
1772 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1773 bitmaps, so we must search the originals for that. */
1774 buffer = bi->bi_bh->b_data + bi->bi_offset;
1775 WARN_ON(!buffer_uptodate(bi->bi_bh));
1776 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1777 buffer = bi->bi_clone + bi->bi_offset;
1778
1779 while (1) {
1780 struct gfs2_blkreserv *rs;
1781 u32 rgblk;
1782
1783 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1784 if (biblk == BFITNOENT)
1785 break;
1786 /* Check if this block is reserved() */
1787 rgblk = gfs2_bi2rgd_blk(bi, biblk);
1788 rs = rs_find(rgd, rgblk);
1789 if (rs == NULL)
1790 break;
1791
1792 BUG_ON(rs->rs_bi != bi);
1793 biblk = BFITNOENT;
1794 /* This should jump to the first block after the
1795 reservation. */
1796 goal = rs->rs_biblk + rs->rs_free;
1797 if (goal >= bi->bi_len * GFS2_NBBY)
1798 break;
1799 }
1800 if (biblk != BFITNOENT)
1801 break;
1802
1803 if ((goal == 0) && (state == GFS2_BLKST_FREE))
1804 set_bit(GBF_FULL, &bi->bi_flags);
1805
1806 /* Try next bitmap block (wrap back to rgrp header if at end) */
1807skip:
1808 buf++;
1809 buf %= length;
1810 goal = 0;
1811 }
1812
1813 if (biblk != BFITNOENT)
1814 *rbi = bi;
1815
1816 return biblk;
1817}
1818 1830
1819/** 1831/**
1820 * gfs2_alloc_extent - allocate an extent from a given bitmap 1832 * gfs2_alloc_extent - allocate an extent from a given bitmap
1821 * @rgd: the resource group descriptor 1833 * @rbm: the resource group information
1822 * @bi: the bitmap within the rgrp
1823 * @blk: the block within the bitmap
1824 * @dinode: TRUE if the first block we allocate is for a dinode 1834 * @dinode: TRUE if the first block we allocate is for a dinode
1825 * @n: The extent length 1835 * @n: The extent length (value/result)
1826 * 1836 *
1827 * Add the found bitmap buffer to the transaction. 1837 * Add the bitmap buffer to the transaction.
1828 * Set the found bits to @new_state to change block's allocation state. 1838 * Set the found bits to @new_state to change block's allocation state.
1829 * Returns: starting block number of the extent (fs scope)
1830 */ 1839 */
1831static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, 1840static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
1832 u32 blk, bool dinode, unsigned int *n) 1841 unsigned int *n)
1833{ 1842{
1843 struct gfs2_rbm pos = { .rgd = rbm->rgd, };
1834 const unsigned int elen = *n; 1844 const unsigned int elen = *n;
1835 u32 goal, rgblk; 1845 u64 block;
1836 const u8 *buffer = NULL; 1846 int ret;
1837 struct gfs2_blkreserv *rs; 1847
1838 1848 *n = 1;
1839 *n = 0; 1849 block = gfs2_rbm_to_block(rbm);
1840 buffer = bi->bi_bh->b_data + bi->bi_offset; 1850 gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
1841 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1851 gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1842 gfs2_setbit(rgd, bi->bi_clone, bi, blk, 1852 block++;
1843 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1844 (*n)++;
1845 goal = blk;
1846 while (*n < elen) { 1853 while (*n < elen) {
1847 goal++; 1854 ret = gfs2_rbm_from_block(&pos, block);
1848 if (goal >= (bi->bi_len * GFS2_NBBY)) 1855 if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
1849 break;
1850 rgblk = gfs2_bi2rgd_blk(bi, goal);
1851 rs = rs_find(rgd, rgblk);
1852 if (rs) /* Oops, we bumped into someone's reservation */
1853 break;
1854 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1855 GFS2_BLKST_FREE)
1856 break; 1856 break;
1857 gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED); 1857 gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
1858 gfs2_setbit(&pos, true, GFS2_BLKST_USED);
1858 (*n)++; 1859 (*n)++;
1860 block++;
1859 } 1861 }
1860 blk = gfs2_bi2rgd_blk(bi, blk);
1861 rgd->rd_last_alloc = blk + *n - 1;
1862 return rgd->rd_data0 + blk;
1863} 1862}
1864 1863
1865/** 1864/**
@@ -1875,46 +1874,30 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1875static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, 1874static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1876 u32 blen, unsigned char new_state) 1875 u32 blen, unsigned char new_state)
1877{ 1876{
1878 struct gfs2_rgrpd *rgd; 1877 struct gfs2_rbm rbm;
1879 struct gfs2_bitmap *bi = NULL;
1880 u32 length, rgrp_blk, buf_blk;
1881 unsigned int buf;
1882 1878
1883 rgd = gfs2_blk2rgrpd(sdp, bstart, 1); 1879 rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1884 if (!rgd) { 1880 if (!rbm.rgd) {
1885 if (gfs2_consist(sdp)) 1881 if (gfs2_consist(sdp))
1886 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1882 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1887 return NULL; 1883 return NULL;
1888 } 1884 }
1889 1885
1890 length = rgd->rd_length;
1891
1892 rgrp_blk = bstart - rgd->rd_data0;
1893
1894 while (blen--) { 1886 while (blen--) {
1895 for (buf = 0; buf < length; buf++) { 1887 gfs2_rbm_from_block(&rbm, bstart);
1896 bi = rgd->rd_bits + buf; 1888 bstart++;
1897 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) 1889 if (!rbm.bi->bi_clone) {
1898 break; 1890 rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
1899 } 1891 GFP_NOFS | __GFP_NOFAIL);
1900 1892 memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
1901 gfs2_assert(rgd->rd_sbd, buf < length); 1893 rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
1902 1894 rbm.bi->bi_len);
1903 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1904 rgrp_blk++;
1905
1906 if (!bi->bi_clone) {
1907 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1908 GFP_NOFS | __GFP_NOFAIL);
1909 memcpy(bi->bi_clone + bi->bi_offset,
1910 bi->bi_bh->b_data + bi->bi_offset,
1911 bi->bi_len);
1912 } 1895 }
1913 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1896 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
1914 gfs2_setbit(rgd, NULL, bi, buf_blk, new_state); 1897 gfs2_setbit(&rbm, false, new_state);
1915 } 1898 }
1916 1899
1917 return rgd; 1900 return rbm.rgd;
1918} 1901}
1919 1902
1920/** 1903/**
@@ -1956,56 +1939,41 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1956} 1939}
1957 1940
1958/** 1941/**
1959 * claim_reserved_blks - Claim previously reserved blocks 1942 * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
1960 * @ip: the inode that's claiming the reservation 1943 * @ip: The inode we have just allocated blocks for
1961 * @dinode: 1 if this block is a dinode block, otherwise data block 1944 * @rbm: The start of the allocated blocks
1962 * @nblocks: desired extent length 1945 * @len: The extent length
1963 * 1946 *
1964 * Lay claim to previously allocated block reservation blocks. 1947 * Adjusts a reservation after an allocation has taken place. If the
1965 * Returns: Starting block number of the blocks claimed. 1948 * reservation does not match the allocation, or if it is now empty
1966 * Sets *nblocks to the actual extent length allocated. 1949 * then it is removed.
1967 */ 1950 */
1968static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, 1951
1969 unsigned int *nblocks) 1952static void gfs2_adjust_reservation(struct gfs2_inode *ip,
1953 const struct gfs2_rbm *rbm, unsigned len)
1970{ 1954{
1971 struct gfs2_blkreserv *rs = ip->i_res; 1955 struct gfs2_blkreserv *rs = ip->i_res;
1972 struct gfs2_rgrpd *rgd = rs->rs_rgd; 1956 struct gfs2_rgrpd *rgd = rbm->rgd;
1973 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1957 unsigned rlen;
1974 struct gfs2_bitmap *bi; 1958 u64 block;
1975 u64 start_block = gfs2_rs_startblk(rs); 1959 int ret;
1976 const unsigned int elen = *nblocks;
1977
1978 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1979 gfs2_assert_withdraw(sdp, rgd);
1980 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1981 bi = rs->rs_bi;
1982 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1983
1984 for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
1985 /* Make sure the bitmap hasn't changed */
1986 gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
1987 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1988 rs->rs_biblk++;
1989 rs->rs_free--;
1990
1991 BUG_ON(!rgd->rd_reserved);
1992 rgd->rd_reserved--;
1993 dinode = false;
1994 trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
1995 }
1996
1997 if (!rs->rs_free) {
1998 struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
1999 1960
2000 gfs2_rs_deltree(rs); 1961 spin_lock(&rgd->rd_rsspin);
2001 /* -nblocks because we haven't returned to do the math yet. 1962 if (gfs2_rs_active(rs)) {
2002 I'm doing the math backwards to prevent negative numbers, 1963 if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
2003 but think of it as: 1964 block = gfs2_rbm_to_block(rbm);
2004 if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ 1965 ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
2005 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) 1966 rlen = min(rs->rs_free, len);
2006 rg_mblk_search(rgd, ip); 1967 rs->rs_free -= rlen;
1968 rgd->rd_reserved -= rlen;
1969 trace_gfs2_rs(rs, TRACE_RS_CLAIM);
1970 if (rs->rs_free && !ret)
1971 goto out;
1972 }
1973 __rs_deltree(ip, rs);
2007 } 1974 }
2008 return start_block; 1975out:
1976 spin_unlock(&rgd->rd_rsspin);
2009} 1977}
2010 1978
2011/** 1979/**
@@ -2024,47 +1992,40 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2024{ 1992{
2025 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1993 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
2026 struct buffer_head *dibh; 1994 struct buffer_head *dibh;
2027 struct gfs2_rgrpd *rgd; 1995 struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
2028 unsigned int ndata; 1996 unsigned int ndata;
2029 u32 goal, blk; /* block, within the rgrp scope */ 1997 u64 goal;
2030 u64 block; /* block, within the file system scope */ 1998 u64 block; /* block, within the file system scope */
2031 int error; 1999 int error;
2032 struct gfs2_bitmap *bi;
2033 2000
2034 /* Only happens if there is a bug in gfs2, return something distinctive 2001 if (gfs2_rs_active(ip->i_res))
2035 * to ensure that it is noticed. 2002 goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
2036 */ 2003 else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
2037 if (ip->i_res->rs_requested == 0) 2004 goal = ip->i_goal;
2038 return -ECANCELED; 2005 else
2039 2006 goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
2040 /* Check if we have a multi-block reservation, and if so, claim the
2041 next free block from it. */
2042 if (gfs2_rs_active(ip->i_res)) {
2043 BUG_ON(!ip->i_res->rs_free);
2044 rgd = ip->i_res->rs_rgd;
2045 block = claim_reserved_blks(ip, dinode, nblocks);
2046 } else {
2047 rgd = ip->i_rgd;
2048 2007
2049 if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) 2008 gfs2_rbm_from_block(&rbm, goal);
2050 goal = ip->i_goal - rgd->rd_data0; 2009 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
2051 else
2052 goal = rgd->rd_last_alloc;
2053
2054 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
2055
2056 /* Since all blocks are reserved in advance, this shouldn't
2057 happen */
2058 if (blk == BFITNOENT) {
2059 printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
2060 *nblocks);
2061 printk(KERN_WARNING "FULL=%d\n",
2062 test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
2063 goto rgrp_error;
2064 }
2065 2010
2066 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); 2011 if (error == -ENOSPC) {
2012 gfs2_rbm_from_block(&rbm, goal);
2013 error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
2014 }
2015
2016 /* Since all blocks are reserved in advance, this shouldn't happen */
2017 if (error) {
2018 fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
2019 (unsigned long long)ip->i_no_addr, error, *nblocks,
2020 test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
2021 goto rgrp_error;
2067 } 2022 }
2023
2024 gfs2_alloc_extent(&rbm, dinode, nblocks);
2025 block = gfs2_rbm_to_block(&rbm);
2026 rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2027 if (gfs2_rs_active(ip->i_res))
2028 gfs2_adjust_reservation(ip, &rbm, *nblocks);
2068 ndata = *nblocks; 2029 ndata = *nblocks;
2069 if (dinode) 2030 if (dinode)
2070 ndata--; 2031 ndata--;
@@ -2081,22 +2042,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2081 brelse(dibh); 2042 brelse(dibh);
2082 } 2043 }
2083 } 2044 }
2084 if (rgd->rd_free < *nblocks) { 2045 if (rbm.rgd->rd_free < *nblocks) {
2085 printk(KERN_WARNING "nblocks=%u\n", *nblocks); 2046 printk(KERN_WARNING "nblocks=%u\n", *nblocks);
2086 goto rgrp_error; 2047 goto rgrp_error;
2087 } 2048 }
2088 2049
2089 rgd->rd_free -= *nblocks; 2050 rbm.rgd->rd_free -= *nblocks;
2090 if (dinode) { 2051 if (dinode) {
2091 rgd->rd_dinodes++; 2052 rbm.rgd->rd_dinodes++;
2092 *generation = rgd->rd_igeneration++; 2053 *generation = rbm.rgd->rd_igeneration++;
2093 if (*generation == 0) 2054 if (*generation == 0)
2094 *generation = rgd->rd_igeneration++; 2055 *generation = rbm.rgd->rd_igeneration++;
2095 } 2056 }
2096 2057
2097 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2058 gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1);
2098 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2059 gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
2099 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); 2060 gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
2100 2061
2101 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2062 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
2102 if (dinode) 2063 if (dinode)
@@ -2110,14 +2071,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
2110 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, 2071 gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
2111 ip->i_inode.i_gid); 2072 ip->i_inode.i_gid);
2112 2073
2113 rgd->rd_free_clone -= *nblocks; 2074 rbm.rgd->rd_free_clone -= *nblocks;
2114 trace_gfs2_block_alloc(ip, rgd, block, *nblocks, 2075 trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
2115 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); 2076 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
2116 *bn = block; 2077 *bn = block;
2117 return 0; 2078 return 0;
2118 2079
2119rgrp_error: 2080rgrp_error:
2120 gfs2_rgrp_error(rgd); 2081 gfs2_rgrp_error(rbm.rgd);
2121 return -EIO; 2082 return -EIO;
2122} 2083}
2123 2084
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ca6e26729b86..24077958dcf6 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -46,7 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
46 bool dinode, u64 *generation); 46 bool dinode, u64 *generation);
47 47
48extern int gfs2_rs_alloc(struct gfs2_inode *ip); 48extern int gfs2_rs_alloc(struct gfs2_inode *ip);
49extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); 49extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs);
50extern void gfs2_rs_delete(struct gfs2_inode *ip); 50extern void gfs2_rs_delete(struct gfs2_inode *ip);
51extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); 51extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
52extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 52extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@@ -73,30 +73,10 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
73 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); 73 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
74extern int gfs2_fitrim(struct file *filp, void __user *argp); 74extern int gfs2_fitrim(struct file *filp, void __user *argp);
75 75
76/* This is how to tell if a multi-block reservation is "inplace" reserved: */ 76/* This is how to tell if a reservation is in the rgrp tree: */
77static inline int gfs2_mb_reserved(struct gfs2_inode *ip) 77static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
78{ 78{
79 if (ip->i_res && ip->i_res->rs_requested) 79 return rs && !RB_EMPTY_NODE(&rs->rs_node);
80 return 1;
81 return 0;
82}
83
84/* This is how to tell if a multi-block reservation is in the rgrp tree: */
85static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
86{
87 if (rs && rs->rs_bi)
88 return 1;
89 return 0;
90}
91
92static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
93{
94 return (bi->bi_start * GFS2_NBBY) + blk;
95}
96
97static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
98{
99 return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
100} 80}
101 81
102#endif /* __RGRP_DOT_H__ */ 82#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fc3168f47a14..bc737261f234 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1366 val = sdp->sd_tune.gt_statfs_quantum; 1366 val = sdp->sd_tune.gt_statfs_quantum;
1367 if (val != 30) 1367 if (val != 30)
1368 seq_printf(s, ",statfs_quantum=%d", val); 1368 seq_printf(s, ",statfs_quantum=%d", val);
1369 else if (sdp->sd_tune.gt_statfs_slow)
1370 seq_puts(s, ",statfs_quantum=0");
1369 val = sdp->sd_tune.gt_quota_quantum; 1371 val = sdp->sd_tune.gt_quota_quantum;
1370 if (val != 60) 1372 if (val != 60)
1371 seq_printf(s, ",quota_quantum=%d", val); 1373 seq_printf(s, ",quota_quantum=%d", val);
@@ -1543,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode)
1543 1545
1544out_truncate: 1546out_truncate:
1545 gfs2_log_flush(sdp, ip->i_gl); 1547 gfs2_log_flush(sdp, ip->i_gl);
1548 if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
1549 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
1550 filemap_fdatawrite(metamapping);
1551 filemap_fdatawait(metamapping);
1552 }
1546 write_inode_now(inode, 1); 1553 write_inode_now(inode, 1);
1547 gfs2_ail_flush(ip->i_gl, 0); 1554 gfs2_ail_flush(ip->i_gl, 0);
1548 1555
@@ -1557,7 +1564,7 @@ out_truncate:
1557out_unlock: 1564out_unlock:
1558 /* Error path for case 1 */ 1565 /* Error path for case 1 */
1559 if (gfs2_rs_active(ip->i_res)) 1566 if (gfs2_rs_active(ip->i_res))
1560 gfs2_rs_deltree(ip->i_res); 1567 gfs2_rs_deltree(ip, ip->i_res);
1561 1568
1562 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) 1569 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
1563 gfs2_glock_dq(&ip->i_iopen_gh); 1570 gfs2_glock_dq(&ip->i_iopen_gh);
@@ -1572,7 +1579,7 @@ out:
1572 clear_inode(inode); 1579 clear_inode(inode);
1573 gfs2_dir_hash_inval(ip); 1580 gfs2_dir_hash_inval(ip);
1574 ip->i_gl->gl_object = NULL; 1581 ip->i_gl->gl_object = NULL;
1575 flush_delayed_work_sync(&ip->i_gl->gl_work); 1582 flush_delayed_work(&ip->i_gl->gl_work);
1576 gfs2_glock_add_to_lru(ip->i_gl); 1583 gfs2_glock_add_to_lru(ip->i_gl);
1577 gfs2_glock_put(ip->i_gl); 1584 gfs2_glock_put(ip->i_gl);
1578 ip->i_gl = NULL; 1585 ip->i_gl = NULL;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index a25c252fe412..bbdc78af60ca 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -509,10 +509,9 @@ TRACE_EVENT(gfs2_block_alloc,
509/* Keep track of multi-block reservations as they are allocated/freed */ 509/* Keep track of multi-block reservations as they are allocated/freed */
510TRACE_EVENT(gfs2_rs, 510TRACE_EVENT(gfs2_rs,
511 511
512 TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, 512 TP_PROTO(const struct gfs2_blkreserv *rs, u8 func),
513 u8 func),
514 513
515 TP_ARGS(ip, rs, func), 514 TP_ARGS(rs, func),
516 515
517 TP_STRUCT__entry( 516 TP_STRUCT__entry(
518 __field( dev_t, dev ) 517 __field( dev_t, dev )
@@ -526,18 +525,17 @@ TRACE_EVENT(gfs2_rs,
526 ), 525 ),
527 526
528 TP_fast_assign( 527 TP_fast_assign(
529 __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; 528 __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
530 __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; 529 __entry->rd_addr = rs->rs_rbm.rgd->rd_addr;
531 __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; 530 __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone;
532 __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; 531 __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved;
533 __entry->inum = ip ? ip->i_no_addr : 0; 532 __entry->inum = rs->rs_inum;
534 __entry->start = gfs2_rs_startblk(rs); 533 __entry->start = gfs2_rbm_to_block(&rs->rs_rbm);
535 __entry->free = rs->rs_free; 534 __entry->free = rs->rs_free;
536 __entry->func = func; 535 __entry->func = func;
537 ), 536 ),
538 537
539 TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " 538 TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
540 "f:%lu",
541 MAJOR(__entry->dev), MINOR(__entry->dev), 539 MAJOR(__entry->dev), MINOR(__entry->dev),
542 (unsigned long long)__entry->inum, 540 (unsigned long long)__entry->inum,
543 (unsigned long long)__entry->start, 541 (unsigned long long)__entry->start,
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 41f42cdccbb8..bf2ae9aeee7a 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -28,11 +28,10 @@ struct gfs2_glock;
28 28
29/* reserve either the number of blocks to be allocated plus the rg header 29/* reserve either the number of blocks to be allocated plus the rg header
30 * block, or all of the blocks in the rg, whichever is smaller */ 30 * block, or all of the blocks in the rg, whichever is smaller */
31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) 31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
32{ 32{
33 const struct gfs2_blkreserv *rs = ip->i_res; 33 if (requested < ip->i_rgd->rd_length)
34 if (rs && rs->rs_requested < ip->i_rgd->rd_length) 34 return requested + 1;
35 return rs->rs_requested + 1;
36 return ip->i_rgd->rd_length; 35 return ip->i_rgd->rd_length;
37} 36}
38 37
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 27a0b4a901f5..db330e5518cd 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
448} 448}
449 449
450/** 450/**
451 * ea_get_unstuffed - actually copies the unstuffed data into the 451 * ea_iter_unstuffed - copies the unstuffed xattr data to/from the
452 * request buffer 452 * request buffer
453 * @ip: The GFS2 inode 453 * @ip: The GFS2 inode
454 * @ea: The extended attribute header structure 454 * @ea: The extended attribute header structure
455 * @data: The data to be copied 455 * @din: The data to be copied in
456 * @dout: The data to be copied out (one of din,dout will be NULL)
456 * 457 *
457 * Returns: errno 458 * Returns: errno
458 */ 459 */
459 460
460static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, 461static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
461 char *data) 462 const char *din, char *dout)
462{ 463{
463 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 464 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
464 struct buffer_head **bh; 465 struct buffer_head **bh;
@@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
467 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); 468 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
468 unsigned int x; 469 unsigned int x;
469 int error = 0; 470 int error = 0;
471 unsigned char *pos;
472 unsigned cp_size;
470 473
471 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); 474 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
472 if (!bh) 475 if (!bh)
@@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
497 goto out; 500 goto out;
498 } 501 }
499 502
500 memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header), 503 pos = bh[x]->b_data + sizeof(struct gfs2_meta_header);
501 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); 504 cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize;
502 505
503 amount -= sdp->sd_jbsize; 506 if (dout) {
504 data += sdp->sd_jbsize; 507 memcpy(dout, pos, cp_size);
508 dout += sdp->sd_jbsize;
509 }
510
511 if (din) {
512 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
513 memcpy(pos, din, cp_size);
514 din += sdp->sd_jbsize;
515 }
505 516
517 amount -= sdp->sd_jbsize;
506 brelse(bh[x]); 518 brelse(bh[x]);
507 } 519 }
508 520
@@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
523 memcpy(data, GFS2_EA2DATA(el->el_ea), len); 535 memcpy(data, GFS2_EA2DATA(el->el_ea), len);
524 return len; 536 return len;
525 } 537 }
526 ret = ea_get_unstuffed(ip, el->el_ea, data); 538 ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data);
527 if (ret < 0) 539 if (ret < 0)
528 return ret; 540 return ret;
529 return len; 541 return len;
@@ -727,7 +739,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
727 goto out_gunlock_q; 739 goto out_gunlock_q;
728 740
729 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), 741 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
730 blks + gfs2_rg_blocks(ip) + 742 blks + gfs2_rg_blocks(ip, blks) +
731 RES_DINODE + RES_STATFS + RES_QUOTA, 0); 743 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
732 if (error) 744 if (error)
733 goto out_ipres; 745 goto out_ipres;
@@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const char *name,
1220 size, flags, type); 1232 size, flags, type);
1221} 1233}
1222 1234
1235
1223static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, 1236static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1224 struct gfs2_ea_header *ea, char *data) 1237 struct gfs2_ea_header *ea, char *data)
1225{ 1238{
1226 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1239 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1227 struct buffer_head **bh;
1228 unsigned int amount = GFS2_EA_DATA_LEN(ea); 1240 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1229 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize); 1241 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
1230 __be64 *dataptrs = GFS2_EA2DATAPTRS(ea); 1242 int ret;
1231 unsigned int x;
1232 int error;
1233
1234 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
1235 if (!bh)
1236 return -ENOMEM;
1237
1238 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1239 if (error)
1240 goto out;
1241
1242 for (x = 0; x < nptrs; x++) {
1243 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
1244 bh + x);
1245 if (error) {
1246 while (x--)
1247 brelse(bh[x]);
1248 goto fail;
1249 }
1250 dataptrs++;
1251 }
1252
1253 for (x = 0; x < nptrs; x++) {
1254 error = gfs2_meta_wait(sdp, bh[x]);
1255 if (error) {
1256 for (; x < nptrs; x++)
1257 brelse(bh[x]);
1258 goto fail;
1259 }
1260 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1261 for (; x < nptrs; x++)
1262 brelse(bh[x]);
1263 error = -EIO;
1264 goto fail;
1265 }
1266
1267 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
1268
1269 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header), data,
1270 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1271
1272 amount -= sdp->sd_jbsize;
1273 data += sdp->sd_jbsize;
1274
1275 brelse(bh[x]);
1276 }
1277 1243
1278out: 1244 ret = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1279 kfree(bh); 1245 if (ret)
1280 return error; 1246 return ret;
1281 1247
1282fail: 1248 ret = gfs2_iter_unstuffed(ip, ea, data, NULL);
1283 gfs2_trans_end(sdp); 1249 gfs2_trans_end(sdp);
1284 kfree(bh); 1250
1285 return error; 1251 return ret;
1286} 1252}
1287 1253
1288int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1254int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 8275175acf6e..693df9fe52b2 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -134,8 +134,8 @@ struct hfs_sb_info {
134 permissions on all files */ 134 permissions on all files */
135 umode_t s_dir_umask; /* The umask applied to the 135 umode_t s_dir_umask; /* The umask applied to the
136 permissions on all dirs */ 136 permissions on all dirs */
137 uid_t s_uid; /* The uid of all files */ 137 kuid_t s_uid; /* The uid of all files */
138 gid_t s_gid; /* The gid of all files */ 138 kgid_t s_gid; /* The gid of all files */
139 139
140 int session, part; 140 int session, part;
141 struct nls_table *nls_io, *nls_disk; 141 struct nls_table *nls_io, *nls_disk;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index ee1bc55677f1..0b35903219bc 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -594,9 +594,9 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
594 594
595 /* no uid/gid changes and limit which mode bits can be set */ 595 /* no uid/gid changes and limit which mode bits can be set */
596 if (((attr->ia_valid & ATTR_UID) && 596 if (((attr->ia_valid & ATTR_UID) &&
597 (attr->ia_uid != hsb->s_uid)) || 597 (!uid_eq(attr->ia_uid, hsb->s_uid))) ||
598 ((attr->ia_valid & ATTR_GID) && 598 ((attr->ia_valid & ATTR_GID) &&
599 (attr->ia_gid != hsb->s_gid)) || 599 (!gid_eq(attr->ia_gid, hsb->s_gid))) ||
600 ((attr->ia_valid & ATTR_MODE) && 600 ((attr->ia_valid & ATTR_MODE) &&
601 ((S_ISDIR(inode->i_mode) && 601 ((S_ISDIR(inode->i_mode) &&
602 (attr->ia_mode != inode->i_mode)) || 602 (attr->ia_mode != inode->i_mode)) ||
@@ -644,7 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
644 644
645 /* sync the superblock to buffers */ 645 /* sync the superblock to buffers */
646 sb = inode->i_sb; 646 sb = inode->i_sb;
647 flush_delayed_work_sync(&HFS_SB(sb)->mdb_work); 647 flush_delayed_work(&HFS_SB(sb)->mdb_work);
648 /* .. finally sync the buffers to disk */ 648 /* .. finally sync the buffers to disk */
649 err = sync_blockdev(sb->s_bdev); 649 err = sync_blockdev(sb->s_bdev);
650 if (!ret) 650 if (!ret)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4eb873e0c07b..e93ddaadfd1e 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -138,7 +138,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root)
138 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); 138 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
139 if (sbi->s_type != cpu_to_be32(0x3f3f3f3f)) 139 if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
140 seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type); 140 seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
141 seq_printf(seq, ",uid=%u,gid=%u", sbi->s_uid, sbi->s_gid); 141 seq_printf(seq, ",uid=%u,gid=%u",
142 from_kuid_munged(&init_user_ns, sbi->s_uid),
143 from_kgid_munged(&init_user_ns, sbi->s_gid));
142 if (sbi->s_file_umask != 0133) 144 if (sbi->s_file_umask != 0133)
143 seq_printf(seq, ",file_umask=%o", sbi->s_file_umask); 145 seq_printf(seq, ",file_umask=%o", sbi->s_file_umask);
144 if (sbi->s_dir_umask != 0022) 146 if (sbi->s_dir_umask != 0022)
@@ -254,14 +256,22 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
254 printk(KERN_ERR "hfs: uid requires an argument\n"); 256 printk(KERN_ERR "hfs: uid requires an argument\n");
255 return 0; 257 return 0;
256 } 258 }
257 hsb->s_uid = (uid_t)tmp; 259 hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp);
260 if (!uid_valid(hsb->s_uid)) {
261 printk(KERN_ERR "hfs: invalid uid %d\n", tmp);
262 return 0;
263 }
258 break; 264 break;
259 case opt_gid: 265 case opt_gid:
260 if (match_int(&args[0], &tmp)) { 266 if (match_int(&args[0], &tmp)) {
261 printk(KERN_ERR "hfs: gid requires an argument\n"); 267 printk(KERN_ERR "hfs: gid requires an argument\n");
262 return 0; 268 return 0;
263 } 269 }
264 hsb->s_gid = (gid_t)tmp; 270 hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp);
271 if (!gid_valid(hsb->s_gid)) {
272 printk(KERN_ERR "hfs: invalid gid %d\n", tmp);
273 return 0;
274 }
265 break; 275 break;
266 case opt_umask: 276 case opt_umask:
267 if (match_octal(&args[0], &tmp)) { 277 if (match_octal(&args[0], &tmp)) {
@@ -482,6 +492,12 @@ static int __init init_hfs_fs(void)
482static void __exit exit_hfs_fs(void) 492static void __exit exit_hfs_fs(void)
483{ 493{
484 unregister_filesystem(&hfs_fs_type); 494 unregister_filesystem(&hfs_fs_type);
495
496 /*
497 * Make sure all delayed rcu free inodes are flushed before we
498 * destroy cache.
499 */
500 rcu_barrier();
485 kmem_cache_destroy(hfs_inode_cachep); 501 kmem_cache_destroy(hfs_inode_cachep);
486} 502}
487 503
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index ec2a9c23f0c9..798d9c4c5e71 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -80,8 +80,8 @@ void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms)
80 80
81 perms->userflags = HFSPLUS_I(inode)->userflags; 81 perms->userflags = HFSPLUS_I(inode)->userflags;
82 perms->mode = cpu_to_be16(inode->i_mode); 82 perms->mode = cpu_to_be16(inode->i_mode);
83 perms->owner = cpu_to_be32(inode->i_uid); 83 perms->owner = cpu_to_be32(i_uid_read(inode));
84 perms->group = cpu_to_be32(inode->i_gid); 84 perms->group = cpu_to_be32(i_gid_read(inode));
85 85
86 if (S_ISREG(inode->i_mode)) 86 if (S_ISREG(inode->i_mode))
87 perms->dev = cpu_to_be32(inode->i_nlink); 87 perms->dev = cpu_to_be32(inode->i_nlink);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 558dbb463a4e..c571de224b15 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -149,8 +149,8 @@ struct hfsplus_sb_info {
149 u32 type; 149 u32 type;
150 150
151 umode_t umask; 151 umode_t umask;
152 uid_t uid; 152 kuid_t uid;
153 gid_t gid; 153 kgid_t gid;
154 154
155 int part, session; 155 int part, session;
156 unsigned long flags; 156 unsigned long flags;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 3d8b4a675ba0..2172aa5976f5 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -233,12 +233,12 @@ static void hfsplus_get_perms(struct inode *inode,
233 233
234 mode = be16_to_cpu(perms->mode); 234 mode = be16_to_cpu(perms->mode);
235 235
236 inode->i_uid = be32_to_cpu(perms->owner); 236 i_uid_write(inode, be32_to_cpu(perms->owner));
237 if (!inode->i_uid && !mode) 237 if (!i_uid_read(inode) && !mode)
238 inode->i_uid = sbi->uid; 238 inode->i_uid = sbi->uid;
239 239
240 inode->i_gid = be32_to_cpu(perms->group); 240 i_gid_write(inode, be32_to_cpu(perms->group));
241 if (!inode->i_gid && !mode) 241 if (!i_gid_read(inode) && !mode)
242 inode->i_gid = sbi->gid; 242 inode->i_gid = sbi->gid;
243 243
244 if (dir) { 244 if (dir) {
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 06fa5618600c..ed257c671615 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -135,14 +135,22 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
135 printk(KERN_ERR "hfs: uid requires an argument\n"); 135 printk(KERN_ERR "hfs: uid requires an argument\n");
136 return 0; 136 return 0;
137 } 137 }
138 sbi->uid = (uid_t)tmp; 138 sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp);
139 if (!uid_valid(sbi->uid)) {
140 printk(KERN_ERR "hfs: invalid uid specified\n");
141 return 0;
142 }
139 break; 143 break;
140 case opt_gid: 144 case opt_gid:
141 if (match_int(&args[0], &tmp)) { 145 if (match_int(&args[0], &tmp)) {
142 printk(KERN_ERR "hfs: gid requires an argument\n"); 146 printk(KERN_ERR "hfs: gid requires an argument\n");
143 return 0; 147 return 0;
144 } 148 }
145 sbi->gid = (gid_t)tmp; 149 sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp);
150 if (!gid_valid(sbi->gid)) {
151 printk(KERN_ERR "hfs: invalid gid specified\n");
152 return 0;
153 }
146 break; 154 break;
147 case opt_part: 155 case opt_part:
148 if (match_int(&args[0], &sbi->part)) { 156 if (match_int(&args[0], &sbi->part)) {
@@ -215,7 +223,8 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
215 if (sbi->type != HFSPLUS_DEF_CR_TYPE) 223 if (sbi->type != HFSPLUS_DEF_CR_TYPE)
216 seq_printf(seq, ",type=%.4s", (char *)&sbi->type); 224 seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
217 seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask, 225 seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask,
218 sbi->uid, sbi->gid); 226 from_kuid_munged(&init_user_ns, sbi->uid),
227 from_kgid_munged(&init_user_ns, sbi->gid));
219 if (sbi->part >= 0) 228 if (sbi->part >= 0)
220 seq_printf(seq, ",part=%u", sbi->part); 229 seq_printf(seq, ",part=%u", sbi->part);
221 if (sbi->session >= 0) 230 if (sbi->session >= 0)
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index fdafb2d71654..811a84d2d964 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -635,6 +635,12 @@ static int __init init_hfsplus_fs(void)
635static void __exit exit_hfsplus_fs(void) 635static void __exit exit_hfsplus_fs(void)
636{ 636{
637 unregister_filesystem(&hfsplus_fs_type); 637 unregister_filesystem(&hfsplus_fs_type);
638
639 /*
640 * Make sure all delayed rcu free inodes are flushed before we
641 * destroy cache.
642 */
643 rcu_barrier();
638 kmem_cache_destroy(hfsplus_inode_cachep); 644 kmem_cache_destroy(hfsplus_inode_cachep);
639} 645}
640 646
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 124146543aa7..6c9f3a9d5e21 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -542,8 +542,8 @@ static int read_name(struct inode *ino, char *name)
542 ino->i_ino = st.ino; 542 ino->i_ino = st.ino;
543 ino->i_mode = st.mode; 543 ino->i_mode = st.mode;
544 set_nlink(ino, st.nlink); 544 set_nlink(ino, st.nlink);
545 ino->i_uid = st.uid; 545 i_uid_write(ino, st.uid);
546 ino->i_gid = st.gid; 546 i_gid_write(ino, st.gid);
547 ino->i_atime = st.atime; 547 ino->i_atime = st.atime;
548 ino->i_mtime = st.mtime; 548 ino->i_mtime = st.mtime;
549 ino->i_ctime = st.ctime; 549 ino->i_ctime = st.ctime;
@@ -808,11 +808,11 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
808 } 808 }
809 if (attr->ia_valid & ATTR_UID) { 809 if (attr->ia_valid & ATTR_UID) {
810 attrs.ia_valid |= HOSTFS_ATTR_UID; 810 attrs.ia_valid |= HOSTFS_ATTR_UID;
811 attrs.ia_uid = attr->ia_uid; 811 attrs.ia_uid = from_kuid(&init_user_ns, attr->ia_uid);
812 } 812 }
813 if (attr->ia_valid & ATTR_GID) { 813 if (attr->ia_valid & ATTR_GID) {
814 attrs.ia_valid |= HOSTFS_ATTR_GID; 814 attrs.ia_valid |= HOSTFS_ATTR_GID;
815 attrs.ia_gid = attr->ia_gid; 815 attrs.ia_gid = from_kgid(&init_user_ns, attr->ia_gid);
816 } 816 }
817 if (attr->ia_valid & ATTR_SIZE) { 817 if (attr->ia_valid & ATTR_SIZE) {
818 attrs.ia_valid |= HOSTFS_ATTR_SIZE; 818 attrs.ia_valid |= HOSTFS_ATTR_SIZE;
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 4bae4a4a60b1..2d5b254ad9e2 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -102,7 +102,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
102 return -1; 102 return -1;
103 } 103 }
104 if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) { 104 if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) {
105 btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1); 105 le32_add_cpu(&btree->u.external[n].length, 1);
106 mark_buffer_dirty(bh); 106 mark_buffer_dirty(bh);
107 brelse(bh); 107 brelse(bh);
108 return se; 108 return se;
@@ -153,7 +153,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
153 btree = &anode->btree; 153 btree = &anode->btree;
154 } 154 }
155 btree->n_free_nodes--; n = btree->n_used_nodes++; 155 btree->n_free_nodes--; n = btree->n_used_nodes++;
156 btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12); 156 le16_add_cpu(&btree->first_free, 12);
157 btree->u.external[n].disk_secno = cpu_to_le32(se); 157 btree->u.external[n].disk_secno = cpu_to_le32(se);
158 btree->u.external[n].file_secno = cpu_to_le32(fs); 158 btree->u.external[n].file_secno = cpu_to_le32(fs);
159 btree->u.external[n].length = cpu_to_le32(1); 159 btree->u.external[n].length = cpu_to_le32(1);
@@ -174,7 +174,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
174 } 174 }
175 if (btree->n_free_nodes) { 175 if (btree->n_free_nodes) {
176 btree->n_free_nodes--; n = btree->n_used_nodes++; 176 btree->n_free_nodes--; n = btree->n_used_nodes++;
177 btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8); 177 le16_add_cpu(&btree->first_free, 8);
178 btree->u.internal[n].file_secno = cpu_to_le32(-1); 178 btree->u.internal[n].file_secno = cpu_to_le32(-1);
179 btree->u.internal[n].down = cpu_to_le32(na); 179 btree->u.internal[n].down = cpu_to_le32(na);
180 btree->u.internal[n-1].file_secno = cpu_to_le32(fs); 180 btree->u.internal[n-1].file_secno = cpu_to_le32(fs);
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 3228c524ebe5..4364b2a02c5d 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -145,10 +145,10 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
145 } 145 }
146 } 146 }
147 if (ptr) { 147 if (ptr) {
148 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4); 148 le32_add_cpu(&d->first_free, 4);
149 if (le32_to_cpu(d->first_free) > 2048) { 149 if (le32_to_cpu(d->first_free) > 2048) {
150 hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self)); 150 hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self));
151 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4); 151 le32_add_cpu(&d->first_free, -4);
152 return; 152 return;
153 } 153 }
154 de->length = cpu_to_le16(36); 154 de->length = cpu_to_le16(36);
@@ -184,7 +184,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
184 de->not_8x3 = hpfs_is_name_long(name, namelen); 184 de->not_8x3 = hpfs_is_name_long(name, namelen);
185 de->namelen = namelen; 185 de->namelen = namelen;
186 memcpy(de->name, name, namelen); 186 memcpy(de->name, name, namelen);
187 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size); 187 le32_add_cpu(&d->first_free, d_size);
188 return de; 188 return de;
189} 189}
190 190
@@ -314,7 +314,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); 314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
315 de = de_next_de(de); 315 de = de_next_de(de);
316 memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de); 316 memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de);
317 nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20)); 317 le32_add_cpu(&nd->first_free, -((char *)de - (char *)nd - 20));
318 memcpy(d, nd, le32_to_cpu(nd->first_free)); 318 memcpy(d, nd, le32_to_cpu(nd->first_free));
319 for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); 319 for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos);
320 fix_up_ptrs(i->i_sb, ad); 320 fix_up_ptrs(i->i_sb, ad);
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
474 hpfs_brelse4(&qbh); 474 hpfs_brelse4(&qbh);
475 return 0; 475 return 0;
476 } 476 }
477 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); 477 le32_add_cpu(&dnode->first_free, -4);
478 de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); 478 le16_add_cpu(&de->length, -4);
479 de->down = 0; 479 de->down = 0;
480 hpfs_mark_4buffers_dirty(&qbh); 480 hpfs_mark_4buffers_dirty(&qbh);
481 dno = up; 481 dno = up;
@@ -570,8 +570,8 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
570 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); 570 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p);
571 if (!down) { 571 if (!down) {
572 de->down = 0; 572 de->down = 0;
573 de->length = cpu_to_le16(le16_to_cpu(de->length) - 4); 573 le16_add_cpu(&de->length, -4);
574 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4); 574 le32_add_cpu(&dnode->first_free, -4);
575 memmove(de_next_de(de), (char *)de_next_de(de) + 4, 575 memmove(de_next_de(de), (char *)de_next_de(de) + 4,
576 (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de)); 576 (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de));
577 } else { 577 } else {
@@ -647,14 +647,14 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
647 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); 647 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
648 printk("HPFS: warning: goin'on\n"); 648 printk("HPFS: warning: goin'on\n");
649 } 649 }
650 del->length = cpu_to_le16(le16_to_cpu(del->length) + 4); 650 le16_add_cpu(&del->length, 4);
651 del->down = 1; 651 del->down = 1;
652 d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4); 652 le32_add_cpu(&d1->first_free, 4);
653 } 653 }
654 if (dlp && !down) { 654 if (dlp && !down) {
655 del->length = cpu_to_le16(le16_to_cpu(del->length) - 4); 655 le16_add_cpu(&del->length, -4);
656 del->down = 0; 656 del->down = 0;
657 d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); 657 le32_add_cpu(&d1->first_free, -4);
658 } else if (down) 658 } else if (down)
659 *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); 659 *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down);
660 } else goto endm; 660 } else goto endm;
@@ -668,9 +668,9 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
668 memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length)); 668 memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length));
669 hpfs_delete_de(i->i_sb, dnode, de_prev); 669 hpfs_delete_de(i->i_sb, dnode, de_prev);
670 if (!de_prev->down) { 670 if (!de_prev->down) {
671 de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4); 671 le16_add_cpu(&de_prev->length, 4);
672 de_prev->down = 1; 672 de_prev->down = 1;
673 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); 673 le32_add_cpu(&dnode->first_free, 4);
674 } 674 }
675 *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); 675 *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown);
676 hpfs_mark_4buffers_dirty(&qbh); 676 hpfs_mark_4buffers_dirty(&qbh);
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index ac1ead194db5..7102aaecc244 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -63,8 +63,8 @@ struct hpfs_sb_info {
63 unsigned sb_dmap; /* sector number of dnode bit map */ 63 unsigned sb_dmap; /* sector number of dnode bit map */
64 unsigned sb_n_free; /* free blocks for statfs, or -1 */ 64 unsigned sb_n_free; /* free blocks for statfs, or -1 */
65 unsigned sb_n_free_dnodes; /* free dnodes for statfs, or -1 */ 65 unsigned sb_n_free_dnodes; /* free dnodes for statfs, or -1 */
66 uid_t sb_uid; /* uid from mount options */ 66 kuid_t sb_uid; /* uid from mount options */
67 gid_t sb_gid; /* gid from mount options */ 67 kgid_t sb_gid; /* gid from mount options */
68 umode_t sb_mode; /* mode from mount options */ 68 umode_t sb_mode; /* mode from mount options */
69 unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ 69 unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */
70 unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ 70 unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index ed671e0ea784..804a9a842cbc 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/user_namespace.h>
10#include "hpfs_fn.h" 11#include "hpfs_fn.h"
11 12
12void hpfs_init_inode(struct inode *i) 13void hpfs_init_inode(struct inode *i)
@@ -60,14 +61,14 @@ void hpfs_read_inode(struct inode *i)
60 if (hpfs_sb(i->i_sb)->sb_eas) { 61 if (hpfs_sb(i->i_sb)->sb_eas) {
61 if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) { 62 if ((ea = hpfs_get_ea(i->i_sb, fnode, "UID", &ea_size))) {
62 if (ea_size == 2) { 63 if (ea_size == 2) {
63 i->i_uid = le16_to_cpu(*(__le16*)ea); 64 i_uid_write(i, le16_to_cpu(*(__le16*)ea));
64 hpfs_inode->i_ea_uid = 1; 65 hpfs_inode->i_ea_uid = 1;
65 } 66 }
66 kfree(ea); 67 kfree(ea);
67 } 68 }
68 if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) { 69 if ((ea = hpfs_get_ea(i->i_sb, fnode, "GID", &ea_size))) {
69 if (ea_size == 2) { 70 if (ea_size == 2) {
70 i->i_gid = le16_to_cpu(*(__le16*)ea); 71 i_gid_write(i, le16_to_cpu(*(__le16*)ea));
71 hpfs_inode->i_ea_gid = 1; 72 hpfs_inode->i_ea_gid = 1;
72 } 73 }
73 kfree(ea); 74 kfree(ea);
@@ -149,13 +150,13 @@ static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
149 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
150 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) { 151 } else*/ if (hpfs_sb(i->i_sb)->sb_eas >= 2) {
151 __le32 ea; 152 __le32 ea;
152 if ((i->i_uid != hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) { 153 if (!uid_eq(i->i_uid, hpfs_sb(i->i_sb)->sb_uid) || hpfs_inode->i_ea_uid) {
153 ea = cpu_to_le32(i->i_uid); 154 ea = cpu_to_le32(i_uid_read(i));
154 hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2); 155 hpfs_set_ea(i, fnode, "UID", (char*)&ea, 2);
155 hpfs_inode->i_ea_uid = 1; 156 hpfs_inode->i_ea_uid = 1;
156 } 157 }
157 if ((i->i_gid != hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) { 158 if (!gid_eq(i->i_gid, hpfs_sb(i->i_sb)->sb_gid) || hpfs_inode->i_ea_gid) {
158 ea = cpu_to_le32(i->i_gid); 159 ea = cpu_to_le32(i_gid_read(i));
159 hpfs_set_ea(i, fnode, "GID", (char *)&ea, 2); 160 hpfs_set_ea(i, fnode, "GID", (char *)&ea, 2);
160 hpfs_inode->i_ea_gid = 1; 161 hpfs_inode->i_ea_gid = 1;
161 } 162 }
@@ -261,9 +262,11 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
261 hpfs_lock(inode->i_sb); 262 hpfs_lock(inode->i_sb);
262 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) 263 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
263 goto out_unlock; 264 goto out_unlock;
264 if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000) 265 if ((attr->ia_valid & ATTR_UID) &&
266 from_kuid(&init_user_ns, attr->ia_uid) >= 0x10000)
265 goto out_unlock; 267 goto out_unlock;
266 if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000) 268 if ((attr->ia_valid & ATTR_GID) &&
269 from_kgid(&init_user_ns, attr->ia_gid) >= 0x10000)
267 goto out_unlock; 270 goto out_unlock;
268 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) 271 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
269 goto out_unlock; 272 goto out_unlock;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index bc9082482f68..345713d2f8f3 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -91,8 +91,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
91 inc_nlink(dir); 91 inc_nlink(dir);
92 insert_inode_hash(result); 92 insert_inode_hash(result);
93 93
94 if (result->i_uid != current_fsuid() || 94 if (!uid_eq(result->i_uid, current_fsuid()) ||
95 result->i_gid != current_fsgid() || 95 !gid_eq(result->i_gid, current_fsgid()) ||
96 result->i_mode != (mode | S_IFDIR)) { 96 result->i_mode != (mode | S_IFDIR)) {
97 result->i_uid = current_fsuid(); 97 result->i_uid = current_fsuid();
98 result->i_gid = current_fsgid(); 98 result->i_gid = current_fsgid();
@@ -179,8 +179,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b
179 179
180 insert_inode_hash(result); 180 insert_inode_hash(result);
181 181
182 if (result->i_uid != current_fsuid() || 182 if (!uid_eq(result->i_uid, current_fsuid()) ||
183 result->i_gid != current_fsgid() || 183 !gid_eq(result->i_gid, current_fsgid()) ||
184 result->i_mode != (mode | S_IFREG)) { 184 result->i_mode != (mode | S_IFREG)) {
185 result->i_uid = current_fsuid(); 185 result->i_uid = current_fsuid();
186 result->i_gid = current_fsgid(); 186 result->i_gid = current_fsgid();
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 706a12c083ea..bc28bf077a6a 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -210,6 +210,11 @@ static int init_inodecache(void)
210 210
211static void destroy_inodecache(void) 211static void destroy_inodecache(void)
212{ 212{
213 /*
214 * Make sure all delayed rcu free inodes are flushed before we
215 * destroy cache.
216 */
217 rcu_barrier();
213 kmem_cache_destroy(hpfs_inode_cachep); 218 kmem_cache_destroy(hpfs_inode_cachep);
214} 219}
215 220
@@ -251,7 +256,7 @@ static const match_table_t tokens = {
251 {Opt_err, NULL}, 256 {Opt_err, NULL},
252}; 257};
253 258
254static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, 259static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask,
255 int *lowercase, int *eas, int *chk, int *errs, 260 int *lowercase, int *eas, int *chk, int *errs,
256 int *chkdsk, int *timeshift) 261 int *chkdsk, int *timeshift)
257{ 262{
@@ -276,12 +281,16 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
276 case Opt_uid: 281 case Opt_uid:
277 if (match_int(args, &option)) 282 if (match_int(args, &option))
278 return 0; 283 return 0;
279 *uid = option; 284 *uid = make_kuid(current_user_ns(), option);
285 if (!uid_valid(*uid))
286 return 0;
280 break; 287 break;
281 case Opt_gid: 288 case Opt_gid:
282 if (match_int(args, &option)) 289 if (match_int(args, &option))
283 return 0; 290 return 0;
284 *gid = option; 291 *gid = make_kgid(current_user_ns(), option);
292 if (!gid_valid(*gid))
293 return 0;
285 break; 294 break;
286 case Opt_umask: 295 case Opt_umask:
287 if (match_octal(args, &option)) 296 if (match_octal(args, &option))
@@ -378,8 +387,8 @@ HPFS filesystem options:\n\
378 387
379static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) 388static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
380{ 389{
381 uid_t uid; 390 kuid_t uid;
382 gid_t gid; 391 kgid_t gid;
383 umode_t umask; 392 umode_t umask;
384 int lowercase, eas, chk, errs, chkdsk, timeshift; 393 int lowercase, eas, chk, errs, chkdsk, timeshift;
385 int o; 394 int o;
@@ -455,8 +464,8 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
455 struct hpfs_sb_info *sbi; 464 struct hpfs_sb_info *sbi;
456 struct inode *root; 465 struct inode *root;
457 466
458 uid_t uid; 467 kuid_t uid;
459 gid_t gid; 468 kgid_t gid;
460 umode_t umask; 469 umode_t umask;
461 int lowercase, eas, chk, errs, chkdsk, timeshift; 470 int lowercase, eas, chk, errs, chkdsk, timeshift;
462 471
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8349a899912e..c5bc355d8243 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -42,8 +42,8 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
42static const struct inode_operations hugetlbfs_inode_operations; 42static const struct inode_operations hugetlbfs_inode_operations;
43 43
44struct hugetlbfs_config { 44struct hugetlbfs_config {
45 uid_t uid; 45 kuid_t uid;
46 gid_t gid; 46 kgid_t gid;
47 umode_t mode; 47 umode_t mode;
48 long nr_blocks; 48 long nr_blocks;
49 long nr_inodes; 49 long nr_inodes;
@@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
110 * way when do_mmap_pgoff unwinds (may be important on powerpc 110 * way when do_mmap_pgoff unwinds (may be important on powerpc
111 * and ia64). 111 * and ia64).
112 */ 112 */
113 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 113 vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP;
114 vma->vm_ops = &hugetlb_vm_ops; 114 vma->vm_ops = &hugetlb_vm_ops;
115 115
116 if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) 116 if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
@@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
397} 397}
398 398
399static inline void 399static inline void
400hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) 400hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
401{ 401{
402 struct vm_area_struct *vma; 402 struct vm_area_struct *vma;
403 struct prio_tree_iter iter;
404 403
405 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { 404 vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
406 unsigned long v_offset; 405 unsigned long v_offset;
407 406
408 /* 407 /*
409 * Can the expression below overflow on 32-bit arches? 408 * Can the expression below overflow on 32-bit arches?
410 * No, because the prio_tree returns us only those vmas 409 * No, because the interval tree returns us only those vmas
411 * which overlap the truncated area starting at pgoff, 410 * which overlap the truncated area starting at pgoff,
412 * and no vma on a 32-bit arch can span beyond the 4GB. 411 * and no vma on a 32-bit arch can span beyond the 4GB.
413 */ 412 */
@@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
432 431
433 i_size_write(inode, offset); 432 i_size_write(inode, offset);
434 mutex_lock(&mapping->i_mmap_mutex); 433 mutex_lock(&mapping->i_mmap_mutex);
435 if (!prio_tree_empty(&mapping->i_mmap)) 434 if (!RB_EMPTY_ROOT(&mapping->i_mmap))
436 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 435 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
437 mutex_unlock(&mapping->i_mmap_mutex); 436 mutex_unlock(&mapping->i_mmap_mutex);
438 truncate_hugepages(inode, offset); 437 truncate_hugepages(inode, offset);
@@ -785,13 +784,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
785 case Opt_uid: 784 case Opt_uid:
786 if (match_int(&args[0], &option)) 785 if (match_int(&args[0], &option))
787 goto bad_val; 786 goto bad_val;
788 pconfig->uid = option; 787 pconfig->uid = make_kuid(current_user_ns(), option);
788 if (!uid_valid(pconfig->uid))
789 goto bad_val;
789 break; 790 break;
790 791
791 case Opt_gid: 792 case Opt_gid:
792 if (match_int(&args[0], &option)) 793 if (match_int(&args[0], &option))
793 goto bad_val; 794 goto bad_val;
794 pconfig->gid = option; 795 pconfig->gid = make_kgid(current_user_ns(), option);
796 if (!gid_valid(pconfig->gid))
797 goto bad_val;
795 break; 798 break;
796 799
797 case Opt_mode: 800 case Opt_mode:
@@ -924,7 +927,9 @@ static struct vfsmount *hugetlbfs_vfsmount;
924 927
925static int can_do_hugetlb_shm(void) 928static int can_do_hugetlb_shm(void)
926{ 929{
927 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 930 kgid_t shm_group;
931 shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group);
932 return capable(CAP_IPC_LOCK) || in_group_p(shm_group);
928} 933}
929 934
930struct file *hugetlb_file_setup(const char *name, unsigned long addr, 935struct file *hugetlb_file_setup(const char *name, unsigned long addr,
@@ -1042,6 +1047,11 @@ static int __init init_hugetlbfs_fs(void)
1042 1047
1043static void __exit exit_hugetlbfs_fs(void) 1048static void __exit exit_hugetlbfs_fs(void)
1044{ 1049{
1050 /*
1051 * Make sure all delayed rcu free inodes are flushed before we
1052 * destroy cache.
1053 */
1054 rcu_barrier();
1045 kmem_cache_destroy(hugetlbfs_inode_cachep); 1055 kmem_cache_destroy(hugetlbfs_inode_cachep);
1046 kern_unmount(hugetlbfs_vfsmount); 1056 kern_unmount(hugetlbfs_vfsmount);
1047 unregister_filesystem(&hugetlbfs_fs_type); 1057 unregister_filesystem(&hugetlbfs_fs_type);
diff --git a/fs/inode.c b/fs/inode.c
index ac8d904b3f16..b03c71957246 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
348 mutex_init(&mapping->i_mmap_mutex); 348 mutex_init(&mapping->i_mmap_mutex);
349 INIT_LIST_HEAD(&mapping->private_list); 349 INIT_LIST_HEAD(&mapping->private_list);
350 spin_lock_init(&mapping->private_lock); 350 spin_lock_init(&mapping->private_lock);
351 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); 351 mapping->i_mmap = RB_ROOT;
352 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); 352 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
353} 353}
354EXPORT_SYMBOL(address_space_init_once); 354EXPORT_SYMBOL(address_space_init_once);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29167bebe874..3bdad6d1f268 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -603,21 +603,14 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
603 603
604SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 604SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
605{ 605{
606 struct file *filp; 606 int error;
607 int error = -EBADF; 607 struct fd f = fdget(fd);
608 int fput_needed; 608
609 609 if (!f.file)
610 filp = fget_light(fd, &fput_needed); 610 return -EBADF;
611 if (!filp) 611 error = security_file_ioctl(f.file, cmd, arg);
612 goto out; 612 if (!error)
613 613 error = do_vfs_ioctl(f.file, fd, cmd, arg);
614 error = security_file_ioctl(filp, cmd, arg); 614 fdput(f);
615 if (error)
616 goto out_fput;
617
618 error = do_vfs_ioctl(filp, fd, cmd, arg);
619 out_fput:
620 fput_light(filp, fput_needed);
621 out:
622 return error; 615 return error;
623} 616}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 29037c365ba4..67ce52507d7d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -21,6 +21,7 @@
21#include <linux/cdrom.h> 21#include <linux/cdrom.h>
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/mpage.h> 23#include <linux/mpage.h>
24#include <linux/user_namespace.h>
24 25
25#include "isofs.h" 26#include "isofs.h"
26#include "zisofs.h" 27#include "zisofs.h"
@@ -114,6 +115,11 @@ static int init_inodecache(void)
114 115
115static void destroy_inodecache(void) 116static void destroy_inodecache(void)
116{ 117{
118 /*
119 * Make sure all delayed rcu free inodes are flushed before we
120 * destroy cache.
121 */
122 rcu_barrier();
117 kmem_cache_destroy(isofs_inode_cachep); 123 kmem_cache_destroy(isofs_inode_cachep);
118} 124}
119 125
@@ -171,8 +177,8 @@ struct iso9660_options{
171 unsigned int blocksize; 177 unsigned int blocksize;
172 umode_t fmode; 178 umode_t fmode;
173 umode_t dmode; 179 umode_t dmode;
174 gid_t gid; 180 kgid_t gid;
175 uid_t uid; 181 kuid_t uid;
176 char *iocharset; 182 char *iocharset;
177 /* LVE */ 183 /* LVE */
178 s32 session; 184 s32 session;
@@ -383,8 +389,8 @@ static int parse_options(char *options, struct iso9660_options *popt)
383 popt->fmode = popt->dmode = ISOFS_INVALID_MODE; 389 popt->fmode = popt->dmode = ISOFS_INVALID_MODE;
384 popt->uid_set = 0; 390 popt->uid_set = 0;
385 popt->gid_set = 0; 391 popt->gid_set = 0;
386 popt->gid = 0; 392 popt->gid = GLOBAL_ROOT_GID;
387 popt->uid = 0; 393 popt->uid = GLOBAL_ROOT_UID;
388 popt->iocharset = NULL; 394 popt->iocharset = NULL;
389 popt->utf8 = 0; 395 popt->utf8 = 0;
390 popt->overriderockperm = 0; 396 popt->overriderockperm = 0;
@@ -460,13 +466,17 @@ static int parse_options(char *options, struct iso9660_options *popt)
460 case Opt_uid: 466 case Opt_uid:
461 if (match_int(&args[0], &option)) 467 if (match_int(&args[0], &option))
462 return 0; 468 return 0;
463 popt->uid = option; 469 popt->uid = make_kuid(current_user_ns(), option);
470 if (!uid_valid(popt->uid))
471 return 0;
464 popt->uid_set = 1; 472 popt->uid_set = 1;
465 break; 473 break;
466 case Opt_gid: 474 case Opt_gid:
467 if (match_int(&args[0], &option)) 475 if (match_int(&args[0], &option))
468 return 0; 476 return 0;
469 popt->gid = option; 477 popt->gid = make_kgid(current_user_ns(), option);
478 if (!gid_valid(popt->gid))
479 return 0;
470 popt->gid_set = 1; 480 popt->gid_set = 1;
471 break; 481 break;
472 case Opt_mode: 482 case Opt_mode:
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 3620ad1ea9bc..99167238518d 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -52,8 +52,8 @@ struct isofs_sb_info {
52 52
53 umode_t s_fmode; 53 umode_t s_fmode;
54 umode_t s_dmode; 54 umode_t s_dmode;
55 gid_t s_gid; 55 kgid_t s_gid;
56 uid_t s_uid; 56 kuid_t s_uid;
57 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
58}; 58};
59 59
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 70e79d0c756a..c0bf42472e40 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -364,8 +364,8 @@ repeat:
364 case SIG('P', 'X'): 364 case SIG('P', 'X'):
365 inode->i_mode = isonum_733(rr->u.PX.mode); 365 inode->i_mode = isonum_733(rr->u.PX.mode);
366 set_nlink(inode, isonum_733(rr->u.PX.n_links)); 366 set_nlink(inode, isonum_733(rr->u.PX.n_links));
367 inode->i_uid = isonum_733(rr->u.PX.uid); 367 i_uid_write(inode, isonum_733(rr->u.PX.uid));
368 inode->i_gid = isonum_733(rr->u.PX.gid); 368 i_gid_write(inode, isonum_733(rr->u.PX.gid));
369 break; 369 break;
370 case SIG('P', 'N'): 370 case SIG('P', 'N'):
371 { 371 {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 52c15c776029..86b39b167c23 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -86,7 +86,12 @@ nope:
86static void release_data_buffer(struct buffer_head *bh) 86static void release_data_buffer(struct buffer_head *bh)
87{ 87{
88 if (buffer_freed(bh)) { 88 if (buffer_freed(bh)) {
89 WARN_ON_ONCE(buffer_dirty(bh));
89 clear_buffer_freed(bh); 90 clear_buffer_freed(bh);
91 clear_buffer_mapped(bh);
92 clear_buffer_new(bh);
93 clear_buffer_req(bh);
94 bh->b_bdev = NULL;
90 release_buffer_page(bh); 95 release_buffer_page(bh);
91 } else 96 } else
92 put_bh(bh); 97 put_bh(bh);
@@ -866,17 +871,35 @@ restart_loop:
866 * there's no point in keeping a checkpoint record for 871 * there's no point in keeping a checkpoint record for
867 * it. */ 872 * it. */
868 873
869 /* A buffer which has been freed while still being 874 /*
870 * journaled by a previous transaction may end up still 875 * A buffer which has been freed while still being journaled by
871 * being dirty here, but we want to avoid writing back 876 * a previous transaction.
872 * that buffer in the future after the "add to orphan" 877 */
873 * operation been committed, That's not only a performance 878 if (buffer_freed(bh)) {
874 * gain, it also stops aliasing problems if the buffer is 879 /*
875 * left behind for writeback and gets reallocated for another 880 * If the running transaction is the one containing
876 * use in a different page. */ 881 * "add to orphan" operation (b_next_transaction !=
877 if (buffer_freed(bh) && !jh->b_next_transaction) { 882 * NULL), we have to wait for that transaction to
878 clear_buffer_freed(bh); 883 * commit before we can really get rid of the buffer.
879 clear_buffer_jbddirty(bh); 884 * So just clear b_modified to not confuse transaction
885 * credit accounting and refile the buffer to
886 * BJ_Forget of the running transaction. If the just
887 * committed transaction contains "add to orphan"
888 * operation, we can completely invalidate the buffer
889 * now. We are rather throughout in that since the
890 * buffer may be still accessible when blocksize <
891 * pagesize and it is attached to the last partial
892 * page.
893 */
894 jh->b_modified = 0;
895 if (!jh->b_next_transaction) {
896 clear_buffer_freed(bh);
897 clear_buffer_jbddirty(bh);
898 clear_buffer_mapped(bh);
899 clear_buffer_new(bh);
900 clear_buffer_req(bh);
901 bh->b_bdev = NULL;
902 }
880 } 903 }
881 904
882 if (buffer_jbddirty(bh)) { 905 if (buffer_jbddirty(bh)) {
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 09357508ec9a..a2862339323b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal)
1113 1113
1114 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1114 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1115 spin_lock(&journal->j_state_lock); 1115 spin_lock(&journal->j_state_lock);
1116 /* Is it already empty? */
1117 if (sb->s_start == 0) {
1118 spin_unlock(&journal->j_state_lock);
1119 return;
1120 }
1116 jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", 1121 jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n",
1117 journal->j_tail_sequence); 1122 journal->j_tail_sequence);
1118 1123
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index febc10db5ced..78b7f84241d4 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1843,15 +1843,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1843 * We're outside-transaction here. Either or both of j_running_transaction 1843 * We're outside-transaction here. Either or both of j_running_transaction
1844 * and j_committing_transaction may be NULL. 1844 * and j_committing_transaction may be NULL.
1845 */ 1845 */
1846static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1846static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1847 int partial_page)
1847{ 1848{
1848 transaction_t *transaction; 1849 transaction_t *transaction;
1849 struct journal_head *jh; 1850 struct journal_head *jh;
1850 int may_free = 1; 1851 int may_free = 1;
1851 int ret;
1852 1852
1853 BUFFER_TRACE(bh, "entry"); 1853 BUFFER_TRACE(bh, "entry");
1854 1854
1855retry:
1855 /* 1856 /*
1856 * It is safe to proceed here without the j_list_lock because the 1857 * It is safe to proceed here without the j_list_lock because the
1857 * buffers cannot be stolen by try_to_free_buffers as long as we are 1858 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1879,10 +1880,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1879 * clear the buffer dirty bit at latest at the moment when the 1880 * clear the buffer dirty bit at latest at the moment when the
1880 * transaction marking the buffer as freed in the filesystem 1881 * transaction marking the buffer as freed in the filesystem
1881 * structures is committed because from that moment on the 1882 * structures is committed because from that moment on the
1882 * buffer can be reallocated and used by a different page. 1883 * block can be reallocated and used by a different page.
1883 * Since the block hasn't been freed yet but the inode has 1884 * Since the block hasn't been freed yet but the inode has
1884 * already been added to orphan list, it is safe for us to add 1885 * already been added to orphan list, it is safe for us to add
1885 * the buffer to BJ_Forget list of the newest transaction. 1886 * the buffer to BJ_Forget list of the newest transaction.
1887 *
1888 * Also we have to clear buffer_mapped flag of a truncated buffer
1889 * because the buffer_head may be attached to the page straddling
1890 * i_size (can happen only when blocksize < pagesize) and thus the
1891 * buffer_head can be reused when the file is extended again. So we end
1892 * up keeping around invalidated buffers attached to transactions'
1893 * BJ_Forget list just to stop checkpointing code from cleaning up
1894 * the transaction this buffer was modified in.
1886 */ 1895 */
1887 transaction = jh->b_transaction; 1896 transaction = jh->b_transaction;
1888 if (transaction == NULL) { 1897 if (transaction == NULL) {
@@ -1909,13 +1918,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1909 * committed, the buffer won't be needed any 1918 * committed, the buffer won't be needed any
1910 * longer. */ 1919 * longer. */
1911 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1920 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1912 ret = __dispose_buffer(jh, 1921 may_free = __dispose_buffer(jh,
1913 journal->j_running_transaction); 1922 journal->j_running_transaction);
1914 journal_put_journal_head(jh); 1923 goto zap_buffer;
1915 spin_unlock(&journal->j_list_lock);
1916 jbd_unlock_bh_state(bh);
1917 spin_unlock(&journal->j_state_lock);
1918 return ret;
1919 } else { 1924 } else {
1920 /* There is no currently-running transaction. So the 1925 /* There is no currently-running transaction. So the
1921 * orphan record which we wrote for this file must have 1926 * orphan record which we wrote for this file must have
@@ -1923,13 +1928,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1923 * the committing transaction, if it exists. */ 1928 * the committing transaction, if it exists. */
1924 if (journal->j_committing_transaction) { 1929 if (journal->j_committing_transaction) {
1925 JBUFFER_TRACE(jh, "give to committing trans"); 1930 JBUFFER_TRACE(jh, "give to committing trans");
1926 ret = __dispose_buffer(jh, 1931 may_free = __dispose_buffer(jh,
1927 journal->j_committing_transaction); 1932 journal->j_committing_transaction);
1928 journal_put_journal_head(jh); 1933 goto zap_buffer;
1929 spin_unlock(&journal->j_list_lock);
1930 jbd_unlock_bh_state(bh);
1931 spin_unlock(&journal->j_state_lock);
1932 return ret;
1933 } else { 1934 } else {
1934 /* The orphan record's transaction has 1935 /* The orphan record's transaction has
1935 * committed. We can cleanse this buffer */ 1936 * committed. We can cleanse this buffer */
@@ -1950,10 +1951,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1950 } 1951 }
1951 /* 1952 /*
1952 * The buffer is committing, we simply cannot touch 1953 * The buffer is committing, we simply cannot touch
1953 * it. So we just set j_next_transaction to the 1954 * it. If the page is straddling i_size we have to wait
1954 * running transaction (if there is one) and mark 1955 * for commit and try again.
1955 * buffer as freed so that commit code knows it should 1956 */
1956 * clear dirty bits when it is done with the buffer. 1957 if (partial_page) {
1958 tid_t tid = journal->j_committing_transaction->t_tid;
1959
1960 journal_put_journal_head(jh);
1961 spin_unlock(&journal->j_list_lock);
1962 jbd_unlock_bh_state(bh);
1963 spin_unlock(&journal->j_state_lock);
1964 log_wait_commit(journal, tid);
1965 goto retry;
1966 }
1967 /*
1968 * OK, buffer won't be reachable after truncate. We just set
1969 * j_next_transaction to the running transaction (if there is
1970 * one) and mark buffer as freed so that commit code knows it
1971 * should clear dirty bits when it is done with the buffer.
1957 */ 1972 */
1958 set_buffer_freed(bh); 1973 set_buffer_freed(bh);
1959 if (journal->j_running_transaction && buffer_jbddirty(bh)) 1974 if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1976,6 +1991,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1976 } 1991 }
1977 1992
1978zap_buffer: 1993zap_buffer:
1994 /*
1995 * This is tricky. Although the buffer is truncated, it may be reused
1996 * if blocksize < pagesize and it is attached to the page straddling
1997 * EOF. Since the buffer might have been added to BJ_Forget list of the
1998 * running transaction, journal_get_write_access() won't clear
1999 * b_modified and credit accounting gets confused. So clear b_modified
2000 * here. */
2001 jh->b_modified = 0;
1979 journal_put_journal_head(jh); 2002 journal_put_journal_head(jh);
1980zap_buffer_no_jh: 2003zap_buffer_no_jh:
1981 spin_unlock(&journal->j_list_lock); 2004 spin_unlock(&journal->j_list_lock);
@@ -2024,7 +2047,8 @@ void journal_invalidatepage(journal_t *journal,
2024 if (offset <= curr_off) { 2047 if (offset <= curr_off) {
2025 /* This block is wholly outside the truncation point */ 2048 /* This block is wholly outside the truncation point */
2026 lock_buffer(bh); 2049 lock_buffer(bh);
2027 may_free &= journal_unmap_buffer(journal, bh); 2050 may_free &= journal_unmap_buffer(journal, bh,
2051 offset > 0);
2028 unlock_buffer(bh); 2052 unlock_buffer(bh);
2029 } 2053 }
2030 curr_off = next_off; 2054 curr_off = next_off;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index af5280fb579b..3091d42992f0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1014,17 +1014,35 @@ restart_loop:
1014 * there's no point in keeping a checkpoint record for 1014 * there's no point in keeping a checkpoint record for
1015 * it. */ 1015 * it. */
1016 1016
1017 /* A buffer which has been freed while still being 1017 /*
1018 * journaled by a previous transaction may end up still 1018 * A buffer which has been freed while still being journaled by
1019 * being dirty here, but we want to avoid writing back 1019 * a previous transaction.
1020 * that buffer in the future after the "add to orphan" 1020 */
1021 * operation been committed, That's not only a performance 1021 if (buffer_freed(bh)) {
1022 * gain, it also stops aliasing problems if the buffer is 1022 /*
1023 * left behind for writeback and gets reallocated for another 1023 * If the running transaction is the one containing
1024 * use in a different page. */ 1024 * "add to orphan" operation (b_next_transaction !=
1025 if (buffer_freed(bh) && !jh->b_next_transaction) { 1025 * NULL), we have to wait for that transaction to
1026 clear_buffer_freed(bh); 1026 * commit before we can really get rid of the buffer.
1027 clear_buffer_jbddirty(bh); 1027 * So just clear b_modified to not confuse transaction
1028 * credit accounting and refile the buffer to
1029 * BJ_Forget of the running transaction. If the just
1030 * committed transaction contains "add to orphan"
1031 * operation, we can completely invalidate the buffer
1032 * now. We are rather through in that since the
1033 * buffer may be still accessible when blocksize <
1034 * pagesize and it is attached to the last partial
1035 * page.
1036 */
1037 jh->b_modified = 0;
1038 if (!jh->b_next_transaction) {
1039 clear_buffer_freed(bh);
1040 clear_buffer_jbddirty(bh);
1041 clear_buffer_mapped(bh);
1042 clear_buffer_new(bh);
1043 clear_buffer_req(bh);
1044 bh->b_bdev = NULL;
1045 }
1028 } 1046 }
1029 1047
1030 if (buffer_jbddirty(bh)) { 1048 if (buffer_jbddirty(bh)) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8625da27eccf..484b8d1c6cb6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal)
1354 1354
1355 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1355 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1356 read_lock(&journal->j_state_lock); 1356 read_lock(&journal->j_state_lock);
1357 /* Is it already empty? */
1358 if (sb->s_start == 0) {
1359 read_unlock(&journal->j_state_lock);
1360 return;
1361 }
1357 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1362 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
1358 journal->j_tail_sequence); 1363 journal->j_tail_sequence);
1359 1364
@@ -1377,7 +1382,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
1377 * Update a journal's errno. Write updated superblock to disk waiting for IO 1382 * Update a journal's errno. Write updated superblock to disk waiting for IO
1378 * to complete. 1383 * to complete.
1379 */ 1384 */
1380static void jbd2_journal_update_sb_errno(journal_t *journal) 1385void jbd2_journal_update_sb_errno(journal_t *journal)
1381{ 1386{
1382 journal_superblock_t *sb = journal->j_superblock; 1387 journal_superblock_t *sb = journal->j_superblock;
1383 1388
@@ -1390,6 +1395,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
1390 1395
1391 jbd2_write_superblock(journal, WRITE_SYNC); 1396 jbd2_write_superblock(journal, WRITE_SYNC);
1392} 1397}
1398EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
1393 1399
1394/* 1400/*
1395 * Read the superblock for a given journal, performing initial 1401 * Read the superblock for a given journal, performing initial
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 0131e4362534..626846bac32f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal)
289 if (!err) 289 if (!err)
290 err = err2; 290 err = err2;
291 /* Make sure all replayed data is on permanent storage */ 291 /* Make sure all replayed data is on permanent storage */
292 if (journal->j_flags & JBD2_BARRIER) 292 if (journal->j_flags & JBD2_BARRIER) {
293 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 293 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
294 if (!err)
295 err = err2;
296 }
294 return err; 297 return err;
295} 298}
296 299
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index fb1ab9533b67..a74ba4659549 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1841 * We're outside-transaction here. Either or both of j_running_transaction 1841 * We're outside-transaction here. Either or both of j_running_transaction
1842 * and j_committing_transaction may be NULL. 1842 * and j_committing_transaction may be NULL.
1843 */ 1843 */
1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1845 int partial_page)
1845{ 1846{
1846 transaction_t *transaction; 1847 transaction_t *transaction;
1847 struct journal_head *jh; 1848 struct journal_head *jh;
1848 int may_free = 1; 1849 int may_free = 1;
1849 int ret;
1850 1850
1851 BUFFER_TRACE(bh, "entry"); 1851 BUFFER_TRACE(bh, "entry");
1852 1852
1853retry:
1853 /* 1854 /*
1854 * It is safe to proceed here without the j_list_lock because the 1855 * It is safe to proceed here without the j_list_lock because the
1855 * buffers cannot be stolen by try_to_free_buffers as long as we are 1856 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1878 * clear the buffer dirty bit at latest at the moment when the 1879 * clear the buffer dirty bit at latest at the moment when the
1879 * transaction marking the buffer as freed in the filesystem 1880 * transaction marking the buffer as freed in the filesystem
1880 * structures is committed because from that moment on the 1881 * structures is committed because from that moment on the
1881 * buffer can be reallocated and used by a different page. 1882 * block can be reallocated and used by a different page.
1882 * Since the block hasn't been freed yet but the inode has 1883 * Since the block hasn't been freed yet but the inode has
1883 * already been added to orphan list, it is safe for us to add 1884 * already been added to orphan list, it is safe for us to add
1884 * the buffer to BJ_Forget list of the newest transaction. 1885 * the buffer to BJ_Forget list of the newest transaction.
1886 *
1887 * Also we have to clear buffer_mapped flag of a truncated buffer
1888 * because the buffer_head may be attached to the page straddling
1889 * i_size (can happen only when blocksize < pagesize) and thus the
1890 * buffer_head can be reused when the file is extended again. So we end
1891 * up keeping around invalidated buffers attached to transactions'
1892 * BJ_Forget list just to stop checkpointing code from cleaning up
1893 * the transaction this buffer was modified in.
1885 */ 1894 */
1886 transaction = jh->b_transaction; 1895 transaction = jh->b_transaction;
1887 if (transaction == NULL) { 1896 if (transaction == NULL) {
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1908 * committed, the buffer won't be needed any 1917 * committed, the buffer won't be needed any
1909 * longer. */ 1918 * longer. */
1910 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1919 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1911 ret = __dispose_buffer(jh, 1920 may_free = __dispose_buffer(jh,
1912 journal->j_running_transaction); 1921 journal->j_running_transaction);
1913 jbd2_journal_put_journal_head(jh); 1922 goto zap_buffer;
1914 spin_unlock(&journal->j_list_lock);
1915 jbd_unlock_bh_state(bh);
1916 write_unlock(&journal->j_state_lock);
1917 return ret;
1918 } else { 1923 } else {
1919 /* There is no currently-running transaction. So the 1924 /* There is no currently-running transaction. So the
1920 * orphan record which we wrote for this file must have 1925 * orphan record which we wrote for this file must have
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1922 * the committing transaction, if it exists. */ 1927 * the committing transaction, if it exists. */
1923 if (journal->j_committing_transaction) { 1928 if (journal->j_committing_transaction) {
1924 JBUFFER_TRACE(jh, "give to committing trans"); 1929 JBUFFER_TRACE(jh, "give to committing trans");
1925 ret = __dispose_buffer(jh, 1930 may_free = __dispose_buffer(jh,
1926 journal->j_committing_transaction); 1931 journal->j_committing_transaction);
1927 jbd2_journal_put_journal_head(jh); 1932 goto zap_buffer;
1928 spin_unlock(&journal->j_list_lock);
1929 jbd_unlock_bh_state(bh);
1930 write_unlock(&journal->j_state_lock);
1931 return ret;
1932 } else { 1933 } else {
1933 /* The orphan record's transaction has 1934 /* The orphan record's transaction has
1934 * committed. We can cleanse this buffer */ 1935 * committed. We can cleanse this buffer */
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1940 JBUFFER_TRACE(jh, "on committing transaction"); 1941 JBUFFER_TRACE(jh, "on committing transaction");
1941 /* 1942 /*
1942 * The buffer is committing, we simply cannot touch 1943 * The buffer is committing, we simply cannot touch
1943 * it. So we just set j_next_transaction to the 1944 * it. If the page is straddling i_size we have to wait
1944 * running transaction (if there is one) and mark 1945 * for commit and try again.
1945 * buffer as freed so that commit code knows it should 1946 */
1946 * clear dirty bits when it is done with the buffer. 1947 if (partial_page) {
1948 tid_t tid = journal->j_committing_transaction->t_tid;
1949
1950 jbd2_journal_put_journal_head(jh);
1951 spin_unlock(&journal->j_list_lock);
1952 jbd_unlock_bh_state(bh);
1953 write_unlock(&journal->j_state_lock);
1954 jbd2_log_wait_commit(journal, tid);
1955 goto retry;
1956 }
1957 /*
1958 * OK, buffer won't be reachable after truncate. We just set
1959 * j_next_transaction to the running transaction (if there is
1960 * one) and mark buffer as freed so that commit code knows it
1961 * should clear dirty bits when it is done with the buffer.
1947 */ 1962 */
1948 set_buffer_freed(bh); 1963 set_buffer_freed(bh);
1949 if (journal->j_running_transaction && buffer_jbddirty(bh)) 1964 if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1966 } 1981 }
1967 1982
1968zap_buffer: 1983zap_buffer:
1984 /*
1985 * This is tricky. Although the buffer is truncated, it may be reused
1986 * if blocksize < pagesize and it is attached to the page straddling
1987 * EOF. Since the buffer might have been added to BJ_Forget list of the
1988 * running transaction, journal_get_write_access() won't clear
1989 * b_modified and credit accounting gets confused. So clear b_modified
1990 * here.
1991 */
1992 jh->b_modified = 0;
1969 jbd2_journal_put_journal_head(jh); 1993 jbd2_journal_put_journal_head(jh);
1970zap_buffer_no_jh: 1994zap_buffer_no_jh:
1971 spin_unlock(&journal->j_list_lock); 1995 spin_unlock(&journal->j_list_lock);
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2017 if (offset <= curr_off) { 2041 if (offset <= curr_off) {
2018 /* This block is wholly outside the truncation point */ 2042 /* This block is wholly outside the truncation point */
2019 lock_buffer(bh); 2043 lock_buffer(bh);
2020 may_free &= journal_unmap_buffer(journal, bh); 2044 may_free &= journal_unmap_buffer(journal, bh,
2045 offset > 0);
2021 unlock_buffer(bh); 2046 unlock_buffer(bh);
2022 } 2047 }
2023 curr_off = next_off; 2048 curr_off = next_off;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 922f146e4235..223283c30111 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -94,15 +94,23 @@ static struct posix_acl *jffs2_acl_from_medium(void *value, size_t size)
94 case ACL_MASK: 94 case ACL_MASK:
95 case ACL_OTHER: 95 case ACL_OTHER:
96 value += sizeof(struct jffs2_acl_entry_short); 96 value += sizeof(struct jffs2_acl_entry_short);
97 acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
98 break; 97 break;
99 98
100 case ACL_USER: 99 case ACL_USER:
100 value += sizeof(struct jffs2_acl_entry);
101 if (value > end)
102 goto fail;
103 acl->a_entries[i].e_uid =
104 make_kuid(&init_user_ns,
105 je32_to_cpu(entry->e_id));
106 break;
101 case ACL_GROUP: 107 case ACL_GROUP:
102 value += sizeof(struct jffs2_acl_entry); 108 value += sizeof(struct jffs2_acl_entry);
103 if (value > end) 109 if (value > end)
104 goto fail; 110 goto fail;
105 acl->a_entries[i].e_id = je32_to_cpu(entry->e_id); 111 acl->a_entries[i].e_gid =
112 make_kgid(&init_user_ns,
113 je32_to_cpu(entry->e_id));
106 break; 114 break;
107 115
108 default: 116 default:
@@ -131,13 +139,19 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
131 header->a_version = cpu_to_je32(JFFS2_ACL_VERSION); 139 header->a_version = cpu_to_je32(JFFS2_ACL_VERSION);
132 e = header + 1; 140 e = header + 1;
133 for (i=0; i < acl->a_count; i++) { 141 for (i=0; i < acl->a_count; i++) {
142 const struct posix_acl_entry *acl_e = &acl->a_entries[i];
134 entry = e; 143 entry = e;
135 entry->e_tag = cpu_to_je16(acl->a_entries[i].e_tag); 144 entry->e_tag = cpu_to_je16(acl_e->e_tag);
136 entry->e_perm = cpu_to_je16(acl->a_entries[i].e_perm); 145 entry->e_perm = cpu_to_je16(acl_e->e_perm);
137 switch(acl->a_entries[i].e_tag) { 146 switch(acl_e->e_tag) {
138 case ACL_USER: 147 case ACL_USER:
148 entry->e_id = cpu_to_je32(
149 from_kuid(&init_user_ns, acl_e->e_uid));
150 e += sizeof(struct jffs2_acl_entry);
151 break;
139 case ACL_GROUP: 152 case ACL_GROUP:
140 entry->e_id = cpu_to_je32(acl->a_entries[i].e_id); 153 entry->e_id = cpu_to_je32(
154 from_kgid(&init_user_ns, acl_e->e_gid));
141 e += sizeof(struct jffs2_acl_entry); 155 e += sizeof(struct jffs2_acl_entry);
142 break; 156 break;
143 157
@@ -363,7 +377,7 @@ static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
363 return PTR_ERR(acl); 377 return PTR_ERR(acl);
364 if (!acl) 378 if (!acl)
365 return -ENODATA; 379 return -ENODATA;
366 rc = posix_acl_to_xattr(acl, buffer, size); 380 rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
367 posix_acl_release(acl); 381 posix_acl_release(acl);
368 382
369 return rc; 383 return rc;
@@ -381,7 +395,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
381 return -EPERM; 395 return -EPERM;
382 396
383 if (value) { 397 if (value) {
384 acl = posix_acl_from_xattr(value, size); 398 acl = posix_acl_from_xattr(&init_user_ns, value, size);
385 if (IS_ERR(acl)) 399 if (IS_ERR(acl))
386 return PTR_ERR(acl); 400 return PTR_ERR(acl);
387 if (acl) { 401 if (acl) {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index db3889ba8818..60ef3fb707ff 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -175,8 +175,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
175 ri.ino = cpu_to_je32(f->inocache->ino); 175 ri.ino = cpu_to_je32(f->inocache->ino);
176 ri.version = cpu_to_je32(++f->highest_version); 176 ri.version = cpu_to_je32(++f->highest_version);
177 ri.mode = cpu_to_jemode(inode->i_mode); 177 ri.mode = cpu_to_jemode(inode->i_mode);
178 ri.uid = cpu_to_je16(inode->i_uid); 178 ri.uid = cpu_to_je16(i_uid_read(inode));
179 ri.gid = cpu_to_je16(inode->i_gid); 179 ri.gid = cpu_to_je16(i_gid_read(inode));
180 ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); 180 ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
181 ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds()); 181 ri.atime = ri.ctime = ri.mtime = cpu_to_je32(get_seconds());
182 ri.offset = cpu_to_je32(inode->i_size); 182 ri.offset = cpu_to_je32(inode->i_size);
@@ -283,8 +283,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
283 /* Set the fields that the generic jffs2_write_inode_range() code can't find */ 283 /* Set the fields that the generic jffs2_write_inode_range() code can't find */
284 ri->ino = cpu_to_je32(inode->i_ino); 284 ri->ino = cpu_to_je32(inode->i_ino);
285 ri->mode = cpu_to_jemode(inode->i_mode); 285 ri->mode = cpu_to_jemode(inode->i_mode);
286 ri->uid = cpu_to_je16(inode->i_uid); 286 ri->uid = cpu_to_je16(i_uid_read(inode));
287 ri->gid = cpu_to_je16(inode->i_gid); 287 ri->gid = cpu_to_je16(i_gid_read(inode));
288 ri->isize = cpu_to_je32((uint32_t)inode->i_size); 288 ri->isize = cpu_to_je32((uint32_t)inode->i_size);
289 ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds()); 289 ri->atime = ri->ctime = ri->mtime = cpu_to_je32(get_seconds());
290 290
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3d3092eda811..fe3c0527545f 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -99,8 +99,10 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
99 ri->ino = cpu_to_je32(inode->i_ino); 99 ri->ino = cpu_to_je32(inode->i_ino);
100 ri->version = cpu_to_je32(++f->highest_version); 100 ri->version = cpu_to_je32(++f->highest_version);
101 101
102 ri->uid = cpu_to_je16((ivalid & ATTR_UID)?iattr->ia_uid:inode->i_uid); 102 ri->uid = cpu_to_je16((ivalid & ATTR_UID)?
103 ri->gid = cpu_to_je16((ivalid & ATTR_GID)?iattr->ia_gid:inode->i_gid); 103 from_kuid(&init_user_ns, iattr->ia_uid):i_uid_read(inode));
104 ri->gid = cpu_to_je16((ivalid & ATTR_GID)?
105 from_kgid(&init_user_ns, iattr->ia_gid):i_gid_read(inode));
104 106
105 if (ivalid & ATTR_MODE) 107 if (ivalid & ATTR_MODE)
106 ri->mode = cpu_to_jemode(iattr->ia_mode); 108 ri->mode = cpu_to_jemode(iattr->ia_mode);
@@ -147,8 +149,8 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
147 inode->i_ctime = ITIME(je32_to_cpu(ri->ctime)); 149 inode->i_ctime = ITIME(je32_to_cpu(ri->ctime));
148 inode->i_mtime = ITIME(je32_to_cpu(ri->mtime)); 150 inode->i_mtime = ITIME(je32_to_cpu(ri->mtime));
149 inode->i_mode = jemode_to_cpu(ri->mode); 151 inode->i_mode = jemode_to_cpu(ri->mode);
150 inode->i_uid = je16_to_cpu(ri->uid); 152 i_uid_write(inode, je16_to_cpu(ri->uid));
151 inode->i_gid = je16_to_cpu(ri->gid); 153 i_gid_write(inode, je16_to_cpu(ri->gid));
152 154
153 155
154 old_metadata = f->metadata; 156 old_metadata = f->metadata;
@@ -276,8 +278,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
276 return ERR_PTR(ret); 278 return ERR_PTR(ret);
277 } 279 }
278 inode->i_mode = jemode_to_cpu(latest_node.mode); 280 inode->i_mode = jemode_to_cpu(latest_node.mode);
279 inode->i_uid = je16_to_cpu(latest_node.uid); 281 i_uid_write(inode, je16_to_cpu(latest_node.uid));
280 inode->i_gid = je16_to_cpu(latest_node.gid); 282 i_gid_write(inode, je16_to_cpu(latest_node.gid));
281 inode->i_size = je32_to_cpu(latest_node.isize); 283 inode->i_size = je32_to_cpu(latest_node.isize);
282 inode->i_atime = ITIME(je32_to_cpu(latest_node.atime)); 284 inode->i_atime = ITIME(je32_to_cpu(latest_node.atime));
283 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 285 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
@@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
440 442
441 memset(ri, 0, sizeof(*ri)); 443 memset(ri, 0, sizeof(*ri));
442 /* Set OS-specific defaults for new inodes */ 444 /* Set OS-specific defaults for new inodes */
443 ri->uid = cpu_to_je16(current_fsuid()); 445 ri->uid = cpu_to_je16(from_kuid(&init_user_ns, current_fsuid()));
444 446
445 if (dir_i->i_mode & S_ISGID) { 447 if (dir_i->i_mode & S_ISGID) {
446 ri->gid = cpu_to_je16(dir_i->i_gid); 448 ri->gid = cpu_to_je16(i_gid_read(dir_i));
447 if (S_ISDIR(mode)) 449 if (S_ISDIR(mode))
448 mode |= S_ISGID; 450 mode |= S_ISGID;
449 } else { 451 } else {
450 ri->gid = cpu_to_je16(current_fsgid()); 452 ri->gid = cpu_to_je16(from_kgid(&init_user_ns, current_fsgid()));
451 } 453 }
452 454
453 /* POSIX ACLs have to be processed now, at least partly. 455 /* POSIX ACLs have to be processed now, at least partly.
@@ -467,8 +469,8 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
467 set_nlink(inode, 1); 469 set_nlink(inode, 1);
468 inode->i_ino = je32_to_cpu(ri->ino); 470 inode->i_ino = je32_to_cpu(ri->ino);
469 inode->i_mode = jemode_to_cpu(ri->mode); 471 inode->i_mode = jemode_to_cpu(ri->mode);
470 inode->i_gid = je16_to_cpu(ri->gid); 472 i_gid_write(inode, je16_to_cpu(ri->gid));
471 inode->i_uid = je16_to_cpu(ri->uid); 473 i_uid_write(inode, je16_to_cpu(ri->uid));
472 inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 474 inode->i_atime = inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
473 ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime)); 475 ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
474 476
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index bcd983d7e7f9..d200a9b8fd5e 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -27,8 +27,8 @@ struct kvec;
27 27
28#define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size) 28#define JFFS2_F_I_SIZE(f) (OFNI_EDONI_2SFFJ(f)->i_size)
29#define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode) 29#define JFFS2_F_I_MODE(f) (OFNI_EDONI_2SFFJ(f)->i_mode)
30#define JFFS2_F_I_UID(f) (OFNI_EDONI_2SFFJ(f)->i_uid) 30#define JFFS2_F_I_UID(f) (i_uid_read(OFNI_EDONI_2SFFJ(f)))
31#define JFFS2_F_I_GID(f) (OFNI_EDONI_2SFFJ(f)->i_gid) 31#define JFFS2_F_I_GID(f) (i_gid_read(OFNI_EDONI_2SFFJ(f)))
32#define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev) 32#define JFFS2_F_I_RDEV(f) (OFNI_EDONI_2SFFJ(f)->i_rdev)
33 33
34#define ITIME(sec) ((struct timespec){sec, 0}) 34#define ITIME(sec) ((struct timespec){sec, 0})
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1ea349fff68b..ae81b01e6fd7 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -394,8 +394,11 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
394} 394}
395 395
396/* Trivial function to remove the last node in the tree. Which by definition 396/* Trivial function to remove the last node in the tree. Which by definition
397 has no right-hand -- so can be removed just by making its only child (if 397 has no right-hand child — so can be removed just by making its left-hand
398 any) take its place under its parent. */ 398 child (if any) take its place under its parent. Since this is only done
399 when we're consuming the whole tree, there's no need to use rb_erase()
400 and let it worry about adjusting colours and balancing the tree. That
401 would just be a waste of time. */
399static void eat_last(struct rb_root *root, struct rb_node *node) 402static void eat_last(struct rb_root *root, struct rb_node *node)
400{ 403{
401 struct rb_node *parent = rb_parent(node); 404 struct rb_node *parent = rb_parent(node);
@@ -412,12 +415,12 @@ static void eat_last(struct rb_root *root, struct rb_node *node)
412 link = &parent->rb_right; 415 link = &parent->rb_right;
413 416
414 *link = node->rb_left; 417 *link = node->rb_left;
415 /* Colour doesn't matter now. Only the parent pointer. */
416 if (node->rb_left) 418 if (node->rb_left)
417 node->rb_left->rb_parent_color = node->rb_parent_color; 419 node->rb_left->__rb_parent_color = node->__rb_parent_color;
418} 420}
419 421
420/* We put this in reverse order, so we can just use eat_last */ 422/* We put the version tree in reverse order, so we can use the same eat_last()
423 function that we use to consume the tmpnode tree (tn_root). */
421static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn) 424static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn)
422{ 425{
423 struct rb_node **link = &ver_root->rb_node; 426 struct rb_node **link = &ver_root->rb_node;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 61ea41389f90..ff487954cd96 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -418,6 +418,12 @@ static void __exit exit_jffs2_fs(void)
418 unregister_filesystem(&jffs2_fs_type); 418 unregister_filesystem(&jffs2_fs_type);
419 jffs2_destroy_slab_caches(); 419 jffs2_destroy_slab_caches();
420 jffs2_compressors_exit(); 420 jffs2_compressors_exit();
421
422 /*
423 * Make sure all delayed rcu free inodes are flushed before we
424 * destroy cache.
425 */
426 rcu_barrier();
421 kmem_cache_destroy(jffs2_inode_cachep); 427 kmem_cache_destroy(jffs2_inode_cachep);
422} 428}
423 429
diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index a58fa72d7e59..d20d4737b3ef 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_JFS_FS) += jfs.o
6 6
7jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ 7jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
8 jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ 8 jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
9 jfs_unicode.o jfs_dtree.o jfs_inode.o \ 9 jfs_unicode.o jfs_dtree.o jfs_inode.o jfs_discard.o \
10 jfs_extent.o symlink.o jfs_metapage.o \ 10 jfs_extent.o symlink.o jfs_metapage.o \
11 jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ 11 jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \
12 resize.o xattr.o ioctl.o 12 resize.o xattr.o ioctl.o
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 45559dc3ea2f..d254d6d35995 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -64,7 +64,7 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type)
64 else 64 else
65 acl = ERR_PTR(size); 65 acl = ERR_PTR(size);
66 } else { 66 } else {
67 acl = posix_acl_from_xattr(value, size); 67 acl = posix_acl_from_xattr(&init_user_ns, value, size);
68 } 68 }
69 kfree(value); 69 kfree(value);
70 if (!IS_ERR(acl)) 70 if (!IS_ERR(acl))
@@ -100,7 +100,7 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
100 value = kmalloc(size, GFP_KERNEL); 100 value = kmalloc(size, GFP_KERNEL);
101 if (!value) 101 if (!value)
102 return -ENOMEM; 102 return -ENOMEM;
103 rc = posix_acl_to_xattr(acl, value, size); 103 rc = posix_acl_to_xattr(&init_user_ns, acl, value, size);
104 if (rc < 0) 104 if (rc < 0)
105 goto out; 105 goto out;
106 } 106 }
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 844f9460cb11..9d3afd157f99 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,8 +108,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
108 108
109 if (is_quota_modification(inode, iattr)) 109 if (is_quota_modification(inode, iattr))
110 dquot_initialize(inode); 110 dquot_initialize(inode);
111 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 111 if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) ||
112 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { 112 (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) {
113 rc = dquot_transfer(inode, iattr); 113 rc = dquot_transfer(inode, iattr);
114 if (rc) 114 if (rc)
115 return rc; 115 return rc;
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index f19d1e04a374..bc555ff417e9 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -11,13 +11,17 @@
11#include <linux/mount.h> 11#include <linux/mount.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/blkdev.h>
14#include <asm/current.h> 15#include <asm/current.h>
15#include <asm/uaccess.h> 16#include <asm/uaccess.h>
16 17
18#include "jfs_filsys.h"
19#include "jfs_debug.h"
17#include "jfs_incore.h" 20#include "jfs_incore.h"
18#include "jfs_dinode.h" 21#include "jfs_dinode.h"
19#include "jfs_inode.h" 22#include "jfs_inode.h"
20 23#include "jfs_dmap.h"
24#include "jfs_discard.h"
21 25
22static struct { 26static struct {
23 long jfs_flag; 27 long jfs_flag;
@@ -123,6 +127,40 @@ setflags_out:
123 mnt_drop_write_file(filp); 127 mnt_drop_write_file(filp);
124 return err; 128 return err;
125 } 129 }
130
131 case FITRIM:
132 {
133 struct super_block *sb = inode->i_sb;
134 struct request_queue *q = bdev_get_queue(sb->s_bdev);
135 struct fstrim_range range;
136 s64 ret = 0;
137
138 if (!capable(CAP_SYS_ADMIN))
139 return -EPERM;
140
141 if (!blk_queue_discard(q)) {
142 jfs_warn("FITRIM not supported on device");
143 return -EOPNOTSUPP;
144 }
145
146 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
147 sizeof(range)))
148 return -EFAULT;
149
150 range.minlen = max_t(unsigned int, range.minlen,
151 q->limits.discard_granularity);
152
153 ret = jfs_ioc_trim(inode, &range);
154 if (ret < 0)
155 return ret;
156
157 if (copy_to_user((struct fstrim_range __user *)arg, &range,
158 sizeof(range)))
159 return -EFAULT;
160
161 return 0;
162 }
163
126 default: 164 default:
127 return -ENOTTY; 165 return -ENOTTY;
128 } 166 }
@@ -142,6 +180,9 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
142 case JFS_IOC_SETFLAGS32: 180 case JFS_IOC_SETFLAGS32:
143 cmd = JFS_IOC_SETFLAGS; 181 cmd = JFS_IOC_SETFLAGS;
144 break; 182 break;
183 case FITRIM:
184 cmd = FITRIM;
185 break;
145 } 186 }
146 return jfs_ioctl(filp, cmd, arg); 187 return jfs_ioctl(filp, cmd, arg);
147} 188}
diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c
new file mode 100644
index 000000000000..9947563e4175
--- /dev/null
+++ b/fs/jfs/jfs_discard.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright (C) Tino Reichardt, 2012
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/fs.h>
20#include <linux/slab.h>
21#include <linux/blkdev.h>
22
23#include "jfs_incore.h"
24#include "jfs_superblock.h"
25#include "jfs_discard.h"
26#include "jfs_dmap.h"
27#include "jfs_debug.h"
28
29
30/*
31 * NAME: jfs_issue_discard()
32 *
33 * FUNCTION: TRIM the specified block range on device, if supported
34 *
35 * PARAMETERS:
36 * ip - pointer to in-core inode
37 * blkno - starting block number to be trimmed (0..N)
38 * nblocks - number of blocks to be trimmed
39 *
40 * RETURN VALUES:
41 * none
42 *
43 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
44 */
45void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks)
46{
47 struct super_block *sb = ip->i_sb;
48 int r = 0;
49
50 r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0);
51 if (unlikely(r != 0)) {
52 jfs_err("JFS: sb_issue_discard" \
53 "(%p, %llu, %llu, GFP_NOFS, 0) = %d => failed!\n",
54 sb, (unsigned long long)blkno,
55 (unsigned long long)nblocks, r);
56 }
57
58 jfs_info("JFS: sb_issue_discard" \
59 "(%p, %llu, %llu, GFP_NOFS, 0) = %d\n",
60 sb, (unsigned long long)blkno,
61 (unsigned long long)nblocks, r);
62
63 return;
64}
65
66/*
67 * NAME: jfs_ioc_trim()
68 *
69 * FUNCTION: attempt to discard (TRIM) all free blocks from the
70 * filesystem.
71 *
72 * PARAMETERS:
73 * ip - pointer to in-core inode;
74 * range - the range, given by user space
75 *
76 * RETURN VALUES:
77 * 0 - success
78 * -EIO - i/o error
79 */
80int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
81{
82 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
83 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
84 struct super_block *sb = ipbmap->i_sb;
85 int agno, agno_end;
86 s64 start, end, minlen;
87 u64 trimmed = 0;
88
89 /**
90 * convert byte values to block size of filesystem:
91 * start: First Byte to trim
92 * len: number of Bytes to trim from start
93 * minlen: minimum extent length in Bytes
94 */
95 start = range->start >> sb->s_blocksize_bits;
96 if (start < 0)
97 start = 0;
98 end = start + (range->len >> sb->s_blocksize_bits) - 1;
99 if (end >= bmp->db_mapsize)
100 end = bmp->db_mapsize - 1;
101 minlen = range->minlen >> sb->s_blocksize_bits;
102 if (minlen <= 0)
103 minlen = 1;
104
105 /**
106 * we trim all ag's within the range
107 */
108 agno = BLKTOAG(start, JFS_SBI(ip->i_sb));
109 agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb));
110 while (agno <= agno_end) {
111 trimmed += dbDiscardAG(ip, agno, minlen);
112 agno++;
113 }
114 range->len = trimmed << sb->s_blocksize_bits;
115
116 return 0;
117}
diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h
new file mode 100644
index 000000000000..40d1ee6081a0
--- /dev/null
+++ b/fs/jfs/jfs_discard.h
@@ -0,0 +1,26 @@
1/*
2 * Copyright (C) Tino Reichardt, 2012
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18#ifndef _H_JFS_DISCARD
19#define _H_JFS_DISCARD
20
21struct fstrim_range;
22
23extern void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks);
24extern int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range);
25
26#endif /* _H_JFS_DISCARD */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 9cbd11a3f804..9a55f53be5ff 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (C) International Business Machines Corp., 2000-2004 2 * Copyright (C) International Business Machines Corp., 2000-2004
3 * Portions Copyright (C) Tino Reichardt, 2012
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -25,6 +26,7 @@
25#include "jfs_lock.h" 26#include "jfs_lock.h"
26#include "jfs_metapage.h" 27#include "jfs_metapage.h"
27#include "jfs_debug.h" 28#include "jfs_debug.h"
29#include "jfs_discard.h"
28 30
29/* 31/*
30 * SERIALIZATION of the Block Allocation Map. 32 * SERIALIZATION of the Block Allocation Map.
@@ -104,7 +106,6 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
104static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 106static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
105 int nblocks); 107 int nblocks);
106static int dbMaxBud(u8 * cp); 108static int dbMaxBud(u8 * cp);
107s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
108static int blkstol2(s64 nb); 109static int blkstol2(s64 nb);
109 110
110static int cntlz(u32 value); 111static int cntlz(u32 value);
@@ -145,7 +146,6 @@ static const s8 budtab[256] = {
145 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 146 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
146}; 147};
147 148
148
149/* 149/*
150 * NAME: dbMount() 150 * NAME: dbMount()
151 * 151 *
@@ -310,7 +310,6 @@ int dbSync(struct inode *ipbmap)
310 return (0); 310 return (0);
311} 311}
312 312
313
314/* 313/*
315 * NAME: dbFree() 314 * NAME: dbFree()
316 * 315 *
@@ -337,6 +336,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
337 s64 lblkno, rem; 336 s64 lblkno, rem;
338 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 337 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
339 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 338 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
339 struct super_block *sb = ipbmap->i_sb;
340 340
341 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 341 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
342 342
@@ -351,6 +351,13 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
351 return -EIO; 351 return -EIO;
352 } 352 }
353 353
354 /**
355 * TRIM the blocks, when mounted with discard option
356 */
357 if (JFS_SBI(sb)->flag & JFS_DISCARD)
358 if (JFS_SBI(sb)->minblks_trim <= nblocks)
359 jfs_issue_discard(ipbmap, blkno, nblocks);
360
354 /* 361 /*
355 * free the blocks a dmap at a time. 362 * free the blocks a dmap at a time.
356 */ 363 */
@@ -1095,7 +1102,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1095 /* we were not successful */ 1102 /* we were not successful */
1096 release_metapage(mp); 1103 release_metapage(mp);
1097 1104
1098
1099 return (rc); 1105 return (rc);
1100} 1106}
1101 1107
@@ -1590,6 +1596,118 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1590 1596
1591 1597
1592/* 1598/*
1599 * NAME: dbDiscardAG()
1600 *
1601 * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG
1602 *
1603 * algorithm:
1604 * 1) allocate blocks, as large as possible and save them
1605 * while holding IWRITE_LOCK on ipbmap
1606 * 2) trim all these saved block/length values
1607 * 3) mark the blocks free again
1608 *
1609 * benefit:
1610 * - we work only on one ag at some time, minimizing how long we
1611 * need to lock ipbmap
1612 * - reading / writing the fs is possible most time, even on
1613 * trimming
1614 *
1615 * downside:
1616 * - we write two times to the dmapctl and dmap pages
1617 * - but for me, this seems the best way, better ideas?
1618 * /TR 2012
1619 *
1620 * PARAMETERS:
1621 * ip - pointer to in-core inode
1622 * agno - ag to trim
1623 * minlen - minimum value of contiguous blocks
1624 *
1625 * RETURN VALUES:
1626 * s64 - actual number of blocks trimmed
1627 */
1628s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
1629{
1630 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
1631 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
1632 s64 nblocks, blkno;
1633 u64 trimmed = 0;
1634 int rc, l2nb;
1635 struct super_block *sb = ipbmap->i_sb;
1636
1637 struct range2trim {
1638 u64 blkno;
1639 u64 nblocks;
1640 } *totrim, *tt;
1641
1642 /* max blkno / nblocks pairs to trim */
1643 int count = 0, range_cnt;
1644 u64 max_ranges;
1645
1646 /* prevent others from writing new stuff here, while trimming */
1647 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
1648
1649 nblocks = bmp->db_agfree[agno];
1650 max_ranges = nblocks;
1651 do_div(max_ranges, minlen);
1652 range_cnt = min_t(u64, max_ranges + 1, 32 * 1024);
1653 totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS);
1654 if (totrim == NULL) {
1655 jfs_error(bmp->db_ipbmap->i_sb,
1656 "dbDiscardAG: no memory for trim array");
1657 IWRITE_UNLOCK(ipbmap);
1658 return 0;
1659 }
1660
1661 tt = totrim;
1662 while (nblocks >= minlen) {
1663 l2nb = BLKSTOL2(nblocks);
1664
1665 /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */
1666 rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno);
1667 if (rc == 0) {
1668 tt->blkno = blkno;
1669 tt->nblocks = nblocks;
1670 tt++; count++;
1671
1672 /* the whole ag is free, trim now */
1673 if (bmp->db_agfree[agno] == 0)
1674 break;
1675
1676 /* give a hint for the next while */
1677 nblocks = bmp->db_agfree[agno];
1678 continue;
1679 } else if (rc == -ENOSPC) {
1680 /* search for next smaller log2 block */
1681 l2nb = BLKSTOL2(nblocks) - 1;
1682 nblocks = 1 << l2nb;
1683 } else {
1684 /* Trim any already allocated blocks */
1685 jfs_error(bmp->db_ipbmap->i_sb,
1686 "dbDiscardAG: -EIO");
1687 break;
1688 }
1689
1690 /* check, if our trim array is full */
1691 if (unlikely(count >= range_cnt - 1))
1692 break;
1693 }
1694 IWRITE_UNLOCK(ipbmap);
1695
1696 tt->nblocks = 0; /* mark the current end */
1697 for (tt = totrim; tt->nblocks != 0; tt++) {
1698 /* when mounted with online discard, dbFree() will
1699 * call jfs_issue_discard() itself */
1700 if (!(JFS_SBI(sb)->flag & JFS_DISCARD))
1701 jfs_issue_discard(ip, tt->blkno, tt->nblocks);
1702 dbFree(ip, tt->blkno, tt->nblocks);
1703 trimmed += tt->nblocks;
1704 }
1705 kfree(totrim);
1706
1707 return trimmed;
1708}
1709
1710/*
1593 * NAME: dbFindCtl() 1711 * NAME: dbFindCtl()
1594 * 1712 *
1595 * FUNCTION: starting at a specified dmap control page level and block 1713 * FUNCTION: starting at a specified dmap control page level and block
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 6dcb906c55d8..562b9a7e4311 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -311,4 +311,6 @@ extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks);
311extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); 311extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks);
312extern void dbFinalizeBmap(struct inode *ipbmap); 312extern void dbFinalizeBmap(struct inode *ipbmap);
313extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); 313extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
314extern s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen);
315
314#endif /* _H_JFS_DMAP */ 316#endif /* _H_JFS_DMAP */
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index b3f5463fbe52..b67d64671bb4 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -45,6 +45,9 @@
45/* mount time flag to disable journaling to disk */ 45/* mount time flag to disable journaling to disk */
46#define JFS_NOINTEGRITY 0x00000040 46#define JFS_NOINTEGRITY 0x00000040
47 47
48/* mount time flag to enable TRIM to ssd disks */
49#define JFS_DISCARD 0x00000080
50
48/* commit option */ 51/* commit option */
49#define JFS_COMMIT 0x00000f00 /* commit option mask */ 52#define JFS_COMMIT 0x00000f00 /* commit option mask */
50#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ 53#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 1b6f15f191b3..6ba4006e011b 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3078,15 +3078,15 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3078 } 3078 }
3079 set_nlink(ip, le32_to_cpu(dip->di_nlink)); 3079 set_nlink(ip, le32_to_cpu(dip->di_nlink));
3080 3080
3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); 3081 jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3082 if (sbi->uid == -1) 3082 if (!uid_valid(sbi->uid))
3083 ip->i_uid = jfs_ip->saved_uid; 3083 ip->i_uid = jfs_ip->saved_uid;
3084 else { 3084 else {
3085 ip->i_uid = sbi->uid; 3085 ip->i_uid = sbi->uid;
3086 } 3086 }
3087 3087
3088 jfs_ip->saved_gid = le32_to_cpu(dip->di_gid); 3088 jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3089 if (sbi->gid == -1) 3089 if (!gid_valid(sbi->gid))
3090 ip->i_gid = jfs_ip->saved_gid; 3090 ip->i_gid = jfs_ip->saved_gid;
3091 else { 3091 else {
3092 ip->i_gid = sbi->gid; 3092 ip->i_gid = sbi->gid;
@@ -3150,14 +3150,16 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3150 dip->di_size = cpu_to_le64(ip->i_size); 3150 dip->di_size = cpu_to_le64(ip->i_size);
3151 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); 3151 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3152 dip->di_nlink = cpu_to_le32(ip->i_nlink); 3152 dip->di_nlink = cpu_to_le32(ip->i_nlink);
3153 if (sbi->uid == -1) 3153 if (!uid_valid(sbi->uid))
3154 dip->di_uid = cpu_to_le32(ip->i_uid); 3154 dip->di_uid = cpu_to_le32(i_uid_read(ip));
3155 else 3155 else
3156 dip->di_uid = cpu_to_le32(jfs_ip->saved_uid); 3156 dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3157 if (sbi->gid == -1) 3157 jfs_ip->saved_uid));
3158 dip->di_gid = cpu_to_le32(ip->i_gid); 3158 if (!gid_valid(sbi->gid))
3159 dip->di_gid = cpu_to_le32(i_gid_read(ip));
3159 else 3160 else
3160 dip->di_gid = cpu_to_le32(jfs_ip->saved_gid); 3161 dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3162 jfs_ip->saved_gid));
3161 jfs_get_inode_flags(jfs_ip); 3163 jfs_get_inode_flags(jfs_ip);
3162 /* 3164 /*
3163 * mode2 is only needed for storing the higher order bits. 3165 * mode2 is only needed for storing the higher order bits.
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 584a4a1a6e81..cf47f09e8ac8 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -38,8 +38,8 @@
38struct jfs_inode_info { 38struct jfs_inode_info {
39 int fileset; /* fileset number (always 16)*/ 39 int fileset; /* fileset number (always 16)*/
40 uint mode2; /* jfs-specific mode */ 40 uint mode2; /* jfs-specific mode */
41 uint saved_uid; /* saved for uid mount option */ 41 kuid_t saved_uid; /* saved for uid mount option */
42 uint saved_gid; /* saved for gid mount option */ 42 kgid_t saved_gid; /* saved for gid mount option */
43 pxd_t ixpxd; /* inode extent descriptor */ 43 pxd_t ixpxd; /* inode extent descriptor */
44 dxd_t acl; /* dxd describing acl */ 44 dxd_t acl; /* dxd describing acl */
45 dxd_t ea; /* dxd describing ea */ 45 dxd_t ea; /* dxd describing ea */
@@ -192,9 +192,10 @@ struct jfs_sb_info {
192 uint state; /* mount/recovery state */ 192 uint state; /* mount/recovery state */
193 unsigned long flag; /* mount time flags */ 193 unsigned long flag; /* mount time flags */
194 uint p_state; /* state prior to going no integrity */ 194 uint p_state; /* state prior to going no integrity */
195 uint uid; /* uid to override on-disk uid */ 195 kuid_t uid; /* uid to override on-disk uid */
196 uint gid; /* gid to override on-disk gid */ 196 kgid_t gid; /* gid to override on-disk gid */
197 uint umask; /* umask to override on-disk umask */ 197 uint umask; /* umask to override on-disk umask */
198 uint minblks_trim; /* minimum blocks, for online trim */
198}; 199};
199 200
200/* jfs_sb_info commit_state */ 201/* jfs_sb_info commit_state */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index bb8b661bcc50..5fcc02eaa64c 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2977,12 +2977,9 @@ int jfs_sync(void *arg)
2977 * put back on the anon_list. 2977 * put back on the anon_list.
2978 */ 2978 */
2979 2979
2980 /* Take off anon_list */ 2980 /* Move from anon_list to anon_list2 */
2981 list_del(&jfs_ip->anon_inode_list); 2981 list_move(&jfs_ip->anon_inode_list,
2982 2982 &TxAnchor.anon_list2);
2983 /* Put on anon_list2 */
2984 list_add(&jfs_ip->anon_inode_list,
2985 &TxAnchor.anon_list2);
2986 2983
2987 TXN_UNLOCK(); 2984 TXN_UNLOCK();
2988 iput(ip); 2985 iput(ip);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index c55c7452d285..1a543be09c79 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -33,6 +33,7 @@
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <asm/uaccess.h> 34#include <asm/uaccess.h>
35#include <linux/seq_file.h> 35#include <linux/seq_file.h>
36#include <linux/blkdev.h>
36 37
37#include "jfs_incore.h" 38#include "jfs_incore.h"
38#include "jfs_filsys.h" 39#include "jfs_filsys.h"
@@ -100,7 +101,7 @@ void jfs_error(struct super_block *sb, const char * function, ...)
100 vsnprintf(error_buf, sizeof(error_buf), function, args); 101 vsnprintf(error_buf, sizeof(error_buf), function, args);
101 va_end(args); 102 va_end(args);
102 103
103 printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); 104 pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf);
104 105
105 jfs_handle_error(sb); 106 jfs_handle_error(sb);
106} 107}
@@ -197,7 +198,8 @@ static void jfs_put_super(struct super_block *sb)
197enum { 198enum {
198 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, 199 Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
199 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, 200 Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
200 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask 201 Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask,
202 Opt_discard, Opt_nodiscard, Opt_discard_minblk
201}; 203};
202 204
203static const match_table_t tokens = { 205static const match_table_t tokens = {
@@ -214,6 +216,9 @@ static const match_table_t tokens = {
214 {Opt_uid, "uid=%u"}, 216 {Opt_uid, "uid=%u"},
215 {Opt_gid, "gid=%u"}, 217 {Opt_gid, "gid=%u"},
216 {Opt_umask, "umask=%u"}, 218 {Opt_umask, "umask=%u"},
219 {Opt_discard, "discard"},
220 {Opt_nodiscard, "nodiscard"},
221 {Opt_discard_minblk, "discard=%u"},
217 {Opt_err, NULL} 222 {Opt_err, NULL}
218}; 223};
219 224
@@ -255,8 +260,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
255 else { 260 else {
256 nls_map = load_nls(args[0].from); 261 nls_map = load_nls(args[0].from);
257 if (!nls_map) { 262 if (!nls_map) {
258 printk(KERN_ERR 263 pr_err("JFS: charset not found\n");
259 "JFS: charset not found\n");
260 goto cleanup; 264 goto cleanup;
261 } 265 }
262 } 266 }
@@ -272,8 +276,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
272 *newLVSize = sb->s_bdev->bd_inode->i_size >> 276 *newLVSize = sb->s_bdev->bd_inode->i_size >>
273 sb->s_blocksize_bits; 277 sb->s_blocksize_bits;
274 if (*newLVSize == 0) 278 if (*newLVSize == 0)
275 printk(KERN_ERR 279 pr_err("JFS: Cannot determine volume size\n");
276 "JFS: Cannot determine volume size\n");
277 break; 280 break;
278 } 281 }
279 case Opt_errors: 282 case Opt_errors:
@@ -294,8 +297,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
294 *flag &= ~JFS_ERR_REMOUNT_RO; 297 *flag &= ~JFS_ERR_REMOUNT_RO;
295 *flag |= JFS_ERR_PANIC; 298 *flag |= JFS_ERR_PANIC;
296 } else { 299 } else {
297 printk(KERN_ERR 300 pr_err("JFS: %s is an invalid error handler\n",
298 "JFS: %s is an invalid error handler\n",
299 errors); 301 errors);
300 goto cleanup; 302 goto cleanup;
301 } 303 }
@@ -314,33 +316,76 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
314 case Opt_usrquota: 316 case Opt_usrquota:
315 case Opt_grpquota: 317 case Opt_grpquota:
316 case Opt_quota: 318 case Opt_quota:
317 printk(KERN_ERR 319 pr_err("JFS: quota operations not supported\n");
318 "JFS: quota operations not supported\n");
319 break; 320 break;
320#endif 321#endif
321 case Opt_uid: 322 case Opt_uid:
322 { 323 {
323 char *uid = args[0].from; 324 char *uid = args[0].from;
324 sbi->uid = simple_strtoul(uid, &uid, 0); 325 uid_t val = simple_strtoul(uid, &uid, 0);
326 sbi->uid = make_kuid(current_user_ns(), val);
327 if (!uid_valid(sbi->uid))
328 goto cleanup;
325 break; 329 break;
326 } 330 }
331
327 case Opt_gid: 332 case Opt_gid:
328 { 333 {
329 char *gid = args[0].from; 334 char *gid = args[0].from;
330 sbi->gid = simple_strtoul(gid, &gid, 0); 335 gid_t val = simple_strtoul(gid, &gid, 0);
336 sbi->gid = make_kgid(current_user_ns(), val);
337 if (!gid_valid(sbi->gid))
338 goto cleanup;
331 break; 339 break;
332 } 340 }
341
333 case Opt_umask: 342 case Opt_umask:
334 { 343 {
335 char *umask = args[0].from; 344 char *umask = args[0].from;
336 sbi->umask = simple_strtoul(umask, &umask, 8); 345 sbi->umask = simple_strtoul(umask, &umask, 8);
337 if (sbi->umask & ~0777) { 346 if (sbi->umask & ~0777) {
338 printk(KERN_ERR 347 pr_err("JFS: Invalid value of umask\n");
339 "JFS: Invalid value of umask\n");
340 goto cleanup; 348 goto cleanup;
341 } 349 }
342 break; 350 break;
343 } 351 }
352
353 case Opt_discard:
354 {
355 struct request_queue *q = bdev_get_queue(sb->s_bdev);
356 /* if set to 1, even copying files will cause
357 * trimming :O
358 * -> user has more control over the online trimming
359 */
360 sbi->minblks_trim = 64;
361 if (blk_queue_discard(q)) {
362 *flag |= JFS_DISCARD;
363 } else {
364 pr_err("JFS: discard option " \
365 "not supported on device\n");
366 }
367 break;
368 }
369
370 case Opt_nodiscard:
371 *flag &= ~JFS_DISCARD;
372 break;
373
374 case Opt_discard_minblk:
375 {
376 struct request_queue *q = bdev_get_queue(sb->s_bdev);
377 char *minblks_trim = args[0].from;
378 if (blk_queue_discard(q)) {
379 *flag |= JFS_DISCARD;
380 sbi->minblks_trim = simple_strtoull(
381 minblks_trim, &minblks_trim, 0);
382 } else {
383 pr_err("JFS: discard option " \
384 "not supported on device\n");
385 }
386 break;
387 }
388
344 default: 389 default:
345 printk("jfs: Unrecognized mount option \"%s\" " 390 printk("jfs: Unrecognized mount option \"%s\" "
346 " or missing value\n", p); 391 " or missing value\n", p);
@@ -374,8 +419,8 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
374 419
375 if (newLVSize) { 420 if (newLVSize) {
376 if (sb->s_flags & MS_RDONLY) { 421 if (sb->s_flags & MS_RDONLY) {
377 printk(KERN_ERR 422 pr_err("JFS: resize requires volume" \
378 "JFS: resize requires volume to be mounted read-write\n"); 423 " to be mounted read-write\n");
379 return -EROFS; 424 return -EROFS;
380 } 425 }
381 rc = jfs_extendfs(sb, newLVSize, 0); 426 rc = jfs_extendfs(sb, newLVSize, 0);
@@ -443,7 +488,9 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
443 sb->s_fs_info = sbi; 488 sb->s_fs_info = sbi;
444 sb->s_max_links = JFS_LINK_MAX; 489 sb->s_max_links = JFS_LINK_MAX;
445 sbi->sb = sb; 490 sbi->sb = sb;
446 sbi->uid = sbi->gid = sbi->umask = -1; 491 sbi->uid = INVALID_UID;
492 sbi->gid = INVALID_GID;
493 sbi->umask = -1;
447 494
448 /* initialize the mount flag and determine the default error handler */ 495 /* initialize the mount flag and determine the default error handler */
449 flag = JFS_ERR_REMOUNT_RO; 496 flag = JFS_ERR_REMOUNT_RO;
@@ -457,7 +504,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
457#endif 504#endif
458 505
459 if (newLVSize) { 506 if (newLVSize) {
460 printk(KERN_ERR "resize option for remount only\n"); 507 pr_err("resize option for remount only\n");
461 goto out_kfree; 508 goto out_kfree;
462 } 509 }
463 510
@@ -617,14 +664,16 @@ static int jfs_show_options(struct seq_file *seq, struct dentry *root)
617{ 664{
618 struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); 665 struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
619 666
620 if (sbi->uid != -1) 667 if (uid_valid(sbi->uid))
621 seq_printf(seq, ",uid=%d", sbi->uid); 668 seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid));
622 if (sbi->gid != -1) 669 if (gid_valid(sbi->gid))
623 seq_printf(seq, ",gid=%d", sbi->gid); 670 seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid));
624 if (sbi->umask != -1) 671 if (sbi->umask != -1)
625 seq_printf(seq, ",umask=%03o", sbi->umask); 672 seq_printf(seq, ",umask=%03o", sbi->umask);
626 if (sbi->flag & JFS_NOINTEGRITY) 673 if (sbi->flag & JFS_NOINTEGRITY)
627 seq_puts(seq, ",nointegrity"); 674 seq_puts(seq, ",nointegrity");
675 if (sbi->flag & JFS_DISCARD)
676 seq_printf(seq, ",discard=%u", sbi->minblks_trim);
628 if (sbi->nls_tab) 677 if (sbi->nls_tab)
629 seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); 678 seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset);
630 if (sbi->flag & JFS_ERR_CONTINUE) 679 if (sbi->flag & JFS_ERR_CONTINUE)
@@ -903,6 +952,12 @@ static void __exit exit_jfs_fs(void)
903 jfs_proc_clean(); 952 jfs_proc_clean();
904#endif 953#endif
905 unregister_filesystem(&jfs_fs_type); 954 unregister_filesystem(&jfs_fs_type);
955
956 /*
957 * Make sure all delayed rcu free inodes are flushed before we
958 * destroy cache.
959 */
960 rcu_barrier();
906 kmem_cache_destroy(jfs_inode_cachep); 961 kmem_cache_destroy(jfs_inode_cachep);
907} 962}
908 963
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 26683e15b3ac..42d67f9757bf 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -685,7 +685,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
685 * POSIX_ACL_XATTR_ACCESS is tied to i_mode 685 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
686 */ 686 */
687 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { 687 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
688 acl = posix_acl_from_xattr(value, value_len); 688 acl = posix_acl_from_xattr(&init_user_ns, value, value_len);
689 if (IS_ERR(acl)) { 689 if (IS_ERR(acl)) {
690 rc = PTR_ERR(acl); 690 rc = PTR_ERR(acl);
691 printk(KERN_ERR "posix_acl_from_xattr returned %d\n", 691 printk(KERN_ERR "posix_acl_from_xattr returned %d\n",
@@ -710,7 +710,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
710 710
711 return 0; 711 return 0;
712 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { 712 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
713 acl = posix_acl_from_xattr(value, value_len); 713 acl = posix_acl_from_xattr(&init_user_ns, value, value_len);
714 if (IS_ERR(acl)) { 714 if (IS_ERR(acl)) {
715 rc = PTR_ERR(acl); 715 rc = PTR_ERR(acl);
716 printk(KERN_ERR "posix_acl_from_xattr returned %d\n", 716 printk(KERN_ERR "posix_acl_from_xattr returned %d\n",
diff --git a/fs/libfs.c b/fs/libfs.c
index a74cb1725ac6..7cc37ca19cd8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -874,7 +874,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
874EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 874EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
875 875
876/** 876/**
877 * generic_fh_to_dentry - generic helper for the fh_to_parent export operation 877 * generic_fh_to_parent - generic helper for the fh_to_parent export operation
878 * @sb: filesystem to do the file handle conversion on 878 * @sb: filesystem to do the file handle conversion on
879 * @fid: file handle to convert 879 * @fid: file handle to convert
880 * @fh_len: length of the file handle in bytes 880 * @fh_len: length of the file handle in bytes
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index fb1a2bedbe97..8d80c990dffd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -289,7 +289,6 @@ static void nlmsvc_free_block(struct kref *kref)
289 dprintk("lockd: freeing block %p...\n", block); 289 dprintk("lockd: freeing block %p...\n", block);
290 290
291 /* Remove block from file's list of blocks */ 291 /* Remove block from file's list of blocks */
292 mutex_lock(&file->f_mutex);
293 list_del_init(&block->b_flist); 292 list_del_init(&block->b_flist);
294 mutex_unlock(&file->f_mutex); 293 mutex_unlock(&file->f_mutex);
295 294
@@ -303,7 +302,7 @@ static void nlmsvc_free_block(struct kref *kref)
303static void nlmsvc_release_block(struct nlm_block *block) 302static void nlmsvc_release_block(struct nlm_block *block)
304{ 303{
305 if (block != NULL) 304 if (block != NULL)
306 kref_put(&block->b_count, nlmsvc_free_block); 305 kref_put_mutex(&block->b_count, nlmsvc_free_block, &block->b_file->f_mutex);
307} 306}
308 307
309/* 308/*
diff --git a/fs/locks.c b/fs/locks.c
index 669911e4af9d..a94e331a52a2 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1625,15 +1625,13 @@ EXPORT_SYMBOL(flock_lock_file_wait);
1625 */ 1625 */
1626SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) 1626SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1627{ 1627{
1628 struct file *filp; 1628 struct fd f = fdget(fd);
1629 int fput_needed;
1630 struct file_lock *lock; 1629 struct file_lock *lock;
1631 int can_sleep, unlock; 1630 int can_sleep, unlock;
1632 int error; 1631 int error;
1633 1632
1634 error = -EBADF; 1633 error = -EBADF;
1635 filp = fget_light(fd, &fput_needed); 1634 if (!f.file)
1636 if (!filp)
1637 goto out; 1635 goto out;
1638 1636
1639 can_sleep = !(cmd & LOCK_NB); 1637 can_sleep = !(cmd & LOCK_NB);
@@ -1641,31 +1639,31 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1641 unlock = (cmd == LOCK_UN); 1639 unlock = (cmd == LOCK_UN);
1642 1640
1643 if (!unlock && !(cmd & LOCK_MAND) && 1641 if (!unlock && !(cmd & LOCK_MAND) &&
1644 !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) 1642 !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
1645 goto out_putf; 1643 goto out_putf;
1646 1644
1647 error = flock_make_lock(filp, &lock, cmd); 1645 error = flock_make_lock(f.file, &lock, cmd);
1648 if (error) 1646 if (error)
1649 goto out_putf; 1647 goto out_putf;
1650 if (can_sleep) 1648 if (can_sleep)
1651 lock->fl_flags |= FL_SLEEP; 1649 lock->fl_flags |= FL_SLEEP;
1652 1650
1653 error = security_file_lock(filp, lock->fl_type); 1651 error = security_file_lock(f.file, lock->fl_type);
1654 if (error) 1652 if (error)
1655 goto out_free; 1653 goto out_free;
1656 1654
1657 if (filp->f_op && filp->f_op->flock) 1655 if (f.file->f_op && f.file->f_op->flock)
1658 error = filp->f_op->flock(filp, 1656 error = f.file->f_op->flock(f.file,
1659 (can_sleep) ? F_SETLKW : F_SETLK, 1657 (can_sleep) ? F_SETLKW : F_SETLK,
1660 lock); 1658 lock);
1661 else 1659 else
1662 error = flock_lock_file_wait(filp, lock); 1660 error = flock_lock_file_wait(f.file, lock);
1663 1661
1664 out_free: 1662 out_free:
1665 locks_free_lock(lock); 1663 locks_free_lock(lock);
1666 1664
1667 out_putf: 1665 out_putf:
1668 fput_light(filp, fput_needed); 1666 fdput(f);
1669 out: 1667 out:
1670 return error; 1668 return error;
1671} 1669}
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index df0de27c2733..e784a217b500 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,6 +26,7 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
26 struct completion complete; 26 struct completion complete;
27 27
28 bio_init(&bio); 28 bio_init(&bio);
29 bio.bi_max_vecs = 1;
29 bio.bi_io_vec = &bio_vec; 30 bio.bi_io_vec = &bio_vec;
30 bio_vec.bv_page = page; 31 bio_vec.bv_page = page;
31 bio_vec.bv_len = PAGE_SIZE; 32 bio_vec.bv_len = PAGE_SIZE;
@@ -95,12 +96,11 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
95 struct address_space *mapping = super->s_mapping_inode->i_mapping; 96 struct address_space *mapping = super->s_mapping_inode->i_mapping;
96 struct bio *bio; 97 struct bio *bio;
97 struct page *page; 98 struct page *page;
98 struct request_queue *q = bdev_get_queue(sb->s_bdev); 99 unsigned int max_pages;
99 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
100 int i; 100 int i;
101 101
102 if (max_pages > BIO_MAX_PAGES) 102 max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
103 max_pages = BIO_MAX_PAGES; 103
104 bio = bio_alloc(GFP_NOFS, max_pages); 104 bio = bio_alloc(GFP_NOFS, max_pages);
105 BUG_ON(!bio); 105 BUG_ON(!bio);
106 106
@@ -190,12 +190,11 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
190{ 190{
191 struct logfs_super *super = logfs_super(sb); 191 struct logfs_super *super = logfs_super(sb);
192 struct bio *bio; 192 struct bio *bio;
193 struct request_queue *q = bdev_get_queue(sb->s_bdev); 193 unsigned int max_pages;
194 unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9);
195 int i; 194 int i;
196 195
197 if (max_pages > BIO_MAX_PAGES) 196 max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));
198 max_pages = BIO_MAX_PAGES; 197
199 bio = bio_alloc(GFP_NOFS, max_pages); 198 bio = bio_alloc(GFP_NOFS, max_pages);
200 BUG_ON(!bio); 199 BUG_ON(!bio);
201 200
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index a422f42238b2..adb90116d36b 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -156,10 +156,26 @@ static void __logfs_destroy_inode(struct inode *inode)
156 call_rcu(&inode->i_rcu, logfs_i_callback); 156 call_rcu(&inode->i_rcu, logfs_i_callback);
157} 157}
158 158
159static void __logfs_destroy_meta_inode(struct inode *inode)
160{
161 struct logfs_inode *li = logfs_inode(inode);
162 BUG_ON(li->li_block);
163 call_rcu(&inode->i_rcu, logfs_i_callback);
164}
165
159static void logfs_destroy_inode(struct inode *inode) 166static void logfs_destroy_inode(struct inode *inode)
160{ 167{
161 struct logfs_inode *li = logfs_inode(inode); 168 struct logfs_inode *li = logfs_inode(inode);
162 169
170 if (inode->i_ino < LOGFS_RESERVED_INOS) {
171 /*
172 * The reserved inodes are never destroyed unless we are in
173 * unmont path.
174 */
175 __logfs_destroy_meta_inode(inode);
176 return;
177 }
178
163 BUG_ON(list_empty(&li->li_freeing_list)); 179 BUG_ON(list_empty(&li->li_freeing_list));
164 spin_lock(&logfs_inode_lock); 180 spin_lock(&logfs_inode_lock);
165 li->li_refcount--; 181 li->li_refcount--;
@@ -192,8 +208,8 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
192 li->li_height = 0; 208 li->li_height = 0;
193 li->li_used_bytes = 0; 209 li->li_used_bytes = 0;
194 li->li_block = NULL; 210 li->li_block = NULL;
195 inode->i_uid = 0; 211 i_uid_write(inode, 0);
196 inode->i_gid = 0; 212 i_gid_write(inode, 0);
197 inode->i_size = 0; 213 inode->i_size = 0;
198 inode->i_blocks = 0; 214 inode->i_blocks = 0;
199 inode->i_ctime = CURRENT_TIME; 215 inode->i_ctime = CURRENT_TIME;
@@ -373,8 +389,8 @@ static void logfs_put_super(struct super_block *sb)
373{ 389{
374 struct logfs_super *super = logfs_super(sb); 390 struct logfs_super *super = logfs_super(sb);
375 /* kill the meta-inodes */ 391 /* kill the meta-inodes */
376 iput(super->s_master_inode);
377 iput(super->s_segfile_inode); 392 iput(super->s_segfile_inode);
393 iput(super->s_master_inode);
378 iput(super->s_mapping_inode); 394 iput(super->s_mapping_inode);
379} 395}
380 396
@@ -401,5 +417,10 @@ int logfs_init_inode_cache(void)
401 417
402void logfs_destroy_inode_cache(void) 418void logfs_destroy_inode_cache(void)
403{ 419{
420 /*
421 * Make sure all delayed rcu free inodes are flushed before we
422 * destroy cache.
423 */
424 rcu_barrier();
404 kmem_cache_destroy(logfs_inode_cache); 425 kmem_cache_destroy(logfs_inode_cache);
405} 426}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 1e1c369df22b..2a09b8d73989 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -565,7 +565,7 @@ static void write_wbuf(struct super_block *sb, struct logfs_area *area,
565 index = ofs >> PAGE_SHIFT; 565 index = ofs >> PAGE_SHIFT;
566 page_ofs = ofs & (PAGE_SIZE - 1); 566 page_ofs = ofs & (PAGE_SIZE - 1);
567 567
568 page = find_lock_page(mapping, index); 568 page = find_or_create_page(mapping, index, GFP_NOFS);
569 BUG_ON(!page); 569 BUG_ON(!page);
570 memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize); 570 memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
571 unlock_page(page); 571 unlock_page(page);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index f1cb512c5019..e1a3b6bf6324 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -119,8 +119,8 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
119 inode->i_mode = be16_to_cpu(di->di_mode); 119 inode->i_mode = be16_to_cpu(di->di_mode);
120 li->li_height = di->di_height; 120 li->li_height = di->di_height;
121 li->li_flags = be32_to_cpu(di->di_flags); 121 li->li_flags = be32_to_cpu(di->di_flags);
122 inode->i_uid = be32_to_cpu(di->di_uid); 122 i_uid_write(inode, be32_to_cpu(di->di_uid));
123 inode->i_gid = be32_to_cpu(di->di_gid); 123 i_gid_write(inode, be32_to_cpu(di->di_gid));
124 inode->i_size = be64_to_cpu(di->di_size); 124 inode->i_size = be64_to_cpu(di->di_size);
125 logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes)); 125 logfs_set_blocks(inode, be64_to_cpu(di->di_used_bytes));
126 inode->i_atime = be64_to_timespec(di->di_atime); 126 inode->i_atime = be64_to_timespec(di->di_atime);
@@ -156,8 +156,8 @@ static void logfs_inode_to_disk(struct inode *inode, struct logfs_disk_inode*di)
156 di->di_height = li->li_height; 156 di->di_height = li->li_height;
157 di->di_pad = 0; 157 di->di_pad = 0;
158 di->di_flags = cpu_to_be32(li->li_flags); 158 di->di_flags = cpu_to_be32(li->li_flags);
159 di->di_uid = cpu_to_be32(inode->i_uid); 159 di->di_uid = cpu_to_be32(i_uid_read(inode));
160 di->di_gid = cpu_to_be32(inode->i_gid); 160 di->di_gid = cpu_to_be32(i_gid_read(inode));
161 di->di_size = cpu_to_be64(i_size_read(inode)); 161 di->di_size = cpu_to_be64(i_size_read(inode));
162 di->di_used_bytes = cpu_to_be64(li->li_used_bytes); 162 di->di_used_bytes = cpu_to_be64(li->li_used_bytes);
163 di->di_atime = timespec_to_be64(inode->i_atime); 163 di->di_atime = timespec_to_be64(inode->i_atime);
@@ -2189,7 +2189,6 @@ void logfs_evict_inode(struct inode *inode)
2189 return; 2189 return;
2190 } 2190 }
2191 2191
2192 BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
2193 page = inode_to_page(inode); 2192 page = inode_to_page(inode);
2194 BUG_ON(!page); /* FIXME: Use emergency page */ 2193 BUG_ON(!page); /* FIXME: Use emergency page */
2195 logfs_put_write_page(page); 2194 logfs_put_write_page(page);
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index e28d090c98d6..038da0991794 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -886,7 +886,7 @@ static struct logfs_area *alloc_area(struct super_block *sb)
886 886
887static void map_invalidatepage(struct page *page, unsigned long l) 887static void map_invalidatepage(struct page *page, unsigned long l)
888{ 888{
889 BUG(); 889 return;
890} 890}
891 891
892static int map_releasepage(struct page *page, gfp_t g) 892static int map_releasepage(struct page *page, gfp_t g)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2a503ad020d5..4fc5f8ab1c44 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -100,6 +100,11 @@ static int init_inodecache(void)
100 100
101static void destroy_inodecache(void) 101static void destroy_inodecache(void)
102{ 102{
103 /*
104 * Make sure all delayed rcu free inodes are flushed before we
105 * destroy cache.
106 */
107 rcu_barrier();
103 kmem_cache_destroy(minix_inode_cachep); 108 kmem_cache_destroy(minix_inode_cachep);
104} 109}
105 110
@@ -460,8 +465,8 @@ static struct inode *V1_minix_iget(struct inode *inode)
460 return ERR_PTR(-EIO); 465 return ERR_PTR(-EIO);
461 } 466 }
462 inode->i_mode = raw_inode->i_mode; 467 inode->i_mode = raw_inode->i_mode;
463 inode->i_uid = (uid_t)raw_inode->i_uid; 468 i_uid_write(inode, raw_inode->i_uid);
464 inode->i_gid = (gid_t)raw_inode->i_gid; 469 i_gid_write(inode, raw_inode->i_gid);
465 set_nlink(inode, raw_inode->i_nlinks); 470 set_nlink(inode, raw_inode->i_nlinks);
466 inode->i_size = raw_inode->i_size; 471 inode->i_size = raw_inode->i_size;
467 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; 472 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
@@ -493,8 +498,8 @@ static struct inode *V2_minix_iget(struct inode *inode)
493 return ERR_PTR(-EIO); 498 return ERR_PTR(-EIO);
494 } 499 }
495 inode->i_mode = raw_inode->i_mode; 500 inode->i_mode = raw_inode->i_mode;
496 inode->i_uid = (uid_t)raw_inode->i_uid; 501 i_uid_write(inode, raw_inode->i_uid);
497 inode->i_gid = (gid_t)raw_inode->i_gid; 502 i_gid_write(inode, raw_inode->i_gid);
498 set_nlink(inode, raw_inode->i_nlinks); 503 set_nlink(inode, raw_inode->i_nlinks);
499 inode->i_size = raw_inode->i_size; 504 inode->i_size = raw_inode->i_size;
500 inode->i_mtime.tv_sec = raw_inode->i_mtime; 505 inode->i_mtime.tv_sec = raw_inode->i_mtime;
@@ -545,8 +550,8 @@ static struct buffer_head * V1_minix_update_inode(struct inode * inode)
545 if (!raw_inode) 550 if (!raw_inode)
546 return NULL; 551 return NULL;
547 raw_inode->i_mode = inode->i_mode; 552 raw_inode->i_mode = inode->i_mode;
548 raw_inode->i_uid = fs_high2lowuid(inode->i_uid); 553 raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode));
549 raw_inode->i_gid = fs_high2lowgid(inode->i_gid); 554 raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode));
550 raw_inode->i_nlinks = inode->i_nlink; 555 raw_inode->i_nlinks = inode->i_nlink;
551 raw_inode->i_size = inode->i_size; 556 raw_inode->i_size = inode->i_size;
552 raw_inode->i_time = inode->i_mtime.tv_sec; 557 raw_inode->i_time = inode->i_mtime.tv_sec;
@@ -572,8 +577,8 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
572 if (!raw_inode) 577 if (!raw_inode)
573 return NULL; 578 return NULL;
574 raw_inode->i_mode = inode->i_mode; 579 raw_inode->i_mode = inode->i_mode;
575 raw_inode->i_uid = fs_high2lowuid(inode->i_uid); 580 raw_inode->i_uid = fs_high2lowuid(i_uid_read(inode));
576 raw_inode->i_gid = fs_high2lowgid(inode->i_gid); 581 raw_inode->i_gid = fs_high2lowgid(i_gid_read(inode));
577 raw_inode->i_nlinks = inode->i_nlink; 582 raw_inode->i_nlinks = inode->i_nlink;
578 raw_inode->i_size = inode->i_size; 583 raw_inode->i_size = inode->i_size;
579 raw_inode->i_mtime = inode->i_mtime.tv_sec; 584 raw_inode->i_mtime = inode->i_mtime.tv_sec;
diff --git a/fs/namei.c b/fs/namei.c
index 1b464390dde8..aa30d19e9edd 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask)
352/** 352/**
353 * sb_permission - Check superblock-level permissions 353 * sb_permission - Check superblock-level permissions
354 * @sb: Superblock of inode to check permission on 354 * @sb: Superblock of inode to check permission on
355 * @inode: Inode to check permission on
355 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 356 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
356 * 357 *
357 * Separate out file-system wide checks from inode-specific permission checks. 358 * Separate out file-system wide checks from inode-specific permission checks.
@@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1;
656/** 657/**
657 * may_follow_link - Check symlink following for unsafe situations 658 * may_follow_link - Check symlink following for unsafe situations
658 * @link: The path of the symlink 659 * @link: The path of the symlink
660 * @nd: nameidata pathwalk data
659 * 661 *
660 * In the case of the sysctl_protected_symlinks sysctl being enabled, 662 * In the case of the sysctl_protected_symlinks sysctl being enabled,
661 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is 663 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
@@ -678,7 +680,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
678 680
679 /* Allowed if owner and follower match. */ 681 /* Allowed if owner and follower match. */
680 inode = link->dentry->d_inode; 682 inode = link->dentry->d_inode;
681 if (current_cred()->fsuid == inode->i_uid) 683 if (uid_eq(current_cred()->fsuid, inode->i_uid))
682 return 0; 684 return 0;
683 685
684 /* Allowed if parent directory not sticky and world-writable. */ 686 /* Allowed if parent directory not sticky and world-writable. */
@@ -687,7 +689,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd)
687 return 0; 689 return 0;
688 690
689 /* Allowed if parent directory and link owner match. */ 691 /* Allowed if parent directory and link owner match. */
690 if (parent->i_uid == inode->i_uid) 692 if (uid_eq(parent->i_uid, inode->i_uid))
691 return 0; 693 return 0;
692 694
693 path_put_conditional(link, nd); 695 path_put_conditional(link, nd);
@@ -757,7 +759,7 @@ static int may_linkat(struct path *link)
757 /* Source inode owner (or CAP_FOWNER) can hardlink all they like, 759 /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
758 * otherwise, it must be a safe source. 760 * otherwise, it must be a safe source.
759 */ 761 */
760 if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) || 762 if (uid_eq(cred->fsuid, inode->i_uid) || safe_hardlink_source(inode) ||
761 capable(CAP_FOWNER)) 763 capable(CAP_FOWNER))
762 return 0; 764 return 0;
763 765
@@ -1795,8 +1797,6 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1795 struct nameidata *nd, struct file **fp) 1797 struct nameidata *nd, struct file **fp)
1796{ 1798{
1797 int retval = 0; 1799 int retval = 0;
1798 int fput_needed;
1799 struct file *file;
1800 1800
1801 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1801 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1802 nd->flags = flags | LOOKUP_JUMPED; 1802 nd->flags = flags | LOOKUP_JUMPED;
@@ -1848,44 +1848,41 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1848 get_fs_pwd(current->fs, &nd->path); 1848 get_fs_pwd(current->fs, &nd->path);
1849 } 1849 }
1850 } else { 1850 } else {
1851 struct fd f = fdget_raw(dfd);
1851 struct dentry *dentry; 1852 struct dentry *dentry;
1852 1853
1853 file = fget_raw_light(dfd, &fput_needed); 1854 if (!f.file)
1854 retval = -EBADF; 1855 return -EBADF;
1855 if (!file)
1856 goto out_fail;
1857 1856
1858 dentry = file->f_path.dentry; 1857 dentry = f.file->f_path.dentry;
1859 1858
1860 if (*name) { 1859 if (*name) {
1861 retval = -ENOTDIR; 1860 if (!S_ISDIR(dentry->d_inode->i_mode)) {
1862 if (!S_ISDIR(dentry->d_inode->i_mode)) 1861 fdput(f);
1863 goto fput_fail; 1862 return -ENOTDIR;
1863 }
1864 1864
1865 retval = inode_permission(dentry->d_inode, MAY_EXEC); 1865 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1866 if (retval) 1866 if (retval) {
1867 goto fput_fail; 1867 fdput(f);
1868 return retval;
1869 }
1868 } 1870 }
1869 1871
1870 nd->path = file->f_path; 1872 nd->path = f.file->f_path;
1871 if (flags & LOOKUP_RCU) { 1873 if (flags & LOOKUP_RCU) {
1872 if (fput_needed) 1874 if (f.need_put)
1873 *fp = file; 1875 *fp = f.file;
1874 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1876 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1875 lock_rcu_walk(); 1877 lock_rcu_walk();
1876 } else { 1878 } else {
1877 path_get(&file->f_path); 1879 path_get(&nd->path);
1878 fput_light(file, fput_needed); 1880 fdput(f);
1879 } 1881 }
1880 } 1882 }
1881 1883
1882 nd->inode = nd->path.dentry->d_inode; 1884 nd->inode = nd->path.dentry->d_inode;
1883 return 0; 1885 return 0;
1884
1885fput_fail:
1886 fput_light(file, fput_needed);
1887out_fail:
1888 return retval;
1889} 1886}
1890 1887
1891static inline int lookup_last(struct nameidata *nd, struct path *path) 1888static inline int lookup_last(struct nameidata *nd, struct path *path)
@@ -2414,7 +2411,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2414 goto out; 2411 goto out;
2415 } 2412 }
2416 2413
2417 mode = op->mode & S_IALLUGO; 2414 mode = op->mode;
2418 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) 2415 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2419 mode &= ~current_umask(); 2416 mode &= ~current_umask();
2420 2417
@@ -2452,7 +2449,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2452 } 2449 }
2453 2450
2454 if (open_flag & O_CREAT) { 2451 if (open_flag & O_CREAT) {
2455 error = may_o_create(&nd->path, dentry, op->mode); 2452 error = may_o_create(&nd->path, dentry, mode);
2456 if (error) { 2453 if (error) {
2457 create_error = error; 2454 create_error = error;
2458 if (open_flag & O_EXCL) 2455 if (open_flag & O_EXCL)
@@ -2489,6 +2486,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2489 dput(dentry); 2486 dput(dentry);
2490 dentry = file->f_path.dentry; 2487 dentry = file->f_path.dentry;
2491 } 2488 }
2489 if (create_error && dentry->d_inode == NULL) {
2490 error = create_error;
2491 goto out;
2492 }
2492 goto looked_up; 2493 goto looked_up;
2493 } 2494 }
2494 2495
@@ -3965,7 +3966,7 @@ EXPORT_SYMBOL(user_path_at);
3965EXPORT_SYMBOL(follow_down_one); 3966EXPORT_SYMBOL(follow_down_one);
3966EXPORT_SYMBOL(follow_down); 3967EXPORT_SYMBOL(follow_down);
3967EXPORT_SYMBOL(follow_up); 3968EXPORT_SYMBOL(follow_up);
3968EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 3969EXPORT_SYMBOL(get_write_access); /* nfsd */
3969EXPORT_SYMBOL(getname); 3970EXPORT_SYMBOL(getname);
3970EXPORT_SYMBOL(lock_rename); 3971EXPORT_SYMBOL(lock_rename);
3971EXPORT_SYMBOL(lookup_one_len); 3972EXPORT_SYMBOL(lookup_one_len);
diff --git a/fs/namespace.c b/fs/namespace.c
index 4d31f73e2561..7bdf7907413f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1886,8 +1886,14 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1886 return err; 1886 return err;
1887 1887
1888 err = -EINVAL; 1888 err = -EINVAL;
1889 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt))) 1889 if (unlikely(!check_mnt(real_mount(path->mnt)))) {
1890 goto unlock; 1890 /* that's acceptable only for automounts done in private ns */
1891 if (!(mnt_flags & MNT_SHRINKABLE))
1892 goto unlock;
1893 /* ... and for those we'd better have mountpoint still alive */
1894 if (!real_mount(path->mnt)->mnt_ns)
1895 goto unlock;
1896 }
1891 1897
1892 /* Refuse the same filesystem on the same mount point */ 1898 /* Refuse the same filesystem on the same mount point */
1893 err = -EBUSY; 1899 err = -EBUSY;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 333df07ae3bd..d7e9fe77188a 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -89,6 +89,11 @@ static int init_inodecache(void)
89 89
90static void destroy_inodecache(void) 90static void destroy_inodecache(void)
91{ 91{
92 /*
93 * Make sure all delayed rcu free inodes are flushed before we
94 * destroy cache.
95 */
96 rcu_barrier();
92 kmem_cache_destroy(ncp_inode_cachep); 97 kmem_cache_destroy(ncp_inode_cachep);
93} 98}
94 99
@@ -314,11 +319,11 @@ static void ncp_stop_tasks(struct ncp_server *server) {
314 release_sock(sk); 319 release_sock(sk);
315 del_timer_sync(&server->timeout_tm); 320 del_timer_sync(&server->timeout_tm);
316 321
317 flush_work_sync(&server->rcv.tq); 322 flush_work(&server->rcv.tq);
318 if (sk->sk_socket->type == SOCK_STREAM) 323 if (sk->sk_socket->type == SOCK_STREAM)
319 flush_work_sync(&server->tx.tq); 324 flush_work(&server->tx.tq);
320 else 325 else
321 flush_work_sync(&server->timeout_tq); 326 flush_work(&server->timeout_tq);
322} 327}
323 328
324static int ncp_show_options(struct seq_file *seq, struct dentry *root) 329static int ncp_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8bf3a3f6925a..b7db60897f91 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -12,19 +12,19 @@ nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
12nfs-$(CONFIG_SYSCTL) += sysctl.o 12nfs-$(CONFIG_SYSCTL) += sysctl.o
13nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o 13nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
14 14
15obj-$(CONFIG_NFS_V2) += nfs2.o 15obj-$(CONFIG_NFS_V2) += nfsv2.o
16nfs2-y := nfs2super.o proc.o nfs2xdr.o 16nfsv2-y := nfs2super.o proc.o nfs2xdr.o
17 17
18obj-$(CONFIG_NFS_V3) += nfs3.o 18obj-$(CONFIG_NFS_V3) += nfsv3.o
19nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o 19nfsv3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
20nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 20nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
21 21
22obj-$(CONFIG_NFS_V4) += nfs4.o 22obj-$(CONFIG_NFS_V4) += nfsv4.o
23nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \ 23nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
24 delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \ 24 delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
25 nfs4namespace.o nfs4getroot.o nfs4client.o 25 nfs4namespace.o nfs4getroot.o nfs4client.o
26nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o 26nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
27nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o 27nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
28 28
29obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o 29obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
30nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o 30nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9fc0d9dfc91b..99694442b93f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -105,7 +105,7 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
105 105
106 if (IS_ERR(nfs)) { 106 if (IS_ERR(nfs)) {
107 mutex_lock(&nfs_version_mutex); 107 mutex_lock(&nfs_version_mutex);
108 request_module("nfs%d", version); 108 request_module("nfsv%d", version);
109 nfs = find_nfs_version(version); 109 nfs = find_nfs_version(version);
110 mutex_unlock(&nfs_version_mutex); 110 mutex_unlock(&nfs_version_mutex);
111 } 111 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 75d6d0a3d32e..f692be97676d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -287,10 +287,12 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
287 struct inode *inode = file->f_path.dentry->d_inode; 287 struct inode *inode = file->f_path.dentry->d_inode;
288 288
289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
290 if (ret != 0)
291 goto out;
290 mutex_lock(&inode->i_mutex); 292 mutex_lock(&inode->i_mutex);
291 ret = nfs_file_fsync_commit(file, start, end, datasync); 293 ret = nfs_file_fsync_commit(file, start, end, datasync);
292 mutex_unlock(&inode->i_mutex); 294 mutex_unlock(&inode->i_mutex);
293 295out:
294 return ret; 296 return ret;
295} 297}
296 298
@@ -576,6 +578,7 @@ out:
576static const struct vm_operations_struct nfs_file_vm_ops = { 578static const struct vm_operations_struct nfs_file_vm_ops = {
577 .fault = filemap_fault, 579 .fault = filemap_fault,
578 .page_mkwrite = nfs_vm_page_mkwrite, 580 .page_mkwrite = nfs_vm_page_mkwrite,
581 .remap_pages = generic_file_remap_pages,
579}; 582};
580 583
581static int nfs_need_sync_write(struct file *filp, struct inode *inode) 584static int nfs_need_sync_write(struct file *filp, struct inode *inode)
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b701358c39c3..a850079467d8 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -61,6 +61,12 @@ struct idmap {
61 struct mutex idmap_mutex; 61 struct mutex idmap_mutex;
62}; 62};
63 63
64struct idmap_legacy_upcalldata {
65 struct rpc_pipe_msg pipe_msg;
66 struct idmap_msg idmap_msg;
67 struct idmap *idmap;
68};
69
64/** 70/**
65 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 71 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
66 * @fattr: fully initialised struct nfs_fattr 72 * @fattr: fully initialised struct nfs_fattr
@@ -324,6 +330,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
324 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, 330 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
325 name, namelen, type, data, 331 name, namelen, type, data,
326 data_size, idmap); 332 data_size, idmap);
333 idmap->idmap_key_cons = NULL;
327 mutex_unlock(&idmap->idmap_mutex); 334 mutex_unlock(&idmap->idmap_mutex);
328 } 335 }
329 return ret; 336 return ret;
@@ -380,11 +387,13 @@ static const match_table_t nfs_idmap_tokens = {
380static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); 387static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
381static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 388static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
382 size_t); 389 size_t);
390static void idmap_release_pipe(struct inode *);
383static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); 391static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
384 392
385static const struct rpc_pipe_ops idmap_upcall_ops = { 393static const struct rpc_pipe_ops idmap_upcall_ops = {
386 .upcall = rpc_pipe_generic_upcall, 394 .upcall = rpc_pipe_generic_upcall,
387 .downcall = idmap_pipe_downcall, 395 .downcall = idmap_pipe_downcall,
396 .release_pipe = idmap_release_pipe,
388 .destroy_msg = idmap_pipe_destroy_msg, 397 .destroy_msg = idmap_pipe_destroy_msg,
389}; 398};
390 399
@@ -616,7 +625,8 @@ void nfs_idmap_quit(void)
616 nfs_idmap_quit_keyring(); 625 nfs_idmap_quit_keyring();
617} 626}
618 627
619static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, 628static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
629 struct idmap_msg *im,
620 struct rpc_pipe_msg *msg) 630 struct rpc_pipe_msg *msg)
621{ 631{
622 substring_t substr; 632 substring_t substr;
@@ -659,6 +669,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
659 const char *op, 669 const char *op,
660 void *aux) 670 void *aux)
661{ 671{
672 struct idmap_legacy_upcalldata *data;
662 struct rpc_pipe_msg *msg; 673 struct rpc_pipe_msg *msg;
663 struct idmap_msg *im; 674 struct idmap_msg *im;
664 struct idmap *idmap = (struct idmap *)aux; 675 struct idmap *idmap = (struct idmap *)aux;
@@ -666,15 +677,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
666 int ret = -ENOMEM; 677 int ret = -ENOMEM;
667 678
668 /* msg and im are freed in idmap_pipe_destroy_msg */ 679 /* msg and im are freed in idmap_pipe_destroy_msg */
669 msg = kmalloc(sizeof(*msg), GFP_KERNEL); 680 data = kmalloc(sizeof(*data), GFP_KERNEL);
670 if (!msg) 681 if (!data)
671 goto out0;
672
673 im = kmalloc(sizeof(*im), GFP_KERNEL);
674 if (!im)
675 goto out1; 682 goto out1;
676 683
677 ret = nfs_idmap_prepare_message(key->description, im, msg); 684 msg = &data->pipe_msg;
685 im = &data->idmap_msg;
686 data->idmap = idmap;
687
688 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
678 if (ret < 0) 689 if (ret < 0)
679 goto out2; 690 goto out2;
680 691
@@ -683,15 +694,15 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
683 694
684 ret = rpc_queue_upcall(idmap->idmap_pipe, msg); 695 ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
685 if (ret < 0) 696 if (ret < 0)
686 goto out2; 697 goto out3;
687 698
688 return ret; 699 return ret;
689 700
701out3:
702 idmap->idmap_key_cons = NULL;
690out2: 703out2:
691 kfree(im); 704 kfree(data);
692out1: 705out1:
693 kfree(msg);
694out0:
695 complete_request_key(cons, ret); 706 complete_request_key(cons, ret);
696 return ret; 707 return ret;
697} 708}
@@ -749,9 +760,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
749 } 760 }
750 761
751 if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { 762 if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
752 ret = mlen; 763 ret = -ENOKEY;
753 complete_request_key(cons, -ENOKEY); 764 goto out;
754 goto out_incomplete;
755 } 765 }
756 766
757 namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); 767 namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
@@ -768,16 +778,32 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
768 778
769out: 779out:
770 complete_request_key(cons, ret); 780 complete_request_key(cons, ret);
771out_incomplete:
772 return ret; 781 return ret;
773} 782}
774 783
775static void 784static void
776idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) 785idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
777{ 786{
787 struct idmap_legacy_upcalldata *data = container_of(msg,
788 struct idmap_legacy_upcalldata,
789 pipe_msg);
790 struct idmap *idmap = data->idmap;
791 struct key_construction *cons;
792 if (msg->errno) {
793 cons = ACCESS_ONCE(idmap->idmap_key_cons);
794 idmap->idmap_key_cons = NULL;
795 complete_request_key(cons, msg->errno);
796 }
778 /* Free memory allocated in nfs_idmap_legacy_upcall() */ 797 /* Free memory allocated in nfs_idmap_legacy_upcall() */
779 kfree(msg->data); 798 kfree(data);
780 kfree(msg); 799}
800
801static void
802idmap_release_pipe(struct inode *inode)
803{
804 struct rpc_inode *rpci = RPC_I(inode);
805 struct idmap *idmap = (struct idmap *)rpci->private;
806 idmap->idmap_key_cons = NULL;
781} 807}
782 808
783int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 809int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6e895f0fbf3..e4c716d374a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode)
154 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); 154 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
155 nfsi->attrtimeo_timestamp = jiffies; 155 nfsi->attrtimeo_timestamp = jiffies;
156 156
157 memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); 157 memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
158 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) 158 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
159 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; 159 nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
160 else 160 else
@@ -1571,6 +1571,11 @@ static int __init nfs_init_inodecache(void)
1571 1571
1572static void nfs_destroy_inodecache(void) 1572static void nfs_destroy_inodecache(void)
1573{ 1573{
1574 /*
1575 * Make sure all delayed rcu free inodes are flushed before we
1576 * destroy cache.
1577 */
1578 rcu_barrier();
1574 kmem_cache_destroy(nfs_inode_cachep); 1579 kmem_cache_destroy(nfs_inode_cachep);
1575} 1580}
1576 1581
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index e4498dc351a8..4a1aafba6a20 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -70,7 +70,7 @@ ssize_t nfs3_getxattr(struct dentry *dentry, const char *name,
70 if (type == ACL_TYPE_ACCESS && acl->a_count == 0) 70 if (type == ACL_TYPE_ACCESS && acl->a_count == 0)
71 error = -ENODATA; 71 error = -ENODATA;
72 else 72 else
73 error = posix_acl_to_xattr(acl, buffer, size); 73 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
74 posix_acl_release(acl); 74 posix_acl_release(acl);
75 } else 75 } else
76 error = -ENODATA; 76 error = -ENODATA;
@@ -92,7 +92,7 @@ int nfs3_setxattr(struct dentry *dentry, const char *name,
92 else 92 else
93 return -EOPNOTSUPP; 93 return -EOPNOTSUPP;
94 94
95 acl = posix_acl_from_xattr(value, size); 95 acl = posix_acl_from_xattr(&init_user_ns, value, size);
96 if (IS_ERR(acl)) 96 if (IS_ERR(acl))
97 return PTR_ERR(acl); 97 return PTR_ERR(acl);
98 error = nfs3_proc_setacl(inode, type, acl); 98 error = nfs3_proc_setacl(inode, type, acl);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0952c791df36..69322096c325 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
69 nfs_fattr_init(info->fattr); 69 nfs_fattr_init(info->fattr);
70 status = rpc_call_sync(client, &msg, 0); 70 status = rpc_call_sync(client, &msg, 0);
71 dprintk("%s: reply fsinfo: %d\n", __func__, status); 71 dprintk("%s: reply fsinfo: %d\n", __func__, status);
72 if (!(info->fattr->valid & NFS_ATTR_FATTR)) { 72 if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
73 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; 73 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
74 msg.rpc_resp = info->fattr; 74 msg.rpc_resp = info->fattr;
75 status = rpc_call_sync(client, &msg, 0); 75 status = rpc_call_sync(client, &msg, 0);
@@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
643 u64 cookie, struct page **pages, unsigned int count, int plus) 643 u64 cookie, struct page **pages, unsigned int count, int plus)
644{ 644{
645 struct inode *dir = dentry->d_inode; 645 struct inode *dir = dentry->d_inode;
646 __be32 *verf = NFS_COOKIEVERF(dir); 646 __be32 *verf = NFS_I(dir)->cookieverf;
647 struct nfs3_readdirargs arg = { 647 struct nfs3_readdirargs arg = {
648 .fh = NFS_FH(dir), 648 .fh = NFS_FH(dir),
649 .cookie = cookie, 649 .cookie = cookie,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 3b950dd81e81..da0618aeeadb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -205,6 +205,9 @@ extern const struct dentry_operations nfs4_dentry_operations;
205int nfs_atomic_open(struct inode *, struct dentry *, struct file *, 205int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
206 unsigned, umode_t, int *); 206 unsigned, umode_t, int *);
207 207
208/* super.c */
209extern struct file_system_type nfs4_fs_type;
210
208/* nfs4namespace.c */ 211/* nfs4namespace.c */
209rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); 212rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
210struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); 213struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index cbcdfaf32505..24eb663f8ed5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -74,7 +74,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
74 return clp; 74 return clp;
75 75
76error: 76error:
77 kfree(clp); 77 nfs_free_client(clp);
78 return ERR_PTR(err); 78 return ERR_PTR(err);
79} 79}
80 80
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index acb65e7887f8..eb5eb8eef4d3 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -96,13 +96,15 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
96 struct inode *inode = file->f_path.dentry->d_inode; 96 struct inode *inode = file->f_path.dentry->d_inode;
97 97
98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
99 if (ret != 0)
100 goto out;
99 mutex_lock(&inode->i_mutex); 101 mutex_lock(&inode->i_mutex);
100 ret = nfs_file_fsync_commit(file, start, end, datasync); 102 ret = nfs_file_fsync_commit(file, start, end, datasync);
101 if (!ret && !datasync) 103 if (!ret && !datasync)
102 /* application has asked for meta-data sync */ 104 /* application has asked for meta-data sync */
103 ret = pnfs_layoutcommit_inode(inode, true); 105 ret = pnfs_layoutcommit_inode(inode, true);
104 mutex_unlock(&inode->i_mutex); 106 mutex_unlock(&inode->i_mutex);
105 107out:
106 return ret; 108 return ret;
107} 109}
108 110
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a99a8d948721..1e50326d00dd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
3215 dentry->d_parent->d_name.name, 3215 dentry->d_parent->d_name.name,
3216 dentry->d_name.name, 3216 dentry->d_name.name,
3217 (unsigned long long)cookie); 3217 (unsigned long long)cookie);
3218 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 3218 nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
3219 res.pgbase = args.pgbase; 3219 res.pgbase = args.pgbase;
3220 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); 3220 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
3221 if (status >= 0) { 3221 if (status >= 0) {
3222 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 3222 memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
3223 status += args.pgbase; 3223 status += args.pgbase;
3224 } 3224 }
3225 3225
@@ -3653,11 +3653,11 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
3653 && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); 3653 && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL);
3654} 3654}
3655 3655
3656/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that 3656/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
3657 * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on 3657 * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_SIZE) bytes on
3658 * the stack. 3658 * the stack.
3659 */ 3659 */
3660#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) 3660#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
3661 3661
3662static int buf_to_pages_noslab(const void *buf, size_t buflen, 3662static int buf_to_pages_noslab(const void *buf, size_t buflen,
3663 struct page **pages, unsigned int *pgbase) 3663 struct page **pages, unsigned int *pgbase)
@@ -3668,7 +3668,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
3668 spages = pages; 3668 spages = pages;
3669 3669
3670 do { 3670 do {
3671 len = min_t(size_t, PAGE_CACHE_SIZE, buflen); 3671 len = min_t(size_t, PAGE_SIZE, buflen);
3672 newpage = alloc_page(GFP_KERNEL); 3672 newpage = alloc_page(GFP_KERNEL);
3673 3673
3674 if (newpage == NULL) 3674 if (newpage == NULL)
@@ -3737,9 +3737,10 @@ out:
3737static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) 3737static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
3738{ 3738{
3739 struct nfs4_cached_acl *acl; 3739 struct nfs4_cached_acl *acl;
3740 size_t buflen = sizeof(*acl) + acl_len;
3740 3741
3741 if (pages && acl_len <= PAGE_SIZE) { 3742 if (buflen <= PAGE_SIZE) {
3742 acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); 3743 acl = kmalloc(buflen, GFP_KERNEL);
3743 if (acl == NULL) 3744 if (acl == NULL)
3744 goto out; 3745 goto out;
3745 acl->cached = 1; 3746 acl->cached = 1;
@@ -3781,17 +3782,15 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3781 .rpc_argp = &args, 3782 .rpc_argp = &args,
3782 .rpc_resp = &res, 3783 .rpc_resp = &res,
3783 }; 3784 };
3784 int ret = -ENOMEM, npages, i; 3785 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
3785 size_t acl_len = 0; 3786 int ret = -ENOMEM, i;
3786 3787
3787 npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
3788 /* As long as we're doing a round trip to the server anyway, 3788 /* As long as we're doing a round trip to the server anyway,
3789 * let's be prepared for a page of acl data. */ 3789 * let's be prepared for a page of acl data. */
3790 if (npages == 0) 3790 if (npages == 0)
3791 npages = 1; 3791 npages = 1;
3792 3792 if (npages > ARRAY_SIZE(pages))
3793 /* Add an extra page to handle the bitmap returned */ 3793 return -ERANGE;
3794 npages++;
3795 3794
3796 for (i = 0; i < npages; i++) { 3795 for (i = 0; i < npages; i++) {
3797 pages[i] = alloc_page(GFP_KERNEL); 3796 pages[i] = alloc_page(GFP_KERNEL);
@@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3807 args.acl_len = npages * PAGE_SIZE; 3806 args.acl_len = npages * PAGE_SIZE;
3808 args.acl_pgbase = 0; 3807 args.acl_pgbase = 0;
3809 3808
3810 /* Let decode_getfacl know not to fail if the ACL data is larger than
3811 * the page we send as a guess */
3812 if (buf == NULL)
3813 res.acl_flags |= NFS4_ACL_LEN_REQUEST;
3814
3815 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", 3809 dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
3816 __func__, buf, buflen, npages, args.acl_len); 3810 __func__, buf, buflen, npages, args.acl_len);
3817 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), 3811 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
@@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3819 if (ret) 3813 if (ret)
3820 goto out_free; 3814 goto out_free;
3821 3815
3822 acl_len = res.acl_len - res.acl_data_offset; 3816 /* Handle the case where the passed-in buffer is too short */
3823 if (acl_len > args.acl_len) 3817 if (res.acl_flags & NFS4_ACL_TRUNC) {
3824 nfs4_write_cached_acl(inode, NULL, 0, acl_len); 3818 /* Did the user only issue a request for the acl length? */
3825 else 3819 if (buf == NULL)
3826 nfs4_write_cached_acl(inode, pages, res.acl_data_offset, 3820 goto out_ok;
3827 acl_len);
3828 if (buf) {
3829 ret = -ERANGE; 3821 ret = -ERANGE;
3830 if (acl_len > buflen) 3822 goto out_free;
3831 goto out_free;
3832 _copy_from_pages(buf, pages, res.acl_data_offset,
3833 acl_len);
3834 } 3823 }
3835 ret = acl_len; 3824 nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
3825 if (buf)
3826 _copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
3827out_ok:
3828 ret = res.acl_len;
3836out_free: 3829out_free:
3837 for (i = 0; i < npages; i++) 3830 for (i = 0; i < npages; i++)
3838 if (pages[i]) 3831 if (pages[i])
@@ -3890,10 +3883,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3890 .rpc_argp = &arg, 3883 .rpc_argp = &arg,
3891 .rpc_resp = &res, 3884 .rpc_resp = &res,
3892 }; 3885 };
3886 unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
3893 int ret, i; 3887 int ret, i;
3894 3888
3895 if (!nfs4_server_supports_acls(server)) 3889 if (!nfs4_server_supports_acls(server))
3896 return -EOPNOTSUPP; 3890 return -EOPNOTSUPP;
3891 if (npages > ARRAY_SIZE(pages))
3892 return -ERANGE;
3897 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3893 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3898 if (i < 0) 3894 if (i < 0)
3899 return i; 3895 return i;
@@ -6223,11 +6219,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6223 dprintk("<-- %s\n", __func__); 6219 dprintk("<-- %s\n", __func__);
6224} 6220}
6225 6221
6222static size_t max_response_pages(struct nfs_server *server)
6223{
6224 u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
6225 return nfs_page_array_len(0, max_resp_sz);
6226}
6227
6228static void nfs4_free_pages(struct page **pages, size_t size)
6229{
6230 int i;
6231
6232 if (!pages)
6233 return;
6234
6235 for (i = 0; i < size; i++) {
6236 if (!pages[i])
6237 break;
6238 __free_page(pages[i]);
6239 }
6240 kfree(pages);
6241}
6242
6243static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
6244{
6245 struct page **pages;
6246 int i;
6247
6248 pages = kcalloc(size, sizeof(struct page *), gfp_flags);
6249 if (!pages) {
6250 dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
6251 return NULL;
6252 }
6253
6254 for (i = 0; i < size; i++) {
6255 pages[i] = alloc_page(gfp_flags);
6256 if (!pages[i]) {
6257 dprintk("%s: failed to allocate page\n", __func__);
6258 nfs4_free_pages(pages, size);
6259 return NULL;
6260 }
6261 }
6262
6263 return pages;
6264}
6265
6226static void nfs4_layoutget_release(void *calldata) 6266static void nfs4_layoutget_release(void *calldata)
6227{ 6267{
6228 struct nfs4_layoutget *lgp = calldata; 6268 struct nfs4_layoutget *lgp = calldata;
6269 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
6270 size_t max_pages = max_response_pages(server);
6229 6271
6230 dprintk("--> %s\n", __func__); 6272 dprintk("--> %s\n", __func__);
6273 nfs4_free_pages(lgp->args.layout.pages, max_pages);
6231 put_nfs_open_context(lgp->args.ctx); 6274 put_nfs_open_context(lgp->args.ctx);
6232 kfree(calldata); 6275 kfree(calldata);
6233 dprintk("<-- %s\n", __func__); 6276 dprintk("<-- %s\n", __func__);
@@ -6239,9 +6282,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
6239 .rpc_release = nfs4_layoutget_release, 6282 .rpc_release = nfs4_layoutget_release,
6240}; 6283};
6241 6284
6242int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) 6285void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6243{ 6286{
6244 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 6287 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
6288 size_t max_pages = max_response_pages(server);
6245 struct rpc_task *task; 6289 struct rpc_task *task;
6246 struct rpc_message msg = { 6290 struct rpc_message msg = {
6247 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], 6291 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
@@ -6259,12 +6303,19 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
6259 6303
6260 dprintk("--> %s\n", __func__); 6304 dprintk("--> %s\n", __func__);
6261 6305
6306 lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
6307 if (!lgp->args.layout.pages) {
6308 nfs4_layoutget_release(lgp);
6309 return;
6310 }
6311 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
6312
6262 lgp->res.layoutp = &lgp->args.layout; 6313 lgp->res.layoutp = &lgp->args.layout;
6263 lgp->res.seq_res.sr_slot = NULL; 6314 lgp->res.seq_res.sr_slot = NULL;
6264 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); 6315 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
6265 task = rpc_run_task(&task_setup_data); 6316 task = rpc_run_task(&task_setup_data);
6266 if (IS_ERR(task)) 6317 if (IS_ERR(task))
6267 return PTR_ERR(task); 6318 return;
6268 status = nfs4_wait_for_completion_rpc_task(task); 6319 status = nfs4_wait_for_completion_rpc_task(task);
6269 if (status == 0) 6320 if (status == 0)
6270 status = task->tk_status; 6321 status = task->tk_status;
@@ -6272,7 +6323,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
6272 status = pnfs_layout_process(lgp); 6323 status = pnfs_layout_process(lgp);
6273 rpc_put_task(task); 6324 rpc_put_task(task);
6274 dprintk("<-- %s status=%d\n", __func__, status); 6325 dprintk("<-- %s status=%d\n", __func__, status);
6275 return status; 6326 return;
6276} 6327}
6277 6328
6278static void 6329static void
@@ -6304,12 +6355,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
6304 return; 6355 return;
6305 } 6356 }
6306 spin_lock(&lo->plh_inode->i_lock); 6357 spin_lock(&lo->plh_inode->i_lock);
6307 if (task->tk_status == 0) { 6358 if (task->tk_status == 0 && lrp->res.lrs_present)
6308 if (lrp->res.lrs_present) { 6359 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
6309 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
6310 } else
6311 BUG_ON(!list_empty(&lo->plh_segs));
6312 }
6313 lo->plh_block_lgets--; 6360 lo->plh_block_lgets--;
6314 spin_unlock(&lo->plh_inode->i_lock); 6361 spin_unlock(&lo->plh_inode->i_lock);
6315 dprintk("<-- %s\n", __func__); 6362 dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 6930bec91bca..1720d32ffa54 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -117,8 +117,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
117 timeout = 5 * HZ; 117 timeout = 5 * HZ;
118 dprintk("%s: requeueing work. Lease period = %ld\n", 118 dprintk("%s: requeueing work. Lease period = %ld\n",
119 __func__, (timeout + HZ - 1) / HZ); 119 __func__, (timeout + HZ - 1) / HZ);
120 cancel_delayed_work(&clp->cl_renewd); 120 mod_delayed_work(system_wq, &clp->cl_renewd, timeout);
121 schedule_delayed_work(&clp->cl_renewd, timeout);
122 set_bit(NFS_CS_RENEWD, &clp->cl_res_state); 121 set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
123 spin_unlock(&clp->cl_lock); 122 spin_unlock(&clp->cl_lock);
124} 123}
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 12a31a9dbcdd..bd61221ad2c5 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -23,14 +23,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
23static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, 23static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
24 int flags, const char *dev_name, void *raw_data); 24 int flags, const char *dev_name, void *raw_data);
25 25
26static struct file_system_type nfs4_fs_type = {
27 .owner = THIS_MODULE,
28 .name = "nfs4",
29 .mount = nfs_fs_mount,
30 .kill_sb = nfs_kill_super,
31 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
32};
33
34static struct file_system_type nfs4_remote_fs_type = { 26static struct file_system_type nfs4_remote_fs_type = {
35 .owner = THIS_MODULE, 27 .owner = THIS_MODULE,
36 .name = "nfs4", 28 .name = "nfs4",
@@ -344,14 +336,8 @@ static int __init init_nfs_v4(void)
344 if (err) 336 if (err)
345 goto out1; 337 goto out1;
346 338
347 err = register_filesystem(&nfs4_fs_type);
348 if (err < 0)
349 goto out2;
350
351 register_nfs_version(&nfs_v4); 339 register_nfs_version(&nfs_v4);
352 return 0; 340 return 0;
353out2:
354 nfs4_unregister_sysctl();
355out1: 341out1:
356 nfs_idmap_quit(); 342 nfs_idmap_quit();
357out: 343out:
@@ -361,7 +347,6 @@ out:
361static void __exit exit_nfs_v4(void) 347static void __exit exit_nfs_v4(void)
362{ 348{
363 unregister_nfs_version(&nfs_v4); 349 unregister_nfs_version(&nfs_v4);
364 unregister_filesystem(&nfs4_fs_type);
365 nfs4_unregister_sysctl(); 350 nfs4_unregister_sysctl();
366 nfs_idmap_quit(); 351 nfs_idmap_quit();
367} 352}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ca13483edd60..8dba6bd48557 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5045,22 +5045,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
5045 struct nfs_getaclres *res) 5045 struct nfs_getaclres *res)
5046{ 5046{
5047 unsigned int savep; 5047 unsigned int savep;
5048 __be32 *bm_p;
5049 uint32_t attrlen, 5048 uint32_t attrlen,
5050 bitmap[3] = {0}; 5049 bitmap[3] = {0};
5051 int status; 5050 int status;
5052 size_t page_len = xdr->buf->page_len; 5051 unsigned int pg_offset;
5053 5052
5054 res->acl_len = 0; 5053 res->acl_len = 0;
5055 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) 5054 if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
5056 goto out; 5055 goto out;
5057 5056
5058 bm_p = xdr->p; 5057 xdr_enter_page(xdr, xdr->buf->page_len);
5059 res->acl_data_offset = be32_to_cpup(bm_p) + 2; 5058
5060 res->acl_data_offset <<= 2; 5059 /* Calculate the offset of the page data */
5061 /* Check if the acl data starts beyond the allocated buffer */ 5060 pg_offset = xdr->buf->head[0].iov_len;
5062 if (res->acl_data_offset > page_len)
5063 return -ERANGE;
5064 5061
5065 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) 5062 if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
5066 goto out; 5063 goto out;
@@ -5074,23 +5071,16 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
5074 /* The bitmap (xdr len + bitmaps) and the attr xdr len words 5071 /* The bitmap (xdr len + bitmaps) and the attr xdr len words
5075 * are stored with the acl data to handle the problem of 5072 * are stored with the acl data to handle the problem of
5076 * variable length bitmaps.*/ 5073 * variable length bitmaps.*/
5077 xdr->p = bm_p; 5074 res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
5078
5079 /* We ignore &savep and don't do consistency checks on
5080 * the attr length. Let userspace figure it out.... */
5081 attrlen += res->acl_data_offset;
5082 if (attrlen > page_len) {
5083 if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
5084 /* getxattr interface called with a NULL buf */
5085 res->acl_len = attrlen;
5086 goto out;
5087 }
5088 dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
5089 attrlen, page_len);
5090 return -EINVAL;
5091 }
5092 xdr_read_pages(xdr, attrlen);
5093 res->acl_len = attrlen; 5075 res->acl_len = attrlen;
5076
5077 /* Check for receive buffer overflow */
5078 if (res->acl_len > (xdr->nwords << 2) ||
5079 res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
5080 res->acl_flags |= NFS4_ACL_TRUNC;
5081 dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
5082 attrlen, xdr->nwords << 2);
5083 }
5094 } else 5084 } else
5095 status = -EOPNOTSUPP; 5085 status = -EOPNOTSUPP;
5096 5086
@@ -6235,7 +6225,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6235 status = decode_open(xdr, res); 6225 status = decode_open(xdr, res);
6236 if (status) 6226 if (status)
6237 goto out; 6227 goto out;
6238 if (decode_getfh(xdr, &res->fh) != 0) 6228 status = decode_getfh(xdr, &res->fh);
6229 if (status)
6239 goto out; 6230 goto out;
6240 decode_getfattr(xdr, res->f_attr, res->server); 6231 decode_getfattr(xdr, res->f_attr, res->server);
6241out: 6232out:
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index f50d3e8d6f22..ea6d111b03e9 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
570 return false; 570 return false;
571 571
572 return pgio->pg_count + req->wb_bytes <= 572 return pgio->pg_count + req->wb_bytes <=
573 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; 573 (unsigned long)pgio->pg_layout_private;
574}
575
576void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
577{
578 pnfs_generic_pg_init_read(pgio, req);
579 if (unlikely(pgio->pg_lseg == NULL))
580 return; /* Not pNFS */
581
582 pgio->pg_layout_private = (void *)
583 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
584}
585
586static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
587 unsigned long *stripe_end)
588{
589 u32 stripe_off;
590 unsigned stripe_size;
591
592 if (layout->raid_algorithm == PNFS_OSD_RAID_0)
593 return true;
594
595 stripe_size = layout->stripe_unit *
596 (layout->group_width - layout->parity);
597
598 div_u64_rem(offset, stripe_size, &stripe_off);
599 if (!stripe_off)
600 return true;
601
602 *stripe_end = stripe_size - stripe_off;
603 return false;
604}
605
606void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
607{
608 unsigned long stripe_end = 0;
609
610 pnfs_generic_pg_init_write(pgio, req);
611 if (unlikely(pgio->pg_lseg == NULL))
612 return; /* Not pNFS */
613
614 if (req->wb_offset ||
615 !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
616 &OBJIO_LSEG(pgio->pg_lseg)->layout,
617 &stripe_end)) {
618 pgio->pg_layout_private = (void *)stripe_end;
619 } else {
620 pgio->pg_layout_private = (void *)
621 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
622 }
574} 623}
575 624
576static const struct nfs_pageio_ops objio_pg_read_ops = { 625static const struct nfs_pageio_ops objio_pg_read_ops = {
577 .pg_init = pnfs_generic_pg_init_read, 626 .pg_init = objio_init_read,
578 .pg_test = objio_pg_test, 627 .pg_test = objio_pg_test,
579 .pg_doio = pnfs_generic_pg_readpages, 628 .pg_doio = pnfs_generic_pg_readpages,
580}; 629};
581 630
582static const struct nfs_pageio_ops objio_pg_write_ops = { 631static const struct nfs_pageio_ops objio_pg_write_ops = {
583 .pg_init = pnfs_generic_pg_init_write, 632 .pg_init = objio_init_write,
584 .pg_test = objio_pg_test, 633 .pg_test = objio_pg_test,
585 .pg_doio = pnfs_generic_pg_writepages, 634 .pg_doio = pnfs_generic_pg_writepages,
586}; 635};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 1a6732ed04a4..311a79681e2b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
49 hdr->io_start = req_offset(hdr->req); 49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count; 50 hdr->good_bytes = desc->pg_count;
51 hdr->dreq = desc->pg_dreq; 51 hdr->dreq = desc->pg_dreq;
52 hdr->layout_private = desc->pg_layout_private;
52 hdr->release = release; 53 hdr->release = release;
53 hdr->completion_ops = desc->pg_completion_ops; 54 hdr->completion_ops = desc->pg_completion_ops;
54 if (hdr->completion_ops->init_hdr) 55 if (hdr->completion_ops->init_hdr)
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
268 desc->pg_error = 0; 269 desc->pg_error = 0;
269 desc->pg_lseg = NULL; 270 desc->pg_lseg = NULL;
270 desc->pg_dreq = NULL; 271 desc->pg_dreq = NULL;
272 desc->pg_layout_private = NULL;
271} 273}
272EXPORT_SYMBOL_GPL(nfs_pageio_init); 274EXPORT_SYMBOL_GPL(nfs_pageio_init);
273 275
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 76875bfcf19c..2e00feacd4be 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -583,9 +583,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
583 struct nfs_server *server = NFS_SERVER(ino); 583 struct nfs_server *server = NFS_SERVER(ino);
584 struct nfs4_layoutget *lgp; 584 struct nfs4_layoutget *lgp;
585 struct pnfs_layout_segment *lseg = NULL; 585 struct pnfs_layout_segment *lseg = NULL;
586 struct page **pages = NULL;
587 int i;
588 u32 max_resp_sz, max_pages;
589 586
590 dprintk("--> %s\n", __func__); 587 dprintk("--> %s\n", __func__);
591 588
@@ -594,20 +591,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
594 if (lgp == NULL) 591 if (lgp == NULL)
595 return NULL; 592 return NULL;
596 593
597 /* allocate pages for xdr post processing */
598 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
599 max_pages = nfs_page_array_len(0, max_resp_sz);
600
601 pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
602 if (!pages)
603 goto out_err_free;
604
605 for (i = 0; i < max_pages; i++) {
606 pages[i] = alloc_page(gfp_flags);
607 if (!pages[i])
608 goto out_err_free;
609 }
610
611 lgp->args.minlength = PAGE_CACHE_SIZE; 594 lgp->args.minlength = PAGE_CACHE_SIZE;
612 if (lgp->args.minlength > range->length) 595 if (lgp->args.minlength > range->length)
613 lgp->args.minlength = range->length; 596 lgp->args.minlength = range->length;
@@ -616,39 +599,19 @@ send_layoutget(struct pnfs_layout_hdr *lo,
616 lgp->args.type = server->pnfs_curr_ld->id; 599 lgp->args.type = server->pnfs_curr_ld->id;
617 lgp->args.inode = ino; 600 lgp->args.inode = ino;
618 lgp->args.ctx = get_nfs_open_context(ctx); 601 lgp->args.ctx = get_nfs_open_context(ctx);
619 lgp->args.layout.pages = pages;
620 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
621 lgp->lsegpp = &lseg; 602 lgp->lsegpp = &lseg;
622 lgp->gfp_flags = gfp_flags; 603 lgp->gfp_flags = gfp_flags;
623 604
624 /* Synchronously retrieve layout information from server and 605 /* Synchronously retrieve layout information from server and
625 * store in lseg. 606 * store in lseg.
626 */ 607 */
627 nfs4_proc_layoutget(lgp); 608 nfs4_proc_layoutget(lgp, gfp_flags);
628 if (!lseg) { 609 if (!lseg) {
629 /* remember that LAYOUTGET failed and suspend trying */ 610 /* remember that LAYOUTGET failed and suspend trying */
630 set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); 611 set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
631 } 612 }
632 613
633 /* free xdr pages */
634 for (i = 0; i < max_pages; i++)
635 __free_page(pages[i]);
636 kfree(pages);
637
638 return lseg; 614 return lseg;
639
640out_err_free:
641 /* free any allocated xdr pages, lgp as it's not used */
642 if (pages) {
643 for (i = 0; i < max_pages; i++) {
644 if (!pages[i])
645 break;
646 __free_page(pages[i]);
647 }
648 kfree(pages);
649 }
650 kfree(lgp);
651 return NULL;
652} 615}
653 616
654/* 617/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2c6c80503ba4..745aa1b39e7c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
172 struct pnfs_devicelist *devlist); 172 struct pnfs_devicelist *devlist);
173extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, 173extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
174 struct pnfs_device *dev); 174 struct pnfs_device *dev);
175extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); 175extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
176extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); 176extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
177 177
178/* pnfs.c */ 178/* pnfs.c */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ac6a3c55dce4..d2c7f5db0847 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -319,6 +319,34 @@ EXPORT_SYMBOL_GPL(nfs_sops);
319static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); 319static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
320static int nfs4_validate_mount_data(void *options, 320static int nfs4_validate_mount_data(void *options,
321 struct nfs_parsed_mount_data *args, const char *dev_name); 321 struct nfs_parsed_mount_data *args, const char *dev_name);
322
323struct file_system_type nfs4_fs_type = {
324 .owner = THIS_MODULE,
325 .name = "nfs4",
326 .mount = nfs_fs_mount,
327 .kill_sb = nfs_kill_super,
328 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
329};
330EXPORT_SYMBOL_GPL(nfs4_fs_type);
331
332static int __init register_nfs4_fs(void)
333{
334 return register_filesystem(&nfs4_fs_type);
335}
336
337static void unregister_nfs4_fs(void)
338{
339 unregister_filesystem(&nfs4_fs_type);
340}
341#else
342static int __init register_nfs4_fs(void)
343{
344 return 0;
345}
346
347static void unregister_nfs4_fs(void)
348{
349}
322#endif 350#endif
323 351
324static struct shrinker acl_shrinker = { 352static struct shrinker acl_shrinker = {
@@ -337,12 +365,18 @@ int __init register_nfs_fs(void)
337 if (ret < 0) 365 if (ret < 0)
338 goto error_0; 366 goto error_0;
339 367
340 ret = nfs_register_sysctl(); 368 ret = register_nfs4_fs();
341 if (ret < 0) 369 if (ret < 0)
342 goto error_1; 370 goto error_1;
371
372 ret = nfs_register_sysctl();
373 if (ret < 0)
374 goto error_2;
343 register_shrinker(&acl_shrinker); 375 register_shrinker(&acl_shrinker);
344 return 0; 376 return 0;
345 377
378error_2:
379 unregister_nfs4_fs();
346error_1: 380error_1:
347 unregister_filesystem(&nfs_fs_type); 381 unregister_filesystem(&nfs_fs_type);
348error_0: 382error_0:
@@ -356,6 +390,7 @@ void __exit unregister_nfs_fs(void)
356{ 390{
357 unregister_shrinker(&acl_shrinker); 391 unregister_shrinker(&acl_shrinker);
358 nfs_unregister_sysctl(); 392 nfs_unregister_sysctl();
393 unregister_nfs4_fs();
359 unregister_filesystem(&nfs_fs_type); 394 unregister_filesystem(&nfs_fs_type);
360} 395}
361 396
@@ -1502,7 +1537,7 @@ static int nfs_parse_mount_options(char *raw,
1502 1537
1503 /* 1538 /*
1504 * verify that any proto=/mountproto= options match the address 1539 * verify that any proto=/mountproto= options match the address
1505 * familiies in the addr=/mountaddr= options. 1540 * families in the addr=/mountaddr= options.
1506 */ 1541 */
1507 if (protofamily != AF_UNSPEC && 1542 if (protofamily != AF_UNSPEC &&
1508 protofamily != mnt->nfs_server.address.ss_family) 1543 protofamily != mnt->nfs_server.address.ss_family)
@@ -1832,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,
1832 1867
1833 memcpy(sap, &data->addr, sizeof(data->addr)); 1868 memcpy(sap, &data->addr, sizeof(data->addr));
1834 args->nfs_server.addrlen = sizeof(data->addr); 1869 args->nfs_server.addrlen = sizeof(data->addr);
1870 args->nfs_server.port = ntohs(data->addr.sin_port);
1835 if (!nfs_verify_server_address(sap)) 1871 if (!nfs_verify_server_address(sap))
1836 goto out_no_address; 1872 goto out_no_address;
1837 1873
@@ -2529,6 +2565,7 @@ static int nfs4_validate_mount_data(void *options,
2529 return -EFAULT; 2565 return -EFAULT;
2530 if (!nfs_verify_server_address(sap)) 2566 if (!nfs_verify_server_address(sap))
2531 goto out_no_address; 2567 goto out_no_address;
2568 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
2532 2569
2533 if (data->auth_flavourlen) { 2570 if (data->auth_flavourlen) {
2534 if (data->auth_flavourlen > 1) 2571 if (data->auth_flavourlen > 1)
@@ -2645,4 +2682,6 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
2645module_param(send_implementation_id, ushort, 0644); 2682module_param(send_implementation_id, ushort, 0644);
2646MODULE_PARM_DESC(send_implementation_id, 2683MODULE_PARM_DESC(send_implementation_id,
2647 "Send implementation ID with NFSv4.1 exchange_id"); 2684 "Send implementation ID with NFSv4.1 exchange_id");
2685MODULE_ALIAS("nfs4");
2686
2648#endif /* CONFIG_NFS_V4 */ 2687#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5829d0ce7cfb..e3b55372726c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1814,19 +1814,19 @@ int __init nfs_init_writepagecache(void)
1814 nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE, 1814 nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
1815 nfs_wdata_cachep); 1815 nfs_wdata_cachep);
1816 if (nfs_wdata_mempool == NULL) 1816 if (nfs_wdata_mempool == NULL)
1817 return -ENOMEM; 1817 goto out_destroy_write_cache;
1818 1818
1819 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", 1819 nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
1820 sizeof(struct nfs_commit_data), 1820 sizeof(struct nfs_commit_data),
1821 0, SLAB_HWCACHE_ALIGN, 1821 0, SLAB_HWCACHE_ALIGN,
1822 NULL); 1822 NULL);
1823 if (nfs_cdata_cachep == NULL) 1823 if (nfs_cdata_cachep == NULL)
1824 return -ENOMEM; 1824 goto out_destroy_write_mempool;
1825 1825
1826 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, 1826 nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
1827 nfs_wdata_cachep); 1827 nfs_wdata_cachep);
1828 if (nfs_commit_mempool == NULL) 1828 if (nfs_commit_mempool == NULL)
1829 return -ENOMEM; 1829 goto out_destroy_commit_cache;
1830 1830
1831 /* 1831 /*
1832 * NFS congestion size, scale with available memory. 1832 * NFS congestion size, scale with available memory.
@@ -1849,11 +1849,20 @@ int __init nfs_init_writepagecache(void)
1849 nfs_congestion_kb = 256*1024; 1849 nfs_congestion_kb = 256*1024;
1850 1850
1851 return 0; 1851 return 0;
1852
1853out_destroy_commit_cache:
1854 kmem_cache_destroy(nfs_cdata_cachep);
1855out_destroy_write_mempool:
1856 mempool_destroy(nfs_wdata_mempool);
1857out_destroy_write_cache:
1858 kmem_cache_destroy(nfs_wdata_cachep);
1859 return -ENOMEM;
1852} 1860}
1853 1861
1854void nfs_destroy_writepagecache(void) 1862void nfs_destroy_writepagecache(void)
1855{ 1863{
1856 mempool_destroy(nfs_commit_mempool); 1864 mempool_destroy(nfs_commit_mempool);
1865 kmem_cache_destroy(nfs_cdata_cachep);
1857 mempool_destroy(nfs_wdata_mempool); 1866 mempool_destroy(nfs_wdata_mempool);
1858 kmem_cache_destroy(nfs_wdata_cachep); 1867 kmem_cache_destroy(nfs_wdata_cachep);
1859} 1868}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 412b888faecb..d0237f872cc4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2851,8 +2851,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
2851 return -ENOMEM; 2851 return -ENOMEM;
2852 } 2852 }
2853 fp->fi_lease = fl; 2853 fp->fi_lease = fl;
2854 fp->fi_deleg_file = fl->fl_file; 2854 fp->fi_deleg_file = get_file(fl->fl_file);
2855 get_file(fp->fi_deleg_file);
2856 atomic_set(&fp->fi_delegees, 1); 2855 atomic_set(&fp->fi_delegees, 1);
2857 list_add(&dp->dl_perfile, &fp->fi_delegations); 2856 list_add(&dp->dl_perfile, &fp->fi_delegations);
2858 return 0; 2857 return 0;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e76a17e003c5..c120b48ec305 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -480,7 +480,7 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
480 if (buf == NULL) 480 if (buf == NULL)
481 goto out; 481 goto out;
482 482
483 len = posix_acl_to_xattr(pacl, buf, buflen); 483 len = posix_acl_to_xattr(&init_user_ns, pacl, buf, buflen);
484 if (len < 0) { 484 if (len < 0) {
485 error = len; 485 error = len;
486 goto out; 486 goto out;
@@ -549,7 +549,7 @@ _get_posix_acl(struct dentry *dentry, char *key)
549 if (buflen <= 0) 549 if (buflen <= 0)
550 return ERR_PTR(buflen); 550 return ERR_PTR(buflen);
551 551
552 pacl = posix_acl_from_xattr(buf, buflen); 552 pacl = posix_acl_from_xattr(&init_user_ns, buf, buflen);
553 kfree(buf); 553 kfree(buf);
554 return pacl; 554 return pacl;
555} 555}
@@ -2264,7 +2264,7 @@ nfsd_get_posix_acl(struct svc_fh *fhp, int type)
2264 if (size < 0) 2264 if (size < 0)
2265 return ERR_PTR(size); 2265 return ERR_PTR(size);
2266 2266
2267 acl = posix_acl_from_xattr(value, size); 2267 acl = posix_acl_from_xattr(&init_user_ns, value, size);
2268 kfree(value); 2268 kfree(value);
2269 return acl; 2269 return acl;
2270} 2270}
@@ -2297,7 +2297,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2297 value = kmalloc(size, GFP_KERNEL); 2297 value = kmalloc(size, GFP_KERNEL);
2298 if (!value) 2298 if (!value)
2299 return -ENOMEM; 2299 return -ENOMEM;
2300 error = posix_acl_to_xattr(acl, value, size); 2300 error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
2301 if (error < 0) 2301 if (error < 0)
2302 goto getout; 2302 goto getout;
2303 size = error; 2303 size = error;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a4d56ac02e6c..16f35f7423c5 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -116,6 +116,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
116 if (unlikely(ret)) 116 if (unlikely(ret))
117 goto out; 117 goto out;
118 118
119 file_update_time(vma->vm_file);
119 ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); 120 ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
120 if (ret) { 121 if (ret) {
121 nilfs_transaction_abort(inode->i_sb); 122 nilfs_transaction_abort(inode->i_sb);
@@ -134,13 +135,13 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
134static const struct vm_operations_struct nilfs_file_vm_ops = { 135static const struct vm_operations_struct nilfs_file_vm_ops = {
135 .fault = filemap_fault, 136 .fault = filemap_fault,
136 .page_mkwrite = nilfs_page_mkwrite, 137 .page_mkwrite = nilfs_page_mkwrite,
138 .remap_pages = generic_file_remap_pages,
137}; 139};
138 140
139static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) 141static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
140{ 142{
141 file_accessed(file); 143 file_accessed(file);
142 vma->vm_ops = &nilfs_file_vm_ops; 144 vma->vm_ops = &nilfs_file_vm_ops;
143 vma->vm_flags |= VM_CAN_NONLINEAR;
144 return 0; 145 return 0;
145} 146}
146 147
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6e2c3db976b2..4d31d2cca7fd 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -401,8 +401,8 @@ int nilfs_read_inode_common(struct inode *inode,
401 int err; 401 int err;
402 402
403 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 403 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
404 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); 404 i_uid_write(inode, le32_to_cpu(raw_inode->i_uid));
405 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); 405 i_gid_write(inode, le32_to_cpu(raw_inode->i_gid));
406 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 406 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
407 inode->i_size = le64_to_cpu(raw_inode->i_size); 407 inode->i_size = le64_to_cpu(raw_inode->i_size);
408 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 408 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
@@ -590,8 +590,8 @@ void nilfs_write_inode_common(struct inode *inode,
590 struct nilfs_inode_info *ii = NILFS_I(inode); 590 struct nilfs_inode_info *ii = NILFS_I(inode);
591 591
592 raw_inode->i_mode = cpu_to_le16(inode->i_mode); 592 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
593 raw_inode->i_uid = cpu_to_le32(inode->i_uid); 593 raw_inode->i_uid = cpu_to_le32(i_uid_read(inode));
594 raw_inode->i_gid = cpu_to_le32(inode->i_gid); 594 raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
595 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 595 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
596 raw_inode->i_size = cpu_to_le64(inode->i_size); 596 raw_inode->i_size = cpu_to_le64(inode->i_size);
597 raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 597 raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6a10812711c1..3c991dc84f2f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1382,6 +1382,12 @@ static void nilfs_segbuf_init_once(void *obj)
1382 1382
1383static void nilfs_destroy_cachep(void) 1383static void nilfs_destroy_cachep(void)
1384{ 1384{
1385 /*
1386 * Make sure all delayed rcu free inodes are flushed before we
1387 * destroy cache.
1388 */
1389 rcu_barrier();
1390
1385 if (nilfs_inode_cachep) 1391 if (nilfs_inode_cachep)
1386 kmem_cache_destroy(nilfs_inode_cachep); 1392 kmem_cache_destroy(nilfs_inode_cachep);
1387 if (nilfs_transaction_cachep) 1393 if (nilfs_transaction_cachep)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index d43803669739..721d692fa8d4 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -58,7 +58,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
58 return fsnotify_remove_notify_event(group); 58 return fsnotify_remove_notify_event(group);
59} 59}
60 60
61static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) 61static int create_fd(struct fsnotify_group *group,
62 struct fsnotify_event *event,
63 struct file **file)
62{ 64{
63 int client_fd; 65 int client_fd;
64 struct file *new_file; 66 struct file *new_file;
@@ -98,7 +100,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
98 put_unused_fd(client_fd); 100 put_unused_fd(client_fd);
99 client_fd = PTR_ERR(new_file); 101 client_fd = PTR_ERR(new_file);
100 } else { 102 } else {
101 fd_install(client_fd, new_file); 103 *file = new_file;
102 } 104 }
103 105
104 return client_fd; 106 return client_fd;
@@ -106,13 +108,15 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
106 108
107static int fill_event_metadata(struct fsnotify_group *group, 109static int fill_event_metadata(struct fsnotify_group *group,
108 struct fanotify_event_metadata *metadata, 110 struct fanotify_event_metadata *metadata,
109 struct fsnotify_event *event) 111 struct fsnotify_event *event,
112 struct file **file)
110{ 113{
111 int ret = 0; 114 int ret = 0;
112 115
113 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 116 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
114 group, metadata, event); 117 group, metadata, event);
115 118
119 *file = NULL;
116 metadata->event_len = FAN_EVENT_METADATA_LEN; 120 metadata->event_len = FAN_EVENT_METADATA_LEN;
117 metadata->metadata_len = FAN_EVENT_METADATA_LEN; 121 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
118 metadata->vers = FANOTIFY_METADATA_VERSION; 122 metadata->vers = FANOTIFY_METADATA_VERSION;
@@ -121,7 +125,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
121 if (unlikely(event->mask & FAN_Q_OVERFLOW)) 125 if (unlikely(event->mask & FAN_Q_OVERFLOW))
122 metadata->fd = FAN_NOFD; 126 metadata->fd = FAN_NOFD;
123 else { 127 else {
124 metadata->fd = create_fd(group, event); 128 metadata->fd = create_fd(group, event, file);
125 if (metadata->fd < 0) 129 if (metadata->fd < 0)
126 ret = metadata->fd; 130 ret = metadata->fd;
127 } 131 }
@@ -220,25 +224,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
220 return 0; 224 return 0;
221} 225}
222 226
223static void remove_access_response(struct fsnotify_group *group,
224 struct fsnotify_event *event,
225 __s32 fd)
226{
227 struct fanotify_response_event *re;
228
229 if (!(event->mask & FAN_ALL_PERM_EVENTS))
230 return;
231
232 re = dequeue_re(group, fd);
233 if (!re)
234 return;
235
236 BUG_ON(re->event != event);
237
238 kmem_cache_free(fanotify_response_event_cache, re);
239
240 return;
241}
242#else 227#else
243static int prepare_for_access_response(struct fsnotify_group *group, 228static int prepare_for_access_response(struct fsnotify_group *group,
244 struct fsnotify_event *event, 229 struct fsnotify_event *event,
@@ -247,12 +232,6 @@ static int prepare_for_access_response(struct fsnotify_group *group,
247 return 0; 232 return 0;
248} 233}
249 234
250static void remove_access_response(struct fsnotify_group *group,
251 struct fsnotify_event *event,
252 __s32 fd)
253{
254 return;
255}
256#endif 235#endif
257 236
258static ssize_t copy_event_to_user(struct fsnotify_group *group, 237static ssize_t copy_event_to_user(struct fsnotify_group *group,
@@ -260,31 +239,33 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
260 char __user *buf) 239 char __user *buf)
261{ 240{
262 struct fanotify_event_metadata fanotify_event_metadata; 241 struct fanotify_event_metadata fanotify_event_metadata;
242 struct file *f;
263 int fd, ret; 243 int fd, ret;
264 244
265 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 245 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
266 246
267 ret = fill_event_metadata(group, &fanotify_event_metadata, event); 247 ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
268 if (ret < 0) 248 if (ret < 0)
269 goto out; 249 goto out;
270 250
271 fd = fanotify_event_metadata.fd; 251 fd = fanotify_event_metadata.fd;
272 ret = prepare_for_access_response(group, event, fd);
273 if (ret)
274 goto out_close_fd;
275
276 ret = -EFAULT; 252 ret = -EFAULT;
277 if (copy_to_user(buf, &fanotify_event_metadata, 253 if (copy_to_user(buf, &fanotify_event_metadata,
278 fanotify_event_metadata.event_len)) 254 fanotify_event_metadata.event_len))
279 goto out_kill_access_response; 255 goto out_close_fd;
280 256
257 ret = prepare_for_access_response(group, event, fd);
258 if (ret)
259 goto out_close_fd;
260
261 fd_install(fd, f);
281 return fanotify_event_metadata.event_len; 262 return fanotify_event_metadata.event_len;
282 263
283out_kill_access_response:
284 remove_access_response(group, event, fd);
285out_close_fd: 264out_close_fd:
286 if (fd != FAN_NOFD) 265 if (fd != FAN_NOFD) {
287 sys_close(fd); 266 put_unused_fd(fd);
267 fput(f);
268 }
288out: 269out:
289#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 270#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
290 if (event->mask & FAN_ALL_PERM_EVENTS) { 271 if (event->mask & FAN_ALL_PERM_EVENTS) {
@@ -470,24 +451,22 @@ static int fanotify_find_path(int dfd, const char __user *filename,
470 dfd, filename, flags); 451 dfd, filename, flags);
471 452
472 if (filename == NULL) { 453 if (filename == NULL) {
473 struct file *file; 454 struct fd f = fdget(dfd);
474 int fput_needed;
475 455
476 ret = -EBADF; 456 ret = -EBADF;
477 file = fget_light(dfd, &fput_needed); 457 if (!f.file)
478 if (!file)
479 goto out; 458 goto out;
480 459
481 ret = -ENOTDIR; 460 ret = -ENOTDIR;
482 if ((flags & FAN_MARK_ONLYDIR) && 461 if ((flags & FAN_MARK_ONLYDIR) &&
483 !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { 462 !(S_ISDIR(f.file->f_path.dentry->d_inode->i_mode))) {
484 fput_light(file, fput_needed); 463 fdput(f);
485 goto out; 464 goto out;
486 } 465 }
487 466
488 *path = file->f_path; 467 *path = f.file->f_path;
489 path_get(path); 468 path_get(path);
490 fput_light(file, fput_needed); 469 fdput(f);
491 } else { 470 } else {
492 unsigned int lookup_flags = 0; 471 unsigned int lookup_flags = 0;
493 472
@@ -767,9 +746,9 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
767 struct inode *inode = NULL; 746 struct inode *inode = NULL;
768 struct vfsmount *mnt = NULL; 747 struct vfsmount *mnt = NULL;
769 struct fsnotify_group *group; 748 struct fsnotify_group *group;
770 struct file *filp; 749 struct fd f;
771 struct path path; 750 struct path path;
772 int ret, fput_needed; 751 int ret;
773 752
774 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", 753 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
775 __func__, fanotify_fd, flags, dfd, pathname, mask); 754 __func__, fanotify_fd, flags, dfd, pathname, mask);
@@ -803,15 +782,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
803#endif 782#endif
804 return -EINVAL; 783 return -EINVAL;
805 784
806 filp = fget_light(fanotify_fd, &fput_needed); 785 f = fdget(fanotify_fd);
807 if (unlikely(!filp)) 786 if (unlikely(!f.file))
808 return -EBADF; 787 return -EBADF;
809 788
810 /* verify that this is indeed an fanotify instance */ 789 /* verify that this is indeed an fanotify instance */
811 ret = -EINVAL; 790 ret = -EINVAL;
812 if (unlikely(filp->f_op != &fanotify_fops)) 791 if (unlikely(f.file->f_op != &fanotify_fops))
813 goto fput_and_out; 792 goto fput_and_out;
814 group = filp->private_data; 793 group = f.file->private_data;
815 794
816 /* 795 /*
817 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not 796 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF. These are not
@@ -858,7 +837,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
858 837
859 path_put(&path); 838 path_put(&path);
860fput_and_out: 839fput_and_out:
861 fput_light(filp, fput_needed); 840 fdput(f);
862 return ret; 841 return ret;
863} 842}
864 843
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985c..c311dda054a3 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -757,16 +757,16 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
757 struct fsnotify_group *group; 757 struct fsnotify_group *group;
758 struct inode *inode; 758 struct inode *inode;
759 struct path path; 759 struct path path;
760 struct file *filp; 760 struct fd f;
761 int ret, fput_needed; 761 int ret;
762 unsigned flags = 0; 762 unsigned flags = 0;
763 763
764 filp = fget_light(fd, &fput_needed); 764 f = fdget(fd);
765 if (unlikely(!filp)) 765 if (unlikely(!f.file))
766 return -EBADF; 766 return -EBADF;
767 767
768 /* verify that this is indeed an inotify instance */ 768 /* verify that this is indeed an inotify instance */
769 if (unlikely(filp->f_op != &inotify_fops)) { 769 if (unlikely(f.file->f_op != &inotify_fops)) {
770 ret = -EINVAL; 770 ret = -EINVAL;
771 goto fput_and_out; 771 goto fput_and_out;
772 } 772 }
@@ -782,13 +782,13 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
782 782
783 /* inode held in place by reference to path; group by fget on fd */ 783 /* inode held in place by reference to path; group by fget on fd */
784 inode = path.dentry->d_inode; 784 inode = path.dentry->d_inode;
785 group = filp->private_data; 785 group = f.file->private_data;
786 786
787 /* create/update an inode mark */ 787 /* create/update an inode mark */
788 ret = inotify_update_watch(group, inode, mask); 788 ret = inotify_update_watch(group, inode, mask);
789 path_put(&path); 789 path_put(&path);
790fput_and_out: 790fput_and_out:
791 fput_light(filp, fput_needed); 791 fdput(f);
792 return ret; 792 return ret;
793} 793}
794 794
@@ -796,19 +796,19 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
796{ 796{
797 struct fsnotify_group *group; 797 struct fsnotify_group *group;
798 struct inotify_inode_mark *i_mark; 798 struct inotify_inode_mark *i_mark;
799 struct file *filp; 799 struct fd f;
800 int ret = 0, fput_needed; 800 int ret = 0;
801 801
802 filp = fget_light(fd, &fput_needed); 802 f = fdget(fd);
803 if (unlikely(!filp)) 803 if (unlikely(!f.file))
804 return -EBADF; 804 return -EBADF;
805 805
806 /* verify that this is indeed an inotify instance */ 806 /* verify that this is indeed an inotify instance */
807 ret = -EINVAL; 807 ret = -EINVAL;
808 if (unlikely(filp->f_op != &inotify_fops)) 808 if (unlikely(f.file->f_op != &inotify_fops))
809 goto out; 809 goto out;
810 810
811 group = filp->private_data; 811 group = f.file->private_data;
812 812
813 ret = -EINVAL; 813 ret = -EINVAL;
814 i_mark = inotify_idr_find(group, wd); 814 i_mark = inotify_idr_find(group, wd);
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
823 fsnotify_put_mark(&i_mark->fsn_mark); 823 fsnotify_put_mark(&i_mark->fsn_mark);
824 824
825out: 825out:
826 fput_light(filp, fput_needed); 826 fdput(f);
827 return ret; 827 return ret;
828} 828}
829 829
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c6dbd3db6ca8..1d27331e6fc9 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2124,7 +2124,8 @@ int ntfs_read_inode_mount(struct inode *vi)
2124 * ntfs_read_inode() will have set up the default ones. 2124 * ntfs_read_inode() will have set up the default ones.
2125 */ 2125 */
2126 /* Set uid and gid to root. */ 2126 /* Set uid and gid to root. */
2127 vi->i_uid = vi->i_gid = 0; 2127 vi->i_uid = GLOBAL_ROOT_UID;
2128 vi->i_gid = GLOBAL_ROOT_GID;
2128 /* Regular file. No access for anyone. */ 2129 /* Regular file. No access for anyone. */
2129 vi->i_mode = S_IFREG; 2130 vi->i_mode = S_IFREG;
2130 /* No VFS initiated operations allowed for $MFT. */ 2131 /* No VFS initiated operations allowed for $MFT. */
@@ -2312,8 +2313,8 @@ int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2312 ntfs_volume *vol = NTFS_SB(root->d_sb); 2313 ntfs_volume *vol = NTFS_SB(root->d_sb);
2313 int i; 2314 int i;
2314 2315
2315 seq_printf(sf, ",uid=%i", vol->uid); 2316 seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
2316 seq_printf(sf, ",gid=%i", vol->gid); 2317 seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
2317 if (vol->fmask == vol->dmask) 2318 if (vol->fmask == vol->dmask)
2318 seq_printf(sf, ",umask=0%o", vol->fmask); 2319 seq_printf(sf, ",umask=0%o", vol->fmask);
2319 else { 2320 else {
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 2bc149d6a784..4a8289f8b16c 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -102,8 +102,8 @@ static bool parse_options(ntfs_volume *vol, char *opt)
102 char *p, *v, *ov; 102 char *p, *v, *ov;
103 static char *utf8 = "utf8"; 103 static char *utf8 = "utf8";
104 int errors = 0, sloppy = 0; 104 int errors = 0, sloppy = 0;
105 uid_t uid = (uid_t)-1; 105 kuid_t uid = INVALID_UID;
106 gid_t gid = (gid_t)-1; 106 kgid_t gid = INVALID_GID;
107 umode_t fmask = (umode_t)-1, dmask = (umode_t)-1; 107 umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
108 int mft_zone_multiplier = -1, on_errors = -1; 108 int mft_zone_multiplier = -1, on_errors = -1;
109 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; 109 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
@@ -128,6 +128,30 @@ static bool parse_options(ntfs_volume *vol, char *opt)
128 if (*v) \ 128 if (*v) \
129 goto needs_val; \ 129 goto needs_val; \
130 } 130 }
131#define NTFS_GETOPT_UID(option, variable) \
132 if (!strcmp(p, option)) { \
133 uid_t uid_value; \
134 if (!v || !*v) \
135 goto needs_arg; \
136 uid_value = simple_strtoul(ov = v, &v, 0); \
137 if (*v) \
138 goto needs_val; \
139 variable = make_kuid(current_user_ns(), uid_value); \
140 if (!uid_valid(variable)) \
141 goto needs_val; \
142 }
143#define NTFS_GETOPT_GID(option, variable) \
144 if (!strcmp(p, option)) { \
145 gid_t gid_value; \
146 if (!v || !*v) \
147 goto needs_arg; \
148 gid_value = simple_strtoul(ov = v, &v, 0); \
149 if (*v) \
150 goto needs_val; \
151 variable = make_kgid(current_user_ns(), gid_value); \
152 if (!gid_valid(variable)) \
153 goto needs_val; \
154 }
131#define NTFS_GETOPT_OCTAL(option, variable) \ 155#define NTFS_GETOPT_OCTAL(option, variable) \
132 if (!strcmp(p, option)) { \ 156 if (!strcmp(p, option)) { \
133 if (!v || !*v) \ 157 if (!v || !*v) \
@@ -165,8 +189,8 @@ static bool parse_options(ntfs_volume *vol, char *opt)
165 while ((p = strsep(&opt, ","))) { 189 while ((p = strsep(&opt, ","))) {
166 if ((v = strchr(p, '='))) 190 if ((v = strchr(p, '=')))
167 *v++ = 0; 191 *v++ = 0;
168 NTFS_GETOPT("uid", uid) 192 NTFS_GETOPT_UID("uid", uid)
169 else NTFS_GETOPT("gid", gid) 193 else NTFS_GETOPT_GID("gid", gid)
170 else NTFS_GETOPT_OCTAL("umask", fmask = dmask) 194 else NTFS_GETOPT_OCTAL("umask", fmask = dmask)
171 else NTFS_GETOPT_OCTAL("fmask", fmask) 195 else NTFS_GETOPT_OCTAL("fmask", fmask)
172 else NTFS_GETOPT_OCTAL("dmask", dmask) 196 else NTFS_GETOPT_OCTAL("dmask", dmask)
@@ -283,9 +307,9 @@ no_mount_options:
283 vol->on_errors = on_errors; 307 vol->on_errors = on_errors;
284 if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) 308 if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER)
285 vol->on_errors |= ON_ERRORS_CONTINUE; 309 vol->on_errors |= ON_ERRORS_CONTINUE;
286 if (uid != (uid_t)-1) 310 if (uid_valid(uid))
287 vol->uid = uid; 311 vol->uid = uid;
288 if (gid != (gid_t)-1) 312 if (gid_valid(gid))
289 vol->gid = gid; 313 vol->gid = gid;
290 if (fmask != (umode_t)-1) 314 if (fmask != (umode_t)-1)
291 vol->fmask = fmask; 315 vol->fmask = fmask;
@@ -1023,7 +1047,8 @@ static bool load_and_init_mft_mirror(ntfs_volume *vol)
1023 * ntfs_read_inode() will have set up the default ones. 1047 * ntfs_read_inode() will have set up the default ones.
1024 */ 1048 */
1025 /* Set uid and gid to root. */ 1049 /* Set uid and gid to root. */
1026 tmp_ino->i_uid = tmp_ino->i_gid = 0; 1050 tmp_ino->i_uid = GLOBAL_ROOT_UID;
1051 tmp_ino->i_gid = GLOBAL_ROOT_GID;
1027 /* Regular file. No access for anyone. */ 1052 /* Regular file. No access for anyone. */
1028 tmp_ino->i_mode = S_IFREG; 1053 tmp_ino->i_mode = S_IFREG;
1029 /* No VFS initiated operations allowed for $MFTMirr. */ 1054 /* No VFS initiated operations allowed for $MFTMirr. */
@@ -3168,6 +3193,12 @@ static void __exit exit_ntfs_fs(void)
3168 ntfs_debug("Unregistering NTFS driver."); 3193 ntfs_debug("Unregistering NTFS driver.");
3169 3194
3170 unregister_filesystem(&ntfs_fs_type); 3195 unregister_filesystem(&ntfs_fs_type);
3196
3197 /*
3198 * Make sure all delayed rcu free inodes are flushed before we
3199 * destroy cache.
3200 */
3201 rcu_barrier();
3171 kmem_cache_destroy(ntfs_big_inode_cache); 3202 kmem_cache_destroy(ntfs_big_inode_cache);
3172 kmem_cache_destroy(ntfs_inode_cache); 3203 kmem_cache_destroy(ntfs_inode_cache);
3173 kmem_cache_destroy(ntfs_name_cache); 3204 kmem_cache_destroy(ntfs_name_cache);
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 15e3ba8d521a..4f579b02bc76 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -25,6 +25,7 @@
25#define _LINUX_NTFS_VOLUME_H 25#define _LINUX_NTFS_VOLUME_H
26 26
27#include <linux/rwsem.h> 27#include <linux/rwsem.h>
28#include <linux/uidgid.h>
28 29
29#include "types.h" 30#include "types.h"
30#include "layout.h" 31#include "layout.h"
@@ -46,8 +47,8 @@ typedef struct {
46 sized blocks on the device. */ 47 sized blocks on the device. */
47 /* Configuration provided by user at mount time. */ 48 /* Configuration provided by user at mount time. */
48 unsigned long flags; /* Miscellaneous flags, see below. */ 49 unsigned long flags; /* Miscellaneous flags, see below. */
49 uid_t uid; /* uid that files will be mounted as. */ 50 kuid_t uid; /* uid that files will be mounted as. */
50 gid_t gid; /* gid that files will be mounted as. */ 51 kgid_t gid; /* gid that files will be mounted as. */
51 umode_t fmask; /* The mask for file permissions. */ 52 umode_t fmask; /* The mask for file permissions. */
52 umode_t dmask; /* The mask for directory 53 umode_t dmask; /* The mask for directory
53 permissions. */ 54 permissions. */
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index a7219075b4de..260b16281fc3 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -452,7 +452,7 @@ static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
452 return PTR_ERR(acl); 452 return PTR_ERR(acl);
453 if (acl == NULL) 453 if (acl == NULL)
454 return -ENODATA; 454 return -ENODATA;
455 ret = posix_acl_to_xattr(acl, buffer, size); 455 ret = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
456 posix_acl_release(acl); 456 posix_acl_release(acl);
457 457
458 return ret; 458 return ret;
@@ -475,7 +475,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
475 return -EPERM; 475 return -EPERM;
476 476
477 if (value) { 477 if (value) {
478 acl = posix_acl_from_xattr(value, size); 478 acl = posix_acl_from_xattr(&init_user_ns, value, size);
479 if (IS_ERR(acl)) 479 if (IS_ERR(acl))
480 return PTR_ERR(acl); 480 return PTR_ERR(acl);
481 else if (acl) { 481 else if (acl) {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a4e855e3690e..f7c648d7d6bf 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1746,8 +1746,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1746 long fd; 1746 long fd;
1747 int sectsize; 1747 int sectsize;
1748 char *p = (char *)page; 1748 char *p = (char *)page;
1749 struct file *filp = NULL; 1749 struct fd f;
1750 struct inode *inode = NULL; 1750 struct inode *inode;
1751 ssize_t ret = -EINVAL; 1751 ssize_t ret = -EINVAL;
1752 int live_threshold; 1752 int live_threshold;
1753 1753
@@ -1766,26 +1766,26 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1766 if (fd < 0 || fd >= INT_MAX) 1766 if (fd < 0 || fd >= INT_MAX)
1767 goto out; 1767 goto out;
1768 1768
1769 filp = fget(fd); 1769 f = fdget(fd);
1770 if (filp == NULL) 1770 if (f.file == NULL)
1771 goto out; 1771 goto out;
1772 1772
1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || 1773 if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
1774 reg->hr_block_bytes == 0) 1774 reg->hr_block_bytes == 0)
1775 goto out; 1775 goto out2;
1776 1776
1777 inode = igrab(filp->f_mapping->host); 1777 inode = igrab(f.file->f_mapping->host);
1778 if (inode == NULL) 1778 if (inode == NULL)
1779 goto out; 1779 goto out2;
1780 1780
1781 if (!S_ISBLK(inode->i_mode)) 1781 if (!S_ISBLK(inode->i_mode))
1782 goto out; 1782 goto out3;
1783 1783
1784 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1784 reg->hr_bdev = I_BDEV(f.file->f_mapping->host);
1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); 1785 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL);
1786 if (ret) { 1786 if (ret) {
1787 reg->hr_bdev = NULL; 1787 reg->hr_bdev = NULL;
1788 goto out; 1788 goto out3;
1789 } 1789 }
1790 inode = NULL; 1790 inode = NULL;
1791 1791
@@ -1797,7 +1797,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1797 "blocksize %u incorrect for device, expected %d", 1797 "blocksize %u incorrect for device, expected %d",
1798 reg->hr_block_bytes, sectsize); 1798 reg->hr_block_bytes, sectsize);
1799 ret = -EINVAL; 1799 ret = -EINVAL;
1800 goto out; 1800 goto out3;
1801 } 1801 }
1802 1802
1803 o2hb_init_region_params(reg); 1803 o2hb_init_region_params(reg);
@@ -1811,13 +1811,13 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1811 ret = o2hb_map_slot_data(reg); 1811 ret = o2hb_map_slot_data(reg);
1812 if (ret) { 1812 if (ret) {
1813 mlog_errno(ret); 1813 mlog_errno(ret);
1814 goto out; 1814 goto out3;
1815 } 1815 }
1816 1816
1817 ret = o2hb_populate_slot_data(reg); 1817 ret = o2hb_populate_slot_data(reg);
1818 if (ret) { 1818 if (ret) {
1819 mlog_errno(ret); 1819 mlog_errno(ret);
1820 goto out; 1820 goto out3;
1821 } 1821 }
1822 1822
1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout); 1823 INIT_DELAYED_WORK(&reg->hr_write_timeout_work, o2hb_write_timeout);
@@ -1847,7 +1847,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1847 if (IS_ERR(hb_task)) { 1847 if (IS_ERR(hb_task)) {
1848 ret = PTR_ERR(hb_task); 1848 ret = PTR_ERR(hb_task);
1849 mlog_errno(ret); 1849 mlog_errno(ret);
1850 goto out; 1850 goto out3;
1851 } 1851 }
1852 1852
1853 spin_lock(&o2hb_live_lock); 1853 spin_lock(&o2hb_live_lock);
@@ -1863,7 +1863,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1863 1863
1864 if (reg->hr_aborted_start) { 1864 if (reg->hr_aborted_start) {
1865 ret = -EIO; 1865 ret = -EIO;
1866 goto out; 1866 goto out3;
1867 } 1867 }
1868 1868
1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */ 1869 /* Ok, we were woken. Make sure it wasn't by drop_item() */
@@ -1882,11 +1882,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", 1882 printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
1883 config_item_name(&reg->hr_item), reg->hr_dev_name); 1883 config_item_name(&reg->hr_item), reg->hr_dev_name);
1884 1884
1885out3:
1886 iput(inode);
1887out2:
1888 fdput(f);
1885out: 1889out:
1886 if (filp)
1887 fput(filp);
1888 if (inode)
1889 iput(inode);
1890 if (ret < 0) { 1890 if (ret < 0) {
1891 if (reg->hr_bdev) { 1891 if (reg->hr_bdev) {
1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); 1892 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 8f9cea1597af..c19897d0fe14 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -327,5 +327,5 @@ void o2quo_exit(void)
327{ 327{
328 struct o2quo_state *qs = &o2quo_state; 328 struct o2quo_state *qs = &o2quo_state;
329 329
330 flush_work_sync(&qs->qs_work); 330 flush_work(&qs->qs_work);
331} 331}
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 83b6f98e0665..16b712d260d4 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -691,6 +691,11 @@ static void __exit exit_dlmfs_fs(void)
691 flush_workqueue(user_dlm_worker); 691 flush_workqueue(user_dlm_worker);
692 destroy_workqueue(user_dlm_worker); 692 destroy_workqueue(user_dlm_worker);
693 693
694 /*
695 * Make sure all delayed rcu free inodes are flushed before we
696 * destroy cache.
697 */
698 rcu_barrier();
694 kmem_cache_destroy(dlmfs_inode_cache); 699 kmem_cache_destroy(dlmfs_inode_cache);
695 700
696 bdi_destroy(&dlmfs_backing_dev_info); 701 bdi_destroy(&dlmfs_backing_dev_info);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46a1f6d75104..5a4ee77cec51 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1184,8 +1184,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1184 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid 1184 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
1185 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1185 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1186 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1186 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1187 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, 1187 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
1188 USRQUOTA);
1189 if (!transfer_to[USRQUOTA]) { 1188 if (!transfer_to[USRQUOTA]) {
1190 status = -ESRCH; 1189 status = -ESRCH;
1191 goto bail_unlock; 1190 goto bail_unlock;
@@ -1194,8 +1193,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1194 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid 1193 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
1195 && OCFS2_HAS_RO_COMPAT_FEATURE(sb, 1194 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1196 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1195 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1197 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, 1196 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
1198 GRPQUOTA);
1199 if (!transfer_to[GRPQUOTA]) { 1197 if (!transfer_to[GRPQUOTA]) {
1200 status = -ESRCH; 1198 status = -ESRCH;
1201 goto bail_unlock; 1199 goto bail_unlock;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index d150372fd81d..47a87dda54ce 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -173,6 +173,7 @@ out:
173static const struct vm_operations_struct ocfs2_file_vm_ops = { 173static const struct vm_operations_struct ocfs2_file_vm_ops = {
174 .fault = ocfs2_fault, 174 .fault = ocfs2_fault,
175 .page_mkwrite = ocfs2_page_mkwrite, 175 .page_mkwrite = ocfs2_page_mkwrite,
176 .remap_pages = generic_file_remap_pages,
176}; 177};
177 178
178int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 179int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
@@ -188,7 +189,6 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
188 ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level); 189 ocfs2_inode_unlock(file->f_dentry->d_inode, lock_level);
189out: 190out:
190 vma->vm_ops = &ocfs2_file_vm_ops; 191 vma->vm_ops = &ocfs2_file_vm_ops;
191 vma->vm_flags |= VM_CAN_NONLINEAR;
192 return 0; 192 return 0;
193} 193}
194 194
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 0a86e302655f..332a281f217e 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -95,7 +95,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
95 struct ocfs2_global_disk_dqblk *d = dp; 95 struct ocfs2_global_disk_dqblk *d = dp;
96 struct mem_dqblk *m = &dquot->dq_dqb; 96 struct mem_dqblk *m = &dquot->dq_dqb;
97 97
98 d->dqb_id = cpu_to_le32(dquot->dq_id); 98 d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
99 d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); 99 d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
100 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); 100 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
101 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); 101 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
@@ -112,11 +112,14 @@ static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
112{ 112{
113 struct ocfs2_global_disk_dqblk *d = dp; 113 struct ocfs2_global_disk_dqblk *d = dp;
114 struct ocfs2_mem_dqinfo *oinfo = 114 struct ocfs2_mem_dqinfo *oinfo =
115 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 115 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
116 116
117 if (qtree_entry_unused(&oinfo->dqi_gi, dp)) 117 if (qtree_entry_unused(&oinfo->dqi_gi, dp))
118 return 0; 118 return 0;
119 return le32_to_cpu(d->dqb_id) == dquot->dq_id; 119
120 return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type,
121 le32_to_cpu(d->dqb_id)),
122 dquot->dq_id);
120} 123}
121 124
122struct qtree_fmt_operations ocfs2_global_ops = { 125struct qtree_fmt_operations ocfs2_global_ops = {
@@ -475,7 +478,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
475{ 478{
476 int err, err2; 479 int err, err2;
477 struct super_block *sb = dquot->dq_sb; 480 struct super_block *sb = dquot->dq_sb;
478 int type = dquot->dq_type; 481 int type = dquot->dq_id.type;
479 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; 482 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
480 struct ocfs2_global_disk_dqblk dqblk; 483 struct ocfs2_global_disk_dqblk dqblk;
481 s64 spacechange, inodechange; 484 s64 spacechange, inodechange;
@@ -504,7 +507,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
504 olditime = dquot->dq_dqb.dqb_itime; 507 olditime = dquot->dq_dqb.dqb_itime;
505 oldbtime = dquot->dq_dqb.dqb_btime; 508 oldbtime = dquot->dq_dqb.dqb_btime;
506 ocfs2_global_disk2memdqb(dquot, &dqblk); 509 ocfs2_global_disk2memdqb(dquot, &dqblk);
507 trace_ocfs2_sync_dquot(dquot->dq_id, dquot->dq_dqb.dqb_curspace, 510 trace_ocfs2_sync_dquot(from_kqid(&init_user_ns, dquot->dq_id),
511 dquot->dq_dqb.dqb_curspace,
508 (long long)spacechange, 512 (long long)spacechange,
509 dquot->dq_dqb.dqb_curinodes, 513 dquot->dq_dqb.dqb_curinodes,
510 (long long)inodechange); 514 (long long)inodechange);
@@ -555,8 +559,8 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
555 err = ocfs2_qinfo_lock(info, freeing); 559 err = ocfs2_qinfo_lock(info, freeing);
556 if (err < 0) { 560 if (err < 0) {
557 mlog(ML_ERROR, "Failed to lock quota info, losing quota write" 561 mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
558 " (type=%d, id=%u)\n", dquot->dq_type, 562 " (type=%d, id=%u)\n", dquot->dq_id.type,
559 (unsigned)dquot->dq_id); 563 (unsigned)from_kqid(&init_user_ns, dquot->dq_id));
560 goto out; 564 goto out;
561 } 565 }
562 if (freeing) 566 if (freeing)
@@ -591,9 +595,10 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
591 struct ocfs2_super *osb = OCFS2_SB(sb); 595 struct ocfs2_super *osb = OCFS2_SB(sb);
592 int status = 0; 596 int status = 0;
593 597
594 trace_ocfs2_sync_dquot_helper(dquot->dq_id, dquot->dq_type, 598 trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id),
599 dquot->dq_id.type,
595 type, sb->s_id); 600 type, sb->s_id);
596 if (type != dquot->dq_type) 601 if (type != dquot->dq_id.type)
597 goto out; 602 goto out;
598 status = ocfs2_lock_global_qf(oinfo, 1); 603 status = ocfs2_lock_global_qf(oinfo, 1);
599 if (status < 0) 604 if (status < 0)
@@ -643,7 +648,8 @@ static int ocfs2_write_dquot(struct dquot *dquot)
643 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); 648 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
644 int status = 0; 649 int status = 0;
645 650
646 trace_ocfs2_write_dquot(dquot->dq_id, dquot->dq_type); 651 trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id),
652 dquot->dq_id.type);
647 653
648 handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); 654 handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
649 if (IS_ERR(handle)) { 655 if (IS_ERR(handle)) {
@@ -677,11 +683,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
677{ 683{
678 handle_t *handle; 684 handle_t *handle;
679 struct ocfs2_mem_dqinfo *oinfo = 685 struct ocfs2_mem_dqinfo *oinfo =
680 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 686 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
681 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); 687 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
682 int status = 0; 688 int status = 0;
683 689
684 trace_ocfs2_release_dquot(dquot->dq_id, dquot->dq_type); 690 trace_ocfs2_release_dquot(from_kqid(&init_user_ns, dquot->dq_id),
691 dquot->dq_id.type);
685 692
686 mutex_lock(&dquot->dq_lock); 693 mutex_lock(&dquot->dq_lock);
687 /* Check whether we are not racing with some other dqget() */ 694 /* Check whether we are not racing with some other dqget() */
@@ -691,7 +698,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
691 if (status < 0) 698 if (status < 0)
692 goto out; 699 goto out;
693 handle = ocfs2_start_trans(osb, 700 handle = ocfs2_start_trans(osb,
694 ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type)); 701 ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type));
695 if (IS_ERR(handle)) { 702 if (IS_ERR(handle)) {
696 status = PTR_ERR(handle); 703 status = PTR_ERR(handle);
697 mlog_errno(status); 704 mlog_errno(status);
@@ -733,13 +740,14 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
733 int ex = 0; 740 int ex = 0;
734 struct super_block *sb = dquot->dq_sb; 741 struct super_block *sb = dquot->dq_sb;
735 struct ocfs2_super *osb = OCFS2_SB(sb); 742 struct ocfs2_super *osb = OCFS2_SB(sb);
736 int type = dquot->dq_type; 743 int type = dquot->dq_id.type;
737 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; 744 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
738 struct inode *gqinode = info->dqi_gqinode; 745 struct inode *gqinode = info->dqi_gqinode;
739 int need_alloc = ocfs2_global_qinit_alloc(sb, type); 746 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
740 handle_t *handle; 747 handle_t *handle;
741 748
742 trace_ocfs2_acquire_dquot(dquot->dq_id, type); 749 trace_ocfs2_acquire_dquot(from_kqid(&init_user_ns, dquot->dq_id),
750 type);
743 mutex_lock(&dquot->dq_lock); 751 mutex_lock(&dquot->dq_lock);
744 /* 752 /*
745 * We need an exclusive lock, because we're going to update use count 753 * We need an exclusive lock, because we're going to update use count
@@ -821,12 +829,13 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
821 int sync = 0; 829 int sync = 0;
822 int status; 830 int status;
823 struct super_block *sb = dquot->dq_sb; 831 struct super_block *sb = dquot->dq_sb;
824 int type = dquot->dq_type; 832 int type = dquot->dq_id.type;
825 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; 833 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
826 handle_t *handle; 834 handle_t *handle;
827 struct ocfs2_super *osb = OCFS2_SB(sb); 835 struct ocfs2_super *osb = OCFS2_SB(sb);
828 836
829 trace_ocfs2_mark_dquot_dirty(dquot->dq_id, type); 837 trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id),
838 type);
830 839
831 /* In case user set some limits, sync dquot immediately to global 840 /* In case user set some limits, sync dquot immediately to global
832 * quota file so that information propagates quicker */ 841 * quota file so that information propagates quicker */
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index f100bf70a906..27fe7ee4874c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -501,7 +501,9 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
501 } 501 }
502 dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data + 502 dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
503 ol_dqblk_block_off(sb, chunk, bit)); 503 ol_dqblk_block_off(sb, chunk, bit));
504 dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type); 504 dquot = dqget(sb,
505 make_kqid(&init_user_ns, type,
506 le64_to_cpu(dqblk->dqb_id)));
505 if (!dquot) { 507 if (!dquot) {
506 status = -EIO; 508 status = -EIO;
507 mlog(ML_ERROR, "Failed to get quota structure " 509 mlog(ML_ERROR, "Failed to get quota structure "
@@ -881,7 +883,8 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
881 dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data 883 dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
882 + ol_dqblk_block_offset(sb, od->dq_local_off)); 884 + ol_dqblk_block_offset(sb, od->dq_local_off));
883 885
884 dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); 886 dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns,
887 od->dq_dquot.dq_id));
885 spin_lock(&dq_data_lock); 888 spin_lock(&dq_data_lock);
886 dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - 889 dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
887 od->dq_origspace); 890 od->dq_origspace);
@@ -891,7 +894,7 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
891 trace_olq_set_dquot( 894 trace_olq_set_dquot(
892 (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod), 895 (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod),
893 (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod), 896 (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod),
894 od->dq_dquot.dq_id); 897 from_kqid(&init_user_ns, od->dq_dquot.dq_id));
895} 898}
896 899
897/* Write dquot to local quota file */ 900/* Write dquot to local quota file */
@@ -900,7 +903,7 @@ int ocfs2_local_write_dquot(struct dquot *dquot)
900 struct super_block *sb = dquot->dq_sb; 903 struct super_block *sb = dquot->dq_sb;
901 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 904 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
902 struct buffer_head *bh; 905 struct buffer_head *bh;
903 struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type]; 906 struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_id.type];
904 int status; 907 int status;
905 908
906 status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk, 909 status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk,
@@ -1221,7 +1224,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private)
1221int ocfs2_create_local_dquot(struct dquot *dquot) 1224int ocfs2_create_local_dquot(struct dquot *dquot)
1222{ 1225{
1223 struct super_block *sb = dquot->dq_sb; 1226 struct super_block *sb = dquot->dq_sb;
1224 int type = dquot->dq_type; 1227 int type = dquot->dq_id.type;
1225 struct inode *lqinode = sb_dqopt(sb)->files[type]; 1228 struct inode *lqinode = sb_dqopt(sb)->files[type];
1226 struct ocfs2_quota_chunk *chunk; 1229 struct ocfs2_quota_chunk *chunk;
1227 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 1230 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
@@ -1275,7 +1278,7 @@ out:
1275int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) 1278int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
1276{ 1279{
1277 int status; 1280 int status;
1278 int type = dquot->dq_type; 1281 int type = dquot->dq_id.type;
1279 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 1282 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1280 struct super_block *sb = dquot->dq_sb; 1283 struct super_block *sb = dquot->dq_sb;
1281 struct ocfs2_local_disk_chunk *dchunk; 1284 struct ocfs2_local_disk_chunk *dchunk;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 68f4541c2db9..0e91ec22a940 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1818,6 +1818,11 @@ static int ocfs2_initialize_mem_caches(void)
1818 1818
1819static void ocfs2_free_mem_caches(void) 1819static void ocfs2_free_mem_caches(void)
1820{ 1820{
1821 /*
1822 * Make sure all delayed rcu free inodes are flushed before we
1823 * destroy cache.
1824 */
1825 rcu_barrier();
1821 if (ocfs2_inode_cachep) 1826 if (ocfs2_inode_cachep)
1822 kmem_cache_destroy(ocfs2_inode_cachep); 1827 kmem_cache_destroy(ocfs2_inode_cachep);
1823 ocfs2_inode_cachep = NULL; 1828 ocfs2_inode_cachep = NULL;
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 2c6d95257a4d..77e3cb2962b4 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -146,8 +146,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
146 be64_to_cpu(entry->e_blocks); 146 be64_to_cpu(entry->e_blocks);
147 147
148 if (omfs_allocate_block(inode->i_sb, new_block)) { 148 if (omfs_allocate_block(inode->i_sb, new_block)) {
149 entry->e_blocks = 149 be64_add_cpu(&entry->e_blocks, 1);
150 cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1);
151 terminator->e_blocks = ~(cpu_to_be64( 150 terminator->e_blocks = ~(cpu_to_be64(
152 be64_to_cpu(~terminator->e_blocks) + 1)); 151 be64_to_cpu(~terminator->e_blocks) + 1));
153 goto out; 152 goto out;
@@ -177,7 +176,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
177 be64_to_cpu(~terminator->e_blocks) + (u64) new_count)); 176 be64_to_cpu(~terminator->e_blocks) + (u64) new_count));
178 177
179 /* write in new entry */ 178 /* write in new entry */
180 oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count)); 179 be32_add_cpu(&oe->e_extent_count, 1);
181 180
182out: 181out:
183 *ret_block = new_block; 182 *ret_block = new_block;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index e6213b3725d1..25d715c7c87a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -391,12 +391,16 @@ static int parse_options(char *options, struct omfs_sb_info *sbi)
391 case Opt_uid: 391 case Opt_uid:
392 if (match_int(&args[0], &option)) 392 if (match_int(&args[0], &option))
393 return 0; 393 return 0;
394 sbi->s_uid = option; 394 sbi->s_uid = make_kuid(current_user_ns(), option);
395 if (!uid_valid(sbi->s_uid))
396 return 0;
395 break; 397 break;
396 case Opt_gid: 398 case Opt_gid:
397 if (match_int(&args[0], &option)) 399 if (match_int(&args[0], &option))
398 return 0; 400 return 0;
399 sbi->s_gid = option; 401 sbi->s_gid = make_kgid(current_user_ns(), option);
402 if (!gid_valid(sbi->s_gid))
403 return 0;
400 break; 404 break;
401 case Opt_umask: 405 case Opt_umask:
402 if (match_octal(&args[0], &option)) 406 if (match_octal(&args[0], &option))
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 8941f12c6b01..f0f8bc75e609 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -19,8 +19,8 @@ struct omfs_sb_info {
19 unsigned long **s_imap; 19 unsigned long **s_imap;
20 int s_imap_size; 20 int s_imap_size;
21 struct mutex s_bitmap_lock; 21 struct mutex s_bitmap_lock;
22 int s_uid; 22 kuid_t s_uid;
23 int s_gid; 23 kgid_t s_gid;
24 int s_dmask; 24 int s_dmask;
25 int s_fmask; 25 int s_fmask;
26}; 26};
diff --git a/fs/open.c b/fs/open.c
index bc132e167d2d..44da0feeca2c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -132,27 +132,27 @@ SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
132 132
133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) 133static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
134{ 134{
135 struct inode * inode; 135 struct inode *inode;
136 struct dentry *dentry; 136 struct dentry *dentry;
137 struct file * file; 137 struct fd f;
138 int error; 138 int error;
139 139
140 error = -EINVAL; 140 error = -EINVAL;
141 if (length < 0) 141 if (length < 0)
142 goto out; 142 goto out;
143 error = -EBADF; 143 error = -EBADF;
144 file = fget(fd); 144 f = fdget(fd);
145 if (!file) 145 if (!f.file)
146 goto out; 146 goto out;
147 147
148 /* explicitly opened as large or we are on 64-bit box */ 148 /* explicitly opened as large or we are on 64-bit box */
149 if (file->f_flags & O_LARGEFILE) 149 if (f.file->f_flags & O_LARGEFILE)
150 small = 0; 150 small = 0;
151 151
152 dentry = file->f_path.dentry; 152 dentry = f.file->f_path.dentry;
153 inode = dentry->d_inode; 153 inode = dentry->d_inode;
154 error = -EINVAL; 154 error = -EINVAL;
155 if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) 155 if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
156 goto out_putf; 156 goto out_putf;
157 157
158 error = -EINVAL; 158 error = -EINVAL;
@@ -165,14 +165,14 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
165 goto out_putf; 165 goto out_putf;
166 166
167 sb_start_write(inode->i_sb); 167 sb_start_write(inode->i_sb);
168 error = locks_verify_truncate(inode, file, length); 168 error = locks_verify_truncate(inode, f.file, length);
169 if (!error) 169 if (!error)
170 error = security_path_truncate(&file->f_path); 170 error = security_path_truncate(&f.file->f_path);
171 if (!error) 171 if (!error)
172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
173 sb_end_write(inode->i_sb); 173 sb_end_write(inode->i_sb);
174out_putf: 174out_putf:
175 fput(file); 175 fdput(f);
176out: 176out:
177 return error; 177 return error;
178} 178}
@@ -276,15 +276,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
276 276
277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
278{ 278{
279 struct file *file; 279 struct fd f = fdget(fd);
280 int error = -EBADF; 280 int error = -EBADF;
281 281
282 file = fget(fd); 282 if (f.file) {
283 if (file) { 283 error = do_fallocate(f.file, mode, offset, len);
284 error = do_fallocate(file, mode, offset, len); 284 fdput(f);
285 fput(file);
286 } 285 }
287
288 return error; 286 return error;
289} 287}
290 288
@@ -400,16 +398,15 @@ out:
400 398
401SYSCALL_DEFINE1(fchdir, unsigned int, fd) 399SYSCALL_DEFINE1(fchdir, unsigned int, fd)
402{ 400{
403 struct file *file; 401 struct fd f = fdget_raw(fd);
404 struct inode *inode; 402 struct inode *inode;
405 int error, fput_needed; 403 int error = -EBADF;
406 404
407 error = -EBADF; 405 error = -EBADF;
408 file = fget_raw_light(fd, &fput_needed); 406 if (!f.file)
409 if (!file)
410 goto out; 407 goto out;
411 408
412 inode = file->f_path.dentry->d_inode; 409 inode = f.file->f_path.dentry->d_inode;
413 410
414 error = -ENOTDIR; 411 error = -ENOTDIR;
415 if (!S_ISDIR(inode->i_mode)) 412 if (!S_ISDIR(inode->i_mode))
@@ -417,9 +414,9 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
417 414
418 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); 415 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
419 if (!error) 416 if (!error)
420 set_fs_pwd(current->fs, &file->f_path); 417 set_fs_pwd(current->fs, &f.file->f_path);
421out_putf: 418out_putf:
422 fput_light(file, fput_needed); 419 fdput(f);
423out: 420out:
424 return error; 421 return error;
425} 422}
@@ -534,7 +531,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
534 newattrs.ia_valid |= 531 newattrs.ia_valid |=
535 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 532 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
536 mutex_lock(&inode->i_mutex); 533 mutex_lock(&inode->i_mutex);
537 error = security_path_chown(path, user, group); 534 error = security_path_chown(path, uid, gid);
538 if (!error) 535 if (!error)
539 error = notify_change(path->dentry, &newattrs); 536 error = notify_change(path->dentry, &newattrs);
540 mutex_unlock(&inode->i_mutex); 537 mutex_unlock(&inode->i_mutex);
@@ -582,23 +579,20 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
582 579
583SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) 580SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
584{ 581{
585 struct file * file; 582 struct fd f = fdget(fd);
586 int error = -EBADF; 583 int error = -EBADF;
587 struct dentry * dentry;
588 584
589 file = fget(fd); 585 if (!f.file)
590 if (!file)
591 goto out; 586 goto out;
592 587
593 error = mnt_want_write_file(file); 588 error = mnt_want_write_file(f.file);
594 if (error) 589 if (error)
595 goto out_fput; 590 goto out_fput;
596 dentry = file->f_path.dentry; 591 audit_inode(NULL, f.file->f_path.dentry);
597 audit_inode(NULL, dentry); 592 error = chown_common(&f.file->f_path, user, group);
598 error = chown_common(&file->f_path, user, group); 593 mnt_drop_write_file(f.file);
599 mnt_drop_write_file(file);
600out_fput: 594out_fput:
601 fput(file); 595 fdput(f);
602out: 596out:
603 return error; 597 return error;
604} 598}
@@ -803,61 +797,18 @@ struct file *dentry_open(const struct path *path, int flags,
803} 797}
804EXPORT_SYMBOL(dentry_open); 798EXPORT_SYMBOL(dentry_open);
805 799
806static void __put_unused_fd(struct files_struct *files, unsigned int fd)
807{
808 struct fdtable *fdt = files_fdtable(files);
809 __clear_open_fd(fd, fdt);
810 if (fd < files->next_fd)
811 files->next_fd = fd;
812}
813
814void put_unused_fd(unsigned int fd)
815{
816 struct files_struct *files = current->files;
817 spin_lock(&files->file_lock);
818 __put_unused_fd(files, fd);
819 spin_unlock(&files->file_lock);
820}
821
822EXPORT_SYMBOL(put_unused_fd);
823
824/*
825 * Install a file pointer in the fd array.
826 *
827 * The VFS is full of places where we drop the files lock between
828 * setting the open_fds bitmap and installing the file in the file
829 * array. At any such point, we are vulnerable to a dup2() race
830 * installing a file in the array before us. We need to detect this and
831 * fput() the struct file we are about to overwrite in this case.
832 *
833 * It should never happen - if we allow dup2() do it, _really_ bad things
834 * will follow.
835 */
836
837void fd_install(unsigned int fd, struct file *file)
838{
839 struct files_struct *files = current->files;
840 struct fdtable *fdt;
841 spin_lock(&files->file_lock);
842 fdt = files_fdtable(files);
843 BUG_ON(fdt->fd[fd] != NULL);
844 rcu_assign_pointer(fdt->fd[fd], file);
845 spin_unlock(&files->file_lock);
846}
847
848EXPORT_SYMBOL(fd_install);
849
850static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) 800static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
851{ 801{
852 int lookup_flags = 0; 802 int lookup_flags = 0;
853 int acc_mode; 803 int acc_mode;
854 804
855 if (!(flags & O_CREAT)) 805 if (flags & O_CREAT)
856 mode = 0; 806 op->mode = (mode & S_IALLUGO) | S_IFREG;
857 op->mode = mode; 807 else
808 op->mode = 0;
858 809
859 /* Must never be set by userspace */ 810 /* Must never be set by userspace */
860 flags &= ~FMODE_NONOTIFY; 811 flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
861 812
862 /* 813 /*
863 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 814 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1037,23 +988,7 @@ EXPORT_SYMBOL(filp_close);
1037 */ 988 */
1038SYSCALL_DEFINE1(close, unsigned int, fd) 989SYSCALL_DEFINE1(close, unsigned int, fd)
1039{ 990{
1040 struct file * filp; 991 int retval = __close_fd(current->files, fd);
1041 struct files_struct *files = current->files;
1042 struct fdtable *fdt;
1043 int retval;
1044
1045 spin_lock(&files->file_lock);
1046 fdt = files_fdtable(files);
1047 if (fd >= fdt->max_fds)
1048 goto out_unlock;
1049 filp = fdt->fd[fd];
1050 if (!filp)
1051 goto out_unlock;
1052 rcu_assign_pointer(fdt->fd[fd], NULL);
1053 __clear_close_on_exec(fd, fdt);
1054 __put_unused_fd(files, fd);
1055 spin_unlock(&files->file_lock);
1056 retval = filp_close(filp, files);
1057 992
1058 /* can't restart close syscall because file table entry was cleared */ 993 /* can't restart close syscall because file table entry was cleared */
1059 if (unlikely(retval == -ERESTARTSYS || 994 if (unlikely(retval == -ERESTARTSYS ||
@@ -1063,10 +998,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
1063 retval = -EINTR; 998 retval = -EINTR;
1064 999
1065 return retval; 1000 return retval;
1066
1067out_unlock:
1068 spin_unlock(&files->file_lock);
1069 return -EBADF;
1070} 1001}
1071EXPORT_SYMBOL(sys_close); 1002EXPORT_SYMBOL(sys_close);
1072 1003
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 4a3477949bca..2ad080faca34 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -463,6 +463,11 @@ static int __init init_openprom_fs(void)
463static void __exit exit_openprom_fs(void) 463static void __exit exit_openprom_fs(void)
464{ 464{
465 unregister_filesystem(&openprom_fs_type); 465 unregister_filesystem(&openprom_fs_type);
466 /*
467 * Make sure all delayed rcu free inodes are flushed before we
468 * destroy cache.
469 */
470 rcu_barrier();
466 kmem_cache_destroy(op_inode_cachep); 471 kmem_cache_destroy(op_inode_cachep);
467} 472}
468 473
diff --git a/fs/pipe.c b/fs/pipe.c
index 8d85d7068c1e..bd3479db4b62 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1064,9 +1064,8 @@ err_inode:
1064 return err; 1064 return err;
1065} 1065}
1066 1066
1067int do_pipe_flags(int *fd, int flags) 1067static int __do_pipe_flags(int *fd, struct file **files, int flags)
1068{ 1068{
1069 struct file *files[2];
1070 int error; 1069 int error;
1071 int fdw, fdr; 1070 int fdw, fdr;
1072 1071
@@ -1088,11 +1087,8 @@ int do_pipe_flags(int *fd, int flags)
1088 fdw = error; 1087 fdw = error;
1089 1088
1090 audit_fd_pair(fdr, fdw); 1089 audit_fd_pair(fdr, fdw);
1091 fd_install(fdr, files[0]);
1092 fd_install(fdw, files[1]);
1093 fd[0] = fdr; 1090 fd[0] = fdr;
1094 fd[1] = fdw; 1091 fd[1] = fdw;
1095
1096 return 0; 1092 return 0;
1097 1093
1098 err_fdr: 1094 err_fdr:
@@ -1103,21 +1099,38 @@ int do_pipe_flags(int *fd, int flags)
1103 return error; 1099 return error;
1104} 1100}
1105 1101
1102int do_pipe_flags(int *fd, int flags)
1103{
1104 struct file *files[2];
1105 int error = __do_pipe_flags(fd, files, flags);
1106 if (!error) {
1107 fd_install(fd[0], files[0]);
1108 fd_install(fd[1], files[1]);
1109 }
1110 return error;
1111}
1112
1106/* 1113/*
1107 * sys_pipe() is the normal C calling standard for creating 1114 * sys_pipe() is the normal C calling standard for creating
1108 * a pipe. It's not the way Unix traditionally does this, though. 1115 * a pipe. It's not the way Unix traditionally does this, though.
1109 */ 1116 */
1110SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) 1117SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1111{ 1118{
1119 struct file *files[2];
1112 int fd[2]; 1120 int fd[2];
1113 int error; 1121 int error;
1114 1122
1115 error = do_pipe_flags(fd, flags); 1123 error = __do_pipe_flags(fd, files, flags);
1116 if (!error) { 1124 if (!error) {
1117 if (copy_to_user(fildes, fd, sizeof(fd))) { 1125 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
1118 sys_close(fd[0]); 1126 fput(files[0]);
1119 sys_close(fd[1]); 1127 fput(files[1]);
1128 put_unused_fd(fd[0]);
1129 put_unused_fd(fd[1]);
1120 error = -EFAULT; 1130 error = -EFAULT;
1131 } else {
1132 fd_install(fd[0], files[0]);
1133 fd_install(fd[1], files[1]);
1121 } 1134 }
1122 } 1135 }
1123 return error; 1136 return error;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 5e325a42e33d..8bd2135b7f82 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -78,7 +78,8 @@ posix_acl_valid(const struct posix_acl *acl)
78{ 78{
79 const struct posix_acl_entry *pa, *pe; 79 const struct posix_acl_entry *pa, *pe;
80 int state = ACL_USER_OBJ; 80 int state = ACL_USER_OBJ;
81 unsigned int id = 0; /* keep gcc happy */ 81 kuid_t prev_uid = INVALID_UID;
82 kgid_t prev_gid = INVALID_GID;
82 int needs_mask = 0; 83 int needs_mask = 0;
83 84
84 FOREACH_ACL_ENTRY(pa, acl, pe) { 85 FOREACH_ACL_ENTRY(pa, acl, pe) {
@@ -87,7 +88,6 @@ posix_acl_valid(const struct posix_acl *acl)
87 switch (pa->e_tag) { 88 switch (pa->e_tag) {
88 case ACL_USER_OBJ: 89 case ACL_USER_OBJ:
89 if (state == ACL_USER_OBJ) { 90 if (state == ACL_USER_OBJ) {
90 id = 0;
91 state = ACL_USER; 91 state = ACL_USER;
92 break; 92 break;
93 } 93 }
@@ -96,16 +96,17 @@ posix_acl_valid(const struct posix_acl *acl)
96 case ACL_USER: 96 case ACL_USER:
97 if (state != ACL_USER) 97 if (state != ACL_USER)
98 return -EINVAL; 98 return -EINVAL;
99 if (pa->e_id == ACL_UNDEFINED_ID || 99 if (!uid_valid(pa->e_uid))
100 pa->e_id < id)
101 return -EINVAL; 100 return -EINVAL;
102 id = pa->e_id + 1; 101 if (uid_valid(prev_uid) &&
102 uid_lte(pa->e_uid, prev_uid))
103 return -EINVAL;
104 prev_uid = pa->e_uid;
103 needs_mask = 1; 105 needs_mask = 1;
104 break; 106 break;
105 107
106 case ACL_GROUP_OBJ: 108 case ACL_GROUP_OBJ:
107 if (state == ACL_USER) { 109 if (state == ACL_USER) {
108 id = 0;
109 state = ACL_GROUP; 110 state = ACL_GROUP;
110 break; 111 break;
111 } 112 }
@@ -114,10 +115,12 @@ posix_acl_valid(const struct posix_acl *acl)
114 case ACL_GROUP: 115 case ACL_GROUP:
115 if (state != ACL_GROUP) 116 if (state != ACL_GROUP)
116 return -EINVAL; 117 return -EINVAL;
117 if (pa->e_id == ACL_UNDEFINED_ID || 118 if (!gid_valid(pa->e_gid))
118 pa->e_id < id) 119 return -EINVAL;
120 if (gid_valid(prev_gid) &&
121 gid_lte(pa->e_gid, prev_gid))
119 return -EINVAL; 122 return -EINVAL;
120 id = pa->e_id + 1; 123 prev_gid = pa->e_gid;
121 needs_mask = 1; 124 needs_mask = 1;
122 break; 125 break;
123 126
@@ -195,15 +198,12 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
195 return ERR_PTR(-ENOMEM); 198 return ERR_PTR(-ENOMEM);
196 199
197 acl->a_entries[0].e_tag = ACL_USER_OBJ; 200 acl->a_entries[0].e_tag = ACL_USER_OBJ;
198 acl->a_entries[0].e_id = ACL_UNDEFINED_ID;
199 acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6; 201 acl->a_entries[0].e_perm = (mode & S_IRWXU) >> 6;
200 202
201 acl->a_entries[1].e_tag = ACL_GROUP_OBJ; 203 acl->a_entries[1].e_tag = ACL_GROUP_OBJ;
202 acl->a_entries[1].e_id = ACL_UNDEFINED_ID;
203 acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3; 204 acl->a_entries[1].e_perm = (mode & S_IRWXG) >> 3;
204 205
205 acl->a_entries[2].e_tag = ACL_OTHER; 206 acl->a_entries[2].e_tag = ACL_OTHER;
206 acl->a_entries[2].e_id = ACL_UNDEFINED_ID;
207 acl->a_entries[2].e_perm = (mode & S_IRWXO); 207 acl->a_entries[2].e_perm = (mode & S_IRWXO);
208 return acl; 208 return acl;
209} 209}
@@ -224,11 +224,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
224 switch(pa->e_tag) { 224 switch(pa->e_tag) {
225 case ACL_USER_OBJ: 225 case ACL_USER_OBJ:
226 /* (May have been checked already) */ 226 /* (May have been checked already) */
227 if (inode->i_uid == current_fsuid()) 227 if (uid_eq(inode->i_uid, current_fsuid()))
228 goto check_perm; 228 goto check_perm;
229 break; 229 break;
230 case ACL_USER: 230 case ACL_USER:
231 if (pa->e_id == current_fsuid()) 231 if (uid_eq(pa->e_uid, current_fsuid()))
232 goto mask; 232 goto mask;
233 break; 233 break;
234 case ACL_GROUP_OBJ: 234 case ACL_GROUP_OBJ:
@@ -239,7 +239,7 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
239 } 239 }
240 break; 240 break;
241 case ACL_GROUP: 241 case ACL_GROUP:
242 if (in_group_p(pa->e_id)) { 242 if (in_group_p(pa->e_gid)) {
243 found = 1; 243 found = 1;
244 if ((pa->e_perm & want) == want) 244 if ((pa->e_perm & want) == want)
245 goto mask; 245 goto mask;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index c1c729335924..99349efbbc2b 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
8proc-$(CONFIG_MMU) := mmu.o task_mmu.o 8proc-$(CONFIG_MMU) := mmu.o task_mmu.o
9 9
10proc-y += inode.o root.o base.o generic.o array.o \ 10proc-y += inode.o root.o base.o generic.o array.o \
11 proc_tty.o 11 proc_tty.o fd.o
12proc-y += cmdline.o 12proc-y += cmdline.o
13proc-y += consoles.o 13proc-y += consoles.o
14proc-y += cpuinfo.o 14proc-y += cpuinfo.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b6c84cbdb73..ef5c84be66f9 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -90,6 +90,7 @@
90#endif 90#endif
91#include <trace/events/oom.h> 91#include <trace/events/oom.h>
92#include "internal.h" 92#include "internal.h"
93#include "fd.h"
93 94
94/* NOTE: 95/* NOTE:
95 * Implementing inode permission operations in /proc is almost 96 * Implementing inode permission operations in /proc is almost
@@ -136,8 +137,6 @@ struct pid_entry {
136 NULL, &proc_single_file_operations, \ 137 NULL, &proc_single_file_operations, \
137 { .proc_show = show } ) 138 { .proc_show = show } )
138 139
139static int proc_fd_permission(struct inode *inode, int mask);
140
141/* 140/*
142 * Count the number of hardlinks for the pid_entry table, excluding the . 141 * Count the number of hardlinks for the pid_entry table, excluding the .
143 * and .. links. 142 * and .. links.
@@ -874,111 +873,6 @@ static const struct file_operations proc_environ_operations = {
874 .release = mem_release, 873 .release = mem_release,
875}; 874};
876 875
877static ssize_t oom_adjust_read(struct file *file, char __user *buf,
878 size_t count, loff_t *ppos)
879{
880 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
881 char buffer[PROC_NUMBUF];
882 size_t len;
883 int oom_adjust = OOM_DISABLE;
884 unsigned long flags;
885
886 if (!task)
887 return -ESRCH;
888
889 if (lock_task_sighand(task, &flags)) {
890 oom_adjust = task->signal->oom_adj;
891 unlock_task_sighand(task, &flags);
892 }
893
894 put_task_struct(task);
895
896 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust);
897
898 return simple_read_from_buffer(buf, count, ppos, buffer, len);
899}
900
901static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
902 size_t count, loff_t *ppos)
903{
904 struct task_struct *task;
905 char buffer[PROC_NUMBUF];
906 int oom_adjust;
907 unsigned long flags;
908 int err;
909
910 memset(buffer, 0, sizeof(buffer));
911 if (count > sizeof(buffer) - 1)
912 count = sizeof(buffer) - 1;
913 if (copy_from_user(buffer, buf, count)) {
914 err = -EFAULT;
915 goto out;
916 }
917
918 err = kstrtoint(strstrip(buffer), 0, &oom_adjust);
919 if (err)
920 goto out;
921 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
922 oom_adjust != OOM_DISABLE) {
923 err = -EINVAL;
924 goto out;
925 }
926
927 task = get_proc_task(file->f_path.dentry->d_inode);
928 if (!task) {
929 err = -ESRCH;
930 goto out;
931 }
932
933 task_lock(task);
934 if (!task->mm) {
935 err = -EINVAL;
936 goto err_task_lock;
937 }
938
939 if (!lock_task_sighand(task, &flags)) {
940 err = -ESRCH;
941 goto err_task_lock;
942 }
943
944 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
945 err = -EACCES;
946 goto err_sighand;
947 }
948
949 /*
950 * Warn that /proc/pid/oom_adj is deprecated, see
951 * Documentation/feature-removal-schedule.txt.
952 */
953 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
954 current->comm, task_pid_nr(current), task_pid_nr(task),
955 task_pid_nr(task));
956 task->signal->oom_adj = oom_adjust;
957 /*
958 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
959 * value is always attainable.
960 */
961 if (task->signal->oom_adj == OOM_ADJUST_MAX)
962 task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX;
963 else
964 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
965 -OOM_DISABLE;
966 trace_oom_score_adj_update(task);
967err_sighand:
968 unlock_task_sighand(task, &flags);
969err_task_lock:
970 task_unlock(task);
971 put_task_struct(task);
972out:
973 return err < 0 ? err : count;
974}
975
976static const struct file_operations proc_oom_adjust_operations = {
977 .read = oom_adjust_read,
978 .write = oom_adjust_write,
979 .llseek = generic_file_llseek,
980};
981
982static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 876static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
983 size_t count, loff_t *ppos) 877 size_t count, loff_t *ppos)
984{ 878{
@@ -1052,15 +946,7 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1052 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 946 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1053 task->signal->oom_score_adj_min = oom_score_adj; 947 task->signal->oom_score_adj_min = oom_score_adj;
1054 trace_oom_score_adj_update(task); 948 trace_oom_score_adj_update(task);
1055 /* 949
1056 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
1057 * always attainable.
1058 */
1059 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1060 task->signal->oom_adj = OOM_DISABLE;
1061 else
1062 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1063 OOM_SCORE_ADJ_MAX;
1064err_sighand: 950err_sighand:
1065 unlock_task_sighand(task, &flags); 951 unlock_task_sighand(task, &flags);
1066err_task_lock: 952err_task_lock:
@@ -1089,7 +975,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
1089 if (!task) 975 if (!task)
1090 return -ESRCH; 976 return -ESRCH;
1091 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 977 length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
1092 audit_get_loginuid(task)); 978 from_kuid(file->f_cred->user_ns,
979 audit_get_loginuid(task)));
1093 put_task_struct(task); 980 put_task_struct(task);
1094 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 981 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
1095} 982}
@@ -1101,6 +988,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1101 char *page, *tmp; 988 char *page, *tmp;
1102 ssize_t length; 989 ssize_t length;
1103 uid_t loginuid; 990 uid_t loginuid;
991 kuid_t kloginuid;
1104 992
1105 rcu_read_lock(); 993 rcu_read_lock();
1106 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { 994 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
@@ -1130,7 +1018,13 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
1130 goto out_free_page; 1018 goto out_free_page;
1131 1019
1132 } 1020 }
1133 length = audit_set_loginuid(loginuid); 1021 kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
1022 if (!uid_valid(kloginuid)) {
1023 length = -EINVAL;
1024 goto out_free_page;
1025 }
1026
1027 length = audit_set_loginuid(kloginuid);
1134 if (likely(length == 0)) 1028 if (likely(length == 0))
1135 length = count; 1029 length = count;
1136 1030
@@ -1492,7 +1386,7 @@ out:
1492 return error; 1386 return error;
1493} 1387}
1494 1388
1495static const struct inode_operations proc_pid_link_inode_operations = { 1389const struct inode_operations proc_pid_link_inode_operations = {
1496 .readlink = proc_pid_readlink, 1390 .readlink = proc_pid_readlink,
1497 .follow_link = proc_pid_follow_link, 1391 .follow_link = proc_pid_follow_link,
1498 .setattr = proc_setattr, 1392 .setattr = proc_setattr,
@@ -1501,21 +1395,6 @@ static const struct inode_operations proc_pid_link_inode_operations = {
1501 1395
1502/* building an inode */ 1396/* building an inode */
1503 1397
1504static int task_dumpable(struct task_struct *task)
1505{
1506 int dumpable = 0;
1507 struct mm_struct *mm;
1508
1509 task_lock(task);
1510 mm = task->mm;
1511 if (mm)
1512 dumpable = get_dumpable(mm);
1513 task_unlock(task);
1514 if(dumpable == 1)
1515 return 1;
1516 return 0;
1517}
1518
1519struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1398struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1520{ 1399{
1521 struct inode * inode; 1400 struct inode * inode;
@@ -1641,15 +1520,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
1641 return 0; 1520 return 0;
1642} 1521}
1643 1522
1644static int pid_delete_dentry(const struct dentry * dentry)
1645{
1646 /* Is the task we represent dead?
1647 * If so, then don't put the dentry on the lru list,
1648 * kill it immediately.
1649 */
1650 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1651}
1652
1653const struct dentry_operations pid_dentry_operations = 1523const struct dentry_operations pid_dentry_operations =
1654{ 1524{
1655 .d_revalidate = pid_revalidate, 1525 .d_revalidate = pid_revalidate,
@@ -1712,289 +1582,6 @@ end_instantiate:
1712 return filldir(dirent, name, len, filp->f_pos, ino, type); 1582 return filldir(dirent, name, len, filp->f_pos, ino, type);
1713} 1583}
1714 1584
1715static unsigned name_to_int(struct dentry *dentry)
1716{
1717 const char *name = dentry->d_name.name;
1718 int len = dentry->d_name.len;
1719 unsigned n = 0;
1720
1721 if (len > 1 && *name == '0')
1722 goto out;
1723 while (len-- > 0) {
1724 unsigned c = *name++ - '0';
1725 if (c > 9)
1726 goto out;
1727 if (n >= (~0U-9)/10)
1728 goto out;
1729 n *= 10;
1730 n += c;
1731 }
1732 return n;
1733out:
1734 return ~0U;
1735}
1736
1737#define PROC_FDINFO_MAX 64
1738
1739static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1740{
1741 struct task_struct *task = get_proc_task(inode);
1742 struct files_struct *files = NULL;
1743 struct file *file;
1744 int fd = proc_fd(inode);
1745
1746 if (task) {
1747 files = get_files_struct(task);
1748 put_task_struct(task);
1749 }
1750 if (files) {
1751 /*
1752 * We are not taking a ref to the file structure, so we must
1753 * hold ->file_lock.
1754 */
1755 spin_lock(&files->file_lock);
1756 file = fcheck_files(files, fd);
1757 if (file) {
1758 unsigned int f_flags;
1759 struct fdtable *fdt;
1760
1761 fdt = files_fdtable(files);
1762 f_flags = file->f_flags & ~O_CLOEXEC;
1763 if (close_on_exec(fd, fdt))
1764 f_flags |= O_CLOEXEC;
1765
1766 if (path) {
1767 *path = file->f_path;
1768 path_get(&file->f_path);
1769 }
1770 if (info)
1771 snprintf(info, PROC_FDINFO_MAX,
1772 "pos:\t%lli\n"
1773 "flags:\t0%o\n",
1774 (long long) file->f_pos,
1775 f_flags);
1776 spin_unlock(&files->file_lock);
1777 put_files_struct(files);
1778 return 0;
1779 }
1780 spin_unlock(&files->file_lock);
1781 put_files_struct(files);
1782 }
1783 return -ENOENT;
1784}
1785
1786static int proc_fd_link(struct dentry *dentry, struct path *path)
1787{
1788 return proc_fd_info(dentry->d_inode, path, NULL);
1789}
1790
1791static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
1792{
1793 struct inode *inode;
1794 struct task_struct *task;
1795 int fd;
1796 struct files_struct *files;
1797 const struct cred *cred;
1798
1799 if (flags & LOOKUP_RCU)
1800 return -ECHILD;
1801
1802 inode = dentry->d_inode;
1803 task = get_proc_task(inode);
1804 fd = proc_fd(inode);
1805
1806 if (task) {
1807 files = get_files_struct(task);
1808 if (files) {
1809 struct file *file;
1810 rcu_read_lock();
1811 file = fcheck_files(files, fd);
1812 if (file) {
1813 unsigned f_mode = file->f_mode;
1814
1815 rcu_read_unlock();
1816 put_files_struct(files);
1817
1818 if (task_dumpable(task)) {
1819 rcu_read_lock();
1820 cred = __task_cred(task);
1821 inode->i_uid = cred->euid;
1822 inode->i_gid = cred->egid;
1823 rcu_read_unlock();
1824 } else {
1825 inode->i_uid = GLOBAL_ROOT_UID;
1826 inode->i_gid = GLOBAL_ROOT_GID;
1827 }
1828
1829 if (S_ISLNK(inode->i_mode)) {
1830 unsigned i_mode = S_IFLNK;
1831 if (f_mode & FMODE_READ)
1832 i_mode |= S_IRUSR | S_IXUSR;
1833 if (f_mode & FMODE_WRITE)
1834 i_mode |= S_IWUSR | S_IXUSR;
1835 inode->i_mode = i_mode;
1836 }
1837
1838 security_task_to_inode(task, inode);
1839 put_task_struct(task);
1840 return 1;
1841 }
1842 rcu_read_unlock();
1843 put_files_struct(files);
1844 }
1845 put_task_struct(task);
1846 }
1847 d_drop(dentry);
1848 return 0;
1849}
1850
1851static const struct dentry_operations tid_fd_dentry_operations =
1852{
1853 .d_revalidate = tid_fd_revalidate,
1854 .d_delete = pid_delete_dentry,
1855};
1856
1857static struct dentry *proc_fd_instantiate(struct inode *dir,
1858 struct dentry *dentry, struct task_struct *task, const void *ptr)
1859{
1860 unsigned fd = (unsigned long)ptr;
1861 struct inode *inode;
1862 struct proc_inode *ei;
1863 struct dentry *error = ERR_PTR(-ENOENT);
1864
1865 inode = proc_pid_make_inode(dir->i_sb, task);
1866 if (!inode)
1867 goto out;
1868 ei = PROC_I(inode);
1869 ei->fd = fd;
1870
1871 inode->i_mode = S_IFLNK;
1872 inode->i_op = &proc_pid_link_inode_operations;
1873 inode->i_size = 64;
1874 ei->op.proc_get_link = proc_fd_link;
1875 d_set_d_op(dentry, &tid_fd_dentry_operations);
1876 d_add(dentry, inode);
1877 /* Close the race of the process dying before we return the dentry */
1878 if (tid_fd_revalidate(dentry, 0))
1879 error = NULL;
1880
1881 out:
1882 return error;
1883}
1884
1885static struct dentry *proc_lookupfd_common(struct inode *dir,
1886 struct dentry *dentry,
1887 instantiate_t instantiate)
1888{
1889 struct task_struct *task = get_proc_task(dir);
1890 unsigned fd = name_to_int(dentry);
1891 struct dentry *result = ERR_PTR(-ENOENT);
1892
1893 if (!task)
1894 goto out_no_task;
1895 if (fd == ~0U)
1896 goto out;
1897
1898 result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
1899out:
1900 put_task_struct(task);
1901out_no_task:
1902 return result;
1903}
1904
1905static int proc_readfd_common(struct file * filp, void * dirent,
1906 filldir_t filldir, instantiate_t instantiate)
1907{
1908 struct dentry *dentry = filp->f_path.dentry;
1909 struct inode *inode = dentry->d_inode;
1910 struct task_struct *p = get_proc_task(inode);
1911 unsigned int fd, ino;
1912 int retval;
1913 struct files_struct * files;
1914
1915 retval = -ENOENT;
1916 if (!p)
1917 goto out_no_task;
1918 retval = 0;
1919
1920 fd = filp->f_pos;
1921 switch (fd) {
1922 case 0:
1923 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1924 goto out;
1925 filp->f_pos++;
1926 case 1:
1927 ino = parent_ino(dentry);
1928 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1929 goto out;
1930 filp->f_pos++;
1931 default:
1932 files = get_files_struct(p);
1933 if (!files)
1934 goto out;
1935 rcu_read_lock();
1936 for (fd = filp->f_pos-2;
1937 fd < files_fdtable(files)->max_fds;
1938 fd++, filp->f_pos++) {
1939 char name[PROC_NUMBUF];
1940 int len;
1941 int rv;
1942
1943 if (!fcheck_files(files, fd))
1944 continue;
1945 rcu_read_unlock();
1946
1947 len = snprintf(name, sizeof(name), "%d", fd);
1948 rv = proc_fill_cache(filp, dirent, filldir,
1949 name, len, instantiate, p,
1950 (void *)(unsigned long)fd);
1951 if (rv < 0)
1952 goto out_fd_loop;
1953 rcu_read_lock();
1954 }
1955 rcu_read_unlock();
1956out_fd_loop:
1957 put_files_struct(files);
1958 }
1959out:
1960 put_task_struct(p);
1961out_no_task:
1962 return retval;
1963}
1964
1965static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1966 unsigned int flags)
1967{
1968 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1969}
1970
1971static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
1972{
1973 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
1974}
1975
1976static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1977 size_t len, loff_t *ppos)
1978{
1979 char tmp[PROC_FDINFO_MAX];
1980 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
1981 if (!err)
1982 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
1983 return err;
1984}
1985
1986static const struct file_operations proc_fdinfo_file_operations = {
1987 .open = nonseekable_open,
1988 .read = proc_fdinfo_read,
1989 .llseek = no_llseek,
1990};
1991
1992static const struct file_operations proc_fd_operations = {
1993 .read = generic_read_dir,
1994 .readdir = proc_readfd,
1995 .llseek = default_llseek,
1996};
1997
1998#ifdef CONFIG_CHECKPOINT_RESTORE 1585#ifdef CONFIG_CHECKPOINT_RESTORE
1999 1586
2000/* 1587/*
@@ -2113,7 +1700,7 @@ out:
2113} 1700}
2114 1701
2115struct map_files_info { 1702struct map_files_info {
2116 struct file *file; 1703 fmode_t mode;
2117 unsigned long len; 1704 unsigned long len;
2118 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ 1705 unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
2119}; 1706};
@@ -2122,13 +1709,10 @@ static struct dentry *
2122proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, 1709proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2123 struct task_struct *task, const void *ptr) 1710 struct task_struct *task, const void *ptr)
2124{ 1711{
2125 const struct file *file = ptr; 1712 fmode_t mode = (fmode_t)(unsigned long)ptr;
2126 struct proc_inode *ei; 1713 struct proc_inode *ei;
2127 struct inode *inode; 1714 struct inode *inode;
2128 1715
2129 if (!file)
2130 return ERR_PTR(-ENOENT);
2131
2132 inode = proc_pid_make_inode(dir->i_sb, task); 1716 inode = proc_pid_make_inode(dir->i_sb, task);
2133 if (!inode) 1717 if (!inode)
2134 return ERR_PTR(-ENOENT); 1718 return ERR_PTR(-ENOENT);
@@ -2140,9 +1724,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2140 inode->i_size = 64; 1724 inode->i_size = 64;
2141 inode->i_mode = S_IFLNK; 1725 inode->i_mode = S_IFLNK;
2142 1726
2143 if (file->f_mode & FMODE_READ) 1727 if (mode & FMODE_READ)
2144 inode->i_mode |= S_IRUSR; 1728 inode->i_mode |= S_IRUSR;
2145 if (file->f_mode & FMODE_WRITE) 1729 if (mode & FMODE_WRITE)
2146 inode->i_mode |= S_IWUSR; 1730 inode->i_mode |= S_IWUSR;
2147 1731
2148 d_set_d_op(dentry, &tid_map_files_dentry_operations); 1732 d_set_d_op(dentry, &tid_map_files_dentry_operations);
@@ -2186,7 +1770,8 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
2186 if (!vma) 1770 if (!vma)
2187 goto out_no_vma; 1771 goto out_no_vma;
2188 1772
2189 result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); 1773 result = proc_map_files_instantiate(dir, dentry, task,
1774 (void *)(unsigned long)vma->vm_file->f_mode);
2190 1775
2191out_no_vma: 1776out_no_vma:
2192 up_read(&mm->mmap_sem); 1777 up_read(&mm->mmap_sem);
@@ -2287,8 +1872,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2287 if (++pos <= filp->f_pos) 1872 if (++pos <= filp->f_pos)
2288 continue; 1873 continue;
2289 1874
2290 get_file(vma->vm_file); 1875 info.mode = vma->vm_file->f_mode;
2291 info.file = vma->vm_file;
2292 info.len = snprintf(info.name, 1876 info.len = snprintf(info.name,
2293 sizeof(info.name), "%lx-%lx", 1877 sizeof(info.name), "%lx-%lx",
2294 vma->vm_start, vma->vm_end); 1878 vma->vm_start, vma->vm_end);
@@ -2303,19 +1887,11 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
2303 ret = proc_fill_cache(filp, dirent, filldir, 1887 ret = proc_fill_cache(filp, dirent, filldir,
2304 p->name, p->len, 1888 p->name, p->len,
2305 proc_map_files_instantiate, 1889 proc_map_files_instantiate,
2306 task, p->file); 1890 task,
1891 (void *)(unsigned long)p->mode);
2307 if (ret) 1892 if (ret)
2308 break; 1893 break;
2309 filp->f_pos++; 1894 filp->f_pos++;
2310 fput(p->file);
2311 }
2312 for (; i < nr_files; i++) {
2313 /*
2314 * In case of error don't forget
2315 * to put rest of file refs.
2316 */
2317 p = flex_array_get(fa, i);
2318 fput(p->file);
2319 } 1895 }
2320 if (fa) 1896 if (fa)
2321 flex_array_free(fa); 1897 flex_array_free(fa);
@@ -2337,82 +1913,6 @@ static const struct file_operations proc_map_files_operations = {
2337 1913
2338#endif /* CONFIG_CHECKPOINT_RESTORE */ 1914#endif /* CONFIG_CHECKPOINT_RESTORE */
2339 1915
2340/*
2341 * /proc/pid/fd needs a special permission handler so that a process can still
2342 * access /proc/self/fd after it has executed a setuid().
2343 */
2344static int proc_fd_permission(struct inode *inode, int mask)
2345{
2346 int rv = generic_permission(inode, mask);
2347 if (rv == 0)
2348 return 0;
2349 if (task_pid(current) == proc_pid(inode))
2350 rv = 0;
2351 return rv;
2352}
2353
2354/*
2355 * proc directories can do almost nothing..
2356 */
2357static const struct inode_operations proc_fd_inode_operations = {
2358 .lookup = proc_lookupfd,
2359 .permission = proc_fd_permission,
2360 .setattr = proc_setattr,
2361};
2362
2363static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2364 struct dentry *dentry, struct task_struct *task, const void *ptr)
2365{
2366 unsigned fd = (unsigned long)ptr;
2367 struct inode *inode;
2368 struct proc_inode *ei;
2369 struct dentry *error = ERR_PTR(-ENOENT);
2370
2371 inode = proc_pid_make_inode(dir->i_sb, task);
2372 if (!inode)
2373 goto out;
2374 ei = PROC_I(inode);
2375 ei->fd = fd;
2376 inode->i_mode = S_IFREG | S_IRUSR;
2377 inode->i_fop = &proc_fdinfo_file_operations;
2378 d_set_d_op(dentry, &tid_fd_dentry_operations);
2379 d_add(dentry, inode);
2380 /* Close the race of the process dying before we return the dentry */
2381 if (tid_fd_revalidate(dentry, 0))
2382 error = NULL;
2383
2384 out:
2385 return error;
2386}
2387
2388static struct dentry *proc_lookupfdinfo(struct inode *dir,
2389 struct dentry *dentry,
2390 unsigned int flags)
2391{
2392 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2393}
2394
2395static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
2396{
2397 return proc_readfd_common(filp, dirent, filldir,
2398 proc_fdinfo_instantiate);
2399}
2400
2401static const struct file_operations proc_fdinfo_operations = {
2402 .read = generic_read_dir,
2403 .readdir = proc_readfdinfo,
2404 .llseek = default_llseek,
2405};
2406
2407/*
2408 * proc directories can do almost nothing..
2409 */
2410static const struct inode_operations proc_fdinfo_inode_operations = {
2411 .lookup = proc_lookupfdinfo,
2412 .setattr = proc_setattr,
2413};
2414
2415
2416static struct dentry *proc_pident_instantiate(struct inode *dir, 1916static struct dentry *proc_pident_instantiate(struct inode *dir,
2417 struct dentry *dentry, struct task_struct *task, const void *ptr) 1917 struct dentry *dentry, struct task_struct *task, const void *ptr)
2418{ 1918{
@@ -2983,6 +2483,11 @@ static int proc_gid_map_open(struct inode *inode, struct file *file)
2983 return proc_id_map_open(inode, file, &proc_gid_seq_operations); 2483 return proc_id_map_open(inode, file, &proc_gid_seq_operations);
2984} 2484}
2985 2485
2486static int proc_projid_map_open(struct inode *inode, struct file *file)
2487{
2488 return proc_id_map_open(inode, file, &proc_projid_seq_operations);
2489}
2490
2986static const struct file_operations proc_uid_map_operations = { 2491static const struct file_operations proc_uid_map_operations = {
2987 .open = proc_uid_map_open, 2492 .open = proc_uid_map_open,
2988 .write = proc_uid_map_write, 2493 .write = proc_uid_map_write,
@@ -2998,6 +2503,14 @@ static const struct file_operations proc_gid_map_operations = {
2998 .llseek = seq_lseek, 2503 .llseek = seq_lseek,
2999 .release = proc_id_map_release, 2504 .release = proc_id_map_release,
3000}; 2505};
2506
2507static const struct file_operations proc_projid_map_operations = {
2508 .open = proc_projid_map_open,
2509 .write = proc_projid_map_write,
2510 .read = seq_read,
2511 .llseek = seq_lseek,
2512 .release = proc_id_map_release,
2513};
3001#endif /* CONFIG_USER_NS */ 2514#endif /* CONFIG_USER_NS */
3002 2515
3003static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2516static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -3084,7 +2597,6 @@ static const struct pid_entry tgid_base_stuff[] = {
3084 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2597 REG("cgroup", S_IRUGO, proc_cgroup_operations),
3085#endif 2598#endif
3086 INF("oom_score", S_IRUGO, proc_oom_score), 2599 INF("oom_score", S_IRUGO, proc_oom_score),
3087 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
3088 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2600 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3089#ifdef CONFIG_AUDITSYSCALL 2601#ifdef CONFIG_AUDITSYSCALL
3090 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2602 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -3105,6 +2617,7 @@ static const struct pid_entry tgid_base_stuff[] = {
3105#ifdef CONFIG_USER_NS 2617#ifdef CONFIG_USER_NS
3106 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2618 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3107 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2619 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2620 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
3108#endif 2621#endif
3109}; 2622};
3110 2623
@@ -3450,7 +2963,6 @@ static const struct pid_entry tid_base_stuff[] = {
3450 REG("cgroup", S_IRUGO, proc_cgroup_operations), 2963 REG("cgroup", S_IRUGO, proc_cgroup_operations),
3451#endif 2964#endif
3452 INF("oom_score", S_IRUGO, proc_oom_score), 2965 INF("oom_score", S_IRUGO, proc_oom_score),
3453 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
3454 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2966 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3455#ifdef CONFIG_AUDITSYSCALL 2967#ifdef CONFIG_AUDITSYSCALL
3456 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2968 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -3468,6 +2980,7 @@ static const struct pid_entry tid_base_stuff[] = {
3468#ifdef CONFIG_USER_NS 2980#ifdef CONFIG_USER_NS
3469 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 2981 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
3470 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 2982 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
2983 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
3471#endif 2984#endif
3472}; 2985};
3473 2986
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
new file mode 100644
index 000000000000..f28a875f8779
--- /dev/null
+++ b/fs/proc/fd.c
@@ -0,0 +1,367 @@
1#include <linux/sched.h>
2#include <linux/errno.h>
3#include <linux/dcache.h>
4#include <linux/path.h>
5#include <linux/fdtable.h>
6#include <linux/namei.h>
7#include <linux/pid.h>
8#include <linux/security.h>
9#include <linux/file.h>
10#include <linux/seq_file.h>
11
12#include <linux/proc_fs.h>
13
14#include "internal.h"
15#include "fd.h"
16
17static int seq_show(struct seq_file *m, void *v)
18{
19 struct files_struct *files = NULL;
20 int f_flags = 0, ret = -ENOENT;
21 struct file *file = NULL;
22 struct task_struct *task;
23
24 task = get_proc_task(m->private);
25 if (!task)
26 return -ENOENT;
27
28 files = get_files_struct(task);
29 put_task_struct(task);
30
31 if (files) {
32 int fd = proc_fd(m->private);
33
34 spin_lock(&files->file_lock);
35 file = fcheck_files(files, fd);
36 if (file) {
37 struct fdtable *fdt = files_fdtable(files);
38
39 f_flags = file->f_flags;
40 if (close_on_exec(fd, fdt))
41 f_flags |= O_CLOEXEC;
42
43 get_file(file);
44 ret = 0;
45 }
46 spin_unlock(&files->file_lock);
47 put_files_struct(files);
48 }
49
50 if (!ret) {
51 seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
52 (long long)file->f_pos, f_flags);
53 fput(file);
54 }
55
56 return ret;
57}
58
59static int seq_fdinfo_open(struct inode *inode, struct file *file)
60{
61 return single_open(file, seq_show, inode);
62}
63
64static const struct file_operations proc_fdinfo_file_operations = {
65 .open = seq_fdinfo_open,
66 .read = seq_read,
67 .llseek = seq_lseek,
68 .release = single_release,
69};
70
71static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
72{
73 struct files_struct *files;
74 struct task_struct *task;
75 const struct cred *cred;
76 struct inode *inode;
77 int fd;
78
79 if (flags & LOOKUP_RCU)
80 return -ECHILD;
81
82 inode = dentry->d_inode;
83 task = get_proc_task(inode);
84 fd = proc_fd(inode);
85
86 if (task) {
87 files = get_files_struct(task);
88 if (files) {
89 struct file *file;
90
91 rcu_read_lock();
92 file = fcheck_files(files, fd);
93 if (file) {
94 unsigned f_mode = file->f_mode;
95
96 rcu_read_unlock();
97 put_files_struct(files);
98
99 if (task_dumpable(task)) {
100 rcu_read_lock();
101 cred = __task_cred(task);
102 inode->i_uid = cred->euid;
103 inode->i_gid = cred->egid;
104 rcu_read_unlock();
105 } else {
106 inode->i_uid = GLOBAL_ROOT_UID;
107 inode->i_gid = GLOBAL_ROOT_GID;
108 }
109
110 if (S_ISLNK(inode->i_mode)) {
111 unsigned i_mode = S_IFLNK;
112 if (f_mode & FMODE_READ)
113 i_mode |= S_IRUSR | S_IXUSR;
114 if (f_mode & FMODE_WRITE)
115 i_mode |= S_IWUSR | S_IXUSR;
116 inode->i_mode = i_mode;
117 }
118
119 security_task_to_inode(task, inode);
120 put_task_struct(task);
121 return 1;
122 }
123 rcu_read_unlock();
124 put_files_struct(files);
125 }
126 put_task_struct(task);
127 }
128
129 d_drop(dentry);
130 return 0;
131}
132
133static const struct dentry_operations tid_fd_dentry_operations = {
134 .d_revalidate = tid_fd_revalidate,
135 .d_delete = pid_delete_dentry,
136};
137
138static int proc_fd_link(struct dentry *dentry, struct path *path)
139{
140 struct files_struct *files = NULL;
141 struct task_struct *task;
142 int ret = -ENOENT;
143
144 task = get_proc_task(dentry->d_inode);
145 if (task) {
146 files = get_files_struct(task);
147 put_task_struct(task);
148 }
149
150 if (files) {
151 int fd = proc_fd(dentry->d_inode);
152 struct file *fd_file;
153
154 spin_lock(&files->file_lock);
155 fd_file = fcheck_files(files, fd);
156 if (fd_file) {
157 *path = fd_file->f_path;
158 path_get(&fd_file->f_path);
159 ret = 0;
160 }
161 spin_unlock(&files->file_lock);
162 put_files_struct(files);
163 }
164
165 return ret;
166}
167
168static struct dentry *
169proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
170 struct task_struct *task, const void *ptr)
171{
172 struct dentry *error = ERR_PTR(-ENOENT);
173 unsigned fd = (unsigned long)ptr;
174 struct proc_inode *ei;
175 struct inode *inode;
176
177 inode = proc_pid_make_inode(dir->i_sb, task);
178 if (!inode)
179 goto out;
180
181 ei = PROC_I(inode);
182 ei->fd = fd;
183
184 inode->i_mode = S_IFLNK;
185 inode->i_op = &proc_pid_link_inode_operations;
186 inode->i_size = 64;
187
188 ei->op.proc_get_link = proc_fd_link;
189
190 d_set_d_op(dentry, &tid_fd_dentry_operations);
191 d_add(dentry, inode);
192
193 /* Close the race of the process dying before we return the dentry */
194 if (tid_fd_revalidate(dentry, 0))
195 error = NULL;
196 out:
197 return error;
198}
199
200static struct dentry *proc_lookupfd_common(struct inode *dir,
201 struct dentry *dentry,
202 instantiate_t instantiate)
203{
204 struct task_struct *task = get_proc_task(dir);
205 struct dentry *result = ERR_PTR(-ENOENT);
206 unsigned fd = name_to_int(dentry);
207
208 if (!task)
209 goto out_no_task;
210 if (fd == ~0U)
211 goto out;
212
213 result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
214out:
215 put_task_struct(task);
216out_no_task:
217 return result;
218}
219
220static int proc_readfd_common(struct file * filp, void * dirent,
221 filldir_t filldir, instantiate_t instantiate)
222{
223 struct dentry *dentry = filp->f_path.dentry;
224 struct inode *inode = dentry->d_inode;
225 struct task_struct *p = get_proc_task(inode);
226 struct files_struct *files;
227 unsigned int fd, ino;
228 int retval;
229
230 retval = -ENOENT;
231 if (!p)
232 goto out_no_task;
233 retval = 0;
234
235 fd = filp->f_pos;
236 switch (fd) {
237 case 0:
238 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
239 goto out;
240 filp->f_pos++;
241 case 1:
242 ino = parent_ino(dentry);
243 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
244 goto out;
245 filp->f_pos++;
246 default:
247 files = get_files_struct(p);
248 if (!files)
249 goto out;
250 rcu_read_lock();
251 for (fd = filp->f_pos - 2;
252 fd < files_fdtable(files)->max_fds;
253 fd++, filp->f_pos++) {
254 char name[PROC_NUMBUF];
255 int len;
256 int rv;
257
258 if (!fcheck_files(files, fd))
259 continue;
260 rcu_read_unlock();
261
262 len = snprintf(name, sizeof(name), "%d", fd);
263 rv = proc_fill_cache(filp, dirent, filldir,
264 name, len, instantiate, p,
265 (void *)(unsigned long)fd);
266 if (rv < 0)
267 goto out_fd_loop;
268 rcu_read_lock();
269 }
270 rcu_read_unlock();
271out_fd_loop:
272 put_files_struct(files);
273 }
274out:
275 put_task_struct(p);
276out_no_task:
277 return retval;
278}
279
280static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
281{
282 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
283}
284
285const struct file_operations proc_fd_operations = {
286 .read = generic_read_dir,
287 .readdir = proc_readfd,
288 .llseek = default_llseek,
289};
290
291static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
292 unsigned int flags)
293{
294 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
295}
296
297/*
298 * /proc/pid/fd needs a special permission handler so that a process can still
299 * access /proc/self/fd after it has executed a setuid().
300 */
301int proc_fd_permission(struct inode *inode, int mask)
302{
303 int rv = generic_permission(inode, mask);
304 if (rv == 0)
305 return 0;
306 if (task_pid(current) == proc_pid(inode))
307 rv = 0;
308 return rv;
309}
310
311const struct inode_operations proc_fd_inode_operations = {
312 .lookup = proc_lookupfd,
313 .permission = proc_fd_permission,
314 .setattr = proc_setattr,
315};
316
317static struct dentry *
318proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
319 struct task_struct *task, const void *ptr)
320{
321 struct dentry *error = ERR_PTR(-ENOENT);
322 unsigned fd = (unsigned long)ptr;
323 struct proc_inode *ei;
324 struct inode *inode;
325
326 inode = proc_pid_make_inode(dir->i_sb, task);
327 if (!inode)
328 goto out;
329
330 ei = PROC_I(inode);
331 ei->fd = fd;
332
333 inode->i_mode = S_IFREG | S_IRUSR;
334 inode->i_fop = &proc_fdinfo_file_operations;
335
336 d_set_d_op(dentry, &tid_fd_dentry_operations);
337 d_add(dentry, inode);
338
339 /* Close the race of the process dying before we return the dentry */
340 if (tid_fd_revalidate(dentry, 0))
341 error = NULL;
342 out:
343 return error;
344}
345
346static struct dentry *
347proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
348{
349 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
350}
351
352static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
353{
354 return proc_readfd_common(filp, dirent, filldir,
355 proc_fdinfo_instantiate);
356}
357
358const struct inode_operations proc_fdinfo_inode_operations = {
359 .lookup = proc_lookupfdinfo,
360 .setattr = proc_setattr,
361};
362
363const struct file_operations proc_fdinfo_operations = {
364 .read = generic_read_dir,
365 .readdir = proc_readfdinfo,
366 .llseek = default_llseek,
367};
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
new file mode 100644
index 000000000000..cbb1d47deda8
--- /dev/null
+++ b/fs/proc/fd.h
@@ -0,0 +1,14 @@
1#ifndef __PROCFS_FD_H__
2#define __PROCFS_FD_H__
3
4#include <linux/fs.h>
5
6extern const struct file_operations proc_fd_operations;
7extern const struct inode_operations proc_fd_inode_operations;
8
9extern const struct file_operations proc_fdinfo_operations;
10extern const struct inode_operations proc_fdinfo_inode_operations;
11
12extern int proc_fd_permission(struct inode *inode, int mask);
13
14#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b3647fe6a608..0d80cef4cfb9 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -427,7 +427,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
427 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 427 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
428 pde_get(de); 428 pde_get(de);
429 spin_unlock(&proc_subdir_lock); 429 spin_unlock(&proc_subdir_lock);
430 error = -EINVAL; 430 error = -ENOMEM;
431 inode = proc_get_inode(dir->i_sb, de); 431 inode = proc_get_inode(dir->i_sb, de);
432 goto out_unlock; 432 goto out_unlock;
433 } 433 }
@@ -605,7 +605,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
605 unsigned int len; 605 unsigned int len;
606 606
607 /* make sure name is valid */ 607 /* make sure name is valid */
608 if (!name || !strlen(name)) goto out; 608 if (!name || !strlen(name))
609 goto out;
609 610
610 if (xlate_proc_name(name, parent, &fn) != 0) 611 if (xlate_proc_name(name, parent, &fn) != 0)
611 goto out; 612 goto out;
@@ -616,20 +617,18 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
616 617
617 len = strlen(fn); 618 len = strlen(fn);
618 619
619 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 620 ent = kzalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
620 if (!ent) goto out; 621 if (!ent)
622 goto out;
621 623
622 memset(ent, 0, sizeof(struct proc_dir_entry));
623 memcpy(ent->name, fn, len + 1); 624 memcpy(ent->name, fn, len + 1);
624 ent->namelen = len; 625 ent->namelen = len;
625 ent->mode = mode; 626 ent->mode = mode;
626 ent->nlink = nlink; 627 ent->nlink = nlink;
627 atomic_set(&ent->count, 1); 628 atomic_set(&ent->count, 1);
628 ent->pde_users = 0;
629 spin_lock_init(&ent->pde_unload_lock); 629 spin_lock_init(&ent->pde_unload_lock);
630 ent->pde_unload_completion = NULL;
631 INIT_LIST_HEAD(&ent->pde_openers); 630 INIT_LIST_HEAD(&ent->pde_openers);
632 out: 631out:
633 return ent; 632 return ent;
634} 633}
635 634
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ac817b64a71..3b22bbdee9ec 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -450,7 +450,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
450 return NULL; 450 return NULL;
451 if (inode->i_state & I_NEW) { 451 if (inode->i_state & I_NEW) {
452 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 452 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
453 PROC_I(inode)->fd = 0;
454 PROC_I(inode)->pde = de; 453 PROC_I(inode)->pde = de;
455 454
456 if (de->mode) { 455 if (de->mode) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index e1167a1c9126..cceaab07ad54 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -9,6 +9,7 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/sched.h>
12#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
13struct ctl_table_header; 14struct ctl_table_header;
14 15
@@ -65,6 +66,7 @@ extern const struct file_operations proc_clear_refs_operations;
65extern const struct file_operations proc_pagemap_operations; 66extern const struct file_operations proc_pagemap_operations;
66extern const struct file_operations proc_net_operations; 67extern const struct file_operations proc_net_operations;
67extern const struct inode_operations proc_net_inode_operations; 68extern const struct inode_operations proc_net_inode_operations;
69extern const struct inode_operations proc_pid_link_inode_operations;
68 70
69struct proc_maps_private { 71struct proc_maps_private {
70 struct pid *pid; 72 struct pid *pid;
@@ -91,6 +93,52 @@ static inline int proc_fd(struct inode *inode)
91 return PROC_I(inode)->fd; 93 return PROC_I(inode)->fd;
92} 94}
93 95
96static inline int task_dumpable(struct task_struct *task)
97{
98 int dumpable = 0;
99 struct mm_struct *mm;
100
101 task_lock(task);
102 mm = task->mm;
103 if (mm)
104 dumpable = get_dumpable(mm);
105 task_unlock(task);
106 if (dumpable == SUID_DUMPABLE_ENABLED)
107 return 1;
108 return 0;
109}
110
111static inline int pid_delete_dentry(const struct dentry * dentry)
112{
113 /* Is the task we represent dead?
114 * If so, then don't put the dentry on the lru list,
115 * kill it immediately.
116 */
117 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
118}
119
120static inline unsigned name_to_int(struct dentry *dentry)
121{
122 const char *name = dentry->d_name.name;
123 int len = dentry->d_name.len;
124 unsigned n = 0;
125
126 if (len > 1 && *name == '0')
127 goto out;
128 while (len-- > 0) {
129 unsigned c = *name++ - '0';
130 if (c > 9)
131 goto out;
132 if (n >= (~0U-9)/10)
133 goto out;
134 n *= 10;
135 n += c;
136 }
137 return n;
138out:
139 return ~0U;
140}
141
94struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, 142struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
95 struct dentry *dentry); 143 struct dentry *dentry);
96int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 144int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7fcd0d60a968..b8730d9ebaee 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page)
115 u |= 1 << KPF_COMPOUND_TAIL; 115 u |= 1 << KPF_COMPOUND_TAIL;
116 if (PageHuge(page)) 116 if (PageHuge(page))
117 u |= 1 << KPF_HUGE; 117 u |= 1 << KPF_HUGE;
118 else if (PageTransCompound(page)) 118 /*
119 * PageTransCompound can be true for non-huge compound pages (slab
120 * pages or pages allocated by drivers with __GFP_COMP) because it
121 * just checks PG_head/PG_tail, so we need to check PageLRU to make
122 * sure a given page is a thp, not a non-huge compound page.
123 */
124 else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
119 u |= 1 << KPF_THP; 125 u |= 1 << KPF_THP;
120 126
121 /* 127 /*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index dfafeb2b05a0..a781bdf06694 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -142,6 +142,7 @@ static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
142 } 142 }
143 143
144 rb_link_node(node, parent, p); 144 rb_link_node(node, parent, p);
145 rb_insert_color(node, &head->parent->root);
145 return 0; 146 return 0;
146} 147}
147 148
@@ -168,10 +169,8 @@ static void init_header(struct ctl_table_header *head,
168 head->node = node; 169 head->node = node;
169 if (node) { 170 if (node) {
170 struct ctl_table *entry; 171 struct ctl_table *entry;
171 for (entry = table; entry->procname; entry++, node++) { 172 for (entry = table; entry->procname; entry++, node++)
172 rb_init_node(&node->node);
173 node->header = head; 173 node->header = head;
174 }
175 } 174 }
176} 175}
177 176
@@ -266,8 +265,7 @@ void sysctl_head_put(struct ctl_table_header *head)
266 265
267static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 266static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
268{ 267{
269 if (!head) 268 BUG_ON(!head);
270 BUG();
271 spin_lock(&sysctl_lock); 269 spin_lock(&sysctl_lock);
272 if (!use_table(head)) 270 if (!use_table(head))
273 head = ERR_PTR(-ENOENT); 271 head = ERR_PTR(-ENOENT);
@@ -462,9 +460,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
462 460
463 err = ERR_PTR(-ENOMEM); 461 err = ERR_PTR(-ENOMEM);
464 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); 462 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
465 if (h)
466 sysctl_head_finish(h);
467
468 if (!inode) 463 if (!inode)
469 goto out; 464 goto out;
470 465
@@ -473,6 +468,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
473 d_add(dentry, inode); 468 d_add(dentry, inode);
474 469
475out: 470out:
471 if (h)
472 sysctl_head_finish(h);
476 sysctl_head_finish(head); 473 sysctl_head_finish(head);
477 return err; 474 return err;
478} 475}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a2d9fd7cadd..9889a92d2e01 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -61,7 +61,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
61 if (!*p) 61 if (!*p)
62 continue; 62 continue;
63 63
64 args[0].to = args[0].from = 0; 64 args[0].to = args[0].from = NULL;
65 token = match_token(p, tokens, args); 65 token = match_token(p, tokens, args);
66 switch (token) { 66 switch (token) {
67 case Opt_gid: 67 case Opt_gid:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4540b8f76f16..79827ce03e3b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
54 "VmPTE:\t%8lu kB\n" 54 "VmPTE:\t%8lu kB\n"
55 "VmSwap:\t%8lu kB\n", 55 "VmSwap:\t%8lu kB\n",
56 hiwater_vm << (PAGE_SHIFT-10), 56 hiwater_vm << (PAGE_SHIFT-10),
57 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), 57 total_vm << (PAGE_SHIFT-10),
58 mm->locked_vm << (PAGE_SHIFT-10), 58 mm->locked_vm << (PAGE_SHIFT-10),
59 mm->pinned_vm << (PAGE_SHIFT-10), 59 mm->pinned_vm << (PAGE_SHIFT-10),
60 hiwater_rss << (PAGE_SHIFT-10), 60 hiwater_rss << (PAGE_SHIFT-10),
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index d39bb5cce883..ca71db69da07 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -23,6 +23,7 @@ config PSTORE_FTRACE
23 bool "Persistent function tracer" 23 bool "Persistent function tracer"
24 depends on PSTORE 24 depends on PSTORE
25 depends on FUNCTION_TRACER 25 depends on FUNCTION_TRACER
26 depends on DEBUG_FS
26 help 27 help
27 With this option kernel traces function calls into a persistent 28 With this option kernel traces function calls into a persistent
28 ram buffer that can be decoded and dumped after reboot through 29 ram buffer that can be decoded and dumped after reboot through
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index a130d484b7d3..2d57e1ac0115 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -17,19 +17,113 @@
17#include <linux/percpu.h> 17#include <linux/percpu.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/atomic.h> 19#include <linux/atomic.h>
20#include <linux/types.h>
21#include <linux/mutex.h>
22#include <linux/ftrace.h>
23#include <linux/fs.h>
24#include <linux/debugfs.h>
25#include <linux/err.h>
26#include <linux/cache.h>
20#include <asm/barrier.h> 27#include <asm/barrier.h>
21#include "internal.h" 28#include "internal.h"
22 29
23void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip) 30static void notrace pstore_ftrace_call(unsigned long ip,
31 unsigned long parent_ip)
24{ 32{
33 unsigned long flags;
25 struct pstore_ftrace_record rec = {}; 34 struct pstore_ftrace_record rec = {};
26 35
27 if (unlikely(oops_in_progress)) 36 if (unlikely(oops_in_progress))
28 return; 37 return;
29 38
39 local_irq_save(flags);
40
30 rec.ip = ip; 41 rec.ip = ip;
31 rec.parent_ip = parent_ip; 42 rec.parent_ip = parent_ip;
32 pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id()); 43 pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
33 psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec, 44 psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
34 sizeof(rec), psinfo); 45 sizeof(rec), psinfo);
46
47 local_irq_restore(flags);
48}
49
50static struct ftrace_ops pstore_ftrace_ops __read_mostly = {
51 .func = pstore_ftrace_call,
52};
53
54static DEFINE_MUTEX(pstore_ftrace_lock);
55static bool pstore_ftrace_enabled;
56
57static ssize_t pstore_ftrace_knob_write(struct file *f, const char __user *buf,
58 size_t count, loff_t *ppos)
59{
60 u8 on;
61 ssize_t ret;
62
63 ret = kstrtou8_from_user(buf, count, 2, &on);
64 if (ret)
65 return ret;
66
67 mutex_lock(&pstore_ftrace_lock);
68
69 if (!on ^ pstore_ftrace_enabled)
70 goto out;
71
72 if (on)
73 ret = register_ftrace_function(&pstore_ftrace_ops);
74 else
75 ret = unregister_ftrace_function(&pstore_ftrace_ops);
76 if (ret) {
77 pr_err("%s: unable to %sregister ftrace ops: %zd\n",
78 __func__, on ? "" : "un", ret);
79 goto err;
80 }
81
82 pstore_ftrace_enabled = on;
83out:
84 ret = count;
85err:
86 mutex_unlock(&pstore_ftrace_lock);
87
88 return ret;
89}
90
91static ssize_t pstore_ftrace_knob_read(struct file *f, char __user *buf,
92 size_t count, loff_t *ppos)
93{
94 char val[] = { '0' + pstore_ftrace_enabled, '\n' };
95
96 return simple_read_from_buffer(buf, count, ppos, val, sizeof(val));
97}
98
99static const struct file_operations pstore_knob_fops = {
100 .open = simple_open,
101 .read = pstore_ftrace_knob_read,
102 .write = pstore_ftrace_knob_write,
103};
104
105void pstore_register_ftrace(void)
106{
107 struct dentry *dir;
108 struct dentry *file;
109
110 if (!psinfo->write_buf)
111 return;
112
113 dir = debugfs_create_dir("pstore", NULL);
114 if (!dir) {
115 pr_err("%s: unable to create pstore directory\n", __func__);
116 return;
117 }
118
119 file = debugfs_create_file("record_ftrace", 0600, dir, NULL,
120 &pstore_knob_fops);
121 if (!file) {
122 pr_err("%s: unable to create record_ftrace file\n", __func__);
123 goto err_file;
124 }
125
126 return;
127err_file:
128 debugfs_remove(dir);
35} 129}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 0d0d3b7d5f12..4847f588b7d5 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -39,6 +39,12 @@ pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
39#endif 39#endif
40} 40}
41 41
/*
 * pstore_register_ftrace() is a real function only when CONFIG_PSTORE_FTRACE
 * is set; otherwise callers get an empty inline stub.
 */
#ifndef CONFIG_PSTORE_FTRACE
static inline void pstore_register_ftrace(void) {}
#else
extern void pstore_register_ftrace(void);
#endif
47
42extern struct pstore_info *psinfo; 48extern struct pstore_info *psinfo;
43 49
44extern void pstore_set_kmsg_bytes(int); 50extern void pstore_set_kmsg_bytes(int);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 29996e8793a7..a40da07e93d6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -164,7 +164,13 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
164 164
165 if (c > psinfo->bufsize) 165 if (c > psinfo->bufsize)
166 c = psinfo->bufsize; 166 c = psinfo->bufsize;
167 spin_lock_irqsave(&psinfo->buf_lock, flags); 167
168 if (oops_in_progress) {
169 if (!spin_trylock_irqsave(&psinfo->buf_lock, flags))
170 break;
171 } else {
172 spin_lock_irqsave(&psinfo->buf_lock, flags);
173 }
168 memcpy(psinfo->buf, s, c); 174 memcpy(psinfo->buf, s, c);
169 psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo); 175 psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo);
170 spin_unlock_irqrestore(&psinfo->buf_lock, flags); 176 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
@@ -236,6 +242,7 @@ int pstore_register(struct pstore_info *psi)
236 242
237 kmsg_dump_register(&pstore_dumper); 243 kmsg_dump_register(&pstore_dumper);
238 pstore_register_console(); 244 pstore_register_console();
245 pstore_register_ftrace();
239 246
240 if (pstore_update_ms >= 0) { 247 if (pstore_update_ms >= 0) {
241 pstore_timer.expires = jiffies + 248 pstore_timer.expires = jiffies +
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 0b311bc18916..1a4f6da58eab 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -32,6 +32,7 @@
32#include <linux/ioport.h> 32#include <linux/ioport.h>
33#include <linux/platform_device.h> 33#include <linux/platform_device.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/compiler.h>
35#include <linux/pstore_ram.h> 36#include <linux/pstore_ram.h>
36 37
37#define RAMOOPS_KERNMSG_HDR "====" 38#define RAMOOPS_KERNMSG_HDR "===="
@@ -181,12 +182,11 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
181 return len; 182 return len;
182} 183}
183 184
184 185static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
185static int ramoops_pstore_write_buf(enum pstore_type_id type, 186 enum kmsg_dump_reason reason,
186 enum kmsg_dump_reason reason, 187 u64 *id, unsigned int part,
187 u64 *id, unsigned int part, 188 const char *buf, size_t size,
188 const char *buf, size_t size, 189 struct pstore_info *psi)
189 struct pstore_info *psi)
190{ 190{
191 struct ramoops_context *cxt = psi->data; 191 struct ramoops_context *cxt = psi->data;
192 struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt]; 192 struct persistent_ram_zone *prz = cxt->przs[cxt->dump_write_cnt];
@@ -406,7 +406,7 @@ static int __devinit ramoops_probe(struct platform_device *pdev)
406 goto fail_init_fprz; 406 goto fail_init_fprz;
407 407
408 if (!cxt->przs && !cxt->cprz && !cxt->fprz) { 408 if (!cxt->przs && !cxt->cprz && !cxt->fprz) {
409 pr_err("memory size too small, minimum is %lu\n", 409 pr_err("memory size too small, minimum is %zu\n",
410 cxt->console_size + cxt->record_size + 410 cxt->console_size + cxt->record_size +
411 cxt->ftrace_size); 411 cxt->ftrace_size);
412 goto fail_cnt; 412 goto fail_cnt;
@@ -414,13 +414,14 @@ static int __devinit ramoops_probe(struct platform_device *pdev)
414 414
415 cxt->pstore.data = cxt; 415 cxt->pstore.data = cxt;
416 /* 416 /*
417 * Console can handle any buffer size, so prefer dumps buffer 417 * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we
418 * size since usually it is smaller. 418 * have to handle dumps, we must have at least record_size buffer. And
419 * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be
420 * ZERO_SIZE_PTR).
419 */ 421 */
420 if (cxt->przs) 422 if (cxt->console_size)
421 cxt->pstore.bufsize = cxt->przs[0]->buffer_size; 423 cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */
422 else 424 cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize);
423 cxt->pstore.bufsize = cxt->cprz->buffer_size;
424 cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL); 425 cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL);
425 spin_lock_init(&cxt->pstore.buf_lock); 426 spin_lock_init(&cxt->pstore.buf_lock);
426 if (!cxt->pstore.buf) { 427 if (!cxt->pstore.buf) {
@@ -537,6 +538,7 @@ postcore_initcall(ramoops_init);
537static void __exit ramoops_exit(void) 538static void __exit ramoops_exit(void)
538{ 539{
539 platform_driver_unregister(&ramoops_driver); 540 platform_driver_unregister(&ramoops_driver);
541 platform_device_unregister(dummy);
540 kfree(dummy_data); 542 kfree(dummy_data);
541} 543}
542module_exit(ramoops_exit); 544module_exit(ramoops_exit);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 552e994e3aa1..43098bb5723a 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -312,8 +312,8 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
312 (ino % QNX4_INODES_PER_BLOCK); 312 (ino % QNX4_INODES_PER_BLOCK);
313 313
314 inode->i_mode = le16_to_cpu(raw_inode->di_mode); 314 inode->i_mode = le16_to_cpu(raw_inode->di_mode);
315 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid); 315 i_uid_write(inode, (uid_t)le16_to_cpu(raw_inode->di_uid));
316 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid); 316 i_gid_write(inode, (gid_t)le16_to_cpu(raw_inode->di_gid));
317 set_nlink(inode, le16_to_cpu(raw_inode->di_nlink)); 317 set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
318 inode->i_size = le32_to_cpu(raw_inode->di_size); 318 inode->i_size = le32_to_cpu(raw_inode->di_size);
319 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); 319 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime);
@@ -391,6 +391,11 @@ static int init_inodecache(void)
391 391
392static void destroy_inodecache(void) 392static void destroy_inodecache(void)
393{ 393{
394 /*
395 * Make sure all delayed rcu free inodes are flushed before we
396 * destroy cache.
397 */
398 rcu_barrier();
394 kmem_cache_destroy(qnx4_inode_cachep); 399 kmem_cache_destroy(qnx4_inode_cachep);
395} 400}
396 401
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 2049c814bda4..b6addf560483 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -574,8 +574,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
574 raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs; 574 raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;
575 575
576 inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode); 576 inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode);
577 inode->i_uid = (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid); 577 i_uid_write(inode, (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid));
578 inode->i_gid = (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid); 578 i_gid_write(inode, (gid_t)fs32_to_cpu(sbi, raw_inode->di_gid));
579 inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size); 579 inode->i_size = fs64_to_cpu(sbi, raw_inode->di_size);
580 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime); 580 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_mtime);
581 inode->i_mtime.tv_nsec = 0; 581 inode->i_mtime.tv_nsec = 0;
@@ -651,6 +651,11 @@ static int init_inodecache(void)
651 651
652static void destroy_inodecache(void) 652static void destroy_inodecache(void)
653{ 653{
654 /*
655 * Make sure all delayed rcu free inodes are flushed before we
656 * destroy cache.
657 */
658 rcu_barrier();
654 kmem_cache_destroy(qnx6_inode_cachep); 659 kmem_cache_destroy(qnx6_inode_cachep);
655} 660}
656 661
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 5f9e9e276af0..c66c37cdaa39 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -2,6 +2,6 @@ obj-$(CONFIG_QUOTA) += dquot.o
2obj-$(CONFIG_QFMT_V1) += quota_v1.o 2obj-$(CONFIG_QFMT_V1) += quota_v1.o
3obj-$(CONFIG_QFMT_V2) += quota_v2.o 3obj-$(CONFIG_QFMT_V2) += quota_v2.o
4obj-$(CONFIG_QUOTA_TREE) += quota_tree.o 4obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
5obj-$(CONFIG_QUOTACTL) += quota.o 5obj-$(CONFIG_QUOTACTL) += quota.o kqid.o
6obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o 6obj-$(CONFIG_QUOTACTL_COMPAT) += compat.o
7obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o 7obj-$(CONFIG_QUOTA_NETLINK_INTERFACE) += netlink.o
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 36a29b753c79..557a9c20a215 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -253,8 +253,10 @@ static qsize_t inode_get_rsv_space(struct inode *inode);
253static void __dquot_initialize(struct inode *inode, int type); 253static void __dquot_initialize(struct inode *inode, int type);
254 254
255static inline unsigned int 255static inline unsigned int
256hashfn(const struct super_block *sb, unsigned int id, int type) 256hashfn(const struct super_block *sb, struct kqid qid)
257{ 257{
258 unsigned int id = from_kqid(&init_user_ns, qid);
259 int type = qid.type;
258 unsigned long tmp; 260 unsigned long tmp;
259 261
260 tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type); 262 tmp = (((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type);
@@ -267,7 +269,7 @@ hashfn(const struct super_block *sb, unsigned int id, int type)
267static inline void insert_dquot_hash(struct dquot *dquot) 269static inline void insert_dquot_hash(struct dquot *dquot)
268{ 270{
269 struct hlist_head *head; 271 struct hlist_head *head;
270 head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type); 272 head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id);
271 hlist_add_head(&dquot->dq_hash, head); 273 hlist_add_head(&dquot->dq_hash, head);
272} 274}
273 275
@@ -277,15 +279,14 @@ static inline void remove_dquot_hash(struct dquot *dquot)
277} 279}
278 280
279static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, 281static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
280 unsigned int id, int type) 282 struct kqid qid)
281{ 283{
282 struct hlist_node *node; 284 struct hlist_node *node;
283 struct dquot *dquot; 285 struct dquot *dquot;
284 286
285 hlist_for_each (node, dquot_hash+hashent) { 287 hlist_for_each (node, dquot_hash+hashent) {
286 dquot = hlist_entry(node, struct dquot, dq_hash); 288 dquot = hlist_entry(node, struct dquot, dq_hash);
287 if (dquot->dq_sb == sb && dquot->dq_id == id && 289 if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid))
288 dquot->dq_type == type)
289 return dquot; 290 return dquot;
290 } 291 }
291 return NULL; 292 return NULL;
@@ -351,7 +352,7 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
351 spin_lock(&dq_list_lock); 352 spin_lock(&dq_list_lock);
352 if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) { 353 if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
353 list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> 354 list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
354 info[dquot->dq_type].dqi_dirty_list); 355 info[dquot->dq_id.type].dqi_dirty_list);
355 ret = 0; 356 ret = 0;
356 } 357 }
357 spin_unlock(&dq_list_lock); 358 spin_unlock(&dq_list_lock);
@@ -410,17 +411,17 @@ int dquot_acquire(struct dquot *dquot)
410 mutex_lock(&dquot->dq_lock); 411 mutex_lock(&dquot->dq_lock);
411 mutex_lock(&dqopt->dqio_mutex); 412 mutex_lock(&dqopt->dqio_mutex);
412 if (!test_bit(DQ_READ_B, &dquot->dq_flags)) 413 if (!test_bit(DQ_READ_B, &dquot->dq_flags))
413 ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); 414 ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
414 if (ret < 0) 415 if (ret < 0)
415 goto out_iolock; 416 goto out_iolock;
416 set_bit(DQ_READ_B, &dquot->dq_flags); 417 set_bit(DQ_READ_B, &dquot->dq_flags);
417 /* Instantiate dquot if needed */ 418 /* Instantiate dquot if needed */
418 if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) { 419 if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) {
419 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); 420 ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
420 /* Write the info if needed */ 421 /* Write the info if needed */
421 if (info_dirty(&dqopt->info[dquot->dq_type])) { 422 if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
422 ret2 = dqopt->ops[dquot->dq_type]->write_file_info( 423 ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info(
423 dquot->dq_sb, dquot->dq_type); 424 dquot->dq_sb, dquot->dq_id.type);
424 } 425 }
425 if (ret < 0) 426 if (ret < 0)
426 goto out_iolock; 427 goto out_iolock;
@@ -455,7 +456,7 @@ int dquot_commit(struct dquot *dquot)
455 /* Inactive dquot can be only if there was error during read/init 456 /* Inactive dquot can be only if there was error during read/init
456 * => we have better not writing it */ 457 * => we have better not writing it */
457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) 458 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); 459 ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
459 else 460 else
460 ret = -EIO; 461 ret = -EIO;
461out_sem: 462out_sem:
@@ -477,12 +478,12 @@ int dquot_release(struct dquot *dquot)
477 if (atomic_read(&dquot->dq_count) > 1) 478 if (atomic_read(&dquot->dq_count) > 1)
478 goto out_dqlock; 479 goto out_dqlock;
479 mutex_lock(&dqopt->dqio_mutex); 480 mutex_lock(&dqopt->dqio_mutex);
480 if (dqopt->ops[dquot->dq_type]->release_dqblk) { 481 if (dqopt->ops[dquot->dq_id.type]->release_dqblk) {
481 ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); 482 ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot);
482 /* Write the info */ 483 /* Write the info */
483 if (info_dirty(&dqopt->info[dquot->dq_type])) { 484 if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
484 ret2 = dqopt->ops[dquot->dq_type]->write_file_info( 485 ret2 = dqopt->ops[dquot->dq_id.type]->write_file_info(
485 dquot->dq_sb, dquot->dq_type); 486 dquot->dq_sb, dquot->dq_id.type);
486 } 487 }
487 if (ret >= 0) 488 if (ret >= 0)
488 ret = ret2; 489 ret = ret2;
@@ -521,7 +522,7 @@ restart:
521 list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) { 522 list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
522 if (dquot->dq_sb != sb) 523 if (dquot->dq_sb != sb)
523 continue; 524 continue;
524 if (dquot->dq_type != type) 525 if (dquot->dq_id.type != type)
525 continue; 526 continue;
526 /* Wait for dquot users */ 527 /* Wait for dquot users */
527 if (atomic_read(&dquot->dq_count)) { 528 if (atomic_read(&dquot->dq_count)) {
@@ -741,7 +742,8 @@ void dqput(struct dquot *dquot)
741#ifdef CONFIG_QUOTA_DEBUG 742#ifdef CONFIG_QUOTA_DEBUG
742 if (!atomic_read(&dquot->dq_count)) { 743 if (!atomic_read(&dquot->dq_count)) {
743 quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", 744 quota_error(dquot->dq_sb, "trying to free free dquot of %s %d",
744 quotatypes[dquot->dq_type], dquot->dq_id); 745 quotatypes[dquot->dq_id.type],
746 from_kqid(&init_user_ns, dquot->dq_id));
745 BUG(); 747 BUG();
746 } 748 }
747#endif 749#endif
@@ -752,7 +754,7 @@ we_slept:
752 /* We have more than one user... nothing to do */ 754 /* We have more than one user... nothing to do */
753 atomic_dec(&dquot->dq_count); 755 atomic_dec(&dquot->dq_count);
754 /* Releasing dquot during quotaoff phase? */ 756 /* Releasing dquot during quotaoff phase? */
755 if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) && 757 if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_id.type) &&
756 atomic_read(&dquot->dq_count) == 1) 758 atomic_read(&dquot->dq_count) == 1)
757 wake_up(&dquot->dq_wait_unused); 759 wake_up(&dquot->dq_wait_unused);
758 spin_unlock(&dq_list_lock); 760 spin_unlock(&dq_list_lock);
@@ -815,7 +817,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
815 INIT_LIST_HEAD(&dquot->dq_dirty); 817 INIT_LIST_HEAD(&dquot->dq_dirty);
816 init_waitqueue_head(&dquot->dq_wait_unused); 818 init_waitqueue_head(&dquot->dq_wait_unused);
817 dquot->dq_sb = sb; 819 dquot->dq_sb = sb;
818 dquot->dq_type = type; 820 dquot->dq_id = make_kqid_invalid(type);
819 atomic_set(&dquot->dq_count, 1); 821 atomic_set(&dquot->dq_count, 1);
820 822
821 return dquot; 823 return dquot;
@@ -829,35 +831,35 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
829 * a) checking for quota flags under dq_list_lock and 831 * a) checking for quota flags under dq_list_lock and
830 * b) getting a reference to dquot before we release dq_list_lock 832 * b) getting a reference to dquot before we release dq_list_lock
831 */ 833 */
832struct dquot *dqget(struct super_block *sb, unsigned int id, int type) 834struct dquot *dqget(struct super_block *sb, struct kqid qid)
833{ 835{
834 unsigned int hashent = hashfn(sb, id, type); 836 unsigned int hashent = hashfn(sb, qid);
835 struct dquot *dquot = NULL, *empty = NULL; 837 struct dquot *dquot = NULL, *empty = NULL;
836 838
837 if (!sb_has_quota_active(sb, type)) 839 if (!sb_has_quota_active(sb, qid.type))
838 return NULL; 840 return NULL;
839we_slept: 841we_slept:
840 spin_lock(&dq_list_lock); 842 spin_lock(&dq_list_lock);
841 spin_lock(&dq_state_lock); 843 spin_lock(&dq_state_lock);
842 if (!sb_has_quota_active(sb, type)) { 844 if (!sb_has_quota_active(sb, qid.type)) {
843 spin_unlock(&dq_state_lock); 845 spin_unlock(&dq_state_lock);
844 spin_unlock(&dq_list_lock); 846 spin_unlock(&dq_list_lock);
845 goto out; 847 goto out;
846 } 848 }
847 spin_unlock(&dq_state_lock); 849 spin_unlock(&dq_state_lock);
848 850
849 dquot = find_dquot(hashent, sb, id, type); 851 dquot = find_dquot(hashent, sb, qid);
850 if (!dquot) { 852 if (!dquot) {
851 if (!empty) { 853 if (!empty) {
852 spin_unlock(&dq_list_lock); 854 spin_unlock(&dq_list_lock);
853 empty = get_empty_dquot(sb, type); 855 empty = get_empty_dquot(sb, qid.type);
854 if (!empty) 856 if (!empty)
855 schedule(); /* Try to wait for a moment... */ 857 schedule(); /* Try to wait for a moment... */
856 goto we_slept; 858 goto we_slept;
857 } 859 }
858 dquot = empty; 860 dquot = empty;
859 empty = NULL; 861 empty = NULL;
860 dquot->dq_id = id; 862 dquot->dq_id = qid;
861 /* all dquots go on the inuse_list */ 863 /* all dquots go on the inuse_list */
862 put_inuse(dquot); 864 put_inuse(dquot);
863 /* hash it first so it can be found */ 865 /* hash it first so it can be found */
@@ -1129,8 +1131,7 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number)
1129 1131
1130struct dquot_warn { 1132struct dquot_warn {
1131 struct super_block *w_sb; 1133 struct super_block *w_sb;
1132 qid_t w_dq_id; 1134 struct kqid w_dq_id;
1133 short w_dq_type;
1134 short w_type; 1135 short w_type;
1135}; 1136};
1136 1137
@@ -1154,11 +1155,11 @@ static int need_print_warning(struct dquot_warn *warn)
1154 if (!flag_print_warnings) 1155 if (!flag_print_warnings)
1155 return 0; 1156 return 0;
1156 1157
1157 switch (warn->w_dq_type) { 1158 switch (warn->w_dq_id.type) {
1158 case USRQUOTA: 1159 case USRQUOTA:
1159 return current_fsuid() == warn->w_dq_id; 1160 return uid_eq(current_fsuid(), warn->w_dq_id.uid);
1160 case GRPQUOTA: 1161 case GRPQUOTA:
1161 return in_group_p(warn->w_dq_id); 1162 return in_group_p(warn->w_dq_id.gid);
1162 } 1163 }
1163 return 0; 1164 return 0;
1164} 1165}
@@ -1184,7 +1185,7 @@ static void print_warning(struct dquot_warn *warn)
1184 tty_write_message(tty, ": warning, "); 1185 tty_write_message(tty, ": warning, ");
1185 else 1186 else
1186 tty_write_message(tty, ": write failed, "); 1187 tty_write_message(tty, ": write failed, ");
1187 tty_write_message(tty, quotatypes[warn->w_dq_type]); 1188 tty_write_message(tty, quotatypes[warn->w_dq_id.type]);
1188 switch (warntype) { 1189 switch (warntype) {
1189 case QUOTA_NL_IHARDWARN: 1190 case QUOTA_NL_IHARDWARN:
1190 msg = " file limit reached.\r\n"; 1191 msg = " file limit reached.\r\n";
@@ -1218,7 +1219,6 @@ static void prepare_warning(struct dquot_warn *warn, struct dquot *dquot,
1218 warn->w_type = warntype; 1219 warn->w_type = warntype;
1219 warn->w_sb = dquot->dq_sb; 1220 warn->w_sb = dquot->dq_sb;
1220 warn->w_dq_id = dquot->dq_id; 1221 warn->w_dq_id = dquot->dq_id;
1221 warn->w_dq_type = dquot->dq_type;
1222} 1222}
1223 1223
1224/* 1224/*
@@ -1236,14 +1236,14 @@ static void flush_warnings(struct dquot_warn *warn)
1236#ifdef CONFIG_PRINT_QUOTA_WARNING 1236#ifdef CONFIG_PRINT_QUOTA_WARNING
1237 print_warning(&warn[i]); 1237 print_warning(&warn[i]);
1238#endif 1238#endif
1239 quota_send_warning(warn[i].w_dq_type, warn[i].w_dq_id, 1239 quota_send_warning(warn[i].w_dq_id,
1240 warn[i].w_sb->s_dev, warn[i].w_type); 1240 warn[i].w_sb->s_dev, warn[i].w_type);
1241 } 1241 }
1242} 1242}
1243 1243
1244static int ignore_hardlimit(struct dquot *dquot) 1244static int ignore_hardlimit(struct dquot *dquot)
1245{ 1245{
1246 struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; 1246 struct mem_dqinfo *info = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
1247 1247
1248 return capable(CAP_SYS_RESOURCE) && 1248 return capable(CAP_SYS_RESOURCE) &&
1249 (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || 1249 (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
@@ -1256,7 +1256,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
1256{ 1256{
1257 qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; 1257 qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
1258 1258
1259 if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || 1259 if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) ||
1260 test_bit(DQ_FAKE_B, &dquot->dq_flags)) 1260 test_bit(DQ_FAKE_B, &dquot->dq_flags))
1261 return 0; 1261 return 0;
1262 1262
@@ -1281,7 +1281,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
1281 dquot->dq_dqb.dqb_itime == 0) { 1281 dquot->dq_dqb.dqb_itime == 0) {
1282 prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN); 1282 prepare_warning(warn, dquot, QUOTA_NL_ISOFTWARN);
1283 dquot->dq_dqb.dqb_itime = get_seconds() + 1283 dquot->dq_dqb.dqb_itime = get_seconds() +
1284 sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; 1284 sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace;
1285 } 1285 }
1286 1286
1287 return 0; 1287 return 0;
@@ -1294,7 +1294,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
1294 qsize_t tspace; 1294 qsize_t tspace;
1295 struct super_block *sb = dquot->dq_sb; 1295 struct super_block *sb = dquot->dq_sb;
1296 1296
1297 if (!sb_has_quota_limits_enabled(sb, dquot->dq_type) || 1297 if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
1298 test_bit(DQ_FAKE_B, &dquot->dq_flags)) 1298 test_bit(DQ_FAKE_B, &dquot->dq_flags))
1299 return 0; 1299 return 0;
1300 1300
@@ -1325,7 +1325,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
1325 if (!prealloc) { 1325 if (!prealloc) {
1326 prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); 1326 prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
1327 dquot->dq_dqb.dqb_btime = get_seconds() + 1327 dquot->dq_dqb.dqb_btime = get_seconds() +
1328 sb_dqopt(sb)->info[dquot->dq_type].dqi_bgrace; 1328 sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace;
1329 } 1329 }
1330 else 1330 else
1331 /* 1331 /*
@@ -1344,7 +1344,7 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes)
1344 1344
1345 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || 1345 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1346 dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || 1346 dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
1347 !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type)) 1347 !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type))
1348 return QUOTA_NL_NOWARN; 1348 return QUOTA_NL_NOWARN;
1349 1349
1350 newinodes = dquot->dq_dqb.dqb_curinodes - inodes; 1350 newinodes = dquot->dq_dqb.dqb_curinodes - inodes;
@@ -1390,7 +1390,6 @@ static int dquot_active(const struct inode *inode)
1390 */ 1390 */
1391static void __dquot_initialize(struct inode *inode, int type) 1391static void __dquot_initialize(struct inode *inode, int type)
1392{ 1392{
1393 unsigned int id = 0;
1394 int cnt; 1393 int cnt;
1395 struct dquot *got[MAXQUOTAS]; 1394 struct dquot *got[MAXQUOTAS];
1396 struct super_block *sb = inode->i_sb; 1395 struct super_block *sb = inode->i_sb;
@@ -1403,18 +1402,19 @@ static void __dquot_initialize(struct inode *inode, int type)
1403 1402
1404 /* First get references to structures we might need. */ 1403 /* First get references to structures we might need. */
1405 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1404 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1405 struct kqid qid;
1406 got[cnt] = NULL; 1406 got[cnt] = NULL;
1407 if (type != -1 && cnt != type) 1407 if (type != -1 && cnt != type)
1408 continue; 1408 continue;
1409 switch (cnt) { 1409 switch (cnt) {
1410 case USRQUOTA: 1410 case USRQUOTA:
1411 id = inode->i_uid; 1411 qid = make_kqid_uid(inode->i_uid);
1412 break; 1412 break;
1413 case GRPQUOTA: 1413 case GRPQUOTA:
1414 id = inode->i_gid; 1414 qid = make_kqid_gid(inode->i_gid);
1415 break; 1415 break;
1416 } 1416 }
1417 got[cnt] = dqget(sb, id, cnt); 1417 got[cnt] = dqget(sb, qid);
1418 } 1418 }
1419 1419
1420 down_write(&sb_dqopt(sb)->dqptr_sem); 1420 down_write(&sb_dqopt(sb)->dqptr_sem);
@@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1589 goto out; 1589 goto out;
1590 } 1590 }
1591 1591
1592 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1593 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1592 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1594 warn[cnt].w_type = QUOTA_NL_NOWARN; 1593 warn[cnt].w_type = QUOTA_NL_NOWARN;
1595 1594
1595 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1596 spin_lock(&dq_data_lock); 1596 spin_lock(&dq_data_lock);
1597 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1597 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1598 if (!dquots[cnt]) 1598 if (!dquots[cnt])
@@ -1897,10 +1897,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1897 if (!dquot_active(inode)) 1897 if (!dquot_active(inode))
1898 return 0; 1898 return 0;
1899 1899
1900 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) 1900 if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid))
1901 transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA); 1901 transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(iattr->ia_uid));
1902 if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) 1902 if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))
1903 transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA); 1903 transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid));
1904 1904
1905 ret = __dquot_transfer(inode, transfer_to); 1905 ret = __dquot_transfer(inode, transfer_to);
1906 dqput_all(transfer_to); 1906 dqput_all(transfer_to);
@@ -2360,9 +2360,9 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2360 2360
2361 memset(di, 0, sizeof(*di)); 2361 memset(di, 0, sizeof(*di));
2362 di->d_version = FS_DQUOT_VERSION; 2362 di->d_version = FS_DQUOT_VERSION;
2363 di->d_flags = dquot->dq_type == USRQUOTA ? 2363 di->d_flags = dquot->dq_id.type == USRQUOTA ?
2364 FS_USER_QUOTA : FS_GROUP_QUOTA; 2364 FS_USER_QUOTA : FS_GROUP_QUOTA;
2365 di->d_id = dquot->dq_id; 2365 di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id);
2366 2366
2367 spin_lock(&dq_data_lock); 2367 spin_lock(&dq_data_lock);
2368 di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); 2368 di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
@@ -2376,12 +2376,12 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2376 spin_unlock(&dq_data_lock); 2376 spin_unlock(&dq_data_lock);
2377} 2377}
2378 2378
2379int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, 2379int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
2380 struct fs_disk_quota *di) 2380 struct fs_disk_quota *di)
2381{ 2381{
2382 struct dquot *dquot; 2382 struct dquot *dquot;
2383 2383
2384 dquot = dqget(sb, id, type); 2384 dquot = dqget(sb, qid);
2385 if (!dquot) 2385 if (!dquot)
2386 return -ESRCH; 2386 return -ESRCH;
2387 do_get_dqblk(dquot, di); 2387 do_get_dqblk(dquot, di);
@@ -2401,7 +2401,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2401{ 2401{
2402 struct mem_dqblk *dm = &dquot->dq_dqb; 2402 struct mem_dqblk *dm = &dquot->dq_dqb;
2403 int check_blim = 0, check_ilim = 0; 2403 int check_blim = 0, check_ilim = 0;
2404 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; 2404 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type];
2405 2405
2406 if (di->d_fieldmask & ~VFS_FS_DQ_MASK) 2406 if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
2407 return -EINVAL; 2407 return -EINVAL;
@@ -2488,13 +2488,13 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2488 return 0; 2488 return 0;
2489} 2489}
2490 2490
2491int dquot_set_dqblk(struct super_block *sb, int type, qid_t id, 2491int dquot_set_dqblk(struct super_block *sb, struct kqid qid,
2492 struct fs_disk_quota *di) 2492 struct fs_disk_quota *di)
2493{ 2493{
2494 struct dquot *dquot; 2494 struct dquot *dquot;
2495 int rc; 2495 int rc;
2496 2496
2497 dquot = dqget(sb, id, type); 2497 dquot = dqget(sb, qid);
2498 if (!dquot) { 2498 if (!dquot) {
2499 rc = -ESRCH; 2499 rc = -ESRCH;
2500 goto out; 2500 goto out;
diff --git a/fs/quota/kqid.c b/fs/quota/kqid.c
new file mode 100644
index 000000000000..2f97b0e2c501
--- /dev/null
+++ b/fs/quota/kqid.c
@@ -0,0 +1,132 @@
1#include <linux/fs.h>
2#include <linux/quota.h>
3#include <linux/export.h>
4
5/**
6 * qid_eq - Test to see if two kqid values are the same
7 * @left: A qid value
8 * @right: Another qid value
9 *
10 * Return true if the two qid values are equal and false otherwise.
11 */
12bool qid_eq(struct kqid left, struct kqid right)
13{
14 if (left.type != right.type)
15 return false;
16 switch(left.type) {
17 case USRQUOTA:
18 return uid_eq(left.uid, right.uid);
19 case GRPQUOTA:
20 return gid_eq(left.gid, right.gid);
21 case PRJQUOTA:
22 return projid_eq(left.projid, right.projid);
23 default:
24 BUG();
25 }
26}
27EXPORT_SYMBOL(qid_eq);
28
29/**
30 * qid_lt - Test to see if one qid value is less than another
31 * @left: The possibly lesser qid value
32 * @right: The possibly greater qid value
33 *
34 * Return true if left is less than right and false otherwise.
35 */
36bool qid_lt(struct kqid left, struct kqid right)
37{
38 if (left.type < right.type)
39 return true;
40 if (left.type > right.type)
41 return false;
42 switch (left.type) {
43 case USRQUOTA:
44 return uid_lt(left.uid, right.uid);
45 case GRPQUOTA:
46 return gid_lt(left.gid, right.gid);
47 case PRJQUOTA:
48 return projid_lt(left.projid, right.projid);
49 default:
50 BUG();
51 }
52}
53EXPORT_SYMBOL(qid_lt);
54
55/**
56 * from_kqid - Create a qid from a kqid user-namespace pair.
57 * @targ: The user namespace we want a qid in.
58 * @kqid: The kernel internal quota identifier to start with.
59 *
60 * Map @kqid into the user-namespace specified by @targ and
61 * return the resulting qid.
62 *
63 * There is always a mapping into the initial user_namespace.
64 *
65 * If @kqid has no mapping in @targ (qid_t)-1 is returned.
66 */
67qid_t from_kqid(struct user_namespace *targ, struct kqid kqid)
68{
69 switch (kqid.type) {
70 case USRQUOTA:
71 return from_kuid(targ, kqid.uid);
72 case GRPQUOTA:
73 return from_kgid(targ, kqid.gid);
74 case PRJQUOTA:
75 return from_kprojid(targ, kqid.projid);
76 default:
77 BUG();
78 }
79}
80EXPORT_SYMBOL(from_kqid);
81
82/**
83 * from_kqid_munged - Create a qid from a kqid user-namespace pair.
84 * @targ: The user namespace we want a qid in.
85 * @kqid: The kernel internal quota identifier to start with.
86 *
87 * Map @kqid into the user-namespace specified by @targ and
88 * return the resulting qid.
89 *
90 * There is always a mapping into the initial user_namespace.
91 *
92 * Unlike from_kqid, from_kqid_munged never fails and always
93 * returns a valid qid. This makes from_kqid_munged
94 * appropriate for use in places where failing to provide
95 * a qid_t is not a good option.
96 *
97 * If @kqid has no mapping in @targ the kqid.type specific
98 * overflow identifier is returned.
99 */
100qid_t from_kqid_munged(struct user_namespace *targ, struct kqid kqid)
101{
102 switch (kqid.type) {
103 case USRQUOTA:
104 return from_kuid_munged(targ, kqid.uid);
105 case GRPQUOTA:
106 return from_kgid_munged(targ, kqid.gid);
107 case PRJQUOTA:
108 return from_kprojid_munged(targ, kqid.projid);
109 default:
110 BUG();
111 }
112}
113EXPORT_SYMBOL(from_kqid_munged);
114
115/**
116 * qid_valid - Report if a valid value is stored in a kqid.
117 * @qid: The kernel internal quota identifier to test.
118 */
119bool qid_valid(struct kqid qid)
120{
121 switch (qid.type) {
122 case USRQUOTA:
123 return uid_valid(qid.uid);
124 case GRPQUOTA:
125 return gid_valid(qid.gid);
126 case PRJQUOTA:
127 return projid_valid(qid.projid);
128 default:
129 BUG();
130 }
131}
132EXPORT_SYMBOL(qid_valid);
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c
index d67908b407d9..16e8abb7709b 100644
--- a/fs/quota/netlink.c
+++ b/fs/quota/netlink.c
@@ -30,7 +30,7 @@ static struct genl_family quota_genl_family = {
30 * 30 *
31 */ 31 */
32 32
33void quota_send_warning(short type, unsigned int id, dev_t dev, 33void quota_send_warning(struct kqid qid, dev_t dev,
34 const char warntype) 34 const char warntype)
35{ 35{
36 static atomic_t seq; 36 static atomic_t seq;
@@ -56,10 +56,11 @@ void quota_send_warning(short type, unsigned int id, dev_t dev,
56 "VFS: Cannot store netlink header in quota warning.\n"); 56 "VFS: Cannot store netlink header in quota warning.\n");
57 goto err_out; 57 goto err_out;
58 } 58 }
59 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); 59 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, qid.type);
60 if (ret) 60 if (ret)
61 goto attr_err_out; 61 goto attr_err_out;
62 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); 62 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID,
63 from_kqid_munged(&init_user_ns, qid));
63 if (ret) 64 if (ret)
64 goto attr_err_out; 65 goto attr_err_out;
65 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); 66 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
@@ -71,7 +72,8 @@ void quota_send_warning(short type, unsigned int id, dev_t dev,
71 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); 72 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
72 if (ret) 73 if (ret)
73 goto attr_err_out; 74 goto attr_err_out;
74 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); 75 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID,
76 from_kuid_munged(&init_user_ns, current_uid()));
75 if (ret) 77 if (ret)
76 goto attr_err_out; 78 goto attr_err_out;
77 genlmsg_end(skb, msg_head); 79 genlmsg_end(skb, msg_head);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 6f155788cbc6..ff0135d6bc51 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -32,8 +32,8 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
32 /* allow to query information for dquots we "own" */ 32 /* allow to query information for dquots we "own" */
33 case Q_GETQUOTA: 33 case Q_GETQUOTA:
34 case Q_XGETQUOTA: 34 case Q_XGETQUOTA:
35 if ((type == USRQUOTA && current_euid() == id) || 35 if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) ||
36 (type == GRPQUOTA && in_egroup_p(id))) 36 (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id))))
37 break; 37 break;
38 /*FALLTHROUGH*/ 38 /*FALLTHROUGH*/
39 default: 39 default:
@@ -130,13 +130,17 @@ static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
130static int quota_getquota(struct super_block *sb, int type, qid_t id, 130static int quota_getquota(struct super_block *sb, int type, qid_t id,
131 void __user *addr) 131 void __user *addr)
132{ 132{
133 struct kqid qid;
133 struct fs_disk_quota fdq; 134 struct fs_disk_quota fdq;
134 struct if_dqblk idq; 135 struct if_dqblk idq;
135 int ret; 136 int ret;
136 137
137 if (!sb->s_qcop->get_dqblk) 138 if (!sb->s_qcop->get_dqblk)
138 return -ENOSYS; 139 return -ENOSYS;
139 ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); 140 qid = make_kqid(current_user_ns(), type, id);
141 if (!qid_valid(qid))
142 return -EINVAL;
143 ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
140 if (ret) 144 if (ret)
141 return ret; 145 return ret;
142 copy_to_if_dqblk(&idq, &fdq); 146 copy_to_if_dqblk(&idq, &fdq);
@@ -176,13 +180,17 @@ static int quota_setquota(struct super_block *sb, int type, qid_t id,
176{ 180{
177 struct fs_disk_quota fdq; 181 struct fs_disk_quota fdq;
178 struct if_dqblk idq; 182 struct if_dqblk idq;
183 struct kqid qid;
179 184
180 if (copy_from_user(&idq, addr, sizeof(idq))) 185 if (copy_from_user(&idq, addr, sizeof(idq)))
181 return -EFAULT; 186 return -EFAULT;
182 if (!sb->s_qcop->set_dqblk) 187 if (!sb->s_qcop->set_dqblk)
183 return -ENOSYS; 188 return -ENOSYS;
189 qid = make_kqid(current_user_ns(), type, id);
190 if (!qid_valid(qid))
191 return -EINVAL;
184 copy_from_if_dqblk(&fdq, &idq); 192 copy_from_if_dqblk(&fdq, &idq);
185 return sb->s_qcop->set_dqblk(sb, type, id, &fdq); 193 return sb->s_qcop->set_dqblk(sb, qid, &fdq);
186} 194}
187 195
188static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) 196static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
@@ -213,23 +221,31 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
213 void __user *addr) 221 void __user *addr)
214{ 222{
215 struct fs_disk_quota fdq; 223 struct fs_disk_quota fdq;
224 struct kqid qid;
216 225
217 if (copy_from_user(&fdq, addr, sizeof(fdq))) 226 if (copy_from_user(&fdq, addr, sizeof(fdq)))
218 return -EFAULT; 227 return -EFAULT;
219 if (!sb->s_qcop->set_dqblk) 228 if (!sb->s_qcop->set_dqblk)
220 return -ENOSYS; 229 return -ENOSYS;
221 return sb->s_qcop->set_dqblk(sb, type, id, &fdq); 230 qid = make_kqid(current_user_ns(), type, id);
231 if (!qid_valid(qid))
232 return -EINVAL;
233 return sb->s_qcop->set_dqblk(sb, qid, &fdq);
222} 234}
223 235
224static int quota_getxquota(struct super_block *sb, int type, qid_t id, 236static int quota_getxquota(struct super_block *sb, int type, qid_t id,
225 void __user *addr) 237 void __user *addr)
226{ 238{
227 struct fs_disk_quota fdq; 239 struct fs_disk_quota fdq;
240 struct kqid qid;
228 int ret; 241 int ret;
229 242
230 if (!sb->s_qcop->get_dqblk) 243 if (!sb->s_qcop->get_dqblk)
231 return -ENOSYS; 244 return -ENOSYS;
232 ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq); 245 qid = make_kqid(current_user_ns(), type, id);
246 if (!qid_valid(qid))
247 return -EINVAL;
248 ret = sb->s_qcop->get_dqblk(sb, qid, &fdq);
233 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) 249 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
234 return -EFAULT; 250 return -EFAULT;
235 return ret; 251 return ret;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index e41c1becf096..d65877fbe8f4 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -22,9 +22,10 @@ MODULE_LICENSE("GPL");
22 22
23#define __QUOTA_QT_PARANOIA 23#define __QUOTA_QT_PARANOIA
24 24
25static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) 25static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth)
26{ 26{
27 unsigned int epb = info->dqi_usable_bs >> 2; 27 unsigned int epb = info->dqi_usable_bs >> 2;
28 qid_t id = from_kqid(&init_user_ns, qid);
28 29
29 depth = info->dqi_qtree_depth - depth - 1; 30 depth = info->dqi_qtree_depth - depth - 1;
30 while (depth--) 31 while (depth--)
@@ -244,7 +245,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
244 /* This is enough as the block is already zeroed and the entry 245 /* This is enough as the block is already zeroed and the entry
245 * list is empty... */ 246 * list is empty... */
246 info->dqi_free_entry = blk; 247 info->dqi_free_entry = blk;
247 mark_info_dirty(dquot->dq_sb, dquot->dq_type); 248 mark_info_dirty(dquot->dq_sb, dquot->dq_id.type);
248 } 249 }
249 /* Block will be full? */ 250 /* Block will be full? */
250 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { 251 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
@@ -357,7 +358,7 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
357 */ 358 */
358int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) 359int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
359{ 360{
360 int type = dquot->dq_type; 361 int type = dquot->dq_id.type;
361 struct super_block *sb = dquot->dq_sb; 362 struct super_block *sb = dquot->dq_sb;
362 ssize_t ret; 363 ssize_t ret;
363 char *ddquot = getdqbuf(info->dqi_entry_size); 364 char *ddquot = getdqbuf(info->dqi_entry_size);
@@ -538,8 +539,9 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
538 ddquot += info->dqi_entry_size; 539 ddquot += info->dqi_entry_size;
539 } 540 }
540 if (i == qtree_dqstr_in_blk(info)) { 541 if (i == qtree_dqstr_in_blk(info)) {
541 quota_error(dquot->dq_sb, "Quota for id %u referenced " 542 quota_error(dquot->dq_sb,
542 "but not present", dquot->dq_id); 543 "Quota for id %u referenced but not present",
544 from_kqid(&init_user_ns, dquot->dq_id));
543 ret = -EIO; 545 ret = -EIO;
544 goto out_buf; 546 goto out_buf;
545 } else { 547 } else {
@@ -589,7 +591,7 @@ static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
589 591
590int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) 592int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
591{ 593{
592 int type = dquot->dq_type; 594 int type = dquot->dq_id.type;
593 struct super_block *sb = dquot->dq_sb; 595 struct super_block *sb = dquot->dq_sb;
594 loff_t offset; 596 loff_t offset;
595 char *ddquot; 597 char *ddquot;
@@ -607,8 +609,10 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
607 offset = find_dqentry(info, dquot); 609 offset = find_dqentry(info, dquot);
608 if (offset <= 0) { /* Entry not present? */ 610 if (offset <= 0) { /* Entry not present? */
609 if (offset < 0) 611 if (offset < 0)
610 quota_error(sb, "Can't read quota structure " 612 quota_error(sb,"Can't read quota structure "
611 "for id %u", dquot->dq_id); 613 "for id %u",
614 from_kqid(&init_user_ns,
615 dquot->dq_id));
612 dquot->dq_off = 0; 616 dquot->dq_off = 0;
613 set_bit(DQ_FAKE_B, &dquot->dq_flags); 617 set_bit(DQ_FAKE_B, &dquot->dq_flags);
614 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); 618 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
@@ -626,7 +630,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
626 if (ret >= 0) 630 if (ret >= 0)
627 ret = -EIO; 631 ret = -EIO;
628 quota_error(sb, "Error while reading quota structure for id %u", 632 quota_error(sb, "Error while reading quota structure for id %u",
629 dquot->dq_id); 633 from_kqid(&init_user_ns, dquot->dq_id));
630 set_bit(DQ_FAKE_B, &dquot->dq_flags); 634 set_bit(DQ_FAKE_B, &dquot->dq_flags);
631 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); 635 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
632 kfree(ddquot); 636 kfree(ddquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 34b37a67bb16..469c6848b322 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -54,7 +54,7 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
54 54
55static int v1_read_dqblk(struct dquot *dquot) 55static int v1_read_dqblk(struct dquot *dquot)
56{ 56{
57 int type = dquot->dq_type; 57 int type = dquot->dq_id.type;
58 struct v1_disk_dqblk dqblk; 58 struct v1_disk_dqblk dqblk;
59 59
60 if (!sb_dqopt(dquot->dq_sb)->files[type]) 60 if (!sb_dqopt(dquot->dq_sb)->files[type])
@@ -63,7 +63,8 @@ static int v1_read_dqblk(struct dquot *dquot)
63 /* Set structure to 0s in case read fails/is after end of file */ 63 /* Set structure to 0s in case read fails/is after end of file */
64 memset(&dqblk, 0, sizeof(struct v1_disk_dqblk)); 64 memset(&dqblk, 0, sizeof(struct v1_disk_dqblk));
65 dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk, 65 dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, (char *)&dqblk,
66 sizeof(struct v1_disk_dqblk), v1_dqoff(dquot->dq_id)); 66 sizeof(struct v1_disk_dqblk),
67 v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id)));
67 68
68 v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); 69 v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk);
69 if (dquot->dq_dqb.dqb_bhardlimit == 0 && 70 if (dquot->dq_dqb.dqb_bhardlimit == 0 &&
@@ -78,12 +79,13 @@ static int v1_read_dqblk(struct dquot *dquot)
78 79
79static int v1_commit_dqblk(struct dquot *dquot) 80static int v1_commit_dqblk(struct dquot *dquot)
80{ 81{
81 short type = dquot->dq_type; 82 short type = dquot->dq_id.type;
82 ssize_t ret; 83 ssize_t ret;
83 struct v1_disk_dqblk dqblk; 84 struct v1_disk_dqblk dqblk;
84 85
85 v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); 86 v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb);
86 if (dquot->dq_id == 0) { 87 if (((type == USRQUOTA) && uid_eq(dquot->dq_id.uid, GLOBAL_ROOT_UID)) ||
88 ((type == GRPQUOTA) && gid_eq(dquot->dq_id.gid, GLOBAL_ROOT_GID))) {
87 dqblk.dqb_btime = 89 dqblk.dqb_btime =
88 sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; 90 sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace;
89 dqblk.dqb_itime = 91 dqblk.dqb_itime =
@@ -93,7 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
93 if (sb_dqopt(dquot->dq_sb)->files[type]) 95 if (sb_dqopt(dquot->dq_sb)->files[type])
94 ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, 96 ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
95 (char *)&dqblk, sizeof(struct v1_disk_dqblk), 97 (char *)&dqblk, sizeof(struct v1_disk_dqblk),
96 v1_dqoff(dquot->dq_id)); 98 v1_dqoff(from_kqid(&init_user_ns, dquot->dq_id)));
97 if (ret != sizeof(struct v1_disk_dqblk)) { 99 if (ret != sizeof(struct v1_disk_dqblk)) {
98 quota_error(dquot->dq_sb, "dquota write failed"); 100 quota_error(dquot->dq_sb, "dquota write failed");
99 if (ret >= 0) 101 if (ret >= 0)
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index f1ab3604db5a..02751ec695c5 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -196,7 +196,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
196 struct v2r0_disk_dqblk *d = dp; 196 struct v2r0_disk_dqblk *d = dp;
197 struct mem_dqblk *m = &dquot->dq_dqb; 197 struct mem_dqblk *m = &dquot->dq_dqb;
198 struct qtree_mem_dqinfo *info = 198 struct qtree_mem_dqinfo *info =
199 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 199 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
200 200
201 d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); 201 d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
202 d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit); 202 d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
@@ -206,7 +206,7 @@ static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
206 d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit)); 206 d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
207 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 207 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
208 d->dqb_btime = cpu_to_le64(m->dqb_btime); 208 d->dqb_btime = cpu_to_le64(m->dqb_btime);
209 d->dqb_id = cpu_to_le32(dquot->dq_id); 209 d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
210 if (qtree_entry_unused(info, dp)) 210 if (qtree_entry_unused(info, dp))
211 d->dqb_itime = cpu_to_le64(1); 211 d->dqb_itime = cpu_to_le64(1);
212} 212}
@@ -215,11 +215,13 @@ static int v2r0_is_id(void *dp, struct dquot *dquot)
215{ 215{
216 struct v2r0_disk_dqblk *d = dp; 216 struct v2r0_disk_dqblk *d = dp;
217 struct qtree_mem_dqinfo *info = 217 struct qtree_mem_dqinfo *info =
218 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 218 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
219 219
220 if (qtree_entry_unused(info, dp)) 220 if (qtree_entry_unused(info, dp))
221 return 0; 221 return 0;
222 return le32_to_cpu(d->dqb_id) == dquot->dq_id; 222 return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type,
223 le32_to_cpu(d->dqb_id)),
224 dquot->dq_id);
223} 225}
224 226
225static void v2r1_disk2memdqb(struct dquot *dquot, void *dp) 227static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
@@ -247,7 +249,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
247 struct v2r1_disk_dqblk *d = dp; 249 struct v2r1_disk_dqblk *d = dp;
248 struct mem_dqblk *m = &dquot->dq_dqb; 250 struct mem_dqblk *m = &dquot->dq_dqb;
249 struct qtree_mem_dqinfo *info = 251 struct qtree_mem_dqinfo *info =
250 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 252 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
251 253
252 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); 254 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
253 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); 255 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
@@ -257,7 +259,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
257 d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit)); 259 d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
258 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 260 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
259 d->dqb_btime = cpu_to_le64(m->dqb_btime); 261 d->dqb_btime = cpu_to_le64(m->dqb_btime);
260 d->dqb_id = cpu_to_le32(dquot->dq_id); 262 d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
261 if (qtree_entry_unused(info, dp)) 263 if (qtree_entry_unused(info, dp))
262 d->dqb_itime = cpu_to_le64(1); 264 d->dqb_itime = cpu_to_le64(1);
263} 265}
@@ -266,26 +268,28 @@ static int v2r1_is_id(void *dp, struct dquot *dquot)
266{ 268{
267 struct v2r1_disk_dqblk *d = dp; 269 struct v2r1_disk_dqblk *d = dp;
268 struct qtree_mem_dqinfo *info = 270 struct qtree_mem_dqinfo *info =
269 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 271 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv;
270 272
271 if (qtree_entry_unused(info, dp)) 273 if (qtree_entry_unused(info, dp))
272 return 0; 274 return 0;
273 return le32_to_cpu(d->dqb_id) == dquot->dq_id; 275 return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type,
276 le32_to_cpu(d->dqb_id)),
277 dquot->dq_id);
274} 278}
275 279
276static int v2_read_dquot(struct dquot *dquot) 280static int v2_read_dquot(struct dquot *dquot)
277{ 281{
278 return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); 282 return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
279} 283}
280 284
281static int v2_write_dquot(struct dquot *dquot) 285static int v2_write_dquot(struct dquot *dquot)
282{ 286{
283 return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); 287 return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
284} 288}
285 289
286static int v2_release_dquot(struct dquot *dquot) 290static int v2_release_dquot(struct dquot *dquot)
287{ 291{
288 return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); 292 return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, dquot);
289} 293}
290 294
291static int v2_free_file_info(struct super_block *sb, int type) 295static int v2_free_file_info(struct super_block *sb, int type)
diff --git a/fs/read_write.c b/fs/read_write.c
index 1adfb691e4f1..d06534857e9e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -232,23 +232,18 @@ EXPORT_SYMBOL(vfs_llseek);
232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 232SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
233{ 233{
234 off_t retval; 234 off_t retval;
235 struct file * file; 235 struct fd f = fdget(fd);
236 int fput_needed; 236 if (!f.file)
237 237 return -EBADF;
238 retval = -EBADF;
239 file = fget_light(fd, &fput_needed);
240 if (!file)
241 goto bad;
242 238
243 retval = -EINVAL; 239 retval = -EINVAL;
244 if (origin <= SEEK_MAX) { 240 if (origin <= SEEK_MAX) {
245 loff_t res = vfs_llseek(file, offset, origin); 241 loff_t res = vfs_llseek(f.file, offset, origin);
246 retval = res; 242 retval = res;
247 if (res != (loff_t)retval) 243 if (res != (loff_t)retval)
248 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 244 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
249 } 245 }
250 fput_light(file, fput_needed); 246 fdput(f);
251bad:
252 return retval; 247 return retval;
253} 248}
254 249
@@ -258,20 +253,17 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
258 unsigned int, origin) 253 unsigned int, origin)
259{ 254{
260 int retval; 255 int retval;
261 struct file * file; 256 struct fd f = fdget(fd);
262 loff_t offset; 257 loff_t offset;
263 int fput_needed;
264 258
265 retval = -EBADF; 259 if (!f.file)
266 file = fget_light(fd, &fput_needed); 260 return -EBADF;
267 if (!file)
268 goto bad;
269 261
270 retval = -EINVAL; 262 retval = -EINVAL;
271 if (origin > SEEK_MAX) 263 if (origin > SEEK_MAX)
272 goto out_putf; 264 goto out_putf;
273 265
274 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 266 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
275 origin); 267 origin);
276 268
277 retval = (int)offset; 269 retval = (int)offset;
@@ -281,8 +273,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
281 retval = 0; 273 retval = 0;
282 } 274 }
283out_putf: 275out_putf:
284 fput_light(file, fput_needed); 276 fdput(f);
285bad:
286 return retval; 277 return retval;
287} 278}
288#endif 279#endif
@@ -461,34 +452,29 @@ static inline void file_pos_write(struct file *file, loff_t pos)
461 452
462SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 453SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
463{ 454{
464 struct file *file; 455 struct fd f = fdget(fd);
465 ssize_t ret = -EBADF; 456 ssize_t ret = -EBADF;
466 int fput_needed;
467 457
468 file = fget_light(fd, &fput_needed); 458 if (f.file) {
469 if (file) { 459 loff_t pos = file_pos_read(f.file);
470 loff_t pos = file_pos_read(file); 460 ret = vfs_read(f.file, buf, count, &pos);
471 ret = vfs_read(file, buf, count, &pos); 461 file_pos_write(f.file, pos);
472 file_pos_write(file, pos); 462 fdput(f);
473 fput_light(file, fput_needed);
474 } 463 }
475
476 return ret; 464 return ret;
477} 465}
478 466
479SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 467SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
480 size_t, count) 468 size_t, count)
481{ 469{
482 struct file *file; 470 struct fd f = fdget(fd);
483 ssize_t ret = -EBADF; 471 ssize_t ret = -EBADF;
484 int fput_needed;
485 472
486 file = fget_light(fd, &fput_needed); 473 if (f.file) {
487 if (file) { 474 loff_t pos = file_pos_read(f.file);
488 loff_t pos = file_pos_read(file); 475 ret = vfs_write(f.file, buf, count, &pos);
489 ret = vfs_write(file, buf, count, &pos); 476 file_pos_write(f.file, pos);
490 file_pos_write(file, pos); 477 fdput(f);
491 fput_light(file, fput_needed);
492 } 478 }
493 479
494 return ret; 480 return ret;
@@ -497,19 +483,18 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
497SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 483SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
498 size_t count, loff_t pos) 484 size_t count, loff_t pos)
499{ 485{
500 struct file *file; 486 struct fd f;
501 ssize_t ret = -EBADF; 487 ssize_t ret = -EBADF;
502 int fput_needed;
503 488
504 if (pos < 0) 489 if (pos < 0)
505 return -EINVAL; 490 return -EINVAL;
506 491
507 file = fget_light(fd, &fput_needed); 492 f = fdget(fd);
508 if (file) { 493 if (f.file) {
509 ret = -ESPIPE; 494 ret = -ESPIPE;
510 if (file->f_mode & FMODE_PREAD) 495 if (f.file->f_mode & FMODE_PREAD)
511 ret = vfs_read(file, buf, count, &pos); 496 ret = vfs_read(f.file, buf, count, &pos);
512 fput_light(file, fput_needed); 497 fdput(f);
513 } 498 }
514 499
515 return ret; 500 return ret;
@@ -526,19 +511,18 @@ SYSCALL_ALIAS(sys_pread64, SyS_pread64);
526SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 511SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
527 size_t count, loff_t pos) 512 size_t count, loff_t pos)
528{ 513{
529 struct file *file; 514 struct fd f;
530 ssize_t ret = -EBADF; 515 ssize_t ret = -EBADF;
531 int fput_needed;
532 516
533 if (pos < 0) 517 if (pos < 0)
534 return -EINVAL; 518 return -EINVAL;
535 519
536 file = fget_light(fd, &fput_needed); 520 f = fdget(fd);
537 if (file) { 521 if (f.file) {
538 ret = -ESPIPE; 522 ret = -ESPIPE;
539 if (file->f_mode & FMODE_PWRITE) 523 if (f.file->f_mode & FMODE_PWRITE)
540 ret = vfs_write(file, buf, count, &pos); 524 ret = vfs_write(f.file, buf, count, &pos);
541 fput_light(file, fput_needed); 525 fdput(f);
542 } 526 }
543 527
544 return ret; 528 return ret;
@@ -789,16 +773,14 @@ EXPORT_SYMBOL(vfs_writev);
789SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 773SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
790 unsigned long, vlen) 774 unsigned long, vlen)
791{ 775{
792 struct file *file; 776 struct fd f = fdget(fd);
793 ssize_t ret = -EBADF; 777 ssize_t ret = -EBADF;
794 int fput_needed;
795 778
796 file = fget_light(fd, &fput_needed); 779 if (f.file) {
797 if (file) { 780 loff_t pos = file_pos_read(f.file);
798 loff_t pos = file_pos_read(file); 781 ret = vfs_readv(f.file, vec, vlen, &pos);
799 ret = vfs_readv(file, vec, vlen, &pos); 782 file_pos_write(f.file, pos);
800 file_pos_write(file, pos); 783 fdput(f);
801 fput_light(file, fput_needed);
802 } 784 }
803 785
804 if (ret > 0) 786 if (ret > 0)
@@ -810,16 +792,14 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
810SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 792SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
811 unsigned long, vlen) 793 unsigned long, vlen)
812{ 794{
813 struct file *file; 795 struct fd f = fdget(fd);
814 ssize_t ret = -EBADF; 796 ssize_t ret = -EBADF;
815 int fput_needed;
816 797
817 file = fget_light(fd, &fput_needed); 798 if (f.file) {
818 if (file) { 799 loff_t pos = file_pos_read(f.file);
819 loff_t pos = file_pos_read(file); 800 ret = vfs_writev(f.file, vec, vlen, &pos);
820 ret = vfs_writev(file, vec, vlen, &pos); 801 file_pos_write(f.file, pos);
821 file_pos_write(file, pos); 802 fdput(f);
822 fput_light(file, fput_needed);
823 } 803 }
824 804
825 if (ret > 0) 805 if (ret > 0)
@@ -838,19 +818,18 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
838 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 818 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
839{ 819{
840 loff_t pos = pos_from_hilo(pos_h, pos_l); 820 loff_t pos = pos_from_hilo(pos_h, pos_l);
841 struct file *file; 821 struct fd f;
842 ssize_t ret = -EBADF; 822 ssize_t ret = -EBADF;
843 int fput_needed;
844 823
845 if (pos < 0) 824 if (pos < 0)
846 return -EINVAL; 825 return -EINVAL;
847 826
848 file = fget_light(fd, &fput_needed); 827 f = fdget(fd);
849 if (file) { 828 if (f.file) {
850 ret = -ESPIPE; 829 ret = -ESPIPE;
851 if (file->f_mode & FMODE_PREAD) 830 if (f.file->f_mode & FMODE_PREAD)
852 ret = vfs_readv(file, vec, vlen, &pos); 831 ret = vfs_readv(f.file, vec, vlen, &pos);
853 fput_light(file, fput_needed); 832 fdput(f);
854 } 833 }
855 834
856 if (ret > 0) 835 if (ret > 0)
@@ -863,19 +842,18 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
863 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 842 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
864{ 843{
865 loff_t pos = pos_from_hilo(pos_h, pos_l); 844 loff_t pos = pos_from_hilo(pos_h, pos_l);
866 struct file *file; 845 struct fd f;
867 ssize_t ret = -EBADF; 846 ssize_t ret = -EBADF;
868 int fput_needed;
869 847
870 if (pos < 0) 848 if (pos < 0)
871 return -EINVAL; 849 return -EINVAL;
872 850
873 file = fget_light(fd, &fput_needed); 851 f = fdget(fd);
874 if (file) { 852 if (f.file) {
875 ret = -ESPIPE; 853 ret = -ESPIPE;
876 if (file->f_mode & FMODE_PWRITE) 854 if (f.file->f_mode & FMODE_PWRITE)
877 ret = vfs_writev(file, vec, vlen, &pos); 855 ret = vfs_writev(f.file, vec, vlen, &pos);
878 fput_light(file, fput_needed); 856 fdput(f);
879 } 857 }
880 858
881 if (ret > 0) 859 if (ret > 0)
@@ -884,31 +862,31 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
884 return ret; 862 return ret;
885} 863}
886 864
887static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 865ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
888 size_t count, loff_t max) 866 loff_t max)
889{ 867{
890 struct file * in_file, * out_file; 868 struct fd in, out;
891 struct inode * in_inode, * out_inode; 869 struct inode *in_inode, *out_inode;
892 loff_t pos; 870 loff_t pos;
893 ssize_t retval; 871 ssize_t retval;
894 int fput_needed_in, fput_needed_out, fl; 872 int fl;
895 873
896 /* 874 /*
897 * Get input file, and verify that it is ok.. 875 * Get input file, and verify that it is ok..
898 */ 876 */
899 retval = -EBADF; 877 retval = -EBADF;
900 in_file = fget_light(in_fd, &fput_needed_in); 878 in = fdget(in_fd);
901 if (!in_file) 879 if (!in.file)
902 goto out; 880 goto out;
903 if (!(in_file->f_mode & FMODE_READ)) 881 if (!(in.file->f_mode & FMODE_READ))
904 goto fput_in; 882 goto fput_in;
905 retval = -ESPIPE; 883 retval = -ESPIPE;
906 if (!ppos) 884 if (!ppos)
907 ppos = &in_file->f_pos; 885 ppos = &in.file->f_pos;
908 else 886 else
909 if (!(in_file->f_mode & FMODE_PREAD)) 887 if (!(in.file->f_mode & FMODE_PREAD))
910 goto fput_in; 888 goto fput_in;
911 retval = rw_verify_area(READ, in_file, ppos, count); 889 retval = rw_verify_area(READ, in.file, ppos, count);
912 if (retval < 0) 890 if (retval < 0)
913 goto fput_in; 891 goto fput_in;
914 count = retval; 892 count = retval;
@@ -917,15 +895,15 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
917 * Get output file, and verify that it is ok.. 895 * Get output file, and verify that it is ok..
918 */ 896 */
919 retval = -EBADF; 897 retval = -EBADF;
920 out_file = fget_light(out_fd, &fput_needed_out); 898 out = fdget(out_fd);
921 if (!out_file) 899 if (!out.file)
922 goto fput_in; 900 goto fput_in;
923 if (!(out_file->f_mode & FMODE_WRITE)) 901 if (!(out.file->f_mode & FMODE_WRITE))
924 goto fput_out; 902 goto fput_out;
925 retval = -EINVAL; 903 retval = -EINVAL;
926 in_inode = in_file->f_path.dentry->d_inode; 904 in_inode = in.file->f_path.dentry->d_inode;
927 out_inode = out_file->f_path.dentry->d_inode; 905 out_inode = out.file->f_path.dentry->d_inode;
928 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count);
929 if (retval < 0) 907 if (retval < 0)
930 goto fput_out; 908 goto fput_out;
931 count = retval; 909 count = retval;
@@ -949,10 +927,10 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
949 * and the application is arguably buggy if it doesn't expect 927 * and the application is arguably buggy if it doesn't expect
950 * EAGAIN on a non-blocking file descriptor. 928 * EAGAIN on a non-blocking file descriptor.
951 */ 929 */
952 if (in_file->f_flags & O_NONBLOCK) 930 if (in.file->f_flags & O_NONBLOCK)
953 fl = SPLICE_F_NONBLOCK; 931 fl = SPLICE_F_NONBLOCK;
954#endif 932#endif
955 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 933 retval = do_splice_direct(in.file, ppos, out.file, count, fl);
956 934
957 if (retval > 0) { 935 if (retval > 0) {
958 add_rchar(current, retval); 936 add_rchar(current, retval);
@@ -965,9 +943,9 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
965 retval = -EOVERFLOW; 943 retval = -EOVERFLOW;
966 944
967fput_out: 945fput_out:
968 fput_light(out_file, fput_needed_out); 946 fdput(out);
969fput_in: 947fput_in:
970 fput_light(in_file, fput_needed_in); 948 fdput(in);
971out: 949out:
972 return retval; 950 return retval;
973} 951}
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0c..d3e00ef67420 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -12,3 +12,5 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); 12 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
13ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 13ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn); 14 unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
15ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count,
16 loff_t max);
diff --git a/fs/readdir.c b/fs/readdir.c
index 39e3370d79cf..5e69ef533b77 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -106,22 +106,20 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
106 struct old_linux_dirent __user *, dirent, unsigned int, count) 106 struct old_linux_dirent __user *, dirent, unsigned int, count)
107{ 107{
108 int error; 108 int error;
109 struct file * file; 109 struct fd f = fdget(fd);
110 struct readdir_callback buf; 110 struct readdir_callback buf;
111 int fput_needed;
112 111
113 file = fget_light(fd, &fput_needed); 112 if (!f.file)
114 if (!file)
115 return -EBADF; 113 return -EBADF;
116 114
117 buf.result = 0; 115 buf.result = 0;
118 buf.dirent = dirent; 116 buf.dirent = dirent;
119 117
120 error = vfs_readdir(file, fillonedir, &buf); 118 error = vfs_readdir(f.file, fillonedir, &buf);
121 if (buf.result) 119 if (buf.result)
122 error = buf.result; 120 error = buf.result;
123 121
124 fput_light(file, fput_needed); 122 fdput(f);
125 return error; 123 return error;
126} 124}
127 125
@@ -191,17 +189,16 @@ efault:
191SYSCALL_DEFINE3(getdents, unsigned int, fd, 189SYSCALL_DEFINE3(getdents, unsigned int, fd,
192 struct linux_dirent __user *, dirent, unsigned int, count) 190 struct linux_dirent __user *, dirent, unsigned int, count)
193{ 191{
194 struct file * file; 192 struct fd f;
195 struct linux_dirent __user * lastdirent; 193 struct linux_dirent __user * lastdirent;
196 struct getdents_callback buf; 194 struct getdents_callback buf;
197 int fput_needed;
198 int error; 195 int error;
199 196
200 if (!access_ok(VERIFY_WRITE, dirent, count)) 197 if (!access_ok(VERIFY_WRITE, dirent, count))
201 return -EFAULT; 198 return -EFAULT;
202 199
203 file = fget_light(fd, &fput_needed); 200 f = fdget(fd);
204 if (!file) 201 if (!f.file)
205 return -EBADF; 202 return -EBADF;
206 203
207 buf.current_dir = dirent; 204 buf.current_dir = dirent;
@@ -209,17 +206,17 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
209 buf.count = count; 206 buf.count = count;
210 buf.error = 0; 207 buf.error = 0;
211 208
212 error = vfs_readdir(file, filldir, &buf); 209 error = vfs_readdir(f.file, filldir, &buf);
213 if (error >= 0) 210 if (error >= 0)
214 error = buf.error; 211 error = buf.error;
215 lastdirent = buf.previous; 212 lastdirent = buf.previous;
216 if (lastdirent) { 213 if (lastdirent) {
217 if (put_user(file->f_pos, &lastdirent->d_off)) 214 if (put_user(f.file->f_pos, &lastdirent->d_off))
218 error = -EFAULT; 215 error = -EFAULT;
219 else 216 else
220 error = count - buf.count; 217 error = count - buf.count;
221 } 218 }
222 fput_light(file, fput_needed); 219 fdput(f);
223 return error; 220 return error;
224} 221}
225 222
@@ -272,17 +269,16 @@ efault:
272SYSCALL_DEFINE3(getdents64, unsigned int, fd, 269SYSCALL_DEFINE3(getdents64, unsigned int, fd,
273 struct linux_dirent64 __user *, dirent, unsigned int, count) 270 struct linux_dirent64 __user *, dirent, unsigned int, count)
274{ 271{
275 struct file * file; 272 struct fd f;
276 struct linux_dirent64 __user * lastdirent; 273 struct linux_dirent64 __user * lastdirent;
277 struct getdents_callback64 buf; 274 struct getdents_callback64 buf;
278 int fput_needed;
279 int error; 275 int error;
280 276
281 if (!access_ok(VERIFY_WRITE, dirent, count)) 277 if (!access_ok(VERIFY_WRITE, dirent, count))
282 return -EFAULT; 278 return -EFAULT;
283 279
284 file = fget_light(fd, &fput_needed); 280 f = fdget(fd);
285 if (!file) 281 if (!f.file)
286 return -EBADF; 282 return -EBADF;
287 283
288 buf.current_dir = dirent; 284 buf.current_dir = dirent;
@@ -290,17 +286,17 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
290 buf.count = count; 286 buf.count = count;
291 buf.error = 0; 287 buf.error = 0;
292 288
293 error = vfs_readdir(file, filldir64, &buf); 289 error = vfs_readdir(f.file, filldir64, &buf);
294 if (error >= 0) 290 if (error >= 0)
295 error = buf.error; 291 error = buf.error;
296 lastdirent = buf.previous; 292 lastdirent = buf.previous;
297 if (lastdirent) { 293 if (lastdirent) {
298 typeof(lastdirent->d_off) d_off = file->f_pos; 294 typeof(lastdirent->d_off) d_off = f.file->f_pos;
299 if (__put_user(d_off, &lastdirent->d_off)) 295 if (__put_user(d_off, &lastdirent->d_off))
300 error = -EFAULT; 296 error = -EFAULT;
301 else 297 else
302 error = count - buf.count; 298 error = count - buf.count;
303 } 299 }
304 fput_light(file, fput_needed); 300 fdput(f);
305 return error; 301 return error;
306} 302}
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 4c0c7d163d15..a98b7740a0fc 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1334 else if (bitmap == 0) 1334 else if (bitmap == 0)
1335 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; 1335 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1336 1336
1337 reiserfs_write_unlock(sb);
1338 bh = sb_bread(sb, block); 1337 bh = sb_bread(sb, block);
1339 reiserfs_write_lock(sb);
1340 if (bh == NULL) 1338 if (bh == NULL)
1341 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " 1339 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1342 "reading failed", __func__, block); 1340 "reading failed", __func__, block);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a6d4268fb6c1..46485557cdc6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode)
76 ; 76 ;
77 } 77 }
78 out: 78 out:
79 reiserfs_write_unlock_once(inode->i_sb, depth);
79 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 80 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
80 dquot_drop(inode); 81 dquot_drop(inode);
81 inode->i_blocks = 0; 82 inode->i_blocks = 0;
82 reiserfs_write_unlock_once(inode->i_sb, depth);
83 return; 83 return;
84 84
85no_delete: 85no_delete:
@@ -1155,8 +1155,8 @@ static void init_inode(struct inode *inode, struct treepath *path)
1155 set_inode_sd_version(inode, STAT_DATA_V1); 1155 set_inode_sd_version(inode, STAT_DATA_V1);
1156 inode->i_mode = sd_v1_mode(sd); 1156 inode->i_mode = sd_v1_mode(sd);
1157 set_nlink(inode, sd_v1_nlink(sd)); 1157 set_nlink(inode, sd_v1_nlink(sd));
1158 inode->i_uid = sd_v1_uid(sd); 1158 i_uid_write(inode, sd_v1_uid(sd));
1159 inode->i_gid = sd_v1_gid(sd); 1159 i_gid_write(inode, sd_v1_gid(sd));
1160 inode->i_size = sd_v1_size(sd); 1160 inode->i_size = sd_v1_size(sd);
1161 inode->i_atime.tv_sec = sd_v1_atime(sd); 1161 inode->i_atime.tv_sec = sd_v1_atime(sd);
1162 inode->i_mtime.tv_sec = sd_v1_mtime(sd); 1162 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
@@ -1200,9 +1200,9 @@ static void init_inode(struct inode *inode, struct treepath *path)
1200 1200
1201 inode->i_mode = sd_v2_mode(sd); 1201 inode->i_mode = sd_v2_mode(sd);
1202 set_nlink(inode, sd_v2_nlink(sd)); 1202 set_nlink(inode, sd_v2_nlink(sd));
1203 inode->i_uid = sd_v2_uid(sd); 1203 i_uid_write(inode, sd_v2_uid(sd));
1204 inode->i_size = sd_v2_size(sd); 1204 inode->i_size = sd_v2_size(sd);
1205 inode->i_gid = sd_v2_gid(sd); 1205 i_gid_write(inode, sd_v2_gid(sd));
1206 inode->i_mtime.tv_sec = sd_v2_mtime(sd); 1206 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1207 inode->i_atime.tv_sec = sd_v2_atime(sd); 1207 inode->i_atime.tv_sec = sd_v2_atime(sd);
1208 inode->i_ctime.tv_sec = sd_v2_ctime(sd); 1208 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
@@ -1258,9 +1258,9 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
1258 1258
1259 set_sd_v2_mode(sd_v2, inode->i_mode); 1259 set_sd_v2_mode(sd_v2, inode->i_mode);
1260 set_sd_v2_nlink(sd_v2, inode->i_nlink); 1260 set_sd_v2_nlink(sd_v2, inode->i_nlink);
1261 set_sd_v2_uid(sd_v2, inode->i_uid); 1261 set_sd_v2_uid(sd_v2, i_uid_read(inode));
1262 set_sd_v2_size(sd_v2, size); 1262 set_sd_v2_size(sd_v2, size);
1263 set_sd_v2_gid(sd_v2, inode->i_gid); 1263 set_sd_v2_gid(sd_v2, i_gid_read(inode));
1264 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec); 1264 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
1265 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec); 1265 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
1266 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec); 1266 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
@@ -1280,8 +1280,8 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1280 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; 1280 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
1281 1281
1282 set_sd_v1_mode(sd_v1, inode->i_mode); 1282 set_sd_v1_mode(sd_v1, inode->i_mode);
1283 set_sd_v1_uid(sd_v1, inode->i_uid); 1283 set_sd_v1_uid(sd_v1, i_uid_read(inode));
1284 set_sd_v1_gid(sd_v1, inode->i_gid); 1284 set_sd_v1_gid(sd_v1, i_gid_read(inode));
1285 set_sd_v1_nlink(sd_v1, inode->i_nlink); 1285 set_sd_v1_nlink(sd_v1, inode->i_nlink);
1286 set_sd_v1_size(sd_v1, size); 1286 set_sd_v1_size(sd_v1, size);
1287 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec); 1287 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
@@ -1869,7 +1869,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1869 goto out_bad_inode; 1869 goto out_bad_inode;
1870 } 1870 }
1871 if (old_format_only(sb)) { 1871 if (old_format_only(sb)) {
1872 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { 1872 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
1873 pathrelse(&path_to_key); 1873 pathrelse(&path_to_key);
1874 /* i_uid or i_gid is too big to be stored in stat data v3.5 */ 1874 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1875 err = -EINVAL; 1875 err = -EINVAL;
@@ -3140,16 +3140,16 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3140 } 3140 }
3141 } 3141 }
3142 3142
3143 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || 3143 if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) ||
3144 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && 3144 ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) &&
3145 (get_inode_sd_version(inode) == STAT_DATA_V1)) { 3145 (get_inode_sd_version(inode) == STAT_DATA_V1)) {
3146 /* stat data of format v3.5 has 16 bit uid and gid */ 3146 /* stat data of format v3.5 has 16 bit uid and gid */
3147 error = -EINVAL; 3147 error = -EINVAL;
3148 goto out; 3148 goto out;
3149 } 3149 }
3150 3150
3151 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3151 if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
3152 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3152 (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
3153 struct reiserfs_transaction_handle th; 3153 struct reiserfs_transaction_handle th;
3154 int jbegin_count = 3154 int jbegin_count =
3155 2 * 3155 2 *
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7a37dabf5a96..1078ae179993 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -608,6 +608,11 @@ static int init_inodecache(void)
608 608
609static void destroy_inodecache(void) 609static void destroy_inodecache(void)
610{ 610{
611 /*
612 * Make sure all delayed rcu free inodes are flushed before we
613 * destroy cache.
614 */
615 rcu_barrier();
611 kmem_cache_destroy(reiserfs_inode_cachep); 616 kmem_cache_destroy(reiserfs_inode_cachep);
612} 617}
613 618
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d319963aeb11..c196369fe408 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -896,7 +896,7 @@ static int create_privroot(struct dentry *dentry) { return 0; }
896#endif 896#endif
897 897
898/* Actual operations that are exported to VFS-land */ 898/* Actual operations that are exported to VFS-land */
899const struct xattr_handler *reiserfs_xattr_handlers[] = { 899static const struct xattr_handler *reiserfs_xattr_handlers[] = {
900#ifdef CONFIG_REISERFS_FS_XATTR 900#ifdef CONFIG_REISERFS_FS_XATTR
901 &reiserfs_xattr_user_handler, 901 &reiserfs_xattr_user_handler,
902 &reiserfs_xattr_trusted_handler, 902 &reiserfs_xattr_trusted_handler,
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 44474f9b990d..d7c01ef64eda 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -30,7 +30,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value,
30 return -EPERM; 30 return -EPERM;
31 31
32 if (value) { 32 if (value) {
33 acl = posix_acl_from_xattr(value, size); 33 acl = posix_acl_from_xattr(&init_user_ns, value, size);
34 if (IS_ERR(acl)) { 34 if (IS_ERR(acl)) {
35 return PTR_ERR(acl); 35 return PTR_ERR(acl);
36 } else if (acl) { 36 } else if (acl) {
@@ -77,7 +77,7 @@ posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
77 return PTR_ERR(acl); 77 return PTR_ERR(acl);
78 if (acl == NULL) 78 if (acl == NULL)
79 return -ENODATA; 79 return -ENODATA;
80 error = posix_acl_to_xattr(acl, buffer, size); 80 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
81 posix_acl_release(acl); 81 posix_acl_release(acl);
82 82
83 return error; 83 return error;
@@ -121,15 +121,23 @@ static struct posix_acl *posix_acl_from_disk(const void *value, size_t size)
121 case ACL_OTHER: 121 case ACL_OTHER:
122 value = (char *)value + 122 value = (char *)value +
123 sizeof(reiserfs_acl_entry_short); 123 sizeof(reiserfs_acl_entry_short);
124 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
125 break; 124 break;
126 125
127 case ACL_USER: 126 case ACL_USER:
127 value = (char *)value + sizeof(reiserfs_acl_entry);
128 if ((char *)value > end)
129 goto fail;
130 acl->a_entries[n].e_uid =
131 make_kuid(&init_user_ns,
132 le32_to_cpu(entry->e_id));
133 break;
128 case ACL_GROUP: 134 case ACL_GROUP:
129 value = (char *)value + sizeof(reiserfs_acl_entry); 135 value = (char *)value + sizeof(reiserfs_acl_entry);
130 if ((char *)value > end) 136 if ((char *)value > end)
131 goto fail; 137 goto fail;
132 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); 138 acl->a_entries[n].e_gid =
139 make_kgid(&init_user_ns,
140 le32_to_cpu(entry->e_id));
133 break; 141 break;
134 142
135 default: 143 default:
@@ -164,13 +172,19 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
164 ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); 172 ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION);
165 e = (char *)ext_acl + sizeof(reiserfs_acl_header); 173 e = (char *)ext_acl + sizeof(reiserfs_acl_header);
166 for (n = 0; n < acl->a_count; n++) { 174 for (n = 0; n < acl->a_count; n++) {
175 const struct posix_acl_entry *acl_e = &acl->a_entries[n];
167 reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e; 176 reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e;
168 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 177 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
169 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 178 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
170 switch (acl->a_entries[n].e_tag) { 179 switch (acl->a_entries[n].e_tag) {
171 case ACL_USER: 180 case ACL_USER:
181 entry->e_id = cpu_to_le32(
182 from_kuid(&init_user_ns, acl_e->e_uid));
183 e += sizeof(reiserfs_acl_entry);
184 break;
172 case ACL_GROUP: 185 case ACL_GROUP:
173 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); 186 entry->e_id = cpu_to_le32(
187 from_kgid(&init_user_ns, acl_e->e_gid));
174 e += sizeof(reiserfs_acl_entry); 188 e += sizeof(reiserfs_acl_entry);
175 break; 189 break;
176 190
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 77c5f2173983..fd7c5f60b46b 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -648,6 +648,11 @@ error_register:
648static void __exit exit_romfs_fs(void) 648static void __exit exit_romfs_fs(void)
649{ 649{
650 unregister_filesystem(&romfs_fs_type); 650 unregister_filesystem(&romfs_fs_type);
651 /*
652 * Make sure all delayed rcu free inodes are flushed before we
653 * destroy cache.
654 */
655 rcu_barrier();
651 kmem_cache_destroy(romfs_inode_cachep); 656 kmem_cache_destroy(romfs_inode_cachep);
652} 657}
653 658
diff --git a/fs/select.c b/fs/select.c
index db14c781335e..2ef72d965036 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -220,8 +220,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
220 struct poll_table_entry *entry = poll_get_entry(pwq); 220 struct poll_table_entry *entry = poll_get_entry(pwq);
221 if (!entry) 221 if (!entry)
222 return; 222 return;
223 get_file(filp); 223 entry->filp = get_file(filp);
224 entry->filp = filp;
225 entry->wait_address = wait_address; 224 entry->wait_address = wait_address;
226 entry->key = p->_key; 225 entry->key = p->_key;
227 init_waitqueue_func_entry(&entry->wait, pollwake); 226 init_waitqueue_func_entry(&entry->wait, pollwake);
@@ -429,8 +428,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
429 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 428 for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
430 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 429 unsigned long in, out, ex, all_bits, bit = 1, mask, j;
431 unsigned long res_in = 0, res_out = 0, res_ex = 0; 430 unsigned long res_in = 0, res_out = 0, res_ex = 0;
432 const struct file_operations *f_op = NULL;
433 struct file *file = NULL;
434 431
435 in = *inp++; out = *outp++; ex = *exp++; 432 in = *inp++; out = *outp++; ex = *exp++;
436 all_bits = in | out | ex; 433 all_bits = in | out | ex;
@@ -440,20 +437,21 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
440 } 437 }
441 438
442 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { 439 for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) {
443 int fput_needed; 440 struct fd f;
444 if (i >= n) 441 if (i >= n)
445 break; 442 break;
446 if (!(bit & all_bits)) 443 if (!(bit & all_bits))
447 continue; 444 continue;
448 file = fget_light(i, &fput_needed); 445 f = fdget(i);
449 if (file) { 446 if (f.file) {
450 f_op = file->f_op; 447 const struct file_operations *f_op;
448 f_op = f.file->f_op;
451 mask = DEFAULT_POLLMASK; 449 mask = DEFAULT_POLLMASK;
452 if (f_op && f_op->poll) { 450 if (f_op && f_op->poll) {
453 wait_key_set(wait, in, out, bit); 451 wait_key_set(wait, in, out, bit);
454 mask = (*f_op->poll)(file, wait); 452 mask = (*f_op->poll)(f.file, wait);
455 } 453 }
456 fput_light(file, fput_needed); 454 fdput(f);
457 if ((mask & POLLIN_SET) && (in & bit)) { 455 if ((mask & POLLIN_SET) && (in & bit)) {
458 res_in |= bit; 456 res_in |= bit;
459 retval++; 457 retval++;
@@ -726,20 +724,17 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
726 mask = 0; 724 mask = 0;
727 fd = pollfd->fd; 725 fd = pollfd->fd;
728 if (fd >= 0) { 726 if (fd >= 0) {
729 int fput_needed; 727 struct fd f = fdget(fd);
730 struct file * file;
731
732 file = fget_light(fd, &fput_needed);
733 mask = POLLNVAL; 728 mask = POLLNVAL;
734 if (file != NULL) { 729 if (f.file) {
735 mask = DEFAULT_POLLMASK; 730 mask = DEFAULT_POLLMASK;
736 if (file->f_op && file->f_op->poll) { 731 if (f.file->f_op && f.file->f_op->poll) {
737 pwait->_key = pollfd->events|POLLERR|POLLHUP; 732 pwait->_key = pollfd->events|POLLERR|POLLHUP;
738 mask = file->f_op->poll(file, pwait); 733 mask = f.file->f_op->poll(f.file, pwait);
739 } 734 }
740 /* Mask out unneeded events. */ 735 /* Mask out unneeded events. */
741 mask &= pollfd->events | POLLERR | POLLHUP; 736 mask &= pollfd->events | POLLERR | POLLHUP;
742 fput_light(file, fput_needed); 737 fdput(f);
743 } 738 }
744 } 739 }
745 pollfd->revents = mask; 740 pollfd->revents = mask;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 14cf9de1dbe1..99dffab4c4e4 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -9,6 +9,7 @@
9#include <linux/export.h> 9#include <linux/export.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/cred.h>
12 13
13#include <asm/uaccess.h> 14#include <asm/uaccess.h>
14#include <asm/page.h> 15#include <asm/page.h>
@@ -56,6 +57,9 @@ int seq_open(struct file *file, const struct seq_operations *op)
56 memset(p, 0, sizeof(*p)); 57 memset(p, 0, sizeof(*p));
57 mutex_init(&p->lock); 58 mutex_init(&p->lock);
58 p->op = op; 59 p->op = op;
60#ifdef CONFIG_USER_NS
61 p->user_ns = file->f_cred->user_ns;
62#endif
59 63
60 /* 64 /*
61 * Wrappers around seq_open(e.g. swaps_open) need to be 65 * Wrappers around seq_open(e.g. swaps_open) need to be
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 9f35a37173de..8bee4e570911 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -269,13 +269,12 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
269 if (ufd < 0) 269 if (ufd < 0)
270 kfree(ctx); 270 kfree(ctx);
271 } else { 271 } else {
272 int fput_needed; 272 struct fd f = fdget(ufd);
273 struct file *file = fget_light(ufd, &fput_needed); 273 if (!f.file)
274 if (!file)
275 return -EBADF; 274 return -EBADF;
276 ctx = file->private_data; 275 ctx = f.file->private_data;
277 if (file->f_op != &signalfd_fops) { 276 if (f.file->f_op != &signalfd_fops) {
278 fput_light(file, fput_needed); 277 fdput(f);
279 return -EINVAL; 278 return -EINVAL;
280 } 279 }
281 spin_lock_irq(&current->sighand->siglock); 280 spin_lock_irq(&current->sighand->siglock);
@@ -283,7 +282,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
283 spin_unlock_irq(&current->sighand->siglock); 282 spin_unlock_irq(&current->sighand->siglock);
284 283
285 wake_up(&current->sighand->signalfd_wqh); 284 wake_up(&current->sighand->signalfd_wqh);
286 fput_light(file, fput_needed); 285 fdput(f);
287 } 286 }
288 287
289 return ufd; 288 return ufd;
diff --git a/fs/splice.c b/fs/splice.c
index 41514dd89462..13e5b4776e7a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1666,9 +1666,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1666SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, 1666SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1667 unsigned long, nr_segs, unsigned int, flags) 1667 unsigned long, nr_segs, unsigned int, flags)
1668{ 1668{
1669 struct file *file; 1669 struct fd f;
1670 long error; 1670 long error;
1671 int fput;
1672 1671
1673 if (unlikely(nr_segs > UIO_MAXIOV)) 1672 if (unlikely(nr_segs > UIO_MAXIOV))
1674 return -EINVAL; 1673 return -EINVAL;
@@ -1676,14 +1675,14 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
1676 return 0; 1675 return 0;
1677 1676
1678 error = -EBADF; 1677 error = -EBADF;
1679 file = fget_light(fd, &fput); 1678 f = fdget(fd);
1680 if (file) { 1679 if (f.file) {
1681 if (file->f_mode & FMODE_WRITE) 1680 if (f.file->f_mode & FMODE_WRITE)
1682 error = vmsplice_to_pipe(file, iov, nr_segs, flags); 1681 error = vmsplice_to_pipe(f.file, iov, nr_segs, flags);
1683 else if (file->f_mode & FMODE_READ) 1682 else if (f.file->f_mode & FMODE_READ)
1684 error = vmsplice_to_user(file, iov, nr_segs, flags); 1683 error = vmsplice_to_user(f.file, iov, nr_segs, flags);
1685 1684
1686 fput_light(file, fput); 1685 fdput(f);
1687 } 1686 }
1688 1687
1689 return error; 1688 return error;
@@ -1693,30 +1692,27 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
1693 int, fd_out, loff_t __user *, off_out, 1692 int, fd_out, loff_t __user *, off_out,
1694 size_t, len, unsigned int, flags) 1693 size_t, len, unsigned int, flags)
1695{ 1694{
1695 struct fd in, out;
1696 long error; 1696 long error;
1697 struct file *in, *out;
1698 int fput_in, fput_out;
1699 1697
1700 if (unlikely(!len)) 1698 if (unlikely(!len))
1701 return 0; 1699 return 0;
1702 1700
1703 error = -EBADF; 1701 error = -EBADF;
1704 in = fget_light(fd_in, &fput_in); 1702 in = fdget(fd_in);
1705 if (in) { 1703 if (in.file) {
1706 if (in->f_mode & FMODE_READ) { 1704 if (in.file->f_mode & FMODE_READ) {
1707 out = fget_light(fd_out, &fput_out); 1705 out = fdget(fd_out);
1708 if (out) { 1706 if (out.file) {
1709 if (out->f_mode & FMODE_WRITE) 1707 if (out.file->f_mode & FMODE_WRITE)
1710 error = do_splice(in, off_in, 1708 error = do_splice(in.file, off_in,
1711 out, off_out, 1709 out.file, off_out,
1712 len, flags); 1710 len, flags);
1713 fput_light(out, fput_out); 1711 fdput(out);
1714 } 1712 }
1715 } 1713 }
1716 1714 fdput(in);
1717 fput_light(in, fput_in);
1718 } 1715 }
1719
1720 return error; 1716 return error;
1721} 1717}
1722 1718
@@ -2027,26 +2023,25 @@ static long do_tee(struct file *in, struct file *out, size_t len,
2027 2023
2028SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 2024SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
2029{ 2025{
2030 struct file *in; 2026 struct fd in;
2031 int error, fput_in; 2027 int error;
2032 2028
2033 if (unlikely(!len)) 2029 if (unlikely(!len))
2034 return 0; 2030 return 0;
2035 2031
2036 error = -EBADF; 2032 error = -EBADF;
2037 in = fget_light(fdin, &fput_in); 2033 in = fdget(fdin);
2038 if (in) { 2034 if (in.file) {
2039 if (in->f_mode & FMODE_READ) { 2035 if (in.file->f_mode & FMODE_READ) {
2040 int fput_out; 2036 struct fd out = fdget(fdout);
2041 struct file *out = fget_light(fdout, &fput_out); 2037 if (out.file) {
2042 2038 if (out.file->f_mode & FMODE_WRITE)
2043 if (out) { 2039 error = do_tee(in.file, out.file,
2044 if (out->f_mode & FMODE_WRITE) 2040 len, flags);
2045 error = do_tee(in, out, len, flags); 2041 fdput(out);
2046 fput_light(out, fput_out);
2047 } 2042 }
2048 } 2043 }
2049 fput_light(in, fput_in); 2044 fdput(in);
2050 } 2045 }
2051 2046
2052 return error; 2047 return error;
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 81afbccfa843..a1ce5ce60632 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -56,16 +56,20 @@
56static int squashfs_new_inode(struct super_block *sb, struct inode *inode, 56static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
57 struct squashfs_base_inode *sqsh_ino) 57 struct squashfs_base_inode *sqsh_ino)
58{ 58{
59 uid_t i_uid;
60 gid_t i_gid;
59 int err; 61 int err;
60 62
61 err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &inode->i_uid); 63 err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
62 if (err) 64 if (err)
63 return err; 65 return err;
64 66
65 err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &inode->i_gid); 67 err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &i_gid);
66 if (err) 68 if (err)
67 return err; 69 return err;
68 70
71 i_uid_write(inode, i_uid);
72 i_gid_write(inode, i_gid);
69 inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); 73 inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
70 inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime); 74 inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
71 inode->i_atime.tv_sec = inode->i_mtime.tv_sec; 75 inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 29cd014ed3a1..260e3928d4f5 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -425,6 +425,11 @@ static int __init init_inodecache(void)
425 425
426static void destroy_inodecache(void) 426static void destroy_inodecache(void)
427{ 427{
428 /*
429 * Make sure all delayed rcu free inodes are flushed before we
430 * destroy cache.
431 */
432 rcu_barrier();
428 kmem_cache_destroy(squashfs_inode_cachep); 433 kmem_cache_destroy(squashfs_inode_cachep);
429} 434}
430 435
diff --git a/fs/stat.c b/fs/stat.c
index b6ff11825fc8..eae494630a36 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_fstat(unsigned int fd, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 int fput_needed; 60 struct fd f = fdget_raw(fd);
61 struct file *f = fget_light(fd, &fput_needed);
62 int error = -EBADF; 61 int error = -EBADF;
63 62
64 if (f) { 63 if (f.file) {
65 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); 64 error = vfs_getattr(f.file->f_path.mnt, f.file->f_path.dentry,
66 fput_light(f, fput_needed); 65 stat);
66 fdput(f);
67 } 67 }
68 return error; 68 return error;
69} 69}
@@ -326,7 +326,7 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
326 326
327 327
328/* ---------- LFS-64 ----------- */ 328/* ---------- LFS-64 ----------- */
329#ifdef __ARCH_WANT_STAT64 329#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
330 330
331#ifndef INIT_STRUCT_STAT64_PADDING 331#ifndef INIT_STRUCT_STAT64_PADDING
332# define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) 332# define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st))
@@ -415,7 +415,7 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
415 return error; 415 return error;
416 return cp_new_stat64(&stat, statbuf); 416 return cp_new_stat64(&stat, statbuf);
417} 417}
418#endif /* __ARCH_WANT_STAT64 */ 418#endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
419 419
420/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ 420/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
421void __inode_add_bytes(struct inode *inode, loff_t bytes) 421void __inode_add_bytes(struct inode *inode, loff_t bytes)
diff --git a/fs/statfs.c b/fs/statfs.c
index 95ad5c0e586c..f8e832e6f0a2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -87,12 +87,11 @@ int user_statfs(const char __user *pathname, struct kstatfs *st)
87 87
88int fd_statfs(int fd, struct kstatfs *st) 88int fd_statfs(int fd, struct kstatfs *st)
89{ 89{
90 int fput_needed; 90 struct fd f = fdget(fd);
91 struct file *file = fget_light(fd, &fput_needed);
92 int error = -EBADF; 91 int error = -EBADF;
93 if (file) { 92 if (f.file) {
94 error = vfs_statfs(&file->f_path, st); 93 error = vfs_statfs(&f.file->f_path, st);
95 fput_light(file, fput_needed); 94 fdput(f);
96 } 95 }
97 return error; 96 return error;
98} 97}
diff --git a/fs/super.c b/fs/super.c
index 0902cfa6a12e..a3bc935069d9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -307,12 +307,6 @@ void deactivate_locked_super(struct super_block *s)
307 307
308 /* caches are now gone, we can safely kill the shrinker now */ 308 /* caches are now gone, we can safely kill the shrinker now */
309 unregister_shrinker(&s->s_shrink); 309 unregister_shrinker(&s->s_shrink);
310
311 /*
312 * We need to call rcu_barrier so all the delayed rcu free
313 * inodes are flushed before we release the fs module.
314 */
315 rcu_barrier();
316 put_filesystem(fs); 310 put_filesystem(fs);
317 put_super(s); 311 put_super(s);
318 } else { 312 } else {
@@ -871,7 +865,7 @@ int get_anon_bdev(dev_t *p)
871 else if (error) 865 else if (error)
872 return -EAGAIN; 866 return -EAGAIN;
873 867
874 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 868 if ((dev & MAX_IDR_MASK) == (1 << MINORBITS)) {
875 spin_lock(&unnamed_dev_lock); 869 spin_lock(&unnamed_dev_lock);
876 ida_remove(&unnamed_dev_ida, dev); 870 ida_remove(&unnamed_dev_ida, dev);
877 if (unnamed_dev_start > dev) 871 if (unnamed_dev_start > dev)
diff --git a/fs/sync.c b/fs/sync.c
index eb8722dc556f..14eefeb44636 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -148,21 +148,19 @@ void emergency_sync(void)
148 */ 148 */
149SYSCALL_DEFINE1(syncfs, int, fd) 149SYSCALL_DEFINE1(syncfs, int, fd)
150{ 150{
151 struct file *file; 151 struct fd f = fdget(fd);
152 struct super_block *sb; 152 struct super_block *sb;
153 int ret; 153 int ret;
154 int fput_needed;
155 154
156 file = fget_light(fd, &fput_needed); 155 if (!f.file)
157 if (!file)
158 return -EBADF; 156 return -EBADF;
159 sb = file->f_dentry->d_sb; 157 sb = f.file->f_dentry->d_sb;
160 158
161 down_read(&sb->s_umount); 159 down_read(&sb->s_umount);
162 ret = sync_filesystem(sb); 160 ret = sync_filesystem(sb);
163 up_read(&sb->s_umount); 161 up_read(&sb->s_umount);
164 162
165 fput_light(file, fput_needed); 163 fdput(f);
166 return ret; 164 return ret;
167} 165}
168 166
@@ -201,14 +199,12 @@ EXPORT_SYMBOL(vfs_fsync);
201 199
202static int do_fsync(unsigned int fd, int datasync) 200static int do_fsync(unsigned int fd, int datasync)
203{ 201{
204 struct file *file; 202 struct fd f = fdget(fd);
205 int ret = -EBADF; 203 int ret = -EBADF;
206 int fput_needed;
207 204
208 file = fget_light(fd, &fput_needed); 205 if (f.file) {
209 if (file) { 206 ret = vfs_fsync(f.file, datasync);
210 ret = vfs_fsync(file, datasync); 207 fdput(f);
211 fput_light(file, fput_needed);
212 } 208 }
213 return ret; 209 return ret;
214} 210}
@@ -291,10 +287,9 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
291 unsigned int flags) 287 unsigned int flags)
292{ 288{
293 int ret; 289 int ret;
294 struct file *file; 290 struct fd f;
295 struct address_space *mapping; 291 struct address_space *mapping;
296 loff_t endbyte; /* inclusive */ 292 loff_t endbyte; /* inclusive */
297 int fput_needed;
298 umode_t i_mode; 293 umode_t i_mode;
299 294
300 ret = -EINVAL; 295 ret = -EINVAL;
@@ -333,17 +328,17 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
333 endbyte--; /* inclusive */ 328 endbyte--; /* inclusive */
334 329
335 ret = -EBADF; 330 ret = -EBADF;
336 file = fget_light(fd, &fput_needed); 331 f = fdget(fd);
337 if (!file) 332 if (!f.file)
338 goto out; 333 goto out;
339 334
340 i_mode = file->f_path.dentry->d_inode->i_mode; 335 i_mode = f.file->f_path.dentry->d_inode->i_mode;
341 ret = -ESPIPE; 336 ret = -ESPIPE;
342 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && 337 if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
343 !S_ISLNK(i_mode)) 338 !S_ISLNK(i_mode))
344 goto out_put; 339 goto out_put;
345 340
346 mapping = file->f_mapping; 341 mapping = f.file->f_mapping;
347 if (!mapping) { 342 if (!mapping) {
348 ret = -EINVAL; 343 ret = -EINVAL;
349 goto out_put; 344 goto out_put;
@@ -366,7 +361,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
366 ret = filemap_fdatawait_range(mapping, offset, endbyte); 361 ret = filemap_fdatawait_range(mapping, offset, endbyte);
367 362
368out_put: 363out_put:
369 fput_light(file, fput_needed); 364 fdput(f);
370out: 365out:
371 return ret; 366 return ret;
372} 367}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index a7ac78f8e67a..3c9eb5624f5e 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -113,7 +113,7 @@ int sysfs_create_link(struct kobject *kobj, struct kobject *target,
113 * @target: object we're pointing to. 113 * @target: object we're pointing to.
114 * @name: name of the symlink. 114 * @name: name of the symlink.
115 * 115 *
116 * This function does the same as sysf_create_link(), but it 116 * This function does the same as sysfs_create_link(), but it
117 * doesn't warn if the link already exists. 117 * doesn't warn if the link already exists.
118 */ 118 */
119int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target, 119int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 80e1e2b18df1..d33e506c1eac 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -202,8 +202,8 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
202 } 202 }
203 /* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */ 203 /* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */
204 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); 204 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
205 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid); 205 i_uid_write(inode, (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid));
206 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid); 206 i_gid_write(inode, (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid));
207 set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink)); 207 set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
208 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); 208 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
209 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); 209 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
@@ -256,8 +256,8 @@ static int __sysv_write_inode(struct inode *inode, int wait)
256 } 256 }
257 257
258 raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); 258 raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
259 raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); 259 raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(i_uid_read(inode)));
260 raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); 260 raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(i_gid_read(inode)));
261 raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink); 261 raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink);
262 raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size); 262 raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size);
263 raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec); 263 raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec);
@@ -360,5 +360,10 @@ int __init sysv_init_icache(void)
360 360
361void sysv_destroy_icache(void) 361void sysv_destroy_icache(void)
362{ 362{
363 /*
364 * Make sure all delayed rcu free inodes are flushed before we
365 * destroy cache.
366 */
367 rcu_barrier();
363 kmem_cache_destroy(sysv_inode_cachep); 368 kmem_cache_destroy(sysv_inode_cachep);
364} 369}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index dffeb3795af1..d03822bbf190 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -234,19 +234,17 @@ static const struct file_operations timerfd_fops = {
234 .llseek = noop_llseek, 234 .llseek = noop_llseek,
235}; 235};
236 236
237static struct file *timerfd_fget(int fd) 237static int timerfd_fget(int fd, struct fd *p)
238{ 238{
239 struct file *file; 239 struct fd f = fdget(fd);
240 240 if (!f.file)
241 file = fget(fd); 241 return -EBADF;
242 if (!file) 242 if (f.file->f_op != &timerfd_fops) {
243 return ERR_PTR(-EBADF); 243 fdput(f);
244 if (file->f_op != &timerfd_fops) { 244 return -EINVAL;
245 fput(file);
246 return ERR_PTR(-EINVAL);
247 } 245 }
248 246 *p = f;
249 return file; 247 return 0;
250} 248}
251 249
252SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) 250SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
@@ -284,7 +282,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
284 const struct itimerspec __user *, utmr, 282 const struct itimerspec __user *, utmr,
285 struct itimerspec __user *, otmr) 283 struct itimerspec __user *, otmr)
286{ 284{
287 struct file *file; 285 struct fd f;
288 struct timerfd_ctx *ctx; 286 struct timerfd_ctx *ctx;
289 struct itimerspec ktmr, kotmr; 287 struct itimerspec ktmr, kotmr;
290 int ret; 288 int ret;
@@ -297,10 +295,10 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
297 !timespec_valid(&ktmr.it_interval)) 295 !timespec_valid(&ktmr.it_interval))
298 return -EINVAL; 296 return -EINVAL;
299 297
300 file = timerfd_fget(ufd); 298 ret = timerfd_fget(ufd, &f);
301 if (IS_ERR(file)) 299 if (ret)
302 return PTR_ERR(file); 300 return ret;
303 ctx = file->private_data; 301 ctx = f.file->private_data;
304 302
305 timerfd_setup_cancel(ctx, flags); 303 timerfd_setup_cancel(ctx, flags);
306 304
@@ -334,7 +332,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
334 ret = timerfd_setup(ctx, flags, &ktmr); 332 ret = timerfd_setup(ctx, flags, &ktmr);
335 333
336 spin_unlock_irq(&ctx->wqh.lock); 334 spin_unlock_irq(&ctx->wqh.lock);
337 fput(file); 335 fdput(f);
338 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 336 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
339 return -EFAULT; 337 return -EFAULT;
340 338
@@ -343,14 +341,13 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
343 341
344SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 342SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
345{ 343{
346 struct file *file; 344 struct fd f;
347 struct timerfd_ctx *ctx; 345 struct timerfd_ctx *ctx;
348 struct itimerspec kotmr; 346 struct itimerspec kotmr;
349 347 int ret = timerfd_fget(ufd, &f);
350 file = timerfd_fget(ufd); 348 if (ret)
351 if (IS_ERR(file)) 349 return ret;
352 return PTR_ERR(file); 350 ctx = f.file->private_data;
353 ctx = file->private_data;
354 351
355 spin_lock_irq(&ctx->wqh.lock); 352 spin_lock_irq(&ctx->wqh.lock);
356 if (ctx->expired && ctx->tintv.tv64) { 353 if (ctx->expired && ctx->tintv.tv64) {
@@ -362,7 +359,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
362 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); 359 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
363 kotmr.it_interval = ktime_to_timespec(ctx->tintv); 360 kotmr.it_interval = ktime_to_timespec(ctx->tintv);
364 spin_unlock_irq(&ctx->wqh.lock); 361 spin_unlock_irq(&ctx->wqh.lock);
365 fput(file); 362 fdput(f);
366 363
367 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; 364 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
368} 365}
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index bc4f94b28706..e8e01d74dc05 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -272,8 +272,8 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
272 */ 272 */
273static int can_use_rp(struct ubifs_info *c) 273static int can_use_rp(struct ubifs_info *c)
274{ 274{
275 if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) || 275 if (uid_eq(current_fsuid(), c->rp_uid) || capable(CAP_SYS_RESOURCE) ||
276 (c->rp_gid != 0 && in_group_p(c->rp_gid))) 276 (!gid_eq(c->rp_gid, GLOBAL_ROOT_GID) && in_group_p(c->rp_gid)))
277 return 1; 277 return 1;
278 return 0; 278 return 0;
279} 279}
@@ -342,9 +342,8 @@ static int do_budget_space(struct ubifs_info *c)
342 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 342 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
343 c->lst.taken_empty_lebs; 343 c->lst.taken_empty_lebs;
344 if (unlikely(rsvd_idx_lebs > lebs)) { 344 if (unlikely(rsvd_idx_lebs > lebs)) {
345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " 345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), rsvd_idx_lebs %d",
346 "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, 346 min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs);
347 rsvd_idx_lebs);
348 return -ENOSPC; 347 return -ENOSPC;
349 } 348 }
350 349
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 8eda717cb99b..ff8229340cd5 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -293,8 +293,8 @@ int ubifs_bg_thread(void *info)
293 int err; 293 int err;
294 struct ubifs_info *c = info; 294 struct ubifs_info *c = info;
295 295
296 dbg_msg("background thread \"%s\" started, PID %d", 296 ubifs_msg("background thread \"%s\" started, PID %d",
297 c->bgt_name, current->pid); 297 c->bgt_name, current->pid);
298 set_freezable(); 298 set_freezable();
299 299
300 while (1) { 300 while (1) {
@@ -328,7 +328,7 @@ int ubifs_bg_thread(void *info)
328 cond_resched(); 328 cond_resched();
329 } 329 }
330 330
331 dbg_msg("background thread \"%s\" stops", c->bgt_name); 331 ubifs_msg("background thread \"%s\" stops", c->bgt_name);
332 return 0; 332 return 0;
333} 333}
334 334
@@ -514,7 +514,7 @@ struct idx_node {
514 struct list_head list; 514 struct list_head list;
515 int iip; 515 int iip;
516 union ubifs_key upper_key; 516 union ubifs_key upper_key;
517 struct ubifs_idx_node idx __attribute__((aligned(8))); 517 struct ubifs_idx_node idx __aligned(8);
518}; 518};
519 519
520/** 520/**
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 11e4132f314a..2bfa0953335d 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -112,8 +112,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
112 if (compr->comp_mutex) 112 if (compr->comp_mutex)
113 mutex_unlock(compr->comp_mutex); 113 mutex_unlock(compr->comp_mutex);
114 if (unlikely(err)) { 114 if (unlikely(err)) {
115 ubifs_warn("cannot compress %d bytes, compressor %s, " 115 ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
116 "error %d, leave data uncompressed",
117 in_len, compr->name, err); 116 in_len, compr->name, err);
118 goto no_compr; 117 goto no_compr;
119 } 118 }
@@ -176,8 +175,8 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
176 if (compr->decomp_mutex) 175 if (compr->decomp_mutex)
177 mutex_unlock(compr->decomp_mutex); 176 mutex_unlock(compr->decomp_mutex);
178 if (err) 177 if (err)
179 ubifs_err("cannot decompress %d bytes, compressor %s, " 178 ubifs_err("cannot decompress %d bytes, compressor %s, error %d",
180 "error %d", in_len, compr->name, err); 179 in_len, compr->name, err);
181 180
182 return err; 181 return err;
183} 182}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index bb3167257aab..62911637e12f 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -219,15 +219,15 @@ const char *dbg_jhead(int jhead)
219 219
220static void dump_ch(const struct ubifs_ch *ch) 220static void dump_ch(const struct ubifs_ch *ch)
221{ 221{
222 printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); 222 pr_err("\tmagic %#x\n", le32_to_cpu(ch->magic));
223 printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); 223 pr_err("\tcrc %#x\n", le32_to_cpu(ch->crc));
224 printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, 224 pr_err("\tnode_type %d (%s)\n", ch->node_type,
225 dbg_ntype(ch->node_type)); 225 dbg_ntype(ch->node_type));
226 printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, 226 pr_err("\tgroup_type %d (%s)\n", ch->group_type,
227 dbg_gtype(ch->group_type)); 227 dbg_gtype(ch->group_type));
228 printk(KERN_ERR "\tsqnum %llu\n", 228 pr_err("\tsqnum %llu\n",
229 (unsigned long long)le64_to_cpu(ch->sqnum)); 229 (unsigned long long)le64_to_cpu(ch->sqnum));
230 printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); 230 pr_err("\tlen %u\n", le32_to_cpu(ch->len));
231} 231}
232 232
233void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) 233void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
@@ -238,43 +238,43 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
238 struct ubifs_dent_node *dent, *pdent = NULL; 238 struct ubifs_dent_node *dent, *pdent = NULL;
239 int count = 2; 239 int count = 2;
240 240
241 printk(KERN_ERR "Dump in-memory inode:"); 241 pr_err("Dump in-memory inode:");
242 printk(KERN_ERR "\tinode %lu\n", inode->i_ino); 242 pr_err("\tinode %lu\n", inode->i_ino);
243 printk(KERN_ERR "\tsize %llu\n", 243 pr_err("\tsize %llu\n",
244 (unsigned long long)i_size_read(inode)); 244 (unsigned long long)i_size_read(inode));
245 printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); 245 pr_err("\tnlink %u\n", inode->i_nlink);
246 printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); 246 pr_err("\tuid %u\n", (unsigned int)i_uid_read(inode));
247 printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); 247 pr_err("\tgid %u\n", (unsigned int)i_gid_read(inode));
248 printk(KERN_ERR "\tatime %u.%u\n", 248 pr_err("\tatime %u.%u\n",
249 (unsigned int)inode->i_atime.tv_sec, 249 (unsigned int)inode->i_atime.tv_sec,
250 (unsigned int)inode->i_atime.tv_nsec); 250 (unsigned int)inode->i_atime.tv_nsec);
251 printk(KERN_ERR "\tmtime %u.%u\n", 251 pr_err("\tmtime %u.%u\n",
252 (unsigned int)inode->i_mtime.tv_sec, 252 (unsigned int)inode->i_mtime.tv_sec,
253 (unsigned int)inode->i_mtime.tv_nsec); 253 (unsigned int)inode->i_mtime.tv_nsec);
254 printk(KERN_ERR "\tctime %u.%u\n", 254 pr_err("\tctime %u.%u\n",
255 (unsigned int)inode->i_ctime.tv_sec, 255 (unsigned int)inode->i_ctime.tv_sec,
256 (unsigned int)inode->i_ctime.tv_nsec); 256 (unsigned int)inode->i_ctime.tv_nsec);
257 printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); 257 pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum);
258 printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); 258 pr_err("\txattr_size %u\n", ui->xattr_size);
259 printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); 259 pr_err("\txattr_cnt %u\n", ui->xattr_cnt);
260 printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); 260 pr_err("\txattr_names %u\n", ui->xattr_names);
261 printk(KERN_ERR "\tdirty %u\n", ui->dirty); 261 pr_err("\tdirty %u\n", ui->dirty);
262 printk(KERN_ERR "\txattr %u\n", ui->xattr); 262 pr_err("\txattr %u\n", ui->xattr);
263 printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); 263 pr_err("\tbulk_read %u\n", ui->xattr);
264 printk(KERN_ERR "\tsynced_i_size %llu\n", 264 pr_err("\tsynced_i_size %llu\n",
265 (unsigned long long)ui->synced_i_size); 265 (unsigned long long)ui->synced_i_size);
266 printk(KERN_ERR "\tui_size %llu\n", 266 pr_err("\tui_size %llu\n",
267 (unsigned long long)ui->ui_size); 267 (unsigned long long)ui->ui_size);
268 printk(KERN_ERR "\tflags %d\n", ui->flags); 268 pr_err("\tflags %d\n", ui->flags);
269 printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); 269 pr_err("\tcompr_type %d\n", ui->compr_type);
270 printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); 270 pr_err("\tlast_page_read %lu\n", ui->last_page_read);
271 printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); 271 pr_err("\tread_in_a_row %lu\n", ui->read_in_a_row);
272 printk(KERN_ERR "\tdata_len %d\n", ui->data_len); 272 pr_err("\tdata_len %d\n", ui->data_len);
273 273
274 if (!S_ISDIR(inode->i_mode)) 274 if (!S_ISDIR(inode->i_mode))
275 return; 275 return;
276 276
277 printk(KERN_ERR "List of directory entries:\n"); 277 pr_err("List of directory entries:\n");
278 ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); 278 ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
279 279
280 lowest_dent_key(c, &key, inode->i_ino); 280 lowest_dent_key(c, &key, inode->i_ino);
@@ -282,11 +282,11 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
282 dent = ubifs_tnc_next_ent(c, &key, &nm); 282 dent = ubifs_tnc_next_ent(c, &key, &nm);
283 if (IS_ERR(dent)) { 283 if (IS_ERR(dent)) {
284 if (PTR_ERR(dent) != -ENOENT) 284 if (PTR_ERR(dent) != -ENOENT)
285 printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); 285 pr_err("error %ld\n", PTR_ERR(dent));
286 break; 286 break;
287 } 287 }
288 288
289 printk(KERN_ERR "\t%d: %s (%s)\n", 289 pr_err("\t%d: %s (%s)\n",
290 count++, dent->name, get_dent_type(dent->type)); 290 count++, dent->name, get_dent_type(dent->type));
291 291
292 nm.name = dent->name; 292 nm.name = dent->name;
@@ -305,12 +305,9 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
305 const struct ubifs_ch *ch = node; 305 const struct ubifs_ch *ch = node;
306 char key_buf[DBG_KEY_BUF_LEN]; 306 char key_buf[DBG_KEY_BUF_LEN];
307 307
308 if (dbg_is_tst_rcvry(c))
309 return;
310
311 /* If the magic is incorrect, just hexdump the first bytes */ 308 /* If the magic is incorrect, just hexdump the first bytes */
312 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { 309 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
313 printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); 310 pr_err("Not a node, first %zu bytes:", UBIFS_CH_SZ);
314 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, 311 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
315 (void *)node, UBIFS_CH_SZ, 1); 312 (void *)node, UBIFS_CH_SZ, 1);
316 return; 313 return;
@@ -324,8 +321,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
324 { 321 {
325 const struct ubifs_pad_node *pad = node; 322 const struct ubifs_pad_node *pad = node;
326 323
327 printk(KERN_ERR "\tpad_len %u\n", 324 pr_err("\tpad_len %u\n", le32_to_cpu(pad->pad_len));
328 le32_to_cpu(pad->pad_len));
329 break; 325 break;
330 } 326 }
331 case UBIFS_SB_NODE: 327 case UBIFS_SB_NODE:
@@ -333,112 +329,77 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
333 const struct ubifs_sb_node *sup = node; 329 const struct ubifs_sb_node *sup = node;
334 unsigned int sup_flags = le32_to_cpu(sup->flags); 330 unsigned int sup_flags = le32_to_cpu(sup->flags);
335 331
336 printk(KERN_ERR "\tkey_hash %d (%s)\n", 332 pr_err("\tkey_hash %d (%s)\n",
337 (int)sup->key_hash, get_key_hash(sup->key_hash)); 333 (int)sup->key_hash, get_key_hash(sup->key_hash));
338 printk(KERN_ERR "\tkey_fmt %d (%s)\n", 334 pr_err("\tkey_fmt %d (%s)\n",
339 (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); 335 (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
340 printk(KERN_ERR "\tflags %#x\n", sup_flags); 336 pr_err("\tflags %#x\n", sup_flags);
341 printk(KERN_ERR "\t big_lpt %u\n", 337 pr_err("\t big_lpt %u\n",
342 !!(sup_flags & UBIFS_FLG_BIGLPT)); 338 !!(sup_flags & UBIFS_FLG_BIGLPT));
343 printk(KERN_ERR "\t space_fixup %u\n", 339 pr_err("\t space_fixup %u\n",
344 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); 340 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
345 printk(KERN_ERR "\tmin_io_size %u\n", 341 pr_err("\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size));
346 le32_to_cpu(sup->min_io_size)); 342 pr_err("\tleb_size %u\n", le32_to_cpu(sup->leb_size));
347 printk(KERN_ERR "\tleb_size %u\n", 343 pr_err("\tleb_cnt %u\n", le32_to_cpu(sup->leb_cnt));
348 le32_to_cpu(sup->leb_size)); 344 pr_err("\tmax_leb_cnt %u\n", le32_to_cpu(sup->max_leb_cnt));
349 printk(KERN_ERR "\tleb_cnt %u\n", 345 pr_err("\tmax_bud_bytes %llu\n",
350 le32_to_cpu(sup->leb_cnt));
351 printk(KERN_ERR "\tmax_leb_cnt %u\n",
352 le32_to_cpu(sup->max_leb_cnt));
353 printk(KERN_ERR "\tmax_bud_bytes %llu\n",
354 (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); 346 (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
355 printk(KERN_ERR "\tlog_lebs %u\n", 347 pr_err("\tlog_lebs %u\n", le32_to_cpu(sup->log_lebs));
356 le32_to_cpu(sup->log_lebs)); 348 pr_err("\tlpt_lebs %u\n", le32_to_cpu(sup->lpt_lebs));
357 printk(KERN_ERR "\tlpt_lebs %u\n", 349 pr_err("\torph_lebs %u\n", le32_to_cpu(sup->orph_lebs));
358 le32_to_cpu(sup->lpt_lebs)); 350 pr_err("\tjhead_cnt %u\n", le32_to_cpu(sup->jhead_cnt));
359 printk(KERN_ERR "\torph_lebs %u\n", 351 pr_err("\tfanout %u\n", le32_to_cpu(sup->fanout));
360 le32_to_cpu(sup->orph_lebs)); 352 pr_err("\tlsave_cnt %u\n", le32_to_cpu(sup->lsave_cnt));
361 printk(KERN_ERR "\tjhead_cnt %u\n", 353 pr_err("\tdefault_compr %u\n",
362 le32_to_cpu(sup->jhead_cnt));
363 printk(KERN_ERR "\tfanout %u\n",
364 le32_to_cpu(sup->fanout));
365 printk(KERN_ERR "\tlsave_cnt %u\n",
366 le32_to_cpu(sup->lsave_cnt));
367 printk(KERN_ERR "\tdefault_compr %u\n",
368 (int)le16_to_cpu(sup->default_compr)); 354 (int)le16_to_cpu(sup->default_compr));
369 printk(KERN_ERR "\trp_size %llu\n", 355 pr_err("\trp_size %llu\n",
370 (unsigned long long)le64_to_cpu(sup->rp_size)); 356 (unsigned long long)le64_to_cpu(sup->rp_size));
371 printk(KERN_ERR "\trp_uid %u\n", 357 pr_err("\trp_uid %u\n", le32_to_cpu(sup->rp_uid));
372 le32_to_cpu(sup->rp_uid)); 358 pr_err("\trp_gid %u\n", le32_to_cpu(sup->rp_gid));
373 printk(KERN_ERR "\trp_gid %u\n", 359 pr_err("\tfmt_version %u\n", le32_to_cpu(sup->fmt_version));
374 le32_to_cpu(sup->rp_gid)); 360 pr_err("\ttime_gran %u\n", le32_to_cpu(sup->time_gran));
375 printk(KERN_ERR "\tfmt_version %u\n", 361 pr_err("\tUUID %pUB\n", sup->uuid);
376 le32_to_cpu(sup->fmt_version));
377 printk(KERN_ERR "\ttime_gran %u\n",
378 le32_to_cpu(sup->time_gran));
379 printk(KERN_ERR "\tUUID %pUB\n",
380 sup->uuid);
381 break; 362 break;
382 } 363 }
383 case UBIFS_MST_NODE: 364 case UBIFS_MST_NODE:
384 { 365 {
385 const struct ubifs_mst_node *mst = node; 366 const struct ubifs_mst_node *mst = node;
386 367
387 printk(KERN_ERR "\thighest_inum %llu\n", 368 pr_err("\thighest_inum %llu\n",
388 (unsigned long long)le64_to_cpu(mst->highest_inum)); 369 (unsigned long long)le64_to_cpu(mst->highest_inum));
389 printk(KERN_ERR "\tcommit number %llu\n", 370 pr_err("\tcommit number %llu\n",
390 (unsigned long long)le64_to_cpu(mst->cmt_no)); 371 (unsigned long long)le64_to_cpu(mst->cmt_no));
391 printk(KERN_ERR "\tflags %#x\n", 372 pr_err("\tflags %#x\n", le32_to_cpu(mst->flags));
392 le32_to_cpu(mst->flags)); 373 pr_err("\tlog_lnum %u\n", le32_to_cpu(mst->log_lnum));
393 printk(KERN_ERR "\tlog_lnum %u\n", 374 pr_err("\troot_lnum %u\n", le32_to_cpu(mst->root_lnum));
394 le32_to_cpu(mst->log_lnum)); 375 pr_err("\troot_offs %u\n", le32_to_cpu(mst->root_offs));
395 printk(KERN_ERR "\troot_lnum %u\n", 376 pr_err("\troot_len %u\n", le32_to_cpu(mst->root_len));
396 le32_to_cpu(mst->root_lnum)); 377 pr_err("\tgc_lnum %u\n", le32_to_cpu(mst->gc_lnum));
397 printk(KERN_ERR "\troot_offs %u\n", 378 pr_err("\tihead_lnum %u\n", le32_to_cpu(mst->ihead_lnum));
398 le32_to_cpu(mst->root_offs)); 379 pr_err("\tihead_offs %u\n", le32_to_cpu(mst->ihead_offs));
399 printk(KERN_ERR "\troot_len %u\n", 380 pr_err("\tindex_size %llu\n",
400 le32_to_cpu(mst->root_len));
401 printk(KERN_ERR "\tgc_lnum %u\n",
402 le32_to_cpu(mst->gc_lnum));
403 printk(KERN_ERR "\tihead_lnum %u\n",
404 le32_to_cpu(mst->ihead_lnum));
405 printk(KERN_ERR "\tihead_offs %u\n",
406 le32_to_cpu(mst->ihead_offs));
407 printk(KERN_ERR "\tindex_size %llu\n",
408 (unsigned long long)le64_to_cpu(mst->index_size)); 381 (unsigned long long)le64_to_cpu(mst->index_size));
409 printk(KERN_ERR "\tlpt_lnum %u\n", 382 pr_err("\tlpt_lnum %u\n", le32_to_cpu(mst->lpt_lnum));
410 le32_to_cpu(mst->lpt_lnum)); 383 pr_err("\tlpt_offs %u\n", le32_to_cpu(mst->lpt_offs));
411 printk(KERN_ERR "\tlpt_offs %u\n", 384 pr_err("\tnhead_lnum %u\n", le32_to_cpu(mst->nhead_lnum));
412 le32_to_cpu(mst->lpt_offs)); 385 pr_err("\tnhead_offs %u\n", le32_to_cpu(mst->nhead_offs));
413 printk(KERN_ERR "\tnhead_lnum %u\n", 386 pr_err("\tltab_lnum %u\n", le32_to_cpu(mst->ltab_lnum));
414 le32_to_cpu(mst->nhead_lnum)); 387 pr_err("\tltab_offs %u\n", le32_to_cpu(mst->ltab_offs));
415 printk(KERN_ERR "\tnhead_offs %u\n", 388 pr_err("\tlsave_lnum %u\n", le32_to_cpu(mst->lsave_lnum));
416 le32_to_cpu(mst->nhead_offs)); 389 pr_err("\tlsave_offs %u\n", le32_to_cpu(mst->lsave_offs));
417 printk(KERN_ERR "\tltab_lnum %u\n", 390 pr_err("\tlscan_lnum %u\n", le32_to_cpu(mst->lscan_lnum));
418 le32_to_cpu(mst->ltab_lnum)); 391 pr_err("\tleb_cnt %u\n", le32_to_cpu(mst->leb_cnt));
419 printk(KERN_ERR "\tltab_offs %u\n", 392 pr_err("\tempty_lebs %u\n", le32_to_cpu(mst->empty_lebs));
420 le32_to_cpu(mst->ltab_offs)); 393 pr_err("\tidx_lebs %u\n", le32_to_cpu(mst->idx_lebs));
421 printk(KERN_ERR "\tlsave_lnum %u\n", 394 pr_err("\ttotal_free %llu\n",
422 le32_to_cpu(mst->lsave_lnum));
423 printk(KERN_ERR "\tlsave_offs %u\n",
424 le32_to_cpu(mst->lsave_offs));
425 printk(KERN_ERR "\tlscan_lnum %u\n",
426 le32_to_cpu(mst->lscan_lnum));
427 printk(KERN_ERR "\tleb_cnt %u\n",
428 le32_to_cpu(mst->leb_cnt));
429 printk(KERN_ERR "\tempty_lebs %u\n",
430 le32_to_cpu(mst->empty_lebs));
431 printk(KERN_ERR "\tidx_lebs %u\n",
432 le32_to_cpu(mst->idx_lebs));
433 printk(KERN_ERR "\ttotal_free %llu\n",
434 (unsigned long long)le64_to_cpu(mst->total_free)); 395 (unsigned long long)le64_to_cpu(mst->total_free));
435 printk(KERN_ERR "\ttotal_dirty %llu\n", 396 pr_err("\ttotal_dirty %llu\n",
436 (unsigned long long)le64_to_cpu(mst->total_dirty)); 397 (unsigned long long)le64_to_cpu(mst->total_dirty));
437 printk(KERN_ERR "\ttotal_used %llu\n", 398 pr_err("\ttotal_used %llu\n",
438 (unsigned long long)le64_to_cpu(mst->total_used)); 399 (unsigned long long)le64_to_cpu(mst->total_used));
439 printk(KERN_ERR "\ttotal_dead %llu\n", 400 pr_err("\ttotal_dead %llu\n",
440 (unsigned long long)le64_to_cpu(mst->total_dead)); 401 (unsigned long long)le64_to_cpu(mst->total_dead));
441 printk(KERN_ERR "\ttotal_dark %llu\n", 402 pr_err("\ttotal_dark %llu\n",
442 (unsigned long long)le64_to_cpu(mst->total_dark)); 403 (unsigned long long)le64_to_cpu(mst->total_dark));
443 break; 404 break;
444 } 405 }
@@ -446,12 +407,9 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
446 { 407 {
447 const struct ubifs_ref_node *ref = node; 408 const struct ubifs_ref_node *ref = node;
448 409
449 printk(KERN_ERR "\tlnum %u\n", 410 pr_err("\tlnum %u\n", le32_to_cpu(ref->lnum));
450 le32_to_cpu(ref->lnum)); 411 pr_err("\toffs %u\n", le32_to_cpu(ref->offs));
451 printk(KERN_ERR "\toffs %u\n", 412 pr_err("\tjhead %u\n", le32_to_cpu(ref->jhead));
452 le32_to_cpu(ref->offs));
453 printk(KERN_ERR "\tjhead %u\n",
454 le32_to_cpu(ref->jhead));
455 break; 413 break;
456 } 414 }
457 case UBIFS_INO_NODE: 415 case UBIFS_INO_NODE:
@@ -459,41 +417,32 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
459 const struct ubifs_ino_node *ino = node; 417 const struct ubifs_ino_node *ino = node;
460 418
461 key_read(c, &ino->key, &key); 419 key_read(c, &ino->key, &key);
462 printk(KERN_ERR "\tkey %s\n", 420 pr_err("\tkey %s\n",
463 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 421 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
464 printk(KERN_ERR "\tcreat_sqnum %llu\n", 422 pr_err("\tcreat_sqnum %llu\n",
465 (unsigned long long)le64_to_cpu(ino->creat_sqnum)); 423 (unsigned long long)le64_to_cpu(ino->creat_sqnum));
466 printk(KERN_ERR "\tsize %llu\n", 424 pr_err("\tsize %llu\n",
467 (unsigned long long)le64_to_cpu(ino->size)); 425 (unsigned long long)le64_to_cpu(ino->size));
468 printk(KERN_ERR "\tnlink %u\n", 426 pr_err("\tnlink %u\n", le32_to_cpu(ino->nlink));
469 le32_to_cpu(ino->nlink)); 427 pr_err("\tatime %lld.%u\n",
470 printk(KERN_ERR "\tatime %lld.%u\n",
471 (long long)le64_to_cpu(ino->atime_sec), 428 (long long)le64_to_cpu(ino->atime_sec),
472 le32_to_cpu(ino->atime_nsec)); 429 le32_to_cpu(ino->atime_nsec));
473 printk(KERN_ERR "\tmtime %lld.%u\n", 430 pr_err("\tmtime %lld.%u\n",
474 (long long)le64_to_cpu(ino->mtime_sec), 431 (long long)le64_to_cpu(ino->mtime_sec),
475 le32_to_cpu(ino->mtime_nsec)); 432 le32_to_cpu(ino->mtime_nsec));
476 printk(KERN_ERR "\tctime %lld.%u\n", 433 pr_err("\tctime %lld.%u\n",
477 (long long)le64_to_cpu(ino->ctime_sec), 434 (long long)le64_to_cpu(ino->ctime_sec),
478 le32_to_cpu(ino->ctime_nsec)); 435 le32_to_cpu(ino->ctime_nsec));
479 printk(KERN_ERR "\tuid %u\n", 436 pr_err("\tuid %u\n", le32_to_cpu(ino->uid));
480 le32_to_cpu(ino->uid)); 437 pr_err("\tgid %u\n", le32_to_cpu(ino->gid));
481 printk(KERN_ERR "\tgid %u\n", 438 pr_err("\tmode %u\n", le32_to_cpu(ino->mode));
482 le32_to_cpu(ino->gid)); 439 pr_err("\tflags %#x\n", le32_to_cpu(ino->flags));
483 printk(KERN_ERR "\tmode %u\n", 440 pr_err("\txattr_cnt %u\n", le32_to_cpu(ino->xattr_cnt));
484 le32_to_cpu(ino->mode)); 441 pr_err("\txattr_size %u\n", le32_to_cpu(ino->xattr_size));
485 printk(KERN_ERR "\tflags %#x\n", 442 pr_err("\txattr_names %u\n", le32_to_cpu(ino->xattr_names));
486 le32_to_cpu(ino->flags)); 443 pr_err("\tcompr_type %#x\n",
487 printk(KERN_ERR "\txattr_cnt %u\n",
488 le32_to_cpu(ino->xattr_cnt));
489 printk(KERN_ERR "\txattr_size %u\n",
490 le32_to_cpu(ino->xattr_size));
491 printk(KERN_ERR "\txattr_names %u\n",
492 le32_to_cpu(ino->xattr_names));
493 printk(KERN_ERR "\tcompr_type %#x\n",
494 (int)le16_to_cpu(ino->compr_type)); 444 (int)le16_to_cpu(ino->compr_type));
495 printk(KERN_ERR "\tdata len %u\n", 445 pr_err("\tdata len %u\n", le32_to_cpu(ino->data_len));
496 le32_to_cpu(ino->data_len));
497 break; 446 break;
498 } 447 }
499 case UBIFS_DENT_NODE: 448 case UBIFS_DENT_NODE:
@@ -503,22 +452,21 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
503 int nlen = le16_to_cpu(dent->nlen); 452 int nlen = le16_to_cpu(dent->nlen);
504 453
505 key_read(c, &dent->key, &key); 454 key_read(c, &dent->key, &key);
506 printk(KERN_ERR "\tkey %s\n", 455 pr_err("\tkey %s\n",
507 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 456 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
508 printk(KERN_ERR "\tinum %llu\n", 457 pr_err("\tinum %llu\n",
509 (unsigned long long)le64_to_cpu(dent->inum)); 458 (unsigned long long)le64_to_cpu(dent->inum));
510 printk(KERN_ERR "\ttype %d\n", (int)dent->type); 459 pr_err("\ttype %d\n", (int)dent->type);
511 printk(KERN_ERR "\tnlen %d\n", nlen); 460 pr_err("\tnlen %d\n", nlen);
512 printk(KERN_ERR "\tname "); 461 pr_err("\tname ");
513 462
514 if (nlen > UBIFS_MAX_NLEN) 463 if (nlen > UBIFS_MAX_NLEN)
515 printk(KERN_ERR "(bad name length, not printing, " 464 pr_err("(bad name length, not printing, bad or corrupted node)");
516 "bad or corrupted node)");
517 else { 465 else {
518 for (i = 0; i < nlen && dent->name[i]; i++) 466 for (i = 0; i < nlen && dent->name[i]; i++)
519 printk(KERN_CONT "%c", dent->name[i]); 467 pr_cont("%c", dent->name[i]);
520 } 468 }
521 printk(KERN_CONT "\n"); 469 pr_cont("\n");
522 470
523 break; 471 break;
524 } 472 }
@@ -528,15 +476,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
528 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; 476 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
529 477
530 key_read(c, &dn->key, &key); 478 key_read(c, &dn->key, &key);
531 printk(KERN_ERR "\tkey %s\n", 479 pr_err("\tkey %s\n",
532 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 480 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
533 printk(KERN_ERR "\tsize %u\n", 481 pr_err("\tsize %u\n", le32_to_cpu(dn->size));
534 le32_to_cpu(dn->size)); 482 pr_err("\tcompr_typ %d\n",
535 printk(KERN_ERR "\tcompr_typ %d\n",
536 (int)le16_to_cpu(dn->compr_type)); 483 (int)le16_to_cpu(dn->compr_type));
537 printk(KERN_ERR "\tdata size %d\n", 484 pr_err("\tdata size %d\n", dlen);
538 dlen); 485 pr_err("\tdata:\n");
539 printk(KERN_ERR "\tdata:\n");
540 print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, 486 print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
541 (void *)&dn->data, dlen, 0); 487 (void *)&dn->data, dlen, 0);
542 break; 488 break;
@@ -545,11 +491,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
545 { 491 {
546 const struct ubifs_trun_node *trun = node; 492 const struct ubifs_trun_node *trun = node;
547 493
548 printk(KERN_ERR "\tinum %u\n", 494 pr_err("\tinum %u\n", le32_to_cpu(trun->inum));
549 le32_to_cpu(trun->inum)); 495 pr_err("\told_size %llu\n",
550 printk(KERN_ERR "\told_size %llu\n",
551 (unsigned long long)le64_to_cpu(trun->old_size)); 496 (unsigned long long)le64_to_cpu(trun->old_size));
552 printk(KERN_ERR "\tnew_size %llu\n", 497 pr_err("\tnew_size %llu\n",
553 (unsigned long long)le64_to_cpu(trun->new_size)); 498 (unsigned long long)le64_to_cpu(trun->new_size));
554 break; 499 break;
555 } 500 }
@@ -558,17 +503,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
558 const struct ubifs_idx_node *idx = node; 503 const struct ubifs_idx_node *idx = node;
559 504
560 n = le16_to_cpu(idx->child_cnt); 505 n = le16_to_cpu(idx->child_cnt);
561 printk(KERN_ERR "\tchild_cnt %d\n", n); 506 pr_err("\tchild_cnt %d\n", n);
562 printk(KERN_ERR "\tlevel %d\n", 507 pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level));
563 (int)le16_to_cpu(idx->level)); 508 pr_err("\tBranches:\n");
564 printk(KERN_ERR "\tBranches:\n");
565 509
566 for (i = 0; i < n && i < c->fanout - 1; i++) { 510 for (i = 0; i < n && i < c->fanout - 1; i++) {
567 const struct ubifs_branch *br; 511 const struct ubifs_branch *br;
568 512
569 br = ubifs_idx_branch(c, idx, i); 513 br = ubifs_idx_branch(c, idx, i);
570 key_read(c, &br->key, &key); 514 key_read(c, &br->key, &key);
571 printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", 515 pr_err("\t%d: LEB %d:%d len %d key %s\n",
572 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), 516 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
573 le32_to_cpu(br->len), 517 le32_to_cpu(br->len),
574 dbg_snprintf_key(c, &key, key_buf, 518 dbg_snprintf_key(c, &key, key_buf,
@@ -582,20 +526,20 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
582 { 526 {
583 const struct ubifs_orph_node *orph = node; 527 const struct ubifs_orph_node *orph = node;
584 528
585 printk(KERN_ERR "\tcommit number %llu\n", 529 pr_err("\tcommit number %llu\n",
586 (unsigned long long) 530 (unsigned long long)
587 le64_to_cpu(orph->cmt_no) & LLONG_MAX); 531 le64_to_cpu(orph->cmt_no) & LLONG_MAX);
588 printk(KERN_ERR "\tlast node flag %llu\n", 532 pr_err("\tlast node flag %llu\n",
589 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); 533 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
590 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; 534 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
591 printk(KERN_ERR "\t%d orphan inode numbers:\n", n); 535 pr_err("\t%d orphan inode numbers:\n", n);
592 for (i = 0; i < n; i++) 536 for (i = 0; i < n; i++)
593 printk(KERN_ERR "\t ino %llu\n", 537 pr_err("\t ino %llu\n",
594 (unsigned long long)le64_to_cpu(orph->inos[i])); 538 (unsigned long long)le64_to_cpu(orph->inos[i]));
595 break; 539 break;
596 } 540 }
597 default: 541 default:
598 printk(KERN_ERR "node type %d was not recognized\n", 542 pr_err("node type %d was not recognized\n",
599 (int)ch->node_type); 543 (int)ch->node_type);
600 } 544 }
601 spin_unlock(&dbg_lock); 545 spin_unlock(&dbg_lock);
@@ -604,16 +548,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
604void ubifs_dump_budget_req(const struct ubifs_budget_req *req) 548void ubifs_dump_budget_req(const struct ubifs_budget_req *req)
605{ 549{
606 spin_lock(&dbg_lock); 550 spin_lock(&dbg_lock);
607 printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", 551 pr_err("Budgeting request: new_ino %d, dirtied_ino %d\n",
608 req->new_ino, req->dirtied_ino); 552 req->new_ino, req->dirtied_ino);
609 printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", 553 pr_err("\tnew_ino_d %d, dirtied_ino_d %d\n",
610 req->new_ino_d, req->dirtied_ino_d); 554 req->new_ino_d, req->dirtied_ino_d);
611 printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", 555 pr_err("\tnew_page %d, dirtied_page %d\n",
612 req->new_page, req->dirtied_page); 556 req->new_page, req->dirtied_page);
613 printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", 557 pr_err("\tnew_dent %d, mod_dent %d\n",
614 req->new_dent, req->mod_dent); 558 req->new_dent, req->mod_dent);
615 printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); 559 pr_err("\tidx_growth %d\n", req->idx_growth);
616 printk(KERN_ERR "\tdata_growth %d dd_growth %d\n", 560 pr_err("\tdata_growth %d dd_growth %d\n",
617 req->data_growth, req->dd_growth); 561 req->data_growth, req->dd_growth);
618 spin_unlock(&dbg_lock); 562 spin_unlock(&dbg_lock);
619} 563}
@@ -621,14 +565,12 @@ void ubifs_dump_budget_req(const struct ubifs_budget_req *req)
621void ubifs_dump_lstats(const struct ubifs_lp_stats *lst) 565void ubifs_dump_lstats(const struct ubifs_lp_stats *lst)
622{ 566{
623 spin_lock(&dbg_lock); 567 spin_lock(&dbg_lock);
624 printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " 568 pr_err("(pid %d) Lprops statistics: empty_lebs %d, idx_lebs %d\n",
625 "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); 569 current->pid, lst->empty_lebs, lst->idx_lebs);
626 printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " 570 pr_err("\ttaken_empty_lebs %d, total_free %lld, total_dirty %lld\n",
627 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, 571 lst->taken_empty_lebs, lst->total_free, lst->total_dirty);
628 lst->total_dirty); 572 pr_err("\ttotal_used %lld, total_dark %lld, total_dead %lld\n",
629 printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " 573 lst->total_used, lst->total_dark, lst->total_dead);
630 "total_dead %lld\n", lst->total_used, lst->total_dark,
631 lst->total_dead);
632 spin_unlock(&dbg_lock); 574 spin_unlock(&dbg_lock);
633} 575}
634 576
@@ -642,21 +584,17 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
642 584
643 spin_lock(&c->space_lock); 585 spin_lock(&c->space_lock);
644 spin_lock(&dbg_lock); 586 spin_lock(&dbg_lock);
645 printk(KERN_ERR "(pid %d) Budgeting info: data budget sum %lld, " 587 pr_err("(pid %d) Budgeting info: data budget sum %lld, total budget sum %lld\n",
646 "total budget sum %lld\n", current->pid, 588 current->pid, bi->data_growth + bi->dd_growth,
647 bi->data_growth + bi->dd_growth,
648 bi->data_growth + bi->dd_growth + bi->idx_growth); 589 bi->data_growth + bi->dd_growth + bi->idx_growth);
649 printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " 590 pr_err("\tbudg_data_growth %lld, budg_dd_growth %lld, budg_idx_growth %lld\n",
650 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, 591 bi->data_growth, bi->dd_growth, bi->idx_growth);
651 bi->idx_growth); 592 pr_err("\tmin_idx_lebs %d, old_idx_sz %llu, uncommitted_idx %lld\n",
652 printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " 593 bi->min_idx_lebs, bi->old_idx_sz, bi->uncommitted_idx);
653 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, 594 pr_err("\tpage_budget %d, inode_budget %d, dent_budget %d\n",
654 bi->uncommitted_idx);
655 printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
656 bi->page_budget, bi->inode_budget, bi->dent_budget); 595 bi->page_budget, bi->inode_budget, bi->dent_budget);
657 printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", 596 pr_err("\tnospace %u, nospace_rp %u\n", bi->nospace, bi->nospace_rp);
658 bi->nospace, bi->nospace_rp); 597 pr_err("\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
659 printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
660 c->dark_wm, c->dead_wm, c->max_idx_node_sz); 598 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
661 599
662 if (bi != &c->bi) 600 if (bi != &c->bi)
@@ -667,38 +605,37 @@ void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
667 */ 605 */
668 goto out_unlock; 606 goto out_unlock;
669 607
670 printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", 608 pr_err("\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
671 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); 609 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
672 printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " 610 pr_err("\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, clean_zn_cnt %ld\n",
673 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), 611 atomic_long_read(&c->dirty_pg_cnt),
674 atomic_long_read(&c->dirty_zn_cnt), 612 atomic_long_read(&c->dirty_zn_cnt),
675 atomic_long_read(&c->clean_zn_cnt)); 613 atomic_long_read(&c->clean_zn_cnt));
676 printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", 614 pr_err("\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum);
677 c->gc_lnum, c->ihead_lnum);
678 615
679 /* If we are in R/O mode, journal heads do not exist */ 616 /* If we are in R/O mode, journal heads do not exist */
680 if (c->jheads) 617 if (c->jheads)
681 for (i = 0; i < c->jhead_cnt; i++) 618 for (i = 0; i < c->jhead_cnt; i++)
682 printk(KERN_ERR "\tjhead %s\t LEB %d\n", 619 pr_err("\tjhead %s\t LEB %d\n",
683 dbg_jhead(c->jheads[i].wbuf.jhead), 620 dbg_jhead(c->jheads[i].wbuf.jhead),
684 c->jheads[i].wbuf.lnum); 621 c->jheads[i].wbuf.lnum);
685 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { 622 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
686 bud = rb_entry(rb, struct ubifs_bud, rb); 623 bud = rb_entry(rb, struct ubifs_bud, rb);
687 printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); 624 pr_err("\tbud LEB %d\n", bud->lnum);
688 } 625 }
689 list_for_each_entry(bud, &c->old_buds, list) 626 list_for_each_entry(bud, &c->old_buds, list)
690 printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); 627 pr_err("\told bud LEB %d\n", bud->lnum);
691 list_for_each_entry(idx_gc, &c->idx_gc, list) 628 list_for_each_entry(idx_gc, &c->idx_gc, list)
692 printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", 629 pr_err("\tGC'ed idx LEB %d unmap %d\n",
693 idx_gc->lnum, idx_gc->unmap); 630 idx_gc->lnum, idx_gc->unmap);
694 printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); 631 pr_err("\tcommit state %d\n", c->cmt_state);
695 632
696 /* Print budgeting predictions */ 633 /* Print budgeting predictions */
697 available = ubifs_calc_available(c, c->bi.min_idx_lebs); 634 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
698 outstanding = c->bi.data_growth + c->bi.dd_growth; 635 outstanding = c->bi.data_growth + c->bi.dd_growth;
699 free = ubifs_get_free_space_nolock(c); 636 free = ubifs_get_free_space_nolock(c);
700 printk(KERN_ERR "Budgeting predictions:\n"); 637 pr_err("Budgeting predictions:\n");
701 printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", 638 pr_err("\tavailable: %lld, outstanding %lld, free %lld\n",
702 available, outstanding, free); 639 available, outstanding, free);
703out_unlock: 640out_unlock:
704 spin_unlock(&dbg_lock); 641 spin_unlock(&dbg_lock);
@@ -718,21 +655,19 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
718 dark = ubifs_calc_dark(c, spc); 655 dark = ubifs_calc_dark(c, spc);
719 656
720 if (lp->flags & LPROPS_INDEX) 657 if (lp->flags & LPROPS_INDEX)
721 printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " 658 pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d flags %#x (",
722 "free + dirty %-8d flags %#x (", lp->lnum, lp->free, 659 lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
723 lp->dirty, c->leb_size - spc, spc, lp->flags); 660 lp->flags);
724 else 661 else
725 printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " 662 pr_err("LEB %-7d free %-8d dirty %-8d used %-8d free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d flags %#-4x (",
726 "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " 663 lp->lnum, lp->free, lp->dirty, c->leb_size - spc, spc,
727 "flags %#-4x (", lp->lnum, lp->free, lp->dirty, 664 dark, dead, (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
728 c->leb_size - spc, spc, dark, dead,
729 (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
730 665
731 if (lp->flags & LPROPS_TAKEN) { 666 if (lp->flags & LPROPS_TAKEN) {
732 if (lp->flags & LPROPS_INDEX) 667 if (lp->flags & LPROPS_INDEX)
733 printk(KERN_CONT "index, taken"); 668 pr_cont("index, taken");
734 else 669 else
735 printk(KERN_CONT "taken"); 670 pr_cont("taken");
736 } else { 671 } else {
737 const char *s; 672 const char *s;
738 673
@@ -769,7 +704,7 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
769 break; 704 break;
770 } 705 }
771 } 706 }
772 printk(KERN_CONT "%s", s); 707 pr_cont("%s", s);
773 } 708 }
774 709
775 for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { 710 for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
@@ -784,19 +719,18 @@ void ubifs_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
784 */ 719 */
785 if (c->jheads && 720 if (c->jheads &&
786 lp->lnum == c->jheads[i].wbuf.lnum) { 721 lp->lnum == c->jheads[i].wbuf.lnum) {
787 printk(KERN_CONT ", jhead %s", 722 pr_cont(", jhead %s", dbg_jhead(i));
788 dbg_jhead(i));
789 head = 1; 723 head = 1;
790 } 724 }
791 } 725 }
792 if (!head) 726 if (!head)
793 printk(KERN_CONT ", bud of jhead %s", 727 pr_cont(", bud of jhead %s",
794 dbg_jhead(bud->jhead)); 728 dbg_jhead(bud->jhead));
795 } 729 }
796 } 730 }
797 if (lp->lnum == c->gc_lnum) 731 if (lp->lnum == c->gc_lnum)
798 printk(KERN_CONT ", GC LEB"); 732 pr_cont(", GC LEB");
799 printk(KERN_CONT ")\n"); 733 pr_cont(")\n");
800} 734}
801 735
802void ubifs_dump_lprops(struct ubifs_info *c) 736void ubifs_dump_lprops(struct ubifs_info *c)
@@ -805,8 +739,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
805 struct ubifs_lprops lp; 739 struct ubifs_lprops lp;
806 struct ubifs_lp_stats lst; 740 struct ubifs_lp_stats lst;
807 741
808 printk(KERN_ERR "(pid %d) start dumping LEB properties\n", 742 pr_err("(pid %d) start dumping LEB properties\n", current->pid);
809 current->pid);
810 ubifs_get_lp_stats(c, &lst); 743 ubifs_get_lp_stats(c, &lst);
811 ubifs_dump_lstats(&lst); 744 ubifs_dump_lstats(&lst);
812 745
@@ -817,8 +750,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
817 750
818 ubifs_dump_lprop(c, &lp); 751 ubifs_dump_lprop(c, &lp);
819 } 752 }
820 printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", 753 pr_err("(pid %d) finish dumping LEB properties\n", current->pid);
821 current->pid);
822} 754}
823 755
824void ubifs_dump_lpt_info(struct ubifs_info *c) 756void ubifs_dump_lpt_info(struct ubifs_info *c)
@@ -826,37 +758,36 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
826 int i; 758 int i;
827 759
828 spin_lock(&dbg_lock); 760 spin_lock(&dbg_lock);
829 printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); 761 pr_err("(pid %d) dumping LPT information\n", current->pid);
830 printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); 762 pr_err("\tlpt_sz: %lld\n", c->lpt_sz);
831 printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); 763 pr_err("\tpnode_sz: %d\n", c->pnode_sz);
832 printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); 764 pr_err("\tnnode_sz: %d\n", c->nnode_sz);
833 printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); 765 pr_err("\tltab_sz: %d\n", c->ltab_sz);
834 printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); 766 pr_err("\tlsave_sz: %d\n", c->lsave_sz);
835 printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); 767 pr_err("\tbig_lpt: %d\n", c->big_lpt);
836 printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght); 768 pr_err("\tlpt_hght: %d\n", c->lpt_hght);
837 printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); 769 pr_err("\tpnode_cnt: %d\n", c->pnode_cnt);
838 printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); 770 pr_err("\tnnode_cnt: %d\n", c->nnode_cnt);
839 printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); 771 pr_err("\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt);
840 printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); 772 pr_err("\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt);
841 printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); 773 pr_err("\tlsave_cnt: %d\n", c->lsave_cnt);
842 printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); 774 pr_err("\tspace_bits: %d\n", c->space_bits);
843 printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); 775 pr_err("\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits);
844 printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); 776 pr_err("\tlpt_offs_bits: %d\n", c->lpt_offs_bits);
845 printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); 777 pr_err("\tlpt_spc_bits: %d\n", c->lpt_spc_bits);
846 printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); 778 pr_err("\tpcnt_bits: %d\n", c->pcnt_bits);
847 printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); 779 pr_err("\tlnum_bits: %d\n", c->lnum_bits);
848 printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); 780 pr_err("\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs);
849 printk(KERN_ERR "\tLPT head is at %d:%d\n", 781 pr_err("\tLPT head is at %d:%d\n",
850 c->nhead_lnum, c->nhead_offs); 782 c->nhead_lnum, c->nhead_offs);
851 printk(KERN_ERR "\tLPT ltab is at %d:%d\n", 783 pr_err("\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs);
852 c->ltab_lnum, c->ltab_offs);
853 if (c->big_lpt) 784 if (c->big_lpt)
854 printk(KERN_ERR "\tLPT lsave is at %d:%d\n", 785 pr_err("\tLPT lsave is at %d:%d\n",
855 c->lsave_lnum, c->lsave_offs); 786 c->lsave_lnum, c->lsave_offs);
856 for (i = 0; i < c->lpt_lebs; i++) 787 for (i = 0; i < c->lpt_lebs; i++)
857 printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " 788 pr_err("\tLPT LEB %d free %d dirty %d tgc %d cmt %d\n",
858 "cmt %d\n", i + c->lpt_first, c->ltab[i].free, 789 i + c->lpt_first, c->ltab[i].free, c->ltab[i].dirty,
859 c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); 790 c->ltab[i].tgc, c->ltab[i].cmt);
860 spin_unlock(&dbg_lock); 791 spin_unlock(&dbg_lock);
861} 792}
862 793
@@ -865,13 +796,13 @@ void ubifs_dump_sleb(const struct ubifs_info *c,
865{ 796{
866 struct ubifs_scan_node *snod; 797 struct ubifs_scan_node *snod;
867 798
868 printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", 799 pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n",
869 current->pid, sleb->lnum, offs); 800 current->pid, sleb->lnum, offs);
870 801
871 list_for_each_entry(snod, &sleb->nodes, list) { 802 list_for_each_entry(snod, &sleb->nodes, list) {
872 cond_resched(); 803 cond_resched();
873 printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, 804 pr_err("Dumping node at LEB %d:%d len %d\n",
874 snod->offs, snod->len); 805 sleb->lnum, snod->offs, snod->len);
875 ubifs_dump_node(c, snod->node); 806 ubifs_dump_node(c, snod->node);
876 } 807 }
877} 808}
@@ -882,11 +813,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
882 struct ubifs_scan_node *snod; 813 struct ubifs_scan_node *snod;
883 void *buf; 814 void *buf;
884 815
885 if (dbg_is_tst_rcvry(c)) 816 pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
886 return;
887
888 printk(KERN_ERR "(pid %d) start dumping LEB %d\n",
889 current->pid, lnum);
890 817
891 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 818 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
892 if (!buf) { 819 if (!buf) {
@@ -900,18 +827,17 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
900 goto out; 827 goto out;
901 } 828 }
902 829
903 printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, 830 pr_err("LEB %d has %d nodes ending at %d\n", lnum,
904 sleb->nodes_cnt, sleb->endpt); 831 sleb->nodes_cnt, sleb->endpt);
905 832
906 list_for_each_entry(snod, &sleb->nodes, list) { 833 list_for_each_entry(snod, &sleb->nodes, list) {
907 cond_resched(); 834 cond_resched();
908 printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, 835 pr_err("Dumping node at LEB %d:%d len %d\n", lnum,
909 snod->offs, snod->len); 836 snod->offs, snod->len);
910 ubifs_dump_node(c, snod->node); 837 ubifs_dump_node(c, snod->node);
911 } 838 }
912 839
913 printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", 840 pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
914 current->pid, lnum);
915 ubifs_scan_destroy(sleb); 841 ubifs_scan_destroy(sleb);
916 842
917out: 843out:
@@ -932,33 +858,28 @@ void ubifs_dump_znode(const struct ubifs_info *c,
932 else 858 else
933 zbr = &c->zroot; 859 zbr = &c->zroot;
934 860
935 printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" 861 pr_err("znode %p, LEB %d:%d len %d parent %p iip %d level %d child_cnt %d flags %lx\n",
936 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, 862 znode, zbr->lnum, zbr->offs, zbr->len, znode->parent, znode->iip,
937 zbr->len, znode->parent, znode->iip, znode->level, 863 znode->level, znode->child_cnt, znode->flags);
938 znode->child_cnt, znode->flags);
939 864
940 if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) { 865 if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
941 spin_unlock(&dbg_lock); 866 spin_unlock(&dbg_lock);
942 return; 867 return;
943 } 868 }
944 869
945 printk(KERN_ERR "zbranches:\n"); 870 pr_err("zbranches:\n");
946 for (n = 0; n < znode->child_cnt; n++) { 871 for (n = 0; n < znode->child_cnt; n++) {
947 zbr = &znode->zbranch[n]; 872 zbr = &znode->zbranch[n];
948 if (znode->level > 0) 873 if (znode->level > 0)
949 printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " 874 pr_err("\t%d: znode %p LEB %d:%d len %d key %s\n",
950 "%s\n", n, zbr->znode, zbr->lnum, 875 n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
951 zbr->offs, zbr->len, 876 dbg_snprintf_key(c, &zbr->key, key_buf,
952 dbg_snprintf_key(c, &zbr->key, 877 DBG_KEY_BUF_LEN));
953 key_buf,
954 DBG_KEY_BUF_LEN));
955 else 878 else
956 printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " 879 pr_err("\t%d: LNC %p LEB %d:%d len %d key %s\n",
957 "%s\n", n, zbr->znode, zbr->lnum, 880 n, zbr->znode, zbr->lnum, zbr->offs, zbr->len,
958 zbr->offs, zbr->len, 881 dbg_snprintf_key(c, &zbr->key, key_buf,
959 dbg_snprintf_key(c, &zbr->key, 882 DBG_KEY_BUF_LEN));
960 key_buf,
961 DBG_KEY_BUF_LEN));
962 } 883 }
963 spin_unlock(&dbg_lock); 884 spin_unlock(&dbg_lock);
964} 885}
@@ -967,16 +888,16 @@ void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
967{ 888{
968 int i; 889 int i;
969 890
970 printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", 891 pr_err("(pid %d) start dumping heap cat %d (%d elements)\n",
971 current->pid, cat, heap->cnt); 892 current->pid, cat, heap->cnt);
972 for (i = 0; i < heap->cnt; i++) { 893 for (i = 0; i < heap->cnt; i++) {
973 struct ubifs_lprops *lprops = heap->arr[i]; 894 struct ubifs_lprops *lprops = heap->arr[i];
974 895
975 printk(KERN_ERR "\t%d. LEB %d hpos %d free %d dirty %d " 896 pr_err("\t%d. LEB %d hpos %d free %d dirty %d flags %d\n",
976 "flags %d\n", i, lprops->lnum, lprops->hpos, 897 i, lprops->lnum, lprops->hpos, lprops->free,
977 lprops->free, lprops->dirty, lprops->flags); 898 lprops->dirty, lprops->flags);
978 } 899 }
979 printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); 900 pr_err("(pid %d) finish dumping heap\n", current->pid);
980} 901}
981 902
982void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, 903void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
@@ -984,15 +905,15 @@ void ubifs_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
984{ 905{
985 int i; 906 int i;
986 907
987 printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); 908 pr_err("(pid %d) dumping pnode:\n", current->pid);
988 printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", 909 pr_err("\taddress %zx parent %zx cnext %zx\n",
989 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); 910 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
990 printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", 911 pr_err("\tflags %lu iip %d level %d num %d\n",
991 pnode->flags, iip, pnode->level, pnode->num); 912 pnode->flags, iip, pnode->level, pnode->num);
992 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 913 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
993 struct ubifs_lprops *lp = &pnode->lprops[i]; 914 struct ubifs_lprops *lp = &pnode->lprops[i];
994 915
995 printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", 916 pr_err("\t%d: free %d dirty %d flags %d lnum %d\n",
996 i, lp->free, lp->dirty, lp->flags, lp->lnum); 917 i, lp->free, lp->dirty, lp->flags, lp->lnum);
997 } 918 }
998} 919}
@@ -1002,20 +923,20 @@ void ubifs_dump_tnc(struct ubifs_info *c)
1002 struct ubifs_znode *znode; 923 struct ubifs_znode *znode;
1003 int level; 924 int level;
1004 925
1005 printk(KERN_ERR "\n"); 926 pr_err("\n");
1006 printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); 927 pr_err("(pid %d) start dumping TNC tree\n", current->pid);
1007 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); 928 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
1008 level = znode->level; 929 level = znode->level;
1009 printk(KERN_ERR "== Level %d ==\n", level); 930 pr_err("== Level %d ==\n", level);
1010 while (znode) { 931 while (znode) {
1011 if (level != znode->level) { 932 if (level != znode->level) {
1012 level = znode->level; 933 level = znode->level;
1013 printk(KERN_ERR "== Level %d ==\n", level); 934 pr_err("== Level %d ==\n", level);
1014 } 935 }
1015 ubifs_dump_znode(c, znode); 936 ubifs_dump_znode(c, znode);
1016 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); 937 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
1017 } 938 }
1018 printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); 939 pr_err("(pid %d) finish dumping TNC tree\n", current->pid);
1019} 940}
1020 941
1021static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, 942static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
@@ -1154,8 +1075,8 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
1154 mutex_lock(&ui->ui_mutex); 1075 mutex_lock(&ui->ui_mutex);
1155 spin_lock(&ui->ui_lock); 1076 spin_lock(&ui->ui_lock);
1156 if (ui->ui_size != ui->synced_i_size && !ui->dirty) { 1077 if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
1157 ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode " 1078 ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean",
1158 "is clean", ui->ui_size, ui->synced_i_size); 1079 ui->ui_size, ui->synced_i_size);
1159 ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, 1080 ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
1160 inode->i_mode, i_size_read(inode)); 1081 inode->i_mode, i_size_read(inode));
1161 dump_stack(); 1082 dump_stack();
@@ -1217,17 +1138,16 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1217 kfree(pdent); 1138 kfree(pdent);
1218 1139
1219 if (i_size_read(dir) != size) { 1140 if (i_size_read(dir) != size) {
1220 ubifs_err("directory inode %lu has size %llu, " 1141 ubifs_err("directory inode %lu has size %llu, but calculated size is %llu",
1221 "but calculated size is %llu", dir->i_ino, 1142 dir->i_ino, (unsigned long long)i_size_read(dir),
1222 (unsigned long long)i_size_read(dir),
1223 (unsigned long long)size); 1143 (unsigned long long)size);
1224 ubifs_dump_inode(c, dir); 1144 ubifs_dump_inode(c, dir);
1225 dump_stack(); 1145 dump_stack();
1226 return -EINVAL; 1146 return -EINVAL;
1227 } 1147 }
1228 if (dir->i_nlink != nlink) { 1148 if (dir->i_nlink != nlink) {
1229 ubifs_err("directory inode %lu has nlink %u, but calculated " 1149 ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u",
1230 "nlink is %u", dir->i_ino, dir->i_nlink, nlink); 1150 dir->i_ino, dir->i_nlink, nlink);
1231 ubifs_dump_inode(c, dir); 1151 ubifs_dump_inode(c, dir);
1232 dump_stack(); 1152 dump_stack();
1233 return -EINVAL; 1153 return -EINVAL;
@@ -1686,8 +1606,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1686 if (znode_cb) { 1606 if (znode_cb) {
1687 err = znode_cb(c, znode, priv); 1607 err = znode_cb(c, znode, priv);
1688 if (err) { 1608 if (err) {
1689 ubifs_err("znode checking function returned " 1609 ubifs_err("znode checking function returned error %d",
1690 "error %d", err); 1610 err);
1691 ubifs_dump_znode(c, znode); 1611 ubifs_dump_znode(c, znode);
1692 goto out_dump; 1612 goto out_dump;
1693 } 1613 }
@@ -1697,9 +1617,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1697 zbr = &znode->zbranch[idx]; 1617 zbr = &znode->zbranch[idx];
1698 err = leaf_cb(c, zbr, priv); 1618 err = leaf_cb(c, zbr, priv);
1699 if (err) { 1619 if (err) {
1700 ubifs_err("leaf checking function " 1620 ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d",
1701 "returned error %d, for leaf "
1702 "at LEB %d:%d",
1703 err, zbr->lnum, zbr->offs); 1621 err, zbr->lnum, zbr->offs);
1704 goto out_dump; 1622 goto out_dump;
1705 } 1623 }
@@ -1807,8 +1725,8 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1807 } 1725 }
1808 1726
1809 if (calc != idx_size) { 1727 if (calc != idx_size) {
1810 ubifs_err("index size check failed: calculated size is %lld, " 1728 ubifs_err("index size check failed: calculated size is %lld, should be %lld",
1811 "should be %lld", calc, idx_size); 1729 calc, idx_size);
1812 dump_stack(); 1730 dump_stack();
1813 return -EINVAL; 1731 return -EINVAL;
1814 } 1732 }
@@ -2120,8 +2038,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2120 fscki = read_add_inode(c, priv, inum); 2038 fscki = read_add_inode(c, priv, inum);
2121 if (IS_ERR(fscki)) { 2039 if (IS_ERR(fscki)) {
2122 err = PTR_ERR(fscki); 2040 err = PTR_ERR(fscki);
2123 ubifs_err("error %d while processing data node and " 2041 ubifs_err("error %d while processing data node and trying to find inode node %lu",
2124 "trying to find inode node %lu",
2125 err, (unsigned long)inum); 2042 err, (unsigned long)inum);
2126 goto out_dump; 2043 goto out_dump;
2127 } 2044 }
@@ -2131,9 +2048,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2131 blk_offs <<= UBIFS_BLOCK_SHIFT; 2048 blk_offs <<= UBIFS_BLOCK_SHIFT;
2132 blk_offs += le32_to_cpu(dn->size); 2049 blk_offs += le32_to_cpu(dn->size);
2133 if (blk_offs > fscki->size) { 2050 if (blk_offs > fscki->size) {
2134 ubifs_err("data node at LEB %d:%d is not within inode " 2051 ubifs_err("data node at LEB %d:%d is not within inode size %lld",
2135 "size %lld", zbr->lnum, zbr->offs, 2052 zbr->lnum, zbr->offs, fscki->size);
2136 fscki->size);
2137 err = -EINVAL; 2053 err = -EINVAL;
2138 goto out_dump; 2054 goto out_dump;
2139 } 2055 }
@@ -2154,8 +2070,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2154 fscki = read_add_inode(c, priv, inum); 2070 fscki = read_add_inode(c, priv, inum);
2155 if (IS_ERR(fscki)) { 2071 if (IS_ERR(fscki)) {
2156 err = PTR_ERR(fscki); 2072 err = PTR_ERR(fscki);
2157 ubifs_err("error %d while processing entry node and " 2073 ubifs_err("error %d while processing entry node and trying to find inode node %lu",
2158 "trying to find inode node %lu",
2159 err, (unsigned long)inum); 2074 err, (unsigned long)inum);
2160 goto out_dump; 2075 goto out_dump;
2161 } 2076 }
@@ -2167,8 +2082,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2167 fscki1 = read_add_inode(c, priv, inum); 2082 fscki1 = read_add_inode(c, priv, inum);
2168 if (IS_ERR(fscki1)) { 2083 if (IS_ERR(fscki1)) {
2169 err = PTR_ERR(fscki1); 2084 err = PTR_ERR(fscki1);
2170 ubifs_err("error %d while processing entry node and " 2085 ubifs_err("error %d while processing entry node and trying to find parent inode node %lu",
2171 "trying to find parent inode node %lu",
2172 err, (unsigned long)inum); 2086 err, (unsigned long)inum);
2173 goto out_dump; 2087 goto out_dump;
2174 } 2088 }
@@ -2258,61 +2172,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
2258 */ 2172 */
2259 if (fscki->inum != UBIFS_ROOT_INO && 2173 if (fscki->inum != UBIFS_ROOT_INO &&
2260 fscki->references != 1) { 2174 fscki->references != 1) {
2261 ubifs_err("directory inode %lu has %d " 2175 ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1",
2262 "direntries which refer it, but "
2263 "should be 1",
2264 (unsigned long)fscki->inum, 2176 (unsigned long)fscki->inum,
2265 fscki->references); 2177 fscki->references);
2266 goto out_dump; 2178 goto out_dump;
2267 } 2179 }
2268 if (fscki->inum == UBIFS_ROOT_INO && 2180 if (fscki->inum == UBIFS_ROOT_INO &&
2269 fscki->references != 0) { 2181 fscki->references != 0) {
2270 ubifs_err("root inode %lu has non-zero (%d) " 2182 ubifs_err("root inode %lu has non-zero (%d) direntries which refer it",
2271 "direntries which refer it",
2272 (unsigned long)fscki->inum, 2183 (unsigned long)fscki->inum,
2273 fscki->references); 2184 fscki->references);
2274 goto out_dump; 2185 goto out_dump;
2275 } 2186 }
2276 if (fscki->calc_sz != fscki->size) { 2187 if (fscki->calc_sz != fscki->size) {
2277 ubifs_err("directory inode %lu size is %lld, " 2188 ubifs_err("directory inode %lu size is %lld, but calculated size is %lld",
2278 "but calculated size is %lld",
2279 (unsigned long)fscki->inum, 2189 (unsigned long)fscki->inum,
2280 fscki->size, fscki->calc_sz); 2190 fscki->size, fscki->calc_sz);
2281 goto out_dump; 2191 goto out_dump;
2282 } 2192 }
2283 if (fscki->calc_cnt != fscki->nlink) { 2193 if (fscki->calc_cnt != fscki->nlink) {
2284 ubifs_err("directory inode %lu nlink is %d, " 2194 ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d",
2285 "but calculated nlink is %d",
2286 (unsigned long)fscki->inum, 2195 (unsigned long)fscki->inum,
2287 fscki->nlink, fscki->calc_cnt); 2196 fscki->nlink, fscki->calc_cnt);
2288 goto out_dump; 2197 goto out_dump;
2289 } 2198 }
2290 } else { 2199 } else {
2291 if (fscki->references != fscki->nlink) { 2200 if (fscki->references != fscki->nlink) {
2292 ubifs_err("inode %lu nlink is %d, but " 2201 ubifs_err("inode %lu nlink is %d, but calculated nlink is %d",
2293 "calculated nlink is %d",
2294 (unsigned long)fscki->inum, 2202 (unsigned long)fscki->inum,
2295 fscki->nlink, fscki->references); 2203 fscki->nlink, fscki->references);
2296 goto out_dump; 2204 goto out_dump;
2297 } 2205 }
2298 } 2206 }
2299 if (fscki->xattr_sz != fscki->calc_xsz) { 2207 if (fscki->xattr_sz != fscki->calc_xsz) {
2300 ubifs_err("inode %lu has xattr size %u, but " 2208 ubifs_err("inode %lu has xattr size %u, but calculated size is %lld",
2301 "calculated size is %lld",
2302 (unsigned long)fscki->inum, fscki->xattr_sz, 2209 (unsigned long)fscki->inum, fscki->xattr_sz,
2303 fscki->calc_xsz); 2210 fscki->calc_xsz);
2304 goto out_dump; 2211 goto out_dump;
2305 } 2212 }
2306 if (fscki->xattr_cnt != fscki->calc_xcnt) { 2213 if (fscki->xattr_cnt != fscki->calc_xcnt) {
2307 ubifs_err("inode %lu has %u xattrs, but " 2214 ubifs_err("inode %lu has %u xattrs, but calculated count is %lld",
2308 "calculated count is %lld",
2309 (unsigned long)fscki->inum, 2215 (unsigned long)fscki->inum,
2310 fscki->xattr_cnt, fscki->calc_xcnt); 2216 fscki->xattr_cnt, fscki->calc_xcnt);
2311 goto out_dump; 2217 goto out_dump;
2312 } 2218 }
2313 if (fscki->xattr_nms != fscki->calc_xnms) { 2219 if (fscki->xattr_nms != fscki->calc_xnms) {
2314 ubifs_err("inode %lu has xattr names' size %u, but " 2220 ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld",
2315 "calculated names' size is %lld",
2316 (unsigned long)fscki->inum, fscki->xattr_nms, 2221 (unsigned long)fscki->inum, fscki->xattr_nms,
2317 fscki->calc_xnms); 2222 fscki->calc_xnms);
2318 goto out_dump; 2223 goto out_dump;
@@ -2652,20 +2557,18 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2652 return 1; 2557 return 1;
2653} 2558}
2654 2559
2655static void cut_data(const void *buf, unsigned int len) 2560static int corrupt_data(const struct ubifs_info *c, const void *buf,
2561 unsigned int len)
2656{ 2562{
2657 unsigned int from, to, i, ffs = chance(1, 2); 2563 unsigned int from, to, i, ffs = chance(1, 2);
2658 unsigned char *p = (void *)buf; 2564 unsigned char *p = (void *)buf;
2659 2565
2660 from = random32() % (len + 1); 2566 from = random32() % (len + 1);
2661 if (chance(1, 2)) 2567 /* Corruption may only span one max. write unit */
2662 to = random32() % (len - from + 1); 2568 to = min(len, ALIGN(from, c->max_write_size));
2663 else
2664 to = len;
2665 2569
2666 if (from < to) 2570 ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
2667 ubifs_warn("filled bytes %u-%u with %s", from, to - 1, 2571 ffs ? "0xFFs" : "random data");
2668 ffs ? "0xFFs" : "random data");
2669 2572
2670 if (ffs) 2573 if (ffs)
2671 for (i = from; i < to; i++) 2574 for (i = from; i < to; i++)
@@ -2673,6 +2576,8 @@ static void cut_data(const void *buf, unsigned int len)
2673 else 2576 else
2674 for (i = from; i < to; i++) 2577 for (i = from; i < to; i++)
2675 p[i] = random32() % 0x100; 2578 p[i] = random32() % 0x100;
2579
2580 return to;
2676} 2581}
2677 2582
2678int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, 2583int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
@@ -2685,7 +2590,9 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
2685 2590
2686 failing = power_cut_emulated(c, lnum, 1); 2591 failing = power_cut_emulated(c, lnum, 1);
2687 if (failing) 2592 if (failing)
2688 cut_data(buf, len); 2593 len = corrupt_data(c, buf, len);
2594 ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
2595 len, lnum, offs);
2689 err = ubi_leb_write(c->ubi, lnum, buf, offs, len); 2596 err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
2690 if (err) 2597 if (err)
2691 return err; 2598 return err;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 8b8cc4e945f4..e03d5179769a 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -150,7 +150,7 @@ struct ubifs_global_debug_info {
150 150
151#define ubifs_assert(expr) do { \ 151#define ubifs_assert(expr) do { \
152 if (unlikely(!(expr))) { \ 152 if (unlikely(!(expr))) { \
153 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ 153 pr_crit("UBIFS assert failed in %s at %u (pid %d)\n", \
154 __func__, __LINE__, current->pid); \ 154 __func__, __LINE__, current->pid); \
155 dump_stack(); \ 155 dump_stack(); \
156 } \ 156 } \
@@ -159,26 +159,23 @@ struct ubifs_global_debug_info {
159#define ubifs_assert_cmt_locked(c) do { \ 159#define ubifs_assert_cmt_locked(c) do { \
160 if (unlikely(down_write_trylock(&(c)->commit_sem))) { \ 160 if (unlikely(down_write_trylock(&(c)->commit_sem))) { \
161 up_write(&(c)->commit_sem); \ 161 up_write(&(c)->commit_sem); \
162 printk(KERN_CRIT "commit lock is not locked!\n"); \ 162 pr_crit("commit lock is not locked!\n"); \
163 ubifs_assert(0); \ 163 ubifs_assert(0); \
164 } \ 164 } \
165} while (0) 165} while (0)
166 166
167#define ubifs_dbg_msg(type, fmt, ...) \ 167#define ubifs_dbg_msg(type, fmt, ...) \
168 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) 168 pr_debug("UBIFS DBG " type " (pid %d): " fmt "\n", current->pid, \
169 ##__VA_ARGS__)
169 170
170#define DBG_KEY_BUF_LEN 32 171#define DBG_KEY_BUF_LEN 48
171#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ 172#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
172 char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ 173 char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
173 pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ 174 pr_debug("UBIFS DBG " type " (pid %d): " fmt "%s\n", current->pid, \
175 ##__VA_ARGS__, \
174 dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ 176 dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \
175} while (0) 177} while (0)
176 178
177/* Just a debugging messages not related to any specific UBIFS subsystem */
178#define dbg_msg(fmt, ...) \
179 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
180 __func__, ##__VA_ARGS__)
181
182/* General messages */ 179/* General messages */
183#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) 180#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
184/* Additional journal messages */ 181/* Additional journal messages */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index c95681cf1b71..e271fba1651b 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -980,8 +980,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
980 * separately. 980 * separately.
981 */ 981 */
982 982
983 dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in " 983 dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu",
984 "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, 984 old_dentry->d_name.len, old_dentry->d_name.name,
985 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, 985 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
986 new_dentry->d_name.name, new_dir->i_ino); 986 new_dentry->d_name.name, new_dir->i_ino);
987 ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); 987 ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 7bd6e72afd11..5bc77817f382 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1486,8 +1486,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1486 err = ubifs_budget_space(c, &req); 1486 err = ubifs_budget_space(c, &req);
1487 if (unlikely(err)) { 1487 if (unlikely(err)) {
1488 if (err == -ENOSPC) 1488 if (err == -ENOSPC)
1489 ubifs_warn("out of space for mmapped file " 1489 ubifs_warn("out of space for mmapped file (inode number %lu)",
1490 "(inode number %lu)", inode->i_ino); 1490 inode->i_ino);
1491 return VM_FAULT_SIGBUS; 1491 return VM_FAULT_SIGBUS;
1492 } 1492 }
1493 1493
@@ -1536,6 +1536,7 @@ out_unlock:
1536static const struct vm_operations_struct ubifs_file_vm_ops = { 1536static const struct vm_operations_struct ubifs_file_vm_ops = {
1537 .fault = filemap_fault, 1537 .fault = filemap_fault,
1538 .page_mkwrite = ubifs_vm_page_mkwrite, 1538 .page_mkwrite = ubifs_vm_page_mkwrite,
1539 .remap_pages = generic_file_remap_pages,
1539}; 1540};
1540 1541
1541static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) 1542static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 04dd6f47635e..76ca53cd3eee 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -714,9 +714,9 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
714 break; 714 break;
715 } 715 }
716 716
717 dbg_gc("found LEB %d: free %d, dirty %d, sum %d " 717 dbg_gc("found LEB %d: free %d, dirty %d, sum %d (min. space %d)",
718 "(min. space %d)", lp.lnum, lp.free, lp.dirty, 718 lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty,
719 lp.free + lp.dirty, min_space); 719 min_space);
720 720
721 space_before = c->leb_size - wbuf->offs - wbuf->used; 721 space_before = c->leb_size - wbuf->offs - wbuf->used;
722 if (wbuf->lnum == -1) 722 if (wbuf->lnum == -1)
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 12c0f154ca83..afaad07f3b29 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -469,8 +469,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
469 ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 469 ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
470 ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec); 470 ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
471 ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 471 ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
472 ino->uid = cpu_to_le32(inode->i_uid); 472 ino->uid = cpu_to_le32(i_uid_read(inode));
473 ino->gid = cpu_to_le32(inode->i_gid); 473 ino->gid = cpu_to_le32(i_gid_read(inode));
474 ino->mode = cpu_to_le32(inode->i_mode); 474 ino->mode = cpu_to_le32(inode->i_mode);
475 ino->flags = cpu_to_le32(ui->flags); 475 ino->flags = cpu_to_le32(ui->flags);
476 ino->size = cpu_to_le64(ui->ui_size); 476 ino->size = cpu_to_le64(ui->ui_size);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index c80b15d6c8de..36bd4efd0819 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -315,17 +315,15 @@ static void remove_buds(struct ubifs_info *c)
315 * heads (non-closed buds). 315 * heads (non-closed buds).
316 */ 316 */
317 c->cmt_bud_bytes += wbuf->offs - bud->start; 317 c->cmt_bud_bytes += wbuf->offs - bud->start;
318 dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " 318 dbg_log("preserve %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
319 "cmt_bud_bytes %lld", bud->lnum, bud->start, 319 bud->lnum, bud->start, dbg_jhead(bud->jhead),
320 dbg_jhead(bud->jhead), wbuf->offs - bud->start, 320 wbuf->offs - bud->start, c->cmt_bud_bytes);
321 c->cmt_bud_bytes);
322 bud->start = wbuf->offs; 321 bud->start = wbuf->offs;
323 } else { 322 } else {
324 c->cmt_bud_bytes += c->leb_size - bud->start; 323 c->cmt_bud_bytes += c->leb_size - bud->start;
325 dbg_log("remove %d:%d, jhead %s, bud bytes %d, " 324 dbg_log("remove %d:%d, jhead %s, bud bytes %d, cmt_bud_bytes %lld",
326 "cmt_bud_bytes %lld", bud->lnum, bud->start, 325 bud->lnum, bud->start, dbg_jhead(bud->jhead),
327 dbg_jhead(bud->jhead), c->leb_size - bud->start, 326 c->leb_size - bud->start, c->cmt_bud_bytes);
328 c->cmt_bud_bytes);
329 rb_erase(p1, &c->buds); 327 rb_erase(p1, &c->buds);
330 /* 328 /*
331 * If the commit does not finish, the recovery will need 329 * If the commit does not finish, the recovery will need
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 86eb8e533249..e5a2a35a46dc 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -867,15 +867,15 @@ int dbg_check_cats(struct ubifs_info *c)
867 867
868 list_for_each_entry(lprops, &c->empty_list, list) { 868 list_for_each_entry(lprops, &c->empty_list, list) {
869 if (lprops->free != c->leb_size) { 869 if (lprops->free != c->leb_size) {
870 ubifs_err("non-empty LEB %d on empty list " 870 ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)",
871 "(free %d dirty %d flags %d)", lprops->lnum, 871 lprops->lnum, lprops->free, lprops->dirty,
872 lprops->free, lprops->dirty, lprops->flags); 872 lprops->flags);
873 return -EINVAL; 873 return -EINVAL;
874 } 874 }
875 if (lprops->flags & LPROPS_TAKEN) { 875 if (lprops->flags & LPROPS_TAKEN) {
876 ubifs_err("taken LEB %d on empty list " 876 ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)",
877 "(free %d dirty %d flags %d)", lprops->lnum, 877 lprops->lnum, lprops->free, lprops->dirty,
878 lprops->free, lprops->dirty, lprops->flags); 878 lprops->flags);
879 return -EINVAL; 879 return -EINVAL;
880 } 880 }
881 } 881 }
@@ -883,15 +883,15 @@ int dbg_check_cats(struct ubifs_info *c)
883 i = 0; 883 i = 0;
884 list_for_each_entry(lprops, &c->freeable_list, list) { 884 list_for_each_entry(lprops, &c->freeable_list, list) {
885 if (lprops->free + lprops->dirty != c->leb_size) { 885 if (lprops->free + lprops->dirty != c->leb_size) {
886 ubifs_err("non-freeable LEB %d on freeable list " 886 ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
887 "(free %d dirty %d flags %d)", lprops->lnum, 887 lprops->lnum, lprops->free, lprops->dirty,
888 lprops->free, lprops->dirty, lprops->flags); 888 lprops->flags);
889 return -EINVAL; 889 return -EINVAL;
890 } 890 }
891 if (lprops->flags & LPROPS_TAKEN) { 891 if (lprops->flags & LPROPS_TAKEN) {
892 ubifs_err("taken LEB %d on freeable list " 892 ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)",
893 "(free %d dirty %d flags %d)", lprops->lnum, 893 lprops->lnum, lprops->free, lprops->dirty,
894 lprops->free, lprops->dirty, lprops->flags); 894 lprops->flags);
895 return -EINVAL; 895 return -EINVAL;
896 } 896 }
897 i += 1; 897 i += 1;
@@ -913,21 +913,21 @@ int dbg_check_cats(struct ubifs_info *c)
913 913
914 list_for_each_entry(lprops, &c->frdi_idx_list, list) { 914 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
915 if (lprops->free + lprops->dirty != c->leb_size) { 915 if (lprops->free + lprops->dirty != c->leb_size) {
916 ubifs_err("non-freeable LEB %d on frdi_idx list " 916 ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
917 "(free %d dirty %d flags %d)", lprops->lnum, 917 lprops->lnum, lprops->free, lprops->dirty,
918 lprops->free, lprops->dirty, lprops->flags); 918 lprops->flags);
919 return -EINVAL; 919 return -EINVAL;
920 } 920 }
921 if (lprops->flags & LPROPS_TAKEN) { 921 if (lprops->flags & LPROPS_TAKEN) {
922 ubifs_err("taken LEB %d on frdi_idx list " 922 ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
923 "(free %d dirty %d flags %d)", lprops->lnum, 923 lprops->lnum, lprops->free, lprops->dirty,
924 lprops->free, lprops->dirty, lprops->flags); 924 lprops->flags);
925 return -EINVAL; 925 return -EINVAL;
926 } 926 }
927 if (!(lprops->flags & LPROPS_INDEX)) { 927 if (!(lprops->flags & LPROPS_INDEX)) {
928 ubifs_err("non-index LEB %d on frdi_idx list " 928 ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
929 "(free %d dirty %d flags %d)", lprops->lnum, 929 lprops->lnum, lprops->free, lprops->dirty,
930 lprops->free, lprops->dirty, lprops->flags); 930 lprops->flags);
931 return -EINVAL; 931 return -EINVAL;
932 } 932 }
933 } 933 }
@@ -982,9 +982,9 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
982 goto out; 982 goto out;
983 } 983 }
984 if (lprops != lp) { 984 if (lprops != lp) {
985 dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", 985 ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
986 (size_t)lprops, (size_t)lp, lprops->lnum, 986 (size_t)lprops, (size_t)lp, lprops->lnum,
987 lp->lnum); 987 lp->lnum);
988 err = 4; 988 err = 4;
989 goto out; 989 goto out;
990 } 990 }
@@ -1002,7 +1002,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
1002 } 1002 }
1003out: 1003out:
1004 if (err) { 1004 if (err) {
1005 dbg_msg("failed cat %d hpos %d err %d", cat, i, err); 1005 ubifs_err("failed cat %d hpos %d err %d", cat, i, err);
1006 dump_stack(); 1006 dump_stack();
1007 ubifs_dump_heap(c, heap, cat); 1007 ubifs_dump_heap(c, heap, cat);
1008 } 1008 }
@@ -1153,8 +1153,8 @@ static int scan_check_cb(struct ubifs_info *c,
1153 1153
1154 if (free > c->leb_size || free < 0 || dirty > c->leb_size || 1154 if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
1155 dirty < 0) { 1155 dirty < 0) {
1156 ubifs_err("bad calculated accounting for LEB %d: " 1156 ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d",
1157 "free %d, dirty %d", lnum, free, dirty); 1157 lnum, free, dirty);
1158 goto out_destroy; 1158 goto out_destroy;
1159 } 1159 }
1160 1160
@@ -1200,8 +1200,7 @@ static int scan_check_cb(struct ubifs_info *c,
1200 /* Free but not unmapped LEB, it's fine */ 1200 /* Free but not unmapped LEB, it's fine */
1201 is_idx = 0; 1201 is_idx = 0;
1202 else { 1202 else {
1203 ubifs_err("indexing node without indexing " 1203 ubifs_err("indexing node without indexing flag");
1204 "flag");
1205 goto out_print; 1204 goto out_print;
1206 } 1205 }
1207 } 1206 }
@@ -1236,8 +1235,7 @@ static int scan_check_cb(struct ubifs_info *c,
1236 return LPT_SCAN_CONTINUE; 1235 return LPT_SCAN_CONTINUE;
1237 1236
1238out_print: 1237out_print:
1239 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " 1238 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
1240 "should be free %d, dirty %d",
1241 lnum, lp->free, lp->dirty, lp->flags, free, dirty); 1239 lnum, lp->free, lp->dirty, lp->flags, free, dirty);
1242 ubifs_dump_leb(c, lnum); 1240 ubifs_dump_leb(c, lnum);
1243out_destroy: 1241out_destroy:
@@ -1290,12 +1288,10 @@ int dbg_check_lprops(struct ubifs_info *c)
1290 lst.total_dirty != c->lst.total_dirty || 1288 lst.total_dirty != c->lst.total_dirty ||
1291 lst.total_used != c->lst.total_used) { 1289 lst.total_used != c->lst.total_used) {
1292 ubifs_err("bad overall accounting"); 1290 ubifs_err("bad overall accounting");
1293 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " 1291 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
1294 "total_free %lld, total_dirty %lld, total_used %lld",
1295 lst.empty_lebs, lst.idx_lebs, lst.total_free, 1292 lst.empty_lebs, lst.idx_lebs, lst.total_free,
1296 lst.total_dirty, lst.total_used); 1293 lst.total_dirty, lst.total_used);
1297 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " 1294 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
1298 "total_free %lld, total_dirty %lld, total_used %lld",
1299 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, 1295 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
1300 c->lst.total_dirty, c->lst.total_used); 1296 c->lst.total_dirty, c->lst.total_used);
1301 err = -EINVAL; 1297 err = -EINVAL;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ce33b2beb151..d46b19ec1815 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1311,7 +1311,7 @@ out:
1311 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); 1311 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
1312 ubifs_dump_pnode(c, pnode, parent, iip); 1312 ubifs_dump_pnode(c, pnode, parent, iip);
1313 dump_stack(); 1313 dump_stack();
1314 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); 1314 ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
1315 kfree(pnode); 1315 kfree(pnode);
1316 return err; 1316 return err;
1317} 1317}
@@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
1749 return 0; 1749 return 0;
1750 1750
1751out_err: 1751out_err:
1752 ubifs_lpt_free(c, 0); 1752 if (wr)
1753 ubifs_lpt_free(c, 1);
1754 if (rd)
1755 ubifs_lpt_free(c, 0);
1753 return err; 1756 return err;
1754} 1757}
1755 1758
@@ -2234,8 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
2234 /* cnode is a nnode */ 2237 /* cnode is a nnode */
2235 num = calc_nnode_num(row, col); 2238 num = calc_nnode_num(row, col);
2236 if (cnode->num != num) { 2239 if (cnode->num != num) {
2237 ubifs_err("nnode num %d expected %d " 2240 ubifs_err("nnode num %d expected %d parent num %d iip %d",
2238 "parent num %d iip %d",
2239 cnode->num, num, 2241 cnode->num, num,
2240 (nnode ? nnode->num : 0), cnode->iip); 2242 (nnode ? nnode->num : 0), cnode->iip);
2241 return -EINVAL; 2243 return -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 4fa70734e6e7..9daaeef675dd 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -320,8 +320,8 @@ static int layout_cnodes(struct ubifs_info *c)
320 return 0; 320 return 0;
321 321
322no_space: 322no_space:
323 ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " 323 ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
324 "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); 324 lnum, offs, len, done_ltab, done_lsave);
325 ubifs_dump_lpt_info(c); 325 ubifs_dump_lpt_info(c);
326 ubifs_dump_lpt_lebs(c); 326 ubifs_dump_lpt_lebs(c);
327 dump_stack(); 327 dump_stack();
@@ -545,8 +545,8 @@ static int write_cnodes(struct ubifs_info *c)
545 return 0; 545 return 0;
546 546
547no_space: 547no_space:
548 ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " 548 ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
549 "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); 549 lnum, offs, len, done_ltab, done_lsave);
550 ubifs_dump_lpt_info(c); 550 ubifs_dump_lpt_info(c);
551 ubifs_dump_lpt_lebs(c); 551 ubifs_dump_lpt_lebs(c);
552 dump_stack(); 552 dump_stack();
@@ -1662,21 +1662,19 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1662 continue; 1662 continue;
1663 } 1663 }
1664 if (!dbg_is_all_ff(p, len)) { 1664 if (!dbg_is_all_ff(p, len)) {
1665 dbg_msg("invalid empty space in LEB %d at %d", 1665 ubifs_err("invalid empty space in LEB %d at %d",
1666 lnum, c->leb_size - len); 1666 lnum, c->leb_size - len);
1667 err = -EINVAL; 1667 err = -EINVAL;
1668 } 1668 }
1669 i = lnum - c->lpt_first; 1669 i = lnum - c->lpt_first;
1670 if (len != c->ltab[i].free) { 1670 if (len != c->ltab[i].free) {
1671 dbg_msg("invalid free space in LEB %d " 1671 ubifs_err("invalid free space in LEB %d (free %d, expected %d)",
1672 "(free %d, expected %d)", 1672 lnum, len, c->ltab[i].free);
1673 lnum, len, c->ltab[i].free);
1674 err = -EINVAL; 1673 err = -EINVAL;
1675 } 1674 }
1676 if (dirty != c->ltab[i].dirty) { 1675 if (dirty != c->ltab[i].dirty) {
1677 dbg_msg("invalid dirty space in LEB %d " 1676 ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)",
1678 "(dirty %d, expected %d)", 1677 lnum, dirty, c->ltab[i].dirty);
1679 lnum, dirty, c->ltab[i].dirty);
1680 err = -EINVAL; 1678 err = -EINVAL;
1681 } 1679 }
1682 goto out; 1680 goto out;
@@ -1888,8 +1886,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1888 int err, len = c->leb_size, node_type, node_num, node_len, offs; 1886 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1889 void *buf, *p; 1887 void *buf, *p;
1890 1888
1891 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 1889 pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
1892 current->pid, lnum);
1893 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1890 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1894 if (!buf) { 1891 if (!buf) {
1895 ubifs_err("cannot allocate memory to dump LPT"); 1892 ubifs_err("cannot allocate memory to dump LPT");
@@ -1907,14 +1904,14 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1907 1904
1908 pad_len = get_pad_len(c, p, len); 1905 pad_len = get_pad_len(c, p, len);
1909 if (pad_len) { 1906 if (pad_len) {
1910 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", 1907 pr_err("LEB %d:%d, pad %d bytes\n",
1911 lnum, offs, pad_len); 1908 lnum, offs, pad_len);
1912 p += pad_len; 1909 p += pad_len;
1913 len -= pad_len; 1910 len -= pad_len;
1914 continue; 1911 continue;
1915 } 1912 }
1916 if (len) 1913 if (len)
1917 printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", 1914 pr_err("LEB %d:%d, free %d bytes\n",
1918 lnum, offs, len); 1915 lnum, offs, len);
1919 break; 1916 break;
1920 } 1917 }
@@ -1925,11 +1922,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1925 { 1922 {
1926 node_len = c->pnode_sz; 1923 node_len = c->pnode_sz;
1927 if (c->big_lpt) 1924 if (c->big_lpt)
1928 printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", 1925 pr_err("LEB %d:%d, pnode num %d\n",
1929 lnum, offs, node_num); 1926 lnum, offs, node_num);
1930 else 1927 else
1931 printk(KERN_DEBUG "LEB %d:%d, pnode\n", 1928 pr_err("LEB %d:%d, pnode\n", lnum, offs);
1932 lnum, offs);
1933 break; 1929 break;
1934 } 1930 }
1935 case UBIFS_LPT_NNODE: 1931 case UBIFS_LPT_NNODE:
@@ -1939,29 +1935,28 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1939 1935
1940 node_len = c->nnode_sz; 1936 node_len = c->nnode_sz;
1941 if (c->big_lpt) 1937 if (c->big_lpt)
1942 printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", 1938 pr_err("LEB %d:%d, nnode num %d, ",
1943 lnum, offs, node_num); 1939 lnum, offs, node_num);
1944 else 1940 else
1945 printk(KERN_DEBUG "LEB %d:%d, nnode, ", 1941 pr_err("LEB %d:%d, nnode, ",
1946 lnum, offs); 1942 lnum, offs);
1947 err = ubifs_unpack_nnode(c, p, &nnode); 1943 err = ubifs_unpack_nnode(c, p, &nnode);
1948 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 1944 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1949 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, 1945 pr_cont("%d:%d", nnode.nbranch[i].lnum,
1950 nnode.nbranch[i].offs); 1946 nnode.nbranch[i].offs);
1951 if (i != UBIFS_LPT_FANOUT - 1) 1947 if (i != UBIFS_LPT_FANOUT - 1)
1952 printk(KERN_CONT ", "); 1948 pr_cont(", ");
1953 } 1949 }
1954 printk(KERN_CONT "\n"); 1950 pr_cont("\n");
1955 break; 1951 break;
1956 } 1952 }
1957 case UBIFS_LPT_LTAB: 1953 case UBIFS_LPT_LTAB:
1958 node_len = c->ltab_sz; 1954 node_len = c->ltab_sz;
1959 printk(KERN_DEBUG "LEB %d:%d, ltab\n", 1955 pr_err("LEB %d:%d, ltab\n", lnum, offs);
1960 lnum, offs);
1961 break; 1956 break;
1962 case UBIFS_LPT_LSAVE: 1957 case UBIFS_LPT_LSAVE:
1963 node_len = c->lsave_sz; 1958 node_len = c->lsave_sz;
1964 printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); 1959 pr_err("LEB %d:%d, lsave len\n", lnum, offs);
1965 break; 1960 break;
1966 default: 1961 default:
1967 ubifs_err("LPT node type %d not recognized", node_type); 1962 ubifs_err("LPT node type %d not recognized", node_type);
@@ -1972,8 +1967,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1972 len -= node_len; 1967 len -= node_len;
1973 } 1968 }
1974 1969
1975 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 1970 pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
1976 current->pid, lnum);
1977out: 1971out:
1978 vfree(buf); 1972 vfree(buf);
1979 return; 1973 return;
@@ -1990,12 +1984,10 @@ void ubifs_dump_lpt_lebs(const struct ubifs_info *c)
1990{ 1984{
1991 int i; 1985 int i;
1992 1986
1993 printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", 1987 pr_err("(pid %d) start dumping all LPT LEBs\n", current->pid);
1994 current->pid);
1995 for (i = 0; i < c->lpt_lebs; i++) 1988 for (i = 0; i < c->lpt_lebs; i++)
1996 dump_lpt_leb(c, i + c->lpt_first); 1989 dump_lpt_leb(c, i + c->lpt_first);
1997 printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", 1990 pr_err("(pid %d) finish dumping all LPT LEBs\n", current->pid);
1998 current->pid);
1999} 1991}
2000 1992
2001/** 1993/**
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index cebf17ea0458..769701ccb5c9 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -562,8 +562,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
562 562
563 list_for_each_entry(snod, &sleb->nodes, list) { 563 list_for_each_entry(snod, &sleb->nodes, list) {
564 if (snod->type != UBIFS_ORPH_NODE) { 564 if (snod->type != UBIFS_ORPH_NODE) {
565 ubifs_err("invalid node type %d in orphan area at " 565 ubifs_err("invalid node type %d in orphan area at %d:%d",
566 "%d:%d", snod->type, sleb->lnum, snod->offs); 566 snod->type, sleb->lnum, snod->offs);
567 ubifs_dump_node(c, snod->node); 567 ubifs_dump_node(c, snod->node);
568 return -EINVAL; 568 return -EINVAL;
569 } 569 }
@@ -589,8 +589,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
589 * number. That makes this orphan node, out of date. 589 * number. That makes this orphan node, out of date.
590 */ 590 */
591 if (!first) { 591 if (!first) {
592 ubifs_err("out of order commit number %llu in " 592 ubifs_err("out of order commit number %llu in orphan node at %d:%d",
593 "orphan node at %d:%d",
594 cmt_no, sleb->lnum, snod->offs); 593 cmt_no, sleb->lnum, snod->offs);
595 ubifs_dump_node(c, snod->node); 594 ubifs_dump_node(c, snod->node);
596 return -EINVAL; 595 return -EINVAL;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c30d976b4be8..065096e36ed9 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -609,7 +609,8 @@ static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
609 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, 609 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
610 list); 610 list);
611 611
612 dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); 612 dbg_rcvry("dropping last node at %d:%d",
613 sleb->lnum, snod->offs);
613 *offs = snod->offs; 614 *offs = snod->offs;
614 list_del(&snod->list); 615 list_del(&snod->list);
615 kfree(snod); 616 kfree(snod);
@@ -702,8 +703,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
702 * See header comment for this file for more 703 * See header comment for this file for more
703 * explanations about the reasons we have this check. 704 * explanations about the reasons we have this check.
704 */ 705 */
705 ubifs_err("corrupt empty space LEB %d:%d, corruption " 706 ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d",
706 "starts at %d", lnum, offs, corruption); 707 lnum, offs, corruption);
707 /* Make sure we dump interesting non-0xFF data */ 708 /* Make sure we dump interesting non-0xFF data */
708 offs += corruption; 709 offs += corruption;
709 buf += corruption; 710 buf += corruption;
@@ -788,7 +789,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
788 789
789corrupted_rescan: 790corrupted_rescan:
790 /* Re-scan the corrupted data with verbose messages */ 791 /* Re-scan the corrupted data with verbose messages */
791 ubifs_err("corruptio %d", ret); 792 ubifs_err("corruption %d", ret);
792 ubifs_scan_a_node(c, buf, len, lnum, offs, 1); 793 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
793corrupted: 794corrupted:
794 ubifs_scanned_corruption(c, lnum, offs, buf); 795 ubifs_scanned_corruption(c, lnum, offs, buf);
@@ -899,8 +900,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
899 } 900 }
900 } 901 }
901 if (snod->sqnum > cs_sqnum) { 902 if (snod->sqnum > cs_sqnum) {
902 ubifs_err("unrecoverable log corruption " 903 ubifs_err("unrecoverable log corruption in LEB %d",
903 "in LEB %d", lnum); 904 lnum);
904 ubifs_scan_destroy(sleb); 905 ubifs_scan_destroy(sleb);
905 return ERR_PTR(-EUCLEAN); 906 return ERR_PTR(-EUCLEAN);
906 } 907 }
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eba46d4a7619..3187925e9879 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -141,9 +141,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
141 * during the replay. 141 * during the replay.
142 */ 142 */
143 if (dirty != 0) 143 if (dirty != 0)
144 dbg_msg("LEB %d lp: %d free %d dirty " 144 dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
145 "replay: %d free %d dirty", b->bud->lnum, 145 b->bud->lnum, lp->free, lp->dirty, b->free,
146 lp->free, lp->dirty, b->free, b->dirty); 146 b->dirty);
147 } 147 }
148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, 148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
149 lp->flags | LPROPS_TAKEN, 0); 149 lp->flags | LPROPS_TAKEN, 0);
@@ -677,7 +677,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
677 677
678 b->dirty = sleb->endpt - offs - used; 678 b->dirty = sleb->endpt - offs - used;
679 b->free = c->leb_size - sleb->endpt; 679 b->free = c->leb_size - sleb->endpt;
680 dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); 680 dbg_mnt("bud LEB %d replied: dirty %d, free %d",
681 lnum, b->dirty, b->free);
681 682
682out: 683out:
683 ubifs_scan_destroy(sleb); 684 ubifs_scan_destroy(sleb);
@@ -865,8 +866,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
865 goto out_dump; 866 goto out_dump;
866 } 867 }
867 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 868 if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
868 ubifs_err("first CS node at LEB %d:%d has wrong " 869 ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
869 "commit number %llu expected %llu",
870 lnum, offs, 870 lnum, offs,
871 (unsigned long long)le64_to_cpu(node->cmt_no), 871 (unsigned long long)le64_to_cpu(node->cmt_no),
872 c->cmt_no); 872 c->cmt_no);
@@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c)
1026 c->replaying = 1; 1026 c->replaying = 1;
1027 lnum = c->ltail_lnum = c->lhead_lnum; 1027 lnum = c->ltail_lnum = c->lhead_lnum;
1028 1028
1029 lnum = UBIFS_LOG_LNUM;
1030 do { 1029 do {
1031 err = replay_log_leb(c, lnum, 0, c->sbuf); 1030 err = replay_log_leb(c, lnum, 0, c->sbuf);
1032 if (err == 1) 1031 if (err == 1)
@@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
1035 if (err) 1034 if (err)
1036 goto out; 1035 goto out;
1037 lnum = ubifs_next_log_lnum(c, lnum); 1036 lnum = ubifs_next_log_lnum(c, lnum);
1038 } while (lnum != UBIFS_LOG_LNUM); 1037 } while (lnum != c->ltail_lnum);
1039 1038
1040 err = replay_buds(c); 1039 err = replay_buds(c);
1041 if (err) 1040 if (err)
@@ -1059,8 +1058,8 @@ int ubifs_replay_journal(struct ubifs_info *c)
1059 c->bi.uncommitted_idx *= c->max_idx_node_sz; 1058 c->bi.uncommitted_idx *= c->max_idx_node_sz;
1060 1059
1061 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1060 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1062 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1061 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
1063 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1062 c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1064 (unsigned long)c->highest_inum); 1063 (unsigned long)c->highest_inum);
1065out: 1064out:
1066 destroy_replay_list(c); 1065 destroy_replay_list(c);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 15e2fc5aa60b..4c37607a958e 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -391,9 +391,8 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
391 min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; 391 min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
392 392
393 if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { 393 if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
394 ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, " 394 ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
395 "%d minimum required", c->leb_cnt, c->vi.size, 395 c->leb_cnt, c->vi.size, min_leb_cnt);
396 min_leb_cnt);
397 goto failed; 396 goto failed;
398 } 397 }
399 398
@@ -411,15 +410,14 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
411 410
412 max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; 411 max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
413 if (c->max_bud_bytes < max_bytes) { 412 if (c->max_bud_bytes < max_bytes) {
414 ubifs_err("too small journal (%lld bytes), must be at least " 413 ubifs_err("too small journal (%lld bytes), must be at least %lld bytes",
415 "%lld bytes", c->max_bud_bytes, max_bytes); 414 c->max_bud_bytes, max_bytes);
416 goto failed; 415 goto failed;
417 } 416 }
418 417
419 max_bytes = (long long)c->leb_size * c->main_lebs; 418 max_bytes = (long long)c->leb_size * c->main_lebs;
420 if (c->max_bud_bytes > max_bytes) { 419 if (c->max_bud_bytes > max_bytes) {
421 ubifs_err("too large journal size (%lld bytes), only %lld bytes" 420 ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area",
422 "available in the main area",
423 c->max_bud_bytes, max_bytes); 421 c->max_bud_bytes, max_bytes);
424 goto failed; 422 goto failed;
425 } 423 }
@@ -549,10 +547,9 @@ int ubifs_read_superblock(struct ubifs_info *c)
549 ubifs_assert(!c->ro_media || c->ro_mount); 547 ubifs_assert(!c->ro_media || c->ro_mount);
550 if (!c->ro_mount || 548 if (!c->ro_mount ||
551 c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { 549 c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
552 ubifs_err("on-flash format version is w%d/r%d, but " 550 ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
553 "software only supports up to version " 551 c->fmt_version, c->ro_compat_version,
554 "w%d/r%d", c->fmt_version, 552 UBIFS_FORMAT_VERSION,
555 c->ro_compat_version, UBIFS_FORMAT_VERSION,
556 UBIFS_RO_COMPAT_VERSION); 553 UBIFS_RO_COMPAT_VERSION);
557 if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { 554 if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
558 ubifs_msg("only R/O mounting is possible"); 555 ubifs_msg("only R/O mounting is possible");
@@ -611,8 +608,8 @@ int ubifs_read_superblock(struct ubifs_info *c)
611 c->fanout = le32_to_cpu(sup->fanout); 608 c->fanout = le32_to_cpu(sup->fanout);
612 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); 609 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
613 c->rp_size = le64_to_cpu(sup->rp_size); 610 c->rp_size = le64_to_cpu(sup->rp_size);
614 c->rp_uid = le32_to_cpu(sup->rp_uid); 611 c->rp_uid = make_kuid(&init_user_ns, le32_to_cpu(sup->rp_uid));
615 c->rp_gid = le32_to_cpu(sup->rp_gid); 612 c->rp_gid = make_kgid(&init_user_ns, le32_to_cpu(sup->rp_gid));
616 sup_flags = le32_to_cpu(sup->flags); 613 sup_flags = le32_to_cpu(sup->flags);
617 if (!c->mount_opts.override_compr) 614 if (!c->mount_opts.override_compr)
618 c->default_compr = le16_to_cpu(sup->default_compr); 615 c->default_compr = le16_to_cpu(sup->default_compr);
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 7c40e6025fd6..58aa05df2bb6 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -75,7 +75,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
75 magic = le32_to_cpu(ch->magic); 75 magic = le32_to_cpu(ch->magic);
76 76
77 if (magic == 0xFFFFFFFF) { 77 if (magic == 0xFFFFFFFF) {
78 dbg_scan("hit empty space"); 78 dbg_scan("hit empty space at LEB %d:%d", lnum, offs);
79 return SCANNED_EMPTY_SPACE; 79 return SCANNED_EMPTY_SPACE;
80 } 80 }
81 81
@@ -85,7 +85,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
85 if (len < UBIFS_CH_SZ) 85 if (len < UBIFS_CH_SZ)
86 return SCANNED_GARBAGE; 86 return SCANNED_GARBAGE;
87 87
88 dbg_scan("scanning %s", dbg_ntype(ch->node_type)); 88 dbg_scan("scanning %s at LEB %d:%d",
89 dbg_ntype(ch->node_type), lnum, offs);
89 90
90 if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) 91 if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
91 return SCANNED_A_CORRUPT_NODE; 92 return SCANNED_A_CORRUPT_NODE;
@@ -114,8 +115,8 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
114 return SCANNED_A_BAD_PAD_NODE; 115 return SCANNED_A_BAD_PAD_NODE;
115 } 116 }
116 117
117 dbg_scan("%d bytes padded, offset now %d", 118 dbg_scan("%d bytes padded at LEB %d:%d, offset now %d", pad_len,
118 pad_len, ALIGN(offs + node_len + pad_len, 8)); 119 lnum, offs, ALIGN(offs + node_len + pad_len, 8));
119 120
120 return node_len + pad_len; 121 return node_len + pad_len;
121 } 122 }
@@ -150,8 +151,8 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
150 151
151 err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); 152 err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
152 if (err && err != -EBADMSG) { 153 if (err && err != -EBADMSG) {
153 ubifs_err("cannot read %d bytes from LEB %d:%d," 154 ubifs_err("cannot read %d bytes from LEB %d:%d, error %d",
154 " error %d", c->leb_size - offs, lnum, offs, err); 155 c->leb_size - offs, lnum, offs, err);
155 kfree(sleb); 156 kfree(sleb);
156 return ERR_PTR(err); 157 return ERR_PTR(err);
157 } 158 }
@@ -240,8 +241,6 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
240 int len; 241 int len;
241 242
242 ubifs_err("corruption at LEB %d:%d", lnum, offs); 243 ubifs_err("corruption at LEB %d:%d", lnum, offs);
243 if (dbg_is_tst_rcvry(c))
244 return;
245 len = c->leb_size - offs; 244 len = c->leb_size - offs;
246 if (len > 8192) 245 if (len > 8192)
247 len = 8192; 246 len = 8192;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c3fa6c5327a3..ddc0f6ae65e9 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -89,9 +89,8 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
89 return 5; 89 return 5;
90 90
91 if (!ubifs_compr_present(ui->compr_type)) { 91 if (!ubifs_compr_present(ui->compr_type)) {
92 ubifs_warn("inode %lu uses '%s' compression, but it was not " 92 ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in",
93 "compiled in", inode->i_ino, 93 inode->i_ino, ubifs_compr_name(ui->compr_type));
94 ubifs_compr_name(ui->compr_type));
95 } 94 }
96 95
97 err = dbg_check_dir(c, inode); 96 err = dbg_check_dir(c, inode);
@@ -130,8 +129,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
130 129
131 inode->i_flags |= (S_NOCMTIME | S_NOATIME); 130 inode->i_flags |= (S_NOCMTIME | S_NOATIME);
132 set_nlink(inode, le32_to_cpu(ino->nlink)); 131 set_nlink(inode, le32_to_cpu(ino->nlink));
133 inode->i_uid = le32_to_cpu(ino->uid); 132 i_uid_write(inode, le32_to_cpu(ino->uid));
134 inode->i_gid = le32_to_cpu(ino->gid); 133 i_gid_write(inode, le32_to_cpu(ino->gid));
135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); 134 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
136 inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); 135 inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
137 inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec); 136 inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec);
@@ -1061,8 +1060,8 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
1061 1060
1062 flag = parse_standard_option(p); 1061 flag = parse_standard_option(p);
1063 if (!flag) { 1062 if (!flag) {
1064 ubifs_err("unrecognized mount option \"%s\" " 1063 ubifs_err("unrecognized mount option \"%s\" or missing value",
1065 "or missing value", p); 1064 p);
1066 return -EINVAL; 1065 return -EINVAL;
1067 } 1066 }
1068 sb->s_flags |= flag; 1067 sb->s_flags |= flag;
@@ -1124,8 +1123,8 @@ again:
1124 } 1123 }
1125 1124
1126 /* Just disable bulk-read */ 1125 /* Just disable bulk-read */
1127 ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, " 1126 ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it",
1128 "disabling it", c->max_bu_buf_len); 1127 c->max_bu_buf_len);
1129 c->mount_opts.bulk_read = 1; 1128 c->mount_opts.bulk_read = 1;
1130 c->bulk_read = 0; 1129 c->bulk_read = 0;
1131 return; 1130 return;
@@ -1157,14 +1156,11 @@ static int check_free_space(struct ubifs_info *c)
1157 * 1156 *
1158 * This function mounts UBIFS file system. Returns zero in case of success and 1157 * This function mounts UBIFS file system. Returns zero in case of success and
1159 * a negative error code in case of failure. 1158 * a negative error code in case of failure.
1160 *
1161 * Note, the function does not de-allocate resources it it fails half way
1162 * through, and the caller has to do this instead.
1163 */ 1159 */
1164static int mount_ubifs(struct ubifs_info *c) 1160static int mount_ubifs(struct ubifs_info *c)
1165{ 1161{
1166 int err; 1162 int err;
1167 long long x; 1163 long long x, y;
1168 size_t sz; 1164 size_t sz;
1169 1165
1170 c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); 1166 c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
@@ -1414,75 +1410,69 @@ static int mount_ubifs(struct ubifs_info *c)
1414 1410
1415 c->mounting = 0; 1411 c->mounting = 0;
1416 1412
1417 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", 1413 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s",
1418 c->vi.ubi_num, c->vi.vol_id, c->vi.name); 1414 c->vi.ubi_num, c->vi.vol_id, c->vi.name,
1419 if (c->ro_mount) 1415 c->ro_mount ? ", R/O mode" : NULL);
1420 ubifs_msg("mounted read-only");
1421 x = (long long)c->main_lebs * c->leb_size; 1416 x = (long long)c->main_lebs * c->leb_size;
1422 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " 1417 y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
1423 "LEBs)", x, x >> 10, x >> 20, c->main_lebs); 1418 ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
1424 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; 1419 c->leb_size, c->leb_size >> 10, c->min_io_size,
1425 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " 1420 c->max_write_size);
1426 "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); 1421 ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
1427 ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", 1422 x, x >> 20, c->main_lebs,
1423 y, y >> 20, c->log_lebs + c->max_bud_cnt);
1424 ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
1425 c->report_rp_size, c->report_rp_size >> 10);
1426 ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
1428 c->fmt_version, c->ro_compat_version, 1427 c->fmt_version, c->ro_compat_version,
1429 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); 1428 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
1430 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); 1429 c->big_lpt ? ", big LPT model" : ", small LPT model");
1431 ubifs_msg("reserved for root: %llu bytes (%llu KiB)", 1430
1432 c->report_rp_size, c->report_rp_size >> 10); 1431 dbg_gen("default compressor: %s", ubifs_compr_name(c->default_compr));
1433 1432 dbg_gen("data journal heads: %d",
1434 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1435 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1436 dbg_msg("max. write size: %d bytes", c->max_write_size);
1437 dbg_msg("LEB size: %d bytes (%d KiB)",
1438 c->leb_size, c->leb_size >> 10);
1439 dbg_msg("data journal heads: %d",
1440 c->jhead_cnt - NONDATA_JHEADS_CNT); 1433 c->jhead_cnt - NONDATA_JHEADS_CNT);
1441 dbg_msg("UUID: %pUB", c->uuid); 1434 dbg_gen("log LEBs: %d (%d - %d)",
1442 dbg_msg("big_lpt %d", c->big_lpt);
1443 dbg_msg("log LEBs: %d (%d - %d)",
1444 c->log_lebs, UBIFS_LOG_LNUM, c->log_last); 1435 c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
1445 dbg_msg("LPT area LEBs: %d (%d - %d)", 1436 dbg_gen("LPT area LEBs: %d (%d - %d)",
1446 c->lpt_lebs, c->lpt_first, c->lpt_last); 1437 c->lpt_lebs, c->lpt_first, c->lpt_last);
1447 dbg_msg("orphan area LEBs: %d (%d - %d)", 1438 dbg_gen("orphan area LEBs: %d (%d - %d)",
1448 c->orph_lebs, c->orph_first, c->orph_last); 1439 c->orph_lebs, c->orph_first, c->orph_last);
1449 dbg_msg("main area LEBs: %d (%d - %d)", 1440 dbg_gen("main area LEBs: %d (%d - %d)",
1450 c->main_lebs, c->main_first, c->leb_cnt - 1); 1441 c->main_lebs, c->main_first, c->leb_cnt - 1);
1451 dbg_msg("index LEBs: %d", c->lst.idx_lebs); 1442 dbg_gen("index LEBs: %d", c->lst.idx_lebs);
1452 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", 1443 dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)",
1453 c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, 1444 c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
1454 c->bi.old_idx_sz >> 20); 1445 c->bi.old_idx_sz >> 20);
1455 dbg_msg("key hash type: %d", c->key_hash_type); 1446 dbg_gen("key hash type: %d", c->key_hash_type);
1456 dbg_msg("tree fanout: %d", c->fanout); 1447 dbg_gen("tree fanout: %d", c->fanout);
1457 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1448 dbg_gen("reserved GC LEB: %d", c->gc_lnum);
1458 dbg_msg("first main LEB: %d", c->main_first); 1449 dbg_gen("max. znode size %d", c->max_znode_sz);
1459 dbg_msg("max. znode size %d", c->max_znode_sz); 1450 dbg_gen("max. index node size %d", c->max_idx_node_sz);
1460 dbg_msg("max. index node size %d", c->max_idx_node_sz); 1451 dbg_gen("node sizes: data %zu, inode %zu, dentry %zu",
1461 dbg_msg("node sizes: data %zu, inode %zu, dentry %zu",
1462 UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); 1452 UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);
1463 dbg_msg("node sizes: trun %zu, sb %zu, master %zu", 1453 dbg_gen("node sizes: trun %zu, sb %zu, master %zu",
1464 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); 1454 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
1465 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1455 dbg_gen("node sizes: ref %zu, cmt. start %zu, orph %zu",
1466 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1456 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1467 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", 1457 dbg_gen("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1468 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1458 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1469 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); 1459 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1470 dbg_msg("dead watermark: %d", c->dead_wm); 1460 dbg_gen("dead watermark: %d", c->dead_wm);
1471 dbg_msg("dark watermark: %d", c->dark_wm); 1461 dbg_gen("dark watermark: %d", c->dark_wm);
1472 dbg_msg("LEB overhead: %d", c->leb_overhead); 1462 dbg_gen("LEB overhead: %d", c->leb_overhead);
1473 x = (long long)c->main_lebs * c->dark_wm; 1463 x = (long long)c->main_lebs * c->dark_wm;
1474 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", 1464 dbg_gen("max. dark space: %lld (%lld KiB, %lld MiB)",
1475 x, x >> 10, x >> 20); 1465 x, x >> 10, x >> 20);
1476 dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)", 1466 dbg_gen("maximum bud bytes: %lld (%lld KiB, %lld MiB)",
1477 c->max_bud_bytes, c->max_bud_bytes >> 10, 1467 c->max_bud_bytes, c->max_bud_bytes >> 10,
1478 c->max_bud_bytes >> 20); 1468 c->max_bud_bytes >> 20);
1479 dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", 1469 dbg_gen("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
1480 c->bg_bud_bytes, c->bg_bud_bytes >> 10, 1470 c->bg_bud_bytes, c->bg_bud_bytes >> 10,
1481 c->bg_bud_bytes >> 20); 1471 c->bg_bud_bytes >> 20);
1482 dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)", 1472 dbg_gen("current bud bytes %lld (%lld KiB, %lld MiB)",
1483 c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); 1473 c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
1484 dbg_msg("max. seq. number: %llu", c->max_sqnum); 1474 dbg_gen("max. seq. number: %llu", c->max_sqnum);
1485 dbg_msg("commit number: %llu", c->cmt_no); 1475 dbg_gen("commit number: %llu", c->cmt_no);
1486 1476
1487 return 0; 1477 return 0;
1488 1478
@@ -1567,10 +1557,9 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1567 1557
1568 if (c->rw_incompat) { 1558 if (c->rw_incompat) {
1569 ubifs_err("the file-system is not R/W-compatible"); 1559 ubifs_err("the file-system is not R/W-compatible");
1570 ubifs_msg("on-flash format version is w%d/r%d, but software " 1560 ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
1571 "only supports up to version w%d/r%d", c->fmt_version, 1561 c->fmt_version, c->ro_compat_version,
1572 c->ro_compat_version, UBIFS_FORMAT_VERSION, 1562 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
1573 UBIFS_RO_COMPAT_VERSION);
1574 return -EROFS; 1563 return -EROFS;
1575 } 1564 }
1576 1565
@@ -1831,8 +1820,8 @@ static void ubifs_put_super(struct super_block *sb)
1831 * next mount, so we just print a message and 1820 * next mount, so we just print a message and
1832 * continue to unmount normally. 1821 * continue to unmount normally.
1833 */ 1822 */
1834 ubifs_err("failed to write master node, " 1823 ubifs_err("failed to write master node, error %d",
1835 "error %d", err); 1824 err);
1836 } else { 1825 } else {
1837 for (i = 0; i < c->jhead_cnt; i++) 1826 for (i = 0; i < c->jhead_cnt; i++)
1838 /* Make sure write-buffer timers are canceled */ 1827 /* Make sure write-buffer timers are canceled */
@@ -2251,8 +2240,7 @@ static int __init ubifs_init(void)
2251 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 2240 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
2252 */ 2241 */
2253 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { 2242 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
2254 ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" 2243 ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
2255 " at least 4096 bytes",
2256 (unsigned int)PAGE_CACHE_SIZE); 2244 (unsigned int)PAGE_CACHE_SIZE);
2257 return -EINVAL; 2245 return -EINVAL;
2258 } 2246 }
@@ -2301,6 +2289,12 @@ static void __exit ubifs_exit(void)
2301 dbg_debugfs_exit(); 2289 dbg_debugfs_exit();
2302 ubifs_compressors_exit(); 2290 ubifs_compressors_exit();
2303 unregister_shrinker(&ubifs_shrinker_info); 2291 unregister_shrinker(&ubifs_shrinker_info);
2292
2293 /*
2294 * Make sure all delayed rcu free inodes are flushed before we
2295 * destroy cache.
2296 */
2297 rcu_barrier();
2304 kmem_cache_destroy(ubifs_inode_slab); 2298 kmem_cache_destroy(ubifs_inode_slab);
2305 unregister_filesystem(&ubifs_fs_type); 2299 unregister_filesystem(&ubifs_fs_type);
2306} 2300}
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index d38ac7f9654b..f6bf8995c7b1 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
328 case UBIFS_XENT_KEY: 328 case UBIFS_XENT_KEY:
329 break; 329 break;
330 default: 330 default:
331 dbg_msg("bad key type at slot %d: %d", 331 ubifs_err("bad key type at slot %d: %d",
332 i, key_type(c, &zbr->key)); 332 i, key_type(c, &zbr->key));
333 err = 3; 333 err = 3;
334 goto out_dump; 334 goto out_dump;
335 } 335 }
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1e5a08623d11..5486346d0a3f 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -42,16 +42,15 @@
42#define UBIFS_VERSION 1 42#define UBIFS_VERSION 1
43 43
44/* Normal UBIFS messages */ 44/* Normal UBIFS messages */
45#define ubifs_msg(fmt, ...) \ 45#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__)
46 printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__)
47/* UBIFS error messages */ 46/* UBIFS error messages */
48#define ubifs_err(fmt, ...) \ 47#define ubifs_err(fmt, ...) \
49 printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ 48 pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
50 __func__, ##__VA_ARGS__) 49 __func__, ##__VA_ARGS__)
51/* UBIFS warning messages */ 50/* UBIFS warning messages */
52#define ubifs_warn(fmt, ...) \ 51#define ubifs_warn(fmt, ...) \
53 printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ 52 pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \
54 current->pid, __func__, ##__VA_ARGS__) 53 current->pid, __func__, ##__VA_ARGS__)
55 54
56/* UBIFS file system VFS magic number */ 55/* UBIFS file system VFS magic number */
57#define UBIFS_SUPER_MAGIC 0x24051905 56#define UBIFS_SUPER_MAGIC 0x24051905
@@ -1426,8 +1425,8 @@ struct ubifs_info {
1426 1425
1427 long long rp_size; 1426 long long rp_size;
1428 long long report_rp_size; 1427 long long report_rp_size;
1429 uid_t rp_uid; 1428 kuid_t rp_uid;
1430 gid_t rp_gid; 1429 kgid_t rp_gid;
1431 1430
1432 /* The below fields are used only during mounting and re-mounting */ 1431 /* The below fields are used only during mounting and re-mounting */
1433 unsigned int empty:1; 1432 unsigned int empty:1;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba3df6e..77b5953eaac8 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -39,20 +39,24 @@
39#include "udf_i.h" 39#include "udf_i.h"
40#include "udf_sb.h" 40#include "udf_sb.h"
41 41
42static int udf_adinicb_readpage(struct file *file, struct page *page) 42static void __udf_adinicb_readpage(struct page *page)
43{ 43{
44 struct inode *inode = page->mapping->host; 44 struct inode *inode = page->mapping->host;
45 char *kaddr; 45 char *kaddr;
46 struct udf_inode_info *iinfo = UDF_I(inode); 46 struct udf_inode_info *iinfo = UDF_I(inode);
47 47
48 BUG_ON(!PageLocked(page));
49
50 kaddr = kmap(page); 48 kaddr = kmap(page);
51 memset(kaddr, 0, PAGE_CACHE_SIZE);
52 memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); 49 memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
50 memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
53 flush_dcache_page(page); 51 flush_dcache_page(page);
54 SetPageUptodate(page); 52 SetPageUptodate(page);
55 kunmap(page); 53 kunmap(page);
54}
55
56static int udf_adinicb_readpage(struct file *file, struct page *page)
57{
58 BUG_ON(!PageLocked(page));
59 __udf_adinicb_readpage(page);
56 unlock_page(page); 60 unlock_page(page);
57 61
58 return 0; 62 return 0;
@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page,
77 return 0; 81 return 0;
78} 82}
79 83
84static int udf_adinicb_write_begin(struct file *file,
85 struct address_space *mapping, loff_t pos,
86 unsigned len, unsigned flags, struct page **pagep,
87 void **fsdata)
88{
89 struct page *page;
90
91 if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
92 return -EIO;
93 page = grab_cache_page_write_begin(mapping, 0, flags);
94 if (!page)
95 return -ENOMEM;
96 *pagep = page;
97
98 if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
99 __udf_adinicb_readpage(page);
100 return 0;
101}
102
80static int udf_adinicb_write_end(struct file *file, 103static int udf_adinicb_write_end(struct file *file,
81 struct address_space *mapping, 104 struct address_space *mapping,
82 loff_t pos, unsigned len, unsigned copied, 105 loff_t pos, unsigned len, unsigned copied,
@@ -95,11 +118,20 @@ static int udf_adinicb_write_end(struct file *file,
95 return simple_write_end(file, mapping, pos, len, copied, page, fsdata); 118 return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
96} 119}
97 120
121static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
122 const struct iovec *iov,
123 loff_t offset, unsigned long nr_segs)
124{
125 /* Fallback to buffered I/O. */
126 return 0;
127}
128
98const struct address_space_operations udf_adinicb_aops = { 129const struct address_space_operations udf_adinicb_aops = {
99 .readpage = udf_adinicb_readpage, 130 .readpage = udf_adinicb_readpage,
100 .writepage = udf_adinicb_writepage, 131 .writepage = udf_adinicb_writepage,
101 .write_begin = simple_write_begin, 132 .write_begin = udf_adinicb_write_begin,
102 .write_end = udf_adinicb_write_end, 133 .write_end = udf_adinicb_write_end,
134 .direct_IO = udf_adinicb_direct_IO,
103}; 135};
104 136
105static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 137static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index fafaad795cd6..df88b957ccf0 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -95,11 +95,33 @@ void udf_evict_inode(struct inode *inode)
95 } 95 }
96} 96}
97 97
98static void udf_write_failed(struct address_space *mapping, loff_t to)
99{
100 struct inode *inode = mapping->host;
101 struct udf_inode_info *iinfo = UDF_I(inode);
102 loff_t isize = inode->i_size;
103
104 if (to > isize) {
105 truncate_pagecache(inode, to, isize);
106 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
107 down_write(&iinfo->i_data_sem);
108 udf_truncate_extents(inode);
109 up_write(&iinfo->i_data_sem);
110 }
111 }
112}
113
98static int udf_writepage(struct page *page, struct writeback_control *wbc) 114static int udf_writepage(struct page *page, struct writeback_control *wbc)
99{ 115{
100 return block_write_full_page(page, udf_get_block, wbc); 116 return block_write_full_page(page, udf_get_block, wbc);
101} 117}
102 118
119static int udf_writepages(struct address_space *mapping,
120 struct writeback_control *wbc)
121{
122 return mpage_writepages(mapping, wbc, udf_get_block);
123}
124
103static int udf_readpage(struct file *file, struct page *page) 125static int udf_readpage(struct file *file, struct page *page)
104{ 126{
105 return mpage_readpage(page, udf_get_block); 127 return mpage_readpage(page, udf_get_block);
@@ -118,21 +140,24 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
118 int ret; 140 int ret;
119 141
120 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); 142 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
121 if (unlikely(ret)) { 143 if (unlikely(ret))
122 struct inode *inode = mapping->host; 144 udf_write_failed(mapping, pos + len);
123 struct udf_inode_info *iinfo = UDF_I(inode); 145 return ret;
124 loff_t isize = inode->i_size; 146}
125
126 if (pos + len > isize) {
127 truncate_pagecache(inode, pos + len, isize);
128 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
129 down_write(&iinfo->i_data_sem);
130 udf_truncate_extents(inode);
131 up_write(&iinfo->i_data_sem);
132 }
133 }
134 }
135 147
148static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
149 const struct iovec *iov,
150 loff_t offset, unsigned long nr_segs)
151{
152 struct file *file = iocb->ki_filp;
153 struct address_space *mapping = file->f_mapping;
154 struct inode *inode = mapping->host;
155 ssize_t ret;
156
157 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
158 udf_get_block);
159 if (unlikely(ret < 0 && (rw & WRITE)))
160 udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
136 return ret; 161 return ret;
137} 162}
138 163
@@ -145,8 +170,10 @@ const struct address_space_operations udf_aops = {
145 .readpage = udf_readpage, 170 .readpage = udf_readpage,
146 .readpages = udf_readpages, 171 .readpages = udf_readpages,
147 .writepage = udf_writepage, 172 .writepage = udf_writepage,
148 .write_begin = udf_write_begin, 173 .writepages = udf_writepages,
149 .write_end = generic_write_end, 174 .write_begin = udf_write_begin,
175 .write_end = generic_write_end,
176 .direct_IO = udf_direct_IO,
150 .bmap = udf_bmap, 177 .bmap = udf_bmap,
151}; 178};
152 179
@@ -1124,14 +1151,17 @@ int udf_setsize(struct inode *inode, loff_t newsize)
1124 if (err) 1151 if (err)
1125 return err; 1152 return err;
1126 down_write(&iinfo->i_data_sem); 1153 down_write(&iinfo->i_data_sem);
1127 } else 1154 } else {
1128 iinfo->i_lenAlloc = newsize; 1155 iinfo->i_lenAlloc = newsize;
1156 goto set_size;
1157 }
1129 } 1158 }
1130 err = udf_extend_file(inode, newsize); 1159 err = udf_extend_file(inode, newsize);
1131 if (err) { 1160 if (err) {
1132 up_write(&iinfo->i_data_sem); 1161 up_write(&iinfo->i_data_sem);
1133 return err; 1162 return err;
1134 } 1163 }
1164set_size:
1135 truncate_setsize(inode, newsize); 1165 truncate_setsize(inode, newsize);
1136 up_write(&iinfo->i_data_sem); 1166 up_write(&iinfo->i_data_sem);
1137 } else { 1167 } else {
@@ -1309,14 +1339,14 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1309 } 1339 }
1310 1340
1311 read_lock(&sbi->s_cred_lock); 1341 read_lock(&sbi->s_cred_lock);
1312 inode->i_uid = le32_to_cpu(fe->uid); 1342 i_uid_write(inode, le32_to_cpu(fe->uid));
1313 if (inode->i_uid == -1 || 1343 if (!uid_valid(inode->i_uid) ||
1314 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) || 1344 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_IGNORE) ||
1315 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET)) 1345 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_SET))
1316 inode->i_uid = UDF_SB(inode->i_sb)->s_uid; 1346 inode->i_uid = UDF_SB(inode->i_sb)->s_uid;
1317 1347
1318 inode->i_gid = le32_to_cpu(fe->gid); 1348 i_gid_write(inode, le32_to_cpu(fe->gid));
1319 if (inode->i_gid == -1 || 1349 if (!gid_valid(inode->i_gid) ||
1320 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) || 1350 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_IGNORE) ||
1321 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET)) 1351 UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_SET))
1322 inode->i_gid = UDF_SB(inode->i_sb)->s_gid; 1352 inode->i_gid = UDF_SB(inode->i_sb)->s_gid;
@@ -1539,12 +1569,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1539 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET)) 1569 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_UID_FORGET))
1540 fe->uid = cpu_to_le32(-1); 1570 fe->uid = cpu_to_le32(-1);
1541 else 1571 else
1542 fe->uid = cpu_to_le32(inode->i_uid); 1572 fe->uid = cpu_to_le32(i_uid_read(inode));
1543 1573
1544 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET)) 1574 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_GID_FORGET))
1545 fe->gid = cpu_to_le32(-1); 1575 fe->gid = cpu_to_le32(-1);
1546 else 1576 else
1547 fe->gid = cpu_to_le32(inode->i_gid); 1577 fe->gid = cpu_to_le32(i_gid_read(inode));
1548 1578
1549 udfperms = ((inode->i_mode & S_IRWXO)) | 1579 udfperms = ((inode->i_mode & S_IRWXO)) |
1550 ((inode->i_mode & S_IRWXG) << 2) | 1580 ((inode->i_mode & S_IRWXG) << 2) |
diff --git a/fs/udf/super.c b/fs/udf/super.c
index dcbf98722afc..d44fb568abe1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -171,6 +171,11 @@ static int init_inodecache(void)
171 171
172static void destroy_inodecache(void) 172static void destroy_inodecache(void)
173{ 173{
174 /*
175 * Make sure all delayed rcu free inodes are flushed before we
176 * destroy cache.
177 */
178 rcu_barrier();
174 kmem_cache_destroy(udf_inode_cachep); 179 kmem_cache_destroy(udf_inode_cachep);
175} 180}
176 181
@@ -199,8 +204,8 @@ struct udf_options {
199 unsigned int rootdir; 204 unsigned int rootdir;
200 unsigned int flags; 205 unsigned int flags;
201 umode_t umask; 206 umode_t umask;
202 gid_t gid; 207 kgid_t gid;
203 uid_t uid; 208 kuid_t uid;
204 umode_t fmode; 209 umode_t fmode;
205 umode_t dmode; 210 umode_t dmode;
206 struct nls_table *nls_map; 211 struct nls_table *nls_map;
@@ -335,9 +340,9 @@ static int udf_show_options(struct seq_file *seq, struct dentry *root)
335 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE)) 340 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_IGNORE))
336 seq_puts(seq, ",gid=ignore"); 341 seq_puts(seq, ",gid=ignore");
337 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET)) 342 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
338 seq_printf(seq, ",uid=%u", sbi->s_uid); 343 seq_printf(seq, ",uid=%u", from_kuid(&init_user_ns, sbi->s_uid));
339 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) 344 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
340 seq_printf(seq, ",gid=%u", sbi->s_gid); 345 seq_printf(seq, ",gid=%u", from_kgid(&init_user_ns, sbi->s_gid));
341 if (sbi->s_umask != 0) 346 if (sbi->s_umask != 0)
342 seq_printf(seq, ",umask=%ho", sbi->s_umask); 347 seq_printf(seq, ",umask=%ho", sbi->s_umask);
343 if (sbi->s_fmode != UDF_INVALID_MODE) 348 if (sbi->s_fmode != UDF_INVALID_MODE)
@@ -516,13 +521,17 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
516 case Opt_gid: 521 case Opt_gid:
517 if (match_int(args, &option)) 522 if (match_int(args, &option))
518 return 0; 523 return 0;
519 uopt->gid = option; 524 uopt->gid = make_kgid(current_user_ns(), option);
525 if (!gid_valid(uopt->gid))
526 return 0;
520 uopt->flags |= (1 << UDF_FLAG_GID_SET); 527 uopt->flags |= (1 << UDF_FLAG_GID_SET);
521 break; 528 break;
522 case Opt_uid: 529 case Opt_uid:
523 if (match_int(args, &option)) 530 if (match_int(args, &option))
524 return 0; 531 return 0;
525 uopt->uid = option; 532 uopt->uid = make_kuid(current_user_ns(), option);
533 if (!uid_valid(uopt->uid))
534 return 0;
526 uopt->flags |= (1 << UDF_FLAG_UID_SET); 535 uopt->flags |= (1 << UDF_FLAG_UID_SET);
527 break; 536 break;
528 case Opt_umask: 537 case Opt_umask:
@@ -1344,6 +1353,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1344 udf_err(sb, "error loading logical volume descriptor: " 1353 udf_err(sb, "error loading logical volume descriptor: "
1345 "Partition table too long (%u > %lu)\n", table_len, 1354 "Partition table too long (%u > %lu)\n", table_len,
1346 sb->s_blocksize - sizeof(*lvd)); 1355 sb->s_blocksize - sizeof(*lvd));
1356 ret = 1;
1347 goto out_bh; 1357 goto out_bh;
1348 } 1358 }
1349 1359
@@ -1388,8 +1398,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1388 UDF_ID_SPARABLE, 1398 UDF_ID_SPARABLE,
1389 strlen(UDF_ID_SPARABLE))) { 1399 strlen(UDF_ID_SPARABLE))) {
1390 if (udf_load_sparable_map(sb, map, 1400 if (udf_load_sparable_map(sb, map,
1391 (struct sparablePartitionMap *)gpm) < 0) 1401 (struct sparablePartitionMap *)gpm) < 0) {
1402 ret = 1;
1392 goto out_bh; 1403 goto out_bh;
1404 }
1393 } else if (!strncmp(upm2->partIdent.ident, 1405 } else if (!strncmp(upm2->partIdent.ident,
1394 UDF_ID_METADATA, 1406 UDF_ID_METADATA,
1395 strlen(UDF_ID_METADATA))) { 1407 strlen(UDF_ID_METADATA))) {
@@ -1931,8 +1943,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1931 struct udf_sb_info *sbi; 1943 struct udf_sb_info *sbi;
1932 1944
1933 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); 1945 uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT);
1934 uopt.uid = -1; 1946 uopt.uid = INVALID_UID;
1935 uopt.gid = -1; 1947 uopt.gid = INVALID_GID;
1936 uopt.umask = 0; 1948 uopt.umask = 0;
1937 uopt.fmode = UDF_INVALID_MODE; 1949 uopt.fmode = UDF_INVALID_MODE;
1938 uopt.dmode = UDF_INVALID_MODE; 1950 uopt.dmode = UDF_INVALID_MODE;
@@ -2000,6 +2012,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2000 if (!silent) 2012 if (!silent)
2001 pr_notice("Rescanning with blocksize %d\n", 2013 pr_notice("Rescanning with blocksize %d\n",
2002 UDF_DEFAULT_BLOCKSIZE); 2014 UDF_DEFAULT_BLOCKSIZE);
2015 brelse(sbi->s_lvid_bh);
2016 sbi->s_lvid_bh = NULL;
2003 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; 2017 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
2004 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 2018 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
2005 } 2019 }
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 42ad69ac9576..5f027227f085 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -128,8 +128,8 @@ struct udf_sb_info {
128 128
129 /* Default permissions */ 129 /* Default permissions */
130 umode_t s_umask; 130 umode_t s_umask;
131 gid_t s_gid; 131 kgid_t s_gid;
132 uid_t s_uid; 132 kuid_t s_uid;
133 umode_t s_fmode; 133 umode_t s_fmode;
134 umode_t s_dmode; 134 umode_t s_dmode;
135 /* Lock protecting consistency of above permission settings */ 135 /* Lock protecting consistency of above permission settings */
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index dd7c89d8a1c1..eb6d0b7dc879 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -597,8 +597,8 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
597 /* 597 /*
598 * Linux now has 32-bit uid and gid, so we can support EFT. 598 * Linux now has 32-bit uid and gid, so we can support EFT.
599 */ 599 */
600 inode->i_uid = ufs_get_inode_uid(sb, ufs_inode); 600 i_uid_write(inode, ufs_get_inode_uid(sb, ufs_inode));
601 inode->i_gid = ufs_get_inode_gid(sb, ufs_inode); 601 i_gid_write(inode, ufs_get_inode_gid(sb, ufs_inode));
602 602
603 inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size); 603 inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
604 inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec); 604 inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
@@ -645,8 +645,8 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
645 /* 645 /*
646 * Linux now has 32-bit uid and gid, so we can support EFT. 646 * Linux now has 32-bit uid and gid, so we can support EFT.
647 */ 647 */
648 inode->i_uid = fs32_to_cpu(sb, ufs2_inode->ui_uid); 648 i_uid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_uid));
649 inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid); 649 i_gid_write(inode, fs32_to_cpu(sb, ufs2_inode->ui_gid));
650 650
651 inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size); 651 inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
652 inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime); 652 inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
@@ -745,8 +745,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
745 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); 745 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
746 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); 746 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
747 747
748 ufs_set_inode_uid(sb, ufs_inode, inode->i_uid); 748 ufs_set_inode_uid(sb, ufs_inode, i_uid_read(inode));
749 ufs_set_inode_gid(sb, ufs_inode, inode->i_gid); 749 ufs_set_inode_gid(sb, ufs_inode, i_gid_read(inode));
750 750
751 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); 751 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
752 ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec); 752 ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
@@ -789,8 +789,8 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
789 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode); 789 ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
790 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink); 790 ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
791 791
792 ufs_inode->ui_uid = cpu_to_fs32(sb, inode->i_uid); 792 ufs_inode->ui_uid = cpu_to_fs32(sb, i_uid_read(inode));
793 ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid); 793 ufs_inode->ui_gid = cpu_to_fs32(sb, i_gid_read(inode));
794 794
795 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size); 795 ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
796 ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec); 796 ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 444927e5706b..f7cfecfe1cab 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1466,6 +1466,11 @@ static int init_inodecache(void)
1466 1466
1467static void destroy_inodecache(void) 1467static void destroy_inodecache(void)
1468{ 1468{
1469 /*
1470 * Make sure all delayed rcu free inodes are flushed before we
1471 * destroy cache.
1472 */
1473 rcu_barrier();
1469 kmem_cache_destroy(ufs_inode_cachep); 1474 kmem_cache_destroy(ufs_inode_cachep);
1470} 1475}
1471 1476
diff --git a/fs/utimes.c b/fs/utimes.c
index fa4dbe451e27..bb0696a41735 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -140,19 +140,18 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times,
140 goto out; 140 goto out;
141 141
142 if (filename == NULL && dfd != AT_FDCWD) { 142 if (filename == NULL && dfd != AT_FDCWD) {
143 int fput_needed; 143 struct fd f;
144 struct file *file;
145 144
146 if (flags & AT_SYMLINK_NOFOLLOW) 145 if (flags & AT_SYMLINK_NOFOLLOW)
147 goto out; 146 goto out;
148 147
149 file = fget_light(dfd, &fput_needed); 148 f = fdget(dfd);
150 error = -EBADF; 149 error = -EBADF;
151 if (!file) 150 if (!f.file)
152 goto out; 151 goto out;
153 152
154 error = utimes_common(&file->f_path, times); 153 error = utimes_common(&f.file->f_path, times);
155 fput_light(file, fput_needed); 154 fdput(f);
156 } else { 155 } else {
157 struct path path; 156 struct path path;
158 int lookup_flags = 0; 157 int lookup_flags = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4d45b7189e7e..1780f062dbaf 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -20,6 +20,7 @@
20#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/vmalloc.h> 22#include <linux/vmalloc.h>
23#include <linux/posix_acl_xattr.h>
23 24
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25 26
@@ -295,11 +296,13 @@ vfs_removexattr(struct dentry *dentry, const char *name)
295 if (error) 296 if (error)
296 return error; 297 return error;
297 298
299 mutex_lock(&inode->i_mutex);
298 error = security_inode_removexattr(dentry, name); 300 error = security_inode_removexattr(dentry, name);
299 if (error) 301 if (error) {
302 mutex_unlock(&inode->i_mutex);
300 return error; 303 return error;
304 }
301 305
302 mutex_lock(&inode->i_mutex);
303 error = inode->i_op->removexattr(dentry, name); 306 error = inode->i_op->removexattr(dentry, name);
304 mutex_unlock(&inode->i_mutex); 307 mutex_unlock(&inode->i_mutex);
305 308
@@ -347,6 +350,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
347 error = -EFAULT; 350 error = -EFAULT;
348 goto out; 351 goto out;
349 } 352 }
353 if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
354 (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
355 posix_acl_fix_xattr_from_user(kvalue, size);
350 } 356 }
351 357
352 error = vfs_setxattr(d, kname, kvalue, size, flags); 358 error = vfs_setxattr(d, kname, kvalue, size, flags);
@@ -399,22 +405,20 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
399SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, 405SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
400 const void __user *,value, size_t, size, int, flags) 406 const void __user *,value, size_t, size, int, flags)
401{ 407{
402 int fput_needed; 408 struct fd f = fdget(fd);
403 struct file *f;
404 struct dentry *dentry; 409 struct dentry *dentry;
405 int error = -EBADF; 410 int error = -EBADF;
406 411
407 f = fget_light(fd, &fput_needed); 412 if (!f.file)
408 if (!f)
409 return error; 413 return error;
410 dentry = f->f_path.dentry; 414 dentry = f.file->f_path.dentry;
411 audit_inode(NULL, dentry); 415 audit_inode(NULL, dentry);
412 error = mnt_want_write_file(f); 416 error = mnt_want_write_file(f.file);
413 if (!error) { 417 if (!error) {
414 error = setxattr(dentry, name, value, size, flags); 418 error = setxattr(dentry, name, value, size, flags);
415 mnt_drop_write_file(f); 419 mnt_drop_write_file(f.file);
416 } 420 }
417 fput_light(f, fput_needed); 421 fdput(f);
418 return error; 422 return error;
419} 423}
420 424
@@ -450,6 +454,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
450 454
451 error = vfs_getxattr(d, kname, kvalue, size); 455 error = vfs_getxattr(d, kname, kvalue, size);
452 if (error > 0) { 456 if (error > 0) {
457 if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
458 (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
459 posix_acl_fix_xattr_to_user(kvalue, size);
453 if (size && copy_to_user(value, kvalue, error)) 460 if (size && copy_to_user(value, kvalue, error))
454 error = -EFAULT; 461 error = -EFAULT;
455 } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { 462 } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
@@ -495,16 +502,14 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
495SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, 502SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
496 void __user *, value, size_t, size) 503 void __user *, value, size_t, size)
497{ 504{
498 int fput_needed; 505 struct fd f = fdget(fd);
499 struct file *f;
500 ssize_t error = -EBADF; 506 ssize_t error = -EBADF;
501 507
502 f = fget_light(fd, &fput_needed); 508 if (!f.file)
503 if (!f)
504 return error; 509 return error;
505 audit_inode(NULL, f->f_path.dentry); 510 audit_inode(NULL, f.file->f_path.dentry);
506 error = getxattr(f->f_path.dentry, name, value, size); 511 error = getxattr(f.file->f_path.dentry, name, value, size);
507 fput_light(f, fput_needed); 512 fdput(f);
508 return error; 513 return error;
509} 514}
510 515
@@ -576,16 +581,14 @@ SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
576 581
577SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) 582SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
578{ 583{
579 int fput_needed; 584 struct fd f = fdget(fd);
580 struct file *f;
581 ssize_t error = -EBADF; 585 ssize_t error = -EBADF;
582 586
583 f = fget_light(fd, &fput_needed); 587 if (!f.file)
584 if (!f)
585 return error; 588 return error;
586 audit_inode(NULL, f->f_path.dentry); 589 audit_inode(NULL, f.file->f_path.dentry);
587 error = listxattr(f->f_path.dentry, list, size); 590 error = listxattr(f.file->f_path.dentry, list, size);
588 fput_light(f, fput_needed); 591 fdput(f);
589 return error; 592 return error;
590} 593}
591 594
@@ -645,22 +648,20 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
645 648
646SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) 649SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
647{ 650{
648 int fput_needed; 651 struct fd f = fdget(fd);
649 struct file *f;
650 struct dentry *dentry; 652 struct dentry *dentry;
651 int error = -EBADF; 653 int error = -EBADF;
652 654
653 f = fget_light(fd, &fput_needed); 655 if (!f.file)
654 if (!f)
655 return error; 656 return error;
656 dentry = f->f_path.dentry; 657 dentry = f.file->f_path.dentry;
657 audit_inode(NULL, dentry); 658 audit_inode(NULL, dentry);
658 error = mnt_want_write_file(f); 659 error = mnt_want_write_file(f.file);
659 if (!error) { 660 if (!error) {
660 error = removexattr(dentry, name); 661 error = removexattr(dentry, name);
661 mnt_drop_write_file(f); 662 mnt_drop_write_file(f.file);
662 } 663 }
663 fput_light(f, fput_needed); 664 fdput(f);
664 return error; 665 return error;
665} 666}
666 667
@@ -791,3 +792,183 @@ EXPORT_SYMBOL(generic_getxattr);
791EXPORT_SYMBOL(generic_listxattr); 792EXPORT_SYMBOL(generic_listxattr);
792EXPORT_SYMBOL(generic_setxattr); 793EXPORT_SYMBOL(generic_setxattr);
793EXPORT_SYMBOL(generic_removexattr); 794EXPORT_SYMBOL(generic_removexattr);
795
796/*
797 * Allocate new xattr and copy in the value; but leave the name to callers.
798 */
799struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
800{
801 struct simple_xattr *new_xattr;
802 size_t len;
803
804 /* wrap around? */
805 len = sizeof(*new_xattr) + size;
806 if (len <= sizeof(*new_xattr))
807 return NULL;
808
809 new_xattr = kmalloc(len, GFP_KERNEL);
810 if (!new_xattr)
811 return NULL;
812
813 new_xattr->size = size;
814 memcpy(new_xattr->value, value, size);
815 return new_xattr;
816}
817
818/*
819 * xattr GET operation for in-memory/pseudo filesystems
820 */
821int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
822 void *buffer, size_t size)
823{
824 struct simple_xattr *xattr;
825 int ret = -ENODATA;
826
827 spin_lock(&xattrs->lock);
828 list_for_each_entry(xattr, &xattrs->head, list) {
829 if (strcmp(name, xattr->name))
830 continue;
831
832 ret = xattr->size;
833 if (buffer) {
834 if (size < xattr->size)
835 ret = -ERANGE;
836 else
837 memcpy(buffer, xattr->value, xattr->size);
838 }
839 break;
840 }
841 spin_unlock(&xattrs->lock);
842 return ret;
843}
844
845static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
846 const void *value, size_t size, int flags)
847{
848 struct simple_xattr *xattr;
849 struct simple_xattr *uninitialized_var(new_xattr);
850 int err = 0;
851
852 /* value == NULL means remove */
853 if (value) {
854 new_xattr = simple_xattr_alloc(value, size);
855 if (!new_xattr)
856 return -ENOMEM;
857
858 new_xattr->name = kstrdup(name, GFP_KERNEL);
859 if (!new_xattr->name) {
860 kfree(new_xattr);
861 return -ENOMEM;
862 }
863 }
864
865 spin_lock(&xattrs->lock);
866 list_for_each_entry(xattr, &xattrs->head, list) {
867 if (!strcmp(name, xattr->name)) {
868 if (flags & XATTR_CREATE) {
869 xattr = new_xattr;
870 err = -EEXIST;
871 } else if (new_xattr) {
872 list_replace(&xattr->list, &new_xattr->list);
873 } else {
874 list_del(&xattr->list);
875 }
876 goto out;
877 }
878 }
879 if (flags & XATTR_REPLACE) {
880 xattr = new_xattr;
881 err = -ENODATA;
882 } else {
883 list_add(&new_xattr->list, &xattrs->head);
884 xattr = NULL;
885 }
886out:
887 spin_unlock(&xattrs->lock);
888 if (xattr) {
889 kfree(xattr->name);
890 kfree(xattr);
891 }
892 return err;
893
894}
895
/**
 * simple_xattr_set - xattr SET operation for in-memory/pseudo filesystems
 * @xattrs: target simple_xattr list
 * @name: name of the new extended attribute
 * @value: value of the new xattr. If %NULL, will remove the attribute
 * @size: size of the new xattr
 * @flags: %XATTR_{CREATE|REPLACE}
 *
 * If %XATTR_CREATE is set, the xattr must not exist already; otherwise the
 * call fails with -EEXIST.  If %XATTR_REPLACE is set, the xattr must exist;
 * otherwise the call fails with -ENODATA.
 *
 * Returns 0 on success, -errno on failure.
 */
int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
		     const void *value, size_t size, int flags)
{
	/*
	 * A zero size denotes an empty attribute, not a removal, so hand
	 * the worker a non-NULL value in that case.
	 */
	const void *payload = size ? value : "";

	return __simple_xattr_set(xattrs, name, payload, size, flags);
}
917
918/*
919 * xattr REMOVE operation for in-memory/pseudo filesystems
920 */
921int simple_xattr_remove(struct simple_xattrs *xattrs, const char *name)
922{
923 return __simple_xattr_set(xattrs, name, NULL, 0, XATTR_REPLACE);
924}
925
926static bool xattr_is_trusted(const char *name)
927{
928 return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
929}
930
931/*
932 * xattr LIST operation for in-memory/pseudo filesystems
933 */
934ssize_t simple_xattr_list(struct simple_xattrs *xattrs, char *buffer,
935 size_t size)
936{
937 bool trusted = capable(CAP_SYS_ADMIN);
938 struct simple_xattr *xattr;
939 size_t used = 0;
940
941 spin_lock(&xattrs->lock);
942 list_for_each_entry(xattr, &xattrs->head, list) {
943 size_t len;
944
945 /* skip "trusted." attributes for unprivileged callers */
946 if (!trusted && xattr_is_trusted(xattr->name))
947 continue;
948
949 len = strlen(xattr->name) + 1;
950 used += len;
951 if (buffer) {
952 if (size < used) {
953 used = -ERANGE;
954 break;
955 }
956 memcpy(buffer, xattr->name, len);
957 buffer += len;
958 }
959 }
960 spin_unlock(&xattrs->lock);
961
962 return used;
963}
964
965/*
966 * Adds an extended attribute to the list
967 */
968void simple_xattr_list_add(struct simple_xattrs *xattrs,
969 struct simple_xattr *new_xattr)
970{
971 spin_lock(&xattrs->lock);
972 list_add(&new_xattr->list, &xattrs->head);
973 spin_unlock(&xattrs->lock);
974}
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index 69d06b07b169..11efd830b5f5 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -9,13 +9,72 @@
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/posix_acl_xattr.h> 10#include <linux/posix_acl_xattr.h>
11#include <linux/gfp.h> 11#include <linux/gfp.h>
12#include <linux/user_namespace.h>
12 13
14/*
15 * Fix up the uids and gids in posix acl extended attributes in place.
16 */
17static void posix_acl_fix_xattr_userns(
18 struct user_namespace *to, struct user_namespace *from,
19 void *value, size_t size)
20{
21 posix_acl_xattr_header *header = (posix_acl_xattr_header *)value;
22 posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end;
23 int count;
24 kuid_t uid;
25 kgid_t gid;
26
27 if (!value)
28 return;
29 if (size < sizeof(posix_acl_xattr_header))
30 return;
31 if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
32 return;
33
34 count = posix_acl_xattr_count(size);
35 if (count < 0)
36 return;
37 if (count == 0)
38 return;
39
40 for (end = entry + count; entry != end; entry++) {
41 switch(le16_to_cpu(entry->e_tag)) {
42 case ACL_USER:
43 uid = make_kuid(from, le32_to_cpu(entry->e_id));
44 entry->e_id = cpu_to_le32(from_kuid(to, uid));
45 break;
46 case ACL_GROUP:
47 gid = make_kgid(from, le32_to_cpu(entry->e_id));
48 entry->e_id = cpu_to_le32(from_kuid(to, uid));
49 break;
50 default:
51 break;
52 }
53 }
54}
55
56void posix_acl_fix_xattr_from_user(void *value, size_t size)
57{
58 struct user_namespace *user_ns = current_user_ns();
59 if (user_ns == &init_user_ns)
60 return;
61 posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size);
62}
63
64void posix_acl_fix_xattr_to_user(void *value, size_t size)
65{
66 struct user_namespace *user_ns = current_user_ns();
67 if (user_ns == &init_user_ns)
68 return;
69 posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
70}
13 71
14/* 72/*
15 * Convert from extended attribute to in-memory representation. 73 * Convert from extended attribute to in-memory representation.
16 */ 74 */
17struct posix_acl * 75struct posix_acl *
18posix_acl_from_xattr(const void *value, size_t size) 76posix_acl_from_xattr(struct user_namespace *user_ns,
77 const void *value, size_t size)
19{ 78{
20 posix_acl_xattr_header *header = (posix_acl_xattr_header *)value; 79 posix_acl_xattr_header *header = (posix_acl_xattr_header *)value;
21 posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end; 80 posix_acl_xattr_entry *entry = (posix_acl_xattr_entry *)(header+1), *end;
@@ -50,12 +109,21 @@ posix_acl_from_xattr(const void *value, size_t size)
50 case ACL_GROUP_OBJ: 109 case ACL_GROUP_OBJ:
51 case ACL_MASK: 110 case ACL_MASK:
52 case ACL_OTHER: 111 case ACL_OTHER:
53 acl_e->e_id = ACL_UNDEFINED_ID;
54 break; 112 break;
55 113
56 case ACL_USER: 114 case ACL_USER:
115 acl_e->e_uid =
116 make_kuid(user_ns,
117 le32_to_cpu(entry->e_id));
118 if (!uid_valid(acl_e->e_uid))
119 goto fail;
120 break;
57 case ACL_GROUP: 121 case ACL_GROUP:
58 acl_e->e_id = le32_to_cpu(entry->e_id); 122 acl_e->e_gid =
123 make_kgid(user_ns,
124 le32_to_cpu(entry->e_id));
125 if (!gid_valid(acl_e->e_gid))
126 goto fail;
59 break; 127 break;
60 128
61 default: 129 default:
@@ -74,7 +142,8 @@ EXPORT_SYMBOL (posix_acl_from_xattr);
74 * Convert from in-memory to extended attribute representation. 142 * Convert from in-memory to extended attribute representation.
75 */ 143 */
76int 144int
77posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size) 145posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl,
146 void *buffer, size_t size)
78{ 147{
79 posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; 148 posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer;
80 posix_acl_xattr_entry *ext_entry = ext_acl->a_entries; 149 posix_acl_xattr_entry *ext_entry = ext_acl->a_entries;
@@ -89,9 +158,22 @@ posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size)
89 ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); 158 ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
90 159
91 for (n=0; n < acl->a_count; n++, ext_entry++) { 160 for (n=0; n < acl->a_count; n++, ext_entry++) {
92 ext_entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 161 const struct posix_acl_entry *acl_e = &acl->a_entries[n];
93 ext_entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 162 ext_entry->e_tag = cpu_to_le16(acl_e->e_tag);
94 ext_entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); 163 ext_entry->e_perm = cpu_to_le16(acl_e->e_perm);
164 switch(acl_e->e_tag) {
165 case ACL_USER:
166 ext_entry->e_id =
167 cpu_to_le32(from_kuid(user_ns, acl_e->e_uid));
168 break;
169 case ACL_GROUP:
170 ext_entry->e_id =
171 cpu_to_le32(from_kgid(user_ns, acl_e->e_gid));
172 break;
173 default:
174 ext_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
175 break;
176 }
95 } 177 }
96 return real_size; 178 return real_size;
97} 179}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ac702a6eab9b..1d32f1d52763 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -337,7 +337,7 @@ xfs_xattr_acl_get(struct dentry *dentry, const char *name,
337 if (acl == NULL) 337 if (acl == NULL)
338 return -ENODATA; 338 return -ENODATA;
339 339
340 error = posix_acl_to_xattr(acl, value, size); 340 error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
341 posix_acl_release(acl); 341 posix_acl_release(acl);
342 342
343 return error; 343 return error;
@@ -361,7 +361,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
361 if (!value) 361 if (!value)
362 goto set_acl; 362 goto set_acl;
363 363
364 acl = posix_acl_from_xattr(value, size); 364 acl = posix_acl_from_xattr(&init_user_ns, value, size);
365 if (!acl) { 365 if (!acl) {
366 /* 366 /*
367 * acl_set_file(3) may request that we set default ACLs with 367 * acl_set_file(3) may request that we set default ACLs with
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d7a9dd735e1e..933b7930b863 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,6 +96,7 @@ xfs_buf_lru_add(
96 atomic_inc(&bp->b_hold); 96 atomic_inc(&bp->b_hold);
97 list_add_tail(&bp->b_lru, &btp->bt_lru); 97 list_add_tail(&bp->b_lru, &btp->bt_lru);
98 btp->bt_lru_nr++; 98 btp->bt_lru_nr++;
99 bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
99 } 100 }
100 spin_unlock(&btp->bt_lru_lock); 101 spin_unlock(&btp->bt_lru_lock);
101} 102}
@@ -154,7 +155,8 @@ xfs_buf_stale(
154 struct xfs_buftarg *btp = bp->b_target; 155 struct xfs_buftarg *btp = bp->b_target;
155 156
156 spin_lock(&btp->bt_lru_lock); 157 spin_lock(&btp->bt_lru_lock);
157 if (!list_empty(&bp->b_lru)) { 158 if (!list_empty(&bp->b_lru) &&
159 !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
158 list_del_init(&bp->b_lru); 160 list_del_init(&bp->b_lru);
159 btp->bt_lru_nr--; 161 btp->bt_lru_nr--;
160 atomic_dec(&bp->b_hold); 162 atomic_dec(&bp->b_hold);
@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink(
1501 */ 1503 */
1502 list_move(&bp->b_lru, &dispose); 1504 list_move(&bp->b_lru, &dispose);
1503 btp->bt_lru_nr--; 1505 btp->bt_lru_nr--;
1506 bp->b_lru_flags |= _XBF_LRU_DISPOSE;
1504 } 1507 }
1505 spin_unlock(&btp->bt_lru_lock); 1508 spin_unlock(&btp->bt_lru_lock);
1506 1509
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d03b73b9604e..7c0b6a0a1557 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -38,27 +38,28 @@ typedef enum {
38 XBRW_ZERO = 3, /* Zero target memory */ 38 XBRW_ZERO = 3, /* Zero target memory */
39} xfs_buf_rw_t; 39} xfs_buf_rw_t;
40 40
41#define XBF_READ (1 << 0) /* buffer intended for reading from device */ 41#define XBF_READ (1 << 0) /* buffer intended for reading from device */
42#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 42#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
43#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ 43#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
44#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 44#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
45#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 45#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
46#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 46#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
47 47
48/* I/O hints for the BIO layer */ 48/* I/O hints for the BIO layer */
49#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ 49#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
50#define XBF_FUA (1 << 11)/* force cache write through mode */ 50#define XBF_FUA (1 << 11)/* force cache write through mode */
51#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ 51#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
52 52
53/* flags used only as arguments to access routines */ 53/* flags used only as arguments to access routines */
54#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ 54#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
55#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ 55#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
56 56
57/* flags used only internally */ 57/* flags used only internally */
58#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ 58#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
59#define _XBF_KMEM (1 << 21)/* backed by heap memory */ 59#define _XBF_KMEM (1 << 21)/* backed by heap memory */
60#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 60#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
61#define _XBF_COMPOUND (1 << 23)/* compound buffer */ 61#define _XBF_COMPOUND (1 << 23)/* compound buffer */
62#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
62 63
63typedef unsigned int xfs_buf_flags_t; 64typedef unsigned int xfs_buf_flags_t;
64 65
@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t;
72 { XBF_SYNCIO, "SYNCIO" }, \ 73 { XBF_SYNCIO, "SYNCIO" }, \
73 { XBF_FUA, "FUA" }, \ 74 { XBF_FUA, "FUA" }, \
74 { XBF_FLUSH, "FLUSH" }, \ 75 { XBF_FLUSH, "FLUSH" }, \
75 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ 76 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
76 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ 77 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
77 { _XBF_PAGES, "PAGES" }, \ 78 { _XBF_PAGES, "PAGES" }, \
78 { _XBF_KMEM, "KMEM" }, \ 79 { _XBF_KMEM, "KMEM" }, \
79 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 80 { _XBF_DELWRI_Q, "DELWRI_Q" }, \
80 { _XBF_COMPOUND, "COMPOUND" } 81 { _XBF_COMPOUND, "COMPOUND" }, \
82 { _XBF_LRU_DISPOSE, "LRU_DISPOSE" }
81 83
82typedef struct xfs_buftarg { 84typedef struct xfs_buftarg {
83 dev_t bt_dev; 85 dev_t bt_dev;
@@ -124,7 +126,12 @@ typedef struct xfs_buf {
124 xfs_buf_flags_t b_flags; /* status flags */ 126 xfs_buf_flags_t b_flags; /* status flags */
125 struct semaphore b_sema; /* semaphore for lockables */ 127 struct semaphore b_sema; /* semaphore for lockables */
126 128
129 /*
130 * concurrent access to b_lru and b_lru_flags are protected by
131 * bt_lru_lock and not by b_sema
132 */
127 struct list_head b_lru; /* lru list */ 133 struct list_head b_lru; /* lru list */
134 xfs_buf_flags_t b_lru_flags; /* internal lru status flags */
128 wait_queue_head_t b_waiters; /* unpin waiters */ 135 wait_queue_head_t b_waiters; /* unpin waiters */
129 struct list_head b_list; 136 struct list_head b_list;
130 struct xfs_perag *b_pag; /* contains rbtree root */ 137 struct xfs_perag *b_pag; /* contains rbtree root */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e00de08dc8ac..b9b8646e62db 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -48,44 +48,44 @@ xfs_swapext(
48 xfs_swapext_t *sxp) 48 xfs_swapext_t *sxp)
49{ 49{
50 xfs_inode_t *ip, *tip; 50 xfs_inode_t *ip, *tip;
51 struct file *file, *tmp_file; 51 struct fd f, tmp;
52 int error = 0; 52 int error = 0;
53 53
54 /* Pull information for the target fd */ 54 /* Pull information for the target fd */
55 file = fget((int)sxp->sx_fdtarget); 55 f = fdget((int)sxp->sx_fdtarget);
56 if (!file) { 56 if (!f.file) {
57 error = XFS_ERROR(EINVAL); 57 error = XFS_ERROR(EINVAL);
58 goto out; 58 goto out;
59 } 59 }
60 60
61 if (!(file->f_mode & FMODE_WRITE) || 61 if (!(f.file->f_mode & FMODE_WRITE) ||
62 !(file->f_mode & FMODE_READ) || 62 !(f.file->f_mode & FMODE_READ) ||
63 (file->f_flags & O_APPEND)) { 63 (f.file->f_flags & O_APPEND)) {
64 error = XFS_ERROR(EBADF); 64 error = XFS_ERROR(EBADF);
65 goto out_put_file; 65 goto out_put_file;
66 } 66 }
67 67
68 tmp_file = fget((int)sxp->sx_fdtmp); 68 tmp = fdget((int)sxp->sx_fdtmp);
69 if (!tmp_file) { 69 if (!tmp.file) {
70 error = XFS_ERROR(EINVAL); 70 error = XFS_ERROR(EINVAL);
71 goto out_put_file; 71 goto out_put_file;
72 } 72 }
73 73
74 if (!(tmp_file->f_mode & FMODE_WRITE) || 74 if (!(tmp.file->f_mode & FMODE_WRITE) ||
75 !(tmp_file->f_mode & FMODE_READ) || 75 !(tmp.file->f_mode & FMODE_READ) ||
76 (tmp_file->f_flags & O_APPEND)) { 76 (tmp.file->f_flags & O_APPEND)) {
77 error = XFS_ERROR(EBADF); 77 error = XFS_ERROR(EBADF);
78 goto out_put_tmp_file; 78 goto out_put_tmp_file;
79 } 79 }
80 80
81 if (IS_SWAPFILE(file->f_path.dentry->d_inode) || 81 if (IS_SWAPFILE(f.file->f_path.dentry->d_inode) ||
82 IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { 82 IS_SWAPFILE(tmp.file->f_path.dentry->d_inode)) {
83 error = XFS_ERROR(EINVAL); 83 error = XFS_ERROR(EINVAL);
84 goto out_put_tmp_file; 84 goto out_put_tmp_file;
85 } 85 }
86 86
87 ip = XFS_I(file->f_path.dentry->d_inode); 87 ip = XFS_I(f.file->f_path.dentry->d_inode);
88 tip = XFS_I(tmp_file->f_path.dentry->d_inode); 88 tip = XFS_I(tmp.file->f_path.dentry->d_inode);
89 89
90 if (ip->i_mount != tip->i_mount) { 90 if (ip->i_mount != tip->i_mount) {
91 error = XFS_ERROR(EINVAL); 91 error = XFS_ERROR(EINVAL);
@@ -105,9 +105,9 @@ xfs_swapext(
105 error = xfs_swap_extents(ip, tip, sxp); 105 error = xfs_swap_extents(ip, tip, sxp);
106 106
107 out_put_tmp_file: 107 out_put_tmp_file:
108 fput(tmp_file); 108 fdput(tmp);
109 out_put_file: 109 out_put_file:
110 fput(file); 110 fdput(f);
111 out: 111 out:
112 return error; 112 return error;
113} 113}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index f9c3fe304a17..69cf4fcde03e 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -179,12 +179,14 @@ xfs_ioc_trim(
179 * used by the fstrim application. In the end it really doesn't 179 * used by the fstrim application. In the end it really doesn't
180 * matter as trimming blocks is an advisory interface. 180 * matter as trimming blocks is an advisory interface.
181 */ 181 */
182 if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
183 range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)))
184 return -XFS_ERROR(EINVAL);
185
182 start = BTOBB(range.start); 186 start = BTOBB(range.start);
183 end = start + BTOBBT(range.len) - 1; 187 end = start + BTOBBT(range.len) - 1;
184 minlen = BTOBB(max_t(u64, granularity, range.minlen)); 188 minlen = BTOBB(max_t(u64, granularity, range.minlen));
185 189
186 if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
187 return -XFS_ERROR(EINVAL);
188 if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) 190 if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
189 end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; 191 end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
190 192
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 56afcdb2377d..aa473fa640a2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
36 36
37#include <linux/dcache.h> 37#include <linux/dcache.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/pagevec.h>
39 40
40static const struct vm_operations_struct xfs_file_vm_ops; 41static const struct vm_operations_struct xfs_file_vm_ops;
41 42
@@ -939,7 +940,6 @@ xfs_file_mmap(
939 struct vm_area_struct *vma) 940 struct vm_area_struct *vma)
940{ 941{
941 vma->vm_ops = &xfs_file_vm_ops; 942 vma->vm_ops = &xfs_file_vm_ops;
942 vma->vm_flags |= VM_CAN_NONLINEAR;
943 943
944 file_accessed(filp); 944 file_accessed(filp);
945 return 0; 945 return 0;
@@ -959,17 +959,232 @@ xfs_vm_page_mkwrite(
959 return block_page_mkwrite(vma, vmf, xfs_get_blocks); 959 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
960} 960}
961 961
/*
 * Kind of offset to look for in the page cache, on behalf of either
 * xfs_seek_data() or xfs_seek_hole().
 */
enum {
	HOLE_OFF = 0,	/* search for the start of a hole */
	DATA_OFF = 1,	/* search for the start of data */
};
970
971/*
972 * Lookup the desired type of offset from the given page.
973 *
974 * On success, return true and the offset argument will point to the
975 * start of the region that was found. Otherwise this function will
976 * return false and keep the offset argument unchanged.
977 */
978STATIC bool
979xfs_lookup_buffer_offset(
980 struct page *page,
981 loff_t *offset,
982 unsigned int type)
983{
984 loff_t lastoff = page_offset(page);
985 bool found = false;
986 struct buffer_head *bh, *head;
987
988 bh = head = page_buffers(page);
989 do {
990 /*
991 * Unwritten extents that have data in the page
992 * cache covering them can be identified by the
993 * BH_Unwritten state flag. Pages with multiple
994 * buffers might have a mix of holes, data and
995 * unwritten extents - any buffer with valid
996 * data in it should have BH_Uptodate flag set
997 * on it.
998 */
999 if (buffer_unwritten(bh) ||
1000 buffer_uptodate(bh)) {
1001 if (type == DATA_OFF)
1002 found = true;
1003 } else {
1004 if (type == HOLE_OFF)
1005 found = true;
1006 }
1007
1008 if (found) {
1009 *offset = lastoff;
1010 break;
1011 }
1012 lastoff += bh->b_size;
1013 } while ((bh = bh->b_this_page) != head);
1014
1015 return found;
1016}
1017
1018/*
1019 * Search the page cache for unwritten extents and return the first data
1020 * or hole offset, according to the desired type, on behalf of
1021 * xfs_seek_data() or xfs_seek_hole().
1022 *
1023 * On entry, *offset is the file offset at which the search starts. The
1024 * extent described by map supplies the end point of the page range that
1025 * is looked up.
1026 *
1027 * Return true if the desired type of offset was found, in which case
1028 * *offset holds that offset. Otherwise, return false and leave *offset
1029 * unchanged.
1030 */
1031STATIC bool
1032xfs_find_get_desired_pgoff(
1033 struct inode *inode,
1034 struct xfs_bmbt_irec *map,
1035 unsigned int type,
1036 loff_t *offset)
1037{
1038 struct xfs_inode *ip = XFS_I(inode);
1039 struct xfs_mount *mp = ip->i_mount;
1040 struct pagevec pvec;
1041 pgoff_t index;
1042 pgoff_t end;
1043 loff_t endoff;
1044 loff_t startoff = *offset;
1045 loff_t lastoff = startoff;
1046 bool found = false;
1047
1048 pagevec_init(&pvec, 0);
1049
1050 index = startoff >> PAGE_CACHE_SHIFT;
1051 endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
1052 end = endoff >> PAGE_CACHE_SHIFT;
1053 do {
1054 int want;
1055 unsigned nr_pages;
1056 unsigned int i;
1057
1058 want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
1059 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
1060 want);
1061 /*
1062 * No pages were found in the given range. If we are searching
1063 * for a hole and nothing has been stepped over yet, the given
1064 * offset itself lies in a hole, so return it.
1065 *
1066 * Otherwise we have already stepped through some block buffers
1067 * looking for a hole, but they all contained data. In that case
1068 * lastoff already points to the end of the last page examined;
1069 * if it has not reached the end of the search range, there must
1070 * be a hole between the two.
1071 */
1072 if (nr_pages == 0) {
1073 /* Data search found nothing */
1074 if (type == DATA_OFF)
1075 break;
1076
1077 ASSERT(type == HOLE_OFF);
1078 if (lastoff == startoff || lastoff < endoff) {
1079 found = true;
1080 *offset = lastoff;
1081 }
1082 break;
1083 }
1084
1085 /*
1086 * At least one page was found. If lastoff has not advanced past
1087 * the search offset and the first page starts beyond it, a hole
1088 * was found; *offset is left at the search offset.
1089 */
1090 if (type == HOLE_OFF && lastoff == startoff &&
1091 lastoff < page_offset(pvec.pages[0])) {
1092 found = true;
1093 break;
1094 }
1095
1096 for (i = 0; i < nr_pages; i++) {
1097 struct page *page = pvec.pages[i];
1098 loff_t b_offset;
1099
1100 /*
1101 * At this point, the page may be truncated or
1102 * invalidated (changing page->mapping to NULL),
1103 * or even swizzled back from swapper_space to tmpfs
1104 * file mapping. However, page->index will not change
1105 * because we have a reference on the page.
1106 *
1107 * The search is done if the page index is out of range.
1108 * If the current offset has not reached the end of
1109 * the specified search range, there should be a hole
1110 * between them.
1111 */
1112 if (page->index > end) {
1113 if (type == HOLE_OFF && lastoff < endoff) {
1114 *offset = lastoff;
1115 found = true;
1116 }
1117 goto out;
1118 }
1119
1120 lock_page(page);
1121 /*
1122 * Page truncated or invalidated (page->mapping == NULL).
1123 * We can freely skip it and proceed to check the next
1124 * page.
1125 */
1126 if (unlikely(page->mapping != inode->i_mapping)) {
1127 unlock_page(page);
1128 continue;
1129 }
1130
1131 if (!page_has_buffers(page)) {
1132 unlock_page(page);
1133 continue;
1134 }
1135
1136 found = xfs_lookup_buffer_offset(page, &b_offset, type);
1137 if (found) {
1138 /*
1139 * The offset found may be lower than the point
1140 * the search started from, so never report
1141 * anything below startoff.
1142 */
1143 *offset = max_t(loff_t, startoff, b_offset);
1144 unlock_page(page);
1145 goto out;
1146 }
1147
1148 /*
1149 * We were either searching for data and found none, or
1150 * searching for a hole and found only data buffers. In
1151 * either case the next page may contain what we want,
1152 * so advance lastoff to the end of this page.
1153 */
1154 lastoff = page_offset(page) + PAGE_SIZE;
1155 unlock_page(page);
1156 }
1157
1158 /*
1159 * Fewer pages were returned than requested, so the search is
1160 * done. In this case a data search has found nothing, while a
1161 * hole search has found a hole starting at lastoff.
1162 */
1163 if (nr_pages < want) {
1164 if (type == HOLE_OFF) {
1165 *offset = lastoff;
1166 found = true;
1167 }
1168 break;
1169 }
1170
1171 index = pvec.pages[i - 1]->index + 1;
1172 pagevec_release(&pvec);
1173 } while (index <= end);
1174
1175out:
1176 pagevec_release(&pvec);
1177 return found;
1178}
1179
962STATIC loff_t 1180STATIC loff_t
963xfs_seek_data( 1181xfs_seek_data(
964 struct file *file, 1182 struct file *file,
965 loff_t start, 1183 loff_t start)
966 u32 type)
967{ 1184{
968 struct inode *inode = file->f_mapping->host; 1185 struct inode *inode = file->f_mapping->host;
969 struct xfs_inode *ip = XFS_I(inode); 1186 struct xfs_inode *ip = XFS_I(inode);
970 struct xfs_mount *mp = ip->i_mount; 1187 struct xfs_mount *mp = ip->i_mount;
971 struct xfs_bmbt_irec map[2];
972 int nmap = 2;
973 loff_t uninitialized_var(offset); 1188 loff_t uninitialized_var(offset);
974 xfs_fsize_t isize; 1189 xfs_fsize_t isize;
975 xfs_fileoff_t fsbno; 1190 xfs_fileoff_t fsbno;
@@ -985,36 +1200,74 @@ xfs_seek_data(
985 goto out_unlock; 1200 goto out_unlock;
986 } 1201 }
987 1202
988 fsbno = XFS_B_TO_FSBT(mp, start);
989
990 /* 1203 /*
991 * Try to read extents from the first block indicated 1204 * Try to read extents from the first block indicated
992 * by fsbno to the end block of the file. 1205 * by fsbno to the end block of the file.
993 */ 1206 */
1207 fsbno = XFS_B_TO_FSBT(mp, start);
994 end = XFS_B_TO_FSB(mp, isize); 1208 end = XFS_B_TO_FSB(mp, isize);
1209 for (;;) {
1210 struct xfs_bmbt_irec map[2];
1211 int nmap = 2;
1212 unsigned int i;
995 1213
996 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, 1214 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
997 XFS_BMAPI_ENTIRE); 1215 XFS_BMAPI_ENTIRE);
998 if (error) 1216 if (error)
999 goto out_unlock; 1217 goto out_unlock;
1000 1218
1001 /* 1219 /* No extents at given offset, must be beyond EOF */
1002 * Treat unwritten extent as data extent since it might 1220 if (nmap == 0) {
1003 * contains dirty data in page cache. 1221 error = ENXIO;
1004 */ 1222 goto out_unlock;
1005 if (map[0].br_startblock != HOLESTARTBLOCK) { 1223 }
1006 offset = max_t(loff_t, start, 1224
1007 XFS_FSB_TO_B(mp, map[0].br_startoff)); 1225 for (i = 0; i < nmap; i++) {
1008 } else { 1226 offset = max_t(loff_t, start,
1227 XFS_FSB_TO_B(mp, map[i].br_startoff));
1228
1229 /* Landed in a data extent */
1230 if (map[i].br_startblock == DELAYSTARTBLOCK ||
1231 (map[i].br_state == XFS_EXT_NORM &&
1232 !isnullstartblock(map[i].br_startblock)))
1233 goto out;
1234
1235 /*
1236 * Landed in an unwritten extent, try to search data
1237 * from page cache.
1238 */
1239 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1240 if (xfs_find_get_desired_pgoff(inode, &map[i],
1241 DATA_OFF, &offset))
1242 goto out;
1243 }
1244 }
1245
1246 /*
1247 * map[0] is hole or its an unwritten extent but
1248 * without data in page cache. Probably means that
1249 * we are reading after EOF if nothing in map[1].
1250 */
1009 if (nmap == 1) { 1251 if (nmap == 1) {
1010 error = ENXIO; 1252 error = ENXIO;
1011 goto out_unlock; 1253 goto out_unlock;
1012 } 1254 }
1013 1255
1014 offset = max_t(loff_t, start, 1256 ASSERT(i > 1);
1015 XFS_FSB_TO_B(mp, map[1].br_startoff)); 1257
1258 /*
1259 * Nothing was found, proceed to the next round of search
1260 * if reading offset not beyond or hit EOF.
1261 */
1262 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1263 start = XFS_FSB_TO_B(mp, fsbno);
1264 if (start >= isize) {
1265 error = ENXIO;
1266 goto out_unlock;
1267 }
1016 } 1268 }
1017 1269
1270out:
1018 if (offset != file->f_pos) 1271 if (offset != file->f_pos)
1019 file->f_pos = offset; 1272 file->f_pos = offset;
1020 1273
@@ -1029,16 +1282,15 @@ out_unlock:
1029STATIC loff_t 1282STATIC loff_t
1030xfs_seek_hole( 1283xfs_seek_hole(
1031 struct file *file, 1284 struct file *file,
1032 loff_t start, 1285 loff_t start)
1033 u32 type)
1034{ 1286{
1035 struct inode *inode = file->f_mapping->host; 1287 struct inode *inode = file->f_mapping->host;
1036 struct xfs_inode *ip = XFS_I(inode); 1288 struct xfs_inode *ip = XFS_I(inode);
1037 struct xfs_mount *mp = ip->i_mount; 1289 struct xfs_mount *mp = ip->i_mount;
1038 loff_t uninitialized_var(offset); 1290 loff_t uninitialized_var(offset);
1039 loff_t holeoff;
1040 xfs_fsize_t isize; 1291 xfs_fsize_t isize;
1041 xfs_fileoff_t fsbno; 1292 xfs_fileoff_t fsbno;
1293 xfs_filblks_t end;
1042 uint lock; 1294 uint lock;
1043 int error; 1295 int error;
1044 1296
@@ -1054,21 +1306,77 @@ xfs_seek_hole(
1054 } 1306 }
1055 1307
1056 fsbno = XFS_B_TO_FSBT(mp, start); 1308 fsbno = XFS_B_TO_FSBT(mp, start);
1057 error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); 1309 end = XFS_B_TO_FSB(mp, isize);
1058 if (error) 1310
1059 goto out_unlock; 1311 for (;;) {
1312 struct xfs_bmbt_irec map[2];
1313 int nmap = 2;
1314 unsigned int i;
1315
1316 error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
1317 XFS_BMAPI_ENTIRE);
1318 if (error)
1319 goto out_unlock;
1320
1321 /* No extents at given offset, must be beyond EOF */
1322 if (nmap == 0) {
1323 error = ENXIO;
1324 goto out_unlock;
1325 }
1326
1327 for (i = 0; i < nmap; i++) {
1328 offset = max_t(loff_t, start,
1329 XFS_FSB_TO_B(mp, map[i].br_startoff));
1330
1331 /* Landed in a hole */
1332 if (map[i].br_startblock == HOLESTARTBLOCK)
1333 goto out;
1334
1335 /*
1336 * Landed in an unwritten extent, try to search hole
1337 * from page cache.
1338 */
1339 if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1340 if (xfs_find_get_desired_pgoff(inode, &map[i],
1341 HOLE_OFF, &offset))
1342 goto out;
1343 }
1344 }
1345
1346 /*
1347 * map[0] contains data or it's unwritten but contains
1348 * data in page cache, probably means that we are
1349 * reading after EOF. We should fix offset to point
1350 * to the end of the file(i.e., there is an implicit
1351 * hole at the end of any file).
1352 */
1353 if (nmap == 1) {
1354 offset = isize;
1355 break;
1356 }
1357
1358 ASSERT(i > 1);
1060 1359
1061 holeoff = XFS_FSB_TO_B(mp, fsbno);
1062 if (holeoff <= start)
1063 offset = start;
1064 else {
1065 /* 1360 /*
1066 * xfs_bmap_first_unused() could return a value bigger than 1361 * Both mappings contains data, proceed to the next round of
1067 * isize if there are no more holes past the supplied offset. 1362 * search if the current reading offset not beyond or hit EOF.
1068 */ 1363 */
1069 offset = min_t(loff_t, holeoff, isize); 1364 fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1365 start = XFS_FSB_TO_B(mp, fsbno);
1366 if (start >= isize) {
1367 offset = isize;
1368 break;
1369 }
1070 } 1370 }
1071 1371
1372out:
1373 /*
1374 * At this point, we must have found a hole. However, the returned
1375 * offset may be bigger than the file size as it may be aligned to
1376 * page boundary for unwritten extents, we need to deal with this
1377 * situation in particular.
1378 */
1379 offset = min_t(loff_t, offset, isize);
1072 if (offset != file->f_pos) 1380 if (offset != file->f_pos)
1073 file->f_pos = offset; 1381 file->f_pos = offset;
1074 1382
@@ -1092,9 +1400,9 @@ xfs_file_llseek(
1092 case SEEK_SET: 1400 case SEEK_SET:
1093 return generic_file_llseek(file, offset, origin); 1401 return generic_file_llseek(file, offset, origin);
1094 case SEEK_DATA: 1402 case SEEK_DATA:
1095 return xfs_seek_data(file, offset, origin); 1403 return xfs_seek_data(file, offset);
1096 case SEEK_HOLE: 1404 case SEEK_HOLE:
1097 return xfs_seek_hole(file, offset, origin); 1405 return xfs_seek_hole(file, offset);
1098 default: 1406 default:
1099 return -EINVAL; 1407 return -EINVAL;
1100 } 1408 }
@@ -1134,4 +1442,5 @@ const struct file_operations xfs_dir_file_operations = {
1134static const struct vm_operations_struct xfs_file_vm_ops = { 1442static const struct vm_operations_struct xfs_file_vm_ops = {
1135 .fault = filemap_fault, 1443 .fault = filemap_fault,
1136 .page_mkwrite = xfs_vm_page_mkwrite, 1444 .page_mkwrite = xfs_vm_page_mkwrite,
1445 .remap_pages = generic_file_remap_pages,
1137}; 1446};
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 21e37b55f7e5..445bf1aef31c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -431,7 +431,7 @@ xfs_ialloc_next_ag(
431 431
432 spin_lock(&mp->m_agirotor_lock); 432 spin_lock(&mp->m_agirotor_lock);
433 agno = mp->m_agirotor; 433 agno = mp->m_agirotor;
434 if (++mp->m_agirotor == mp->m_maxagi) 434 if (++mp->m_agirotor >= mp->m_maxagi)
435 mp->m_agirotor = 0; 435 mp->m_agirotor = 0;
436 spin_unlock(&mp->m_agirotor_lock); 436 spin_unlock(&mp->m_agirotor_lock);
437 437
@@ -962,23 +962,22 @@ xfs_dialloc(
962 if (!pag->pagi_freecount && !okalloc) 962 if (!pag->pagi_freecount && !okalloc)
963 goto nextag; 963 goto nextag;
964 964
965 /*
966 * Then read in the AGI buffer and recheck with the AGI buffer
967 * lock held.
968 */
965 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 969 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
966 if (error) 970 if (error)
967 goto out_error; 971 goto out_error;
968 972
969 /*
970 * Once the AGI has been read in we have to recheck
971 * pagi_freecount with the AGI buffer lock held.
972 */
973 if (pag->pagi_freecount) { 973 if (pag->pagi_freecount) {
974 xfs_perag_put(pag); 974 xfs_perag_put(pag);
975 goto out_alloc; 975 goto out_alloc;
976 } 976 }
977 977
978 if (!okalloc) { 978 if (!okalloc)
979 xfs_trans_brelse(tp, agbp); 979 goto nextag_relse_buffer;
980 goto nextag; 980
981 }
982 981
983 error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); 982 error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
984 if (error) { 983 if (error) {
@@ -1007,6 +1006,8 @@ xfs_dialloc(
1007 return 0; 1006 return 0;
1008 } 1007 }
1009 1008
1009nextag_relse_buffer:
1010 xfs_trans_brelse(tp, agbp);
1010nextag: 1011nextag:
1011 xfs_perag_put(pag); 1012 xfs_perag_put(pag);
1012 if (++agno == mp->m_sb.sb_agcount) 1013 if (++agno == mp->m_sb.sb_agcount)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c3b6d9..8305f2ac6773 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,16 +70,16 @@ xfs_find_handle(
70 int hsize; 70 int hsize;
71 xfs_handle_t handle; 71 xfs_handle_t handle;
72 struct inode *inode; 72 struct inode *inode;
73 struct file *file = NULL; 73 struct fd f;
74 struct path path; 74 struct path path;
75 int error; 75 int error;
76 struct xfs_inode *ip; 76 struct xfs_inode *ip;
77 77
78 if (cmd == XFS_IOC_FD_TO_HANDLE) { 78 if (cmd == XFS_IOC_FD_TO_HANDLE) {
79 file = fget(hreq->fd); 79 f = fdget(hreq->fd);
80 if (!file) 80 if (!f.file)
81 return -EBADF; 81 return -EBADF;
82 inode = file->f_path.dentry->d_inode; 82 inode = f.file->f_path.dentry->d_inode;
83 } else { 83 } else {
84 error = user_lpath((const char __user *)hreq->path, &path); 84 error = user_lpath((const char __user *)hreq->path, &path);
85 if (error) 85 if (error)
@@ -134,7 +134,7 @@ xfs_find_handle(
134 134
135 out_put: 135 out_put:
136 if (cmd == XFS_IOC_FD_TO_HANDLE) 136 if (cmd == XFS_IOC_FD_TO_HANDLE)
137 fput(file); 137 fdput(f);
138 else 138 else
139 path_put(&path); 139 path_put(&path);
140 return error; 140 return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 29c2f83d4147..b2bd3a0e6376 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -440,7 +440,7 @@ xfs_initialize_perag(
440 xfs_agnumber_t agcount, 440 xfs_agnumber_t agcount,
441 xfs_agnumber_t *maxagi) 441 xfs_agnumber_t *maxagi)
442{ 442{
443 xfs_agnumber_t index, max_metadata; 443 xfs_agnumber_t index;
444 xfs_agnumber_t first_initialised = 0; 444 xfs_agnumber_t first_initialised = 0;
445 xfs_perag_t *pag; 445 xfs_perag_t *pag;
446 xfs_agino_t agino; 446 xfs_agino_t agino;
@@ -500,43 +500,10 @@ xfs_initialize_perag(
500 else 500 else
501 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 501 mp->m_flags &= ~XFS_MOUNT_32BITINODES;
502 502
503 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 503 if (mp->m_flags & XFS_MOUNT_32BITINODES)
504 /* 504 index = xfs_set_inode32(mp);
505 * Calculate how much should be reserved for inodes to meet 505 else
506 * the max inode percentage. 506 index = xfs_set_inode64(mp);
507 */
508 if (mp->m_maxicount) {
509 __uint64_t icount;
510
511 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
512 do_div(icount, 100);
513 icount += sbp->sb_agblocks - 1;
514 do_div(icount, sbp->sb_agblocks);
515 max_metadata = icount;
516 } else {
517 max_metadata = agcount;
518 }
519
520 for (index = 0; index < agcount; index++) {
521 ino = XFS_AGINO_TO_INO(mp, index, agino);
522 if (ino > XFS_MAXINUMBER_32) {
523 index++;
524 break;
525 }
526
527 pag = xfs_perag_get(mp, index);
528 pag->pagi_inodeok = 1;
529 if (index < max_metadata)
530 pag->pagf_metadata = 1;
531 xfs_perag_put(pag);
532 }
533 } else {
534 for (index = 0; index < agcount; index++) {
535 pag = xfs_perag_get(mp, index);
536 pag->pagi_inodeok = 1;
537 xfs_perag_put(pag);
538 }
539 }
540 507
541 if (maxagi) 508 if (maxagi)
542 *maxagi = index; 509 *maxagi = index;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 05a05a7b6119..deee09e534dc 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -54,12 +54,7 @@ typedef struct xfs_trans_reservations {
54#include "xfs_sync.h" 54#include "xfs_sync.h"
55 55
56struct xlog; 56struct xlog;
57struct xfs_mount_args;
58struct xfs_inode; 57struct xfs_inode;
59struct xfs_bmbt_irec;
60struct xfs_bmap_free;
61struct xfs_extdelta;
62struct xfs_swapext;
63struct xfs_mru_cache; 58struct xfs_mru_cache;
64struct xfs_nameops; 59struct xfs_nameops;
65struct xfs_ail; 60struct xfs_ail;
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index fed504fc2999..71926d630527 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -97,8 +97,7 @@ xfs_fs_set_xstate(
97STATIC int 97STATIC int
98xfs_fs_get_dqblk( 98xfs_fs_get_dqblk(
99 struct super_block *sb, 99 struct super_block *sb,
100 int type, 100 struct kqid qid,
101 qid_t id,
102 struct fs_disk_quota *fdq) 101 struct fs_disk_quota *fdq)
103{ 102{
104 struct xfs_mount *mp = XFS_M(sb); 103 struct xfs_mount *mp = XFS_M(sb);
@@ -108,14 +107,14 @@ xfs_fs_get_dqblk(
108 if (!XFS_IS_QUOTA_ON(mp)) 107 if (!XFS_IS_QUOTA_ON(mp))
109 return -ESRCH; 108 return -ESRCH;
110 109
111 return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); 110 return -xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
111 xfs_quota_type(qid.type), fdq);
112} 112}
113 113
114STATIC int 114STATIC int
115xfs_fs_set_dqblk( 115xfs_fs_set_dqblk(
116 struct super_block *sb, 116 struct super_block *sb,
117 int type, 117 struct kqid qid,
118 qid_t id,
119 struct fs_disk_quota *fdq) 118 struct fs_disk_quota *fdq)
120{ 119{
121 struct xfs_mount *mp = XFS_M(sb); 120 struct xfs_mount *mp = XFS_M(sb);
@@ -127,7 +126,8 @@ xfs_fs_set_dqblk(
127 if (!XFS_IS_QUOTA_ON(mp)) 126 if (!XFS_IS_QUOTA_ON(mp))
128 return -ESRCH; 127 return -ESRCH;
129 128
130 return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); 129 return -xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid),
130 xfs_quota_type(qid.type), fdq);
131} 131}
132 132
133const struct quotactl_ops xfs_quotactl_operations = { 133const struct quotactl_ops xfs_quotactl_operations = {
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 92d4331cd4f1..ca28a4ba4b54 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -857,7 +857,7 @@ xfs_rtbuf_get(
857 xfs_buf_t *bp; /* block buffer, result */ 857 xfs_buf_t *bp; /* block buffer, result */
858 xfs_inode_t *ip; /* bitmap or summary inode */ 858 xfs_inode_t *ip; /* bitmap or summary inode */
859 xfs_bmbt_irec_t map; 859 xfs_bmbt_irec_t map;
860 int nmap; 860 int nmap = 1;
861 int error; /* error value */ 861 int error; /* error value */
862 862
863 ip = issum ? mp->m_rsumip : mp->m_rbmip; 863 ip = issum ? mp->m_rsumip : mp->m_rbmip;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index bdaf4cb9f4a2..26a09bd7f975 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -88,6 +88,8 @@ mempool_t *xfs_ioend_pool;
88 * unwritten extent conversion */ 88 * unwritten extent conversion */
89#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ 89#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
90#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ 90#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
91#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to
92 * XFS_MAXINUMBER_32 */
91#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ 93#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
92#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ 94#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
93#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ 95#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
@@ -120,12 +122,18 @@ mempool_t *xfs_ioend_pool;
120 * in the future, too. 122 * in the future, too.
121 */ 123 */
122enum { 124enum {
123 Opt_barrier, Opt_nobarrier, Opt_err 125 Opt_barrier,
126 Opt_nobarrier,
127 Opt_inode64,
128 Opt_inode32,
129 Opt_err
124}; 130};
125 131
126static const match_table_t tokens = { 132static const match_table_t tokens = {
127 {Opt_barrier, "barrier"}, 133 {Opt_barrier, "barrier"},
128 {Opt_nobarrier, "nobarrier"}, 134 {Opt_nobarrier, "nobarrier"},
135 {Opt_inode64, "inode64"},
136 {Opt_inode32, "inode32"},
129 {Opt_err, NULL} 137 {Opt_err, NULL}
130}; 138};
131 139
@@ -197,7 +205,9 @@ xfs_parseargs(
197 */ 205 */
198 mp->m_flags |= XFS_MOUNT_BARRIER; 206 mp->m_flags |= XFS_MOUNT_BARRIER;
199 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 207 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
208#if !XFS_BIG_INUMS
200 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 209 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
210#endif
201 211
202 /* 212 /*
203 * These can be overridden by the mount option parsing. 213 * These can be overridden by the mount option parsing.
@@ -294,6 +304,8 @@ xfs_parseargs(
294 return EINVAL; 304 return EINVAL;
295 } 305 }
296 dswidth = simple_strtoul(value, &eov, 10); 306 dswidth = simple_strtoul(value, &eov, 10);
307 } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
308 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
297 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 309 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
298 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; 310 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
299#if !XFS_BIG_INUMS 311#if !XFS_BIG_INUMS
@@ -492,6 +504,7 @@ xfs_showargs(
492 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 504 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
493 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 505 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
494 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, 506 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
507 { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
495 { 0, NULL } 508 { 0, NULL }
496 }; 509 };
497 static struct proc_xfs_info xfs_info_unset[] = { 510 static struct proc_xfs_info xfs_info_unset[] = {
@@ -591,6 +604,80 @@ xfs_max_file_offset(
591 return (((__uint64_t)pagefactor) << bitshift) - 1; 604 return (((__uint64_t)pagefactor) << bitshift) - 1;
592} 605}
593 606
607xfs_agnumber_t
608xfs_set_inode32(struct xfs_mount *mp)
609{
610 xfs_agnumber_t index = 0;
611 xfs_agnumber_t maxagi = 0;
612 xfs_sb_t *sbp = &mp->m_sb;
613 xfs_agnumber_t max_metadata;
614 xfs_agino_t agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks -1, 0);
615 xfs_ino_t ino = XFS_AGINO_TO_INO(mp, sbp->sb_agcount -1, agino);
616 xfs_perag_t *pag;
617
618 /* Calculate how much should be reserved for inodes to meet
619 * the max inode percentage.
620 */
621 if (mp->m_maxicount) {
622 __uint64_t icount;
623
624 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
625 do_div(icount, 100);
626 icount += sbp->sb_agblocks - 1;
627 do_div(icount, sbp->sb_agblocks);
628 max_metadata = icount;
629 } else {
630 max_metadata = sbp->sb_agcount;
631 }
632
633 for (index = 0; index < sbp->sb_agcount; index++) {
634 ino = XFS_AGINO_TO_INO(mp, index, agino);
635
636 if (ino > XFS_MAXINUMBER_32) {
637 pag = xfs_perag_get(mp, index);
638 pag->pagi_inodeok = 0;
639 pag->pagf_metadata = 0;
640 xfs_perag_put(pag);
641 continue;
642 }
643
644 pag = xfs_perag_get(mp, index);
645 pag->pagi_inodeok = 1;
646 maxagi++;
647 if (index < max_metadata)
648 pag->pagf_metadata = 1;
649 xfs_perag_put(pag);
650 }
651 mp->m_flags |= (XFS_MOUNT_32BITINODES |
652 XFS_MOUNT_SMALL_INUMS);
653
654 return maxagi;
655}
656
657xfs_agnumber_t
658xfs_set_inode64(struct xfs_mount *mp)
659{
660 xfs_agnumber_t index = 0;
661
662 for (index = 0; index < mp->m_sb.sb_agcount; index++) {
663 struct xfs_perag *pag;
664
665 pag = xfs_perag_get(mp, index);
666 pag->pagi_inodeok = 1;
667 pag->pagf_metadata = 0;
668 xfs_perag_put(pag);
669 }
670
671 /* There is no need for lock protection on m_flags,
672 * the rw_semaphore of the VFS superblock is locked
673 * during mount/umount/remount operations, so this is
674 * enough to avoid concurrency on the m_flags field
675 */
676 mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
677 XFS_MOUNT_SMALL_INUMS);
678 return index;
679}
680
594STATIC int 681STATIC int
595xfs_blkdev_get( 682xfs_blkdev_get(
596 xfs_mount_t *mp, 683 xfs_mount_t *mp,
@@ -919,6 +1006,7 @@ xfs_fs_put_super(
919 struct xfs_mount *mp = XFS_M(sb); 1006 struct xfs_mount *mp = XFS_M(sb);
920 1007
921 xfs_filestream_unmount(mp); 1008 xfs_filestream_unmount(mp);
1009 cancel_delayed_work_sync(&mp->m_sync_work);
922 xfs_unmountfs(mp); 1010 xfs_unmountfs(mp);
923 xfs_syncd_stop(mp); 1011 xfs_syncd_stop(mp);
924 xfs_freesb(mp); 1012 xfs_freesb(mp);
@@ -953,7 +1041,7 @@ xfs_fs_sync_fs(
953 * We schedule xfssyncd now (now that the disk is 1041 * We schedule xfssyncd now (now that the disk is
954 * active) instead of later (when it might not be). 1042 * active) instead of later (when it might not be).
955 */ 1043 */
956 flush_delayed_work_sync(&mp->m_sync_work); 1044 flush_delayed_work(&mp->m_sync_work);
957 } 1045 }
958 1046
959 return 0; 1047 return 0;
@@ -1055,6 +1143,12 @@ xfs_fs_remount(
1055 case Opt_nobarrier: 1143 case Opt_nobarrier:
1056 mp->m_flags &= ~XFS_MOUNT_BARRIER; 1144 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1057 break; 1145 break;
1146 case Opt_inode64:
1147 mp->m_maxagi = xfs_set_inode64(mp);
1148 break;
1149 case Opt_inode32:
1150 mp->m_maxagi = xfs_set_inode32(mp);
1151 break;
1058 default: 1152 default:
1059 /* 1153 /*
1060 * Logically we would return an error here to prevent 1154 * Logically we would return an error here to prevent
@@ -1505,6 +1599,11 @@ xfs_init_zones(void)
1505STATIC void 1599STATIC void
1506xfs_destroy_zones(void) 1600xfs_destroy_zones(void)
1507{ 1601{
1602 /*
1603 * Make sure all delayed rcu free are flushed before we
1604 * destroy caches.
1605 */
1606 rcu_barrier();
1508 kmem_zone_destroy(xfs_ili_zone); 1607 kmem_zone_destroy(xfs_ili_zone);
1509 kmem_zone_destroy(xfs_inode_zone); 1608 kmem_zone_destroy(xfs_inode_zone);
1510 kmem_zone_destroy(xfs_efi_zone); 1609 kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 09b0c26b2245..9de4a920ba05 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -75,6 +75,8 @@ struct block_device;
75extern __uint64_t xfs_max_file_offset(unsigned int); 75extern __uint64_t xfs_max_file_offset(unsigned int);
76 76
77extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 77extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
78extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *);
79extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *);
78 80
79extern const struct export_operations xfs_export_operations; 81extern const struct export_operations xfs_export_operations;
80extern const struct xattr_handler *xfs_xattr_handlers[]; 82extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 96548176db80..9500caf15acf 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -475,7 +475,7 @@ xfs_flush_inodes(
475 struct xfs_mount *mp = ip->i_mount; 475 struct xfs_mount *mp = ip->i_mount;
476 476
477 queue_work(xfs_syncd_wq, &mp->m_flush_work); 477 queue_work(xfs_syncd_wq, &mp->m_flush_work);
478 flush_work_sync(&mp->m_flush_work); 478 flush_work(&mp->m_flush_work);
479} 479}
480 480
481STATIC void 481STATIC void
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e5795dd6013a..7d36ccf57f93 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -37,6 +37,7 @@ struct xlog_recover;
37struct xlog_recover_item; 37struct xlog_recover_item;
38struct xfs_buf_log_format; 38struct xfs_buf_log_format;
39struct xfs_inode_log_format; 39struct xfs_inode_log_format;
40struct xfs_bmbt_irec;
40 41
41DECLARE_EVENT_CLASS(xfs_attr_list_class, 42DECLARE_EVENT_CLASS(xfs_attr_list_class,
42 TP_PROTO(struct xfs_attr_list_context *ctx), 43 TP_PROTO(struct xfs_attr_list_context *ctx),
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index bcb60542fcf1..0c7fa54f309e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -578,9 +578,11 @@ xfs_quota_warn(
578 /* no warnings for project quotas - we just return ENOSPC later */ 578 /* no warnings for project quotas - we just return ENOSPC later */
579 if (dqp->dq_flags & XFS_DQ_PROJ) 579 if (dqp->dq_flags & XFS_DQ_PROJ)
580 return; 580 return;
581 quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, 581 quota_send_warning(make_kqid(&init_user_ns,
582 be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, 582 (dqp->dq_flags & XFS_DQ_USER) ?
583 type); 583 USRQUOTA : GRPQUOTA,
584 be32_to_cpu(dqp->q_core.d_id)),
585 mp->m_super->s_dev, type);
584} 586}
585 587
586/* 588/*