aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c87
-rw-r--r--fs/9p/vfs_dir.c15
-rw-r--r--fs/9p/vfs_file.c326
-rw-r--r--fs/9p/xattr.c80
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/dir_fplus.c1
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/super.c20
-rw-r--r--fs/affs/affs.h28
-rw-r--r--fs/affs/amigaffs.c3
-rw-r--r--fs/affs/file.c15
-rw-r--r--fs/affs/inode.c32
-rw-r--r--fs/affs/namei.c6
-rw-r--r--fs/affs/super.c43
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/misc.c16
-rw-r--r--fs/afs/rxrpc.c5
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c258
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/befs/befs.h22
-rw-r--r--fs/befs/datastream.c4
-rw-r--r--fs/befs/io.c2
-rw-r--r--fs/befs/linuxvfs.c16
-rw-r--r--fs/befs/super.c4
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/binfmt_misc.c30
-rw-r--r--fs/block_dev.c24
-rw-r--r--fs/btrfs/file.c30
-rw-r--r--fs/btrfs/inode.c24
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/addr.c3
-rw-r--r--fs/ceph/file.c27
-rw-r--r--fs/cifs/cifsfs.c12
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/cifs/file.c93
-rw-r--r--fs/coda/file.c38
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dax.c44
-rw-r--r--fs/dcache.c49
-rw-r--r--fs/debugfs/inode.c5
-rw-r--r--fs/direct-io.c44
-rw-r--r--fs/ecryptfs/file.c9
-rw-r--r--fs/exec.c88
-rw-r--r--fs/exofs/file.c2
-rw-r--r--fs/exofs/inode.c4
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/file.c21
-rw-r--r--fs/ext2/inode.c18
-rw-r--r--fs/ext2/namei.c10
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/inode.c16
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext3/xattr.c3
-rw-r--r--fs/ext4/Kconfig17
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/acl.c5
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/bitmap.c1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/crypto.c558
-rw-r--r--fs/ext4/crypto_fname.c709
-rw-r--r--fs/ext4/crypto_key.c165
-rw-r--r--fs/ext4/crypto_policy.c194
-rw-r--r--fs/ext4/dir.c81
-rw-r--r--fs/ext4/ext4.h174
-rw-r--r--fs/ext4/ext4_crypto.h147
-rw-r--r--fs/ext4/extents.c81
-rw-r--r--fs/ext4/extents_status.c2
-rw-r--r--fs/ext4/file.c77
-rw-r--r--fs/ext4/fsync.c1
-rw-r--r--fs/ext4/hash.c1
-rw-r--r--fs/ext4/ialloc.c28
-rw-r--r--fs/ext4/indirect.c27
-rw-r--r--fs/ext4/inline.c16
-rw-r--r--fs/ext4/inode.c164
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/namei.c647
-rw-r--r--fs/ext4/page-io.c48
-rw-r--r--fs/ext4/readpage.c328
-rw-r--r--fs/ext4/super.c58
-rw-r--r--fs/ext4/symlink.c97
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/ext4/xattr.h3
-rw-r--r--fs/f2fs/Kconfig2
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/checkpoint.c38
-rw-r--r--fs/f2fs/data.c766
-rw-r--r--fs/f2fs/debug.c22
-rw-r--r--fs/f2fs/dir.c93
-rw-r--r--fs/f2fs/f2fs.h174
-rw-r--r--fs/f2fs/file.c66
-rw-r--r--fs/f2fs/gc.c6
-rw-r--r--fs/f2fs/inline.c69
-rw-r--r--fs/f2fs/inode.c25
-rw-r--r--fs/f2fs/namei.c81
-rw-r--r--fs/f2fs/node.c18
-rw-r--r--fs/f2fs/node.h1
-rw-r--r--fs/f2fs/recovery.c76
-rw-r--r--fs/f2fs/segment.c17
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c40
-rw-r--r--fs/f2fs/xattr.c4
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/fat.h5
-rw-r--r--fs/fat/fatent.c3
-rw-r--r--fs/fat/file.c6
-rw-r--r--fs/fat/inode.c23
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/file.c3
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fs_pin.c4
-rw-r--r--fs/fuse/cuse.c27
-rw-r--r--fs/fuse/dev.c64
-rw-r--r--fs/fuse/file.c151
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c24
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c108
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/quota.c90
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c12
-rw-r--r--fs/hfsplus/bfind.c4
-rw-r--r--fs/hfsplus/catalog.c3
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c19
-rw-r--r--fs/hfsplus/ioctl.c12
-rw-r--r--fs/hfsplus/xattr.c86
-rw-r--r--fs/hfsplus/xattr.h22
-rw-r--r--fs/hfsplus/xattr_security.c38
-rw-r--r--fs/hfsplus/xattr_trusted.c37
-rw-r--r--fs/hfsplus/xattr_user.c35
-rw-r--r--fs/hostfs/hostfs.h6
-rw-r--r--fs/hostfs/hostfs_kern.c114
-rw-r--r--fs/hostfs/hostfs_user.c29
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hugetlbfs/inode.c183
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/inode.c10
-rw-r--r--fs/jfs/jfs_metapage.c31
-rw-r--r--fs/jfs/jfs_metapage.h1
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/locks.c94
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/namei.c176
-rw-r--r--fs/namespace.c142
-rw-r--r--fs/ncpfs/file.c88
-rw-r--r--fs/ncpfs/ncplib_kernel.c6
-rw-r--r--fs/ncpfs/ncplib_kernel.h2
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/direct.c40
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/read.c8
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/nfs4state.c18
-rw-r--r--fs/nilfs2/alloc.c5
-rw-r--r--fs/nilfs2/bmap.c48
-rw-r--r--fs/nilfs2/bmap.h13
-rw-r--r--fs/nilfs2/btree.c63
-rw-r--r--fs/nilfs2/cpfile.c58
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/inode.c37
-rw-r--r--fs/nilfs2/mdt.c54
-rw-r--r--fs/nilfs2/mdt.h10
-rw-r--r--fs/nilfs2/page.c24
-rw-r--r--fs/nilfs2/segment.c17
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c778
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ocfs2/alloc.c48
-rw-r--r--fs/ocfs2/aops.c178
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/masklog.h5
-rw-r--r--fs/ocfs2/dir.c15
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/file.c132
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c8
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c37
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/open.c9
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/pnode.c60
-rw-r--r--fs/pnode.h7
-rw-r--r--fs/proc/array.c26
-rw-r--r--fs/proc/base.c82
-rw-r--r--fs/proc/fd.c27
-rw-r--r--fs/pstore/inode.c3
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/quota/dquot.c151
-rw-r--r--fs/quota/quota.c217
-rw-r--r--fs/quota/quota_tree.c7
-rw-r--r--fs/quota/quota_v2.c12
-rw-r--r--fs/quota/quotaio_v2.h6
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/read_write.c213
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/reiserfs.h1
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/romfs/mmap-nommu.c1
-rw-r--r--fs/splice.c31
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/group.c11
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/commit.c12
-rw-r--r--fs/ubifs/compress.c22
-rw-r--r--fs/ubifs/debug.c186
-rw-r--r--fs/ubifs/dir.c23
-rw-r--r--fs/ubifs/file.c20
-rw-r--r--fs/ubifs/io.c40
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c17
-rw-r--r--fs/ubifs/log.c4
-rw-r--r--fs/ubifs/lprops.c62
-rw-r--r--fs/ubifs/lpt.c59
-rw-r--r--fs/ubifs/lpt_commit.c34
-rw-r--r--fs/ubifs/master.c6
-rw-r--r--fs/ubifs/orphan.c26
-rw-r--r--fs/ubifs/recovery.c44
-rw-r--r--fs/ubifs/replay.c34
-rw-r--r--fs/ubifs/sb.c30
-rw-r--r--fs/ubifs/scan.c24
-rw-r--r--fs/ubifs/super.c107
-rw-r--r--fs/ubifs/tnc.c20
-rw-r--r--fs/ubifs/tnc_commit.c12
-rw-r--r--fs/ubifs/tnc_misc.c24
-rw-r--r--fs/ubifs/ubifs.h40
-rw-r--r--fs/ubifs/xattr.c18
-rw-r--r--fs/udf/balloc.c20
-rw-r--r--fs/udf/dir.c1
-rw-r--r--fs/udf/directory.c1
-rw-r--r--fs/udf/file.c30
-rw-r--r--fs/udf/inode.c12
-rw-r--r--fs/udf/misc.c1
-rw-r--r--fs/udf/namei.c10
-rw-r--r--fs/udf/partition.c1
-rw-r--r--fs/udf/super.c1
-rw-r--r--fs/udf/symlink.c1
-rw-r--r--fs/udf/truncate.c1
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/xfs/xfs_aops.c13
-rw-r--r--fs/xfs/xfs_file.c42
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c176
-rw-r--r--fs/xfs/xfs_quotaops.c117
287 files changed, 8938 insertions, 4309 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 099c7712631c..fb9ffcb43277 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -78,7 +78,6 @@ enum p9_cache_modes {
78 * @cache: cache mode of type &p9_cache_modes 78 * @cache: cache mode of type &p9_cache_modes
79 * @cachetag: the tag of the cache associated with this session 79 * @cachetag: the tag of the cache associated with this session
80 * @fscache: session cookie associated with FS-Cache 80 * @fscache: session cookie associated with FS-Cache
81 * @options: copy of options string given by user
82 * @uname: string user name to mount hierarchy as 81 * @uname: string user name to mount hierarchy as
83 * @aname: mount specifier for remote hierarchy 82 * @aname: mount specifier for remote hierarchy
84 * @maxdata: maximum data to be sent/recvd per protocol message 83 * @maxdata: maximum data to be sent/recvd per protocol message
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b83ebfbf3fdc..5a0db6dec8d1 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -68,14 +68,10 @@ int v9fs_file_open(struct inode *inode, struct file *file);
68void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); 68void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
69int v9fs_uflags2omode(int uflags, int extended); 69int v9fs_uflags2omode(int uflags, int extended);
70 70
71ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
72ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
73void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
74int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
75int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, 73int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
76 int datasync); 74 int datasync);
77ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
78 const char __user *, size_t, loff_t *, int);
79int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); 75int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
80int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode); 76int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
81static inline void v9fs_invalidate_inode_attr(struct inode *inode) 77static inline void v9fs_invalidate_inode_attr(struct inode *inode)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..e9e04376c52c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/aio.h> 36#include <linux/uio.h>
37#include <net/9p/9p.h> 37#include <net/9p/9p.h>
38#include <net/9p/client.h> 38#include <net/9p/client.h>
39 39
@@ -51,12 +51,11 @@
51 */ 51 */
52static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) 52static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
53{ 53{
54 int retval; 54 struct inode *inode = page->mapping->host;
55 loff_t offset; 55 struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
56 char *buffer; 56 struct iov_iter to;
57 struct inode *inode; 57 int retval, err;
58 58
59 inode = page->mapping->host;
60 p9_debug(P9_DEBUG_VFS, "\n"); 59 p9_debug(P9_DEBUG_VFS, "\n");
61 60
62 BUG_ON(!PageLocked(page)); 61 BUG_ON(!PageLocked(page));
@@ -65,16 +64,16 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
65 if (retval == 0) 64 if (retval == 0)
66 return retval; 65 return retval;
67 66
68 buffer = kmap(page); 67 iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
69 offset = page_offset(page);
70 68
71 retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset); 69 retval = p9_client_read(fid, page_offset(page), &to, &err);
72 if (retval < 0) { 70 if (err) {
73 v9fs_uncache_page(inode, page); 71 v9fs_uncache_page(inode, page);
72 retval = err;
74 goto done; 73 goto done;
75 } 74 }
76 75
77 memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); 76 zero_user(page, retval, PAGE_SIZE - retval);
78 flush_dcache_page(page); 77 flush_dcache_page(page);
79 SetPageUptodate(page); 78 SetPageUptodate(page);
80 79
@@ -82,7 +81,6 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
82 retval = 0; 81 retval = 0;
83 82
84done: 83done:
85 kunmap(page);
86 unlock_page(page); 84 unlock_page(page);
87 return retval; 85 return retval;
88} 86}
@@ -161,41 +159,32 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset,
161 159
162static int v9fs_vfs_writepage_locked(struct page *page) 160static int v9fs_vfs_writepage_locked(struct page *page)
163{ 161{
164 char *buffer;
165 int retval, len;
166 loff_t offset, size;
167 mm_segment_t old_fs;
168 struct v9fs_inode *v9inode;
169 struct inode *inode = page->mapping->host; 162 struct inode *inode = page->mapping->host;
163 struct v9fs_inode *v9inode = V9FS_I(inode);
164 loff_t size = i_size_read(inode);
165 struct iov_iter from;
166 struct bio_vec bvec;
167 int err, len;
170 168
171 v9inode = V9FS_I(inode);
172 size = i_size_read(inode);
173 if (page->index == size >> PAGE_CACHE_SHIFT) 169 if (page->index == size >> PAGE_CACHE_SHIFT)
174 len = size & ~PAGE_CACHE_MASK; 170 len = size & ~PAGE_CACHE_MASK;
175 else 171 else
176 len = PAGE_CACHE_SIZE; 172 len = PAGE_CACHE_SIZE;
177 173
178 set_page_writeback(page); 174 bvec.bv_page = page;
179 175 bvec.bv_offset = 0;
180 buffer = kmap(page); 176 bvec.bv_len = len;
181 offset = page_offset(page); 177 iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
182 178
183 old_fs = get_fs();
184 set_fs(get_ds());
185 /* We should have writeback_fid always set */ 179 /* We should have writeback_fid always set */
186 BUG_ON(!v9inode->writeback_fid); 180 BUG_ON(!v9inode->writeback_fid);
187 181
188 retval = v9fs_file_write_internal(inode, 182 set_page_writeback(page);
189 v9inode->writeback_fid, 183
190 (__force const char __user *)buffer, 184 p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err);
191 len, &offset, 0);
192 if (retval > 0)
193 retval = 0;
194 185
195 set_fs(old_fs);
196 kunmap(page);
197 end_page_writeback(page); 186 end_page_writeback(page);
198 return retval; 187 return err;
199} 188}
200 189
201static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) 190static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -241,11 +230,8 @@ static int v9fs_launder_page(struct page *page)
241 230
242/** 231/**
243 * v9fs_direct_IO - 9P address space operation for direct I/O 232 * v9fs_direct_IO - 9P address space operation for direct I/O
244 * @rw: direction (read or write)
245 * @iocb: target I/O control block 233 * @iocb: target I/O control block
246 * @iov: array of vectors that define I/O buffer
247 * @pos: offset in file to begin the operation 234 * @pos: offset in file to begin the operation
248 * @nr_segs: size of iovec array
249 * 235 *
250 * The presence of v9fs_direct_IO() in the address space ops vector 236 * The presence of v9fs_direct_IO() in the address space ops vector
251 * allowes open() O_DIRECT flags which would have failed otherwise. 237 * allowes open() O_DIRECT flags which would have failed otherwise.
@@ -259,18 +245,23 @@ static int v9fs_launder_page(struct page *page)
259 * 245 *
260 */ 246 */
261static ssize_t 247static ssize_t
262v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) 248v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
263{ 249{
264 /* 250 struct file *file = iocb->ki_filp;
265 * FIXME 251 ssize_t n;
266 * Now that we do caching with cache mode enabled, We need 252 int err = 0;
267 * to support direct IO 253 if (iov_iter_rw(iter) == WRITE) {
268 */ 254 n = p9_client_write(file->private_data, pos, iter, &err);
269 p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n", 255 if (n) {
270 iocb->ki_filp, 256 struct inode *inode = file_inode(file);
271 (long long)pos, iter->nr_segs); 257 loff_t i_size = i_size_read(inode);
272 258 if (pos + n > i_size)
273 return -EINVAL; 259 inode_add_bytes(inode, pos + n - i_size);
260 }
261 } else {
262 n = p9_client_read(file->private_data, pos, iter, &err);
263 }
264 return n ? n : err;
274} 265}
275 266
276static int v9fs_write_begin(struct file *filp, struct address_space *mapping, 267static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 4f1151088ebe..76c3b1ab6361 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -33,6 +33,7 @@
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/uio.h>
36#include <net/9p/9p.h> 37#include <net/9p/9p.h>
37#include <net/9p/client.h> 38#include <net/9p/client.h>
38 39
@@ -115,6 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
115 int buflen; 116 int buflen;
116 int reclen = 0; 117 int reclen = 0;
117 struct p9_rdir *rdir; 118 struct p9_rdir *rdir;
119 struct kvec kvec;
118 120
119 p9_debug(P9_DEBUG_VFS, "name %pD\n", file); 121 p9_debug(P9_DEBUG_VFS, "name %pD\n", file);
120 fid = file->private_data; 122 fid = file->private_data;
@@ -124,16 +126,21 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
124 rdir = v9fs_alloc_rdir_buf(file, buflen); 126 rdir = v9fs_alloc_rdir_buf(file, buflen);
125 if (!rdir) 127 if (!rdir)
126 return -ENOMEM; 128 return -ENOMEM;
129 kvec.iov_base = rdir->buf;
130 kvec.iov_len = buflen;
127 131
128 while (1) { 132 while (1) {
129 if (rdir->tail == rdir->head) { 133 if (rdir->tail == rdir->head) {
130 err = v9fs_file_readn(file, rdir->buf, NULL, 134 struct iov_iter to;
131 buflen, ctx->pos); 135 int n;
132 if (err <= 0) 136 iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
137 n = p9_client_read(file->private_data, ctx->pos, &to,
138 &err);
139 if (err)
133 return err; 140 return err;
134 141
135 rdir->head = 0; 142 rdir->head = 0;
136 rdir->tail = err; 143 rdir->tail = n;
137 } 144 }
138 while (rdir->head < rdir->tail) { 145 while (rdir->head < rdir->tail) {
139 p9stat_init(&st); 146 p9stat_init(&st);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b40133796b87..1ef16bd8280b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -36,6 +36,8 @@
36#include <linux/utsname.h> 36#include <linux/utsname.h>
37#include <asm/uaccess.h> 37#include <asm/uaccess.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/uio.h>
40#include <linux/slab.h>
39#include <net/9p/9p.h> 41#include <net/9p/9p.h>
40#include <net/9p/client.h> 42#include <net/9p/client.h>
41 43
@@ -149,7 +151,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
149{ 151{
150 struct p9_flock flock; 152 struct p9_flock flock;
151 struct p9_fid *fid; 153 struct p9_fid *fid;
152 uint8_t status; 154 uint8_t status = P9_LOCK_ERROR;
153 int res = 0; 155 int res = 0;
154 unsigned char fl_type; 156 unsigned char fl_type;
155 157
@@ -194,7 +196,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
194 for (;;) { 196 for (;;) {
195 res = p9_client_lock_dotl(fid, &flock, &status); 197 res = p9_client_lock_dotl(fid, &flock, &status);
196 if (res < 0) 198 if (res < 0)
197 break; 199 goto out_unlock;
198 200
199 if (status != P9_LOCK_BLOCKED) 201 if (status != P9_LOCK_BLOCKED)
200 break; 202 break;
@@ -212,14 +214,16 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
212 case P9_LOCK_BLOCKED: 214 case P9_LOCK_BLOCKED:
213 res = -EAGAIN; 215 res = -EAGAIN;
214 break; 216 break;
217 default:
218 WARN_ONCE(1, "unknown lock status code: %d\n", status);
219 /* fallthough */
215 case P9_LOCK_ERROR: 220 case P9_LOCK_ERROR:
216 case P9_LOCK_GRACE: 221 case P9_LOCK_GRACE:
217 res = -ENOLCK; 222 res = -ENOLCK;
218 break; 223 break;
219 default:
220 BUG();
221 } 224 }
222 225
226out_unlock:
223 /* 227 /*
224 * incase server returned error for lock request, revert 228 * incase server returned error for lock request, revert
225 * it locally 229 * it locally
@@ -285,6 +289,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
285 fl->fl_end = glock.start + glock.length - 1; 289 fl->fl_end = glock.start + glock.length - 1;
286 fl->fl_pid = glock.proc_id; 290 fl->fl_pid = glock.proc_id;
287 } 291 }
292 kfree(glock.client_id);
288 return res; 293 return res;
289} 294}
290 295
@@ -364,63 +369,6 @@ out_err:
364} 369}
365 370
366/** 371/**
367 * v9fs_fid_readn - read from a fid
368 * @fid: fid to read
369 * @data: data buffer to read data into
370 * @udata: user data buffer to read data into
371 * @count: size of buffer
372 * @offset: offset at which to read data
373 *
374 */
375ssize_t
376v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
377 u64 offset)
378{
379 int n, total, size;
380
381 p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n",
382 fid->fid, (long long unsigned)offset, count);
383 n = 0;
384 total = 0;
385 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
386 do {
387 n = p9_client_read(fid, data, udata, offset, count);
388 if (n <= 0)
389 break;
390
391 if (data)
392 data += n;
393 if (udata)
394 udata += n;
395
396 offset += n;
397 count -= n;
398 total += n;
399 } while (count > 0 && n == size);
400
401 if (n < 0)
402 total = n;
403
404 return total;
405}
406
407/**
408 * v9fs_file_readn - read from a file
409 * @filp: file pointer to read
410 * @data: data buffer to read data into
411 * @udata: user data buffer to read data into
412 * @count: size of buffer
413 * @offset: offset at which to read data
414 *
415 */
416ssize_t
417v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
418 u64 offset)
419{
420 return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
421}
422
423/**
424 * v9fs_file_read - read from a file 372 * v9fs_file_read - read from a file
425 * @filp: file pointer to read 373 * @filp: file pointer to read
426 * @udata: user data buffer to read data into 374 * @udata: user data buffer to read data into
@@ -430,69 +378,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
430 */ 378 */
431 379
432static ssize_t 380static ssize_t
433v9fs_file_read(struct file *filp, char __user *udata, size_t count, 381v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
434 loff_t * offset)
435{ 382{
436 int ret; 383 struct p9_fid *fid = iocb->ki_filp->private_data;
437 struct p9_fid *fid; 384 int ret, err;
438 size_t size;
439
440 p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
441 fid = filp->private_data;
442 385
443 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 386 p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
444 if (count > size) 387 iov_iter_count(to), iocb->ki_pos);
445 ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
446 else
447 ret = p9_client_read(fid, NULL, udata, *offset, count);
448 388
449 if (ret > 0) 389 ret = p9_client_read(fid, iocb->ki_pos, to, &err);
450 *offset += ret; 390 if (!ret)
391 return err;
451 392
393 iocb->ki_pos += ret;
452 return ret; 394 return ret;
453} 395}
454 396
455ssize_t
456v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
457 const char __user *data, size_t count,
458 loff_t *offset, int invalidate)
459{
460 int n;
461 loff_t i_size;
462 size_t total = 0;
463 loff_t origin = *offset;
464 unsigned long pg_start, pg_end;
465
466 p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
467 data, (int)count, (int)*offset);
468
469 do {
470 n = p9_client_write(fid, NULL, data+total, origin+total, count);
471 if (n <= 0)
472 break;
473 count -= n;
474 total += n;
475 } while (count > 0);
476
477 if (invalidate && (total > 0)) {
478 pg_start = origin >> PAGE_CACHE_SHIFT;
479 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
480 if (inode->i_mapping && inode->i_mapping->nrpages)
481 invalidate_inode_pages2_range(inode->i_mapping,
482 pg_start, pg_end);
483 *offset += total;
484 i_size = i_size_read(inode);
485 if (*offset > i_size) {
486 inode_add_bytes(inode, *offset - i_size);
487 i_size_write(inode, *offset);
488 }
489 }
490 if (n < 0)
491 return n;
492
493 return total;
494}
495
496/** 397/**
497 * v9fs_file_write - write to a file 398 * v9fs_file_write - write to a file
498 * @filp: file pointer to write 399 * @filp: file pointer to write
@@ -502,35 +403,39 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
502 * 403 *
503 */ 404 */
504static ssize_t 405static ssize_t
505v9fs_file_write(struct file *filp, const char __user * data, 406v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
506 size_t count, loff_t *offset)
507{ 407{
508 ssize_t retval = 0; 408 struct file *file = iocb->ki_filp;
509 loff_t origin = *offset; 409 ssize_t retval;
510 410 loff_t origin;
511 411 int err = 0;
512 retval = generic_write_checks(filp, &origin, &count, 0);
513 if (retval)
514 goto out;
515 412
516 retval = -EINVAL; 413 retval = generic_write_checks(iocb, from);
517 if ((ssize_t) count < 0) 414 if (retval <= 0)
518 goto out; 415 return retval;
519 retval = 0;
520 if (!count)
521 goto out;
522 416
523 retval = v9fs_file_write_internal(file_inode(filp), 417 origin = iocb->ki_pos;
524 filp->private_data, 418 retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err);
525 data, count, &origin, 1); 419 if (retval > 0) {
526 /* update offset on successful write */ 420 struct inode *inode = file_inode(file);
527 if (retval > 0) 421 loff_t i_size;
528 *offset = origin; 422 unsigned long pg_start, pg_end;
529out: 423 pg_start = origin >> PAGE_CACHE_SHIFT;
530 return retval; 424 pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
425 if (inode->i_mapping && inode->i_mapping->nrpages)
426 invalidate_inode_pages2_range(inode->i_mapping,
427 pg_start, pg_end);
428 iocb->ki_pos += retval;
429 i_size = i_size_read(inode);
430 if (iocb->ki_pos > i_size) {
431 inode_add_bytes(inode, iocb->ki_pos - i_size);
432 i_size_write(inode, iocb->ki_pos);
433 }
434 return retval;
435 }
436 return err;
531} 437}
532 438
533
534static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, 439static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
535 int datasync) 440 int datasync)
536{ 441{
@@ -657,44 +562,6 @@ out_unlock:
657 return VM_FAULT_NOPAGE; 562 return VM_FAULT_NOPAGE;
658} 563}
659 564
660static ssize_t
661v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
662 loff_t *offsetp)
663{
664 loff_t size, offset;
665 struct inode *inode;
666 struct address_space *mapping;
667
668 offset = *offsetp;
669 mapping = filp->f_mapping;
670 inode = mapping->host;
671 if (!count)
672 return 0;
673 size = i_size_read(inode);
674 if (offset < size)
675 filemap_write_and_wait_range(mapping, offset,
676 offset + count - 1);
677
678 return v9fs_file_read(filp, udata, count, offsetp);
679}
680
681/**
682 * v9fs_cached_file_read - read from a file
683 * @filp: file pointer to read
684 * @data: user data buffer to read data into
685 * @count: size of buffer
686 * @offset: offset at which to read data
687 *
688 */
689static ssize_t
690v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
691 loff_t *offset)
692{
693 if (filp->f_flags & O_DIRECT)
694 return v9fs_direct_read(filp, data, count, offset);
695 return new_sync_read(filp, data, count, offset);
696}
697
698/** 565/**
699 * v9fs_mmap_file_read - read from a file 566 * v9fs_mmap_file_read - read from a file
700 * @filp: file pointer to read 567 * @filp: file pointer to read
@@ -704,84 +571,12 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
704 * 571 *
705 */ 572 */
706static ssize_t 573static ssize_t
707v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count, 574v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
708 loff_t *offset)
709{ 575{
710 /* TODO: Check if there are dirty pages */ 576 /* TODO: Check if there are dirty pages */
711 return v9fs_file_read(filp, data, count, offset); 577 return v9fs_file_read_iter(iocb, to);
712}
713
714static ssize_t
715v9fs_direct_write(struct file *filp, const char __user * data,
716 size_t count, loff_t *offsetp)
717{
718 loff_t offset;
719 ssize_t retval;
720 struct inode *inode;
721 struct address_space *mapping;
722
723 offset = *offsetp;
724 mapping = filp->f_mapping;
725 inode = mapping->host;
726 if (!count)
727 return 0;
728
729 mutex_lock(&inode->i_mutex);
730 retval = filemap_write_and_wait_range(mapping, offset,
731 offset + count - 1);
732 if (retval)
733 goto err_out;
734 /*
735 * After a write we want buffered reads to be sure to go to disk to get
736 * the new data. We invalidate clean cached page from the region we're
737 * about to write. We do this *before* the write so that if we fail
738 * here we fall back to buffered write
739 */
740 if (mapping->nrpages) {
741 pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
742 pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
743
744 retval = invalidate_inode_pages2_range(mapping,
745 pg_start, pg_end);
746 /*
747 * If a page can not be invalidated, fall back
748 * to buffered write.
749 */
750 if (retval) {
751 if (retval == -EBUSY)
752 goto buff_write;
753 goto err_out;
754 }
755 }
756 retval = v9fs_file_write(filp, data, count, offsetp);
757err_out:
758 mutex_unlock(&inode->i_mutex);
759 return retval;
760
761buff_write:
762 mutex_unlock(&inode->i_mutex);
763 return new_sync_write(filp, data, count, offsetp);
764}
765
766/**
767 * v9fs_cached_file_write - write to a file
768 * @filp: file pointer to write
769 * @data: data buffer to write data from
770 * @count: size of buffer
771 * @offset: offset at which to write data
772 *
773 */
774static ssize_t
775v9fs_cached_file_write(struct file *filp, const char __user * data,
776 size_t count, loff_t *offset)
777{
778
779 if (filp->f_flags & O_DIRECT)
780 return v9fs_direct_write(filp, data, count, offset);
781 return new_sync_write(filp, data, count, offset);
782} 578}
783 579
784
785/** 580/**
786 * v9fs_mmap_file_write - write to a file 581 * v9fs_mmap_file_write - write to a file
787 * @filp: file pointer to write 582 * @filp: file pointer to write
@@ -791,14 +586,13 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
791 * 586 *
792 */ 587 */
793static ssize_t 588static ssize_t
794v9fs_mmap_file_write(struct file *filp, const char __user *data, 589v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
795 size_t count, loff_t *offset)
796{ 590{
797 /* 591 /*
798 * TODO: invalidate mmaps on filp's inode between 592 * TODO: invalidate mmaps on filp's inode between
799 * offset and offset+count 593 * offset and offset+count
800 */ 594 */
801 return v9fs_file_write(filp, data, count, offset); 595 return v9fs_file_write_iter(iocb, from);
802} 596}
803 597
804static void v9fs_mmap_vm_close(struct vm_area_struct *vma) 598static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
@@ -843,8 +637,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
843 637
844const struct file_operations v9fs_cached_file_operations = { 638const struct file_operations v9fs_cached_file_operations = {
845 .llseek = generic_file_llseek, 639 .llseek = generic_file_llseek,
846 .read = v9fs_cached_file_read,
847 .write = v9fs_cached_file_write,
848 .read_iter = generic_file_read_iter, 640 .read_iter = generic_file_read_iter,
849 .write_iter = generic_file_write_iter, 641 .write_iter = generic_file_write_iter,
850 .open = v9fs_file_open, 642 .open = v9fs_file_open,
@@ -856,8 +648,6 @@ const struct file_operations v9fs_cached_file_operations = {
856 648
857const struct file_operations v9fs_cached_file_operations_dotl = { 649const struct file_operations v9fs_cached_file_operations_dotl = {
858 .llseek = generic_file_llseek, 650 .llseek = generic_file_llseek,
859 .read = v9fs_cached_file_read,
860 .write = v9fs_cached_file_write,
861 .read_iter = generic_file_read_iter, 651 .read_iter = generic_file_read_iter,
862 .write_iter = generic_file_write_iter, 652 .write_iter = generic_file_write_iter,
863 .open = v9fs_file_open, 653 .open = v9fs_file_open,
@@ -870,8 +660,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
870 660
871const struct file_operations v9fs_file_operations = { 661const struct file_operations v9fs_file_operations = {
872 .llseek = generic_file_llseek, 662 .llseek = generic_file_llseek,
873 .read = v9fs_file_read, 663 .read_iter = v9fs_file_read_iter,
874 .write = v9fs_file_write, 664 .write_iter = v9fs_file_write_iter,
875 .open = v9fs_file_open, 665 .open = v9fs_file_open,
876 .release = v9fs_dir_release, 666 .release = v9fs_dir_release,
877 .lock = v9fs_file_lock, 667 .lock = v9fs_file_lock,
@@ -881,8 +671,8 @@ const struct file_operations v9fs_file_operations = {
881 671
882const struct file_operations v9fs_file_operations_dotl = { 672const struct file_operations v9fs_file_operations_dotl = {
883 .llseek = generic_file_llseek, 673 .llseek = generic_file_llseek,
884 .read = v9fs_file_read, 674 .read_iter = v9fs_file_read_iter,
885 .write = v9fs_file_write, 675 .write_iter = v9fs_file_write_iter,
886 .open = v9fs_file_open, 676 .open = v9fs_file_open,
887 .release = v9fs_dir_release, 677 .release = v9fs_dir_release,
888 .lock = v9fs_file_lock_dotl, 678 .lock = v9fs_file_lock_dotl,
@@ -893,8 +683,8 @@ const struct file_operations v9fs_file_operations_dotl = {
893 683
894const struct file_operations v9fs_mmap_file_operations = { 684const struct file_operations v9fs_mmap_file_operations = {
895 .llseek = generic_file_llseek, 685 .llseek = generic_file_llseek,
896 .read = v9fs_mmap_file_read, 686 .read_iter = v9fs_mmap_file_read_iter,
897 .write = v9fs_mmap_file_write, 687 .write_iter = v9fs_mmap_file_write_iter,
898 .open = v9fs_file_open, 688 .open = v9fs_file_open,
899 .release = v9fs_dir_release, 689 .release = v9fs_dir_release,
900 .lock = v9fs_file_lock, 690 .lock = v9fs_file_lock,
@@ -904,8 +694,8 @@ const struct file_operations v9fs_mmap_file_operations = {
904 694
905const struct file_operations v9fs_mmap_file_operations_dotl = { 695const struct file_operations v9fs_mmap_file_operations_dotl = {
906 .llseek = generic_file_llseek, 696 .llseek = generic_file_llseek,
907 .read = v9fs_mmap_file_read, 697 .read_iter = v9fs_mmap_file_read_iter,
908 .write = v9fs_mmap_file_write, 698 .write_iter = v9fs_mmap_file_write_iter,
909 .open = v9fs_file_open, 699 .open = v9fs_file_open,
910 .release = v9fs_dir_release, 700 .release = v9fs_dir_release,
911 .lock = v9fs_file_lock_dotl, 701 .lock = v9fs_file_lock_dotl,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f95e01e058e4..0cf44b6cccd6 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -15,6 +15,7 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18#include <linux/uio.h>
18#include <net/9p/9p.h> 19#include <net/9p/9p.h>
19#include <net/9p/client.h> 20#include <net/9p/client.h>
20 21
@@ -25,50 +26,34 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
25 void *buffer, size_t buffer_size) 26 void *buffer, size_t buffer_size)
26{ 27{
27 ssize_t retval; 28 ssize_t retval;
28 int msize, read_count; 29 u64 attr_size;
29 u64 offset = 0, attr_size;
30 struct p9_fid *attr_fid; 30 struct p9_fid *attr_fid;
31 struct kvec kvec = {.iov_base = buffer, .iov_len = buffer_size};
32 struct iov_iter to;
33 int err;
34
35 iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
31 36
32 attr_fid = p9_client_xattrwalk(fid, name, &attr_size); 37 attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
33 if (IS_ERR(attr_fid)) { 38 if (IS_ERR(attr_fid)) {
34 retval = PTR_ERR(attr_fid); 39 retval = PTR_ERR(attr_fid);
35 p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n", 40 p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n",
36 retval); 41 retval);
37 attr_fid = NULL; 42 return retval;
38 goto error;
39 }
40 if (!buffer_size) {
41 /* request to get the attr_size */
42 retval = attr_size;
43 goto error;
44 } 43 }
45 if (attr_size > buffer_size) { 44 if (attr_size > buffer_size) {
46 retval = -ERANGE; 45 if (!buffer_size) /* request to get the attr_size */
47 goto error; 46 retval = attr_size;
48 }
49 msize = attr_fid->clnt->msize;
50 while (attr_size) {
51 if (attr_size > (msize - P9_IOHDRSZ))
52 read_count = msize - P9_IOHDRSZ;
53 else 47 else
54 read_count = attr_size; 48 retval = -ERANGE;
55 read_count = p9_client_read(attr_fid, ((char *)buffer)+offset, 49 } else {
56 NULL, offset, read_count); 50 iov_iter_truncate(&to, attr_size);
57 if (read_count < 0) { 51 retval = p9_client_read(attr_fid, 0, &to, &err);
58 /* error in xattr read */ 52 if (err)
59 retval = read_count; 53 retval = err;
60 goto error;
61 }
62 offset += read_count;
63 attr_size -= read_count;
64 } 54 }
65 /* Total read xattr bytes */ 55 p9_client_clunk(attr_fid);
66 retval = offset;
67error:
68 if (attr_fid)
69 p9_client_clunk(attr_fid);
70 return retval; 56 return retval;
71
72} 57}
73 58
74 59
@@ -120,8 +105,11 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
120int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name, 105int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
121 const void *value, size_t value_len, int flags) 106 const void *value, size_t value_len, int flags)
122{ 107{
123 u64 offset = 0; 108 struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len};
124 int retval, msize, write_count; 109 struct iov_iter from;
110 int retval;
111
112 iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
125 113
126 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n", 114 p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
127 name, value_len, flags); 115 name, value_len, flags);
@@ -135,29 +123,11 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
135 * On success fid points to xattr 123 * On success fid points to xattr
136 */ 124 */
137 retval = p9_client_xattrcreate(fid, name, value_len, flags); 125 retval = p9_client_xattrcreate(fid, name, value_len, flags);
138 if (retval < 0) { 126 if (retval < 0)
139 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n", 127 p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
140 retval); 128 retval);
141 goto err; 129 else
142 } 130 p9_client_write(fid, 0, &from, &retval);
143 msize = fid->clnt->msize;
144 while (value_len) {
145 if (value_len > (msize - P9_IOHDRSZ))
146 write_count = msize - P9_IOHDRSZ;
147 else
148 write_count = value_len;
149 write_count = p9_client_write(fid, ((char *)value)+offset,
150 NULL, offset, write_count);
151 if (write_count < 0) {
152 /* error in xattr write */
153 retval = write_count;
154 goto err;
155 }
156 offset += write_count;
157 value_len -= write_count;
158 }
159 retval = 0;
160err:
161 p9_client_clunk(fid); 131 p9_client_clunk(fid);
162 return retval; 132 return retval;
163} 133}
diff --git a/fs/Kconfig b/fs/Kconfig
index ec35851e5b71..011f43365d7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig"
32source "fs/ocfs2/Kconfig" 32source "fs/ocfs2/Kconfig"
33source "fs/btrfs/Kconfig" 33source "fs/btrfs/Kconfig"
34source "fs/nilfs2/Kconfig" 34source "fs/nilfs2/Kconfig"
35source "fs/f2fs/Kconfig"
35 36
36config FS_DAX 37config FS_DAX
37 bool "Direct Access (DAX) support" 38 bool "Direct Access (DAX) support"
@@ -217,7 +218,6 @@ source "fs/pstore/Kconfig"
217source "fs/sysv/Kconfig" 218source "fs/sysv/Kconfig"
218source "fs/ufs/Kconfig" 219source "fs/ufs/Kconfig"
219source "fs/exofs/Kconfig" 220source "fs/exofs/Kconfig"
220source "fs/f2fs/Kconfig"
221 221
222endif # MISC_FILESYSTEMS 222endif # MISC_FILESYSTEMS
223 223
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
27 bool 27 bool
28 depends on COMPAT && BINFMT_ELF 28 depends on COMPAT && BINFMT_ELF
29 29
30config ARCH_BINFMT_ELF_RANDOMIZE_PIE
31 bool
32
33config ARCH_BINFMT_ELF_STATE 30config ARCH_BINFMT_ELF_STATE
34 bool 31 bool
35 32
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
118obj-$(CONFIG_HPPFS) += hppfs/ 118obj-$(CONFIG_HPPFS) += hppfs/
119obj-$(CONFIG_CACHEFILES) += cachefiles/ 119obj-$(CONFIG_CACHEFILES) += cachefiles/
120obj-$(CONFIG_DEBUG_FS) += debugfs/ 120obj-$(CONFIG_DEBUG_FS) += debugfs/
121obj-$(CONFIG_TRACING) += tracefs/
121obj-$(CONFIG_OCFS2_FS) += ocfs2/ 122obj-$(CONFIG_OCFS2_FS) += ocfs2/
122obj-$(CONFIG_BTRFS_FS) += btrfs/ 123obj-$(CONFIG_BTRFS_FS) += btrfs/
123obj-$(CONFIG_GFS2_FS) += gfs2/ 124obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index f2ba88ab4aed..82d14cdf70f9 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -61,6 +61,7 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
61 kcalloc(size, sizeof(struct buffer_head *), 61 kcalloc(size, sizeof(struct buffer_head *),
62 GFP_KERNEL); 62 GFP_KERNEL);
63 if (!bh_fplus) { 63 if (!bh_fplus) {
64 ret = -ENOMEM;
64 adfs_error(sb, "not enough memory for" 65 adfs_error(sb, "not enough memory for"
65 " dir object %X (%d blocks)", id, size); 66 " dir object %X (%d blocks)", id, size);
66 goto out; 67 goto out;
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 07c9edce5aa7..46c0d5671cd5 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -23,11 +23,9 @@
23 23
24const struct file_operations adfs_file_operations = { 24const struct file_operations adfs_file_operations = {
25 .llseek = generic_file_llseek, 25 .llseek = generic_file_llseek,
26 .read = new_sync_read,
27 .read_iter = generic_file_read_iter, 26 .read_iter = generic_file_read_iter,
28 .mmap = generic_file_mmap, 27 .mmap = generic_file_mmap,
29 .fsync = generic_file_fsync, 28 .fsync = generic_file_fsync,
30 .write = new_sync_write,
31 .write_iter = generic_file_write_iter, 29 .write_iter = generic_file_write_iter,
32 .splice_read = generic_file_splice_read, 30 .splice_read = generic_file_splice_read,
33}; 31};
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9852bdf34d76..a19c31d3f369 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -316,7 +316,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
316 dm = kmalloc(nzones * sizeof(*dm), GFP_KERNEL); 316 dm = kmalloc(nzones * sizeof(*dm), GFP_KERNEL);
317 if (dm == NULL) { 317 if (dm == NULL) {
318 adfs_error(sb, "not enough memory"); 318 adfs_error(sb, "not enough memory");
319 return NULL; 319 return ERR_PTR(-ENOMEM);
320 } 320 }
321 321
322 for (zone = 0; zone < nzones; zone++, map_addr++) { 322 for (zone = 0; zone < nzones; zone++, map_addr++) {
@@ -349,7 +349,7 @@ error_free:
349 brelse(dm[zone].dm_bh); 349 brelse(dm[zone].dm_bh);
350 350
351 kfree(dm); 351 kfree(dm);
352 return NULL; 352 return ERR_PTR(-EIO);
353} 353}
354 354
355static inline unsigned long adfs_discsize(struct adfs_discrecord *dr, int block_bits) 355static inline unsigned long adfs_discsize(struct adfs_discrecord *dr, int block_bits)
@@ -370,6 +370,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
370 unsigned char *b_data; 370 unsigned char *b_data;
371 struct adfs_sb_info *asb; 371 struct adfs_sb_info *asb;
372 struct inode *root; 372 struct inode *root;
373 int ret = -EINVAL;
373 374
374 sb->s_flags |= MS_NODIRATIME; 375 sb->s_flags |= MS_NODIRATIME;
375 376
@@ -391,6 +392,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
391 sb_set_blocksize(sb, BLOCK_SIZE); 392 sb_set_blocksize(sb, BLOCK_SIZE);
392 if (!(bh = sb_bread(sb, ADFS_DISCRECORD / BLOCK_SIZE))) { 393 if (!(bh = sb_bread(sb, ADFS_DISCRECORD / BLOCK_SIZE))) {
393 adfs_error(sb, "unable to read superblock"); 394 adfs_error(sb, "unable to read superblock");
395 ret = -EIO;
394 goto error; 396 goto error;
395 } 397 }
396 398
@@ -400,6 +402,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
400 if (!silent) 402 if (!silent)
401 printk("VFS: Can't find an adfs filesystem on dev " 403 printk("VFS: Can't find an adfs filesystem on dev "
402 "%s.\n", sb->s_id); 404 "%s.\n", sb->s_id);
405 ret = -EINVAL;
403 goto error_free_bh; 406 goto error_free_bh;
404 } 407 }
405 408
@@ -412,6 +415,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
412 if (!silent) 415 if (!silent)
413 printk("VPS: Can't find an adfs filesystem on dev " 416 printk("VPS: Can't find an adfs filesystem on dev "
414 "%s.\n", sb->s_id); 417 "%s.\n", sb->s_id);
418 ret = -EINVAL;
415 goto error_free_bh; 419 goto error_free_bh;
416 } 420 }
417 421
@@ -421,11 +425,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
421 if (!bh) { 425 if (!bh) {
422 adfs_error(sb, "couldn't read superblock on " 426 adfs_error(sb, "couldn't read superblock on "
423 "2nd try."); 427 "2nd try.");
428 ret = -EIO;
424 goto error; 429 goto error;
425 } 430 }
426 b_data = bh->b_data + (ADFS_DISCRECORD % sb->s_blocksize); 431 b_data = bh->b_data + (ADFS_DISCRECORD % sb->s_blocksize);
427 if (adfs_checkbblk(b_data)) { 432 if (adfs_checkbblk(b_data)) {
428 adfs_error(sb, "disc record mismatch, very weird!"); 433 adfs_error(sb, "disc record mismatch, very weird!");
434 ret = -EINVAL;
429 goto error_free_bh; 435 goto error_free_bh;
430 } 436 }
431 dr = (struct adfs_discrecord *)(b_data + ADFS_DR_OFFSET); 437 dr = (struct adfs_discrecord *)(b_data + ADFS_DR_OFFSET);
@@ -433,6 +439,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
433 if (!silent) 439 if (!silent)
434 printk(KERN_ERR "VFS: Unsupported blocksize on dev " 440 printk(KERN_ERR "VFS: Unsupported blocksize on dev "
435 "%s.\n", sb->s_id); 441 "%s.\n", sb->s_id);
442 ret = -EINVAL;
436 goto error; 443 goto error;
437 } 444 }
438 445
@@ -447,10 +454,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
447 asb->s_size = adfs_discsize(dr, sb->s_blocksize_bits); 454 asb->s_size = adfs_discsize(dr, sb->s_blocksize_bits);
448 asb->s_version = dr->format_version; 455 asb->s_version = dr->format_version;
449 asb->s_log2sharesize = dr->log2sharesize; 456 asb->s_log2sharesize = dr->log2sharesize;
450 457
451 asb->s_map = adfs_read_map(sb, dr); 458 asb->s_map = adfs_read_map(sb, dr);
452 if (!asb->s_map) 459 if (IS_ERR(asb->s_map)) {
460 ret = PTR_ERR(asb->s_map);
453 goto error_free_bh; 461 goto error_free_bh;
462 }
454 463
455 brelse(bh); 464 brelse(bh);
456 465
@@ -499,6 +508,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
499 brelse(asb->s_map[i].dm_bh); 508 brelse(asb->s_map[i].dm_bh);
500 kfree(asb->s_map); 509 kfree(asb->s_map);
501 adfs_error(sb, "get root inode failed\n"); 510 adfs_error(sb, "get root inode failed\n");
511 ret = -EIO;
502 goto error; 512 goto error;
503 } 513 }
504 return 0; 514 return 0;
@@ -508,7 +518,7 @@ error_free_bh:
508error: 518error:
509 sb->s_fs_info = NULL; 519 sb->s_fs_info = NULL;
510 kfree(asb); 520 kfree(asb);
511 return -EINVAL; 521 return ret;
512} 522}
513 523
514static struct dentry *adfs_mount(struct file_system_type *fs_type, 524static struct dentry *adfs_mount(struct file_system_type *fs_type,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c8764bd7497d..cffe8370fb44 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,18 +106,22 @@ struct affs_sb_info {
106 spinlock_t work_lock; /* protects sb_work and work_queued */ 106 spinlock_t work_lock; /* protects sb_work and work_queued */
107}; 107};
108 108
109#define SF_INTL 0x0001 /* International filesystem. */ 109#define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */
110#define SF_BM_VALID 0x0002 /* Bitmap is valid. */ 110#define AFFS_MOUNT_SF_BM_VALID 0x0002 /* Bitmap is valid. */
111#define SF_IMMUTABLE 0x0004 /* Protection bits cannot be changed */ 111#define AFFS_MOUNT_SF_IMMUTABLE 0x0004 /* Protection bits cannot be changed */
112#define SF_QUIET 0x0008 /* chmod errors will be not reported */ 112#define AFFS_MOUNT_SF_QUIET 0x0008 /* chmod errors will be not reported */
113#define SF_SETUID 0x0010 /* Ignore Amiga uid */ 113#define AFFS_MOUNT_SF_SETUID 0x0010 /* Ignore Amiga uid */
114#define SF_SETGID 0x0020 /* Ignore Amiga gid */ 114#define AFFS_MOUNT_SF_SETGID 0x0020 /* Ignore Amiga gid */
115#define SF_SETMODE 0x0040 /* Ignore Amiga protection bits */ 115#define AFFS_MOUNT_SF_SETMODE 0x0040 /* Ignore Amiga protection bits */
116#define SF_MUFS 0x0100 /* Use MUFS uid/gid mapping */ 116#define AFFS_MOUNT_SF_MUFS 0x0100 /* Use MUFS uid/gid mapping */
117#define SF_OFS 0x0200 /* Old filesystem */ 117#define AFFS_MOUNT_SF_OFS 0x0200 /* Old filesystem */
118#define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */ 118#define AFFS_MOUNT_SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
119#define SF_VERBOSE 0x0800 /* Talk about fs when mounting */ 119#define AFFS_MOUNT_SF_VERBOSE 0x0800 /* Talk about fs when mounting */
120#define SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */ 120#define AFFS_MOUNT_SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
121
122#define affs_clear_opt(o, opt) (o &= ~AFFS_MOUNT_##opt)
123#define affs_set_opt(o, opt) (o |= AFFS_MOUNT_##opt)
124#define affs_test_opt(o, opt) ((o) & AFFS_MOUNT_##opt)
121 125
122/* short cut to get to the affs specific sb data */ 126/* short cut to get to the affs specific sb data */
123static inline struct affs_sb_info *AFFS_SB(struct super_block *sb) 127static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 388da1ea815d..5022ac96aa40 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -472,7 +472,8 @@ bool
472affs_nofilenametruncate(const struct dentry *dentry) 472affs_nofilenametruncate(const struct dentry *dentry)
473{ 473{
474 struct inode *inode = dentry->d_inode; 474 struct inode *inode = dentry->d_inode;
475 return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE; 475
476 return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
476 477
477} 478}
478 479
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a91795e01a7f..659c579c4588 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
12 * affs regular file handling primitives 12 * affs regular file handling primitives
13 */ 13 */
14 14
15#include <linux/aio.h> 15#include <linux/uio.h>
16#include "affs.h" 16#include "affs.h"
17 17
18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext); 18static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
389} 389}
390 390
391static ssize_t 391static ssize_t
392affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 392affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
393 loff_t offset)
394{ 393{
395 struct file *file = iocb->ki_filp; 394 struct file *file = iocb->ki_filp;
396 struct address_space *mapping = file->f_mapping; 395 struct address_space *mapping = file->f_mapping;
@@ -398,15 +397,15 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
398 size_t count = iov_iter_count(iter); 397 size_t count = iov_iter_count(iter);
399 ssize_t ret; 398 ssize_t ret;
400 399
401 if (rw == WRITE) { 400 if (iov_iter_rw(iter) == WRITE) {
402 loff_t size = offset + count; 401 loff_t size = offset + count;
403 402
404 if (AFFS_I(inode)->mmu_private < size) 403 if (AFFS_I(inode)->mmu_private < size)
405 return 0; 404 return 0;
406 } 405 }
407 406
408 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); 407 ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block);
409 if (ret < 0 && (rw & WRITE)) 408 if (ret < 0 && iov_iter_rw(iter) == WRITE)
410 affs_write_failed(mapping, offset + count); 409 affs_write_failed(mapping, offset + count);
411 return ret; 410 return ret;
412} 411}
@@ -915,7 +914,7 @@ affs_truncate(struct inode *inode)
915 if (inode->i_size) { 914 if (inode->i_size) {
916 AFFS_I(inode)->i_blkcnt = last_blk + 1; 915 AFFS_I(inode)->i_blkcnt = last_blk + 1;
917 AFFS_I(inode)->i_extcnt = ext + 1; 916 AFFS_I(inode)->i_extcnt = ext + 1;
918 if (AFFS_SB(sb)->s_flags & SF_OFS) { 917 if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS)) {
919 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0); 918 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
920 u32 tmp; 919 u32 tmp;
921 if (IS_ERR(bh)) { 920 if (IS_ERR(bh)) {
@@ -969,9 +968,7 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
969} 968}
970const struct file_operations affs_file_operations = { 969const struct file_operations affs_file_operations = {
971 .llseek = generic_file_llseek, 970 .llseek = generic_file_llseek,
972 .read = new_sync_read,
973 .read_iter = generic_file_read_iter, 971 .read_iter = generic_file_read_iter,
974 .write = new_sync_write,
975 .write_iter = generic_file_write_iter, 972 .write_iter = generic_file_write_iter,
976 .mmap = generic_file_mmap, 973 .mmap = generic_file_mmap,
977 .open = affs_file_open, 974 .open = affs_file_open,
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 6f34510449e8..9628003ccd2f 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -66,23 +66,23 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
66 AFFS_I(inode)->i_lastalloc = 0; 66 AFFS_I(inode)->i_lastalloc = 0;
67 AFFS_I(inode)->i_pa_cnt = 0; 67 AFFS_I(inode)->i_pa_cnt = 0;
68 68
69 if (sbi->s_flags & SF_SETMODE) 69 if (affs_test_opt(sbi->s_flags, SF_SETMODE))
70 inode->i_mode = sbi->s_mode; 70 inode->i_mode = sbi->s_mode;
71 else 71 else
72 inode->i_mode = prot_to_mode(prot); 72 inode->i_mode = prot_to_mode(prot);
73 73
74 id = be16_to_cpu(tail->uid); 74 id = be16_to_cpu(tail->uid);
75 if (id == 0 || sbi->s_flags & SF_SETUID) 75 if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
76 inode->i_uid = sbi->s_uid; 76 inode->i_uid = sbi->s_uid;
77 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) 77 else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
78 i_uid_write(inode, 0); 78 i_uid_write(inode, 0);
79 else 79 else
80 i_uid_write(inode, id); 80 i_uid_write(inode, id);
81 81
82 id = be16_to_cpu(tail->gid); 82 id = be16_to_cpu(tail->gid);
83 if (id == 0 || sbi->s_flags & SF_SETGID) 83 if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETGID))
84 inode->i_gid = sbi->s_gid; 84 inode->i_gid = sbi->s_gid;
85 else if (id == 0xFFFF && sbi->s_flags & SF_MUFS) 85 else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
86 i_gid_write(inode, 0); 86 i_gid_write(inode, 0);
87 else 87 else
88 i_gid_write(inode, id); 88 i_gid_write(inode, id);
@@ -94,7 +94,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
94 /* fall through */ 94 /* fall through */
95 case ST_USERDIR: 95 case ST_USERDIR:
96 if (be32_to_cpu(tail->stype) == ST_USERDIR || 96 if (be32_to_cpu(tail->stype) == ST_USERDIR ||
97 sbi->s_flags & SF_SETMODE) { 97 affs_test_opt(sbi->s_flags, SF_SETMODE)) {
98 if (inode->i_mode & S_IRUSR) 98 if (inode->i_mode & S_IRUSR)
99 inode->i_mode |= S_IXUSR; 99 inode->i_mode |= S_IXUSR;
100 if (inode->i_mode & S_IRGRP) 100 if (inode->i_mode & S_IRGRP)
@@ -133,7 +133,8 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
133 } 133 }
134 if (tail->link_chain) 134 if (tail->link_chain)
135 set_nlink(inode, 2); 135 set_nlink(inode, 2);
136 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 136 inode->i_mapping->a_ops = affs_test_opt(sbi->s_flags, SF_OFS) ?
137 &affs_aops_ofs : &affs_aops;
137 inode->i_op = &affs_file_inode_operations; 138 inode->i_op = &affs_file_inode_operations;
138 inode->i_fop = &affs_file_operations; 139 inode->i_fop = &affs_file_operations;
139 break; 140 break;
@@ -190,15 +191,15 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
190 if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) { 191 if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
191 uid = i_uid_read(inode); 192 uid = i_uid_read(inode);
192 gid = i_gid_read(inode); 193 gid = i_gid_read(inode);
193 if (AFFS_SB(sb)->s_flags & SF_MUFS) { 194 if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_MUFS)) {
194 if (uid == 0 || uid == 0xFFFF) 195 if (uid == 0 || uid == 0xFFFF)
195 uid = uid ^ ~0; 196 uid = uid ^ ~0;
196 if (gid == 0 || gid == 0xFFFF) 197 if (gid == 0 || gid == 0xFFFF)
197 gid = gid ^ ~0; 198 gid = gid ^ ~0;
198 } 199 }
199 if (!(AFFS_SB(sb)->s_flags & SF_SETUID)) 200 if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETUID))
200 tail->uid = cpu_to_be16(uid); 201 tail->uid = cpu_to_be16(uid);
201 if (!(AFFS_SB(sb)->s_flags & SF_SETGID)) 202 if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETGID))
202 tail->gid = cpu_to_be16(gid); 203 tail->gid = cpu_to_be16(gid);
203 } 204 }
204 } 205 }
@@ -221,11 +222,14 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
221 if (error) 222 if (error)
222 goto out; 223 goto out;
223 224
224 if (((attr->ia_valid & ATTR_UID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETUID)) || 225 if (((attr->ia_valid & ATTR_UID) &&
225 ((attr->ia_valid & ATTR_GID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETGID)) || 226 affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETUID)) ||
227 ((attr->ia_valid & ATTR_GID) &&
228 affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETGID)) ||
226 ((attr->ia_valid & ATTR_MODE) && 229 ((attr->ia_valid & ATTR_MODE) &&
227 (AFFS_SB(inode->i_sb)->s_flags & (SF_SETMODE | SF_IMMUTABLE)))) { 230 (AFFS_SB(inode->i_sb)->s_flags &
228 if (!(AFFS_SB(inode->i_sb)->s_flags & SF_QUIET)) 231 (AFFS_MOUNT_SF_SETMODE | AFFS_MOUNT_SF_IMMUTABLE)))) {
232 if (!affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_QUIET))
229 error = -EPERM; 233 error = -EPERM;
230 goto out; 234 goto out;
231 } 235 }
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index ffb7bd82c2a5..ec8ca0efb960 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -53,7 +53,8 @@ affs_intl_toupper(int ch)
53static inline toupper_t 53static inline toupper_t
54affs_get_toupper(struct super_block *sb) 54affs_get_toupper(struct super_block *sb)
55{ 55{
56 return AFFS_SB(sb)->s_flags & SF_INTL ? affs_intl_toupper : affs_toupper; 56 return affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL) ?
57 affs_intl_toupper : affs_toupper;
57} 58}
58 59
59/* 60/*
@@ -275,7 +276,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
275 276
276 inode->i_op = &affs_file_inode_operations; 277 inode->i_op = &affs_file_inode_operations;
277 inode->i_fop = &affs_file_operations; 278 inode->i_fop = &affs_file_operations;
278 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 279 inode->i_mapping->a_ops = affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS) ?
280 &affs_aops_ofs : &affs_aops;
279 error = affs_add_entry(dir, inode, dentry, ST_FILE); 281 error = affs_add_entry(dir, inode, dentry, ST_FILE);
280 if (error) { 282 if (error) {
281 clear_nlink(inode); 283 clear_nlink(inode);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4cf0e9113fb6..3f89c9e05b40 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -227,22 +227,22 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
227 if (match_octal(&args[0], &option)) 227 if (match_octal(&args[0], &option))
228 return 0; 228 return 0;
229 *mode = option & 0777; 229 *mode = option & 0777;
230 *mount_opts |= SF_SETMODE; 230 affs_set_opt(*mount_opts, SF_SETMODE);
231 break; 231 break;
232 case Opt_mufs: 232 case Opt_mufs:
233 *mount_opts |= SF_MUFS; 233 affs_set_opt(*mount_opts, SF_MUFS);
234 break; 234 break;
235 case Opt_notruncate: 235 case Opt_notruncate:
236 *mount_opts |= SF_NO_TRUNCATE; 236 affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
237 break; 237 break;
238 case Opt_prefix: 238 case Opt_prefix:
239 *prefix = match_strdup(&args[0]); 239 *prefix = match_strdup(&args[0]);
240 if (!*prefix) 240 if (!*prefix)
241 return 0; 241 return 0;
242 *mount_opts |= SF_PREFIX; 242 affs_set_opt(*mount_opts, SF_PREFIX);
243 break; 243 break;
244 case Opt_protect: 244 case Opt_protect:
245 *mount_opts |= SF_IMMUTABLE; 245 affs_set_opt(*mount_opts, SF_IMMUTABLE);
246 break; 246 break;
247 case Opt_reserved: 247 case Opt_reserved:
248 if (match_int(&args[0], reserved)) 248 if (match_int(&args[0], reserved))
@@ -258,7 +258,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
258 *gid = make_kgid(current_user_ns(), option); 258 *gid = make_kgid(current_user_ns(), option);
259 if (!gid_valid(*gid)) 259 if (!gid_valid(*gid))
260 return 0; 260 return 0;
261 *mount_opts |= SF_SETGID; 261 affs_set_opt(*mount_opts, SF_SETGID);
262 break; 262 break;
263 case Opt_setuid: 263 case Opt_setuid:
264 if (match_int(&args[0], &option)) 264 if (match_int(&args[0], &option))
@@ -266,10 +266,10 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
266 *uid = make_kuid(current_user_ns(), option); 266 *uid = make_kuid(current_user_ns(), option);
267 if (!uid_valid(*uid)) 267 if (!uid_valid(*uid))
268 return 0; 268 return 0;
269 *mount_opts |= SF_SETUID; 269 affs_set_opt(*mount_opts, SF_SETUID);
270 break; 270 break;
271 case Opt_verbose: 271 case Opt_verbose:
272 *mount_opts |= SF_VERBOSE; 272 affs_set_opt(*mount_opts, SF_VERBOSE);
273 break; 273 break;
274 case Opt_volume: { 274 case Opt_volume: {
275 char *vol = match_strdup(&args[0]); 275 char *vol = match_strdup(&args[0]);
@@ -435,30 +435,31 @@ got_root:
435 case MUFS_FS: 435 case MUFS_FS:
436 case MUFS_INTLFFS: 436 case MUFS_INTLFFS:
437 case MUFS_DCFFS: 437 case MUFS_DCFFS:
438 sbi->s_flags |= SF_MUFS; 438 affs_set_opt(sbi->s_flags, SF_MUFS);
439 /* fall thru */ 439 /* fall thru */
440 case FS_INTLFFS: 440 case FS_INTLFFS:
441 case FS_DCFFS: 441 case FS_DCFFS:
442 sbi->s_flags |= SF_INTL; 442 affs_set_opt(sbi->s_flags, SF_INTL);
443 break; 443 break;
444 case MUFS_FFS: 444 case MUFS_FFS:
445 sbi->s_flags |= SF_MUFS; 445 affs_set_opt(sbi->s_flags, SF_MUFS);
446 break; 446 break;
447 case FS_FFS: 447 case FS_FFS:
448 break; 448 break;
449 case MUFS_OFS: 449 case MUFS_OFS:
450 sbi->s_flags |= SF_MUFS; 450 affs_set_opt(sbi->s_flags, SF_MUFS);
451 /* fall thru */ 451 /* fall thru */
452 case FS_OFS: 452 case FS_OFS:
453 sbi->s_flags |= SF_OFS; 453 affs_set_opt(sbi->s_flags, SF_OFS);
454 sb->s_flags |= MS_NOEXEC; 454 sb->s_flags |= MS_NOEXEC;
455 break; 455 break;
456 case MUFS_DCOFS: 456 case MUFS_DCOFS:
457 case MUFS_INTLOFS: 457 case MUFS_INTLOFS:
458 sbi->s_flags |= SF_MUFS; 458 affs_set_opt(sbi->s_flags, SF_MUFS);
459 case FS_DCOFS: 459 case FS_DCOFS:
460 case FS_INTLOFS: 460 case FS_INTLOFS:
461 sbi->s_flags |= SF_INTL | SF_OFS; 461 affs_set_opt(sbi->s_flags, SF_INTL);
462 affs_set_opt(sbi->s_flags, SF_OFS);
462 sb->s_flags |= MS_NOEXEC; 463 sb->s_flags |= MS_NOEXEC;
463 break; 464 break;
464 default: 465 default:
@@ -467,7 +468,7 @@ got_root:
467 return -EINVAL; 468 return -EINVAL;
468 } 469 }
469 470
470 if (mount_flags & SF_VERBOSE) { 471 if (affs_test_opt(mount_flags, SF_VERBOSE)) {
471 u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0]; 472 u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
472 pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n", 473 pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
473 len > 31 ? 31 : len, 474 len > 31 ? 31 : len,
@@ -478,7 +479,7 @@ got_root:
478 sb->s_flags |= MS_NODEV | MS_NOSUID; 479 sb->s_flags |= MS_NODEV | MS_NOSUID;
479 480
480 sbi->s_data_blksize = sb->s_blocksize; 481 sbi->s_data_blksize = sb->s_blocksize;
481 if (sbi->s_flags & SF_OFS) 482 if (affs_test_opt(sbi->s_flags, SF_OFS))
482 sbi->s_data_blksize -= 24; 483 sbi->s_data_blksize -= 24;
483 484
484 tmp_flags = sb->s_flags; 485 tmp_flags = sb->s_flags;
@@ -493,7 +494,7 @@ got_root:
493 if (IS_ERR(root_inode)) 494 if (IS_ERR(root_inode))
494 return PTR_ERR(root_inode); 495 return PTR_ERR(root_inode);
495 496
496 if (AFFS_SB(sb)->s_flags & SF_INTL) 497 if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL))
497 sb->s_d_op = &affs_intl_dentry_operations; 498 sb->s_d_op = &affs_intl_dentry_operations;
498 else 499 else
499 sb->s_d_op = &affs_dentry_operations; 500 sb->s_d_op = &affs_dentry_operations;
@@ -520,10 +521,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
520 int root_block; 521 int root_block;
521 unsigned long mount_flags; 522 unsigned long mount_flags;
522 int res = 0; 523 int res = 0;
523 char *new_opts = kstrdup(data, GFP_KERNEL); 524 char *new_opts;
524 char volume[32]; 525 char volume[32];
525 char *prefix = NULL; 526 char *prefix = NULL;
526 527
528 new_opts = kstrdup(data, GFP_KERNEL);
529 if (!new_opts)
530 return -ENOMEM;
531
527 pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); 532 pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
528 533
529 sync_filesystem(sb); 534 sync_filesystem(sb);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 932ce07948b3..999bc3caec92 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -31,8 +31,6 @@ const struct file_operations afs_file_operations = {
31 .open = afs_open, 31 .open = afs_open,
32 .release = afs_release, 32 .release = afs_release,
33 .llseek = generic_file_llseek, 33 .llseek = generic_file_llseek,
34 .read = new_sync_read,
35 .write = new_sync_write,
36 .read_iter = generic_file_read_iter, 34 .read_iter = generic_file_read_iter,
37 .write_iter = afs_file_write, 35 .write_iter = afs_file_write,
38 .mmap = generic_file_readonly_mmap, 36 .mmap = generic_file_readonly_mmap,
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 0dd4dafee10b..91ea1aa0d8b3 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,9 +22,12 @@
22int afs_abort_to_error(u32 abort_code) 22int afs_abort_to_error(u32 abort_code)
23{ 23{
24 switch (abort_code) { 24 switch (abort_code) {
25 /* low errno codes inserted into abort namespace */
25 case 13: return -EACCES; 26 case 13: return -EACCES;
26 case 27: return -EFBIG; 27 case 27: return -EFBIG;
27 case 30: return -EROFS; 28 case 30: return -EROFS;
29
30 /* VICE "special error" codes; 101 - 111 */
28 case VSALVAGE: return -EIO; 31 case VSALVAGE: return -EIO;
29 case VNOVNODE: return -ENOENT; 32 case VNOVNODE: return -ENOENT;
30 case VNOVOL: return -ENOMEDIUM; 33 case VNOVOL: return -ENOMEDIUM;
@@ -36,11 +39,18 @@ int afs_abort_to_error(u32 abort_code)
36 case VOVERQUOTA: return -EDQUOT; 39 case VOVERQUOTA: return -EDQUOT;
37 case VBUSY: return -EBUSY; 40 case VBUSY: return -EBUSY;
38 case VMOVED: return -ENXIO; 41 case VMOVED: return -ENXIO;
39 case 0x2f6df0a: return -EWOULDBLOCK; 42
43 /* Unified AFS error table; ET "uae" == 0x2f6df00 */
44 case 0x2f6df00: return -EPERM;
45 case 0x2f6df01: return -ENOENT;
46 case 0x2f6df04: return -EIO;
47 case 0x2f6df0a: return -EAGAIN;
48 case 0x2f6df0b: return -ENOMEM;
40 case 0x2f6df0c: return -EACCES; 49 case 0x2f6df0c: return -EACCES;
41 case 0x2f6df0f: return -EBUSY; 50 case 0x2f6df0f: return -EBUSY;
42 case 0x2f6df10: return -EEXIST; 51 case 0x2f6df10: return -EEXIST;
43 case 0x2f6df11: return -EXDEV; 52 case 0x2f6df11: return -EXDEV;
53 case 0x2f6df12: return -ENODEV;
44 case 0x2f6df13: return -ENOTDIR; 54 case 0x2f6df13: return -ENOTDIR;
45 case 0x2f6df14: return -EISDIR; 55 case 0x2f6df14: return -EISDIR;
46 case 0x2f6df15: return -EINVAL; 56 case 0x2f6df15: return -EINVAL;
@@ -54,8 +64,12 @@ int afs_abort_to_error(u32 abort_code)
54 case 0x2f6df23: return -ENAMETOOLONG; 64 case 0x2f6df23: return -ENAMETOOLONG;
55 case 0x2f6df24: return -ENOLCK; 65 case 0x2f6df24: return -ENOLCK;
56 case 0x2f6df26: return -ENOTEMPTY; 66 case 0x2f6df26: return -ENOTEMPTY;
67 case 0x2f6df28: return -EWOULDBLOCK;
68 case 0x2f6df69: return -ENOTCONN;
69 case 0x2f6df6c: return -ETIMEDOUT;
57 case 0x2f6df78: return -EDQUOT; 70 case 0x2f6df78: return -EDQUOT;
58 71
72 /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
59 case RXKADINCONSISTENCY: return -EPROTO; 73 case RXKADINCONSISTENCY: return -EPROTO;
60 case RXKADPACKETSHORT: return -EPROTO; 74 case RXKADPACKETSHORT: return -EPROTO;
61 case RXKADLEVELFAIL: return -EKEYREJECTED; 75 case RXKADLEVELFAIL: return -EKEYREJECTED;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index dbc732e9a5c0..3a57a1b0fb51 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -770,15 +770,12 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
770void afs_send_empty_reply(struct afs_call *call) 770void afs_send_empty_reply(struct afs_call *call)
771{ 771{
772 struct msghdr msg; 772 struct msghdr msg;
773 struct kvec iov[1];
774 773
775 _enter(""); 774 _enter("");
776 775
777 iov[0].iov_base = NULL;
778 iov[0].iov_len = 0;
779 msg.msg_name = NULL; 776 msg.msg_name = NULL;
780 msg.msg_namelen = 0; 777 msg.msg_namelen = 0;
781 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */ 778 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
782 msg.msg_control = NULL; 779 msg.msg_control = NULL;
783 msg.msg_controllen = 0; 780 msg.msg_controllen = 0;
784 msg.msg_flags = 0; 781 msg.msg_flags = 0;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/pagevec.h> 16#include <linux/pagevec.h>
17#include <linux/aio.h>
18#include "internal.h" 17#include "internal.h"
19 18
20static int afs_write_back_from_locked_page(struct afs_writeback *wb, 19static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index a793f7023755..480440f4701f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
77 unsigned reqs_available; 77 unsigned reqs_available;
78}; 78};
79 79
80struct ctx_rq_wait {
81 struct completion comp;
82 atomic_t count;
83};
84
80struct kioctx { 85struct kioctx {
81 struct percpu_ref users; 86 struct percpu_ref users;
82 atomic_t dead; 87 atomic_t dead;
@@ -115,7 +120,7 @@ struct kioctx {
115 /* 120 /*
116 * signals when all in-flight requests are done 121 * signals when all in-flight requests are done
117 */ 122 */
118 struct completion *requests_done; 123 struct ctx_rq_wait *rq_wait;
119 124
120 struct { 125 struct {
121 /* 126 /*
@@ -151,6 +156,38 @@ struct kioctx {
151 unsigned id; 156 unsigned id;
152}; 157};
153 158
159/*
160 * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
161 * cancelled or completed (this makes a certain amount of sense because
162 * successful cancellation - io_cancel() - does deliver the completion to
163 * userspace).
164 *
165 * And since most things don't implement kiocb cancellation and we'd really like
166 * kiocb completion to be lockless when possible, we use ki_cancel to
167 * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
168 * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
169 */
170#define KIOCB_CANCELLED ((void *) (~0ULL))
171
172struct aio_kiocb {
173 struct kiocb common;
174
175 struct kioctx *ki_ctx;
176 kiocb_cancel_fn *ki_cancel;
177
178 struct iocb __user *ki_user_iocb; /* user's aiocb */
179 __u64 ki_user_data; /* user's data for completion */
180
181 struct list_head ki_list; /* the aio core uses this
182 * for cancellation */
183
184 /*
185 * If the aio_resfd field of the userspace iocb is not zero,
186 * this is the underlying eventfd context to deliver events to.
187 */
188 struct eventfd_ctx *ki_eventfd;
189};
190
154/*------ sysctl variables----*/ 191/*------ sysctl variables----*/
155static DEFINE_SPINLOCK(aio_nr_lock); 192static DEFINE_SPINLOCK(aio_nr_lock);
156unsigned long aio_nr; /* current system wide number of aio requests */ 193unsigned long aio_nr; /* current system wide number of aio requests */
@@ -220,7 +257,7 @@ static int __init aio_setup(void)
220 if (IS_ERR(aio_mnt)) 257 if (IS_ERR(aio_mnt))
221 panic("Failed to create aio fs mount."); 258 panic("Failed to create aio fs mount.");
222 259
223 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 260 kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
224 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 261 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
225 262
226 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page)); 263 pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -484,8 +521,9 @@ static int aio_setup_ring(struct kioctx *ctx)
484#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) 521#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
485#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) 522#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
486 523
487void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) 524void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
488{ 525{
526 struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
489 struct kioctx *ctx = req->ki_ctx; 527 struct kioctx *ctx = req->ki_ctx;
490 unsigned long flags; 528 unsigned long flags;
491 529
@@ -500,7 +538,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
500} 538}
501EXPORT_SYMBOL(kiocb_set_cancel_fn); 539EXPORT_SYMBOL(kiocb_set_cancel_fn);
502 540
503static int kiocb_cancel(struct kiocb *kiocb) 541static int kiocb_cancel(struct aio_kiocb *kiocb)
504{ 542{
505 kiocb_cancel_fn *old, *cancel; 543 kiocb_cancel_fn *old, *cancel;
506 544
@@ -518,7 +556,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
518 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED); 556 cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
519 } while (cancel != old); 557 } while (cancel != old);
520 558
521 return cancel(kiocb); 559 return cancel(&kiocb->common);
522} 560}
523 561
524static void free_ioctx(struct work_struct *work) 562static void free_ioctx(struct work_struct *work)
@@ -539,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
539 struct kioctx *ctx = container_of(ref, struct kioctx, reqs); 577 struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
540 578
541 /* At this point we know that there are no any in-flight requests */ 579 /* At this point we know that there are no any in-flight requests */
542 if (ctx->requests_done) 580 if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
543 complete(ctx->requests_done); 581 complete(&ctx->rq_wait->comp);
544 582
545 INIT_WORK(&ctx->free_work, free_ioctx); 583 INIT_WORK(&ctx->free_work, free_ioctx);
546 schedule_work(&ctx->free_work); 584 schedule_work(&ctx->free_work);
@@ -554,13 +592,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
554static void free_ioctx_users(struct percpu_ref *ref) 592static void free_ioctx_users(struct percpu_ref *ref)
555{ 593{
556 struct kioctx *ctx = container_of(ref, struct kioctx, users); 594 struct kioctx *ctx = container_of(ref, struct kioctx, users);
557 struct kiocb *req; 595 struct aio_kiocb *req;
558 596
559 spin_lock_irq(&ctx->ctx_lock); 597 spin_lock_irq(&ctx->ctx_lock);
560 598
561 while (!list_empty(&ctx->active_reqs)) { 599 while (!list_empty(&ctx->active_reqs)) {
562 req = list_first_entry(&ctx->active_reqs, 600 req = list_first_entry(&ctx->active_reqs,
563 struct kiocb, ki_list); 601 struct aio_kiocb, ki_list);
564 602
565 list_del_init(&req->ki_list); 603 list_del_init(&req->ki_list);
566 kiocb_cancel(req); 604 kiocb_cancel(req);
@@ -659,8 +697,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
659 nr_events *= 2; 697 nr_events *= 2;
660 698
661 /* Prevent overflows */ 699 /* Prevent overflows */
662 if ((nr_events > (0x10000000U / sizeof(struct io_event))) || 700 if (nr_events > (0x10000000U / sizeof(struct io_event))) {
663 (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
664 pr_debug("ENOMEM: nr_events too high\n"); 701 pr_debug("ENOMEM: nr_events too high\n");
665 return ERR_PTR(-EINVAL); 702 return ERR_PTR(-EINVAL);
666 } 703 }
@@ -751,7 +788,7 @@ err:
751 * the rapid destruction of the kioctx. 788 * the rapid destruction of the kioctx.
752 */ 789 */
753static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, 790static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
754 struct completion *requests_done) 791 struct ctx_rq_wait *wait)
755{ 792{
756 struct kioctx_table *table; 793 struct kioctx_table *table;
757 794
@@ -781,27 +818,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
781 if (ctx->mmap_size) 818 if (ctx->mmap_size)
782 vm_munmap(ctx->mmap_base, ctx->mmap_size); 819 vm_munmap(ctx->mmap_base, ctx->mmap_size);
783 820
784 ctx->requests_done = requests_done; 821 ctx->rq_wait = wait;
785 percpu_ref_kill(&ctx->users); 822 percpu_ref_kill(&ctx->users);
786 return 0; 823 return 0;
787} 824}
788 825
789/* wait_on_sync_kiocb:
790 * Waits on the given sync kiocb to complete.
791 */
792ssize_t wait_on_sync_kiocb(struct kiocb *req)
793{
794 while (!req->ki_ctx) {
795 set_current_state(TASK_UNINTERRUPTIBLE);
796 if (req->ki_ctx)
797 break;
798 io_schedule();
799 }
800 __set_current_state(TASK_RUNNING);
801 return req->ki_user_data;
802}
803EXPORT_SYMBOL(wait_on_sync_kiocb);
804
805/* 826/*
806 * exit_aio: called when the last user of mm goes away. At this point, there is 827 * exit_aio: called when the last user of mm goes away. At this point, there is
807 * no way for any new requests to be submited or any of the io_* syscalls to be 828 * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -813,18 +834,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
813void exit_aio(struct mm_struct *mm) 834void exit_aio(struct mm_struct *mm)
814{ 835{
815 struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table); 836 struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
816 int i; 837 struct ctx_rq_wait wait;
838 int i, skipped;
817 839
818 if (!table) 840 if (!table)
819 return; 841 return;
820 842
843 atomic_set(&wait.count, table->nr);
844 init_completion(&wait.comp);
845
846 skipped = 0;
821 for (i = 0; i < table->nr; ++i) { 847 for (i = 0; i < table->nr; ++i) {
822 struct kioctx *ctx = table->table[i]; 848 struct kioctx *ctx = table->table[i];
823 struct completion requests_done =
824 COMPLETION_INITIALIZER_ONSTACK(requests_done);
825 849
826 if (!ctx) 850 if (!ctx) {
851 skipped++;
827 continue; 852 continue;
853 }
854
828 /* 855 /*
829 * We don't need to bother with munmap() here - exit_mmap(mm) 856 * We don't need to bother with munmap() here - exit_mmap(mm)
830 * is coming and it'll unmap everything. And we simply can't, 857 * is coming and it'll unmap everything. And we simply can't,
@@ -833,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
833 * that it needs to unmap the area, just set it to 0. 860 * that it needs to unmap the area, just set it to 0.
834 */ 861 */
835 ctx->mmap_size = 0; 862 ctx->mmap_size = 0;
836 kill_ioctx(mm, ctx, &requests_done); 863 kill_ioctx(mm, ctx, &wait);
864 }
837 865
866 if (!atomic_sub_and_test(skipped, &wait.count)) {
838 /* Wait until all IO for the context are done. */ 867 /* Wait until all IO for the context are done. */
839 wait_for_completion(&requests_done); 868 wait_for_completion(&wait.comp);
840 } 869 }
841 870
842 RCU_INIT_POINTER(mm->ioctx_table, NULL); 871 RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -956,9 +985,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
956 * Allocate a slot for an aio request. 985 * Allocate a slot for an aio request.
957 * Returns NULL if no requests are free. 986 * Returns NULL if no requests are free.
958 */ 987 */
959static inline struct kiocb *aio_get_req(struct kioctx *ctx) 988static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
960{ 989{
961 struct kiocb *req; 990 struct aio_kiocb *req;
962 991
963 if (!get_reqs_available(ctx)) { 992 if (!get_reqs_available(ctx)) {
964 user_refill_reqs_available(ctx); 993 user_refill_reqs_available(ctx);
@@ -979,10 +1008,10 @@ out_put:
979 return NULL; 1008 return NULL;
980} 1009}
981 1010
982static void kiocb_free(struct kiocb *req) 1011static void kiocb_free(struct aio_kiocb *req)
983{ 1012{
984 if (req->ki_filp) 1013 if (req->common.ki_filp)
985 fput(req->ki_filp); 1014 fput(req->common.ki_filp);
986 if (req->ki_eventfd != NULL) 1015 if (req->ki_eventfd != NULL)
987 eventfd_ctx_put(req->ki_eventfd); 1016 eventfd_ctx_put(req->ki_eventfd);
988 kmem_cache_free(kiocb_cachep, req); 1017 kmem_cache_free(kiocb_cachep, req);
@@ -1018,8 +1047,9 @@ out:
1018/* aio_complete 1047/* aio_complete
1019 * Called when the io request on the given iocb is complete. 1048 * Called when the io request on the given iocb is complete.
1020 */ 1049 */
1021void aio_complete(struct kiocb *iocb, long res, long res2) 1050static void aio_complete(struct kiocb *kiocb, long res, long res2)
1022{ 1051{
1052 struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
1023 struct kioctx *ctx = iocb->ki_ctx; 1053 struct kioctx *ctx = iocb->ki_ctx;
1024 struct aio_ring *ring; 1054 struct aio_ring *ring;
1025 struct io_event *ev_page, *event; 1055 struct io_event *ev_page, *event;
@@ -1033,13 +1063,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1033 * ref, no other paths have a way to get another ref 1063 * ref, no other paths have a way to get another ref
1034 * - the sync task helpfully left a reference to itself in the iocb 1064 * - the sync task helpfully left a reference to itself in the iocb
1035 */ 1065 */
1036 if (is_sync_kiocb(iocb)) { 1066 BUG_ON(is_sync_kiocb(kiocb));
1037 iocb->ki_user_data = res;
1038 smp_wmb();
1039 iocb->ki_ctx = ERR_PTR(-EXDEV);
1040 wake_up_process(iocb->ki_obj.tsk);
1041 return;
1042 }
1043 1067
1044 if (iocb->ki_list.next) { 1068 if (iocb->ki_list.next) {
1045 unsigned long flags; 1069 unsigned long flags;
@@ -1065,7 +1089,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1065 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1089 ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1066 event = ev_page + pos % AIO_EVENTS_PER_PAGE; 1090 event = ev_page + pos % AIO_EVENTS_PER_PAGE;
1067 1091
1068 event->obj = (u64)(unsigned long)iocb->ki_obj.user; 1092 event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
1069 event->data = iocb->ki_user_data; 1093 event->data = iocb->ki_user_data;
1070 event->res = res; 1094 event->res = res;
1071 event->res2 = res2; 1095 event->res2 = res2;
@@ -1074,7 +1098,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1074 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]); 1098 flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
1075 1099
1076 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n", 1100 pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
1077 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data, 1101 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
1078 res, res2); 1102 res, res2);
1079 1103
1080 /* after flagging the request as done, we 1104 /* after flagging the request as done, we
@@ -1121,7 +1145,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1121 1145
1122 percpu_ref_put(&ctx->reqs); 1146 percpu_ref_put(&ctx->reqs);
1123} 1147}
1124EXPORT_SYMBOL(aio_complete);
1125 1148
1126/* aio_read_events_ring 1149/* aio_read_events_ring
1127 * Pull an event off of the ioctx's event ring. Returns the number of 1150 * Pull an event off of the ioctx's event ring. Returns the number of
@@ -1321,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1321{ 1344{
1322 struct kioctx *ioctx = lookup_ioctx(ctx); 1345 struct kioctx *ioctx = lookup_ioctx(ctx);
1323 if (likely(NULL != ioctx)) { 1346 if (likely(NULL != ioctx)) {
1324 struct completion requests_done = 1347 struct ctx_rq_wait wait;
1325 COMPLETION_INITIALIZER_ONSTACK(requests_done);
1326 int ret; 1348 int ret;
1327 1349
1350 init_completion(&wait.comp);
1351 atomic_set(&wait.count, 1);
1352
1328 /* Pass requests_done to kill_ioctx() where it can be set 1353 /* Pass requests_done to kill_ioctx() where it can be set
1329 * in a thread-safe way. If we try to set it here then we have 1354 * in a thread-safe way. If we try to set it here then we have
1330 * a race condition if two io_destroy() called simultaneously. 1355 * a race condition if two io_destroy() called simultaneously.
1331 */ 1356 */
1332 ret = kill_ioctx(current->mm, ioctx, &requests_done); 1357 ret = kill_ioctx(current->mm, ioctx, &wait);
1333 percpu_ref_put(&ioctx->users); 1358 percpu_ref_put(&ioctx->users);
1334 1359
1335 /* Wait until all IO for the context are done. Otherwise kernel 1360 /* Wait until all IO for the context are done. Otherwise kernel
@@ -1337,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1337 * is destroyed. 1362 * is destroyed.
1338 */ 1363 */
1339 if (!ret) 1364 if (!ret)
1340 wait_for_completion(&requests_done); 1365 wait_for_completion(&wait.comp);
1341 1366
1342 return ret; 1367 return ret;
1343 } 1368 }
@@ -1345,50 +1370,21 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
1345 return -EINVAL; 1370 return -EINVAL;
1346} 1371}
1347 1372
1348typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
1349 unsigned long, loff_t);
1350typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); 1373typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
1351 1374
1352static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, 1375static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
1353 int rw, char __user *buf, 1376 struct iovec **iovec,
1354 unsigned long *nr_segs, 1377 bool compat,
1355 struct iovec **iovec, 1378 struct iov_iter *iter)
1356 bool compat)
1357{ 1379{
1358 ssize_t ret;
1359
1360 *nr_segs = kiocb->ki_nbytes;
1361
1362#ifdef CONFIG_COMPAT 1380#ifdef CONFIG_COMPAT
1363 if (compat) 1381 if (compat)
1364 ret = compat_rw_copy_check_uvector(rw, 1382 return compat_import_iovec(rw,
1365 (struct compat_iovec __user *)buf, 1383 (struct compat_iovec __user *)buf,
1366 *nr_segs, UIO_FASTIOV, *iovec, iovec); 1384 len, UIO_FASTIOV, iovec, iter);
1367 else
1368#endif 1385#endif
1369 ret = rw_copy_check_uvector(rw, 1386 return import_iovec(rw, (struct iovec __user *)buf,
1370 (struct iovec __user *)buf, 1387 len, UIO_FASTIOV, iovec, iter);
1371 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1372 if (ret < 0)
1373 return ret;
1374
1375 /* ki_nbytes now reflect bytes instead of segs */
1376 kiocb->ki_nbytes = ret;
1377 return 0;
1378}
1379
1380static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1381 int rw, char __user *buf,
1382 unsigned long *nr_segs,
1383 struct iovec *iovec)
1384{
1385 if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
1386 return -EFAULT;
1387
1388 iovec->iov_base = buf;
1389 iovec->iov_len = kiocb->ki_nbytes;
1390 *nr_segs = 1;
1391 return 0;
1392} 1388}
1393 1389
1394/* 1390/*
@@ -1396,14 +1392,12 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1396 * Performs the initial checks and io submission. 1392 * Performs the initial checks and io submission.
1397 */ 1393 */
1398static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, 1394static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1399 char __user *buf, bool compat) 1395 char __user *buf, size_t len, bool compat)
1400{ 1396{
1401 struct file *file = req->ki_filp; 1397 struct file *file = req->ki_filp;
1402 ssize_t ret; 1398 ssize_t ret;
1403 unsigned long nr_segs;
1404 int rw; 1399 int rw;
1405 fmode_t mode; 1400 fmode_t mode;
1406 aio_rw_op *rw_op;
1407 rw_iter_op *iter_op; 1401 rw_iter_op *iter_op;
1408 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 1402 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
1409 struct iov_iter iter; 1403 struct iov_iter iter;
@@ -1413,7 +1407,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1413 case IOCB_CMD_PREADV: 1407 case IOCB_CMD_PREADV:
1414 mode = FMODE_READ; 1408 mode = FMODE_READ;
1415 rw = READ; 1409 rw = READ;
1416 rw_op = file->f_op->aio_read;
1417 iter_op = file->f_op->read_iter; 1410 iter_op = file->f_op->read_iter;
1418 goto rw_common; 1411 goto rw_common;
1419 1412
@@ -1421,51 +1414,40 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1421 case IOCB_CMD_PWRITEV: 1414 case IOCB_CMD_PWRITEV:
1422 mode = FMODE_WRITE; 1415 mode = FMODE_WRITE;
1423 rw = WRITE; 1416 rw = WRITE;
1424 rw_op = file->f_op->aio_write;
1425 iter_op = file->f_op->write_iter; 1417 iter_op = file->f_op->write_iter;
1426 goto rw_common; 1418 goto rw_common;
1427rw_common: 1419rw_common:
1428 if (unlikely(!(file->f_mode & mode))) 1420 if (unlikely(!(file->f_mode & mode)))
1429 return -EBADF; 1421 return -EBADF;
1430 1422
1431 if (!rw_op && !iter_op) 1423 if (!iter_op)
1432 return -EINVAL; 1424 return -EINVAL;
1433 1425
1434 ret = (opcode == IOCB_CMD_PREADV || 1426 if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
1435 opcode == IOCB_CMD_PWRITEV) 1427 ret = aio_setup_vectored_rw(rw, buf, len,
1436 ? aio_setup_vectored_rw(req, rw, buf, &nr_segs, 1428 &iovec, compat, &iter);
1437 &iovec, compat) 1429 else {
1438 : aio_setup_single_vector(req, rw, buf, &nr_segs, 1430 ret = import_single_range(rw, buf, len, iovec, &iter);
1439 iovec); 1431 iovec = NULL;
1432 }
1440 if (!ret) 1433 if (!ret)
1441 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1434 ret = rw_verify_area(rw, file, &req->ki_pos,
1435 iov_iter_count(&iter));
1442 if (ret < 0) { 1436 if (ret < 0) {
1443 if (iovec != inline_vecs) 1437 kfree(iovec);
1444 kfree(iovec);
1445 return ret; 1438 return ret;
1446 } 1439 }
1447 1440
1448 req->ki_nbytes = ret; 1441 len = ret;
1449
1450 /* XXX: move/kill - rw_verify_area()? */
1451 /* This matches the pread()/pwrite() logic */
1452 if (req->ki_pos < 0) {
1453 ret = -EINVAL;
1454 break;
1455 }
1456 1442
1457 if (rw == WRITE) 1443 if (rw == WRITE)
1458 file_start_write(file); 1444 file_start_write(file);
1459 1445
1460 if (iter_op) { 1446 ret = iter_op(req, &iter);
1461 iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
1462 ret = iter_op(req, &iter);
1463 } else {
1464 ret = rw_op(req, iovec, nr_segs, req->ki_pos);
1465 }
1466 1447
1467 if (rw == WRITE) 1448 if (rw == WRITE)
1468 file_end_write(file); 1449 file_end_write(file);
1450 kfree(iovec);
1469 break; 1451 break;
1470 1452
1471 case IOCB_CMD_FDSYNC: 1453 case IOCB_CMD_FDSYNC:
@@ -1487,9 +1469,6 @@ rw_common:
1487 return -EINVAL; 1469 return -EINVAL;
1488 } 1470 }
1489 1471
1490 if (iovec != inline_vecs)
1491 kfree(iovec);
1492
1493 if (ret != -EIOCBQUEUED) { 1472 if (ret != -EIOCBQUEUED) {
1494 /* 1473 /*
1495 * There's no easy way to restart the syscall since other AIO's 1474 * There's no easy way to restart the syscall since other AIO's
@@ -1508,7 +1487,7 @@ rw_common:
1508static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1487static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1509 struct iocb *iocb, bool compat) 1488 struct iocb *iocb, bool compat)
1510{ 1489{
1511 struct kiocb *req; 1490 struct aio_kiocb *req;
1512 ssize_t ret; 1491 ssize_t ret;
1513 1492
1514 /* enforce forwards compatibility on users */ 1493 /* enforce forwards compatibility on users */
@@ -1531,11 +1510,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1531 if (unlikely(!req)) 1510 if (unlikely(!req))
1532 return -EAGAIN; 1511 return -EAGAIN;
1533 1512
1534 req->ki_filp = fget(iocb->aio_fildes); 1513 req->common.ki_filp = fget(iocb->aio_fildes);
1535 if (unlikely(!req->ki_filp)) { 1514 if (unlikely(!req->common.ki_filp)) {
1536 ret = -EBADF; 1515 ret = -EBADF;
1537 goto out_put_req; 1516 goto out_put_req;
1538 } 1517 }
1518 req->common.ki_pos = iocb->aio_offset;
1519 req->common.ki_complete = aio_complete;
1520 req->common.ki_flags = iocb_flags(req->common.ki_filp);
1539 1521
1540 if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1522 if (iocb->aio_flags & IOCB_FLAG_RESFD) {
1541 /* 1523 /*
@@ -1550,6 +1532,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1550 req->ki_eventfd = NULL; 1532 req->ki_eventfd = NULL;
1551 goto out_put_req; 1533 goto out_put_req;
1552 } 1534 }
1535
1536 req->common.ki_flags |= IOCB_EVENTFD;
1553 } 1537 }
1554 1538
1555 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); 1539 ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1558,13 +1542,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1558 goto out_put_req; 1542 goto out_put_req;
1559 } 1543 }
1560 1544
1561 req->ki_obj.user = user_iocb; 1545 req->ki_user_iocb = user_iocb;
1562 req->ki_user_data = iocb->aio_data; 1546 req->ki_user_data = iocb->aio_data;
1563 req->ki_pos = iocb->aio_offset;
1564 req->ki_nbytes = iocb->aio_nbytes;
1565 1547
1566 ret = aio_run_iocb(req, iocb->aio_lio_opcode, 1548 ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
1567 (char __user *)(unsigned long)iocb->aio_buf, 1549 (char __user *)(unsigned long)iocb->aio_buf,
1550 iocb->aio_nbytes,
1568 compat); 1551 compat);
1569 if (ret) 1552 if (ret)
1570 goto out_put_req; 1553 goto out_put_req;
@@ -1651,10 +1634,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1651/* lookup_kiocb 1634/* lookup_kiocb
1652 * Finds a given iocb for cancellation. 1635 * Finds a given iocb for cancellation.
1653 */ 1636 */
1654static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, 1637static struct aio_kiocb *
1655 u32 key) 1638lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
1656{ 1639{
1657 struct list_head *pos; 1640 struct aio_kiocb *kiocb;
1658 1641
1659 assert_spin_locked(&ctx->ctx_lock); 1642 assert_spin_locked(&ctx->ctx_lock);
1660 1643
@@ -1662,9 +1645,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
1662 return NULL; 1645 return NULL;
1663 1646
1664 /* TODO: use a hash or array, this sucks. */ 1647 /* TODO: use a hash or array, this sucks. */
1665 list_for_each(pos, &ctx->active_reqs) { 1648 list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
1666 struct kiocb *kiocb = list_kiocb(pos); 1649 if (kiocb->ki_user_iocb == iocb)
1667 if (kiocb->ki_obj.user == iocb)
1668 return kiocb; 1650 return kiocb;
1669 } 1651 }
1670 return NULL; 1652 return NULL;
@@ -1684,7 +1666,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
1684 struct io_event __user *, result) 1666 struct io_event __user *, result)
1685{ 1667{
1686 struct kioctx *ctx; 1668 struct kioctx *ctx;
1687 struct kiocb *kiocb; 1669 struct aio_kiocb *kiocb;
1688 u32 key; 1670 u32 key;
1689 int ret; 1671 int ret;
1690 1672
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8e98cf954bab..d10e619632ab 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -213,7 +213,7 @@ void autofs4_clean_ino(struct autofs_info *);
213 213
214static inline int autofs_prepare_pipe(struct file *pipe) 214static inline int autofs_prepare_pipe(struct file *pipe)
215{ 215{
216 if (!pipe->f_op->write) 216 if (!(pipe->f_mode & FMODE_CAN_WRITE))
217 return -EINVAL; 217 return -EINVAL;
218 if (!S_ISFIFO(file_inode(pipe)->i_mode)) 218 if (!S_ISFIFO(file_inode(pipe)->i_mode))
219 return -EINVAL; 219 return -EINVAL;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 116fd38ee472..2ad05ab93db8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -70,7 +70,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
70 70
71 mutex_lock(&sbi->pipe_mutex); 71 mutex_lock(&sbi->pipe_mutex);
72 while (bytes && 72 while (bytes &&
73 (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) { 73 (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) {
74 data += wr; 74 data += wr;
75 bytes -= wr; 75 bytes -= wr;
76 } 76 }
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index 3a7813ab8c95..1fead8d56a98 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -19,16 +19,16 @@ typedef u64 befs_blocknr_t;
19 * BeFS in memory structures 19 * BeFS in memory structures
20 */ 20 */
21 21
22typedef struct befs_mount_options { 22struct befs_mount_options {
23 kgid_t gid; 23 kgid_t gid;
24 kuid_t uid; 24 kuid_t uid;
25 int use_gid; 25 int use_gid;
26 int use_uid; 26 int use_uid;
27 int debug; 27 int debug;
28 char *iocharset; 28 char *iocharset;
29} befs_mount_options; 29};
30 30
31typedef struct befs_sb_info { 31struct befs_sb_info {
32 u32 magic1; 32 u32 magic1;
33 u32 block_size; 33 u32 block_size;
34 u32 block_shift; 34 u32 block_shift;
@@ -52,12 +52,11 @@ typedef struct befs_sb_info {
52 befs_inode_addr indices; 52 befs_inode_addr indices;
53 u32 magic3; 53 u32 magic3;
54 54
55 befs_mount_options mount_opts; 55 struct befs_mount_options mount_opts;
56 struct nls_table *nls; 56 struct nls_table *nls;
57};
57 58
58} befs_sb_info; 59struct befs_inode_info {
59
60typedef struct befs_inode_info {
61 u32 i_flags; 60 u32 i_flags;
62 u32 i_type; 61 u32 i_type;
63 62
@@ -71,8 +70,7 @@ typedef struct befs_inode_info {
71 } i_data; 70 } i_data;
72 71
73 struct inode vfs_inode; 72 struct inode vfs_inode;
74 73};
75} befs_inode_info;
76 74
77enum befs_err { 75enum befs_err {
78 BEFS_OK, 76 BEFS_OK,
@@ -105,13 +103,13 @@ void befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *);
105/* Gets a pointer to the private portion of the super_block 103/* Gets a pointer to the private portion of the super_block
106 * structure from the public part 104 * structure from the public part
107 */ 105 */
108static inline befs_sb_info * 106static inline struct befs_sb_info *
109BEFS_SB(const struct super_block *super) 107BEFS_SB(const struct super_block *super)
110{ 108{
111 return (befs_sb_info *) super->s_fs_info; 109 return (struct befs_sb_info *) super->s_fs_info;
112} 110}
113 111
114static inline befs_inode_info * 112static inline struct befs_inode_info *
115BEFS_I(const struct inode *inode) 113BEFS_I(const struct inode *inode)
116{ 114{
117 return list_entry(inode, struct befs_inode_info, vfs_inode); 115 return list_entry(inode, struct befs_inode_info, vfs_inode);
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 1e8e0b8d8836..ebd50718659f 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -168,7 +168,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
168 befs_blocknr_t blocks; 168 befs_blocknr_t blocks;
169 befs_blocknr_t datablocks; /* File data blocks */ 169 befs_blocknr_t datablocks; /* File data blocks */
170 befs_blocknr_t metablocks; /* FS metadata blocks */ 170 befs_blocknr_t metablocks; /* FS metadata blocks */
171 befs_sb_info *befs_sb = BEFS_SB(sb); 171 struct befs_sb_info *befs_sb = BEFS_SB(sb);
172 172
173 befs_debug(sb, "---> %s", __func__); 173 befs_debug(sb, "---> %s", __func__);
174 174
@@ -428,7 +428,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
428 struct buffer_head *indir_block; 428 struct buffer_head *indir_block;
429 befs_block_run indir_run; 429 befs_block_run indir_run;
430 befs_disk_inode_addr *iaddr_array = NULL; 430 befs_disk_inode_addr *iaddr_array = NULL;
431 befs_sb_info *befs_sb = BEFS_SB(sb); 431 struct befs_sb_info *befs_sb = BEFS_SB(sb);
432 432
433 befs_blocknr_t indir_start_blk = 433 befs_blocknr_t indir_start_blk =
434 data->max_indirect_range >> befs_sb->block_shift; 434 data->max_indirect_range >> befs_sb->block_shift;
diff --git a/fs/befs/io.c b/fs/befs/io.c
index 0408a3d601d0..7a5b4ec21c56 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -28,7 +28,7 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
28{ 28{
29 struct buffer_head *bh = NULL; 29 struct buffer_head *bh = NULL;
30 befs_blocknr_t block = 0; 30 befs_blocknr_t block = 0;
31 befs_sb_info *befs_sb = BEFS_SB(sb); 31 struct befs_sb_info *befs_sb = BEFS_SB(sb);
32 32
33 befs_debug(sb, "---> Enter %s " 33 befs_debug(sb, "---> Enter %s "
34 "[%u, %hu, %hu]", __func__, iaddr.allocation_group, 34 "[%u, %hu, %hu]", __func__, iaddr.allocation_group,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e089f1985fca..16e0a48bfccd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -51,7 +51,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
51static void befs_put_super(struct super_block *); 51static void befs_put_super(struct super_block *);
52static int befs_remount(struct super_block *, int *, char *); 52static int befs_remount(struct super_block *, int *, char *);
53static int befs_statfs(struct dentry *, struct kstatfs *); 53static int befs_statfs(struct dentry *, struct kstatfs *);
54static int parse_options(char *, befs_mount_options *); 54static int parse_options(char *, struct befs_mount_options *);
55 55
56static const struct super_operations befs_sops = { 56static const struct super_operations befs_sops = {
57 .alloc_inode = befs_alloc_inode, /* allocate a new inode */ 57 .alloc_inode = befs_alloc_inode, /* allocate a new inode */
@@ -304,9 +304,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
304{ 304{
305 struct buffer_head *bh = NULL; 305 struct buffer_head *bh = NULL;
306 befs_inode *raw_inode = NULL; 306 befs_inode *raw_inode = NULL;
307 307 struct befs_sb_info *befs_sb = BEFS_SB(sb);
308 befs_sb_info *befs_sb = BEFS_SB(sb); 308 struct befs_inode_info *befs_ino = NULL;
309 befs_inode_info *befs_ino = NULL;
310 struct inode *inode; 309 struct inode *inode;
311 long ret = -EIO; 310 long ret = -EIO;
312 311
@@ -472,7 +471,7 @@ static void *
472befs_follow_link(struct dentry *dentry, struct nameidata *nd) 471befs_follow_link(struct dentry *dentry, struct nameidata *nd)
473{ 472{
474 struct super_block *sb = dentry->d_sb; 473 struct super_block *sb = dentry->d_sb;
475 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 474 struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
476 befs_data_stream *data = &befs_ino->i_data.ds; 475 befs_data_stream *data = &befs_ino->i_data.ds;
477 befs_off_t len = data->size; 476 befs_off_t len = data->size;
478 char *link; 477 char *link;
@@ -502,7 +501,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
502static void * 501static void *
503befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) 502befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
504{ 503{
505 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 504 struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
505
506 nd_set_link(nd, befs_ino->i_data.symlink); 506 nd_set_link(nd, befs_ino->i_data.symlink);
507 return NULL; 507 return NULL;
508} 508}
@@ -669,7 +669,7 @@ static const match_table_t befs_tokens = {
669}; 669};
670 670
671static int 671static int
672parse_options(char *options, befs_mount_options * opts) 672parse_options(char *options, struct befs_mount_options *opts)
673{ 673{
674 char *p; 674 char *p;
675 substring_t args[MAX_OPT_ARGS]; 675 substring_t args[MAX_OPT_ARGS];
@@ -769,7 +769,7 @@ static int
769befs_fill_super(struct super_block *sb, void *data, int silent) 769befs_fill_super(struct super_block *sb, void *data, int silent)
770{ 770{
771 struct buffer_head *bh; 771 struct buffer_head *bh;
772 befs_sb_info *befs_sb; 772 struct befs_sb_info *befs_sb;
773 befs_super_block *disk_sb; 773 befs_super_block *disk_sb;
774 struct inode *root; 774 struct inode *root;
775 long ret = -EINVAL; 775 long ret = -EINVAL;
diff --git a/fs/befs/super.c b/fs/befs/super.c
index ca40f828f64d..aeafc4d84278 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -24,7 +24,7 @@
24int 24int
25befs_load_sb(struct super_block *sb, befs_super_block * disk_sb) 25befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
26{ 26{
27 befs_sb_info *befs_sb = BEFS_SB(sb); 27 struct befs_sb_info *befs_sb = BEFS_SB(sb);
28 28
29 /* Check the byte order of the filesystem */ 29 /* Check the byte order of the filesystem */
30 if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE) 30 if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
@@ -59,7 +59,7 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
59int 59int
60befs_check_sb(struct super_block *sb) 60befs_check_sb(struct super_block *sb)
61{ 61{
62 befs_sb_info *befs_sb = BEFS_SB(sb); 62 struct befs_sb_info *befs_sb = BEFS_SB(sb);
63 63
64 /* Check magic headers of super block */ 64 /* Check magic headers of super block */
65 if ((befs_sb->magic1 != BEFS_SUPER_MAGIC1) 65 if ((befs_sb->magic1 != BEFS_SUPER_MAGIC1)
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 08063ae0a17c..7a8182770649 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -86,7 +86,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
86 86
87 inode = new_inode(s); 87 inode = new_inode(s);
88 if (!inode) 88 if (!inode)
89 return -ENOSPC; 89 return -ENOMEM;
90 mutex_lock(&info->bfs_lock); 90 mutex_lock(&info->bfs_lock);
91 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1); 91 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
92 if (ino > info->si_lasti) { 92 if (ino > info->si_lasti) {
@@ -293,7 +293,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
293 for (block = sblock; block <= eblock; block++) { 293 for (block = sblock; block <= eblock; block++) {
294 bh = sb_bread(dir->i_sb, block); 294 bh = sb_bread(dir->i_sb, block);
295 if (!bh) 295 if (!bh)
296 return -ENOSPC; 296 return -EIO;
297 for (off = 0; off < BFS_BSIZE; off += BFS_DIRENT_SIZE) { 297 for (off = 0; off < BFS_BSIZE; off += BFS_DIRENT_SIZE) {
298 de = (struct bfs_dirent *)(bh->b_data + off); 298 de = (struct bfs_dirent *)(bh->b_data + off);
299 if (!de->ino) { 299 if (!de->ino) {
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index e7f88ace1a25..97f1b5160155 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -23,9 +23,7 @@
23 23
24const struct file_operations bfs_file_operations = { 24const struct file_operations bfs_file_operations = {
25 .llseek = generic_file_llseek, 25 .llseek = generic_file_llseek,
26 .read = new_sync_read,
27 .read_iter = generic_file_read_iter, 26 .read_iter = generic_file_read_iter,
28 .write = new_sync_write,
29 .write_iter = generic_file_write_iter, 27 .write_iter = generic_file_write_iter,
30 .mmap = generic_file_mmap, 28 .mmap = generic_file_mmap,
31 .splice_read = generic_file_splice_read, 29 .splice_read = generic_file_splice_read,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/vfs.h> 16#include <linux/vfs.h>
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/uio.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "bfs.h" 20#include "bfs.h"
20 21
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/random.h> 32#include <linux/random.h>
33#include <linux/elf.h> 33#include <linux/elf.h>
34#include <linux/elf-randomize.h>
34#include <linux/utsname.h> 35#include <linux/utsname.h>
35#include <linux/coredump.h> 36#include <linux/coredump.h>
36#include <linux/sched.h> 37#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
862 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 863 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
863 int elf_prot = 0, elf_flags; 864 int elf_prot = 0, elf_flags;
864 unsigned long k, vaddr; 865 unsigned long k, vaddr;
866 unsigned long total_size = 0;
865 867
866 if (elf_ppnt->p_type != PT_LOAD) 868 if (elf_ppnt->p_type != PT_LOAD)
867 continue; 869 continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
909 * default mmap base, as well as whatever program they 911 * default mmap base, as well as whatever program they
910 * might try to exec. This is because the brk will 912 * might try to exec. This is because the brk will
911 * follow the loader, and is not movable. */ 913 * follow the loader, and is not movable. */
912#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE 914 load_bias = ELF_ET_DYN_BASE - vaddr;
913 /* Memory randomization might have been switched off
914 * in runtime via sysctl or explicit setting of
915 * personality flags.
916 * If that is the case, retain the original non-zero
917 * load_bias value in order to establish proper
918 * non-randomized mappings.
919 */
920 if (current->flags & PF_RANDOMIZE) 915 if (current->flags & PF_RANDOMIZE)
921 load_bias = 0; 916 load_bias += arch_mmap_rnd();
922 else 917 load_bias = ELF_PAGESTART(load_bias);
923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 918 total_size = total_mapping_size(elf_phdata,
924#else 919 loc->elf_ex.e_phnum);
925 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 920 if (!total_size) {
926#endif 921 error = -EINVAL;
922 goto out_free_dentry;
923 }
927 } 924 }
928 925
929 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, 926 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
930 elf_prot, elf_flags, 0); 927 elf_prot, elf_flags, total_size);
931 if (BAD_ADDR(error)) { 928 if (BAD_ADDR(error)) {
932 retval = IS_ERR((void *)error) ? 929 retval = IS_ERR((void *)error) ?
933 PTR_ERR((void*)error) : -EINVAL; 930 PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
1053 current->mm->end_data = end_data; 1050 current->mm->end_data = end_data;
1054 current->mm->start_stack = bprm->p; 1051 current->mm->start_stack = bprm->p;
1055 1052
1056#ifdef arch_randomize_brk
1057 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { 1053 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1058 current->mm->brk = current->mm->start_brk = 1054 current->mm->brk = current->mm->start_brk =
1059 arch_randomize_brk(current->mm); 1055 arch_randomize_brk(current->mm);
1060#ifdef CONFIG_COMPAT_BRK 1056#ifdef compat_brk_randomized
1061 current->brk_randomized = 1; 1057 current->brk_randomized = 1;
1062#endif 1058#endif
1063 } 1059 }
1064#endif
1065 1060
1066 if (current->personality & MMAP_PAGE_ZERO) { 1061 if (current->personality & MMAP_PAGE_ZERO) {
1067 /* Why this, you ask??? Well SVr4 maps page 0 as read-only, 1062 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 97aff2879cda..9dcb05409ba7 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -9,6 +9,7 @@
9 9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11 11
12#include <linux/kernel.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/init.h> 14#include <linux/init.h>
14#include <linux/sched.h> 15#include <linux/sched.h>
@@ -521,9 +522,8 @@ static int parse_command(const char __user *buffer, size_t count)
521 522
522static void entry_status(Node *e, char *page) 523static void entry_status(Node *e, char *page)
523{ 524{
524 char *dp; 525 char *dp = page;
525 char *status = "disabled"; 526 const char *status = "disabled";
526 const char *flags = "flags: ";
527 527
528 if (test_bit(Enabled, &e->flags)) 528 if (test_bit(Enabled, &e->flags))
529 status = "enabled"; 529 status = "enabled";
@@ -533,12 +533,10 @@ static void entry_status(Node *e, char *page)
533 return; 533 return;
534 } 534 }
535 535
536 sprintf(page, "%s\ninterpreter %s\n", status, e->interpreter); 536 dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
537 dp = page + strlen(page);
538 537
539 /* print the special flags */ 538 /* print the special flags */
540 sprintf(dp, "%s", flags); 539 dp += sprintf(dp, "flags: ");
541 dp += strlen(flags);
542 if (e->flags & MISC_FMT_PRESERVE_ARGV0) 540 if (e->flags & MISC_FMT_PRESERVE_ARGV0)
543 *dp++ = 'P'; 541 *dp++ = 'P';
544 if (e->flags & MISC_FMT_OPEN_BINARY) 542 if (e->flags & MISC_FMT_OPEN_BINARY)
@@ -550,21 +548,11 @@ static void entry_status(Node *e, char *page)
550 if (!test_bit(Magic, &e->flags)) { 548 if (!test_bit(Magic, &e->flags)) {
551 sprintf(dp, "extension .%s\n", e->magic); 549 sprintf(dp, "extension .%s\n", e->magic);
552 } else { 550 } else {
553 int i; 551 dp += sprintf(dp, "offset %i\nmagic ", e->offset);
554 552 dp = bin2hex(dp, e->magic, e->size);
555 sprintf(dp, "offset %i\nmagic ", e->offset);
556 dp = page + strlen(page);
557 for (i = 0; i < e->size; i++) {
558 sprintf(dp, "%02x", 0xff & (int) (e->magic[i]));
559 dp += 2;
560 }
561 if (e->mask) { 553 if (e->mask) {
562 sprintf(dp, "\nmask "); 554 dp += sprintf(dp, "\nmask ");
563 dp += 6; 555 dp = bin2hex(dp, e->mask, e->size);
564 for (i = 0; i < e->size; i++) {
565 sprintf(dp, "%02x", 0xff & (int) (e->mask[i]));
566 dp += 2;
567 }
568 } 556 }
569 *dp++ = '\n'; 557 *dp++ = '\n';
570 *dp = '\0'; 558 *dp = '\0';
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..897ee0503932 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/cleancache.h> 29#include <linux/cleancache.h>
30#include <linux/aio.h>
31#include <asm/uaccess.h> 30#include <asm/uaccess.h>
32#include "internal.h" 31#include "internal.h"
33 32
@@ -147,15 +146,13 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
147} 146}
148 147
149static ssize_t 148static ssize_t
150blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 149blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
151 loff_t offset)
152{ 150{
153 struct file *file = iocb->ki_filp; 151 struct file *file = iocb->ki_filp;
154 struct inode *inode = file->f_mapping->host; 152 struct inode *inode = file->f_mapping->host;
155 153
156 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, 154 return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
157 offset, blkdev_get_block, 155 blkdev_get_block, NULL, NULL, 0);
158 NULL, NULL, 0);
159} 156}
160 157
161int __sync_blockdev(struct block_device *bdev, int wait) 158int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1598,9 +1595,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1598ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) 1595ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1599{ 1596{
1600 struct file *file = iocb->ki_filp; 1597 struct file *file = iocb->ki_filp;
1598 struct inode *bd_inode = file->f_mapping->host;
1599 loff_t size = i_size_read(bd_inode);
1601 struct blk_plug plug; 1600 struct blk_plug plug;
1602 ssize_t ret; 1601 ssize_t ret;
1603 1602
1603 if (bdev_read_only(I_BDEV(bd_inode)))
1604 return -EPERM;
1605
1606 if (!iov_iter_count(from))
1607 return 0;
1608
1609 if (iocb->ki_pos >= size)
1610 return -ENOSPC;
1611
1612 iov_iter_truncate(from, size - iocb->ki_pos);
1613
1604 blk_start_plug(&plug); 1614 blk_start_plug(&plug);
1605 ret = __generic_file_write_iter(iocb, from); 1615 ret = __generic_file_write_iter(iocb, from);
1606 if (ret > 0) { 1616 if (ret > 0) {
@@ -1660,8 +1670,6 @@ const struct file_operations def_blk_fops = {
1660 .open = blkdev_open, 1670 .open = blkdev_open,
1661 .release = blkdev_close, 1671 .release = blkdev_close,
1662 .llseek = block_llseek, 1672 .llseek = block_llseek,
1663 .read = new_sync_read,
1664 .write = new_sync_write,
1665 .read_iter = blkdev_read_iter, 1673 .read_iter = blkdev_read_iter,
1666 .write_iter = blkdev_write_iter, 1674 .write_iter = blkdev_write_iter,
1667 .mmap = generic_file_mmap, 1675 .mmap = generic_file_mmap,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 30982bbd31c3..faa7d390841b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/aio.h>
28#include <linux/falloc.h> 27#include <linux/falloc.h>
29#include <linux/swap.h> 28#include <linux/swap.h>
30#include <linux/writeback.h> 29#include <linux/writeback.h>
@@ -32,6 +31,7 @@
32#include <linux/compat.h> 31#include <linux/compat.h>
33#include <linux/slab.h> 32#include <linux/slab.h>
34#include <linux/btrfs.h> 33#include <linux/btrfs.h>
34#include <linux/uio.h>
35#include "ctree.h" 35#include "ctree.h"
36#include "disk-io.h" 36#include "disk-io.h"
37#include "transaction.h" 37#include "transaction.h"
@@ -1739,27 +1739,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1739 u64 start_pos; 1739 u64 start_pos;
1740 u64 end_pos; 1740 u64 end_pos;
1741 ssize_t num_written = 0; 1741 ssize_t num_written = 0;
1742 ssize_t err = 0;
1743 size_t count = iov_iter_count(from);
1744 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1742 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1745 loff_t pos = iocb->ki_pos; 1743 ssize_t err;
1744 loff_t pos;
1745 size_t count;
1746 1746
1747 mutex_lock(&inode->i_mutex); 1747 mutex_lock(&inode->i_mutex);
1748 1748 err = generic_write_checks(iocb, from);
1749 current->backing_dev_info = inode_to_bdi(inode); 1749 if (err <= 0) {
1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1751 if (err) {
1752 mutex_unlock(&inode->i_mutex); 1750 mutex_unlock(&inode->i_mutex);
1753 goto out; 1751 return err;
1754 }
1755
1756 if (count == 0) {
1757 mutex_unlock(&inode->i_mutex);
1758 goto out;
1759 } 1752 }
1760 1753
1761 iov_iter_truncate(from, count); 1754 current->backing_dev_info = inode_to_bdi(inode);
1762
1763 err = file_remove_suid(file); 1755 err = file_remove_suid(file);
1764 if (err) { 1756 if (err) {
1765 mutex_unlock(&inode->i_mutex); 1757 mutex_unlock(&inode->i_mutex);
@@ -1786,6 +1778,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1786 */ 1778 */
1787 update_time_for_write(inode); 1779 update_time_for_write(inode);
1788 1780
1781 pos = iocb->ki_pos;
1782 count = iov_iter_count(from);
1789 start_pos = round_down(pos, root->sectorsize); 1783 start_pos = round_down(pos, root->sectorsize);
1790 if (start_pos > i_size_read(inode)) { 1784 if (start_pos > i_size_read(inode)) {
1791 /* Expand hole size to cover write data, preventing empty gap */ 1785 /* Expand hole size to cover write data, preventing empty gap */
@@ -1800,7 +1794,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1800 if (sync) 1794 if (sync)
1801 atomic_inc(&BTRFS_I(inode)->sync_writers); 1795 atomic_inc(&BTRFS_I(inode)->sync_writers);
1802 1796
1803 if (file->f_flags & O_DIRECT) { 1797 if (iocb->ki_flags & IOCB_DIRECT) {
1804 num_written = __btrfs_direct_write(iocb, from, pos); 1798 num_written = __btrfs_direct_write(iocb, from, pos);
1805 } else { 1799 } else {
1806 num_written = __btrfs_buffered_write(file, from, pos); 1800 num_written = __btrfs_buffered_write(file, from, pos);
@@ -2806,8 +2800,6 @@ out:
2806 2800
2807const struct file_operations btrfs_file_operations = { 2801const struct file_operations btrfs_file_operations = {
2808 .llseek = btrfs_file_llseek, 2802 .llseek = btrfs_file_llseek,
2809 .read = new_sync_read,
2810 .write = new_sync_write,
2811 .read_iter = generic_file_read_iter, 2803 .read_iter = generic_file_read_iter,
2812 .splice_read = generic_file_splice_read, 2804 .splice_read = generic_file_splice_read,
2813 .write_iter = btrfs_file_write_iter, 2805 .write_iter = btrfs_file_write_iter,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2e732d7af52..43192e10cc43 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/statfs.h> 33#include <linux/statfs.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include <linux/bit_spinlock.h> 35#include <linux/bit_spinlock.h>
37#include <linux/xattr.h> 36#include <linux/xattr.h>
38#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
43#include <linux/btrfs.h> 42#include <linux/btrfs.h>
44#include <linux/blkdev.h> 43#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h> 44#include <linux/posix_acl_xattr.h>
45#include <linux/uio.h>
46#include "ctree.h" 46#include "ctree.h"
47#include "disk-io.h" 47#include "disk-io.h"
48#include "transaction.h" 48#include "transaction.h"
@@ -8081,7 +8081,7 @@ free_ordered:
8081 bio_endio(dio_bio, ret); 8081 bio_endio(dio_bio, ret);
8082} 8082}
8083 8083
8084static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, 8084static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
8085 const struct iov_iter *iter, loff_t offset) 8085 const struct iov_iter *iter, loff_t offset)
8086{ 8086{
8087 int seg; 8087 int seg;
@@ -8096,7 +8096,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
8096 goto out; 8096 goto out;
8097 8097
8098 /* If this is a write we don't need to check anymore */ 8098 /* If this is a write we don't need to check anymore */
8099 if (rw & WRITE) 8099 if (iov_iter_rw(iter) == WRITE)
8100 return 0; 8100 return 0;
8101 /* 8101 /*
8102 * Check to make sure we don't have duplicate iov_base's in this 8102 * Check to make sure we don't have duplicate iov_base's in this
@@ -8114,8 +8114,8 @@ out:
8114 return retval; 8114 return retval;
8115} 8115}
8116 8116
8117static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 8117static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
8118 struct iov_iter *iter, loff_t offset) 8118 loff_t offset)
8119{ 8119{
8120 struct file *file = iocb->ki_filp; 8120 struct file *file = iocb->ki_filp;
8121 struct inode *inode = file->f_mapping->host; 8121 struct inode *inode = file->f_mapping->host;
@@ -8126,7 +8126,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8126 bool relock = false; 8126 bool relock = false;
8127 ssize_t ret; 8127 ssize_t ret;
8128 8128
8129 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) 8129 if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
8130 return 0; 8130 return 0;
8131 8131
8132 atomic_inc(&inode->i_dio_count); 8132 atomic_inc(&inode->i_dio_count);
@@ -8144,7 +8144,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8144 filemap_fdatawrite_range(inode->i_mapping, offset, 8144 filemap_fdatawrite_range(inode->i_mapping, offset,
8145 offset + count - 1); 8145 offset + count - 1);
8146 8146
8147 if (rw & WRITE) { 8147 if (iov_iter_rw(iter) == WRITE) {
8148 /* 8148 /*
8149 * If the write DIO is beyond the EOF, we need update 8149 * If the write DIO is beyond the EOF, we need update
8150 * the isize, but it is protected by i_mutex. So we can 8150 * the isize, but it is protected by i_mutex. So we can
@@ -8174,11 +8174,11 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8174 wakeup = false; 8174 wakeup = false;
8175 } 8175 }
8176 8176
8177 ret = __blockdev_direct_IO(rw, iocb, inode, 8177 ret = __blockdev_direct_IO(iocb, inode,
8178 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 8178 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
8179 iter, offset, btrfs_get_blocks_direct, NULL, 8179 iter, offset, btrfs_get_blocks_direct, NULL,
8180 btrfs_submit_direct, flags); 8180 btrfs_submit_direct, flags);
8181 if (rw & WRITE) { 8181 if (iov_iter_rw(iter) == WRITE) {
8182 current->journal_info = NULL; 8182 current->journal_info = NULL;
8183 if (ret < 0 && ret != -EIOCBQUEUED) 8183 if (ret < 0 && ret != -EIOCBQUEUED)
8184 btrfs_delalloc_release_space(inode, count); 8184 btrfs_delalloc_release_space(inode, count);
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
3243 * to synchronise against __set_page_dirty_buffers and prevent the 3243 * to synchronise against __set_page_dirty_buffers and prevent the
3244 * dirty bit from being lost. 3244 * dirty bit from being lost.
3245 */ 3245 */
3246 if (ret) 3246 if (ret && TestClearPageDirty(page))
3247 cancel_dirty_page(page, PAGE_CACHE_SIZE); 3247 account_page_cleaned(page, mapping);
3248 spin_unlock(&mapping->private_lock); 3248 spin_unlock(&mapping->private_lock);
3249out: 3249out:
3250 if (buffers_to_free) { 3250 if (buffers_to_free) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index cab1cf5a330b..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1202,8 +1202,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
1202 * intercept O_DIRECT reads and writes early, this function should 1202 * intercept O_DIRECT reads and writes early, this function should
1203 * never get called. 1203 * never get called.
1204 */ 1204 */
1205static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, 1205static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter,
1206 struct iov_iter *iter,
1207 loff_t pos) 1206 loff_t pos)
1208{ 1207{
1209 WARN_ON(1); 1208 WARN_ON(1);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075a823d..b9b8eb225f66 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
7#include <linux/mount.h> 7#include <linux/mount.h>
8#include <linux/namei.h> 8#include <linux/namei.h>
9#include <linux/writeback.h> 9#include <linux/writeback.h>
10#include <linux/aio.h>
11#include <linux/falloc.h> 10#include <linux/falloc.h>
12 11
13#include "super.h" 12#include "super.h"
@@ -458,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
458 if (ret < 0) 457 if (ret < 0)
459 return ret; 458 return ret;
460 459
461 if (file->f_flags & O_DIRECT) { 460 if (iocb->ki_flags & IOCB_DIRECT) {
462 while (iov_iter_count(i)) { 461 while (iov_iter_count(i)) {
463 size_t start; 462 size_t start;
464 ssize_t n; 463 ssize_t n;
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
808{ 807{
809 struct file *filp = iocb->ki_filp; 808 struct file *filp = iocb->ki_filp;
810 struct ceph_file_info *fi = filp->private_data; 809 struct ceph_file_info *fi = filp->private_data;
811 size_t len = iocb->ki_nbytes; 810 size_t len = iov_iter_count(to);
812 struct inode *inode = file_inode(filp); 811 struct inode *inode = file_inode(filp);
813 struct ceph_inode_info *ci = ceph_inode(inode); 812 struct ceph_inode_info *ci = ceph_inode(inode);
814 struct page *pinned_page = NULL; 813 struct page *pinned_page = NULL;
@@ -829,7 +828,7 @@ again:
829 return ret; 828 return ret;
830 829
831 if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || 830 if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
832 (iocb->ki_filp->f_flags & O_DIRECT) || 831 (iocb->ki_flags & IOCB_DIRECT) ||
833 (fi->flags & CEPH_F_SYNC)) { 832 (fi->flags & CEPH_F_SYNC)) {
834 833
835 dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", 834 dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
@@ -942,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
942 struct ceph_inode_info *ci = ceph_inode(inode); 941 struct ceph_inode_info *ci = ceph_inode(inode);
943 struct ceph_osd_client *osdc = 942 struct ceph_osd_client *osdc =
944 &ceph_sb_to_client(inode->i_sb)->client->osdc; 943 &ceph_sb_to_client(inode->i_sb)->client->osdc;
945 ssize_t count = iov_iter_count(from), written = 0; 944 ssize_t count, written = 0;
946 int err, want, got; 945 int err, want, got;
947 loff_t pos = iocb->ki_pos; 946 loff_t pos;
948 947
949 if (ceph_snap(inode) != CEPH_NOSNAP) 948 if (ceph_snap(inode) != CEPH_NOSNAP)
950 return -EROFS; 949 return -EROFS;
@@ -954,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
954 /* We can write back this queue in page reclaim */ 953 /* We can write back this queue in page reclaim */
955 current->backing_dev_info = inode_to_bdi(inode); 954 current->backing_dev_info = inode_to_bdi(inode);
956 955
957 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 956 err = generic_write_checks(iocb, from);
958 if (err) 957 if (err <= 0)
959 goto out;
960
961 if (count == 0)
962 goto out; 958 goto out;
963 iov_iter_truncate(from, count);
964 959
960 pos = iocb->ki_pos;
961 count = iov_iter_count(from);
965 err = file_remove_suid(file); 962 err = file_remove_suid(file);
966 if (err) 963 if (err)
967 goto out; 964 goto out;
@@ -998,12 +995,12 @@ retry_snap:
998 inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); 995 inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
999 996
1000 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || 997 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
1001 (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { 998 (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
1002 struct iov_iter data; 999 struct iov_iter data;
1003 mutex_unlock(&inode->i_mutex); 1000 mutex_unlock(&inode->i_mutex);
1004 /* we might need to revert back to that point */ 1001 /* we might need to revert back to that point */
1005 data = *from; 1002 data = *from;
1006 if (file->f_flags & O_DIRECT) 1003 if (iocb->ki_flags & IOCB_DIRECT)
1007 written = ceph_sync_direct_write(iocb, &data, pos); 1004 written = ceph_sync_direct_write(iocb, &data, pos);
1008 else 1005 else
1009 written = ceph_sync_write(iocb, &data, pos); 1006 written = ceph_sync_write(iocb, &data, pos);
@@ -1332,8 +1329,6 @@ const struct file_operations ceph_file_fops = {
1332 .open = ceph_open, 1329 .open = ceph_open,
1333 .release = ceph_release, 1330 .release = ceph_release,
1334 .llseek = ceph_llseek, 1331 .llseek = ceph_llseek,
1335 .read = new_sync_read,
1336 .write = new_sync_write,
1337 .read_iter = ceph_read_iter, 1332 .read_iter = ceph_read_iter,
1338 .write_iter = ceph_write_iter, 1333 .write_iter = ceph_write_iter,
1339 .mmap = ceph_mmap, 1334 .mmap = ceph_mmap,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d72fe37f5420..eaab4b2a0595 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -906,8 +906,6 @@ const struct inode_operations cifs_symlink_inode_ops = {
906}; 906};
907 907
908const struct file_operations cifs_file_ops = { 908const struct file_operations cifs_file_ops = {
909 .read = new_sync_read,
910 .write = new_sync_write,
911 .read_iter = cifs_loose_read_iter, 909 .read_iter = cifs_loose_read_iter,
912 .write_iter = cifs_file_write_iter, 910 .write_iter = cifs_file_write_iter,
913 .open = cifs_open, 911 .open = cifs_open,
@@ -926,8 +924,6 @@ const struct file_operations cifs_file_ops = {
926}; 924};
927 925
928const struct file_operations cifs_file_strict_ops = { 926const struct file_operations cifs_file_strict_ops = {
929 .read = new_sync_read,
930 .write = new_sync_write,
931 .read_iter = cifs_strict_readv, 927 .read_iter = cifs_strict_readv,
932 .write_iter = cifs_strict_writev, 928 .write_iter = cifs_strict_writev,
933 .open = cifs_open, 929 .open = cifs_open,
@@ -947,8 +943,6 @@ const struct file_operations cifs_file_strict_ops = {
947 943
948const struct file_operations cifs_file_direct_ops = { 944const struct file_operations cifs_file_direct_ops = {
949 /* BB reevaluate whether they can be done with directio, no cache */ 945 /* BB reevaluate whether they can be done with directio, no cache */
950 .read = new_sync_read,
951 .write = new_sync_write,
952 .read_iter = cifs_user_readv, 946 .read_iter = cifs_user_readv,
953 .write_iter = cifs_user_writev, 947 .write_iter = cifs_user_writev,
954 .open = cifs_open, 948 .open = cifs_open,
@@ -967,8 +961,6 @@ const struct file_operations cifs_file_direct_ops = {
967}; 961};
968 962
969const struct file_operations cifs_file_nobrl_ops = { 963const struct file_operations cifs_file_nobrl_ops = {
970 .read = new_sync_read,
971 .write = new_sync_write,
972 .read_iter = cifs_loose_read_iter, 964 .read_iter = cifs_loose_read_iter,
973 .write_iter = cifs_file_write_iter, 965 .write_iter = cifs_file_write_iter,
974 .open = cifs_open, 966 .open = cifs_open,
@@ -986,8 +978,6 @@ const struct file_operations cifs_file_nobrl_ops = {
986}; 978};
987 979
988const struct file_operations cifs_file_strict_nobrl_ops = { 980const struct file_operations cifs_file_strict_nobrl_ops = {
989 .read = new_sync_read,
990 .write = new_sync_write,
991 .read_iter = cifs_strict_readv, 981 .read_iter = cifs_strict_readv,
992 .write_iter = cifs_strict_writev, 982 .write_iter = cifs_strict_writev,
993 .open = cifs_open, 983 .open = cifs_open,
@@ -1006,8 +996,6 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
1006 996
1007const struct file_operations cifs_file_direct_nobrl_ops = { 997const struct file_operations cifs_file_direct_nobrl_ops = {
1008 /* BB reevaluate whether they can be done with directio, no cache */ 998 /* BB reevaluate whether they can be done with directio, no cache */
1009 .read = new_sync_read,
1010 .write = new_sync_write,
1011 .read_iter = cifs_user_readv, 999 .read_iter = cifs_user_readv,
1012 .write_iter = cifs_user_writev, 1000 .write_iter = cifs_user_writev,
1013 .open = cifs_open, 1001 .open = cifs_open,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 480cf9c81d50..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
773 773
774 length = atomic_dec_return(&tcpSesAllocCount); 774 length = atomic_dec_return(&tcpSesAllocCount);
775 if (length > 0) 775 if (length > 0)
776 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 776 mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
777 GFP_KERNEL);
778} 777}
779 778
780static int 779static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
848 847
849 length = atomic_inc_return(&tcpSesAllocCount); 848 length = atomic_inc_return(&tcpSesAllocCount);
850 if (length > 1) 849 if (length > 1)
851 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 850 mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
852 GFP_KERNEL);
853 851
854 set_freezable(); 852 set_freezable();
855 while (server->tcpStatus != CifsExiting) { 853 while (server->tcpStatus != CifsExiting) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ca30c391a894..ca2bc5406306 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2560,10 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2560 return rc; 2560 return rc;
2561} 2561}
2562 2562
2563static ssize_t 2563ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2564cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2565{ 2564{
2566 size_t len; 2565 struct file *file = iocb->ki_filp;
2567 ssize_t total_written = 0; 2566 ssize_t total_written = 0;
2568 struct cifsFileInfo *open_file; 2567 struct cifsFileInfo *open_file;
2569 struct cifs_tcon *tcon; 2568 struct cifs_tcon *tcon;
@@ -2573,15 +2572,15 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2573 struct iov_iter saved_from; 2572 struct iov_iter saved_from;
2574 int rc; 2573 int rc;
2575 2574
2576 len = iov_iter_count(from); 2575 /*
2577 rc = generic_write_checks(file, poffset, &len, 0); 2576 * BB - optimize the way when signing is disabled. We can drop this
2578 if (rc) 2577 * extra memory-to-memory copying and use iovec buffers for constructing
2579 return rc; 2578 * write request.
2580 2579 */
2581 if (!len)
2582 return 0;
2583 2580
2584 iov_iter_truncate(from, len); 2581 rc = generic_write_checks(iocb, from);
2582 if (rc <= 0)
2583 return rc;
2585 2584
2586 INIT_LIST_HEAD(&wdata_list); 2585 INIT_LIST_HEAD(&wdata_list);
2587 cifs_sb = CIFS_FILE_SB(file); 2586 cifs_sb = CIFS_FILE_SB(file);
@@ -2593,8 +2592,8 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2593 2592
2594 memcpy(&saved_from, from, sizeof(struct iov_iter)); 2593 memcpy(&saved_from, from, sizeof(struct iov_iter));
2595 2594
2596 rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, 2595 rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2597 &wdata_list); 2596 open_file, cifs_sb, &wdata_list);
2598 2597
2599 /* 2598 /*
2600 * If at least one write was successfully sent, then discard any rc 2599 * If at least one write was successfully sent, then discard any rc
@@ -2633,7 +2632,7 @@ restart_loop:
2633 memcpy(&tmp_from, &saved_from, 2632 memcpy(&tmp_from, &saved_from,
2634 sizeof(struct iov_iter)); 2633 sizeof(struct iov_iter));
2635 iov_iter_advance(&tmp_from, 2634 iov_iter_advance(&tmp_from,
2636 wdata->offset - *poffset); 2635 wdata->offset - iocb->ki_pos);
2637 2636
2638 rc = cifs_write_from_iter(wdata->offset, 2637 rc = cifs_write_from_iter(wdata->offset,
2639 wdata->bytes, &tmp_from, 2638 wdata->bytes, &tmp_from,
@@ -2650,34 +2649,13 @@ restart_loop:
2650 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2649 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2651 } 2650 }
2652 2651
2653 if (total_written > 0) 2652 if (unlikely(!total_written))
2654 *poffset += total_written; 2653 return rc;
2655 2654
2655 iocb->ki_pos += total_written;
2656 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2656 cifs_stats_bytes_written(tcon, total_written); 2657 cifs_stats_bytes_written(tcon, total_written);
2657 return total_written ? total_written : (ssize_t)rc; 2658 return total_written;
2658}
2659
2660ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2661{
2662 ssize_t written;
2663 struct inode *inode;
2664 loff_t pos = iocb->ki_pos;
2665
2666 inode = file_inode(iocb->ki_filp);
2667
2668 /*
2669 * BB - optimize the way when signing is disabled. We can drop this
2670 * extra memory-to-memory copying and use iovec buffers for constructing
2671 * write request.
2672 */
2673
2674 written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2675 if (written > 0) {
2676 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
2677 iocb->ki_pos = pos;
2678 }
2679
2680 return written;
2681} 2659}
2682 2660
2683static ssize_t 2661static ssize_t
@@ -2688,8 +2666,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2688 struct inode *inode = file->f_mapping->host; 2666 struct inode *inode = file->f_mapping->host;
2689 struct cifsInodeInfo *cinode = CIFS_I(inode); 2667 struct cifsInodeInfo *cinode = CIFS_I(inode);
2690 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; 2668 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2691 ssize_t rc = -EACCES; 2669 ssize_t rc;
2692 loff_t lock_pos = iocb->ki_pos;
2693 2670
2694 /* 2671 /*
2695 * We need to hold the sem to be sure nobody modifies lock list 2672 * We need to hold the sem to be sure nobody modifies lock list
@@ -2697,23 +2674,24 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2697 */ 2674 */
2698 down_read(&cinode->lock_sem); 2675 down_read(&cinode->lock_sem);
2699 mutex_lock(&inode->i_mutex); 2676 mutex_lock(&inode->i_mutex);
2700 if (file->f_flags & O_APPEND) 2677
2701 lock_pos = i_size_read(inode); 2678 rc = generic_write_checks(iocb, from);
2702 if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from), 2679 if (rc <= 0)
2680 goto out;
2681
2682 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2703 server->vals->exclusive_lock_type, NULL, 2683 server->vals->exclusive_lock_type, NULL,
2704 CIFS_WRITE_OP)) { 2684 CIFS_WRITE_OP))
2705 rc = __generic_file_write_iter(iocb, from); 2685 rc = __generic_file_write_iter(iocb, from);
2706 mutex_unlock(&inode->i_mutex); 2686 else
2707 2687 rc = -EACCES;
2708 if (rc > 0) { 2688out:
2709 ssize_t err; 2689 mutex_unlock(&inode->i_mutex);
2710 2690
2711 err = generic_write_sync(file, iocb->ki_pos - rc, rc); 2691 if (rc > 0) {
2712 if (err < 0) 2692 ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2713 rc = err; 2693 if (err < 0)
2714 } 2694 rc = err;
2715 } else {
2716 mutex_unlock(&inode->i_mutex);
2717 } 2695 }
2718 up_read(&cinode->lock_sem); 2696 up_read(&cinode->lock_sem);
2719 return rc; 2697 return rc;
@@ -3877,8 +3855,7 @@ void cifs_oplock_break(struct work_struct *work)
3877 * Direct IO is not yet supported in the cached mode. 3855 * Direct IO is not yet supported in the cached mode.
3878 */ 3856 */
3879static ssize_t 3857static ssize_t
3880cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, 3858cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
3881 loff_t pos)
3882{ 3859{
3883 /* 3860 /*
3884 * FIXME 3861 * FIXME
diff --git a/fs/coda/file.c b/fs/coda/file.c
index d244d743a232..1da3805f3ddc 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -27,19 +27,14 @@
27#include "coda_int.h" 27#include "coda_int.h"
28 28
29static ssize_t 29static ssize_t
30coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *ppos) 30coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
31{ 31{
32 struct coda_file_info *cfi; 32 struct file *coda_file = iocb->ki_filp;
33 struct file *host_file; 33 struct coda_file_info *cfi = CODA_FTOC(coda_file);
34 34
35 cfi = CODA_FTOC(coda_file);
36 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 35 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
37 host_file = cfi->cfi_container;
38 36
39 if (!host_file->f_op->read) 37 return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
40 return -EINVAL;
41
42 return host_file->f_op->read(host_file, buf, count, ppos);
43} 38}
44 39
45static ssize_t 40static ssize_t
@@ -64,32 +59,25 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
64} 59}
65 60
66static ssize_t 61static ssize_t
67coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos) 62coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
68{ 63{
69 struct inode *host_inode, *coda_inode = file_inode(coda_file); 64 struct file *coda_file = iocb->ki_filp;
70 struct coda_file_info *cfi; 65 struct inode *coda_inode = file_inode(coda_file);
66 struct coda_file_info *cfi = CODA_FTOC(coda_file);
71 struct file *host_file; 67 struct file *host_file;
72 ssize_t ret; 68 ssize_t ret;
73 69
74 cfi = CODA_FTOC(coda_file);
75 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 70 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
76 host_file = cfi->cfi_container;
77
78 if (!host_file->f_op->write)
79 return -EINVAL;
80 71
81 host_inode = file_inode(host_file); 72 host_file = cfi->cfi_container;
82 file_start_write(host_file); 73 file_start_write(host_file);
83 mutex_lock(&coda_inode->i_mutex); 74 mutex_lock(&coda_inode->i_mutex);
84 75 ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
85 ret = host_file->f_op->write(host_file, buf, count, ppos); 76 coda_inode->i_size = file_inode(host_file)->i_size;
86
87 coda_inode->i_size = host_inode->i_size;
88 coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; 77 coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
89 coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; 78 coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
90 mutex_unlock(&coda_inode->i_mutex); 79 mutex_unlock(&coda_inode->i_mutex);
91 file_end_write(host_file); 80 file_end_write(host_file);
92
93 return ret; 81 return ret;
94} 82}
95 83
@@ -231,8 +219,8 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
231 219
232const struct file_operations coda_file_operations = { 220const struct file_operations coda_file_operations = {
233 .llseek = generic_file_llseek, 221 .llseek = generic_file_llseek,
234 .read = coda_file_read, 222 .read_iter = coda_file_read_iter,
235 .write = coda_file_write, 223 .write_iter = coda_file_write_iter,
236 .mmap = coda_file_mmap, 224 .mmap = coda_file_mmap,
237 .open = coda_open, 225 .open = coda_open,
238 .release = coda_release, 226 .release = coda_release,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index afec6450450f..6b8e2f091f5b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -570,6 +570,7 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
570#define BNEPCONNDEL _IOW('B', 201, int) 570#define BNEPCONNDEL _IOW('B', 201, int)
571#define BNEPGETCONNLIST _IOR('B', 210, int) 571#define BNEPGETCONNLIST _IOR('B', 210, int)
572#define BNEPGETCONNINFO _IOR('B', 211, int) 572#define BNEPGETCONNINFO _IOR('B', 211, int)
573#define BNEPGETSUPPFEAT _IOR('B', 212, int)
573 574
574#define CMTPCONNADD _IOW('C', 200, int) 575#define CMTPCONNADD _IOW('C', 200, int)
575#define CMTPCONNDEL _IOW('C', 201, int) 576#define CMTPCONNDEL _IOW('C', 201, int)
@@ -1247,6 +1248,7 @@ COMPATIBLE_IOCTL(BNEPCONNADD)
1247COMPATIBLE_IOCTL(BNEPCONNDEL) 1248COMPATIBLE_IOCTL(BNEPCONNDEL)
1248COMPATIBLE_IOCTL(BNEPGETCONNLIST) 1249COMPATIBLE_IOCTL(BNEPGETCONNLIST)
1249COMPATIBLE_IOCTL(BNEPGETCONNINFO) 1250COMPATIBLE_IOCTL(BNEPGETCONNINFO)
1251COMPATIBLE_IOCTL(BNEPGETSUPPFEAT)
1250COMPATIBLE_IOCTL(CMTPCONNADD) 1252COMPATIBLE_IOCTL(CMTPCONNADD)
1251COMPATIBLE_IOCTL(CMTPCONNDEL) 1253COMPATIBLE_IOCTL(CMTPCONNDEL)
1252COMPATIBLE_IOCTL(CMTPGETCONNLIST) 1254COMPATIBLE_IOCTL(CMTPGETCONNLIST)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index cf0db005d2f5..acb3d63bc9dc 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
1598 if (offset >= 0) 1598 if (offset >= 0)
1599 break; 1599 break;
1600 default: 1600 default:
1601 mutex_unlock(&file_inode(file)->i_mutex); 1601 mutex_unlock(&dentry->d_inode->i_mutex);
1602 return -EINVAL; 1602 return -EINVAL;
1603 } 1603 }
1604 if (offset != file->f_pos) { 1604 if (offset != file->f_pos) {
diff --git a/fs/coredump.c b/fs/coredump.c
index f319926ddf8c..bbbe139ab280 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -657,7 +657,7 @@ void do_coredump(const siginfo_t *siginfo)
657 */ 657 */
658 if (!uid_eq(inode->i_uid, current_fsuid())) 658 if (!uid_eq(inode->i_uid, current_fsuid()))
659 goto close_fail; 659 goto close_fail;
660 if (!cprm.file->f_op->write) 660 if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
661 goto close_fail; 661 goto close_fail;
662 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) 662 if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
663 goto close_fail; 663 goto close_fail;
diff --git a/fs/dax.c b/fs/dax.c
index ed1619ec6537..0bb0aecb556c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,9 +98,9 @@ static bool buffer_size_valid(struct buffer_head *bh)
98 return bh->b_state != 0; 98 return bh->b_state != 0;
99} 99}
100 100
101static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, 101static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
102 loff_t start, loff_t end, get_block_t get_block, 102 loff_t start, loff_t end, get_block_t get_block,
103 struct buffer_head *bh) 103 struct buffer_head *bh)
104{ 104{
105 ssize_t retval = 0; 105 ssize_t retval = 0;
106 loff_t pos = start; 106 loff_t pos = start;
@@ -109,7 +109,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
109 void *addr; 109 void *addr;
110 bool hole = false; 110 bool hole = false;
111 111
112 if (rw != WRITE) 112 if (iov_iter_rw(iter) != WRITE)
113 end = min(end, i_size_read(inode)); 113 end = min(end, i_size_read(inode));
114 114
115 while (pos < end) { 115 while (pos < end) {
@@ -124,7 +124,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
124 bh->b_size = PAGE_ALIGN(end - pos); 124 bh->b_size = PAGE_ALIGN(end - pos);
125 bh->b_state = 0; 125 bh->b_state = 0;
126 retval = get_block(inode, block, bh, 126 retval = get_block(inode, block, bh,
127 rw == WRITE); 127 iov_iter_rw(iter) == WRITE);
128 if (retval) 128 if (retval)
129 break; 129 break;
130 if (!buffer_size_valid(bh)) 130 if (!buffer_size_valid(bh))
@@ -137,7 +137,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
137 bh->b_size -= done; 137 bh->b_size -= done;
138 } 138 }
139 139
140 hole = (rw != WRITE) && !buffer_written(bh); 140 hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh);
141 if (hole) { 141 if (hole) {
142 addr = NULL; 142 addr = NULL;
143 size = bh->b_size - first; 143 size = bh->b_size - first;
@@ -154,7 +154,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
154 max = min(pos + size, end); 154 max = min(pos + size, end);
155 } 155 }
156 156
157 if (rw == WRITE) 157 if (iov_iter_rw(iter) == WRITE)
158 len = copy_from_iter(addr, max - pos, iter); 158 len = copy_from_iter(addr, max - pos, iter);
159 else if (!hole) 159 else if (!hole)
160 len = copy_to_iter(addr, max - pos, iter); 160 len = copy_to_iter(addr, max - pos, iter);
@@ -173,7 +173,6 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
173 173
174/** 174/**
175 * dax_do_io - Perform I/O to a DAX file 175 * dax_do_io - Perform I/O to a DAX file
176 * @rw: READ to read or WRITE to write
177 * @iocb: The control block for this I/O 176 * @iocb: The control block for this I/O
178 * @inode: The file which the I/O is directed at 177 * @inode: The file which the I/O is directed at
179 * @iter: The addresses to do I/O from or to 178 * @iter: The addresses to do I/O from or to
@@ -189,9 +188,9 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
189 * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O 188 * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
190 * is in progress. 189 * is in progress.
191 */ 190 */
192ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, 191ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
193 struct iov_iter *iter, loff_t pos, 192 struct iov_iter *iter, loff_t pos, get_block_t get_block,
194 get_block_t get_block, dio_iodone_t end_io, int flags) 193 dio_iodone_t end_io, int flags)
195{ 194{
196 struct buffer_head bh; 195 struct buffer_head bh;
197 ssize_t retval = -EINVAL; 196 ssize_t retval = -EINVAL;
@@ -199,7 +198,7 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
199 198
200 memset(&bh, 0, sizeof(bh)); 199 memset(&bh, 0, sizeof(bh));
201 200
202 if ((flags & DIO_LOCKING) && (rw == READ)) { 201 if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
203 struct address_space *mapping = inode->i_mapping; 202 struct address_space *mapping = inode->i_mapping;
204 mutex_lock(&inode->i_mutex); 203 mutex_lock(&inode->i_mutex);
205 retval = filemap_write_and_wait_range(mapping, pos, end - 1); 204 retval = filemap_write_and_wait_range(mapping, pos, end - 1);
@@ -212,9 +211,9 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
212 /* Protects against truncate */ 211 /* Protects against truncate */
213 atomic_inc(&inode->i_dio_count); 212 atomic_inc(&inode->i_dio_count);
214 213
215 retval = dax_io(rw, inode, iter, pos, end, get_block, &bh); 214 retval = dax_io(inode, iter, pos, end, get_block, &bh);
216 215
217 if ((flags & DIO_LOCKING) && (rw == READ)) 216 if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
218 mutex_unlock(&inode->i_mutex); 217 mutex_unlock(&inode->i_mutex);
219 218
220 if ((retval > 0) && end_io) 219 if ((retval > 0) && end_io)
@@ -464,6 +463,23 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
464EXPORT_SYMBOL_GPL(dax_fault); 463EXPORT_SYMBOL_GPL(dax_fault);
465 464
466/** 465/**
466 * dax_pfn_mkwrite - handle first write to DAX page
467 * @vma: The virtual memory area where the fault occurred
468 * @vmf: The description of the fault
469 *
470 */
471int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
472{
473 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
474
475 sb_start_pagefault(sb);
476 file_update_time(vma->vm_file);
477 sb_end_pagefault(sb);
478 return VM_FAULT_NOPAGE;
479}
480EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
481
482/**
467 * dax_zero_page_range - zero a range within a page of a DAX file 483 * dax_zero_page_range - zero a range within a page of a DAX file
468 * @inode: The file being truncated 484 * @inode: The file being truncated
469 * @from: The file offset that is being truncated to 485 * @from: The file offset that is being truncated to
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e3732e53b..656ce522a218 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,6 +269,41 @@ static inline int dname_external(const struct dentry *dentry)
269 return dentry->d_name.name != dentry->d_iname; 269 return dentry->d_name.name != dentry->d_iname;
270} 270}
271 271
272/*
273 * Make sure other CPUs see the inode attached before the type is set.
274 */
275static inline void __d_set_inode_and_type(struct dentry *dentry,
276 struct inode *inode,
277 unsigned type_flags)
278{
279 unsigned flags;
280
281 dentry->d_inode = inode;
282 smp_wmb();
283 flags = READ_ONCE(dentry->d_flags);
284 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
285 flags |= type_flags;
286 WRITE_ONCE(dentry->d_flags, flags);
287}
288
289/*
290 * Ideally, we want to make sure that other CPUs see the flags cleared before
291 * the inode is detached, but this is really a violation of RCU principles
292 * since the ordering suggests we should always set inode before flags.
293 *
294 * We should instead replace or discard the entire dentry - but that sucks
295 * performancewise on mass deletion/rename.
296 */
297static inline void __d_clear_type_and_inode(struct dentry *dentry)
298{
299 unsigned flags = READ_ONCE(dentry->d_flags);
300
301 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
302 WRITE_ONCE(dentry->d_flags, flags);
303 smp_wmb();
304 dentry->d_inode = NULL;
305}
306
272static void dentry_free(struct dentry *dentry) 307static void dentry_free(struct dentry *dentry)
273{ 308{
274 WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); 309 WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
@@ -311,7 +346,7 @@ static void dentry_iput(struct dentry * dentry)
311{ 346{
312 struct inode *inode = dentry->d_inode; 347 struct inode *inode = dentry->d_inode;
313 if (inode) { 348 if (inode) {
314 dentry->d_inode = NULL; 349 __d_clear_type_and_inode(dentry);
315 hlist_del_init(&dentry->d_u.d_alias); 350 hlist_del_init(&dentry->d_u.d_alias);
316 spin_unlock(&dentry->d_lock); 351 spin_unlock(&dentry->d_lock);
317 spin_unlock(&inode->i_lock); 352 spin_unlock(&inode->i_lock);
@@ -335,8 +370,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
335 __releases(dentry->d_inode->i_lock) 370 __releases(dentry->d_inode->i_lock)
336{ 371{
337 struct inode *inode = dentry->d_inode; 372 struct inode *inode = dentry->d_inode;
338 __d_clear_type(dentry); 373 __d_clear_type_and_inode(dentry);
339 dentry->d_inode = NULL;
340 hlist_del_init(&dentry->d_u.d_alias); 374 hlist_del_init(&dentry->d_u.d_alias);
341 dentry_rcuwalk_barrier(dentry); 375 dentry_rcuwalk_barrier(dentry);
342 spin_unlock(&dentry->d_lock); 376 spin_unlock(&dentry->d_lock);
@@ -1715,11 +1749,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1715 unsigned add_flags = d_flags_for_inode(inode); 1749 unsigned add_flags = d_flags_for_inode(inode);
1716 1750
1717 spin_lock(&dentry->d_lock); 1751 spin_lock(&dentry->d_lock);
1718 dentry->d_flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
1719 dentry->d_flags |= add_flags;
1720 if (inode) 1752 if (inode)
1721 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1753 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1722 dentry->d_inode = inode; 1754 __d_set_inode_and_type(dentry, inode, add_flags);
1723 dentry_rcuwalk_barrier(dentry); 1755 dentry_rcuwalk_barrier(dentry);
1724 spin_unlock(&dentry->d_lock); 1756 spin_unlock(&dentry->d_lock);
1725 fsnotify_d_instantiate(dentry, inode); 1757 fsnotify_d_instantiate(dentry, inode);
@@ -1937,8 +1969,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1937 add_flags |= DCACHE_DISCONNECTED; 1969 add_flags |= DCACHE_DISCONNECTED;
1938 1970
1939 spin_lock(&tmp->d_lock); 1971 spin_lock(&tmp->d_lock);
1940 tmp->d_inode = inode; 1972 __d_set_inode_and_type(tmp, inode, add_flags);
1941 tmp->d_flags |= add_flags;
1942 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); 1973 hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
1943 hlist_bl_lock(&tmp->d_sb->s_anon); 1974 hlist_bl_lock(&tmp->d_sb->s_anon);
1944 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1975 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2690,7 +2721,7 @@ static int __d_unalias(struct inode *inode,
2690 struct dentry *dentry, struct dentry *alias) 2721 struct dentry *dentry, struct dentry *alias)
2691{ 2722{
2692 struct mutex *m1 = NULL, *m2 = NULL; 2723 struct mutex *m1 = NULL, *m2 = NULL;
2693 int ret = -EBUSY; 2724 int ret = -ESTALE;
2694 2725
2695 /* If alias and dentry share a parent, then no extra locks required */ 2726 /* If alias and dentry share a parent, then no extra locks required */
2696 if (alias->d_parent == dentry->d_parent) 2727 if (alias->d_parent == dentry->d_parent)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 96400ab42d13..c9ee0dfe90b5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
254 254
255 pr_debug("debugfs: creating file '%s'\n",name); 255 pr_debug("debugfs: creating file '%s'\n",name);
256 256
257 if (IS_ERR(parent))
258 return parent;
259
257 error = simple_pin_fs(&debug_fs_type, &debugfs_mount, 260 error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
258 &debugfs_mount_count); 261 &debugfs_mount_count);
259 if (error) 262 if (error)
@@ -521,7 +524,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
521 524
522 if (debugfs_positive(dentry)) { 525 if (debugfs_positive(dentry)) {
523 dget(dentry); 526 dget(dentry);
524 if (S_ISDIR(dentry->d_inode->i_mode)) 527 if (d_is_dir(dentry))
525 ret = simple_rmdir(parent->d_inode, dentry); 528 ret = simple_rmdir(parent->d_inode, dentry);
526 else 529 else
527 simple_unlink(parent->d_inode, dentry); 530 simple_unlink(parent->d_inode, dentry);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b2e297..c3b560b24a46 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/atomic.h> 38#include <linux/atomic.h>
39#include <linux/prefetch.h> 39#include <linux/prefetch.h>
40#include <linux/aio.h>
41 40
42/* 41/*
43 * How many user pages to map in one call to get_user_pages(). This determines 42 * How many user pages to map in one call to get_user_pages(). This determines
@@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
265 ret = err; 264 ret = err;
266 } 265 }
267 266
268 aio_complete(dio->iocb, ret, 0); 267 dio->iocb->ki_complete(dio->iocb, ret, 0);
269 } 268 }
270 269
271 kmem_cache_free(dio_cache, dio); 270 kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
1056 * operation. AIO can if it was a broken operation described above or 1055 * operation. AIO can if it was a broken operation described above or
1057 * in fact if all the bios race to complete before we get here. In 1056 * in fact if all the bios race to complete before we get here. In
1058 * that case dio_complete() translates the EIOCBQUEUED into the proper 1057 * that case dio_complete() translates the EIOCBQUEUED into the proper
1059 * return code that the caller will hand to aio_complete(). 1058 * return code that the caller will hand to ->complete().
1060 * 1059 *
1061 * This is managed by the bio_lock instead of being an atomic_t so that 1060 * This is managed by the bio_lock instead of being an atomic_t so that
1062 * completion paths can drop their ref and use the remaining count to 1061 * completion paths can drop their ref and use the remaining count to
@@ -1094,10 +1093,10 @@ static inline int drop_refcount(struct dio *dio)
1094 * for the whole file. 1093 * for the whole file.
1095 */ 1094 */
1096static inline ssize_t 1095static inline ssize_t
1097do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1096do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1098 struct block_device *bdev, struct iov_iter *iter, loff_t offset, 1097 struct block_device *bdev, struct iov_iter *iter,
1099 get_block_t get_block, dio_iodone_t end_io, 1098 loff_t offset, get_block_t get_block, dio_iodone_t end_io,
1100 dio_submit_t submit_io, int flags) 1099 dio_submit_t submit_io, int flags)
1101{ 1100{
1102 unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); 1101 unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
1103 unsigned blkbits = i_blkbits; 1102 unsigned blkbits = i_blkbits;
@@ -1111,9 +1110,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1111 struct blk_plug plug; 1110 struct blk_plug plug;
1112 unsigned long align = offset | iov_iter_alignment(iter); 1111 unsigned long align = offset | iov_iter_alignment(iter);
1113 1112
1114 if (rw & WRITE)
1115 rw = WRITE_ODIRECT;
1116
1117 /* 1113 /*
1118 * Avoid references to bdev if not absolutely needed to give 1114 * Avoid references to bdev if not absolutely needed to give
1119 * the early prefetch in the caller enough time. 1115 * the early prefetch in the caller enough time.
@@ -1128,7 +1124,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1128 } 1124 }
1129 1125
1130 /* watch out for a 0 len io from a tricksy fs */ 1126 /* watch out for a 0 len io from a tricksy fs */
1131 if (rw == READ && !iov_iter_count(iter)) 1127 if (iov_iter_rw(iter) == READ && !iov_iter_count(iter))
1132 return 0; 1128 return 0;
1133 1129
1134 dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); 1130 dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1144,7 +1140,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1144 1140
1145 dio->flags = flags; 1141 dio->flags = flags;
1146 if (dio->flags & DIO_LOCKING) { 1142 if (dio->flags & DIO_LOCKING) {
1147 if (rw == READ) { 1143 if (iov_iter_rw(iter) == READ) {
1148 struct address_space *mapping = 1144 struct address_space *mapping =
1149 iocb->ki_filp->f_mapping; 1145 iocb->ki_filp->f_mapping;
1150 1146
@@ -1170,19 +1166,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1170 if (is_sync_kiocb(iocb)) 1166 if (is_sync_kiocb(iocb))
1171 dio->is_async = false; 1167 dio->is_async = false;
1172 else if (!(dio->flags & DIO_ASYNC_EXTEND) && 1168 else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
1173 (rw & WRITE) && end > i_size_read(inode)) 1169 iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
1174 dio->is_async = false; 1170 dio->is_async = false;
1175 else 1171 else
1176 dio->is_async = true; 1172 dio->is_async = true;
1177 1173
1178 dio->inode = inode; 1174 dio->inode = inode;
1179 dio->rw = rw; 1175 dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ;
1180 1176
1181 /* 1177 /*
1182 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue 1178 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
1183 * so that we can call ->fsync. 1179 * so that we can call ->fsync.
1184 */ 1180 */
1185 if (dio->is_async && (rw & WRITE) && 1181 if (dio->is_async && iov_iter_rw(iter) == WRITE &&
1186 ((iocb->ki_filp->f_flags & O_DSYNC) || 1182 ((iocb->ki_filp->f_flags & O_DSYNC) ||
1187 IS_SYNC(iocb->ki_filp->f_mapping->host))) { 1183 IS_SYNC(iocb->ki_filp->f_mapping->host))) {
1188 retval = dio_set_defer_completion(dio); 1184 retval = dio_set_defer_completion(dio);
@@ -1275,7 +1271,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1275 * we can let i_mutex go now that its achieved its purpose 1271 * we can let i_mutex go now that its achieved its purpose
1276 * of protecting us from looking up uninitialized blocks. 1272 * of protecting us from looking up uninitialized blocks.
1277 */ 1273 */
1278 if (rw == READ && (dio->flags & DIO_LOCKING)) 1274 if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
1279 mutex_unlock(&dio->inode->i_mutex); 1275 mutex_unlock(&dio->inode->i_mutex);
1280 1276
1281 /* 1277 /*
@@ -1287,7 +1283,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1287 */ 1283 */
1288 BUG_ON(retval == -EIOCBQUEUED); 1284 BUG_ON(retval == -EIOCBQUEUED);
1289 if (dio->is_async && retval == 0 && dio->result && 1285 if (dio->is_async && retval == 0 && dio->result &&
1290 (rw == READ || dio->result == count)) 1286 (iov_iter_rw(iter) == READ || dio->result == count))
1291 retval = -EIOCBQUEUED; 1287 retval = -EIOCBQUEUED;
1292 else 1288 else
1293 dio_await_completion(dio); 1289 dio_await_completion(dio);
@@ -1301,11 +1297,11 @@ out:
1301 return retval; 1297 return retval;
1302} 1298}
1303 1299
1304ssize_t 1300ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1305__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1301 struct block_device *bdev, struct iov_iter *iter,
1306 struct block_device *bdev, struct iov_iter *iter, loff_t offset, 1302 loff_t offset, get_block_t get_block,
1307 get_block_t get_block, dio_iodone_t end_io, 1303 dio_iodone_t end_io, dio_submit_t submit_io,
1308 dio_submit_t submit_io, int flags) 1304 int flags)
1309{ 1305{
1310 /* 1306 /*
1311 * The block device state is needed in the end to finally 1307 * The block device state is needed in the end to finally
@@ -1319,8 +1315,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1319 prefetch(bdev->bd_queue); 1315 prefetch(bdev->bd_queue);
1320 prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); 1316 prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
1321 1317
1322 return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, 1318 return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block,
1323 get_block, end_io, submit_io, flags); 1319 end_io, submit_io, flags);
1324} 1320}
1325 1321
1326EXPORT_SYMBOL(__blockdev_direct_IO); 1322EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index fd39bad6f1bd..a65786e26b05 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/compat.h> 32#include <linux/compat.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <linux/aio.h>
35#include "ecryptfs_kernel.h" 34#include "ecryptfs_kernel.h"
36 35
37/** 36/**
@@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
52 struct file *file = iocb->ki_filp; 51 struct file *file = iocb->ki_filp;
53 52
54 rc = generic_file_read_iter(iocb, to); 53 rc = generic_file_read_iter(iocb, to);
55 /*
56 * Even though this is a async interface, we need to wait
57 * for IO to finish to update atime
58 */
59 if (-EIOCBQUEUED == rc)
60 rc = wait_on_sync_kiocb(iocb);
61 if (rc >= 0) { 54 if (rc >= 0) {
62 path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); 55 path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
63 touch_atime(path); 56 touch_atime(path);
@@ -365,9 +358,7 @@ const struct file_operations ecryptfs_dir_fops = {
365 358
366const struct file_operations ecryptfs_main_fops = { 359const struct file_operations ecryptfs_main_fops = {
367 .llseek = generic_file_llseek, 360 .llseek = generic_file_llseek,
368 .read = new_sync_read,
369 .read_iter = ecryptfs_read_update_atime, 361 .read_iter = ecryptfs_read_update_atime,
370 .write = new_sync_write,
371 .write_iter = generic_file_write_iter, 362 .write_iter = generic_file_write_iter,
372 .iterate = ecryptfs_readdir, 363 .iterate = ecryptfs_readdir,
373 .unlocked_ioctl = ecryptfs_unlocked_ioctl, 364 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
diff --git a/fs/exec.c b/fs/exec.c
index c7f9b733406d..49a1c61433b7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -926,10 +926,14 @@ static int de_thread(struct task_struct *tsk)
926 if (!thread_group_leader(tsk)) { 926 if (!thread_group_leader(tsk)) {
927 struct task_struct *leader = tsk->group_leader; 927 struct task_struct *leader = tsk->group_leader;
928 928
929 sig->notify_count = -1; /* for exit_notify() */
930 for (;;) { 929 for (;;) {
931 threadgroup_change_begin(tsk); 930 threadgroup_change_begin(tsk);
932 write_lock_irq(&tasklist_lock); 931 write_lock_irq(&tasklist_lock);
932 /*
933 * Do this under tasklist_lock to ensure that
934 * exit_notify() can't miss ->group_exit_task
935 */
936 sig->notify_count = -1;
933 if (likely(leader->exit_state)) 937 if (likely(leader->exit_state))
934 break; 938 break;
935 __set_current_state(TASK_KILLABLE); 939 __set_current_state(TASK_KILLABLE);
@@ -1078,7 +1082,13 @@ int flush_old_exec(struct linux_binprm * bprm)
1078 if (retval) 1082 if (retval)
1079 goto out; 1083 goto out;
1080 1084
1085 /*
1086 * Must be called _before_ exec_mmap() as bprm->mm is
1087 * not visibile until then. This also enables the update
1088 * to be lockless.
1089 */
1081 set_mm_exe_file(bprm->mm, bprm->file); 1090 set_mm_exe_file(bprm->mm, bprm->file);
1091
1082 /* 1092 /*
1083 * Release all of the old mmap stuff 1093 * Release all of the old mmap stuff
1084 */ 1094 */
@@ -1265,6 +1275,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
1265 spin_unlock(&p->fs->lock); 1275 spin_unlock(&p->fs->lock);
1266} 1276}
1267 1277
1278static void bprm_fill_uid(struct linux_binprm *bprm)
1279{
1280 struct inode *inode;
1281 unsigned int mode;
1282 kuid_t uid;
1283 kgid_t gid;
1284
1285 /* clear any previous set[ug]id data from a previous binary */
1286 bprm->cred->euid = current_euid();
1287 bprm->cred->egid = current_egid();
1288
1289 if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
1290 return;
1291
1292 if (task_no_new_privs(current))
1293 return;
1294
1295 inode = file_inode(bprm->file);
1296 mode = READ_ONCE(inode->i_mode);
1297 if (!(mode & (S_ISUID|S_ISGID)))
1298 return;
1299
1300 /* Be careful if suid/sgid is set */
1301 mutex_lock(&inode->i_mutex);
1302
1303 /* reload atomically mode/uid/gid now that lock held */
1304 mode = inode->i_mode;
1305 uid = inode->i_uid;
1306 gid = inode->i_gid;
1307 mutex_unlock(&inode->i_mutex);
1308
1309 /* We ignore suid/sgid if there are no mappings for them in the ns */
1310 if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
1311 !kgid_has_mapping(bprm->cred->user_ns, gid))
1312 return;
1313
1314 if (mode & S_ISUID) {
1315 bprm->per_clear |= PER_CLEAR_ON_SETID;
1316 bprm->cred->euid = uid;
1317 }
1318
1319 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1320 bprm->per_clear |= PER_CLEAR_ON_SETID;
1321 bprm->cred->egid = gid;
1322 }
1323}
1324
1268/* 1325/*
1269 * Fill the binprm structure from the inode. 1326 * Fill the binprm structure from the inode.
1270 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes 1327 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1273,36 +1330,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
1273 */ 1330 */
1274int prepare_binprm(struct linux_binprm *bprm) 1331int prepare_binprm(struct linux_binprm *bprm)
1275{ 1332{
1276 struct inode *inode = file_inode(bprm->file);
1277 umode_t mode = inode->i_mode;
1278 int retval; 1333 int retval;
1279 1334
1280 1335 bprm_fill_uid(bprm);
1281 /* clear any previous set[ug]id data from a previous binary */
1282 bprm->cred->euid = current_euid();
1283 bprm->cred->egid = current_egid();
1284
1285 if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
1286 !task_no_new_privs(current) &&
1287 kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
1288 kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
1289 /* Set-uid? */
1290 if (mode & S_ISUID) {
1291 bprm->per_clear |= PER_CLEAR_ON_SETID;
1292 bprm->cred->euid = inode->i_uid;
1293 }
1294
1295 /* Set-gid? */
1296 /*
1297 * If setgid is set but no group execute bit then this
1298 * is a candidate for mandatory locking, not a setgid
1299 * executable.
1300 */
1301 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1302 bprm->per_clear |= PER_CLEAR_ON_SETID;
1303 bprm->cred->egid = inode->i_gid;
1304 }
1305 }
1306 1336
1307 /* fill in binprm security blob */ 1337 /* fill in binprm security blob */
1308 retval = security_bprm_set_creds(bprm); 1338 retval = security_bprm_set_creds(bprm);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 1a376b42d305..906de66e8e7e 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -67,8 +67,6 @@ static int exofs_flush(struct file *file, fl_owner_t id)
67 67
68const struct file_operations exofs_file_operations = { 68const struct file_operations exofs_file_operations = {
69 .llseek = generic_file_llseek, 69 .llseek = generic_file_llseek,
70 .read = new_sync_read,
71 .write = new_sync_write,
72 .read_iter = generic_file_read_iter, 70 .read_iter = generic_file_read_iter,
73 .write_iter = generic_file_write_iter, 71 .write_iter = generic_file_write_iter,
74 .mmap = generic_file_mmap, 72 .mmap = generic_file_mmap,
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a198e94813fe..35073aaec6e0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -963,8 +963,8 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
963 963
964 964
965 /* TODO: Should be easy enough to do proprly */ 965 /* TODO: Should be easy enough to do proprly */
966static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, 966static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
967 struct iov_iter *iter, loff_t offset) 967 loff_t offset)
968{ 968{
969 return 0; 969 return 0;
970} 970}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 678f9ab08c48..8d15febd0aa3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -793,7 +793,6 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
793 int datasync); 793 int datasync);
794extern const struct inode_operations ext2_file_inode_operations; 794extern const struct inode_operations ext2_file_inode_operations;
795extern const struct file_operations ext2_file_operations; 795extern const struct file_operations ext2_file_operations;
796extern const struct file_operations ext2_dax_file_operations;
797 796
798/* inode.c */ 797/* inode.c */
799extern const struct address_space_operations ext2_aops; 798extern const struct address_space_operations ext2_aops;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index e31701713516..3a0a6c6406d0 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -39,6 +39,7 @@ static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
39static const struct vm_operations_struct ext2_dax_vm_ops = { 39static const struct vm_operations_struct ext2_dax_vm_ops = {
40 .fault = ext2_dax_fault, 40 .fault = ext2_dax_fault,
41 .page_mkwrite = ext2_dax_mkwrite, 41 .page_mkwrite = ext2_dax_mkwrite,
42 .pfn_mkwrite = dax_pfn_mkwrite,
42}; 43};
43 44
44static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) 45static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -92,8 +93,6 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
92 */ 93 */
93const struct file_operations ext2_file_operations = { 94const struct file_operations ext2_file_operations = {
94 .llseek = generic_file_llseek, 95 .llseek = generic_file_llseek,
95 .read = new_sync_read,
96 .write = new_sync_write,
97 .read_iter = generic_file_read_iter, 96 .read_iter = generic_file_read_iter,
98 .write_iter = generic_file_write_iter, 97 .write_iter = generic_file_write_iter,
99 .unlocked_ioctl = ext2_ioctl, 98 .unlocked_ioctl = ext2_ioctl,
@@ -108,24 +107,6 @@ const struct file_operations ext2_file_operations = {
108 .splice_write = iter_file_splice_write, 107 .splice_write = iter_file_splice_write,
109}; 108};
110 109
111#ifdef CONFIG_FS_DAX
112const struct file_operations ext2_dax_file_operations = {
113 .llseek = generic_file_llseek,
114 .read = new_sync_read,
115 .write = new_sync_write,
116 .read_iter = generic_file_read_iter,
117 .write_iter = generic_file_write_iter,
118 .unlocked_ioctl = ext2_ioctl,
119#ifdef CONFIG_COMPAT
120 .compat_ioctl = ext2_compat_ioctl,
121#endif
122 .mmap = ext2_file_mmap,
123 .open = dquot_file_open,
124 .release = ext2_release_file,
125 .fsync = ext2_fsync,
126};
127#endif
128
129const struct inode_operations ext2_file_inode_operations = { 110const struct inode_operations ext2_file_inode_operations = {
130#ifdef CONFIG_EXT2_FS_XATTR 111#ifdef CONFIG_EXT2_FS_XATTR
131 .setxattr = generic_setxattr, 112 .setxattr = generic_setxattr,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc000125..5d9213963fae 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
31#include <linux/mpage.h> 31#include <linux/mpage.h>
32#include <linux/fiemap.h> 32#include <linux/fiemap.h>
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/aio.h> 34#include <linux/uio.h>
35#include "ext2.h" 35#include "ext2.h"
36#include "acl.h" 36#include "acl.h"
37#include "xattr.h" 37#include "xattr.h"
@@ -851,8 +851,7 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
851} 851}
852 852
853static ssize_t 853static ssize_t
854ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 854ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
855 loff_t offset)
856{ 855{
857 struct file *file = iocb->ki_filp; 856 struct file *file = iocb->ki_filp;
858 struct address_space *mapping = file->f_mapping; 857 struct address_space *mapping = file->f_mapping;
@@ -861,12 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
861 ssize_t ret; 860 ssize_t ret;
862 861
863 if (IS_DAX(inode)) 862 if (IS_DAX(inode))
864 ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, 863 ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
865 NULL, DIO_LOCKING); 864 DIO_LOCKING);
866 else 865 else
867 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 866 ret = blockdev_direct_IO(iocb, inode, iter, offset,
868 ext2_get_block); 867 ext2_get_block);
869 if (ret < 0 && (rw & WRITE)) 868 if (ret < 0 && iov_iter_rw(iter) == WRITE)
870 ext2_write_failed(mapping, offset + count); 869 ext2_write_failed(mapping, offset + count);
871 return ret; 870 return ret;
872} 871}
@@ -1388,10 +1387,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1388 1387
1389 if (S_ISREG(inode->i_mode)) { 1388 if (S_ISREG(inode->i_mode)) {
1390 inode->i_op = &ext2_file_inode_operations; 1389 inode->i_op = &ext2_file_inode_operations;
1391 if (test_opt(inode->i_sb, DAX)) { 1390 if (test_opt(inode->i_sb, NOBH)) {
1392 inode->i_mapping->a_ops = &ext2_aops;
1393 inode->i_fop = &ext2_dax_file_operations;
1394 } else if (test_opt(inode->i_sb, NOBH)) {
1395 inode->i_mapping->a_ops = &ext2_nobh_aops; 1391 inode->i_mapping->a_ops = &ext2_nobh_aops;
1396 inode->i_fop = &ext2_file_operations; 1392 inode->i_fop = &ext2_file_operations;
1397 } else { 1393 } else {
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 148f6e3789ea..ce422931f411 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,10 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
104 return PTR_ERR(inode); 104 return PTR_ERR(inode);
105 105
106 inode->i_op = &ext2_file_inode_operations; 106 inode->i_op = &ext2_file_inode_operations;
107 if (test_opt(inode->i_sb, DAX)) { 107 if (test_opt(inode->i_sb, NOBH)) {
108 inode->i_mapping->a_ops = &ext2_aops;
109 inode->i_fop = &ext2_dax_file_operations;
110 } else if (test_opt(inode->i_sb, NOBH)) {
111 inode->i_mapping->a_ops = &ext2_nobh_aops; 108 inode->i_mapping->a_ops = &ext2_nobh_aops;
112 inode->i_fop = &ext2_file_operations; 109 inode->i_fop = &ext2_file_operations;
113 } else { 110 } else {
@@ -125,10 +122,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
125 return PTR_ERR(inode); 122 return PTR_ERR(inode);
126 123
127 inode->i_op = &ext2_file_inode_operations; 124 inode->i_op = &ext2_file_inode_operations;
128 if (test_opt(inode->i_sb, DAX)) { 125 if (test_opt(inode->i_sb, NOBH)) {
129 inode->i_mapping->a_ops = &ext2_aops;
130 inode->i_fop = &ext2_dax_file_operations;
131 } else if (test_opt(inode->i_sb, NOBH)) {
132 inode->i_mapping->a_ops = &ext2_nobh_aops; 126 inode->i_mapping->a_ops = &ext2_nobh_aops;
133 inode->i_fop = &ext2_file_operations; 127 inode->i_fop = &ext2_file_operations;
134 } else { 128 } else {
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index a062fa1e1b11..3b8f650de22c 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -50,8 +50,6 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
50 50
51const struct file_operations ext3_file_operations = { 51const struct file_operations ext3_file_operations = {
52 .llseek = generic_file_llseek, 52 .llseek = generic_file_llseek,
53 .read = new_sync_read,
54 .write = new_sync_write,
55 .read_iter = generic_file_read_iter, 53 .read_iter = generic_file_read_iter,
56 .write_iter = generic_file_write_iter, 54 .write_iter = generic_file_write_iter,
57 .unlocked_ioctl = ext3_ioctl, 55 .unlocked_ioctl = ext3_ioctl,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc49ba27..13c0868c7160 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/mpage.h> 28#include <linux/mpage.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/aio.h> 30#include <linux/uio.h>
31#include "ext3.h" 31#include "ext3.h"
32#include "xattr.h" 32#include "xattr.h"
33#include "acl.h" 33#include "acl.h"
@@ -1820,8 +1820,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
1820 * crashes then stale disk data _may_ be exposed inside the file. But current 1820 * crashes then stale disk data _may_ be exposed inside the file. But current
1821 * VFS code falls back into buffered path in that case so we are safe. 1821 * VFS code falls back into buffered path in that case so we are safe.
1822 */ 1822 */
1823static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, 1823static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1824 struct iov_iter *iter, loff_t offset) 1824 loff_t offset)
1825{ 1825{
1826 struct file *file = iocb->ki_filp; 1826 struct file *file = iocb->ki_filp;
1827 struct inode *inode = file->f_mapping->host; 1827 struct inode *inode = file->f_mapping->host;
@@ -1832,9 +1832,9 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1832 size_t count = iov_iter_count(iter); 1832 size_t count = iov_iter_count(iter);
1833 int retries = 0; 1833 int retries = 0;
1834 1834
1835 trace_ext3_direct_IO_enter(inode, offset, count, rw); 1835 trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1836 1836
1837 if (rw == WRITE) { 1837 if (iov_iter_rw(iter) == WRITE) {
1838 loff_t final_size = offset + count; 1838 loff_t final_size = offset + count;
1839 1839
1840 if (final_size > inode->i_size) { 1840 if (final_size > inode->i_size) {
@@ -1856,12 +1856,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1856 } 1856 }
1857 1857
1858retry: 1858retry:
1859 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); 1859 ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block);
1860 /* 1860 /*
1861 * In case of error extending write may have instantiated a few 1861 * In case of error extending write may have instantiated a few
1862 * blocks outside i_size. Trim these off again. 1862 * blocks outside i_size. Trim these off again.
1863 */ 1863 */
1864 if (unlikely((rw & WRITE) && ret < 0)) { 1864 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
1865 loff_t isize = i_size_read(inode); 1865 loff_t isize = i_size_read(inode);
1866 loff_t end = offset + count; 1866 loff_t end = offset + count;
1867 1867
@@ -1908,7 +1908,7 @@ retry:
1908 ret = err; 1908 ret = err;
1909 } 1909 }
1910out: 1910out:
1911 trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); 1911 trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
1912 return ret; 1912 return ret;
1913} 1913}
1914 1914
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index d4dbf3c259b3..f037b4b27300 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -789,7 +789,7 @@ static const struct quotactl_ops ext3_qctl_operations = {
789 .quota_on = ext3_quota_on, 789 .quota_on = ext3_quota_on,
790 .quota_off = dquot_quota_off, 790 .quota_off = dquot_quota_off,
791 .quota_sync = dquot_quota_sync, 791 .quota_sync = dquot_quota_sync,
792 .get_info = dquot_get_dqinfo, 792 .get_state = dquot_get_state,
793 .set_info = dquot_set_dqinfo, 793 .set_info = dquot_set_dqinfo,
794 .get_dqblk = dquot_get_dqblk, 794 .get_dqblk = dquot_get_dqblk,
795 .set_dqblk = dquot_set_dqblk 795 .set_dqblk = dquot_set_dqblk
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index c6874be6d58b..24215dc09a18 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -546,8 +546,7 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
546 free += EXT3_XATTR_LEN(name_len); 546 free += EXT3_XATTR_LEN(name_len);
547 } 547 }
548 if (i->value) { 548 if (i->value) {
549 if (free < EXT3_XATTR_SIZE(i->value_len) || 549 if (free < EXT3_XATTR_LEN(name_len) +
550 free < EXT3_XATTR_LEN(name_len) +
551 EXT3_XATTR_SIZE(i->value_len)) 550 EXT3_XATTR_SIZE(i->value_len))
552 return -ENOSPC; 551 return -ENOSPC;
553 } 552 }
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index efea5d5c44ce..18228c201f7f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -64,6 +64,23 @@ config EXT4_FS_SECURITY
64 If you are not using a security module that requires using 64 If you are not using a security module that requires using
65 extended attributes for file security labels, say N. 65 extended attributes for file security labels, say N.
66 66
67config EXT4_FS_ENCRYPTION
68 bool "Ext4 Encryption"
69 depends on EXT4_FS
70 select CRYPTO_AES
71 select CRYPTO_CBC
72 select CRYPTO_ECB
73 select CRYPTO_XTS
74 select CRYPTO_CTS
75 select CRYPTO_SHA256
76 select KEYS
77 select ENCRYPTED_KEYS
78 help
79 Enable encryption of ext4 files and directories. This
80 feature is similar to ecryptfs, but it is more memory
81 efficient since it avoids caching the encrypted and
82 decrypted pages in the page cache.
83
67config EXT4_DEBUG 84config EXT4_DEBUG
68 bool "EXT4 debugging support" 85 bool "EXT4 debugging support"
69 depends on EXT4_FS 86 depends on EXT4_FS
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 0310fec2ee3d..75285ea9aa05 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,7 +8,9 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 8 ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ 9 ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
10 mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ 10 mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
11 xattr_trusted.o inline.o 11 xattr_trusted.o inline.o readpage.o
12 12
13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 13ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
14ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o 14ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
15ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \
16 crypto_key.o crypto_fname.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index d40c8dbbb0d6..69b1e73026a5 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,11 +4,6 @@
4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> 4 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5 */ 5 */
6 6
7#include <linux/init.h>
8#include <linux/sched.h>
9#include <linux/slab.h>
10#include <linux/capability.h>
11#include <linux/fs.h>
12#include "ext4_jbd2.h" 7#include "ext4_jbd2.h"
13#include "ext4.h" 8#include "ext4.h"
14#include "xattr.h" 9#include "xattr.h"
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 83a6f497c4e0..955bf49a7945 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -14,7 +14,6 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h>
18#include <linux/quotaops.h> 17#include <linux/quotaops.h>
19#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
20#include "ext4.h" 19#include "ext4.h"
@@ -641,8 +640,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
641 * fail EDQUOT for metdata, but we do account for it. 640 * fail EDQUOT for metdata, but we do account for it.
642 */ 641 */
643 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { 642 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
644 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
645 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
646 dquot_alloc_block_nofail(inode, 643 dquot_alloc_block_nofail(inode,
647 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); 644 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
648 } 645 }
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index b610779a958c..4a606afb171f 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -8,7 +8,6 @@
8 */ 8 */
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h>
12#include "ext4.h" 11#include "ext4.h"
13 12
14unsigned int ext4_count_free(char *bitmap, unsigned int numchars) 13unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 41eb9dcfac7e..3522340c7a99 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/swap.h> 16#include <linux/swap.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/blkdev.h> 18#include <linux/blkdev.h>
19#include <linux/mutex.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include "ext4.h" 20#include "ext4.h"
22 21
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
new file mode 100644
index 000000000000..8ff15273ab0c
--- /dev/null
+++ b/fs/ext4/crypto.c
@@ -0,0 +1,558 @@
1/*
2 * linux/fs/ext4/crypto.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption functions for ext4
7 *
8 * Written by Michael Halcrow, 2014.
9 *
10 * Filename encryption additions
11 * Uday Savagaonkar, 2014
12 * Encryption policy handling additions
13 * Ildar Muslukhov, 2014
14 *
15 * This has not yet undergone a rigorous security audit.
16 *
17 * The usage of AES-XTS should conform to recommendations in NIST
18 * Special Publication 800-38E and IEEE P1619/D16.
19 */
20
21#include <crypto/hash.h>
22#include <crypto/sha.h>
23#include <keys/user-type.h>
24#include <keys/encrypted-type.h>
25#include <linux/crypto.h>
26#include <linux/ecryptfs.h>
27#include <linux/gfp.h>
28#include <linux/kernel.h>
29#include <linux/key.h>
30#include <linux/list.h>
31#include <linux/mempool.h>
32#include <linux/module.h>
33#include <linux/mutex.h>
34#include <linux/random.h>
35#include <linux/scatterlist.h>
36#include <linux/spinlock_types.h>
37
38#include "ext4_extents.h"
39#include "xattr.h"
40
41/* Encryption added and removed here! (L: */
42
43static unsigned int num_prealloc_crypto_pages = 32;
44static unsigned int num_prealloc_crypto_ctxs = 128;
45
46module_param(num_prealloc_crypto_pages, uint, 0444);
47MODULE_PARM_DESC(num_prealloc_crypto_pages,
48 "Number of crypto pages to preallocate");
49module_param(num_prealloc_crypto_ctxs, uint, 0444);
50MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
51 "Number of crypto contexts to preallocate");
52
53static mempool_t *ext4_bounce_page_pool;
54
55static LIST_HEAD(ext4_free_crypto_ctxs);
56static DEFINE_SPINLOCK(ext4_crypto_ctx_lock);
57
58/**
59 * ext4_release_crypto_ctx() - Releases an encryption context
60 * @ctx: The encryption context to release.
61 *
62 * If the encryption context was allocated from the pre-allocated pool, returns
63 * it to that pool. Else, frees it.
64 *
65 * If there's a bounce page in the context, this frees that.
66 */
67void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
68{
69 unsigned long flags;
70
71 if (ctx->bounce_page) {
72 if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
73 __free_page(ctx->bounce_page);
74 else
75 mempool_free(ctx->bounce_page, ext4_bounce_page_pool);
76 ctx->bounce_page = NULL;
77 }
78 ctx->control_page = NULL;
79 if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
80 if (ctx->tfm)
81 crypto_free_tfm(ctx->tfm);
82 kfree(ctx);
83 } else {
84 spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
85 list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
86 spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
87 }
88}
89
90/**
91 * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context
92 * @mask: The allocation mask.
93 *
94 * Return: An allocated and initialized encryption context on success. An error
95 * value or NULL otherwise.
96 */
97static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask)
98{
99 struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx),
100 mask);
101
102 if (!ctx)
103 return ERR_PTR(-ENOMEM);
104 return ctx;
105}
106
107/**
108 * ext4_get_crypto_ctx() - Gets an encryption context
109 * @inode: The inode for which we are doing the crypto
110 *
111 * Allocates and initializes an encryption context.
112 *
113 * Return: An allocated and initialized encryption context on success; error
114 * value or NULL otherwise.
115 */
116struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
117{
118 struct ext4_crypto_ctx *ctx = NULL;
119 int res = 0;
120 unsigned long flags;
121 struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key;
122
123 if (!ext4_read_workqueue)
124 ext4_init_crypto();
125
126 /*
127 * We first try getting the ctx from a free list because in
128 * the common case the ctx will have an allocated and
129 * initialized crypto tfm, so it's probably a worthwhile
130 * optimization. For the bounce page, we first try getting it
131 * from the kernel allocator because that's just about as fast
132 * as getting it from a list and because a cache of free pages
133 * should generally be a "last resort" option for a filesystem
134 * to be able to do its job.
135 */
136 spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
137 ctx = list_first_entry_or_null(&ext4_free_crypto_ctxs,
138 struct ext4_crypto_ctx, free_list);
139 if (ctx)
140 list_del(&ctx->free_list);
141 spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
142 if (!ctx) {
143 ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS);
144 if (IS_ERR(ctx)) {
145 res = PTR_ERR(ctx);
146 goto out;
147 }
148 ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
149 } else {
150 ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
151 }
152
153 /* Allocate a new Crypto API context if we don't already have
154 * one or if it isn't the right mode. */
155 BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID);
156 if (ctx->tfm && (ctx->mode != key->mode)) {
157 crypto_free_tfm(ctx->tfm);
158 ctx->tfm = NULL;
159 ctx->mode = EXT4_ENCRYPTION_MODE_INVALID;
160 }
161 if (!ctx->tfm) {
162 switch (key->mode) {
163 case EXT4_ENCRYPTION_MODE_AES_256_XTS:
164 ctx->tfm = crypto_ablkcipher_tfm(
165 crypto_alloc_ablkcipher("xts(aes)", 0, 0));
166 break;
167 case EXT4_ENCRYPTION_MODE_AES_256_GCM:
168 /* TODO(mhalcrow): AEAD w/ gcm(aes);
169 * crypto_aead_setauthsize() */
170 ctx->tfm = ERR_PTR(-ENOTSUPP);
171 break;
172 default:
173 BUG();
174 }
175 if (IS_ERR_OR_NULL(ctx->tfm)) {
176 res = PTR_ERR(ctx->tfm);
177 ctx->tfm = NULL;
178 goto out;
179 }
180 ctx->mode = key->mode;
181 }
182 BUG_ON(key->size != ext4_encryption_key_size(key->mode));
183
184 /* There shouldn't be a bounce page attached to the crypto
185 * context at this point. */
186 BUG_ON(ctx->bounce_page);
187
188out:
189 if (res) {
190 if (!IS_ERR_OR_NULL(ctx))
191 ext4_release_crypto_ctx(ctx);
192 ctx = ERR_PTR(res);
193 }
194 return ctx;
195}
196
197struct workqueue_struct *ext4_read_workqueue;
198static DEFINE_MUTEX(crypto_init);
199
200/**
201 * ext4_exit_crypto() - Shutdown the ext4 encryption system
202 */
203void ext4_exit_crypto(void)
204{
205 struct ext4_crypto_ctx *pos, *n;
206
207 list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) {
208 if (pos->bounce_page) {
209 if (pos->flags &
210 EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
211 __free_page(pos->bounce_page);
212 } else {
213 mempool_free(pos->bounce_page,
214 ext4_bounce_page_pool);
215 }
216 }
217 if (pos->tfm)
218 crypto_free_tfm(pos->tfm);
219 kfree(pos);
220 }
221 INIT_LIST_HEAD(&ext4_free_crypto_ctxs);
222 if (ext4_bounce_page_pool)
223 mempool_destroy(ext4_bounce_page_pool);
224 ext4_bounce_page_pool = NULL;
225 if (ext4_read_workqueue)
226 destroy_workqueue(ext4_read_workqueue);
227 ext4_read_workqueue = NULL;
228}
229
230/**
231 * ext4_init_crypto() - Set up for ext4 encryption.
232 *
233 * We only call this when we start accessing encrypted files, since it
234 * results in memory getting allocated that wouldn't otherwise be used.
235 *
236 * Return: Zero on success, non-zero otherwise.
237 */
238int ext4_init_crypto(void)
239{
240 int i, res;
241
242 mutex_lock(&crypto_init);
243 if (ext4_read_workqueue)
244 goto already_initialized;
245 ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0);
246 if (!ext4_read_workqueue) {
247 res = -ENOMEM;
248 goto fail;
249 }
250
251 for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
252 struct ext4_crypto_ctx *ctx;
253
254 ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL);
255 if (IS_ERR(ctx)) {
256 res = PTR_ERR(ctx);
257 goto fail;
258 }
259 list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
260 }
261
262 ext4_bounce_page_pool =
263 mempool_create_page_pool(num_prealloc_crypto_pages, 0);
264 if (!ext4_bounce_page_pool) {
265 res = -ENOMEM;
266 goto fail;
267 }
268already_initialized:
269 mutex_unlock(&crypto_init);
270 return 0;
271fail:
272 ext4_exit_crypto();
273 mutex_unlock(&crypto_init);
274 return res;
275}
276
277void ext4_restore_control_page(struct page *data_page)
278{
279 struct ext4_crypto_ctx *ctx =
280 (struct ext4_crypto_ctx *)page_private(data_page);
281
282 set_page_private(data_page, (unsigned long)NULL);
283 ClearPagePrivate(data_page);
284 unlock_page(data_page);
285 ext4_release_crypto_ctx(ctx);
286}
287
288/**
289 * ext4_crypt_complete() - The completion callback for page encryption
290 * @req: The asynchronous encryption request context
291 * @res: The result of the encryption operation
292 */
293static void ext4_crypt_complete(struct crypto_async_request *req, int res)
294{
295 struct ext4_completion_result *ecr = req->data;
296
297 if (res == -EINPROGRESS)
298 return;
299 ecr->res = res;
300 complete(&ecr->completion);
301}
302
303typedef enum {
304 EXT4_DECRYPT = 0,
305 EXT4_ENCRYPT,
306} ext4_direction_t;
307
308static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
309 struct inode *inode,
310 ext4_direction_t rw,
311 pgoff_t index,
312 struct page *src_page,
313 struct page *dest_page)
314
315{
316 u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
317 struct ablkcipher_request *req = NULL;
318 DECLARE_EXT4_COMPLETION_RESULT(ecr);
319 struct scatterlist dst, src;
320 struct ext4_inode_info *ei = EXT4_I(inode);
321 struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
322 int res = 0;
323
324 BUG_ON(!ctx->tfm);
325 BUG_ON(ctx->mode != ei->i_encryption_key.mode);
326
327 if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) {
328 printk_ratelimited(KERN_ERR
329 "%s: unsupported crypto algorithm: %d\n",
330 __func__, ctx->mode);
331 return -ENOTSUPP;
332 }
333
334 crypto_ablkcipher_clear_flags(atfm, ~0);
335 crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
336
337 res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw,
338 ei->i_encryption_key.size);
339 if (res) {
340 printk_ratelimited(KERN_ERR
341 "%s: crypto_ablkcipher_setkey() failed\n",
342 __func__);
343 return res;
344 }
345 req = ablkcipher_request_alloc(atfm, GFP_NOFS);
346 if (!req) {
347 printk_ratelimited(KERN_ERR
348 "%s: crypto_request_alloc() failed\n",
349 __func__);
350 return -ENOMEM;
351 }
352 ablkcipher_request_set_callback(
353 req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
354 ext4_crypt_complete, &ecr);
355
356 BUILD_BUG_ON(EXT4_XTS_TWEAK_SIZE < sizeof(index));
357 memcpy(xts_tweak, &index, sizeof(index));
358 memset(&xts_tweak[sizeof(index)], 0,
359 EXT4_XTS_TWEAK_SIZE - sizeof(index));
360
361 sg_init_table(&dst, 1);
362 sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
363 sg_init_table(&src, 1);
364 sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
365 ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
366 xts_tweak);
367 if (rw == EXT4_DECRYPT)
368 res = crypto_ablkcipher_decrypt(req);
369 else
370 res = crypto_ablkcipher_encrypt(req);
371 if (res == -EINPROGRESS || res == -EBUSY) {
372 BUG_ON(req->base.data != &ecr);
373 wait_for_completion(&ecr.completion);
374 res = ecr.res;
375 }
376 ablkcipher_request_free(req);
377 if (res) {
378 printk_ratelimited(
379 KERN_ERR
380 "%s: crypto_ablkcipher_encrypt() returned %d\n",
381 __func__, res);
382 return res;
383 }
384 return 0;
385}
386
387/**
388 * ext4_encrypt() - Encrypts a page
389 * @inode: The inode for which the encryption should take place
390 * @plaintext_page: The page to encrypt. Must be locked.
391 *
392 * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
393 * encryption context.
394 *
395 * Called on the page write path. The caller must call
396 * ext4_restore_control_page() on the returned ciphertext page to
397 * release the bounce buffer and the encryption context.
398 *
399 * Return: An allocated page with the encrypted content on success. Else, an
400 * error value or NULL.
401 */
402struct page *ext4_encrypt(struct inode *inode,
403 struct page *plaintext_page)
404{
405 struct ext4_crypto_ctx *ctx;
406 struct page *ciphertext_page = NULL;
407 int err;
408
409 BUG_ON(!PageLocked(plaintext_page));
410
411 ctx = ext4_get_crypto_ctx(inode);
412 if (IS_ERR(ctx))
413 return (struct page *) ctx;
414
415 /* The encryption operation will require a bounce page. */
416 ciphertext_page = alloc_page(GFP_NOFS);
417 if (!ciphertext_page) {
418 /* This is a potential bottleneck, but at least we'll have
419 * forward progress. */
420 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
421 GFP_NOFS);
422 if (WARN_ON_ONCE(!ciphertext_page)) {
423 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
424 GFP_NOFS | __GFP_WAIT);
425 }
426 ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
427 } else {
428 ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
429 }
430 ctx->bounce_page = ciphertext_page;
431 ctx->control_page = plaintext_page;
432 err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index,
433 plaintext_page, ciphertext_page);
434 if (err) {
435 ext4_release_crypto_ctx(ctx);
436 return ERR_PTR(err);
437 }
438 SetPagePrivate(ciphertext_page);
439 set_page_private(ciphertext_page, (unsigned long)ctx);
440 lock_page(ciphertext_page);
441 return ciphertext_page;
442}
443
444/**
445 * ext4_decrypt() - Decrypts a page in-place
446 * @ctx: The encryption context.
447 * @page: The page to decrypt. Must be locked.
448 *
449 * Decrypts page in-place using the ctx encryption context.
450 *
451 * Called from the read completion callback.
452 *
453 * Return: Zero on success, non-zero otherwise.
454 */
455int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page)
456{
457 BUG_ON(!PageLocked(page));
458
459 return ext4_page_crypto(ctx, page->mapping->host,
460 EXT4_DECRYPT, page->index, page, page);
461}
462
463/*
464 * Convenience function which takes care of allocating and
465 * deallocating the encryption context
466 */
467int ext4_decrypt_one(struct inode *inode, struct page *page)
468{
469 int ret;
470
471 struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode);
472
473 if (!ctx)
474 return -ENOMEM;
475 ret = ext4_decrypt(ctx, page);
476 ext4_release_crypto_ctx(ctx);
477 return ret;
478}
479
480int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
481{
482 struct ext4_crypto_ctx *ctx;
483 struct page *ciphertext_page = NULL;
484 struct bio *bio;
485 ext4_lblk_t lblk = ex->ee_block;
486 ext4_fsblk_t pblk = ext4_ext_pblock(ex);
487 unsigned int len = ext4_ext_get_actual_len(ex);
488 int err = 0;
489
490 BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
491
492 ctx = ext4_get_crypto_ctx(inode);
493 if (IS_ERR(ctx))
494 return PTR_ERR(ctx);
495
496 ciphertext_page = alloc_page(GFP_NOFS);
497 if (!ciphertext_page) {
498 /* This is a potential bottleneck, but at least we'll have
499 * forward progress. */
500 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
501 GFP_NOFS);
502 if (WARN_ON_ONCE(!ciphertext_page)) {
503 ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
504 GFP_NOFS | __GFP_WAIT);
505 }
506 ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
507 } else {
508 ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
509 }
510 ctx->bounce_page = ciphertext_page;
511
512 while (len--) {
513 err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk,
514 ZERO_PAGE(0), ciphertext_page);
515 if (err)
516 goto errout;
517
518 bio = bio_alloc(GFP_KERNEL, 1);
519 if (!bio) {
520 err = -ENOMEM;
521 goto errout;
522 }
523 bio->bi_bdev = inode->i_sb->s_bdev;
524 bio->bi_iter.bi_sector = pblk;
525 err = bio_add_page(bio, ciphertext_page,
526 inode->i_sb->s_blocksize, 0);
527 if (err) {
528 bio_put(bio);
529 goto errout;
530 }
531 err = submit_bio_wait(WRITE, bio);
532 if (err)
533 goto errout;
534 }
535 err = 0;
536errout:
537 ext4_release_crypto_ctx(ctx);
538 return err;
539}
540
541bool ext4_valid_contents_enc_mode(uint32_t mode)
542{
543 return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS);
544}
545
546/**
547 * ext4_validate_encryption_key_size() - Validate the encryption key size
548 * @mode: The key mode.
549 * @size: The key size to validate.
550 *
551 * Return: The validated key size for @mode. Zero if invalid.
552 */
553uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
554{
555 if (size == ext4_encryption_key_size(mode))
556 return size;
557 return 0;
558}
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
new file mode 100644
index 000000000000..ca2f5948c1ac
--- /dev/null
+++ b/fs/ext4/crypto_fname.c
@@ -0,0 +1,709 @@
1/*
2 * linux/fs/ext4/crypto_fname.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains functions for filename crypto management in ext4
7 *
8 * Written by Uday Savagaonkar, 2014.
9 *
10 * This has not yet undergone a rigorous security audit.
11 *
12 */
13
14#include <crypto/hash.h>
15#include <crypto/sha.h>
16#include <keys/encrypted-type.h>
17#include <keys/user-type.h>
18#include <linux/crypto.h>
19#include <linux/gfp.h>
20#include <linux/kernel.h>
21#include <linux/key.h>
22#include <linux/key.h>
23#include <linux/list.h>
24#include <linux/mempool.h>
25#include <linux/random.h>
26#include <linux/scatterlist.h>
27#include <linux/spinlock_types.h>
28
29#include "ext4.h"
30#include "ext4_crypto.h"
31#include "xattr.h"
32
33/**
34 * ext4_dir_crypt_complete() -
35 */
36static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
37{
38 struct ext4_completion_result *ecr = req->data;
39
40 if (res == -EINPROGRESS)
41 return;
42 ecr->res = res;
43 complete(&ecr->completion);
44}
45
46bool ext4_valid_filenames_enc_mode(uint32_t mode)
47{
48 return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
49}
50
51/**
52 * ext4_fname_encrypt() -
53 *
54 * This function encrypts the input filename, and returns the length of the
55 * ciphertext. Errors are returned as negative numbers. We trust the caller to
56 * allocate sufficient memory to oname string.
57 */
58static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
59 const struct qstr *iname,
60 struct ext4_str *oname)
61{
62 u32 ciphertext_len;
63 struct ablkcipher_request *req = NULL;
64 DECLARE_EXT4_COMPLETION_RESULT(ecr);
65 struct crypto_ablkcipher *tfm = ctx->ctfm;
66 int res = 0;
67 char iv[EXT4_CRYPTO_BLOCK_SIZE];
68 struct scatterlist sg[1];
69 char *workbuf;
70
71 if (iname->len <= 0 || iname->len > ctx->lim)
72 return -EIO;
73
74 ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
75 EXT4_CRYPTO_BLOCK_SIZE : iname->len;
76 ciphertext_len = (ciphertext_len > ctx->lim)
77 ? ctx->lim : ciphertext_len;
78
79 /* Allocate request */
80 req = ablkcipher_request_alloc(tfm, GFP_NOFS);
81 if (!req) {
82 printk_ratelimited(
83 KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
84 return -ENOMEM;
85 }
86 ablkcipher_request_set_callback(req,
87 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
88 ext4_dir_crypt_complete, &ecr);
89
90 /* Map the workpage */
91 workbuf = kmap(ctx->workpage);
92
93 /* Copy the input */
94 memcpy(workbuf, iname->name, iname->len);
95 if (iname->len < ciphertext_len)
96 memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
97
98 /* Initialize IV */
99 memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
100
101 /* Create encryption request */
102 sg_init_table(sg, 1);
103 sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
104 ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
105 res = crypto_ablkcipher_encrypt(req);
106 if (res == -EINPROGRESS || res == -EBUSY) {
107 BUG_ON(req->base.data != &ecr);
108 wait_for_completion(&ecr.completion);
109 res = ecr.res;
110 }
111 if (res >= 0) {
112 /* Copy the result to output */
113 memcpy(oname->name, workbuf, ciphertext_len);
114 res = ciphertext_len;
115 }
116 kunmap(ctx->workpage);
117 ablkcipher_request_free(req);
118 if (res < 0) {
119 printk_ratelimited(
120 KERN_ERR "%s: Error (error code %d)\n", __func__, res);
121 }
122 oname->len = ciphertext_len;
123 return res;
124}
125
126/*
127 * ext4_fname_decrypt()
128 * This function decrypts the input filename, and returns
129 * the length of the plaintext.
130 * Errors are returned as negative numbers.
131 * We trust the caller to allocate sufficient memory to oname string.
132 */
133static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
134 const struct ext4_str *iname,
135 struct ext4_str *oname)
136{
137 struct ext4_str tmp_in[2], tmp_out[1];
138 struct ablkcipher_request *req = NULL;
139 DECLARE_EXT4_COMPLETION_RESULT(ecr);
140 struct scatterlist sg[1];
141 struct crypto_ablkcipher *tfm = ctx->ctfm;
142 int res = 0;
143 char iv[EXT4_CRYPTO_BLOCK_SIZE];
144 char *workbuf;
145
146 if (iname->len <= 0 || iname->len > ctx->lim)
147 return -EIO;
148
149 tmp_in[0].name = iname->name;
150 tmp_in[0].len = iname->len;
151 tmp_out[0].name = oname->name;
152
153 /* Allocate request */
154 req = ablkcipher_request_alloc(tfm, GFP_NOFS);
155 if (!req) {
156 printk_ratelimited(
157 KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
158 return -ENOMEM;
159 }
160 ablkcipher_request_set_callback(req,
161 CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
162 ext4_dir_crypt_complete, &ecr);
163
164 /* Map the workpage */
165 workbuf = kmap(ctx->workpage);
166
167 /* Copy the input */
168 memcpy(workbuf, iname->name, iname->len);
169
170 /* Initialize IV */
171 memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
172
173 /* Create encryption request */
174 sg_init_table(sg, 1);
175 sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
176 ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
177 res = crypto_ablkcipher_decrypt(req);
178 if (res == -EINPROGRESS || res == -EBUSY) {
179 BUG_ON(req->base.data != &ecr);
180 wait_for_completion(&ecr.completion);
181 res = ecr.res;
182 }
183 if (res >= 0) {
184 /* Copy the result to output */
185 memcpy(oname->name, workbuf, iname->len);
186 res = iname->len;
187 }
188 kunmap(ctx->workpage);
189 ablkcipher_request_free(req);
190 if (res < 0) {
191 printk_ratelimited(
192 KERN_ERR "%s: Error in ext4_fname_encrypt (error code %d)\n",
193 __func__, res);
194 return res;
195 }
196
197 oname->len = strnlen(oname->name, iname->len);
198 return oname->len;
199}
200
201/**
202 * ext4_fname_encode_digest() -
203 *
204 * Encodes the input digest using characters from the set [a-zA-Z0-9_+].
205 * The encoded string is roughly 4/3 times the size of the input string.
206 */
207int ext4_fname_encode_digest(char *dst, char *src, u32 len)
208{
209 static const char *lookup_table =
210 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+";
211 u32 current_chunk, num_chunks, i;
212 char tmp_buf[3];
213 u32 c0, c1, c2, c3;
214
215 current_chunk = 0;
216 num_chunks = len/3;
217 for (i = 0; i < num_chunks; i++) {
218 c0 = src[3*i] & 0x3f;
219 c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f;
220 c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f;
221 c3 = (src[3*i+2]>>2) & 0x3f;
222 dst[4*i] = lookup_table[c0];
223 dst[4*i+1] = lookup_table[c1];
224 dst[4*i+2] = lookup_table[c2];
225 dst[4*i+3] = lookup_table[c3];
226 }
227 if (i*3 < len) {
228 memset(tmp_buf, 0, 3);
229 memcpy(tmp_buf, &src[3*i], len-3*i);
230 c0 = tmp_buf[0] & 0x3f;
231 c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f;
232 c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f;
233 c3 = (tmp_buf[2]>>2) & 0x3f;
234 dst[4*i] = lookup_table[c0];
235 dst[4*i+1] = lookup_table[c1];
236 dst[4*i+2] = lookup_table[c2];
237 dst[4*i+3] = lookup_table[c3];
238 i++;
239 }
240 return (i * 4);
241}
242
243/**
244 * ext4_fname_hash() -
245 *
246 * This function computes the hash of the input filename, and sets the output
247 * buffer to the *encoded* digest. It returns the length of the digest as its
248 * return value. Errors are returned as negative numbers. We trust the caller
249 * to allocate sufficient memory to oname string.
250 */
251static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx,
252 const struct ext4_str *iname,
253 struct ext4_str *oname)
254{
255 struct scatterlist sg;
256 struct hash_desc desc = {
257 .tfm = (struct crypto_hash *)ctx->htfm,
258 .flags = CRYPTO_TFM_REQ_MAY_SLEEP
259 };
260 int res = 0;
261
262 if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
263 res = ext4_fname_encode_digest(oname->name, iname->name,
264 iname->len);
265 oname->len = res;
266 return res;
267 }
268
269 sg_init_one(&sg, iname->name, iname->len);
270 res = crypto_hash_init(&desc);
271 if (res) {
272 printk(KERN_ERR
273 "%s: Error initializing crypto hash; res = [%d]\n",
274 __func__, res);
275 goto out;
276 }
277 res = crypto_hash_update(&desc, &sg, iname->len);
278 if (res) {
279 printk(KERN_ERR
280 "%s: Error updating crypto hash; res = [%d]\n",
281 __func__, res);
282 goto out;
283 }
284 res = crypto_hash_final(&desc,
285 &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]);
286 if (res) {
287 printk(KERN_ERR
288 "%s: Error finalizing crypto hash; res = [%d]\n",
289 __func__, res);
290 goto out;
291 }
292 /* Encode the digest as a printable string--this will increase the
293 * size of the digest */
294 oname->name[0] = 'I';
295 res = ext4_fname_encode_digest(oname->name+1,
296 &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE],
297 EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1;
298 oname->len = res;
299out:
300 return res;
301}
302
303/**
304 * ext4_free_fname_crypto_ctx() -
305 *
306 * Frees up a crypto context.
307 */
308void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx)
309{
310 if (ctx == NULL || IS_ERR(ctx))
311 return;
312
313 if (ctx->ctfm && !IS_ERR(ctx->ctfm))
314 crypto_free_ablkcipher(ctx->ctfm);
315 if (ctx->htfm && !IS_ERR(ctx->htfm))
316 crypto_free_hash(ctx->htfm);
317 if (ctx->workpage && !IS_ERR(ctx->workpage))
318 __free_page(ctx->workpage);
319 kfree(ctx);
320}
321
322/**
323 * ext4_put_fname_crypto_ctx() -
324 *
325 * Return: The crypto context onto free list. If the free list is above a
326 * threshold, completely frees up the context, and returns the memory.
327 *
328 * TODO: Currently we directly free the crypto context. Eventually we should
329 * add code it to return to free list. Such an approach will increase
330 * efficiency of directory lookup.
331 */
332void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx)
333{
334 if (*ctx == NULL || IS_ERR(*ctx))
335 return;
336 ext4_free_fname_crypto_ctx(*ctx);
337 *ctx = NULL;
338}
339
340/**
341 * ext4_search_fname_crypto_ctx() -
342 */
343static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx(
344 const struct ext4_encryption_key *key)
345{
346 return NULL;
347}
348
349/**
350 * ext4_alloc_fname_crypto_ctx() -
351 */
352struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx(
353 const struct ext4_encryption_key *key)
354{
355 struct ext4_fname_crypto_ctx *ctx;
356
357 ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS);
358 if (ctx == NULL)
359 return ERR_PTR(-ENOMEM);
360 if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) {
361 /* This will automatically set key mode to invalid
362 * As enum for ENCRYPTION_MODE_INVALID is zero */
363 memset(&ctx->key, 0, sizeof(ctx->key));
364 } else {
365 memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key));
366 }
367 ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode)
368 ? 0 : 1;
369 ctx->ctfm_key_is_ready = 0;
370 ctx->ctfm = NULL;
371 ctx->htfm = NULL;
372 ctx->workpage = NULL;
373 return ctx;
374}
375
376/**
377 * ext4_get_fname_crypto_ctx() -
378 *
379 * Allocates a free crypto context and initializes it to hold
380 * the crypto material for the inode.
381 *
382 * Return: NULL if not encrypted. Error value on error. Valid pointer otherwise.
383 */
384struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
385 struct inode *inode, u32 max_ciphertext_len)
386{
387 struct ext4_fname_crypto_ctx *ctx;
388 struct ext4_inode_info *ei = EXT4_I(inode);
389 int res;
390
391 /* Check if the crypto policy is set on the inode */
392 res = ext4_encrypted_inode(inode);
393 if (res == 0)
394 return NULL;
395
396 if (!ext4_has_encryption_key(inode))
397 ext4_generate_encryption_key(inode);
398
399 /* Get a crypto context based on the key.
400 * A new context is allocated if no context matches the requested key.
401 */
402 ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key));
403 if (ctx == NULL)
404 ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key));
405 if (IS_ERR(ctx))
406 return ctx;
407
408 if (ctx->has_valid_key) {
409 if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
410 printk_once(KERN_WARNING
411 "ext4: unsupported key mode %d\n",
412 ctx->key.mode);
413 return ERR_PTR(-ENOKEY);
414 }
415
416 /* As a first cut, we will allocate new tfm in every call.
417 * later, we will keep the tfm around, in case the key gets
418 * re-used */
419 if (ctx->ctfm == NULL) {
420 ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))",
421 0, 0);
422 }
423 if (IS_ERR(ctx->ctfm)) {
424 res = PTR_ERR(ctx->ctfm);
425 printk(
426 KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
427 __func__, res);
428 ctx->ctfm = NULL;
429 ext4_put_fname_crypto_ctx(&ctx);
430 return ERR_PTR(res);
431 }
432 if (ctx->ctfm == NULL) {
433 printk(
434 KERN_DEBUG "%s: could not allocate crypto tfm\n",
435 __func__);
436 ext4_put_fname_crypto_ctx(&ctx);
437 return ERR_PTR(-ENOMEM);
438 }
439 if (ctx->workpage == NULL)
440 ctx->workpage = alloc_page(GFP_NOFS);
441 if (IS_ERR(ctx->workpage)) {
442 res = PTR_ERR(ctx->workpage);
443 printk(
444 KERN_DEBUG "%s: error (%d) allocating work page\n",
445 __func__, res);
446 ctx->workpage = NULL;
447 ext4_put_fname_crypto_ctx(&ctx);
448 return ERR_PTR(res);
449 }
450 if (ctx->workpage == NULL) {
451 printk(
452 KERN_DEBUG "%s: could not allocate work page\n",
453 __func__);
454 ext4_put_fname_crypto_ctx(&ctx);
455 return ERR_PTR(-ENOMEM);
456 }
457 ctx->lim = max_ciphertext_len;
458 crypto_ablkcipher_clear_flags(ctx->ctfm, ~0);
459 crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm),
460 CRYPTO_TFM_REQ_WEAK_KEY);
461
462 /* If we are lucky, we will get a context that is already
463 * set up with the right key. Else, we will have to
464 * set the key */
465 if (!ctx->ctfm_key_is_ready) {
466 /* Since our crypto objectives for filename encryption
467 * are pretty weak,
468 * we directly use the inode master key */
469 res = crypto_ablkcipher_setkey(ctx->ctfm,
470 ctx->key.raw, ctx->key.size);
471 if (res) {
472 ext4_put_fname_crypto_ctx(&ctx);
473 return ERR_PTR(-EIO);
474 }
475 ctx->ctfm_key_is_ready = 1;
476 } else {
477 /* In the current implementation, key should never be
478 * marked "ready" for a context that has just been
479 * allocated. So we should never reach here */
480 BUG();
481 }
482 }
483 if (ctx->htfm == NULL)
484 ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
485 if (IS_ERR(ctx->htfm)) {
486 res = PTR_ERR(ctx->htfm);
487 printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n",
488 __func__, res);
489 ctx->htfm = NULL;
490 ext4_put_fname_crypto_ctx(&ctx);
491 return ERR_PTR(res);
492 }
493 if (ctx->htfm == NULL) {
494 printk(KERN_DEBUG "%s: could not allocate hash tfm\n",
495 __func__);
496 ext4_put_fname_crypto_ctx(&ctx);
497 return ERR_PTR(-ENOMEM);
498 }
499
500 return ctx;
501}
502
503/**
504 * ext4_fname_crypto_round_up() -
505 *
506 * Return: The next multiple of block size
507 */
508u32 ext4_fname_crypto_round_up(u32 size, u32 blksize)
509{
510 return ((size+blksize-1)/blksize)*blksize;
511}
512
513/**
514 * ext4_fname_crypto_namelen_on_disk() -
515 */
516int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
517 u32 namelen)
518{
519 u32 ciphertext_len;
520
521 if (ctx == NULL)
522 return -EIO;
523 if (!(ctx->has_valid_key))
524 return -EACCES;
525 ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
526 EXT4_CRYPTO_BLOCK_SIZE : namelen;
527 ciphertext_len = (ciphertext_len > ctx->lim)
528 ? ctx->lim : ciphertext_len;
529 return (int) ciphertext_len;
530}
531
/**
 * ext4_fname_crypto_alloc_buffer() -
 *
 * Allocates an output buffer that is sufficient for the crypto operation
 * specified by the context and the direction.
 *
 * Note that crypto_str->len is set to the rounded-up input length (the
 * ciphertext length callers will use), while the underlying allocation may
 * be larger — big enough to also hold a digest-based presentation name.
 */
int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
				   u32 ilen, struct ext4_str *crypto_str)
{
	unsigned int olen;

	if (!ctx)
		return -EIO;
	olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE);
	/* len reflects the ciphertext size, not the allocation size below */
	crypto_str->len = olen;
	if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
		olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
	/* Allocated buffer can hold one more character to null-terminate the
	 * string */
	crypto_str->name = kmalloc(olen+1, GFP_NOFS);
	if (!(crypto_str->name))
		return -ENOMEM;
	return 0;
}
556
557/**
558 * ext4_fname_crypto_free_buffer() -
559 *
560 * Frees the buffer allocated for crypto operation.
561 */
562void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
563{
564 if (!crypto_str)
565 return;
566 kfree(crypto_str->name);
567 crypto_str->name = NULL;
568}
569
570/**
571 * ext4_fname_disk_to_usr() - converts a filename from disk space to user space
572 */
573int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
574 const struct ext4_str *iname,
575 struct ext4_str *oname)
576{
577 if (ctx == NULL)
578 return -EIO;
579 if (iname->len < 3) {
580 /*Check for . and .. */
581 if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') {
582 oname->name[0] = '.';
583 oname->name[iname->len-1] = '.';
584 oname->len = iname->len;
585 return oname->len;
586 }
587 }
588 if (ctx->has_valid_key)
589 return ext4_fname_decrypt(ctx, iname, oname);
590 else
591 return ext4_fname_hash(ctx, iname, oname);
592}
593
594int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
595 const struct ext4_dir_entry_2 *de,
596 struct ext4_str *oname)
597{
598 struct ext4_str iname = {.name = (unsigned char *) de->name,
599 .len = de->name_len };
600
601 return _ext4_fname_disk_to_usr(ctx, &iname, oname);
602}
603
604
605/**
606 * ext4_fname_usr_to_disk() - converts a filename from user space to disk space
607 */
608int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
609 const struct qstr *iname,
610 struct ext4_str *oname)
611{
612 int res;
613
614 if (ctx == NULL)
615 return -EIO;
616 if (iname->len < 3) {
617 /*Check for . and .. */
618 if (iname->name[0] == '.' &&
619 iname->name[iname->len-1] == '.') {
620 oname->name[0] = '.';
621 oname->name[iname->len-1] = '.';
622 oname->len = iname->len;
623 return oname->len;
624 }
625 }
626 if (ctx->has_valid_key) {
627 res = ext4_fname_encrypt(ctx, iname, oname);
628 return res;
629 }
630 /* Without a proper key, a user is not allowed to modify the filenames
631 * in a directory. Consequently, a user space name cannot be mapped to
632 * a disk-space name */
633 return -EACCES;
634}
635
/*
 * Calculate the htree hash from a filename from user space
 *
 * Without a usable key (or for "." / ".."), the hash is taken over the name
 * exactly as given.  Otherwise the name is first encrypted and then reduced
 * to a digest-based presentation string, so the resulting hash matches the
 * one computed from the on-disk (encrypted) entry at create time.
 *
 * Returns 0 on success, negative errno on failure.
 */
int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
			   const struct qstr *iname,
			   struct dx_hash_info *hinfo)
{
	struct ext4_str tmp, tmp2;
	int ret = 0;

	/* Unencrypted dir, no key, ".", or "..": hash the raw name */
	if (!ctx || !ctx->has_valid_key ||
	    ((iname->name[0] == '.') &&
	     ((iname->len == 1) ||
	      ((iname->name[1] == '.') && (iname->len == 2))))) {
		ext4fs_dirhash(iname->name, iname->len, hinfo);
		return 0;
	}

	/* First encrypt the plaintext name */
	ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
	if (ret < 0)
		return ret;

	ret = ext4_fname_encrypt(ctx, iname, &tmp);
	if (ret < 0)
		goto out;

	/* Room for the base64-style digest string plus NUL */
	tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
	/* NOTE(review): GFP_KERNEL here while the buffer helpers use
	 * GFP_NOFS — confirm this path cannot recurse into fs reclaim. */
	tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL);
	if (tmp2.name == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	/* Hash the digest string, not the raw ciphertext */
	ret = ext4_fname_hash(ctx, &tmp, &tmp2);
	if (ret > 0)
		ext4fs_dirhash(tmp2.name, tmp2.len, hinfo);
	ext4_fname_crypto_free_buffer(&tmp2);
out:
	ext4_fname_crypto_free_buffer(&tmp);
	return ret;
}
678
679/**
680 * ext4_fname_disk_to_htree() - converts a filename from disk space to htree-access string
681 */
682int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
683 const struct ext4_dir_entry_2 *de,
684 struct dx_hash_info *hinfo)
685{
686 struct ext4_str iname = {.name = (unsigned char *) de->name,
687 .len = de->name_len};
688 struct ext4_str tmp;
689 int ret;
690
691 if (!ctx ||
692 ((iname.name[0] == '.') &&
693 ((iname.len == 1) ||
694 ((iname.name[1] == '.') && (iname.len == 2))))) {
695 ext4fs_dirhash(iname.name, iname.len, hinfo);
696 return 0;
697 }
698
699 tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
700 tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL);
701 if (tmp.name == NULL)
702 return -ENOMEM;
703
704 ret = ext4_fname_hash(ctx, &iname, &tmp);
705 if (ret > 0)
706 ext4fs_dirhash(tmp.name, tmp.len, hinfo);
707 ext4_fname_crypto_free_buffer(&tmp);
708 return ret;
709}
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
new file mode 100644
index 000000000000..c8392af8abbb
--- /dev/null
+++ b/fs/ext4/crypto_key.c
@@ -0,0 +1,165 @@
1/*
2 * linux/fs/ext4/crypto_key.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption key functions for ext4
7 *
8 * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
9 */
10
11#include <keys/encrypted-type.h>
12#include <keys/user-type.h>
13#include <linux/random.h>
14#include <linux/scatterlist.h>
15#include <uapi/linux/keyctl.h>
16
17#include "ext4.h"
18#include "xattr.h"
19
20static void derive_crypt_complete(struct crypto_async_request *req, int rc)
21{
22 struct ext4_completion_result *ecr = req->data;
23
24 if (rc == -EINPROGRESS)
25 return;
26
27 ecr->res = rc;
28 complete(&ecr->completion);
29}
30
/**
 * ext4_derive_key_aes() - Derive a key using AES-128-ECB
 * @deriving_key: Encryption key used for derivation.
 * @source_key: Source key to which to apply derivation.
 * @derived_key: Derived key.
 *
 * Encrypts @source_key with @deriving_key via AES-128-ECB, waiting for the
 * async cipher to complete when the request is queued.
 *
 * Return: Zero on success; non-zero otherwise.
 */
static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
			       char source_key[EXT4_AES_256_XTS_KEY_SIZE],
			       char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
{
	int res = 0;
	struct ablkcipher_request *req = NULL;
	DECLARE_EXT4_COMPLETION_RESULT(ecr);
	struct scatterlist src_sg, dst_sg;
	struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
								0);

	if (IS_ERR(tfm)) {
		res = PTR_ERR(tfm);
		tfm = NULL;	/* avoid freeing an ERR_PTR in the out path */
		goto out;
	}
	crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
	req = ablkcipher_request_alloc(tfm, GFP_NOFS);
	if (!req) {
		res = -ENOMEM;
		goto out;
	}
	ablkcipher_request_set_callback(req,
			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
			derive_crypt_complete, &ecr);
	res = crypto_ablkcipher_setkey(tfm, deriving_key,
				       EXT4_AES_128_ECB_KEY_SIZE);
	if (res < 0)
		goto out;
	sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE);
	sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE);
	ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
				     EXT4_AES_256_XTS_KEY_SIZE, NULL);
	res = crypto_ablkcipher_encrypt(req);
	if (res == -EINPROGRESS || res == -EBUSY) {
		/* Request was queued; wait for derive_crypt_complete() */
		BUG_ON(req->base.data != &ecr);
		wait_for_completion(&ecr.completion);
		res = ecr.res;
	}

out:
	if (req)
		ablkcipher_request_free(req);
	if (tfm)
		crypto_free_ablkcipher(tfm);
	return res;
}
86
/**
 * ext4_generate_encryption_key() - generates an encryption key
 * @inode: The inode to generate the encryption key for.
 *
 * Reads the inode's encryption-context xattr, picks the encryption mode
 * matching the inode type, and derives the per-inode key from the master
 * key found in the kernel keyring.  With the test_dummy_encryption mount
 * option a fixed test key is used instead of a keyring lookup.
 *
 * Return: Zero on success; on failure the cached key mode is reset to
 * EXT4_ENCRYPTION_MODE_INVALID and a negative errno is returned.
 */
int ext4_generate_encryption_key(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
	/* key-description prefix + hex-encoded descriptor + NUL */
	char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
				 (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
	struct key *keyring_key = NULL;
	struct ext4_encryption_key *master_key;
	struct ext4_encryption_context ctx;
	struct user_key_payload *ukp;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
				 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				 &ctx, sizeof(ctx));

	if (res != sizeof(ctx)) {
		/* A short positive read means a truncated/corrupt context */
		if (res > 0)
			res = -EINVAL;
		goto out;
	}
	res = 0;

	/* Contents mode for files; filenames mode for dirs and symlinks */
	if (S_ISREG(inode->i_mode))
		crypt_key->mode = ctx.contents_encryption_mode;
	else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		crypt_key->mode = ctx.filenames_encryption_mode;
	else {
		printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
		BUG();
	}
	crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
	BUG_ON(!crypt_key->size);
	if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
		/* Test mode: well-known constant key, no keyring involved */
		memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
		goto out;
	}
	/* Build "<prefix><hex descriptor>" to look up the logon key */
	memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
	       EXT4_KEY_DESC_PREFIX_SIZE);
	sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE,
		"%*phN", EXT4_KEY_DESCRIPTOR_SIZE,
		ctx.master_key_descriptor);
	full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
			    (2 * EXT4_KEY_DESCRIPTOR_SIZE)] = '\0';
	keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
	if (IS_ERR(keyring_key)) {
		res = PTR_ERR(keyring_key);
		keyring_key = NULL;	/* nothing to key_put() below */
		goto out;
	}
	BUG_ON(keyring_key->type != &key_type_logon);
	/* NOTE(review): the payload is read without holding the key
	 * semaphore — confirm this cannot race with key update/revocation. */
	ukp = ((struct user_key_payload *)keyring_key->payload.data);
	if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
		res = -EINVAL;
		goto out;
	}
	master_key = (struct ext4_encryption_key *)ukp->data;
	BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
		     EXT4_KEY_DERIVATION_NONCE_SIZE);
	BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
	/* Per-inode key = AES-ECB(nonce, master key) */
	res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
out:
	if (keyring_key)
		key_put(keyring_key);
	if (res < 0)
		crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
	return res;
}
158
159int ext4_has_encryption_key(struct inode *inode)
160{
161 struct ext4_inode_info *ei = EXT4_I(inode);
162 struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
163
164 return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID);
165}
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
new file mode 100644
index 000000000000..30eaf9e9864a
--- /dev/null
+++ b/fs/ext4/crypto_policy.c
@@ -0,0 +1,194 @@
1/*
2 * linux/fs/ext4/crypto_policy.c
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption policy functions for ext4
7 *
8 * Written by Michael Halcrow, 2015.
9 */
10
11#include <linux/random.h>
12#include <linux/string.h>
13#include <linux/types.h>
14
15#include "ext4.h"
16#include "xattr.h"
17
18static int ext4_inode_has_encryption_context(struct inode *inode)
19{
20 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
21 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0);
22 return (res > 0);
23}
24
25/*
26 * check whether the policy is consistent with the encryption context
27 * for the inode
28 */
29static int ext4_is_encryption_context_consistent_with_policy(
30 struct inode *inode, const struct ext4_encryption_policy *policy)
31{
32 struct ext4_encryption_context ctx;
33 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
34 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
35 sizeof(ctx));
36 if (res != sizeof(ctx))
37 return 0;
38 return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
39 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
40 (ctx.contents_encryption_mode ==
41 policy->contents_encryption_mode) &&
42 (ctx.filenames_encryption_mode ==
43 policy->filenames_encryption_mode));
44}
45
46static int ext4_create_encryption_context_from_policy(
47 struct inode *inode, const struct ext4_encryption_policy *policy)
48{
49 struct ext4_encryption_context ctx;
50 int res = 0;
51
52 ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
53 memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
54 EXT4_KEY_DESCRIPTOR_SIZE);
55 if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) {
56 printk(KERN_WARNING
57 "%s: Invalid contents encryption mode %d\n", __func__,
58 policy->contents_encryption_mode);
59 res = -EINVAL;
60 goto out;
61 }
62 if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
63 printk(KERN_WARNING
64 "%s: Invalid filenames encryption mode %d\n", __func__,
65 policy->filenames_encryption_mode);
66 res = -EINVAL;
67 goto out;
68 }
69 ctx.contents_encryption_mode = policy->contents_encryption_mode;
70 ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
71 BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
72 get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
73
74 res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
75 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
76 sizeof(ctx), 0);
77out:
78 if (!res)
79 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
80 return res;
81}
82
83int ext4_process_policy(const struct ext4_encryption_policy *policy,
84 struct inode *inode)
85{
86 if (policy->version != 0)
87 return -EINVAL;
88
89 if (!ext4_inode_has_encryption_context(inode)) {
90 if (!ext4_empty_dir(inode))
91 return -ENOTEMPTY;
92 return ext4_create_encryption_context_from_policy(inode,
93 policy);
94 }
95
96 if (ext4_is_encryption_context_consistent_with_policy(inode, policy))
97 return 0;
98
99 printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
100 __func__);
101 return -EINVAL;
102}
103
104int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
105{
106 struct ext4_encryption_context ctx;
107
108 int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
109 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
110 &ctx, sizeof(ctx));
111 if (res != sizeof(ctx))
112 return -ENOENT;
113 if (ctx.format != EXT4_ENCRYPTION_CONTEXT_FORMAT_V1)
114 return -EINVAL;
115 policy->version = 0;
116 policy->contents_encryption_mode = ctx.contents_encryption_mode;
117 policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
118 memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
119 EXT4_KEY_DESCRIPTOR_SIZE);
120 return 0;
121}
122
123int ext4_is_child_context_consistent_with_parent(struct inode *parent,
124 struct inode *child)
125{
126 struct ext4_encryption_context parent_ctx, child_ctx;
127 int res;
128
129 if ((parent == NULL) || (child == NULL)) {
130 pr_err("parent %p child %p\n", parent, child);
131 BUG_ON(1);
132 }
133 /* no restrictions if the parent directory is not encrypted */
134 if (!ext4_encrypted_inode(parent))
135 return 1;
136 res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
137 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
138 &parent_ctx, sizeof(parent_ctx));
139 if (res != sizeof(parent_ctx))
140 return 0;
141 /* if the child directory is not encrypted, this is always a problem */
142 if (!ext4_encrypted_inode(child))
143 return 0;
144 res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
145 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
146 &child_ctx, sizeof(child_ctx));
147 if (res != sizeof(child_ctx))
148 return 0;
149 return (memcmp(parent_ctx.master_key_descriptor,
150 child_ctx.master_key_descriptor,
151 EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
152 (parent_ctx.contents_encryption_mode ==
153 child_ctx.contents_encryption_mode) &&
154 (parent_ctx.filenames_encryption_mode ==
155 child_ctx.filenames_encryption_mode));
156}
157
/**
 * ext4_inherit_context() - Sets a child context from its parent
 * @parent: Parent inode from which the context is inherited.
 * @child: Child inode that inherits the context from @parent.
 *
 * Copies the parent's encryption context (modes and master key descriptor)
 * to the child with a freshly generated per-inode nonce.  If the parent has
 * no readable context but test_dummy_encryption is enabled, a fixed dummy
 * context is fabricated instead.  On success the child's ENCRYPT inode
 * flag is set.
 *
 * Return: Zero on success, non-zero otherwise
 */
int ext4_inherit_context(struct inode *parent, struct inode *child)
{
	struct ext4_encryption_context ctx;
	int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
				 EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
				 &ctx, sizeof(ctx));

	if (res != sizeof(ctx)) {
		if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
			/* Test mode: fabricate a fixed context */
			ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
			ctx.contents_encryption_mode =
				EXT4_ENCRYPTION_MODE_AES_256_XTS;
			ctx.filenames_encryption_mode =
				EXT4_ENCRYPTION_MODE_AES_256_CTS;
			memset(ctx.master_key_descriptor, 0x42,
			       EXT4_KEY_DESCRIPTOR_SIZE);
			res = 0;
		} else {
			/* NOTE(review): a short positive read reaches out:
			 * with res > 0, which callers may not treat as an
			 * error — confirm intended. */
			goto out;
		}
	}
	/* Each inode gets its own key-derivation nonce */
	get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
	res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
			     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
			     sizeof(ctx), 0);
out:
	if (!res)
		ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
	return res;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c24143ea9c08..61db51a5ce4c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -22,10 +22,8 @@
22 */ 22 */
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h>
26#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
27#include <linux/slab.h> 26#include <linux/slab.h>
28#include <linux/rbtree.h>
29#include "ext4.h" 27#include "ext4.h"
30#include "xattr.h" 28#include "xattr.h"
31 29
@@ -110,7 +108,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
110 int err; 108 int err;
111 struct inode *inode = file_inode(file); 109 struct inode *inode = file_inode(file);
112 struct super_block *sb = inode->i_sb; 110 struct super_block *sb = inode->i_sb;
111 struct buffer_head *bh = NULL;
113 int dir_has_error = 0; 112 int dir_has_error = 0;
113 struct ext4_fname_crypto_ctx *enc_ctx = NULL;
114 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
114 115
115 if (is_dx_dir(inode)) { 116 if (is_dx_dir(inode)) {
116 err = ext4_dx_readdir(file, ctx); 117 err = ext4_dx_readdir(file, ctx);
@@ -127,17 +128,28 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
127 128
128 if (ext4_has_inline_data(inode)) { 129 if (ext4_has_inline_data(inode)) {
129 int has_inline_data = 1; 130 int has_inline_data = 1;
130 int ret = ext4_read_inline_dir(file, ctx, 131 err = ext4_read_inline_dir(file, ctx,
131 &has_inline_data); 132 &has_inline_data);
132 if (has_inline_data) 133 if (has_inline_data)
133 return ret; 134 return err;
135 }
136
137 enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
138 if (IS_ERR(enc_ctx))
139 return PTR_ERR(enc_ctx);
140 if (enc_ctx) {
141 err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
142 &fname_crypto_str);
143 if (err < 0) {
144 ext4_put_fname_crypto_ctx(&enc_ctx);
145 return err;
146 }
134 } 147 }
135 148
136 offset = ctx->pos & (sb->s_blocksize - 1); 149 offset = ctx->pos & (sb->s_blocksize - 1);
137 150
138 while (ctx->pos < inode->i_size) { 151 while (ctx->pos < inode->i_size) {
139 struct ext4_map_blocks map; 152 struct ext4_map_blocks map;
140 struct buffer_head *bh = NULL;
141 153
142 map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); 154 map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
143 map.m_len = 1; 155 map.m_len = 1;
@@ -180,6 +192,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
180 (unsigned long long)ctx->pos); 192 (unsigned long long)ctx->pos);
181 ctx->pos += sb->s_blocksize - offset; 193 ctx->pos += sb->s_blocksize - offset;
182 brelse(bh); 194 brelse(bh);
195 bh = NULL;
183 continue; 196 continue;
184 } 197 }
185 set_buffer_verified(bh); 198 set_buffer_verified(bh);
@@ -226,25 +239,44 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
226 offset += ext4_rec_len_from_disk(de->rec_len, 239 offset += ext4_rec_len_from_disk(de->rec_len,
227 sb->s_blocksize); 240 sb->s_blocksize);
228 if (le32_to_cpu(de->inode)) { 241 if (le32_to_cpu(de->inode)) {
229 if (!dir_emit(ctx, de->name, 242 if (enc_ctx == NULL) {
230 de->name_len, 243 /* Directory is not encrypted */
231 le32_to_cpu(de->inode), 244 if (!dir_emit(ctx, de->name,
232 get_dtype(sb, de->file_type))) { 245 de->name_len,
233 brelse(bh); 246 le32_to_cpu(de->inode),
234 return 0; 247 get_dtype(sb, de->file_type)))
248 goto done;
249 } else {
250 /* Directory is encrypted */
251 err = ext4_fname_disk_to_usr(enc_ctx,
252 de, &fname_crypto_str);
253 if (err < 0)
254 goto errout;
255 if (!dir_emit(ctx,
256 fname_crypto_str.name, err,
257 le32_to_cpu(de->inode),
258 get_dtype(sb, de->file_type)))
259 goto done;
235 } 260 }
236 } 261 }
237 ctx->pos += ext4_rec_len_from_disk(de->rec_len, 262 ctx->pos += ext4_rec_len_from_disk(de->rec_len,
238 sb->s_blocksize); 263 sb->s_blocksize);
239 } 264 }
240 offset = 0; 265 if ((ctx->pos < inode->i_size) && !dir_relax(inode))
266 goto done;
241 brelse(bh); 267 brelse(bh);
242 if (ctx->pos < inode->i_size) { 268 bh = NULL;
243 if (!dir_relax(inode)) 269 offset = 0;
244 return 0;
245 }
246 } 270 }
247 return 0; 271done:
272 err = 0;
273errout:
274#ifdef CONFIG_EXT4_FS_ENCRYPTION
275 ext4_put_fname_crypto_ctx(&enc_ctx);
276 ext4_fname_crypto_free_buffer(&fname_crypto_str);
277#endif
278 brelse(bh);
279 return err;
248} 280}
249 281
250static inline int is_32bit_api(void) 282static inline int is_32bit_api(void)
@@ -384,10 +416,15 @@ void ext4_htree_free_dir_info(struct dir_private_info *p)
384 416
385/* 417/*
386 * Given a directory entry, enter it into the fname rb tree. 418 * Given a directory entry, enter it into the fname rb tree.
419 *
420 * When filename encryption is enabled, the dirent will hold the
421 * encrypted filename, while the htree will hold decrypted filename.
422 * The decrypted filename is passed in via ent_name. parameter.
387 */ 423 */
388int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 424int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
389 __u32 minor_hash, 425 __u32 minor_hash,
390 struct ext4_dir_entry_2 *dirent) 426 struct ext4_dir_entry_2 *dirent,
427 struct ext4_str *ent_name)
391{ 428{
392 struct rb_node **p, *parent = NULL; 429 struct rb_node **p, *parent = NULL;
393 struct fname *fname, *new_fn; 430 struct fname *fname, *new_fn;
@@ -398,17 +435,17 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
398 p = &info->root.rb_node; 435 p = &info->root.rb_node;
399 436
400 /* Create and allocate the fname structure */ 437 /* Create and allocate the fname structure */
401 len = sizeof(struct fname) + dirent->name_len + 1; 438 len = sizeof(struct fname) + ent_name->len + 1;
402 new_fn = kzalloc(len, GFP_KERNEL); 439 new_fn = kzalloc(len, GFP_KERNEL);
403 if (!new_fn) 440 if (!new_fn)
404 return -ENOMEM; 441 return -ENOMEM;
405 new_fn->hash = hash; 442 new_fn->hash = hash;
406 new_fn->minor_hash = minor_hash; 443 new_fn->minor_hash = minor_hash;
407 new_fn->inode = le32_to_cpu(dirent->inode); 444 new_fn->inode = le32_to_cpu(dirent->inode);
408 new_fn->name_len = dirent->name_len; 445 new_fn->name_len = ent_name->len;
409 new_fn->file_type = dirent->file_type; 446 new_fn->file_type = dirent->file_type;
410 memcpy(new_fn->name, dirent->name, dirent->name_len); 447 memcpy(new_fn->name, ent_name->name, ent_name->len);
411 new_fn->name[dirent->name_len] = 0; 448 new_fn->name[ent_name->len] = 0;
412 449
413 while (*p) { 450 while (*p) {
414 parent = *p; 451 parent = *p;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f63c3d5805c4..ef267adce19a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -422,7 +422,7 @@ enum {
422 EXT4_INODE_DIRTY = 8, 422 EXT4_INODE_DIRTY = 8,
423 EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ 423 EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
424 EXT4_INODE_NOCOMPR = 10, /* Don't compress */ 424 EXT4_INODE_NOCOMPR = 10, /* Don't compress */
425 EXT4_INODE_ENCRYPT = 11, /* Compression error */ 425 EXT4_INODE_ENCRYPT = 11, /* Encrypted file */
426/* End compression flags --- maybe not all used */ 426/* End compression flags --- maybe not all used */
427 EXT4_INODE_INDEX = 12, /* hash-indexed directory */ 427 EXT4_INODE_INDEX = 12, /* hash-indexed directory */
428 EXT4_INODE_IMAGIC = 13, /* AFS directory */ 428 EXT4_INODE_IMAGIC = 13, /* AFS directory */
@@ -582,6 +582,15 @@ enum {
582#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 582#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
583#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 583#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
584 584
585/* Encryption algorithms */
586#define EXT4_ENCRYPTION_MODE_INVALID 0
587#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
588#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
589#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
590#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
591
592#include "ext4_crypto.h"
593
585/* 594/*
586 * ioctl commands 595 * ioctl commands
587 */ 596 */
@@ -603,6 +612,9 @@ enum {
603#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) 612#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
604#define EXT4_IOC_SWAP_BOOT _IO('f', 17) 613#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
605#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18) 614#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
615#define EXT4_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct ext4_encryption_policy)
616#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
617#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
606 618
607#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 619#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
608/* 620/*
@@ -939,6 +951,11 @@ struct ext4_inode_info {
939 951
940 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ 952 /* Precomputed uuid+inum+igen checksum for seeding inode checksums */
941 __u32 i_csum_seed; 953 __u32 i_csum_seed;
954
955#ifdef CONFIG_EXT4_FS_ENCRYPTION
956 /* Encryption params */
957 struct ext4_encryption_key i_encryption_key;
958#endif
942}; 959};
943 960
944/* 961/*
@@ -1142,7 +1159,8 @@ struct ext4_super_block {
1142 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ 1159 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
1143 __u8 s_log_groups_per_flex; /* FLEX_BG group size */ 1160 __u8 s_log_groups_per_flex; /* FLEX_BG group size */
1144 __u8 s_checksum_type; /* metadata checksum algorithm used */ 1161 __u8 s_checksum_type; /* metadata checksum algorithm used */
1145 __le16 s_reserved_pad; 1162 __u8 s_encryption_level; /* versioning level for encryption */
1163 __u8 s_reserved_pad; /* Padding to next 32bits */
1146 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ 1164 __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
1147 __le32 s_snapshot_inum; /* Inode number of active snapshot */ 1165 __le32 s_snapshot_inum; /* Inode number of active snapshot */
1148 __le32 s_snapshot_id; /* sequential ID of active snapshot */ 1166 __le32 s_snapshot_id; /* sequential ID of active snapshot */
@@ -1169,7 +1187,9 @@ struct ext4_super_block {
1169 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */ 1187 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1170 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */ 1188 __le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
1171 __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */ 1189 __u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
1172 __le32 s_reserved[105]; /* Padding to the end of the block */ 1190 __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
1191 __le32 s_lpf_ino; /* Location of the lost+found inode */
1192 __le32 s_reserved[100]; /* Padding to the end of the block */
1173 __le32 s_checksum; /* crc32c(superblock) */ 1193 __le32 s_checksum; /* crc32c(superblock) */
1174}; 1194};
1175 1195
@@ -1180,8 +1200,16 @@ struct ext4_super_block {
1180/* 1200/*
1181 * run-time mount flags 1201 * run-time mount flags
1182 */ 1202 */
1183#define EXT4_MF_MNTDIR_SAMPLED 0x0001 1203#define EXT4_MF_MNTDIR_SAMPLED 0x0001
1184#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ 1204#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
1205#define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004
1206
1207#ifdef CONFIG_EXT4_FS_ENCRYPTION
1208#define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \
1209 EXT4_MF_TEST_DUMMY_ENCRYPTION))
1210#else
1211#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
1212#endif
1185 1213
1186/* Number of quota types we support */ 1214/* Number of quota types we support */
1187#define EXT4_MAXQUOTAS 2 1215#define EXT4_MAXQUOTAS 2
@@ -1351,6 +1379,12 @@ struct ext4_sb_info {
1351 struct ratelimit_state s_err_ratelimit_state; 1379 struct ratelimit_state s_err_ratelimit_state;
1352 struct ratelimit_state s_warning_ratelimit_state; 1380 struct ratelimit_state s_warning_ratelimit_state;
1353 struct ratelimit_state s_msg_ratelimit_state; 1381 struct ratelimit_state s_msg_ratelimit_state;
1382
1383#ifdef CONFIG_EXT4_FS_ENCRYPTION
1384 /* Encryption */
1385 uint32_t s_file_encryption_mode;
1386 uint32_t s_dir_encryption_mode;
1387#endif
1354}; 1388};
1355 1389
1356static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) 1390static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1466,6 +1500,18 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1466#define EXT4_SB(sb) (sb) 1500#define EXT4_SB(sb) (sb)
1467#endif 1501#endif
1468 1502
1503/*
1504 * Returns true if the inode is inode is encrypted
1505 */
1506static inline int ext4_encrypted_inode(struct inode *inode)
1507{
1508#ifdef CONFIG_EXT4_FS_ENCRYPTION
1509 return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT);
1510#else
1511 return 0;
1512#endif
1513}
1514
1469#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime 1515#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
1470 1516
1471/* 1517/*
@@ -1575,8 +1621,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1575 EXT4_FEATURE_INCOMPAT_EXTENTS| \ 1621 EXT4_FEATURE_INCOMPAT_EXTENTS| \
1576 EXT4_FEATURE_INCOMPAT_64BIT| \ 1622 EXT4_FEATURE_INCOMPAT_64BIT| \
1577 EXT4_FEATURE_INCOMPAT_FLEX_BG| \ 1623 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
1578 EXT4_FEATURE_INCOMPAT_MMP | \ 1624 EXT4_FEATURE_INCOMPAT_MMP | \
1579 EXT4_FEATURE_INCOMPAT_INLINE_DATA) 1625 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
1626 EXT4_FEATURE_INCOMPAT_ENCRYPT)
1580#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ 1627#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
1581 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ 1628 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
1582 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ 1629 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2001,6 +2048,99 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
2001 struct ext4_group_desc *gdp); 2048 struct ext4_group_desc *gdp);
2002ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 2049ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
2003 2050
2051/* crypto_policy.c */
2052int ext4_is_child_context_consistent_with_parent(struct inode *parent,
2053 struct inode *child);
2054int ext4_inherit_context(struct inode *parent, struct inode *child);
2055void ext4_to_hex(char *dst, char *src, size_t src_size);
2056int ext4_process_policy(const struct ext4_encryption_policy *policy,
2057 struct inode *inode);
2058int ext4_get_policy(struct inode *inode,
2059 struct ext4_encryption_policy *policy);
2060
2061/* crypto.c */
2062bool ext4_valid_contents_enc_mode(uint32_t mode);
2063uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
2064extern struct workqueue_struct *ext4_read_workqueue;
2065struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode);
2066void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx);
2067void ext4_restore_control_page(struct page *data_page);
2068struct page *ext4_encrypt(struct inode *inode,
2069 struct page *plaintext_page);
2070int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page);
2071int ext4_decrypt_one(struct inode *inode, struct page *page);
2072int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
2073
2074#ifdef CONFIG_EXT4_FS_ENCRYPTION
2075int ext4_init_crypto(void);
2076void ext4_exit_crypto(void);
2077static inline int ext4_sb_has_crypto(struct super_block *sb)
2078{
2079 return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
2080}
2081#else
2082static inline int ext4_init_crypto(void) { return 0; }
2083static inline void ext4_exit_crypto(void) { }
2084static inline int ext4_sb_has_crypto(struct super_block *sb)
2085{
2086 return 0;
2087}
2088#endif
2089
2090/* crypto_fname.c */
2091bool ext4_valid_filenames_enc_mode(uint32_t mode);
2092u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
2093int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
2094 u32 ilen, struct ext4_str *crypto_str);
2095int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
2096 const struct ext4_str *iname,
2097 struct ext4_str *oname);
2098int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
2099 const struct ext4_dir_entry_2 *de,
2100 struct ext4_str *oname);
2101int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
2102 const struct qstr *iname,
2103 struct ext4_str *oname);
2104int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
2105 const struct qstr *iname,
2106 struct dx_hash_info *hinfo);
2107int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
2108 const struct ext4_dir_entry_2 *de,
2109 struct dx_hash_info *hinfo);
2110int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
2111 u32 namelen);
2112
2113#ifdef CONFIG_EXT4_FS_ENCRYPTION
2114void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
2115struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
2116 u32 max_len);
2117void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str);
2118#else
2119static inline
2120void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { }
2121static inline
2122struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
2123 u32 max_len)
2124{
2125 return NULL;
2126}
2127static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { }
2128#endif
2129
2130
2131/* crypto_key.c */
2132int ext4_generate_encryption_key(struct inode *inode);
2133
2134#ifdef CONFIG_EXT4_FS_ENCRYPTION
2135int ext4_has_encryption_key(struct inode *inode);
2136#else
2137static inline int ext4_has_encryption_key(struct inode *inode)
2138{
2139 return 0;
2140}
2141#endif
2142
2143
2004/* dir.c */ 2144/* dir.c */
2005extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, 2145extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
2006 struct file *, 2146 struct file *,
@@ -2011,17 +2151,20 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
2011 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \ 2151 unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
2012 (de), (bh), (buf), (size), (offset))) 2152 (de), (bh), (buf), (size), (offset)))
2013extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, 2153extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
2014 __u32 minor_hash, 2154 __u32 minor_hash,
2015 struct ext4_dir_entry_2 *dirent); 2155 struct ext4_dir_entry_2 *dirent,
2156 struct ext4_str *ent_name);
2016extern void ext4_htree_free_dir_info(struct dir_private_info *p); 2157extern void ext4_htree_free_dir_info(struct dir_private_info *p);
2017extern int ext4_find_dest_de(struct inode *dir, struct inode *inode, 2158extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2018 struct buffer_head *bh, 2159 struct buffer_head *bh,
2019 void *buf, int buf_size, 2160 void *buf, int buf_size,
2020 const char *name, int namelen, 2161 const char *name, int namelen,
2021 struct ext4_dir_entry_2 **dest_de); 2162 struct ext4_dir_entry_2 **dest_de);
2022void ext4_insert_dentry(struct inode *inode, 2163int ext4_insert_dentry(struct inode *dir,
2164 struct inode *inode,
2023 struct ext4_dir_entry_2 *de, 2165 struct ext4_dir_entry_2 *de,
2024 int buf_size, 2166 int buf_size,
2167 const struct qstr *iname,
2025 const char *name, int namelen); 2168 const char *name, int namelen);
2026static inline void ext4_update_dx_flag(struct inode *inode) 2169static inline void ext4_update_dx_flag(struct inode *inode)
2027{ 2170{
@@ -2099,6 +2242,7 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
2099extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2242extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
2100 2243
2101/* inode.c */ 2244/* inode.c */
2245int ext4_inode_is_fast_symlink(struct inode *inode);
2102struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int); 2246struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
2103struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int); 2247struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
2104int ext4_get_block_write(struct inode *inode, sector_t iblock, 2248int ext4_get_block_write(struct inode *inode, sector_t iblock,
@@ -2152,8 +2296,8 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
2152/* indirect.c */ 2296/* indirect.c */
2153extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 2297extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
2154 struct ext4_map_blocks *map, int flags); 2298 struct ext4_map_blocks *map, int flags);
2155extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, 2299extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
2156 struct iov_iter *iter, loff_t offset); 2300 loff_t offset);
2157extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); 2301extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
2158extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); 2302extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
2159extern void ext4_ind_truncate(handle_t *, struct inode *inode); 2303extern void ext4_ind_truncate(handle_t *, struct inode *inode);
@@ -2189,6 +2333,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
2189 void *entry_buf, 2333 void *entry_buf,
2190 int buf_size, 2334 int buf_size,
2191 int csum_size); 2335 int csum_size);
2336extern int ext4_empty_dir(struct inode *inode);
2192 2337
2193/* resize.c */ 2338/* resize.c */
2194extern int ext4_group_add(struct super_block *sb, 2339extern int ext4_group_add(struct super_block *sb,
@@ -2593,7 +2738,6 @@ extern const struct file_operations ext4_dir_operations;
2593/* file.c */ 2738/* file.c */
2594extern const struct inode_operations ext4_file_inode_operations; 2739extern const struct inode_operations ext4_file_inode_operations;
2595extern const struct file_operations ext4_file_operations; 2740extern const struct file_operations ext4_file_operations;
2596extern const struct file_operations ext4_dax_file_operations;
2597extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2741extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2598 2742
2599/* inline.c */ 2743/* inline.c */
@@ -2699,6 +2843,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
2699 de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 2843 de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
2700} 2844}
2701 2845
2846/* readpages.c */
2847extern int ext4_mpage_readpages(struct address_space *mapping,
2848 struct list_head *pages, struct page *page,
2849 unsigned nr_pages);
2702 2850
2703/* symlink.c */ 2851/* symlink.c */
2704extern const struct inode_operations ext4_symlink_inode_operations; 2852extern const struct inode_operations ext4_symlink_inode_operations;
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
new file mode 100644
index 000000000000..c2ba35a914b6
--- /dev/null
+++ b/fs/ext4/ext4_crypto.h
@@ -0,0 +1,147 @@
1/*
2 * linux/fs/ext4/ext4_crypto.h
3 *
4 * Copyright (C) 2015, Google, Inc.
5 *
6 * This contains encryption header content for ext4
7 *
8 * Written by Michael Halcrow, 2015.
9 */
10
11#ifndef _EXT4_CRYPTO_H
12#define _EXT4_CRYPTO_H
13
14#include <linux/fs.h>
15
16#define EXT4_KEY_DESCRIPTOR_SIZE 8
17
18/* Policy provided via an ioctl on the topmost directory */
19struct ext4_encryption_policy {
20 char version;
21 char contents_encryption_mode;
22 char filenames_encryption_mode;
23 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
24} __attribute__((__packed__));
25
26#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1
27#define EXT4_KEY_DERIVATION_NONCE_SIZE 16
28
29/**
30 * Encryption context for inode
31 *
32 * Protector format:
33 * 1 byte: Protector format (1 = this version)
34 * 1 byte: File contents encryption mode
35 * 1 byte: File names encryption mode
36 * 1 byte: Reserved
37 * 8 bytes: Master Key descriptor
38 * 16 bytes: Encryption Key derivation nonce
39 */
40struct ext4_encryption_context {
41 char format;
42 char contents_encryption_mode;
43 char filenames_encryption_mode;
44 char reserved;
45 char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
46 char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE];
47} __attribute__((__packed__));
48
49/* Encryption parameters */
50#define EXT4_XTS_TWEAK_SIZE 16
51#define EXT4_AES_128_ECB_KEY_SIZE 16
52#define EXT4_AES_256_GCM_KEY_SIZE 32
53#define EXT4_AES_256_CBC_KEY_SIZE 32
54#define EXT4_AES_256_CTS_KEY_SIZE 32
55#define EXT4_AES_256_XTS_KEY_SIZE 64
56#define EXT4_MAX_KEY_SIZE 64
57
58#define EXT4_KEY_DESC_PREFIX "ext4:"
59#define EXT4_KEY_DESC_PREFIX_SIZE 5
60
61struct ext4_encryption_key {
62 uint32_t mode;
63 char raw[EXT4_MAX_KEY_SIZE];
64 uint32_t size;
65};
66
67#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
68#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002
69
70struct ext4_crypto_ctx {
71 struct crypto_tfm *tfm; /* Crypto API context */
72 struct page *bounce_page; /* Ciphertext page on write path */
73 struct page *control_page; /* Original page on write path */
74 struct bio *bio; /* The bio for this context */
75 struct work_struct work; /* Work queue for read complete path */
76 struct list_head free_list; /* Free list */
77 int flags; /* Flags */
78 int mode; /* Encryption mode for tfm */
79};
80
81struct ext4_completion_result {
82 struct completion completion;
83 int res;
84};
85
86#define DECLARE_EXT4_COMPLETION_RESULT(ecr) \
87 struct ext4_completion_result ecr = { \
88 COMPLETION_INITIALIZER((ecr).completion), 0 }
89
90static inline int ext4_encryption_key_size(int mode)
91{
92 switch (mode) {
93 case EXT4_ENCRYPTION_MODE_AES_256_XTS:
94 return EXT4_AES_256_XTS_KEY_SIZE;
95 case EXT4_ENCRYPTION_MODE_AES_256_GCM:
96 return EXT4_AES_256_GCM_KEY_SIZE;
97 case EXT4_ENCRYPTION_MODE_AES_256_CBC:
98 return EXT4_AES_256_CBC_KEY_SIZE;
99 case EXT4_ENCRYPTION_MODE_AES_256_CTS:
100 return EXT4_AES_256_CTS_KEY_SIZE;
101 default:
102 BUG();
103 }
104 return 0;
105}
106
107#define EXT4_FNAME_NUM_SCATTER_ENTRIES 4
108#define EXT4_CRYPTO_BLOCK_SIZE 16
109#define EXT4_FNAME_CRYPTO_DIGEST_SIZE 32
110
111struct ext4_str {
112 unsigned char *name;
113 u32 len;
114};
115
116struct ext4_fname_crypto_ctx {
117 u32 lim;
118 char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
119 struct crypto_ablkcipher *ctfm;
120 struct crypto_hash *htfm;
121 struct page *workpage;
122 struct ext4_encryption_key key;
123 unsigned has_valid_key : 1;
124 unsigned ctfm_key_is_ready : 1;
125};
126
127/**
128 * For encrypted symlinks, the ciphertext length is stored at the beginning
129 * of the string in little-endian format.
130 */
131struct ext4_encrypted_symlink_data {
132 __le16 len;
133 char encrypted_path[1];
134} __attribute__((__packed__));
135
136/**
137 * This function is used to calculate the disk space required to
138 * store a filename of length l in encrypted symlink format.
139 */
140static inline u32 encrypted_symlink_data_len(u32 l)
141{
142 if (l < EXT4_CRYPTO_BLOCK_SIZE)
143 l = EXT4_CRYPTO_BLOCK_SIZE;
144 return (l + sizeof(struct ext4_encrypted_symlink_data) - 1);
145}
146
147#endif /* _EXT4_CRYPTO_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bed43081720f..973816bfe4a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1717,12 +1717,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1717{ 1717{
1718 unsigned short ext1_ee_len, ext2_ee_len; 1718 unsigned short ext1_ee_len, ext2_ee_len;
1719 1719
1720 /*
1721 * Make sure that both extents are initialized. We don't merge
1722 * unwritten extents so that we can be sure that end_io code has
1723 * the extent that was written properly split out and conversion to
1724 * initialized is trivial.
1725 */
1726 if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2)) 1720 if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
1727 return 0; 1721 return 0;
1728 1722
@@ -3128,6 +3122,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3128 ee_len = ext4_ext_get_actual_len(ex); 3122 ee_len = ext4_ext_get_actual_len(ex);
3129 ee_pblock = ext4_ext_pblock(ex); 3123 ee_pblock = ext4_ext_pblock(ex);
3130 3124
3125 if (ext4_encrypted_inode(inode))
3126 return ext4_encrypted_zeroout(inode, ex);
3127
3131 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); 3128 ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
3132 if (ret > 0) 3129 if (ret > 0)
3133 ret = 0; 3130 ret = 0;
@@ -4535,19 +4532,7 @@ got_allocated_blocks:
4535 */ 4532 */
4536 reserved_clusters = get_reserved_cluster_alloc(inode, 4533 reserved_clusters = get_reserved_cluster_alloc(inode,
4537 map->m_lblk, allocated); 4534 map->m_lblk, allocated);
4538 if (map_from_cluster) { 4535 if (!map_from_cluster) {
4539 if (reserved_clusters) {
4540 /*
4541 * We have clusters reserved for this range.
4542 * But since we are not doing actual allocation
4543 * and are simply using blocks from previously
4544 * allocated cluster, we should release the
4545 * reservation and not claim quota.
4546 */
4547 ext4_da_update_reserve_space(inode,
4548 reserved_clusters, 0);
4549 }
4550 } else {
4551 BUG_ON(allocated_clusters < reserved_clusters); 4536 BUG_ON(allocated_clusters < reserved_clusters);
4552 if (reserved_clusters < allocated_clusters) { 4537 if (reserved_clusters < allocated_clusters) {
4553 struct ext4_inode_info *ei = EXT4_I(inode); 4538 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -4803,12 +4788,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4803 else 4788 else
4804 max_blocks -= lblk; 4789 max_blocks -= lblk;
4805 4790
4806 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
4807 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4808 EXT4_EX_NOCACHE;
4809 if (mode & FALLOC_FL_KEEP_SIZE)
4810 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4811
4812 mutex_lock(&inode->i_mutex); 4791 mutex_lock(&inode->i_mutex);
4813 4792
4814 /* 4793 /*
@@ -4825,15 +4804,28 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4825 ret = inode_newsize_ok(inode, new_size); 4804 ret = inode_newsize_ok(inode, new_size);
4826 if (ret) 4805 if (ret)
4827 goto out_mutex; 4806 goto out_mutex;
4828 /*
4829 * If we have a partial block after EOF we have to allocate
4830 * the entire block.
4831 */
4832 if (partial_end)
4833 max_blocks += 1;
4834 } 4807 }
4835 4808
4809 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4810 if (mode & FALLOC_FL_KEEP_SIZE)
4811 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4812
4813 /* Preallocate the range including the unaligned edges */
4814 if (partial_begin || partial_end) {
4815 ret = ext4_alloc_file_blocks(file,
4816 round_down(offset, 1 << blkbits) >> blkbits,
4817 (round_up((offset + len), 1 << blkbits) -
4818 round_down(offset, 1 << blkbits)) >> blkbits,
4819 new_size, flags, mode);
4820 if (ret)
4821 goto out_mutex;
4822
4823 }
4824
4825 /* Zero range excluding the unaligned edges */
4836 if (max_blocks > 0) { 4826 if (max_blocks > 0) {
4827 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4828 EXT4_EX_NOCACHE);
4837 4829
4838 /* Now release the pages and zero block aligned part of pages*/ 4830 /* Now release the pages and zero block aligned part of pages*/
4839 truncate_pagecache_range(inode, start, end - 1); 4831 truncate_pagecache_range(inode, start, end - 1);
@@ -4847,19 +4839,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4847 flags, mode); 4839 flags, mode);
4848 if (ret) 4840 if (ret)
4849 goto out_dio; 4841 goto out_dio;
4850 /*
4851 * Remove entire range from the extent status tree.
4852 *
4853 * ext4_es_remove_extent(inode, lblk, max_blocks) is
4854 * NOT sufficient. I'm not sure why this is the case,
4855 * but let's be conservative and remove the extent
4856 * status tree for the entire inode. There should be
4857 * no outstanding delalloc extents thanks to the
4858 * filemap_write_and_wait_range() call above.
4859 */
4860 ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
4861 if (ret)
4862 goto out_dio;
4863 } 4842 }
4864 if (!partial_begin && !partial_end) 4843 if (!partial_begin && !partial_end)
4865 goto out_dio; 4844 goto out_dio;
@@ -4922,6 +4901,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4922 ext4_lblk_t lblk; 4901 ext4_lblk_t lblk;
4923 unsigned int blkbits = inode->i_blkbits; 4902 unsigned int blkbits = inode->i_blkbits;
4924 4903
4904 /*
4905 * Encrypted inodes can't handle collapse range or insert
4906 * range since we would need to re-encrypt blocks with a
4907 * different IV or XTS tweak (which are based on the logical
4908 * block number).
4909 *
4910 * XXX It's not clear why zero range isn't working, but we'll
4911 * leave it disabled for encrypted inodes for now. This is a
4912 * bug we should fix....
4913 */
4914 if (ext4_encrypted_inode(inode) &&
4915 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
4916 return -EOPNOTSUPP;
4917
4925 /* Return error if mode is not supported */ 4918 /* Return error if mode is not supported */
4926 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 4919 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
4927 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 4920 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e04d45733976..d33d5a6852b9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -9,12 +9,10 @@
9 * 9 *
10 * Ext4 extents status tree core functions. 10 * Ext4 extents status tree core functions.
11 */ 11 */
12#include <linux/rbtree.h>
13#include <linux/list_sort.h> 12#include <linux/list_sort.h>
14#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
15#include <linux/seq_file.h> 14#include <linux/seq_file.h>
16#include "ext4.h" 15#include "ext4.h"
17#include "extents_status.h"
18 16
19#include <trace/events/ext4.h> 17#include <trace/events/ext4.h>
20 18
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da16c9c..0613c256c344 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -20,12 +20,11 @@
20 20
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h>
24#include <linux/mount.h> 23#include <linux/mount.h>
25#include <linux/path.h> 24#include <linux/path.h>
26#include <linux/aio.h>
27#include <linux/quotaops.h> 25#include <linux/quotaops.h>
28#include <linux/pagevec.h> 26#include <linux/pagevec.h>
27#include <linux/uio.h>
29#include "ext4.h" 28#include "ext4.h"
30#include "ext4_jbd2.h" 29#include "ext4_jbd2.h"
31#include "xattr.h" 30#include "xattr.h"
@@ -95,11 +94,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
95 struct inode *inode = file_inode(iocb->ki_filp); 94 struct inode *inode = file_inode(iocb->ki_filp);
96 struct mutex *aio_mutex = NULL; 95 struct mutex *aio_mutex = NULL;
97 struct blk_plug plug; 96 struct blk_plug plug;
98 int o_direct = io_is_direct(file); 97 int o_direct = iocb->ki_flags & IOCB_DIRECT;
99 int overwrite = 0; 98 int overwrite = 0;
100 size_t length = iov_iter_count(from);
101 ssize_t ret; 99 ssize_t ret;
102 loff_t pos = iocb->ki_pos;
103 100
104 /* 101 /*
105 * Unaligned direct AIO must be serialized; see comment above 102 * Unaligned direct AIO must be serialized; see comment above
@@ -108,16 +105,17 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
108 if (o_direct && 105 if (o_direct &&
109 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && 106 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
110 !is_sync_kiocb(iocb) && 107 !is_sync_kiocb(iocb) &&
111 (file->f_flags & O_APPEND || 108 (iocb->ki_flags & IOCB_APPEND ||
112 ext4_unaligned_aio(inode, from, pos))) { 109 ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
113 aio_mutex = ext4_aio_mutex(inode); 110 aio_mutex = ext4_aio_mutex(inode);
114 mutex_lock(aio_mutex); 111 mutex_lock(aio_mutex);
115 ext4_unwritten_wait(inode); 112 ext4_unwritten_wait(inode);
116 } 113 }
117 114
118 mutex_lock(&inode->i_mutex); 115 mutex_lock(&inode->i_mutex);
119 if (file->f_flags & O_APPEND) 116 ret = generic_write_checks(iocb, from);
120 iocb->ki_pos = pos = i_size_read(inode); 117 if (ret <= 0)
118 goto out;
121 119
122 /* 120 /*
123 * If we have encountered a bitmap-format file, the size limit 121 * If we have encountered a bitmap-format file, the size limit
@@ -126,22 +124,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
126 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 124 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
127 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 125 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
128 126
129 if ((pos > sbi->s_bitmap_maxbytes) || 127 if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
130 (pos == sbi->s_bitmap_maxbytes && length > 0)) {
131 mutex_unlock(&inode->i_mutex);
132 ret = -EFBIG; 128 ret = -EFBIG;
133 goto errout; 129 goto out;
134 } 130 }
135 131 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
136 if (pos + length > sbi->s_bitmap_maxbytes)
137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
138 } 132 }
139 133
140 iocb->private = &overwrite; 134 iocb->private = &overwrite;
141 if (o_direct) { 135 if (o_direct) {
136 size_t length = iov_iter_count(from);
137 loff_t pos = iocb->ki_pos;
142 blk_start_plug(&plug); 138 blk_start_plug(&plug);
143 139
144
145 /* check whether we do a DIO overwrite or not */ 140 /* check whether we do a DIO overwrite or not */
146 if (ext4_should_dioread_nolock(inode) && !aio_mutex && 141 if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
147 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { 142 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
@@ -185,7 +180,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
185 if (o_direct) 180 if (o_direct)
186 blk_finish_plug(&plug); 181 blk_finish_plug(&plug);
187 182
188errout: 183 if (aio_mutex)
184 mutex_unlock(aio_mutex);
185 return ret;
186
187out:
188 mutex_unlock(&inode->i_mutex);
189 if (aio_mutex) 189 if (aio_mutex)
190 mutex_unlock(aio_mutex); 190 mutex_unlock(aio_mutex);
191 return ret; 191 return ret;
@@ -206,6 +206,7 @@ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
206static const struct vm_operations_struct ext4_dax_vm_ops = { 206static const struct vm_operations_struct ext4_dax_vm_ops = {
207 .fault = ext4_dax_fault, 207 .fault = ext4_dax_fault,
208 .page_mkwrite = ext4_dax_mkwrite, 208 .page_mkwrite = ext4_dax_mkwrite,
209 .pfn_mkwrite = dax_pfn_mkwrite,
209}; 210};
210#else 211#else
211#define ext4_dax_vm_ops ext4_file_vm_ops 212#define ext4_dax_vm_ops ext4_file_vm_ops
@@ -219,6 +220,13 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
219 220
220static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 221static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
221{ 222{
223 struct inode *inode = file->f_mapping->host;
224
225 if (ext4_encrypted_inode(inode)) {
226 int err = ext4_generate_encryption_key(inode);
227 if (err)
228 return 0;
229 }
222 file_accessed(file); 230 file_accessed(file);
223 if (IS_DAX(file_inode(file))) { 231 if (IS_DAX(file_inode(file))) {
224 vma->vm_ops = &ext4_dax_vm_ops; 232 vma->vm_ops = &ext4_dax_vm_ops;
@@ -236,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
236 struct vfsmount *mnt = filp->f_path.mnt; 244 struct vfsmount *mnt = filp->f_path.mnt;
237 struct path path; 245 struct path path;
238 char buf[64], *cp; 246 char buf[64], *cp;
247 int ret;
239 248
240 if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && 249 if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
241 !(sb->s_flags & MS_RDONLY))) { 250 !(sb->s_flags & MS_RDONLY))) {
@@ -274,11 +283,17 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
274 * writing and the journal is present 283 * writing and the journal is present
275 */ 284 */
276 if (filp->f_mode & FMODE_WRITE) { 285 if (filp->f_mode & FMODE_WRITE) {
277 int ret = ext4_inode_attach_jinode(inode); 286 ret = ext4_inode_attach_jinode(inode);
278 if (ret < 0) 287 if (ret < 0)
279 return ret; 288 return ret;
280 } 289 }
281 return dquot_file_open(inode, filp); 290 ret = dquot_file_open(inode, filp);
291 if (!ret && ext4_encrypted_inode(inode)) {
292 ret = ext4_generate_encryption_key(inode);
293 if (ret)
294 ret = -EACCES;
295 }
296 return ret;
282} 297}
283 298
284/* 299/*
@@ -607,8 +622,6 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
607 622
608const struct file_operations ext4_file_operations = { 623const struct file_operations ext4_file_operations = {
609 .llseek = ext4_llseek, 624 .llseek = ext4_llseek,
610 .read = new_sync_read,
611 .write = new_sync_write,
612 .read_iter = generic_file_read_iter, 625 .read_iter = generic_file_read_iter,
613 .write_iter = ext4_file_write_iter, 626 .write_iter = ext4_file_write_iter,
614 .unlocked_ioctl = ext4_ioctl, 627 .unlocked_ioctl = ext4_ioctl,
@@ -624,26 +637,6 @@ const struct file_operations ext4_file_operations = {
624 .fallocate = ext4_fallocate, 637 .fallocate = ext4_fallocate,
625}; 638};
626 639
627#ifdef CONFIG_FS_DAX
628const struct file_operations ext4_dax_file_operations = {
629 .llseek = ext4_llseek,
630 .read = new_sync_read,
631 .write = new_sync_write,
632 .read_iter = generic_file_read_iter,
633 .write_iter = ext4_file_write_iter,
634 .unlocked_ioctl = ext4_ioctl,
635#ifdef CONFIG_COMPAT
636 .compat_ioctl = ext4_compat_ioctl,
637#endif
638 .mmap = ext4_file_mmap,
639 .open = ext4_file_open,
640 .release = ext4_release_file,
641 .fsync = ext4_sync_file,
642 /* Splice not yet supported with DAX */
643 .fallocate = ext4_fallocate,
644};
645#endif
646
647const struct inode_operations ext4_file_inode_operations = { 640const struct inode_operations ext4_file_inode_operations = {
648 .setattr = ext4_setattr, 641 .setattr = ext4_setattr,
649 .getattr = ext4_getattr, 642 .getattr = ext4_getattr,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..e9d632e9aa4b 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -26,7 +26,6 @@
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h>
30#include <linux/blkdev.h> 29#include <linux/blkdev.h>
31 30
32#include "ext4.h" 31#include "ext4.h"
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3d586f02883e..e026aa941fd5 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -10,7 +10,6 @@
10 */ 10 */
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h>
14#include <linux/cryptohash.h> 13#include <linux/cryptohash.h>
15#include "ext4.h" 14#include "ext4.h"
16 15
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ac644c31ca67..2cf18a2d5c72 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -14,7 +14,6 @@
14 14
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h>
18#include <linux/stat.h> 17#include <linux/stat.h>
19#include <linux/string.h> 18#include <linux/string.h>
20#include <linux/quotaops.h> 19#include <linux/quotaops.h>
@@ -997,6 +996,12 @@ got:
997 ei->i_block_group = group; 996 ei->i_block_group = group;
998 ei->i_last_alloc_group = ~0; 997 ei->i_last_alloc_group = ~0;
999 998
999 /* If the directory encrypted, then we should encrypt the inode. */
1000 if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
1001 (ext4_encrypted_inode(dir) ||
1002 DUMMY_ENCRYPTION_ENABLED(sbi)))
1003 ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
1004
1000 ext4_set_inode_flags(inode); 1005 ext4_set_inode_flags(inode);
1001 if (IS_DIRSYNC(inode)) 1006 if (IS_DIRSYNC(inode))
1002 ext4_handle_sync(handle); 1007 ext4_handle_sync(handle);
@@ -1029,11 +1034,28 @@ got:
1029 ext4_set_inode_state(inode, EXT4_STATE_NEW); 1034 ext4_set_inode_state(inode, EXT4_STATE_NEW);
1030 1035
1031 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; 1036 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
1032 1037#ifdef CONFIG_EXT4_FS_ENCRYPTION
1038 if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
1039 (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
1040 ei->i_inline_off = 0;
1041 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1042 EXT4_FEATURE_INCOMPAT_INLINE_DATA))
1043 ext4_set_inode_state(inode,
1044 EXT4_STATE_MAY_INLINE_DATA);
1045 } else {
1046 /* Inline data and encryption are incompatible
1047 * We turn off inline data since encryption is enabled */
1048 ei->i_inline_off = 1;
1049 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1050 EXT4_FEATURE_INCOMPAT_INLINE_DATA))
1051 ext4_clear_inode_state(inode,
1052 EXT4_STATE_MAY_INLINE_DATA);
1053 }
1054#else
1033 ei->i_inline_off = 0; 1055 ei->i_inline_off = 0;
1034 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) 1056 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
1035 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1057 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
1036 1058#endif
1037 ret = inode; 1059 ret = inode;
1038 err = dquot_alloc_inode(inode); 1060 err = dquot_alloc_inode(inode);
1039 if (err) 1061 if (err)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 45fe924f82bc..3580629e42d3 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,9 +20,9 @@
20 * (sct@redhat.com), 1993, 1998 20 * (sct@redhat.com), 1993, 1998
21 */ 21 */
22 22
23#include <linux/aio.h>
24#include "ext4_jbd2.h" 23#include "ext4_jbd2.h"
25#include "truncate.h" 24#include "truncate.h"
25#include <linux/uio.h>
26 26
27#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
28 28
@@ -642,8 +642,8 @@ out:
642 * crashes then stale disk data _may_ be exposed inside the file. But current 642 * crashes then stale disk data _may_ be exposed inside the file. But current
643 * VFS code falls back into buffered path in that case so we are safe. 643 * VFS code falls back into buffered path in that case so we are safe.
644 */ 644 */
645ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, 645ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
646 struct iov_iter *iter, loff_t offset) 646 loff_t offset)
647{ 647{
648 struct file *file = iocb->ki_filp; 648 struct file *file = iocb->ki_filp;
649 struct inode *inode = file->f_mapping->host; 649 struct inode *inode = file->f_mapping->host;
@@ -654,7 +654,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
654 size_t count = iov_iter_count(iter); 654 size_t count = iov_iter_count(iter);
655 int retries = 0; 655 int retries = 0;
656 656
657 if (rw == WRITE) { 657 if (iov_iter_rw(iter) == WRITE) {
658 loff_t final_size = offset + count; 658 loff_t final_size = offset + count;
659 659
660 if (final_size > inode->i_size) { 660 if (final_size > inode->i_size) {
@@ -676,7 +676,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
676 } 676 }
677 677
678retry: 678retry:
679 if (rw == READ && ext4_should_dioread_nolock(inode)) { 679 if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) {
680 /* 680 /*
681 * Nolock dioread optimization may be dynamically disabled 681 * Nolock dioread optimization may be dynamically disabled
682 * via ext4_inode_block_unlocked_dio(). Check inode's state 682 * via ext4_inode_block_unlocked_dio(). Check inode's state
@@ -690,23 +690,24 @@ retry:
690 goto locked; 690 goto locked;
691 } 691 }
692 if (IS_DAX(inode)) 692 if (IS_DAX(inode))
693 ret = dax_do_io(rw, iocb, inode, iter, offset, 693 ret = dax_do_io(iocb, inode, iter, offset,
694 ext4_get_block, NULL, 0); 694 ext4_get_block, NULL, 0);
695 else 695 else
696 ret = __blockdev_direct_IO(rw, iocb, inode, 696 ret = __blockdev_direct_IO(iocb, inode,
697 inode->i_sb->s_bdev, iter, offset, 697 inode->i_sb->s_bdev, iter,
698 ext4_get_block, NULL, NULL, 0); 698 offset, ext4_get_block, NULL,
699 NULL, 0);
699 inode_dio_done(inode); 700 inode_dio_done(inode);
700 } else { 701 } else {
701locked: 702locked:
702 if (IS_DAX(inode)) 703 if (IS_DAX(inode))
703 ret = dax_do_io(rw, iocb, inode, iter, offset, 704 ret = dax_do_io(iocb, inode, iter, offset,
704 ext4_get_block, NULL, DIO_LOCKING); 705 ext4_get_block, NULL, DIO_LOCKING);
705 else 706 else
706 ret = blockdev_direct_IO(rw, iocb, inode, iter, 707 ret = blockdev_direct_IO(iocb, inode, iter, offset,
707 offset, ext4_get_block); 708 ext4_get_block);
708 709
709 if (unlikely((rw & WRITE) && ret < 0)) { 710 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
710 loff_t isize = i_size_read(inode); 711 loff_t isize = i_size_read(inode);
711 loff_t end = offset + count; 712 loff_t end = offset + count;
712 713
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 4b143febf21f..feb2cafbeace 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -11,11 +11,13 @@
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 */ 13 */
14
15#include <linux/fiemap.h>
16
14#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
15#include "ext4.h" 18#include "ext4.h"
16#include "xattr.h" 19#include "xattr.h"
17#include "truncate.h" 20#include "truncate.h"
18#include <linux/fiemap.h>
19 21
20#define EXT4_XATTR_SYSTEM_DATA "data" 22#define EXT4_XATTR_SYSTEM_DATA "data"
21#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 23#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -972,7 +974,7 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
972 offset = 0; 974 offset = 0;
973 while ((void *)de < dlimit) { 975 while ((void *)de < dlimit) {
974 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); 976 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
975 trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n", 977 trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
976 offset, de_len, de->name_len, de->name, 978 offset, de_len, de->name_len, de->name,
977 de->name_len, le32_to_cpu(de->inode)); 979 de->name_len, le32_to_cpu(de->inode));
978 if (ext4_check_dir_entry(dir, NULL, de, bh, 980 if (ext4_check_dir_entry(dir, NULL, de, bh,
@@ -1014,7 +1016,8 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
1014 err = ext4_journal_get_write_access(handle, iloc->bh); 1016 err = ext4_journal_get_write_access(handle, iloc->bh);
1015 if (err) 1017 if (err)
1016 return err; 1018 return err;
1017 ext4_insert_dentry(inode, de, inline_size, name, namelen); 1019 ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
1020 name, namelen);
1018 1021
1019 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); 1022 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
1020 1023
@@ -1327,6 +1330,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
1327 struct ext4_iloc iloc; 1330 struct ext4_iloc iloc;
1328 void *dir_buf = NULL; 1331 void *dir_buf = NULL;
1329 struct ext4_dir_entry_2 fake; 1332 struct ext4_dir_entry_2 fake;
1333 struct ext4_str tmp_str;
1330 1334
1331 ret = ext4_get_inode_loc(inode, &iloc); 1335 ret = ext4_get_inode_loc(inode, &iloc);
1332 if (ret) 1336 if (ret)
@@ -1398,8 +1402,10 @@ int htree_inlinedir_to_tree(struct file *dir_file,
1398 continue; 1402 continue;
1399 if (de->inode == 0) 1403 if (de->inode == 0)
1400 continue; 1404 continue;
1401 err = ext4_htree_store_dirent(dir_file, 1405 tmp_str.name = de->name;
1402 hinfo->hash, hinfo->minor_hash, de); 1406 tmp_str.len = de->name_len;
1407 err = ext4_htree_store_dirent(dir_file, hinfo->hash,
1408 hinfo->minor_hash, de, &tmp_str);
1403 if (err) { 1409 if (err) {
1404 count = err; 1410 count = err;
1405 goto out; 1411 goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cb9a212b86f..366476e71e10 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -20,7 +20,6 @@
20 20
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h>
24#include <linux/highuid.h> 23#include <linux/highuid.h>
25#include <linux/pagemap.h> 24#include <linux/pagemap.h>
26#include <linux/quotaops.h> 25#include <linux/quotaops.h>
@@ -36,8 +35,6 @@
36#include <linux/kernel.h> 35#include <linux/kernel.h>
37#include <linux/printk.h> 36#include <linux/printk.h>
38#include <linux/slab.h> 37#include <linux/slab.h>
39#include <linux/ratelimit.h>
40#include <linux/aio.h>
41#include <linux/bitops.h> 38#include <linux/bitops.h>
42 39
43#include "ext4_jbd2.h" 40#include "ext4_jbd2.h"
@@ -141,7 +138,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
141/* 138/*
142 * Test whether an inode is a fast symlink. 139 * Test whether an inode is a fast symlink.
143 */ 140 */
144static int ext4_inode_is_fast_symlink(struct inode *inode) 141int ext4_inode_is_fast_symlink(struct inode *inode)
145{ 142{
146 int ea_blocks = EXT4_I(inode)->i_file_acl ? 143 int ea_blocks = EXT4_I(inode)->i_file_acl ?
147 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; 144 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
@@ -888,6 +885,95 @@ int do_journal_get_write_access(handle_t *handle,
888 885
889static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, 886static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
890 struct buffer_head *bh_result, int create); 887 struct buffer_head *bh_result, int create);
888
889#ifdef CONFIG_EXT4_FS_ENCRYPTION
890static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
891 get_block_t *get_block)
892{
893 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
894 unsigned to = from + len;
895 struct inode *inode = page->mapping->host;
896 unsigned block_start, block_end;
897 sector_t block;
898 int err = 0;
899 unsigned blocksize = inode->i_sb->s_blocksize;
900 unsigned bbits;
901 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
902 bool decrypt = false;
903
904 BUG_ON(!PageLocked(page));
905 BUG_ON(from > PAGE_CACHE_SIZE);
906 BUG_ON(to > PAGE_CACHE_SIZE);
907 BUG_ON(from > to);
908
909 if (!page_has_buffers(page))
910 create_empty_buffers(page, blocksize, 0);
911 head = page_buffers(page);
912 bbits = ilog2(blocksize);
913 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
914
915 for (bh = head, block_start = 0; bh != head || !block_start;
916 block++, block_start = block_end, bh = bh->b_this_page) {
917 block_end = block_start + blocksize;
918 if (block_end <= from || block_start >= to) {
919 if (PageUptodate(page)) {
920 if (!buffer_uptodate(bh))
921 set_buffer_uptodate(bh);
922 }
923 continue;
924 }
925 if (buffer_new(bh))
926 clear_buffer_new(bh);
927 if (!buffer_mapped(bh)) {
928 WARN_ON(bh->b_size != blocksize);
929 err = get_block(inode, block, bh, 1);
930 if (err)
931 break;
932 if (buffer_new(bh)) {
933 unmap_underlying_metadata(bh->b_bdev,
934 bh->b_blocknr);
935 if (PageUptodate(page)) {
936 clear_buffer_new(bh);
937 set_buffer_uptodate(bh);
938 mark_buffer_dirty(bh);
939 continue;
940 }
941 if (block_end > to || block_start < from)
942 zero_user_segments(page, to, block_end,
943 block_start, from);
944 continue;
945 }
946 }
947 if (PageUptodate(page)) {
948 if (!buffer_uptodate(bh))
949 set_buffer_uptodate(bh);
950 continue;
951 }
952 if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
953 !buffer_unwritten(bh) &&
954 (block_start < from || block_end > to)) {
955 ll_rw_block(READ, 1, &bh);
956 *wait_bh++ = bh;
957 decrypt = ext4_encrypted_inode(inode) &&
958 S_ISREG(inode->i_mode);
959 }
960 }
961 /*
962 * If we issued read requests, let them complete.
963 */
964 while (wait_bh > wait) {
965 wait_on_buffer(*--wait_bh);
966 if (!buffer_uptodate(*wait_bh))
967 err = -EIO;
968 }
969 if (unlikely(err))
970 page_zero_new_buffers(page, from, to);
971 else if (decrypt)
972 err = ext4_decrypt_one(inode, page);
973 return err;
974}
975#endif
976
891static int ext4_write_begin(struct file *file, struct address_space *mapping, 977static int ext4_write_begin(struct file *file, struct address_space *mapping,
892 loff_t pos, unsigned len, unsigned flags, 978 loff_t pos, unsigned len, unsigned flags,
893 struct page **pagep, void **fsdata) 979 struct page **pagep, void **fsdata)
@@ -950,11 +1036,19 @@ retry_journal:
950 /* In case writeback began while the page was unlocked */ 1036 /* In case writeback began while the page was unlocked */
951 wait_for_stable_page(page); 1037 wait_for_stable_page(page);
952 1038
1039#ifdef CONFIG_EXT4_FS_ENCRYPTION
1040 if (ext4_should_dioread_nolock(inode))
1041 ret = ext4_block_write_begin(page, pos, len,
1042 ext4_get_block_write);
1043 else
1044 ret = ext4_block_write_begin(page, pos, len,
1045 ext4_get_block);
1046#else
953 if (ext4_should_dioread_nolock(inode)) 1047 if (ext4_should_dioread_nolock(inode))
954 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 1048 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
955 else 1049 else
956 ret = __block_write_begin(page, pos, len, ext4_get_block); 1050 ret = __block_write_begin(page, pos, len, ext4_get_block);
957 1051#endif
958 if (!ret && ext4_should_journal_data(inode)) { 1052 if (!ret && ext4_should_journal_data(inode)) {
959 ret = ext4_walk_page_buffers(handle, page_buffers(page), 1053 ret = ext4_walk_page_buffers(handle, page_buffers(page),
960 from, to, NULL, 1054 from, to, NULL,
@@ -2576,7 +2670,12 @@ retry_journal:
2576 /* In case writeback began while the page was unlocked */ 2670 /* In case writeback began while the page was unlocked */
2577 wait_for_stable_page(page); 2671 wait_for_stable_page(page);
2578 2672
2673#ifdef CONFIG_EXT4_FS_ENCRYPTION
2674 ret = ext4_block_write_begin(page, pos, len,
2675 ext4_da_get_block_prep);
2676#else
2579 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2677 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2678#endif
2580 if (ret < 0) { 2679 if (ret < 0) {
2581 unlock_page(page); 2680 unlock_page(page);
2582 ext4_journal_stop(handle); 2681 ext4_journal_stop(handle);
@@ -2822,7 +2921,7 @@ static int ext4_readpage(struct file *file, struct page *page)
2822 ret = ext4_readpage_inline(inode, page); 2921 ret = ext4_readpage_inline(inode, page);
2823 2922
2824 if (ret == -EAGAIN) 2923 if (ret == -EAGAIN)
2825 return mpage_readpage(page, ext4_get_block); 2924 return ext4_mpage_readpages(page->mapping, NULL, page, 1);
2826 2925
2827 return ret; 2926 return ret;
2828} 2927}
@@ -2837,7 +2936,7 @@ ext4_readpages(struct file *file, struct address_space *mapping,
2837 if (ext4_has_inline_data(inode)) 2936 if (ext4_has_inline_data(inode))
2838 return 0; 2937 return 0;
2839 2938
2840 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 2939 return ext4_mpage_readpages(mapping, pages, NULL, nr_pages);
2841} 2940}
2842 2941
2843static void ext4_invalidatepage(struct page *page, unsigned int offset, 2942static void ext4_invalidatepage(struct page *page, unsigned int offset,
@@ -2953,8 +3052,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2953 * if the machine crashes during the write. 3052 * if the machine crashes during the write.
2954 * 3053 *
2955 */ 3054 */
2956static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, 3055static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
2957 struct iov_iter *iter, loff_t offset) 3056 loff_t offset)
2958{ 3057{
2959 struct file *file = iocb->ki_filp; 3058 struct file *file = iocb->ki_filp;
2960 struct inode *inode = file->f_mapping->host; 3059 struct inode *inode = file->f_mapping->host;
@@ -2967,8 +3066,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2967 ext4_io_end_t *io_end = NULL; 3066 ext4_io_end_t *io_end = NULL;
2968 3067
2969 /* Use the old path for reads and writes beyond i_size. */ 3068 /* Use the old path for reads and writes beyond i_size. */
2970 if (rw != WRITE || final_size > inode->i_size) 3069 if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
2971 return ext4_ind_direct_IO(rw, iocb, iter, offset); 3070 return ext4_ind_direct_IO(iocb, iter, offset);
2972 3071
2973 BUG_ON(iocb->private == NULL); 3072 BUG_ON(iocb->private == NULL);
2974 3073
@@ -2977,7 +3076,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2977 * conversion. This also disallows race between truncate() and 3076 * conversion. This also disallows race between truncate() and
2978 * overwrite DIO as i_dio_count needs to be incremented under i_mutex. 3077 * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
2979 */ 3078 */
2980 if (rw == WRITE) 3079 if (iov_iter_rw(iter) == WRITE)
2981 atomic_inc(&inode->i_dio_count); 3080 atomic_inc(&inode->i_dio_count);
2982 3081
2983 /* If we do a overwrite dio, i_mutex locking can be released */ 3082 /* If we do a overwrite dio, i_mutex locking can be released */
@@ -3034,11 +3133,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3034 get_block_func = ext4_get_block_write; 3133 get_block_func = ext4_get_block_write;
3035 dio_flags = DIO_LOCKING; 3134 dio_flags = DIO_LOCKING;
3036 } 3135 }
3136#ifdef CONFIG_EXT4_FS_ENCRYPTION
3137 BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
3138#endif
3037 if (IS_DAX(inode)) 3139 if (IS_DAX(inode))
3038 ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, 3140 ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
3039 ext4_end_io_dio, dio_flags); 3141 ext4_end_io_dio, dio_flags);
3040 else 3142 else
3041 ret = __blockdev_direct_IO(rw, iocb, inode, 3143 ret = __blockdev_direct_IO(iocb, inode,
3042 inode->i_sb->s_bdev, iter, offset, 3144 inode->i_sb->s_bdev, iter, offset,
3043 get_block_func, 3145 get_block_func,
3044 ext4_end_io_dio, NULL, dio_flags); 3146 ext4_end_io_dio, NULL, dio_flags);
@@ -3079,7 +3181,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3079 } 3181 }
3080 3182
3081retake_lock: 3183retake_lock:
3082 if (rw == WRITE) 3184 if (iov_iter_rw(iter) == WRITE)
3083 inode_dio_done(inode); 3185 inode_dio_done(inode);
3084 /* take i_mutex locking again if we do a ovewrite dio */ 3186 /* take i_mutex locking again if we do a ovewrite dio */
3085 if (overwrite) { 3187 if (overwrite) {
@@ -3090,14 +3192,19 @@ retake_lock:
3090 return ret; 3192 return ret;
3091} 3193}
3092 3194
3093static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3195static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3094 struct iov_iter *iter, loff_t offset) 3196 loff_t offset)
3095{ 3197{
3096 struct file *file = iocb->ki_filp; 3198 struct file *file = iocb->ki_filp;
3097 struct inode *inode = file->f_mapping->host; 3199 struct inode *inode = file->f_mapping->host;
3098 size_t count = iov_iter_count(iter); 3200 size_t count = iov_iter_count(iter);
3099 ssize_t ret; 3201 ssize_t ret;
3100 3202
3203#ifdef CONFIG_EXT4_FS_ENCRYPTION
3204 if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
3205 return 0;
3206#endif
3207
3101 /* 3208 /*
3102 * If we are doing data journalling we don't support O_DIRECT 3209 * If we are doing data journalling we don't support O_DIRECT
3103 */ 3210 */
@@ -3108,12 +3215,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
3108 if (ext4_has_inline_data(inode)) 3215 if (ext4_has_inline_data(inode))
3109 return 0; 3216 return 0;
3110 3217
3111 trace_ext4_direct_IO_enter(inode, offset, count, rw); 3218 trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
3112 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3219 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3113 ret = ext4_ext_direct_IO(rw, iocb, iter, offset); 3220 ret = ext4_ext_direct_IO(iocb, iter, offset);
3114 else 3221 else
3115 ret = ext4_ind_direct_IO(rw, iocb, iter, offset); 3222 ret = ext4_ind_direct_IO(iocb, iter, offset);
3116 trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); 3223 trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
3117 return ret; 3224 return ret;
3118} 3225}
3119 3226
@@ -3262,6 +3369,13 @@ static int __ext4_block_zero_page_range(handle_t *handle,
3262 /* Uhhuh. Read error. Complain and punt. */ 3369 /* Uhhuh. Read error. Complain and punt. */
3263 if (!buffer_uptodate(bh)) 3370 if (!buffer_uptodate(bh))
3264 goto unlock; 3371 goto unlock;
3372 if (S_ISREG(inode->i_mode) &&
3373 ext4_encrypted_inode(inode)) {
3374 /* We expect the key to be set. */
3375 BUG_ON(!ext4_has_encryption_key(inode));
3376 BUG_ON(blocksize != PAGE_CACHE_SIZE);
3377 WARN_ON_ONCE(ext4_decrypt_one(inode, page));
3378 }
3265 } 3379 }
3266 if (ext4_should_journal_data(inode)) { 3380 if (ext4_should_journal_data(inode)) {
3267 BUFFER_TRACE(bh, "get write access"); 3381 BUFFER_TRACE(bh, "get write access");
@@ -4091,16 +4205,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4091 4205
4092 if (S_ISREG(inode->i_mode)) { 4206 if (S_ISREG(inode->i_mode)) {
4093 inode->i_op = &ext4_file_inode_operations; 4207 inode->i_op = &ext4_file_inode_operations;
4094 if (test_opt(inode->i_sb, DAX)) 4208 inode->i_fop = &ext4_file_operations;
4095 inode->i_fop = &ext4_dax_file_operations;
4096 else
4097 inode->i_fop = &ext4_file_operations;
4098 ext4_set_aops(inode); 4209 ext4_set_aops(inode);
4099 } else if (S_ISDIR(inode->i_mode)) { 4210 } else if (S_ISDIR(inode->i_mode)) {
4100 inode->i_op = &ext4_dir_inode_operations; 4211 inode->i_op = &ext4_dir_inode_operations;
4101 inode->i_fop = &ext4_dir_operations; 4212 inode->i_fop = &ext4_dir_operations;
4102 } else if (S_ISLNK(inode->i_mode)) { 4213 } else if (S_ISLNK(inode->i_mode)) {
4103 if (ext4_inode_is_fast_symlink(inode)) { 4214 if (ext4_inode_is_fast_symlink(inode) &&
4215 !ext4_encrypted_inode(inode)) {
4104 inode->i_op = &ext4_fast_symlink_inode_operations; 4216 inode->i_op = &ext4_fast_symlink_inode_operations;
4105 nd_terminate_link(ei->i_data, inode->i_size, 4217 nd_terminate_link(ei->i_data, inode->i_size,
4106 sizeof(ei->i_data) - 1); 4218 sizeof(ei->i_data) - 1);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f58a0d106726..2cb9e178d1c5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -8,12 +8,12 @@
8 */ 8 */
9 9
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h>
12#include <linux/capability.h> 11#include <linux/capability.h>
13#include <linux/time.h> 12#include <linux/time.h>
14#include <linux/compat.h> 13#include <linux/compat.h>
15#include <linux/mount.h> 14#include <linux/mount.h>
16#include <linux/file.h> 15#include <linux/file.h>
16#include <linux/random.h>
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
@@ -196,6 +196,16 @@ journal_err_out:
196 return err; 196 return err;
197} 197}
198 198
199static int uuid_is_zero(__u8 u[16])
200{
201 int i;
202
203 for (i = 0; i < 16; i++)
204 if (u[i])
205 return 0;
206 return 1;
207}
208
199long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 209long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
200{ 210{
201 struct inode *inode = file_inode(filp); 211 struct inode *inode = file_inode(filp);
@@ -615,7 +625,78 @@ resizefs_out:
615 } 625 }
616 case EXT4_IOC_PRECACHE_EXTENTS: 626 case EXT4_IOC_PRECACHE_EXTENTS:
617 return ext4_ext_precache(inode); 627 return ext4_ext_precache(inode);
628 case EXT4_IOC_SET_ENCRYPTION_POLICY: {
629#ifdef CONFIG_EXT4_FS_ENCRYPTION
630 struct ext4_encryption_policy policy;
631 int err = 0;
632
633 if (copy_from_user(&policy,
634 (struct ext4_encryption_policy __user *)arg,
635 sizeof(policy))) {
636 err = -EFAULT;
637 goto encryption_policy_out;
638 }
618 639
640 err = ext4_process_policy(&policy, inode);
641encryption_policy_out:
642 return err;
643#else
644 return -EOPNOTSUPP;
645#endif
646 }
647 case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
648 int err, err2;
649 struct ext4_sb_info *sbi = EXT4_SB(sb);
650 handle_t *handle;
651
652 if (!ext4_sb_has_crypto(sb))
653 return -EOPNOTSUPP;
654 if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) {
655 err = mnt_want_write_file(filp);
656 if (err)
657 return err;
658 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
659 if (IS_ERR(handle)) {
660 err = PTR_ERR(handle);
661 goto pwsalt_err_exit;
662 }
663 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
664 if (err)
665 goto pwsalt_err_journal;
666 generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
667 err = ext4_handle_dirty_metadata(handle, NULL,
668 sbi->s_sbh);
669 pwsalt_err_journal:
670 err2 = ext4_journal_stop(handle);
671 if (err2 && !err)
672 err = err2;
673 pwsalt_err_exit:
674 mnt_drop_write_file(filp);
675 if (err)
676 return err;
677 }
678 if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
679 16))
680 return -EFAULT;
681 return 0;
682 }
683 case EXT4_IOC_GET_ENCRYPTION_POLICY: {
684#ifdef CONFIG_EXT4_FS_ENCRYPTION
685 struct ext4_encryption_policy policy;
686 int err = 0;
687
688 if (!ext4_encrypted_inode(inode))
689 return -ENOENT;
690 err = ext4_get_policy(inode, &policy);
691 if (err)
692 return err;
693 if (copy_to_user((void *)arg, &policy, sizeof(policy)))
694 return -EFAULT;
695 return 0;
696#else
697 return -EOPNOTSUPP;
698#endif
699 }
619 default: 700 default:
620 return -ENOTTY; 701 return -ENOTTY;
621 } 702 }
@@ -680,6 +761,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
680 case FITRIM: 761 case FITRIM:
681 case EXT4_IOC_RESIZE_FS: 762 case EXT4_IOC_RESIZE_FS:
682 case EXT4_IOC_PRECACHE_EXTENTS: 763 case EXT4_IOC_PRECACHE_EXTENTS:
764 case EXT4_IOC_SET_ENCRYPTION_POLICY:
765 case EXT4_IOC_GET_ENCRYPTION_PWSALT:
766 case EXT4_IOC_GET_ENCRYPTION_POLICY:
683 break; 767 break;
684 default: 768 default:
685 return -ENOIOCTLCMD; 769 return -ENOIOCTLCMD;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28fe71a2904c..ef22cd951c0c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h>
30#include <linux/time.h> 29#include <linux/time.h>
31#include <linux/fcntl.h> 30#include <linux/fcntl.h>
32#include <linux/stat.h> 31#include <linux/stat.h>
@@ -254,8 +253,9 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
254 struct dx_hash_info *hinfo, 253 struct dx_hash_info *hinfo,
255 struct dx_frame *frame); 254 struct dx_frame *frame);
256static void dx_release(struct dx_frame *frames); 255static void dx_release(struct dx_frame *frames);
257static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, 256static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
258 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 257 unsigned blocksize, struct dx_hash_info *hinfo,
258 struct dx_map_entry map[]);
259static void dx_sort_map(struct dx_map_entry *map, unsigned count); 259static void dx_sort_map(struct dx_map_entry *map, unsigned count);
260static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, 260static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
261 struct dx_map_entry *offsets, int count, unsigned blocksize); 261 struct dx_map_entry *offsets, int count, unsigned blocksize);
@@ -586,8 +586,10 @@ struct stats
586 unsigned bcount; 586 unsigned bcount;
587}; 587};
588 588
589static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, 589static struct stats dx_show_leaf(struct inode *dir,
590 int size, int show_names) 590 struct dx_hash_info *hinfo,
591 struct ext4_dir_entry_2 *de,
592 int size, int show_names)
591{ 593{
592 unsigned names = 0, space = 0; 594 unsigned names = 0, space = 0;
593 char *base = (char *) de; 595 char *base = (char *) de;
@@ -600,12 +602,80 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
600 { 602 {
601 if (show_names) 603 if (show_names)
602 { 604 {
605#ifdef CONFIG_EXT4_FS_ENCRYPTION
606 int len;
607 char *name;
608 struct ext4_str fname_crypto_str
609 = {.name = NULL, .len = 0};
610 struct ext4_fname_crypto_ctx *ctx = NULL;
611 int res;
612
613 name = de->name;
614 len = de->name_len;
615 ctx = ext4_get_fname_crypto_ctx(dir,
616 EXT4_NAME_LEN);
617 if (IS_ERR(ctx)) {
618 printk(KERN_WARNING "Error acquiring"
619 " crypto ctxt--skipping crypto\n");
620 ctx = NULL;
621 }
622 if (ctx == NULL) {
623 /* Directory is not encrypted */
624 ext4fs_dirhash(de->name,
625 de->name_len, &h);
626 printk("%*.s:(U)%x.%u ", len,
627 name, h.hash,
628 (unsigned) ((char *) de
629 - base));
630 } else {
631 /* Directory is encrypted */
632 res = ext4_fname_crypto_alloc_buffer(
633 ctx, de->name_len,
634 &fname_crypto_str);
635 if (res < 0) {
636 printk(KERN_WARNING "Error "
637 "allocating crypto "
638 "buffer--skipping "
639 "crypto\n");
640 ext4_put_fname_crypto_ctx(&ctx);
641 ctx = NULL;
642 }
643 res = ext4_fname_disk_to_usr(ctx, de,
644 &fname_crypto_str);
645 if (res < 0) {
646 printk(KERN_WARNING "Error "
647 "converting filename "
648 "from disk to usr"
649 "\n");
650 name = "??";
651 len = 2;
652 } else {
653 name = fname_crypto_str.name;
654 len = fname_crypto_str.len;
655 }
656 res = ext4_fname_disk_to_hash(ctx, de,
657 &h);
658 if (res < 0) {
659 printk(KERN_WARNING "Error "
660 "converting filename "
661 "from disk to htree"
662 "\n");
663 h.hash = 0xDEADBEEF;
664 }
665 printk("%*.s:(E)%x.%u ", len, name,
666 h.hash, (unsigned) ((char *) de
667 - base));
668 ext4_put_fname_crypto_ctx(&ctx);
669 ext4_fname_crypto_free_buffer(
670 &fname_crypto_str);
671 }
672#else
603 int len = de->name_len; 673 int len = de->name_len;
604 char *name = de->name; 674 char *name = de->name;
605 while (len--) printk("%c", *name++);
606 ext4fs_dirhash(de->name, de->name_len, &h); 675 ext4fs_dirhash(de->name, de->name_len, &h);
607 printk(":%x.%u ", h.hash, 676 printk("%*.s:%x.%u ", len, name, h.hash,
608 (unsigned) ((char *) de - base)); 677 (unsigned) ((char *) de - base));
678#endif
609 } 679 }
610 space += EXT4_DIR_REC_LEN(de->name_len); 680 space += EXT4_DIR_REC_LEN(de->name_len);
611 names++; 681 names++;
@@ -623,7 +693,6 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
623 unsigned count = dx_get_count(entries), names = 0, space = 0, i; 693 unsigned count = dx_get_count(entries), names = 0, space = 0, i;
624 unsigned bcount = 0; 694 unsigned bcount = 0;
625 struct buffer_head *bh; 695 struct buffer_head *bh;
626 int err;
627 printk("%i indexed blocks...\n", count); 696 printk("%i indexed blocks...\n", count);
628 for (i = 0; i < count; i++, entries++) 697 for (i = 0; i < count; i++, entries++)
629 { 698 {
@@ -637,7 +706,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
637 continue; 706 continue;
638 stats = levels? 707 stats = levels?
639 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): 708 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
640 dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); 709 dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
710 bh->b_data, blocksize, 0);
641 names += stats.names; 711 names += stats.names;
642 space += stats.space; 712 space += stats.space;
643 bcount += stats.bcount; 713 bcount += stats.bcount;
@@ -687,8 +757,28 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
687 if (hinfo->hash_version <= DX_HASH_TEA) 757 if (hinfo->hash_version <= DX_HASH_TEA)
688 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; 758 hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
689 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; 759 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
760#ifdef CONFIG_EXT4_FS_ENCRYPTION
761 if (d_name) {
762 struct ext4_fname_crypto_ctx *ctx = NULL;
763 int res;
764
765 /* Check if the directory is encrypted */
766 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
767 if (IS_ERR(ctx)) {
768 ret_err = ERR_PTR(PTR_ERR(ctx));
769 goto fail;
770 }
771 res = ext4_fname_usr_to_hash(ctx, d_name, hinfo);
772 if (res < 0) {
773 ret_err = ERR_PTR(res);
774 goto fail;
775 }
776 ext4_put_fname_crypto_ctx(&ctx);
777 }
778#else
690 if (d_name) 779 if (d_name)
691 ext4fs_dirhash(d_name->name, d_name->len, hinfo); 780 ext4fs_dirhash(d_name->name, d_name->len, hinfo);
781#endif
692 hash = hinfo->hash; 782 hash = hinfo->hash;
693 783
694 if (root->info.unused_flags & 1) { 784 if (root->info.unused_flags & 1) {
@@ -773,6 +863,7 @@ fail:
773 brelse(frame->bh); 863 brelse(frame->bh);
774 frame--; 864 frame--;
775 } 865 }
866
776 if (ret_err == ERR_PTR(ERR_BAD_DX_DIR)) 867 if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
777 ext4_warning(dir->i_sb, 868 ext4_warning(dir->i_sb,
778 "Corrupt dir inode %lu, running e2fsck is " 869 "Corrupt dir inode %lu, running e2fsck is "
@@ -878,6 +969,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
878 struct buffer_head *bh; 969 struct buffer_head *bh;
879 struct ext4_dir_entry_2 *de, *top; 970 struct ext4_dir_entry_2 *de, *top;
880 int err = 0, count = 0; 971 int err = 0, count = 0;
972 struct ext4_fname_crypto_ctx *ctx = NULL;
973 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str;
881 974
882 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 975 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
883 (unsigned long)block)); 976 (unsigned long)block));
@@ -889,6 +982,24 @@ static int htree_dirblock_to_tree(struct file *dir_file,
889 top = (struct ext4_dir_entry_2 *) ((char *) de + 982 top = (struct ext4_dir_entry_2 *) ((char *) de +
890 dir->i_sb->s_blocksize - 983 dir->i_sb->s_blocksize -
891 EXT4_DIR_REC_LEN(0)); 984 EXT4_DIR_REC_LEN(0));
985#ifdef CONFIG_EXT4_FS_ENCRYPTION
986 /* Check if the directory is encrypted */
987 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
988 if (IS_ERR(ctx)) {
989 err = PTR_ERR(ctx);
990 brelse(bh);
991 return err;
992 }
993 if (ctx != NULL) {
994 err = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
995 &fname_crypto_str);
996 if (err < 0) {
997 ext4_put_fname_crypto_ctx(&ctx);
998 brelse(bh);
999 return err;
1000 }
1001 }
1002#endif
892 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { 1003 for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
893 if (ext4_check_dir_entry(dir, NULL, de, bh, 1004 if (ext4_check_dir_entry(dir, NULL, de, bh,
894 bh->b_data, bh->b_size, 1005 bh->b_data, bh->b_size,
@@ -897,21 +1008,52 @@ static int htree_dirblock_to_tree(struct file *dir_file,
897 /* silently ignore the rest of the block */ 1008 /* silently ignore the rest of the block */
898 break; 1009 break;
899 } 1010 }
1011#ifdef CONFIG_EXT4_FS_ENCRYPTION
1012 err = ext4_fname_disk_to_hash(ctx, de, hinfo);
1013 if (err < 0) {
1014 count = err;
1015 goto errout;
1016 }
1017#else
900 ext4fs_dirhash(de->name, de->name_len, hinfo); 1018 ext4fs_dirhash(de->name, de->name_len, hinfo);
1019#endif
901 if ((hinfo->hash < start_hash) || 1020 if ((hinfo->hash < start_hash) ||
902 ((hinfo->hash == start_hash) && 1021 ((hinfo->hash == start_hash) &&
903 (hinfo->minor_hash < start_minor_hash))) 1022 (hinfo->minor_hash < start_minor_hash)))
904 continue; 1023 continue;
905 if (de->inode == 0) 1024 if (de->inode == 0)
906 continue; 1025 continue;
907 if ((err = ext4_htree_store_dirent(dir_file, 1026 if (ctx == NULL) {
908 hinfo->hash, hinfo->minor_hash, de)) != 0) { 1027 /* Directory is not encrypted */
909 brelse(bh); 1028 tmp_str.name = de->name;
910 return err; 1029 tmp_str.len = de->name_len;
1030 err = ext4_htree_store_dirent(dir_file,
1031 hinfo->hash, hinfo->minor_hash, de,
1032 &tmp_str);
1033 } else {
1034 /* Directory is encrypted */
1035 err = ext4_fname_disk_to_usr(ctx, de,
1036 &fname_crypto_str);
1037 if (err < 0) {
1038 count = err;
1039 goto errout;
1040 }
1041 err = ext4_htree_store_dirent(dir_file,
1042 hinfo->hash, hinfo->minor_hash, de,
1043 &fname_crypto_str);
1044 }
1045 if (err != 0) {
1046 count = err;
1047 goto errout;
911 } 1048 }
912 count++; 1049 count++;
913 } 1050 }
1051errout:
914 brelse(bh); 1052 brelse(bh);
1053#ifdef CONFIG_EXT4_FS_ENCRYPTION
1054 ext4_put_fname_crypto_ctx(&ctx);
1055 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1056#endif
915 return count; 1057 return count;
916} 1058}
917 1059
@@ -935,6 +1077,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
935 int count = 0; 1077 int count = 0;
936 int ret, err; 1078 int ret, err;
937 __u32 hashval; 1079 __u32 hashval;
1080 struct ext4_str tmp_str;
938 1081
939 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", 1082 dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
940 start_hash, start_minor_hash)); 1083 start_hash, start_minor_hash));
@@ -970,14 +1113,22 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
970 /* Add '.' and '..' from the htree header */ 1113 /* Add '.' and '..' from the htree header */
971 if (!start_hash && !start_minor_hash) { 1114 if (!start_hash && !start_minor_hash) {
972 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; 1115 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
973 if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0) 1116 tmp_str.name = de->name;
1117 tmp_str.len = de->name_len;
1118 err = ext4_htree_store_dirent(dir_file, 0, 0,
1119 de, &tmp_str);
1120 if (err != 0)
974 goto errout; 1121 goto errout;
975 count++; 1122 count++;
976 } 1123 }
977 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { 1124 if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
978 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; 1125 de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
979 de = ext4_next_entry(de, dir->i_sb->s_blocksize); 1126 de = ext4_next_entry(de, dir->i_sb->s_blocksize);
980 if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) 1127 tmp_str.name = de->name;
1128 tmp_str.len = de->name_len;
1129 err = ext4_htree_store_dirent(dir_file, 2, 0,
1130 de, &tmp_str);
1131 if (err != 0)
981 goto errout; 1132 goto errout;
982 count++; 1133 count++;
983 } 1134 }
@@ -1035,17 +1186,33 @@ static inline int search_dirblock(struct buffer_head *bh,
1035 * Create map of hash values, offsets, and sizes, stored at end of block. 1186 * Create map of hash values, offsets, and sizes, stored at end of block.
1036 * Returns number of entries mapped. 1187 * Returns number of entries mapped.
1037 */ 1188 */
1038static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, 1189static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
1039 struct dx_hash_info *hinfo, 1190 unsigned blocksize, struct dx_hash_info *hinfo,
1040 struct dx_map_entry *map_tail) 1191 struct dx_map_entry *map_tail)
1041{ 1192{
1042 int count = 0; 1193 int count = 0;
1043 char *base = (char *) de; 1194 char *base = (char *) de;
1044 struct dx_hash_info h = *hinfo; 1195 struct dx_hash_info h = *hinfo;
1196#ifdef CONFIG_EXT4_FS_ENCRYPTION
1197 struct ext4_fname_crypto_ctx *ctx = NULL;
1198 int err;
1199
1200 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1201 if (IS_ERR(ctx))
1202 return PTR_ERR(ctx);
1203#endif
1045 1204
1046 while ((char *) de < base + blocksize) { 1205 while ((char *) de < base + blocksize) {
1047 if (de->name_len && de->inode) { 1206 if (de->name_len && de->inode) {
1207#ifdef CONFIG_EXT4_FS_ENCRYPTION
1208 err = ext4_fname_disk_to_hash(ctx, de, &h);
1209 if (err < 0) {
1210 ext4_put_fname_crypto_ctx(&ctx);
1211 return err;
1212 }
1213#else
1048 ext4fs_dirhash(de->name, de->name_len, &h); 1214 ext4fs_dirhash(de->name, de->name_len, &h);
1215#endif
1049 map_tail--; 1216 map_tail--;
1050 map_tail->hash = h.hash; 1217 map_tail->hash = h.hash;
1051 map_tail->offs = ((char *) de - base)>>2; 1218 map_tail->offs = ((char *) de - base)>>2;
@@ -1056,6 +1223,9 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
1056 /* XXX: do we need to check rec_len == 0 case? -Chris */ 1223 /* XXX: do we need to check rec_len == 0 case? -Chris */
1057 de = ext4_next_entry(de, blocksize); 1224 de = ext4_next_entry(de, blocksize);
1058 } 1225 }
1226#ifdef CONFIG_EXT4_FS_ENCRYPTION
1227 ext4_put_fname_crypto_ctx(&ctx);
1228#endif
1059 return count; 1229 return count;
1060} 1230}
1061 1231
@@ -1106,57 +1276,107 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1106 * `len <= EXT4_NAME_LEN' is guaranteed by caller. 1276 * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1107 * `de != NULL' is guaranteed by caller. 1277 * `de != NULL' is guaranteed by caller.
1108 */ 1278 */
1109static inline int ext4_match (int len, const char * const name, 1279static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
1110 struct ext4_dir_entry_2 * de) 1280 struct ext4_str *fname_crypto_str,
1281 int len, const char * const name,
1282 struct ext4_dir_entry_2 *de)
1111{ 1283{
1112 if (len != de->name_len) 1284 int res;
1113 return 0; 1285
1114 if (!de->inode) 1286 if (!de->inode)
1115 return 0; 1287 return 0;
1116 return !memcmp(name, de->name, len); 1288
1289#ifdef CONFIG_EXT4_FS_ENCRYPTION
1290 if (ctx) {
1291 /* Directory is encrypted */
1292 res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str);
1293 if (res < 0)
1294 return res;
1295 if (len != res)
1296 return 0;
1297 res = memcmp(name, fname_crypto_str->name, len);
1298 return (res == 0) ? 1 : 0;
1299 }
1300#endif
1301 if (len != de->name_len)
1302 return 0;
1303 res = memcmp(name, de->name, len);
1304 return (res == 0) ? 1 : 0;
1117} 1305}
1118 1306
1119/* 1307/*
1120 * Returns 0 if not found, -1 on failure, and 1 on success 1308 * Returns 0 if not found, -1 on failure, and 1 on success
1121 */ 1309 */
1122int search_dir(struct buffer_head *bh, 1310int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1123 char *search_buf, 1311 struct inode *dir, const struct qstr *d_name,
1124 int buf_size, 1312 unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1125 struct inode *dir,
1126 const struct qstr *d_name,
1127 unsigned int offset,
1128 struct ext4_dir_entry_2 **res_dir)
1129{ 1313{
1130 struct ext4_dir_entry_2 * de; 1314 struct ext4_dir_entry_2 * de;
1131 char * dlimit; 1315 char * dlimit;
1132 int de_len; 1316 int de_len;
1133 const char *name = d_name->name; 1317 const char *name = d_name->name;
1134 int namelen = d_name->len; 1318 int namelen = d_name->len;
1319 struct ext4_fname_crypto_ctx *ctx = NULL;
1320 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
1321 int res;
1322
1323 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1324 if (IS_ERR(ctx))
1325 return -1;
1326
1327 if (ctx != NULL) {
1328 /* Allocate buffer to hold maximum name length */
1329 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1330 &fname_crypto_str);
1331 if (res < 0) {
1332 ext4_put_fname_crypto_ctx(&ctx);
1333 return -1;
1334 }
1335 }
1135 1336
1136 de = (struct ext4_dir_entry_2 *)search_buf; 1337 de = (struct ext4_dir_entry_2 *)search_buf;
1137 dlimit = search_buf + buf_size; 1338 dlimit = search_buf + buf_size;
1138 while ((char *) de < dlimit) { 1339 while ((char *) de < dlimit) {
1139 /* this code is executed quadratically often */ 1340 /* this code is executed quadratically often */
1140 /* do minimal checking `by hand' */ 1341 /* do minimal checking `by hand' */
1342 if ((char *) de + de->name_len <= dlimit) {
1343 res = ext4_match(ctx, &fname_crypto_str, namelen,
1344 name, de);
1345 if (res < 0) {
1346 res = -1;
1347 goto return_result;
1348 }
1349 if (res > 0) {
1350 /* found a match - just to be sure, do
1351 * a full check */
1352 if (ext4_check_dir_entry(dir, NULL, de, bh,
1353 bh->b_data,
1354 bh->b_size, offset)) {
1355 res = -1;
1356 goto return_result;
1357 }
1358 *res_dir = de;
1359 res = 1;
1360 goto return_result;
1361 }
1141 1362
1142 if ((char *) de + namelen <= dlimit &&
1143 ext4_match (namelen, name, de)) {
1144 /* found a match - just to be sure, do a full check */
1145 if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
1146 bh->b_size, offset))
1147 return -1;
1148 *res_dir = de;
1149 return 1;
1150 } 1363 }
1151 /* prevent looping on a bad block */ 1364 /* prevent looping on a bad block */
1152 de_len = ext4_rec_len_from_disk(de->rec_len, 1365 de_len = ext4_rec_len_from_disk(de->rec_len,
1153 dir->i_sb->s_blocksize); 1366 dir->i_sb->s_blocksize);
1154 if (de_len <= 0) 1367 if (de_len <= 0) {
1155 return -1; 1368 res = -1;
1369 goto return_result;
1370 }
1156 offset += de_len; 1371 offset += de_len;
1157 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 1372 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1158 } 1373 }
1159 return 0; 1374
1375 res = 0;
1376return_result:
1377 ext4_put_fname_crypto_ctx(&ctx);
1378 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1379 return res;
1160} 1380}
1161 1381
1162static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, 1382static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
@@ -1345,6 +1565,9 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1345 ext4_lblk_t block; 1565 ext4_lblk_t block;
1346 int retval; 1566 int retval;
1347 1567
1568#ifdef CONFIG_EXT4_FS_ENCRYPTION
1569 *res_dir = NULL;
1570#endif
1348 frame = dx_probe(d_name, dir, &hinfo, frames); 1571 frame = dx_probe(d_name, dir, &hinfo, frames);
1349 if (IS_ERR(frame)) 1572 if (IS_ERR(frame))
1350 return (struct buffer_head *) frame; 1573 return (struct buffer_head *) frame;
@@ -1417,6 +1640,18 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1417 ino); 1640 ino);
1418 return ERR_PTR(-EIO); 1641 return ERR_PTR(-EIO);
1419 } 1642 }
1643 if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
1644 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1645 S_ISLNK(inode->i_mode)) &&
1646 !ext4_is_child_context_consistent_with_parent(dir,
1647 inode)) {
1648 iput(inode);
1649 ext4_warning(inode->i_sb,
1650 "Inconsistent encryption contexts: %lu/%lu\n",
1651 (unsigned long) dir->i_ino,
1652 (unsigned long) inode->i_ino);
1653 return ERR_PTR(-EPERM);
1654 }
1420 } 1655 }
1421 return d_splice_alias(inode, dentry); 1656 return d_splice_alias(inode, dentry);
1422} 1657}
@@ -1541,7 +1776,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1541 1776
1542 /* create map in the end of data2 block */ 1777 /* create map in the end of data2 block */
1543 map = (struct dx_map_entry *) (data2 + blocksize); 1778 map = (struct dx_map_entry *) (data2 + blocksize);
1544 count = dx_make_map((struct ext4_dir_entry_2 *) data1, 1779 count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1,
1545 blocksize, hinfo, map); 1780 blocksize, hinfo, map);
1546 map -= count; 1781 map -= count;
1547 dx_sort_map(map, count); 1782 dx_sort_map(map, count);
@@ -1564,7 +1799,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1564 hash2, split, count-split)); 1799 hash2, split, count-split));
1565 1800
1566 /* Fancy dance to stay within two buffers */ 1801 /* Fancy dance to stay within two buffers */
1567 de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); 1802 de2 = dx_move_dirents(data1, data2, map + split, count - split,
1803 blocksize);
1568 de = dx_pack_dirents(data1, blocksize); 1804 de = dx_pack_dirents(data1, blocksize);
1569 de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - 1805 de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1570 (char *) de, 1806 (char *) de,
@@ -1580,8 +1816,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1580 initialize_dirent_tail(t, blocksize); 1816 initialize_dirent_tail(t, blocksize);
1581 } 1817 }
1582 1818
1583 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); 1819 dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
1584 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1820 blocksize, 1));
1821 dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
1822 blocksize, 1));
1585 1823
1586 /* Which block gets the new entry? */ 1824 /* Which block gets the new entry? */
1587 if (hinfo->hash >= hash2) { 1825 if (hinfo->hash >= hash2) {
@@ -1618,15 +1856,48 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1618 int nlen, rlen; 1856 int nlen, rlen;
1619 unsigned int offset = 0; 1857 unsigned int offset = 0;
1620 char *top; 1858 char *top;
1859 struct ext4_fname_crypto_ctx *ctx = NULL;
1860 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
1861 int res;
1862
1863 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1864 if (IS_ERR(ctx))
1865 return -1;
1866
1867 if (ctx != NULL) {
1868 /* Calculate record length needed to store the entry */
1869 res = ext4_fname_crypto_namelen_on_disk(ctx, namelen);
1870 if (res < 0) {
1871 ext4_put_fname_crypto_ctx(&ctx);
1872 return res;
1873 }
1874 reclen = EXT4_DIR_REC_LEN(res);
1875
1876 /* Allocate buffer to hold maximum name length */
1877 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1878 &fname_crypto_str);
1879 if (res < 0) {
1880 ext4_put_fname_crypto_ctx(&ctx);
1881 return -1;
1882 }
1883 }
1621 1884
1622 de = (struct ext4_dir_entry_2 *)buf; 1885 de = (struct ext4_dir_entry_2 *)buf;
1623 top = buf + buf_size - reclen; 1886 top = buf + buf_size - reclen;
1624 while ((char *) de <= top) { 1887 while ((char *) de <= top) {
1625 if (ext4_check_dir_entry(dir, NULL, de, bh, 1888 if (ext4_check_dir_entry(dir, NULL, de, bh,
1626 buf, buf_size, offset)) 1889 buf, buf_size, offset)) {
1627 return -EIO; 1890 res = -EIO;
1628 if (ext4_match(namelen, name, de)) 1891 goto return_result;
1629 return -EEXIST; 1892 }
1893 /* Provide crypto context and crypto buffer to ext4 match */
1894 res = ext4_match(ctx, &fname_crypto_str, namelen, name, de);
1895 if (res < 0)
1896 goto return_result;
1897 if (res > 0) {
1898 res = -EEXIST;
1899 goto return_result;
1900 }
1630 nlen = EXT4_DIR_REC_LEN(de->name_len); 1901 nlen = EXT4_DIR_REC_LEN(de->name_len);
1631 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); 1902 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1632 if ((de->inode ? rlen - nlen : rlen) >= reclen) 1903 if ((de->inode ? rlen - nlen : rlen) >= reclen)
@@ -1634,26 +1905,62 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
1634 de = (struct ext4_dir_entry_2 *)((char *)de + rlen); 1905 de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1635 offset += rlen; 1906 offset += rlen;
1636 } 1907 }
1637 if ((char *) de > top)
1638 return -ENOSPC;
1639 1908
1640 *dest_de = de; 1909 if ((char *) de > top)
1641 return 0; 1910 res = -ENOSPC;
1911 else {
1912 *dest_de = de;
1913 res = 0;
1914 }
1915return_result:
1916 ext4_put_fname_crypto_ctx(&ctx);
1917 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1918 return res;
1642} 1919}
1643 1920
1644void ext4_insert_dentry(struct inode *inode, 1921int ext4_insert_dentry(struct inode *dir,
1645 struct ext4_dir_entry_2 *de, 1922 struct inode *inode,
1646 int buf_size, 1923 struct ext4_dir_entry_2 *de,
1647 const char *name, int namelen) 1924 int buf_size,
1925 const struct qstr *iname,
1926 const char *name, int namelen)
1648{ 1927{
1649 1928
1650 int nlen, rlen; 1929 int nlen, rlen;
1930 struct ext4_fname_crypto_ctx *ctx = NULL;
1931 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
1932 struct ext4_str tmp_str;
1933 int res;
1934
1935 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
1936 if (IS_ERR(ctx))
1937 return -EIO;
1938 /* By default, the input name would be written to the disk */
1939 tmp_str.name = (unsigned char *)name;
1940 tmp_str.len = namelen;
1941 if (ctx != NULL) {
1942 /* Directory is encrypted */
1943 res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
1944 &fname_crypto_str);
1945 if (res < 0) {
1946 ext4_put_fname_crypto_ctx(&ctx);
1947 return -ENOMEM;
1948 }
1949 res = ext4_fname_usr_to_disk(ctx, iname, &fname_crypto_str);
1950 if (res < 0) {
1951 ext4_put_fname_crypto_ctx(&ctx);
1952 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1953 return res;
1954 }
1955 tmp_str.name = fname_crypto_str.name;
1956 tmp_str.len = fname_crypto_str.len;
1957 }
1651 1958
1652 nlen = EXT4_DIR_REC_LEN(de->name_len); 1959 nlen = EXT4_DIR_REC_LEN(de->name_len);
1653 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size); 1960 rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
1654 if (de->inode) { 1961 if (de->inode) {
1655 struct ext4_dir_entry_2 *de1 = 1962 struct ext4_dir_entry_2 *de1 =
1656 (struct ext4_dir_entry_2 *)((char *)de + nlen); 1963 (struct ext4_dir_entry_2 *)((char *)de + nlen);
1657 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size); 1964 de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
1658 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size); 1965 de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
1659 de = de1; 1966 de = de1;
@@ -1661,9 +1968,14 @@ void ext4_insert_dentry(struct inode *inode,
1661 de->file_type = EXT4_FT_UNKNOWN; 1968 de->file_type = EXT4_FT_UNKNOWN;
1662 de->inode = cpu_to_le32(inode->i_ino); 1969 de->inode = cpu_to_le32(inode->i_ino);
1663 ext4_set_de_type(inode->i_sb, de, inode->i_mode); 1970 ext4_set_de_type(inode->i_sb, de, inode->i_mode);
1664 de->name_len = namelen; 1971 de->name_len = tmp_str.len;
1665 memcpy(de->name, name, namelen); 1972
1973 memcpy(de->name, tmp_str.name, tmp_str.len);
1974 ext4_put_fname_crypto_ctx(&ctx);
1975 ext4_fname_crypto_free_buffer(&fname_crypto_str);
1976 return 0;
1666} 1977}
1978
1667/* 1979/*
1668 * Add a new entry into a directory (leaf) block. If de is non-NULL, 1980 * Add a new entry into a directory (leaf) block. If de is non-NULL,
1669 * it points to a directory entry which is guaranteed to be large 1981 * it points to a directory entry which is guaranteed to be large
@@ -1700,8 +2012,12 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1700 return err; 2012 return err;
1701 } 2013 }
1702 2014
1703 /* By now the buffer is marked for journaling */ 2015 /* By now the buffer is marked for journaling. Due to crypto operations,
1704 ext4_insert_dentry(inode, de, blocksize, name, namelen); 2016 * the following function call may fail */
2017 err = ext4_insert_dentry(dir, inode, de, blocksize, &dentry->d_name,
2018 name, namelen);
2019 if (err < 0)
2020 return err;
1705 2021
1706 /* 2022 /*
1707 * XXX shouldn't update any times until successful 2023 * XXX shouldn't update any times until successful
@@ -1733,8 +2049,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1733 struct inode *inode, struct buffer_head *bh) 2049 struct inode *inode, struct buffer_head *bh)
1734{ 2050{
1735 struct inode *dir = dentry->d_parent->d_inode; 2051 struct inode *dir = dentry->d_parent->d_inode;
2052#ifdef CONFIG_EXT4_FS_ENCRYPTION
2053 struct ext4_fname_crypto_ctx *ctx = NULL;
2054 int res;
2055#else
1736 const char *name = dentry->d_name.name; 2056 const char *name = dentry->d_name.name;
1737 int namelen = dentry->d_name.len; 2057 int namelen = dentry->d_name.len;
2058#endif
1738 struct buffer_head *bh2; 2059 struct buffer_head *bh2;
1739 struct dx_root *root; 2060 struct dx_root *root;
1740 struct dx_frame frames[2], *frame; 2061 struct dx_frame frames[2], *frame;
@@ -1748,7 +2069,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1748 struct dx_hash_info hinfo; 2069 struct dx_hash_info hinfo;
1749 ext4_lblk_t block; 2070 ext4_lblk_t block;
1750 struct fake_dirent *fde; 2071 struct fake_dirent *fde;
1751 int csum_size = 0; 2072 int csum_size = 0;
2073
2074#ifdef CONFIG_EXT4_FS_ENCRYPTION
2075 ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
2076 if (IS_ERR(ctx))
2077 return PTR_ERR(ctx);
2078#endif
1752 2079
1753 if (ext4_has_metadata_csum(inode->i_sb)) 2080 if (ext4_has_metadata_csum(inode->i_sb))
1754 csum_size = sizeof(struct ext4_dir_entry_tail); 2081 csum_size = sizeof(struct ext4_dir_entry_tail);
@@ -1815,7 +2142,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1815 if (hinfo.hash_version <= DX_HASH_TEA) 2142 if (hinfo.hash_version <= DX_HASH_TEA)
1816 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; 2143 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1817 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; 2144 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2145#ifdef CONFIG_EXT4_FS_ENCRYPTION
2146 res = ext4_fname_usr_to_hash(ctx, &dentry->d_name, &hinfo);
2147 if (res < 0) {
2148 ext4_put_fname_crypto_ctx(&ctx);
2149 ext4_mark_inode_dirty(handle, dir);
2150 brelse(bh);
2151 return res;
2152 }
2153 ext4_put_fname_crypto_ctx(&ctx);
2154#else
1818 ext4fs_dirhash(name, namelen, &hinfo); 2155 ext4fs_dirhash(name, namelen, &hinfo);
2156#endif
1819 memset(frames, 0, sizeof(frames)); 2157 memset(frames, 0, sizeof(frames));
1820 frame = frames; 2158 frame = frames;
1821 frame->entries = entries; 2159 frame->entries = entries;
@@ -1865,7 +2203,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1865 struct inode *inode) 2203 struct inode *inode)
1866{ 2204{
1867 struct inode *dir = dentry->d_parent->d_inode; 2205 struct inode *dir = dentry->d_parent->d_inode;
1868 struct buffer_head *bh; 2206 struct buffer_head *bh = NULL;
1869 struct ext4_dir_entry_2 *de; 2207 struct ext4_dir_entry_2 *de;
1870 struct ext4_dir_entry_tail *t; 2208 struct ext4_dir_entry_tail *t;
1871 struct super_block *sb; 2209 struct super_block *sb;
@@ -1889,14 +2227,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1889 return retval; 2227 return retval;
1890 if (retval == 1) { 2228 if (retval == 1) {
1891 retval = 0; 2229 retval = 0;
1892 return retval; 2230 goto out;
1893 } 2231 }
1894 } 2232 }
1895 2233
1896 if (is_dx(dir)) { 2234 if (is_dx(dir)) {
1897 retval = ext4_dx_add_entry(handle, dentry, inode); 2235 retval = ext4_dx_add_entry(handle, dentry, inode);
1898 if (!retval || (retval != ERR_BAD_DX_DIR)) 2236 if (!retval || (retval != ERR_BAD_DX_DIR))
1899 return retval; 2237 goto out;
1900 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); 2238 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
1901 dx_fallback++; 2239 dx_fallback++;
1902 ext4_mark_inode_dirty(handle, dir); 2240 ext4_mark_inode_dirty(handle, dir);
@@ -1908,14 +2246,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1908 return PTR_ERR(bh); 2246 return PTR_ERR(bh);
1909 2247
1910 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 2248 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1911 if (retval != -ENOSPC) { 2249 if (retval != -ENOSPC)
1912 brelse(bh); 2250 goto out;
1913 return retval;
1914 }
1915 2251
1916 if (blocks == 1 && !dx_fallback && 2252 if (blocks == 1 && !dx_fallback &&
1917 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) 2253 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
1918 return make_indexed_dir(handle, dentry, inode, bh); 2254 retval = make_indexed_dir(handle, dentry, inode, bh);
2255 bh = NULL; /* make_indexed_dir releases bh */
2256 goto out;
2257 }
1919 brelse(bh); 2258 brelse(bh);
1920 } 2259 }
1921 bh = ext4_append(handle, dir, &block); 2260 bh = ext4_append(handle, dir, &block);
@@ -1931,6 +2270,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1931 } 2270 }
1932 2271
1933 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 2272 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
2273out:
1934 brelse(bh); 2274 brelse(bh);
1935 if (retval == 0) 2275 if (retval == 0)
1936 ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); 2276 ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -2235,12 +2575,22 @@ retry:
2235 err = PTR_ERR(inode); 2575 err = PTR_ERR(inode);
2236 if (!IS_ERR(inode)) { 2576 if (!IS_ERR(inode)) {
2237 inode->i_op = &ext4_file_inode_operations; 2577 inode->i_op = &ext4_file_inode_operations;
2238 if (test_opt(inode->i_sb, DAX)) 2578 inode->i_fop = &ext4_file_operations;
2239 inode->i_fop = &ext4_dax_file_operations;
2240 else
2241 inode->i_fop = &ext4_file_operations;
2242 ext4_set_aops(inode); 2579 ext4_set_aops(inode);
2243 err = ext4_add_nondir(handle, dentry, inode); 2580 err = 0;
2581#ifdef CONFIG_EXT4_FS_ENCRYPTION
2582 if (!err && (ext4_encrypted_inode(dir) ||
2583 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)))) {
2584 err = ext4_inherit_context(dir, inode);
2585 if (err) {
2586 clear_nlink(inode);
2587 unlock_new_inode(inode);
2588 iput(inode);
2589 }
2590 }
2591#endif
2592 if (!err)
2593 err = ext4_add_nondir(handle, dentry, inode);
2244 if (!err && IS_DIRSYNC(dir)) 2594 if (!err && IS_DIRSYNC(dir))
2245 ext4_handle_sync(handle); 2595 ext4_handle_sync(handle);
2246 } 2596 }
@@ -2302,10 +2652,7 @@ retry:
2302 err = PTR_ERR(inode); 2652 err = PTR_ERR(inode);
2303 if (!IS_ERR(inode)) { 2653 if (!IS_ERR(inode)) {
2304 inode->i_op = &ext4_file_inode_operations; 2654 inode->i_op = &ext4_file_inode_operations;
2305 if (test_opt(inode->i_sb, DAX)) 2655 inode->i_fop = &ext4_file_operations;
2306 inode->i_fop = &ext4_dax_file_operations;
2307 else
2308 inode->i_fop = &ext4_file_operations;
2309 ext4_set_aops(inode); 2656 ext4_set_aops(inode);
2310 d_tmpfile(dentry, inode); 2657 d_tmpfile(dentry, inode);
2311 err = ext4_orphan_add(handle, inode); 2658 err = ext4_orphan_add(handle, inode);
@@ -2424,6 +2771,14 @@ retry:
2424 err = ext4_init_new_dir(handle, dir, inode); 2771 err = ext4_init_new_dir(handle, dir, inode);
2425 if (err) 2772 if (err)
2426 goto out_clear_inode; 2773 goto out_clear_inode;
2774#ifdef CONFIG_EXT4_FS_ENCRYPTION
2775 if (ext4_encrypted_inode(dir) ||
2776 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) {
2777 err = ext4_inherit_context(dir, inode);
2778 if (err)
2779 goto out_clear_inode;
2780 }
2781#endif
2427 err = ext4_mark_inode_dirty(handle, inode); 2782 err = ext4_mark_inode_dirty(handle, inode);
2428 if (!err) 2783 if (!err)
2429 err = ext4_add_entry(handle, dentry, inode); 2784 err = ext4_add_entry(handle, dentry, inode);
@@ -2456,7 +2811,7 @@ out_stop:
2456/* 2811/*
2457 * routine to check that the specified directory is empty (for rmdir) 2812 * routine to check that the specified directory is empty (for rmdir)
2458 */ 2813 */
2459static int empty_dir(struct inode *inode) 2814int ext4_empty_dir(struct inode *inode)
2460{ 2815{
2461 unsigned int offset; 2816 unsigned int offset;
2462 struct buffer_head *bh; 2817 struct buffer_head *bh;
@@ -2724,7 +3079,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2724 goto end_rmdir; 3079 goto end_rmdir;
2725 3080
2726 retval = -ENOTEMPTY; 3081 retval = -ENOTEMPTY;
2727 if (!empty_dir(inode)) 3082 if (!ext4_empty_dir(inode))
2728 goto end_rmdir; 3083 goto end_rmdir;
2729 3084
2730 handle = ext4_journal_start(dir, EXT4_HT_DIR, 3085 handle = ext4_journal_start(dir, EXT4_HT_DIR,
@@ -2834,16 +3189,25 @@ static int ext4_symlink(struct inode *dir,
2834{ 3189{
2835 handle_t *handle; 3190 handle_t *handle;
2836 struct inode *inode; 3191 struct inode *inode;
2837 int l, err, retries = 0; 3192 int err, len = strlen(symname);
2838 int credits; 3193 int credits;
2839 3194 bool encryption_required;
2840 l = strlen(symname)+1; 3195 struct ext4_str disk_link;
2841 if (l > dir->i_sb->s_blocksize) 3196 struct ext4_encrypted_symlink_data *sd = NULL;
3197
3198 disk_link.len = len + 1;
3199 disk_link.name = (char *) symname;
3200
3201 encryption_required = (ext4_encrypted_inode(dir) ||
3202 DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)));
3203 if (encryption_required)
3204 disk_link.len = encrypted_symlink_data_len(len) + 1;
3205 if (disk_link.len > dir->i_sb->s_blocksize)
2842 return -ENAMETOOLONG; 3206 return -ENAMETOOLONG;
2843 3207
2844 dquot_initialize(dir); 3208 dquot_initialize(dir);
2845 3209
2846 if (l > EXT4_N_BLOCKS * 4) { 3210 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
2847 /* 3211 /*
2848 * For non-fast symlinks, we just allocate inode and put it on 3212 * For non-fast symlinks, we just allocate inode and put it on
2849 * orphan list in the first transaction => we need bitmap, 3213 * orphan list in the first transaction => we need bitmap,
@@ -2862,16 +3226,49 @@ static int ext4_symlink(struct inode *dir,
2862 credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 3226 credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2863 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; 3227 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
2864 } 3228 }
2865retry: 3229
2866 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, 3230 inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
2867 &dentry->d_name, 0, NULL, 3231 &dentry->d_name, 0, NULL,
2868 EXT4_HT_DIR, credits); 3232 EXT4_HT_DIR, credits);
2869 handle = ext4_journal_current_handle(); 3233 handle = ext4_journal_current_handle();
2870 err = PTR_ERR(inode); 3234 if (IS_ERR(inode)) {
2871 if (IS_ERR(inode)) 3235 if (handle)
2872 goto out_stop; 3236 ext4_journal_stop(handle);
3237 return PTR_ERR(inode);
3238 }
3239
3240 if (encryption_required) {
3241 struct ext4_fname_crypto_ctx *ctx = NULL;
3242 struct qstr istr;
3243 struct ext4_str ostr;
3244
3245 sd = kzalloc(disk_link.len, GFP_NOFS);
3246 if (!sd) {
3247 err = -ENOMEM;
3248 goto err_drop_inode;
3249 }
3250 err = ext4_inherit_context(dir, inode);
3251 if (err)
3252 goto err_drop_inode;
3253 ctx = ext4_get_fname_crypto_ctx(inode,
3254 inode->i_sb->s_blocksize);
3255 if (IS_ERR_OR_NULL(ctx)) {
3256 /* We just set the policy, so ctx should not be NULL */
3257 err = (ctx == NULL) ? -EIO : PTR_ERR(ctx);
3258 goto err_drop_inode;
3259 }
3260 istr.name = (const unsigned char *) symname;
3261 istr.len = len;
3262 ostr.name = sd->encrypted_path;
3263 err = ext4_fname_usr_to_disk(ctx, &istr, &ostr);
3264 ext4_put_fname_crypto_ctx(&ctx);
3265 if (err < 0)
3266 goto err_drop_inode;
3267 sd->len = cpu_to_le16(ostr.len);
3268 disk_link.name = (char *) sd;
3269 }
2873 3270
2874 if (l > EXT4_N_BLOCKS * 4) { 3271 if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
2875 inode->i_op = &ext4_symlink_inode_operations; 3272 inode->i_op = &ext4_symlink_inode_operations;
2876 ext4_set_aops(inode); 3273 ext4_set_aops(inode);
2877 /* 3274 /*
@@ -2887,9 +3284,10 @@ retry:
2887 drop_nlink(inode); 3284 drop_nlink(inode);
2888 err = ext4_orphan_add(handle, inode); 3285 err = ext4_orphan_add(handle, inode);
2889 ext4_journal_stop(handle); 3286 ext4_journal_stop(handle);
3287 handle = NULL;
2890 if (err) 3288 if (err)
2891 goto err_drop_inode; 3289 goto err_drop_inode;
2892 err = __page_symlink(inode, symname, l, 1); 3290 err = __page_symlink(inode, disk_link.name, disk_link.len, 1);
2893 if (err) 3291 if (err)
2894 goto err_drop_inode; 3292 goto err_drop_inode;
2895 /* 3293 /*
@@ -2901,34 +3299,37 @@ retry:
2901 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); 3299 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2902 if (IS_ERR(handle)) { 3300 if (IS_ERR(handle)) {
2903 err = PTR_ERR(handle); 3301 err = PTR_ERR(handle);
3302 handle = NULL;
2904 goto err_drop_inode; 3303 goto err_drop_inode;
2905 } 3304 }
2906 set_nlink(inode, 1); 3305 set_nlink(inode, 1);
2907 err = ext4_orphan_del(handle, inode); 3306 err = ext4_orphan_del(handle, inode);
2908 if (err) { 3307 if (err)
2909 ext4_journal_stop(handle);
2910 clear_nlink(inode);
2911 goto err_drop_inode; 3308 goto err_drop_inode;
2912 }
2913 } else { 3309 } else {
2914 /* clear the extent format for fast symlink */ 3310 /* clear the extent format for fast symlink */
2915 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 3311 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
2916 inode->i_op = &ext4_fast_symlink_inode_operations; 3312 inode->i_op = encryption_required ?
2917 memcpy((char *)&EXT4_I(inode)->i_data, symname, l); 3313 &ext4_symlink_inode_operations :
2918 inode->i_size = l-1; 3314 &ext4_fast_symlink_inode_operations;
3315 memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3316 disk_link.len);
3317 inode->i_size = disk_link.len - 1;
2919 } 3318 }
2920 EXT4_I(inode)->i_disksize = inode->i_size; 3319 EXT4_I(inode)->i_disksize = inode->i_size;
2921 err = ext4_add_nondir(handle, dentry, inode); 3320 err = ext4_add_nondir(handle, dentry, inode);
2922 if (!err && IS_DIRSYNC(dir)) 3321 if (!err && IS_DIRSYNC(dir))
2923 ext4_handle_sync(handle); 3322 ext4_handle_sync(handle);
2924 3323
2925out_stop:
2926 if (handle) 3324 if (handle)
2927 ext4_journal_stop(handle); 3325 ext4_journal_stop(handle);
2928 if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) 3326 kfree(sd);
2929 goto retry;
2930 return err; 3327 return err;
2931err_drop_inode: 3328err_drop_inode:
3329 if (handle)
3330 ext4_journal_stop(handle);
3331 kfree(sd);
3332 clear_nlink(inode);
2932 unlock_new_inode(inode); 3333 unlock_new_inode(inode);
2933 iput(inode); 3334 iput(inode);
2934 return err; 3335 return err;
@@ -2943,7 +3344,9 @@ static int ext4_link(struct dentry *old_dentry,
2943 3344
2944 if (inode->i_nlink >= EXT4_LINK_MAX) 3345 if (inode->i_nlink >= EXT4_LINK_MAX)
2945 return -EMLINK; 3346 return -EMLINK;
2946 3347 if (ext4_encrypted_inode(dir) &&
3348 !ext4_is_child_context_consistent_with_parent(dir, inode))
3349 return -EPERM;
2947 dquot_initialize(dir); 3350 dquot_initialize(dir);
2948 3351
2949retry: 3352retry:
@@ -3244,6 +3647,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3244 if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) 3647 if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3245 goto end_rename; 3648 goto end_rename;
3246 3649
3650 if ((old.dir != new.dir) &&
3651 ext4_encrypted_inode(new.dir) &&
3652 !ext4_is_child_context_consistent_with_parent(new.dir,
3653 old.inode)) {
3654 retval = -EPERM;
3655 goto end_rename;
3656 }
3657
3247 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3658 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3248 &new.de, &new.inlined); 3659 &new.de, &new.inlined);
3249 if (IS_ERR(new.bh)) { 3660 if (IS_ERR(new.bh)) {
@@ -3264,12 +3675,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3264 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 3675 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3265 if (!(flags & RENAME_WHITEOUT)) { 3676 if (!(flags & RENAME_WHITEOUT)) {
3266 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits); 3677 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3267 if (IS_ERR(handle)) 3678 if (IS_ERR(handle)) {
3268 return PTR_ERR(handle); 3679 retval = PTR_ERR(handle);
3680 handle = NULL;
3681 goto end_rename;
3682 }
3269 } else { 3683 } else {
3270 whiteout = ext4_whiteout_for_rename(&old, credits, &handle); 3684 whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
3271 if (IS_ERR(whiteout)) 3685 if (IS_ERR(whiteout)) {
3272 return PTR_ERR(whiteout); 3686 retval = PTR_ERR(whiteout);
3687 whiteout = NULL;
3688 goto end_rename;
3689 }
3273 } 3690 }
3274 3691
3275 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) 3692 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
@@ -3278,7 +3695,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3278 if (S_ISDIR(old.inode->i_mode)) { 3695 if (S_ISDIR(old.inode->i_mode)) {
3279 if (new.inode) { 3696 if (new.inode) {
3280 retval = -ENOTEMPTY; 3697 retval = -ENOTEMPTY;
3281 if (!empty_dir(new.inode)) 3698 if (!ext4_empty_dir(new.inode))
3282 goto end_rename; 3699 goto end_rename;
3283 } else { 3700 } else {
3284 retval = -EMLINK; 3701 retval = -EMLINK;
@@ -3352,8 +3769,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3352 3769
3353 ext4_dec_count(handle, old.dir); 3770 ext4_dec_count(handle, old.dir);
3354 if (new.inode) { 3771 if (new.inode) {
3355 /* checked empty_dir above, can't have another parent, 3772 /* checked ext4_empty_dir above, can't have another
3356 * ext4_dec_count() won't work for many-linked dirs */ 3773 * parent, ext4_dec_count() won't work for many-linked
3774 * dirs */
3357 clear_nlink(new.inode); 3775 clear_nlink(new.inode);
3358 } else { 3776 } else {
3359 ext4_inc_count(handle, new.dir); 3777 ext4_inc_count(handle, new.dir);
@@ -3433,8 +3851,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3433 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, 3851 handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
3434 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + 3852 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3435 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); 3853 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
3436 if (IS_ERR(handle)) 3854 if (IS_ERR(handle)) {
3437 return PTR_ERR(handle); 3855 retval = PTR_ERR(handle);
3856 handle = NULL;
3857 goto end_rename;
3858 }
3438 3859
3439 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) 3860 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3440 ext4_handle_sync(handle); 3861 ext4_handle_sync(handle);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a2541a9ba..5765f88b3904 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -8,7 +8,6 @@
8 8
9#include <linux/fs.h> 9#include <linux/fs.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/jbd2.h>
12#include <linux/highuid.h> 11#include <linux/highuid.h>
13#include <linux/pagemap.h> 12#include <linux/pagemap.h>
14#include <linux/quotaops.h> 13#include <linux/quotaops.h>
@@ -18,14 +17,12 @@
18#include <linux/pagevec.h> 17#include <linux/pagevec.h>
19#include <linux/mpage.h> 18#include <linux/mpage.h>
20#include <linux/namei.h> 19#include <linux/namei.h>
21#include <linux/aio.h>
22#include <linux/uio.h> 20#include <linux/uio.h>
23#include <linux/bio.h> 21#include <linux/bio.h>
24#include <linux/workqueue.h> 22#include <linux/workqueue.h>
25#include <linux/kernel.h> 23#include <linux/kernel.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/mm.h> 25#include <linux/mm.h>
28#include <linux/ratelimit.h>
29 26
30#include "ext4_jbd2.h" 27#include "ext4_jbd2.h"
31#include "xattr.h" 28#include "xattr.h"
@@ -69,6 +66,10 @@ static void ext4_finish_bio(struct bio *bio)
69 66
70 bio_for_each_segment_all(bvec, bio, i) { 67 bio_for_each_segment_all(bvec, bio, i) {
71 struct page *page = bvec->bv_page; 68 struct page *page = bvec->bv_page;
69#ifdef CONFIG_EXT4_FS_ENCRYPTION
70 struct page *data_page = NULL;
71 struct ext4_crypto_ctx *ctx = NULL;
72#endif
72 struct buffer_head *bh, *head; 73 struct buffer_head *bh, *head;
73 unsigned bio_start = bvec->bv_offset; 74 unsigned bio_start = bvec->bv_offset;
74 unsigned bio_end = bio_start + bvec->bv_len; 75 unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,6 +79,15 @@ static void ext4_finish_bio(struct bio *bio)
78 if (!page) 79 if (!page)
79 continue; 80 continue;
80 81
82#ifdef CONFIG_EXT4_FS_ENCRYPTION
83 if (!page->mapping) {
84 /* The bounce data pages are unmapped. */
85 data_page = page;
86 ctx = (struct ext4_crypto_ctx *)page_private(data_page);
87 page = ctx->control_page;
88 }
89#endif
90
81 if (error) { 91 if (error) {
82 SetPageError(page); 92 SetPageError(page);
83 set_bit(AS_EIO, &page->mapping->flags); 93 set_bit(AS_EIO, &page->mapping->flags);
@@ -102,8 +112,13 @@ static void ext4_finish_bio(struct bio *bio)
102 } while ((bh = bh->b_this_page) != head); 112 } while ((bh = bh->b_this_page) != head);
103 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); 113 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
104 local_irq_restore(flags); 114 local_irq_restore(flags);
105 if (!under_io) 115 if (!under_io) {
116#ifdef CONFIG_EXT4_FS_ENCRYPTION
117 if (ctx)
118 ext4_restore_control_page(data_page);
119#endif
106 end_page_writeback(page); 120 end_page_writeback(page);
121 }
107 } 122 }
108} 123}
109 124
@@ -378,6 +393,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
378 393
379static int io_submit_add_bh(struct ext4_io_submit *io, 394static int io_submit_add_bh(struct ext4_io_submit *io,
380 struct inode *inode, 395 struct inode *inode,
396 struct page *page,
381 struct buffer_head *bh) 397 struct buffer_head *bh)
382{ 398{
383 int ret; 399 int ret;
@@ -391,7 +407,7 @@ submit_and_retry:
391 if (ret) 407 if (ret)
392 return ret; 408 return ret;
393 } 409 }
394 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 410 ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
395 if (ret != bh->b_size) 411 if (ret != bh->b_size)
396 goto submit_and_retry; 412 goto submit_and_retry;
397 io->io_next_block++; 413 io->io_next_block++;
@@ -404,6 +420,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
404 struct writeback_control *wbc, 420 struct writeback_control *wbc,
405 bool keep_towrite) 421 bool keep_towrite)
406{ 422{
423 struct page *data_page = NULL;
407 struct inode *inode = page->mapping->host; 424 struct inode *inode = page->mapping->host;
408 unsigned block_start, blocksize; 425 unsigned block_start, blocksize;
409 struct buffer_head *bh, *head; 426 struct buffer_head *bh, *head;
@@ -463,19 +480,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
463 set_buffer_async_write(bh); 480 set_buffer_async_write(bh);
464 } while ((bh = bh->b_this_page) != head); 481 } while ((bh = bh->b_this_page) != head);
465 482
466 /* Now submit buffers to write */
467 bh = head = page_buffers(page); 483 bh = head = page_buffers(page);
484
485 if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
486 data_page = ext4_encrypt(inode, page);
487 if (IS_ERR(data_page)) {
488 ret = PTR_ERR(data_page);
489 data_page = NULL;
490 goto out;
491 }
492 }
493
494 /* Now submit buffers to write */
468 do { 495 do {
469 if (!buffer_async_write(bh)) 496 if (!buffer_async_write(bh))
470 continue; 497 continue;
471 ret = io_submit_add_bh(io, inode, bh); 498 ret = io_submit_add_bh(io, inode,
499 data_page ? data_page : page, bh);
472 if (ret) { 500 if (ret) {
473 /* 501 /*
474 * We only get here on ENOMEM. Not much else 502 * We only get here on ENOMEM. Not much else
475 * we can do but mark the page as dirty, and 503 * we can do but mark the page as dirty, and
476 * better luck next time. 504 * better luck next time.
477 */ 505 */
478 redirty_page_for_writepage(wbc, page);
479 break; 506 break;
480 } 507 }
481 nr_submitted++; 508 nr_submitted++;
@@ -484,6 +511,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
484 511
485 /* Error stopped previous loop? Clean up buffers... */ 512 /* Error stopped previous loop? Clean up buffers... */
486 if (ret) { 513 if (ret) {
514 out:
515 if (data_page)
516 ext4_restore_control_page(data_page);
517 printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
518 redirty_page_for_writepage(wbc, page);
487 do { 519 do {
488 clear_buffer_async_write(bh); 520 clear_buffer_async_write(bh);
489 bh = bh->b_this_page; 521 bh = bh->b_this_page;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
new file mode 100644
index 000000000000..171b9ac4b45e
--- /dev/null
+++ b/fs/ext4/readpage.c
@@ -0,0 +1,328 @@
1/*
2 * linux/fs/ext4/readpage.c
3 *
4 * Copyright (C) 2002, Linus Torvalds.
5 * Copyright (C) 2015, Google, Inc.
6 *
7 * This was originally taken from fs/mpage.c
8 *
9 * The intent is the ext4_mpage_readpages() function here is intended
10 * to replace mpage_readpages() in the general case, not just for
11 * encrypted files. It has some limitations (see below), where it
12 * will fall back to read_block_full_page(), but these limitations
13 * should only be hit when page_size != block_size.
14 *
15 * This will allow us to attach a callback function to support ext4
16 * encryption.
17 *
18 * If anything unusual happens, such as:
19 *
20 * - encountering a page which has buffers
21 * - encountering a page which has a non-hole after a hole
22 * - encountering a page with non-contiguous blocks
23 *
24 * then this code just gives up and calls the buffer_head-based read function.
25 * It does handle a page which has holes at the end - that is a common case:
26 * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
27 *
28 */
29
30#include <linux/kernel.h>
31#include <linux/export.h>
32#include <linux/mm.h>
33#include <linux/kdev_t.h>
34#include <linux/gfp.h>
35#include <linux/bio.h>
36#include <linux/fs.h>
37#include <linux/buffer_head.h>
38#include <linux/blkdev.h>
39#include <linux/highmem.h>
40#include <linux/prefetch.h>
41#include <linux/mpage.h>
42#include <linux/writeback.h>
43#include <linux/backing-dev.h>
44#include <linux/pagevec.h>
45#include <linux/cleancache.h>
46
47#include "ext4.h"
48
49/*
50 * Call ext4_decrypt on every single page, reusing the encryption
51 * context.
52 */
53static void completion_pages(struct work_struct *work)
54{
55#ifdef CONFIG_EXT4_FS_ENCRYPTION
56 struct ext4_crypto_ctx *ctx =
57 container_of(work, struct ext4_crypto_ctx, work);
58 struct bio *bio = ctx->bio;
59 struct bio_vec *bv;
60 int i;
61
62 bio_for_each_segment_all(bv, bio, i) {
63 struct page *page = bv->bv_page;
64
65 int ret = ext4_decrypt(ctx, page);
66 if (ret) {
67 WARN_ON_ONCE(1);
68 SetPageError(page);
69 } else
70 SetPageUptodate(page);
71 unlock_page(page);
72 }
73 ext4_release_crypto_ctx(ctx);
74 bio_put(bio);
75#else
76 BUG();
77#endif
78}
79
80static inline bool ext4_bio_encrypted(struct bio *bio)
81{
82#ifdef CONFIG_EXT4_FS_ENCRYPTION
83 return unlikely(bio->bi_private != NULL);
84#else
85 return false;
86#endif
87}
88
89/*
90 * I/O completion handler for multipage BIOs.
91 *
92 * The mpage code never puts partial pages into a BIO (except for end-of-file).
93 * If a page does not map to a contiguous run of blocks then it simply falls
94 * back to block_read_full_page().
95 *
96 * Why is this? If a page's completion depends on a number of different BIOs
97 * which can complete in any order (or at the same time) then determining the
98 * status of that page is hard. See end_buffer_async_read() for the details.
99 * There is no point in duplicating all that complexity.
100 */
101static void mpage_end_io(struct bio *bio, int err)
102{
103 struct bio_vec *bv;
104 int i;
105
106 if (ext4_bio_encrypted(bio)) {
107 struct ext4_crypto_ctx *ctx = bio->bi_private;
108
109 if (err) {
110 ext4_release_crypto_ctx(ctx);
111 } else {
112 INIT_WORK(&ctx->work, completion_pages);
113 ctx->bio = bio;
114 queue_work(ext4_read_workqueue, &ctx->work);
115 return;
116 }
117 }
118 bio_for_each_segment_all(bv, bio, i) {
119 struct page *page = bv->bv_page;
120
121 if (!err) {
122 SetPageUptodate(page);
123 } else {
124 ClearPageUptodate(page);
125 SetPageError(page);
126 }
127 unlock_page(page);
128 }
129
130 bio_put(bio);
131}
132
133int ext4_mpage_readpages(struct address_space *mapping,
134 struct list_head *pages, struct page *page,
135 unsigned nr_pages)
136{
137 struct bio *bio = NULL;
138 unsigned page_idx;
139 sector_t last_block_in_bio = 0;
140
141 struct inode *inode = mapping->host;
142 const unsigned blkbits = inode->i_blkbits;
143 const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
144 const unsigned blocksize = 1 << blkbits;
145 sector_t block_in_file;
146 sector_t last_block;
147 sector_t last_block_in_file;
148 sector_t blocks[MAX_BUF_PER_PAGE];
149 unsigned page_block;
150 struct block_device *bdev = inode->i_sb->s_bdev;
151 int length;
152 unsigned relative_block = 0;
153 struct ext4_map_blocks map;
154
155 map.m_pblk = 0;
156 map.m_lblk = 0;
157 map.m_len = 0;
158 map.m_flags = 0;
159
160 for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
161 int fully_mapped = 1;
162 unsigned first_hole = blocks_per_page;
163
164 prefetchw(&page->flags);
165 if (pages) {
166 page = list_entry(pages->prev, struct page, lru);
167 list_del(&page->lru);
168 if (add_to_page_cache_lru(page, mapping,
169 page->index, GFP_KERNEL))
170 goto next_page;
171 }
172
173 if (page_has_buffers(page))
174 goto confused;
175
176 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
177 last_block = block_in_file + nr_pages * blocks_per_page;
178 last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
179 if (last_block > last_block_in_file)
180 last_block = last_block_in_file;
181 page_block = 0;
182
183 /*
184 * Map blocks using the previous result first.
185 */
186 if ((map.m_flags & EXT4_MAP_MAPPED) &&
187 block_in_file > map.m_lblk &&
188 block_in_file < (map.m_lblk + map.m_len)) {
189 unsigned map_offset = block_in_file - map.m_lblk;
190 unsigned last = map.m_len - map_offset;
191
192 for (relative_block = 0; ; relative_block++) {
193 if (relative_block == last) {
194 /* needed? */
195 map.m_flags &= ~EXT4_MAP_MAPPED;
196 break;
197 }
198 if (page_block == blocks_per_page)
199 break;
200 blocks[page_block] = map.m_pblk + map_offset +
201 relative_block;
202 page_block++;
203 block_in_file++;
204 }
205 }
206
207 /*
208 * Then do more ext4_map_blocks() calls until we are
209 * done with this page.
210 */
211 while (page_block < blocks_per_page) {
212 if (block_in_file < last_block) {
213 map.m_lblk = block_in_file;
214 map.m_len = last_block - block_in_file;
215
216 if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
217 set_error_page:
218 SetPageError(page);
219 zero_user_segment(page, 0,
220 PAGE_CACHE_SIZE);
221 unlock_page(page);
222 goto next_page;
223 }
224 }
225 if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
226 fully_mapped = 0;
227 if (first_hole == blocks_per_page)
228 first_hole = page_block;
229 page_block++;
230 block_in_file++;
231 continue;
232 }
233 if (first_hole != blocks_per_page)
234 goto confused; /* hole -> non-hole */
235
236 /* Contiguous blocks? */
237 if (page_block && blocks[page_block-1] != map.m_pblk-1)
238 goto confused;
239 for (relative_block = 0; ; relative_block++) {
240 if (relative_block == map.m_len) {
241 /* needed? */
242 map.m_flags &= ~EXT4_MAP_MAPPED;
243 break;
244 } else if (page_block == blocks_per_page)
245 break;
246 blocks[page_block] = map.m_pblk+relative_block;
247 page_block++;
248 block_in_file++;
249 }
250 }
251 if (first_hole != blocks_per_page) {
252 zero_user_segment(page, first_hole << blkbits,
253 PAGE_CACHE_SIZE);
254 if (first_hole == 0) {
255 SetPageUptodate(page);
256 unlock_page(page);
257 goto next_page;
258 }
259 } else if (fully_mapped) {
260 SetPageMappedToDisk(page);
261 }
262 if (fully_mapped && blocks_per_page == 1 &&
263 !PageUptodate(page) && cleancache_get_page(page) == 0) {
264 SetPageUptodate(page);
265 goto confused;
266 }
267
268 /*
269 * This page will go to BIO. Do we need to send this
270 * BIO off first?
271 */
272 if (bio && (last_block_in_bio != blocks[0] - 1)) {
273 submit_and_realloc:
274 submit_bio(READ, bio);
275 bio = NULL;
276 }
277 if (bio == NULL) {
278 struct ext4_crypto_ctx *ctx = NULL;
279
280 if (ext4_encrypted_inode(inode) &&
281 S_ISREG(inode->i_mode)) {
282 ctx = ext4_get_crypto_ctx(inode);
283 if (IS_ERR(ctx))
284 goto set_error_page;
285 }
286 bio = bio_alloc(GFP_KERNEL,
287 min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
288 if (!bio) {
289 if (ctx)
290 ext4_release_crypto_ctx(ctx);
291 goto set_error_page;
292 }
293 bio->bi_bdev = bdev;
294 bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
295 bio->bi_end_io = mpage_end_io;
296 bio->bi_private = ctx;
297 }
298
299 length = first_hole << blkbits;
300 if (bio_add_page(bio, page, length, 0) < length)
301 goto submit_and_realloc;
302
303 if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
304 (relative_block == map.m_len)) ||
305 (first_hole != blocks_per_page)) {
306 submit_bio(READ, bio);
307 bio = NULL;
308 } else
309 last_block_in_bio = blocks[blocks_per_page - 1];
310 goto next_page;
311 confused:
312 if (bio) {
313 submit_bio(READ, bio);
314 bio = NULL;
315 }
316 if (!PageUptodate(page))
317 block_read_full_page(page, ext4_get_block);
318 else
319 unlock_page(page);
320 next_page:
321 if (pages)
322 page_cache_release(page);
323 }
324 BUG_ON(pages && !list_empty(pages));
325 if (bio)
326 submit_bio(READ, bio);
327 return 0;
328}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e061e66c8280..821f22dbe825 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,7 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/jbd2.h>
25#include <linux/slab.h> 24#include <linux/slab.h>
26#include <linux/init.h> 25#include <linux/init.h>
27#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -323,22 +322,6 @@ static void save_error_info(struct super_block *sb, const char *func,
323 ext4_commit_super(sb, 1); 322 ext4_commit_super(sb, 1);
324} 323}
325 324
326/*
327 * The del_gendisk() function uninitializes the disk-specific data
328 * structures, including the bdi structure, without telling anyone
329 * else. Once this happens, any attempt to call mark_buffer_dirty()
330 * (for example, by ext4_commit_super), will cause a kernel OOPS.
331 * This is a kludge to prevent these oops until we can put in a proper
332 * hook in del_gendisk() to inform the VFS and file system layers.
333 */
334static int block_device_ejected(struct super_block *sb)
335{
336 struct inode *bd_inode = sb->s_bdev->bd_inode;
337 struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
338
339 return bdi->dev == NULL;
340}
341
342static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) 325static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
343{ 326{
344 struct super_block *sb = journal->j_private; 327 struct super_block *sb = journal->j_private;
@@ -893,6 +876,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
893 atomic_set(&ei->i_ioend_count, 0); 876 atomic_set(&ei->i_ioend_count, 0);
894 atomic_set(&ei->i_unwritten, 0); 877 atomic_set(&ei->i_unwritten, 0);
895 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); 878 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
879#ifdef CONFIG_EXT4_FS_ENCRYPTION
880 ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
881#endif
896 882
897 return &ei->vfs_inode; 883 return &ei->vfs_inode;
898} 884}
@@ -1076,7 +1062,7 @@ static const struct quotactl_ops ext4_qctl_operations = {
1076 .quota_on = ext4_quota_on, 1062 .quota_on = ext4_quota_on,
1077 .quota_off = ext4_quota_off, 1063 .quota_off = ext4_quota_off,
1078 .quota_sync = dquot_quota_sync, 1064 .quota_sync = dquot_quota_sync,
1079 .get_info = dquot_get_dqinfo, 1065 .get_state = dquot_get_state,
1080 .set_info = dquot_set_dqinfo, 1066 .set_info = dquot_set_dqinfo,
1081 .get_dqblk = dquot_get_dqblk, 1067 .get_dqblk = dquot_get_dqblk,
1082 .set_dqblk = dquot_set_dqblk 1068 .set_dqblk = dquot_set_dqblk
@@ -1120,7 +1106,7 @@ enum {
1120 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, 1106 Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1121 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, 1107 Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1122 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1108 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1123 Opt_data_err_abort, Opt_data_err_ignore, 1109 Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
1124 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1110 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1125 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1111 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1126 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 1112 Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
@@ -1211,6 +1197,7 @@ static const match_table_t tokens = {
1211 {Opt_init_itable, "init_itable"}, 1197 {Opt_init_itable, "init_itable"},
1212 {Opt_noinit_itable, "noinit_itable"}, 1198 {Opt_noinit_itable, "noinit_itable"},
1213 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, 1199 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1200 {Opt_test_dummy_encryption, "test_dummy_encryption"},
1214 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1201 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1215 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1202 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1216 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1203 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1412,6 +1399,7 @@ static const struct mount_opts {
1412 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1399 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1413 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1400 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1414 {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 1401 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1402 {Opt_test_dummy_encryption, 0, MOPT_GTE0},
1415 {Opt_err, 0, 0} 1403 {Opt_err, 0, 0}
1416}; 1404};
1417 1405
@@ -1588,6 +1576,15 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1588 } 1576 }
1589 *journal_ioprio = 1577 *journal_ioprio =
1590 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); 1578 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
1579 } else if (token == Opt_test_dummy_encryption) {
1580#ifdef CONFIG_EXT4_FS_ENCRYPTION
1581 sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
1582 ext4_msg(sb, KERN_WARNING,
1583 "Test dummy encryption mode enabled");
1584#else
1585 ext4_msg(sb, KERN_WARNING,
1586 "Test dummy encryption mount option ignored");
1587#endif
1591 } else if (m->flags & MOPT_DATAJ) { 1588 } else if (m->flags & MOPT_DATAJ) {
1592 if (is_remount) { 1589 if (is_remount) {
1593 if (!sbi->s_journal) 1590 if (!sbi->s_journal)
@@ -2685,11 +2682,13 @@ static struct attribute *ext4_attrs[] = {
2685EXT4_INFO_ATTR(lazy_itable_init); 2682EXT4_INFO_ATTR(lazy_itable_init);
2686EXT4_INFO_ATTR(batched_discard); 2683EXT4_INFO_ATTR(batched_discard);
2687EXT4_INFO_ATTR(meta_bg_resize); 2684EXT4_INFO_ATTR(meta_bg_resize);
2685EXT4_INFO_ATTR(encryption);
2688 2686
2689static struct attribute *ext4_feat_attrs[] = { 2687static struct attribute *ext4_feat_attrs[] = {
2690 ATTR_LIST(lazy_itable_init), 2688 ATTR_LIST(lazy_itable_init),
2691 ATTR_LIST(batched_discard), 2689 ATTR_LIST(batched_discard),
2692 ATTR_LIST(meta_bg_resize), 2690 ATTR_LIST(meta_bg_resize),
2691 ATTR_LIST(encryption),
2693 NULL, 2692 NULL,
2694}; 2693};
2695 2694
@@ -3448,6 +3447,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3448 if (sb->s_bdev->bd_part) 3447 if (sb->s_bdev->bd_part)
3449 sbi->s_sectors_written_start = 3448 sbi->s_sectors_written_start =
3450 part_stat_read(sb->s_bdev->bd_part, sectors[1]); 3449 part_stat_read(sb->s_bdev->bd_part, sectors[1]);
3450#ifdef CONFIG_EXT4_FS_ENCRYPTION
3451 /* Modes of operations for file and directory encryption. */
3452 sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
3453 sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
3454#endif
3451 3455
3452 /* Cleanup superblock name */ 3456 /* Cleanup superblock name */
3453 for (cp = sb->s_id; (cp = strchr(cp, '/'));) 3457 for (cp = sb->s_id; (cp = strchr(cp, '/'));)
@@ -3692,6 +3696,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3692 } 3696 }
3693 } 3697 }
3694 3698
3699 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) &&
3700 es->s_encryption_level) {
3701 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
3702 es->s_encryption_level);
3703 goto failed_mount;
3704 }
3705
3695 if (sb->s_blocksize != blocksize) { 3706 if (sb->s_blocksize != blocksize) {
3696 /* Validate the filesystem blocksize */ 3707 /* Validate the filesystem blocksize */
3697 if (!sb_set_blocksize(sb, blocksize)) { 3708 if (!sb_set_blocksize(sb, blocksize)) {
@@ -4054,6 +4065,13 @@ no_journal:
4054 } 4065 }
4055 } 4066 }
4056 4067
4068 if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
4069 !(sb->s_flags & MS_RDONLY) &&
4070 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
4071 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
4072 ext4_commit_super(sb, 1);
4073 }
4074
4057 /* 4075 /*
4058 * Get the # of file system overhead blocks from the 4076 * Get the # of file system overhead blocks from the
4059 * superblock if present. 4077 * superblock if present.
@@ -4570,7 +4588,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4570 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4588 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4571 int error = 0; 4589 int error = 0;
4572 4590
4573 if (!sbh || block_device_ejected(sb)) 4591 if (!sbh)
4574 return error; 4592 return error;
4575 if (buffer_write_io_error(sbh)) { 4593 if (buffer_write_io_error(sbh)) {
4576 /* 4594 /*
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index ff3711932018..136ca0e911fd 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -18,13 +18,101 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h>
22#include <linux/namei.h> 21#include <linux/namei.h>
23#include "ext4.h" 22#include "ext4.h"
24#include "xattr.h" 23#include "xattr.h"
25 24
25#ifdef CONFIG_EXT4_FS_ENCRYPTION
26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
27{ 27{
28 struct page *cpage = NULL;
29 char *caddr, *paddr = NULL;
30 struct ext4_str cstr, pstr;
31 struct inode *inode = dentry->d_inode;
32 struct ext4_fname_crypto_ctx *ctx = NULL;
33 struct ext4_encrypted_symlink_data *sd;
34 loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
35 int res;
36 u32 plen, max_size = inode->i_sb->s_blocksize;
37
38 if (!ext4_encrypted_inode(inode))
39 return page_follow_link_light(dentry, nd);
40
41 ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
42 if (IS_ERR(ctx))
43 return ctx;
44
45 if (ext4_inode_is_fast_symlink(inode)) {
46 caddr = (char *) EXT4_I(dentry->d_inode)->i_data;
47 max_size = sizeof(EXT4_I(dentry->d_inode)->i_data);
48 } else {
49 cpage = read_mapping_page(inode->i_mapping, 0, NULL);
50 if (IS_ERR(cpage)) {
51 ext4_put_fname_crypto_ctx(&ctx);
52 return cpage;
53 }
54 caddr = kmap(cpage);
55 caddr[size] = 0;
56 }
57
58 /* Symlink is encrypted */
59 sd = (struct ext4_encrypted_symlink_data *)caddr;
60 cstr.name = sd->encrypted_path;
61 cstr.len = le32_to_cpu(sd->len);
62 if ((cstr.len +
63 sizeof(struct ext4_encrypted_symlink_data) - 1) >
64 max_size) {
65 /* Symlink data on the disk is corrupted */
66 res = -EIO;
67 goto errout;
68 }
69 plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ?
70 EXT4_FNAME_CRYPTO_DIGEST_SIZE*2 : cstr.len;
71 paddr = kmalloc(plen + 1, GFP_NOFS);
72 if (!paddr) {
73 res = -ENOMEM;
74 goto errout;
75 }
76 pstr.name = paddr;
77 res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr);
78 if (res < 0)
79 goto errout;
80 /* Null-terminate the name */
81 if (res <= plen)
82 paddr[res] = '\0';
83 nd_set_link(nd, paddr);
84 ext4_put_fname_crypto_ctx(&ctx);
85 if (cpage) {
86 kunmap(cpage);
87 page_cache_release(cpage);
88 }
89 return NULL;
90errout:
91 ext4_put_fname_crypto_ctx(&ctx);
92 if (cpage) {
93 kunmap(cpage);
94 page_cache_release(cpage);
95 }
96 kfree(paddr);
97 return ERR_PTR(res);
98}
99
100static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
101 void *cookie)
102{
103 struct page *page = cookie;
104
105 if (!page) {
106 kfree(nd_get_link(nd));
107 } else {
108 kunmap(page);
109 page_cache_release(page);
110 }
111}
112#endif
113
114static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
115{
28 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); 116 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
29 nd_set_link(nd, (char *) ei->i_data); 117 nd_set_link(nd, (char *) ei->i_data);
30 return NULL; 118 return NULL;
@@ -32,8 +120,13 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
32 120
33const struct inode_operations ext4_symlink_inode_operations = { 121const struct inode_operations ext4_symlink_inode_operations = {
34 .readlink = generic_readlink, 122 .readlink = generic_readlink,
123#ifdef CONFIG_EXT4_FS_ENCRYPTION
124 .follow_link = ext4_follow_link,
125 .put_link = ext4_put_link,
126#else
35 .follow_link = page_follow_link_light, 127 .follow_link = page_follow_link_light,
36 .put_link = page_put_link, 128 .put_link = page_put_link,
129#endif
37 .setattr = ext4_setattr, 130 .setattr = ext4_setattr,
38 .setxattr = generic_setxattr, 131 .setxattr = generic_setxattr,
39 .getxattr = generic_getxattr, 132 .getxattr = generic_getxattr,
@@ -43,7 +136,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
43 136
44const struct inode_operations ext4_fast_symlink_inode_operations = { 137const struct inode_operations ext4_fast_symlink_inode_operations = {
45 .readlink = generic_readlink, 138 .readlink = generic_readlink,
46 .follow_link = ext4_follow_link, 139 .follow_link = ext4_follow_fast_link,
47 .setattr = ext4_setattr, 140 .setattr = ext4_setattr,
48 .setxattr = generic_setxattr, 141 .setxattr = generic_setxattr,
49 .getxattr = generic_getxattr, 142 .getxattr = generic_getxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1e09fc77395c..759842ff8af0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -55,7 +55,6 @@
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/mbcache.h> 56#include <linux/mbcache.h>
57#include <linux/quotaops.h> 57#include <linux/quotaops.h>
58#include <linux/rwsem.h>
59#include "ext4_jbd2.h" 58#include "ext4_jbd2.h"
60#include "ext4.h" 59#include "ext4.h"
61#include "xattr.h" 60#include "xattr.h"
@@ -639,8 +638,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
639 free += EXT4_XATTR_LEN(name_len); 638 free += EXT4_XATTR_LEN(name_len);
640 } 639 }
641 if (i->value) { 640 if (i->value) {
642 if (free < EXT4_XATTR_SIZE(i->value_len) || 641 if (free < EXT4_XATTR_LEN(name_len) +
643 free < EXT4_XATTR_LEN(name_len) +
644 EXT4_XATTR_SIZE(i->value_len)) 642 EXT4_XATTR_SIZE(i->value_len))
645 return -ENOSPC; 643 return -ENOSPC;
646 } 644 }
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 29bedf5589f6..ddc0957760ba 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -23,6 +23,7 @@
23#define EXT4_XATTR_INDEX_SECURITY 6 23#define EXT4_XATTR_INDEX_SECURITY 6
24#define EXT4_XATTR_INDEX_SYSTEM 7 24#define EXT4_XATTR_INDEX_SYSTEM 7
25#define EXT4_XATTR_INDEX_RICHACL 8 25#define EXT4_XATTR_INDEX_RICHACL 8
26#define EXT4_XATTR_INDEX_ENCRYPTION 9
26 27
27struct ext4_xattr_header { 28struct ext4_xattr_header {
28 __le32 h_magic; /* magic number for identification */ 29 __le32 h_magic; /* magic number for identification */
@@ -98,6 +99,8 @@ extern const struct xattr_handler ext4_xattr_user_handler;
98extern const struct xattr_handler ext4_xattr_trusted_handler; 99extern const struct xattr_handler ext4_xattr_trusted_handler;
99extern const struct xattr_handler ext4_xattr_security_handler; 100extern const struct xattr_handler ext4_xattr_security_handler;
100 101
102#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
103
101extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 104extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
102 105
103extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 106extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 94e2d2ffabe1..05f0f663f14c 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -1,5 +1,5 @@
1config F2FS_FS 1config F2FS_FS
2 tristate "F2FS filesystem support (EXPERIMENTAL)" 2 tristate "F2FS filesystem support"
3 depends on BLOCK 3 depends on BLOCK
4 help 4 help
5 F2FS is based on Log-structured File System (LFS), which supports 5 F2FS is based on Log-structured File System (LFS), which supports
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 742202779bd5..4320ffab3495 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -351,13 +351,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
351 351
352 *acl = f2fs_acl_clone(p, GFP_NOFS); 352 *acl = f2fs_acl_clone(p, GFP_NOFS);
353 if (!*acl) 353 if (!*acl)
354 return -ENOMEM; 354 goto no_mem;
355 355
356 ret = f2fs_acl_create_masq(*acl, mode); 356 ret = f2fs_acl_create_masq(*acl, mode);
357 if (ret < 0) { 357 if (ret < 0)
358 posix_acl_release(*acl); 358 goto no_mem_clone;
359 return -ENOMEM;
360 }
361 359
362 if (ret == 0) { 360 if (ret == 0) {
363 posix_acl_release(*acl); 361 posix_acl_release(*acl);
@@ -378,6 +376,12 @@ no_acl:
378 *default_acl = NULL; 376 *default_acl = NULL;
379 *acl = NULL; 377 *acl = NULL;
380 return 0; 378 return 0;
379
380no_mem_clone:
381 posix_acl_release(*acl);
382no_mem:
383 posix_acl_release(p);
384 return -ENOMEM;
381} 385}
382 386
383int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, 387int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7f794b72b3b7..a5e17a2a0781 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -276,7 +276,7 @@ continue_unlock:
276 if (!clear_page_dirty_for_io(page)) 276 if (!clear_page_dirty_for_io(page))
277 goto continue_unlock; 277 goto continue_unlock;
278 278
279 if (f2fs_write_meta_page(page, &wbc)) { 279 if (mapping->a_ops->writepage(page, &wbc)) {
280 unlock_page(page); 280 unlock_page(page);
281 break; 281 break;
282 } 282 }
@@ -464,20 +464,19 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
464 464
465void recover_orphan_inodes(struct f2fs_sb_info *sbi) 465void recover_orphan_inodes(struct f2fs_sb_info *sbi)
466{ 466{
467 block_t start_blk, orphan_blkaddr, i, j; 467 block_t start_blk, orphan_blocks, i, j;
468 468
469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) 469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
470 return; 470 return;
471 471
472 set_sbi_flag(sbi, SBI_POR_DOING); 472 set_sbi_flag(sbi, SBI_POR_DOING);
473 473
474 start_blk = __start_cp_addr(sbi) + 1 + 474 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
475 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 475 orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
476 orphan_blkaddr = __start_sum_addr(sbi) - 1;
477 476
478 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); 477 ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
479 478
480 for (i = 0; i < orphan_blkaddr; i++) { 479 for (i = 0; i < orphan_blocks; i++) {
481 struct page *page = get_meta_page(sbi, start_blk + i); 480 struct page *page = get_meta_page(sbi, start_blk + i);
482 struct f2fs_orphan_block *orphan_blk; 481 struct f2fs_orphan_block *orphan_blk;
483 482
@@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
615 unsigned long blk_size = sbi->blocksize; 614 unsigned long blk_size = sbi->blocksize;
616 unsigned long long cp1_version = 0, cp2_version = 0; 615 unsigned long long cp1_version = 0, cp2_version = 0;
617 unsigned long long cp_start_blk_no; 616 unsigned long long cp_start_blk_no;
618 unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 617 unsigned int cp_blks = 1 + __cp_payload(sbi);
619 block_t cp_blk_no; 618 block_t cp_blk_no;
620 int i; 619 int i;
621 620
@@ -796,6 +795,7 @@ retry:
796 * wribacking dentry pages in the freeing inode. 795 * wribacking dentry pages in the freeing inode.
797 */ 796 */
798 f2fs_submit_merged_bio(sbi, DATA, WRITE); 797 f2fs_submit_merged_bio(sbi, DATA, WRITE);
798 cond_resched();
799 } 799 }
800 goto retry; 800 goto retry;
801} 801}
@@ -884,7 +884,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
884 __u32 crc32 = 0; 884 __u32 crc32 = 0;
885 void *kaddr; 885 void *kaddr;
886 int i; 886 int i;
887 int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 887 int cp_payload_blks = __cp_payload(sbi);
888 888
889 /* 889 /*
890 * This avoids to conduct wrong roll-forward operations and uses 890 * This avoids to conduct wrong roll-forward operations and uses
@@ -1048,17 +1048,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1048 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1048 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1049 unsigned long long ckpt_ver; 1049 unsigned long long ckpt_ver;
1050 1050
1051 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1052
1053 mutex_lock(&sbi->cp_mutex); 1051 mutex_lock(&sbi->cp_mutex);
1054 1052
1055 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && 1053 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1056 cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) 1054 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
1057 goto out; 1055 goto out;
1058 if (unlikely(f2fs_cp_error(sbi))) 1056 if (unlikely(f2fs_cp_error(sbi)))
1059 goto out; 1057 goto out;
1060 if (f2fs_readonly(sbi->sb)) 1058 if (f2fs_readonly(sbi->sb))
1061 goto out; 1059 goto out;
1060
1061 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1062
1062 if (block_operations(sbi)) 1063 if (block_operations(sbi))
1063 goto out; 1064 goto out;
1064 1065
@@ -1085,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1085 1086
1086 unblock_operations(sbi); 1087 unblock_operations(sbi);
1087 stat_inc_cp_count(sbi->stat_info); 1088 stat_inc_cp_count(sbi->stat_info);
1089
1090 if (cpc->reason == CP_RECOVERY)
1091 f2fs_msg(sbi->sb, KERN_NOTICE,
1092 "checkpoint: version = %llx", ckpt_ver);
1088out: 1093out:
1089 mutex_unlock(&sbi->cp_mutex); 1094 mutex_unlock(&sbi->cp_mutex);
1090 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); 1095 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
@@ -1103,14 +1108,9 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
1103 im->ino_num = 0; 1108 im->ino_num = 0;
1104 } 1109 }
1105 1110
1106 /*
1107 * considering 512 blocks in a segment 8 blocks are needed for cp
1108 * and log segment summaries. Remaining blocks are used to keep
1109 * orphan entries with the limitation one reserved segment
1110 * for cp pack we can have max 1020*504 orphan entries
1111 */
1112 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - 1111 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1113 NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; 1112 NR_CURSEG_TYPE - __cp_payload(sbi)) *
1113 F2FS_ORPHANS_PER_BLOCK;
1114} 1114}
1115 1115
1116int __init create_checkpoint_caches(void) 1116int __init create_checkpoint_caches(void)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed023a750..b91b0e10678e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/mpage.h> 14#include <linux/mpage.h>
15#include <linux/aio.h>
16#include <linux/writeback.h> 15#include <linux/writeback.h>
17#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
18#include <linux/blkdev.h> 17#include <linux/blkdev.h>
19#include <linux/bio.h> 18#include <linux/bio.h>
20#include <linux/prefetch.h> 19#include <linux/prefetch.h>
20#include <linux/uio.h>
21 21
22#include "f2fs.h" 22#include "f2fs.h"
23#include "node.h" 23#include "node.h"
@@ -25,6 +25,9 @@
25#include "trace.h" 25#include "trace.h"
26#include <trace/events/f2fs.h> 26#include <trace/events/f2fs.h>
27 27
28static struct kmem_cache *extent_tree_slab;
29static struct kmem_cache *extent_node_slab;
30
28static void f2fs_read_end_io(struct bio *bio, int err) 31static void f2fs_read_end_io(struct bio *bio, int err)
29{ 32{
30 struct bio_vec *bvec; 33 struct bio_vec *bvec;
@@ -197,7 +200,7 @@ alloc_new:
197 * ->node_page 200 * ->node_page
198 * update block addresses in the node page 201 * update block addresses in the node page
199 */ 202 */
200static void __set_data_blkaddr(struct dnode_of_data *dn) 203void set_data_blkaddr(struct dnode_of_data *dn)
201{ 204{
202 struct f2fs_node *rn; 205 struct f2fs_node *rn;
203 __le32 *addr_array; 206 __le32 *addr_array;
@@ -226,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn)
226 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 229 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
227 230
228 dn->data_blkaddr = NEW_ADDR; 231 dn->data_blkaddr = NEW_ADDR;
229 __set_data_blkaddr(dn); 232 set_data_blkaddr(dn);
230 mark_inode_dirty(dn->inode); 233 mark_inode_dirty(dn->inode);
231 sync_inode_page(dn); 234 sync_inode_page(dn);
232 return 0; 235 return 0;
@@ -248,73 +251,62 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
248 return err; 251 return err;
249} 252}
250 253
251static int check_extent_cache(struct inode *inode, pgoff_t pgofs, 254static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs,
252 struct buffer_head *bh_result) 255 struct extent_info *ei, struct buffer_head *bh_result)
256{
257 unsigned int blkbits = sb->s_blocksize_bits;
258 size_t max_size = bh_result->b_size;
259 size_t mapped_size;
260
261 clear_buffer_new(bh_result);
262 map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs);
263 mapped_size = (ei->fofs + ei->len - pgofs) << blkbits;
264 bh_result->b_size = min(max_size, mapped_size);
265}
266
267static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
268 struct extent_info *ei)
253{ 269{
254 struct f2fs_inode_info *fi = F2FS_I(inode); 270 struct f2fs_inode_info *fi = F2FS_I(inode);
255 pgoff_t start_fofs, end_fofs; 271 pgoff_t start_fofs, end_fofs;
256 block_t start_blkaddr; 272 block_t start_blkaddr;
257 273
258 if (is_inode_flag_set(fi, FI_NO_EXTENT)) 274 read_lock(&fi->ext_lock);
259 return 0;
260
261 read_lock(&fi->ext.ext_lock);
262 if (fi->ext.len == 0) { 275 if (fi->ext.len == 0) {
263 read_unlock(&fi->ext.ext_lock); 276 read_unlock(&fi->ext_lock);
264 return 0; 277 return false;
265 } 278 }
266 279
267 stat_inc_total_hit(inode->i_sb); 280 stat_inc_total_hit(inode->i_sb);
268 281
269 start_fofs = fi->ext.fofs; 282 start_fofs = fi->ext.fofs;
270 end_fofs = fi->ext.fofs + fi->ext.len - 1; 283 end_fofs = fi->ext.fofs + fi->ext.len - 1;
271 start_blkaddr = fi->ext.blk_addr; 284 start_blkaddr = fi->ext.blk;
272 285
273 if (pgofs >= start_fofs && pgofs <= end_fofs) { 286 if (pgofs >= start_fofs && pgofs <= end_fofs) {
274 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 287 *ei = fi->ext;
275 size_t count;
276
277 set_buffer_new(bh_result);
278 map_bh(bh_result, inode->i_sb,
279 start_blkaddr + pgofs - start_fofs);
280 count = end_fofs - pgofs + 1;
281 if (count < (UINT_MAX >> blkbits))
282 bh_result->b_size = (count << blkbits);
283 else
284 bh_result->b_size = UINT_MAX;
285
286 stat_inc_read_hit(inode->i_sb); 288 stat_inc_read_hit(inode->i_sb);
287 read_unlock(&fi->ext.ext_lock); 289 read_unlock(&fi->ext_lock);
288 return 1; 290 return true;
289 } 291 }
290 read_unlock(&fi->ext.ext_lock); 292 read_unlock(&fi->ext_lock);
291 return 0; 293 return false;
292} 294}
293 295
294void update_extent_cache(struct dnode_of_data *dn) 296static bool update_extent_info(struct inode *inode, pgoff_t fofs,
297 block_t blkaddr)
295{ 298{
296 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 299 struct f2fs_inode_info *fi = F2FS_I(inode);
297 pgoff_t fofs, start_fofs, end_fofs; 300 pgoff_t start_fofs, end_fofs;
298 block_t start_blkaddr, end_blkaddr; 301 block_t start_blkaddr, end_blkaddr;
299 int need_update = true; 302 int need_update = true;
300 303
301 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 304 write_lock(&fi->ext_lock);
302
303 /* Update the page address in the parent node */
304 __set_data_blkaddr(dn);
305
306 if (is_inode_flag_set(fi, FI_NO_EXTENT))
307 return;
308
309 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
310 dn->ofs_in_node;
311
312 write_lock(&fi->ext.ext_lock);
313 305
314 start_fofs = fi->ext.fofs; 306 start_fofs = fi->ext.fofs;
315 end_fofs = fi->ext.fofs + fi->ext.len - 1; 307 end_fofs = fi->ext.fofs + fi->ext.len - 1;
316 start_blkaddr = fi->ext.blk_addr; 308 start_blkaddr = fi->ext.blk;
317 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; 309 end_blkaddr = fi->ext.blk + fi->ext.len - 1;
318 310
319 /* Drop and initialize the matched extent */ 311 /* Drop and initialize the matched extent */
320 if (fi->ext.len == 1 && fofs == start_fofs) 312 if (fi->ext.len == 1 && fofs == start_fofs)
@@ -322,24 +314,24 @@ void update_extent_cache(struct dnode_of_data *dn)
322 314
323 /* Initial extent */ 315 /* Initial extent */
324 if (fi->ext.len == 0) { 316 if (fi->ext.len == 0) {
325 if (dn->data_blkaddr != NULL_ADDR) { 317 if (blkaddr != NULL_ADDR) {
326 fi->ext.fofs = fofs; 318 fi->ext.fofs = fofs;
327 fi->ext.blk_addr = dn->data_blkaddr; 319 fi->ext.blk = blkaddr;
328 fi->ext.len = 1; 320 fi->ext.len = 1;
329 } 321 }
330 goto end_update; 322 goto end_update;
331 } 323 }
332 324
333 /* Front merge */ 325 /* Front merge */
334 if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { 326 if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
335 fi->ext.fofs--; 327 fi->ext.fofs--;
336 fi->ext.blk_addr--; 328 fi->ext.blk--;
337 fi->ext.len++; 329 fi->ext.len++;
338 goto end_update; 330 goto end_update;
339 } 331 }
340 332
341 /* Back merge */ 333 /* Back merge */
342 if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { 334 if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
343 fi->ext.len++; 335 fi->ext.len++;
344 goto end_update; 336 goto end_update;
345 } 337 }
@@ -351,8 +343,7 @@ void update_extent_cache(struct dnode_of_data *dn)
351 fi->ext.len = fofs - start_fofs; 343 fi->ext.len = fofs - start_fofs;
352 } else { 344 } else {
353 fi->ext.fofs = fofs + 1; 345 fi->ext.fofs = fofs + 1;
354 fi->ext.blk_addr = start_blkaddr + 346 fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
355 fofs - start_fofs + 1;
356 fi->ext.len -= fofs - start_fofs + 1; 347 fi->ext.len -= fofs - start_fofs + 1;
357 } 348 }
358 } else { 349 } else {
@@ -366,27 +357,583 @@ void update_extent_cache(struct dnode_of_data *dn)
366 need_update = true; 357 need_update = true;
367 } 358 }
368end_update: 359end_update:
369 write_unlock(&fi->ext.ext_lock); 360 write_unlock(&fi->ext_lock);
370 if (need_update) 361 return need_update;
371 sync_inode_page(dn); 362}
363
364static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
365 struct extent_tree *et, struct extent_info *ei,
366 struct rb_node *parent, struct rb_node **p)
367{
368 struct extent_node *en;
369
370 en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
371 if (!en)
372 return NULL;
373
374 en->ei = *ei;
375 INIT_LIST_HEAD(&en->list);
376
377 rb_link_node(&en->rb_node, parent, p);
378 rb_insert_color(&en->rb_node, &et->root);
379 et->count++;
380 atomic_inc(&sbi->total_ext_node);
381 return en;
382}
383
384static void __detach_extent_node(struct f2fs_sb_info *sbi,
385 struct extent_tree *et, struct extent_node *en)
386{
387 rb_erase(&en->rb_node, &et->root);
388 et->count--;
389 atomic_dec(&sbi->total_ext_node);
390
391 if (et->cached_en == en)
392 et->cached_en = NULL;
393}
394
395static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
396 nid_t ino)
397{
398 struct extent_tree *et;
399
400 down_read(&sbi->extent_tree_lock);
401 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
402 if (!et) {
403 up_read(&sbi->extent_tree_lock);
404 return NULL;
405 }
406 atomic_inc(&et->refcount);
407 up_read(&sbi->extent_tree_lock);
408
409 return et;
410}
411
412static struct extent_tree *__grab_extent_tree(struct inode *inode)
413{
414 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
415 struct extent_tree *et;
416 nid_t ino = inode->i_ino;
417
418 down_write(&sbi->extent_tree_lock);
419 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
420 if (!et) {
421 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
422 f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
423 memset(et, 0, sizeof(struct extent_tree));
424 et->ino = ino;
425 et->root = RB_ROOT;
426 et->cached_en = NULL;
427 rwlock_init(&et->lock);
428 atomic_set(&et->refcount, 0);
429 et->count = 0;
430 sbi->total_ext_tree++;
431 }
432 atomic_inc(&et->refcount);
433 up_write(&sbi->extent_tree_lock);
434
435 return et;
436}
437
438static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
439 unsigned int fofs)
440{
441 struct rb_node *node = et->root.rb_node;
442 struct extent_node *en;
443
444 if (et->cached_en) {
445 struct extent_info *cei = &et->cached_en->ei;
446
447 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
448 return et->cached_en;
449 }
450
451 while (node) {
452 en = rb_entry(node, struct extent_node, rb_node);
453
454 if (fofs < en->ei.fofs) {
455 node = node->rb_left;
456 } else if (fofs >= en->ei.fofs + en->ei.len) {
457 node = node->rb_right;
458 } else {
459 et->cached_en = en;
460 return en;
461 }
462 }
463 return NULL;
464}
465
466static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
467 struct extent_tree *et, struct extent_node *en)
468{
469 struct extent_node *prev;
470 struct rb_node *node;
471
472 node = rb_prev(&en->rb_node);
473 if (!node)
474 return NULL;
475
476 prev = rb_entry(node, struct extent_node, rb_node);
477 if (__is_back_mergeable(&en->ei, &prev->ei)) {
478 en->ei.fofs = prev->ei.fofs;
479 en->ei.blk = prev->ei.blk;
480 en->ei.len += prev->ei.len;
481 __detach_extent_node(sbi, et, prev);
482 return prev;
483 }
484 return NULL;
485}
486
487static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
488 struct extent_tree *et, struct extent_node *en)
489{
490 struct extent_node *next;
491 struct rb_node *node;
492
493 node = rb_next(&en->rb_node);
494 if (!node)
495 return NULL;
496
497 next = rb_entry(node, struct extent_node, rb_node);
498 if (__is_front_mergeable(&en->ei, &next->ei)) {
499 en->ei.len += next->ei.len;
500 __detach_extent_node(sbi, et, next);
501 return next;
502 }
503 return NULL;
504}
505
506static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
507 struct extent_tree *et, struct extent_info *ei,
508 struct extent_node **den)
509{
510 struct rb_node **p = &et->root.rb_node;
511 struct rb_node *parent = NULL;
512 struct extent_node *en;
513
514 while (*p) {
515 parent = *p;
516 en = rb_entry(parent, struct extent_node, rb_node);
517
518 if (ei->fofs < en->ei.fofs) {
519 if (__is_front_mergeable(ei, &en->ei)) {
520 f2fs_bug_on(sbi, !den);
521 en->ei.fofs = ei->fofs;
522 en->ei.blk = ei->blk;
523 en->ei.len += ei->len;
524 *den = __try_back_merge(sbi, et, en);
525 return en;
526 }
527 p = &(*p)->rb_left;
528 } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
529 if (__is_back_mergeable(ei, &en->ei)) {
530 f2fs_bug_on(sbi, !den);
531 en->ei.len += ei->len;
532 *den = __try_front_merge(sbi, et, en);
533 return en;
534 }
535 p = &(*p)->rb_right;
536 } else {
537 f2fs_bug_on(sbi, 1);
538 }
539 }
540
541 return __attach_extent_node(sbi, et, ei, parent, p);
542}
543
544static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
545 struct extent_tree *et, bool free_all)
546{
547 struct rb_node *node, *next;
548 struct extent_node *en;
549 unsigned int count = et->count;
550
551 node = rb_first(&et->root);
552 while (node) {
553 next = rb_next(node);
554 en = rb_entry(node, struct extent_node, rb_node);
555
556 if (free_all) {
557 spin_lock(&sbi->extent_lock);
558 if (!list_empty(&en->list))
559 list_del_init(&en->list);
560 spin_unlock(&sbi->extent_lock);
561 }
562
563 if (free_all || list_empty(&en->list)) {
564 __detach_extent_node(sbi, et, en);
565 kmem_cache_free(extent_node_slab, en);
566 }
567 node = next;
568 }
569
570 return count - et->count;
571}
572
573static void f2fs_init_extent_tree(struct inode *inode,
574 struct f2fs_extent *i_ext)
575{
576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
577 struct extent_tree *et;
578 struct extent_node *en;
579 struct extent_info ei;
580
581 if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
582 return;
583
584 et = __grab_extent_tree(inode);
585
586 write_lock(&et->lock);
587 if (et->count)
588 goto out;
589
590 set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
591 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
592
593 en = __insert_extent_tree(sbi, et, &ei, NULL);
594 if (en) {
595 et->cached_en = en;
596
597 spin_lock(&sbi->extent_lock);
598 list_add_tail(&en->list, &sbi->extent_list);
599 spin_unlock(&sbi->extent_lock);
600 }
601out:
602 write_unlock(&et->lock);
603 atomic_dec(&et->refcount);
604}
605
606static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
607 struct extent_info *ei)
608{
609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
610 struct extent_tree *et;
611 struct extent_node *en;
612
613 trace_f2fs_lookup_extent_tree_start(inode, pgofs);
614
615 et = __find_extent_tree(sbi, inode->i_ino);
616 if (!et)
617 return false;
618
619 read_lock(&et->lock);
620 en = __lookup_extent_tree(et, pgofs);
621 if (en) {
622 *ei = en->ei;
623 spin_lock(&sbi->extent_lock);
624 if (!list_empty(&en->list))
625 list_move_tail(&en->list, &sbi->extent_list);
626 spin_unlock(&sbi->extent_lock);
627 stat_inc_read_hit(sbi->sb);
628 }
629 stat_inc_total_hit(sbi->sb);
630 read_unlock(&et->lock);
631
632 trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
633
634 atomic_dec(&et->refcount);
635 return en ? true : false;
636}
637
638static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
639 block_t blkaddr)
640{
641 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
642 struct extent_tree *et;
643 struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
644 struct extent_node *den = NULL;
645 struct extent_info ei, dei;
646 unsigned int endofs;
647
648 trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
649
650 et = __grab_extent_tree(inode);
651
652 write_lock(&et->lock);
653
654 /* 1. lookup and remove existing extent info in cache */
655 en = __lookup_extent_tree(et, fofs);
656 if (!en)
657 goto update_extent;
658
659 dei = en->ei;
660 __detach_extent_node(sbi, et, en);
661
662 /* 2. if extent can be split more, split and insert the left part */
663 if (dei.len > 1) {
664 /* insert left part of split extent into cache */
665 if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
666 set_extent_info(&ei, dei.fofs, dei.blk,
667 fofs - dei.fofs);
668 en1 = __insert_extent_tree(sbi, et, &ei, NULL);
669 }
670
671 /* insert right part of split extent into cache */
672 endofs = dei.fofs + dei.len - 1;
673 if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
674 set_extent_info(&ei, fofs + 1,
675 fofs - dei.fofs + dei.blk, endofs - fofs);
676 en2 = __insert_extent_tree(sbi, et, &ei, NULL);
677 }
678 }
679
680update_extent:
681 /* 3. update extent in extent cache */
682 if (blkaddr) {
683 set_extent_info(&ei, fofs, blkaddr, 1);
684 en3 = __insert_extent_tree(sbi, et, &ei, &den);
685 }
686
687 /* 4. update in global extent list */
688 spin_lock(&sbi->extent_lock);
689 if (en && !list_empty(&en->list))
690 list_del(&en->list);
691 /*
692 * en1 and en2 split from en, they will become more and more smaller
693 * fragments after splitting several times. So if the length is smaller
694 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
695 */
696 if (en1)
697 list_add_tail(&en1->list, &sbi->extent_list);
698 if (en2)
699 list_add_tail(&en2->list, &sbi->extent_list);
700 if (en3) {
701 if (list_empty(&en3->list))
702 list_add_tail(&en3->list, &sbi->extent_list);
703 else
704 list_move_tail(&en3->list, &sbi->extent_list);
705 }
706 if (den && !list_empty(&den->list))
707 list_del(&den->list);
708 spin_unlock(&sbi->extent_lock);
709
710 /* 5. release extent node */
711 if (en)
712 kmem_cache_free(extent_node_slab, en);
713 if (den)
714 kmem_cache_free(extent_node_slab, den);
715
716 write_unlock(&et->lock);
717 atomic_dec(&et->refcount);
718}
719
720void f2fs_preserve_extent_tree(struct inode *inode)
721{
722 struct extent_tree *et;
723 struct extent_info *ext = &F2FS_I(inode)->ext;
724 bool sync = false;
725
726 if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
727 return;
728
729 et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
730 if (!et) {
731 if (ext->len) {
732 ext->len = 0;
733 update_inode_page(inode);
734 }
735 return;
736 }
737
738 read_lock(&et->lock);
739 if (et->count) {
740 struct extent_node *en;
741
742 if (et->cached_en) {
743 en = et->cached_en;
744 } else {
745 struct rb_node *node = rb_first(&et->root);
746
747 if (!node)
748 node = rb_last(&et->root);
749 en = rb_entry(node, struct extent_node, rb_node);
750 }
751
752 if (__is_extent_same(ext, &en->ei))
753 goto out;
754
755 *ext = en->ei;
756 sync = true;
757 } else if (ext->len) {
758 ext->len = 0;
759 sync = true;
760 }
761out:
762 read_unlock(&et->lock);
763 atomic_dec(&et->refcount);
764
765 if (sync)
766 update_inode_page(inode);
767}
768
769void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
770{
771 struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
772 struct extent_node *en, *tmp;
773 unsigned long ino = F2FS_ROOT_INO(sbi);
774 struct radix_tree_iter iter;
775 void **slot;
776 unsigned int found;
777 unsigned int node_cnt = 0, tree_cnt = 0;
778
779 if (!test_opt(sbi, EXTENT_CACHE))
780 return;
781
782 if (available_free_memory(sbi, EXTENT_CACHE))
783 return;
784
785 spin_lock(&sbi->extent_lock);
786 list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
787 if (!nr_shrink--)
788 break;
789 list_del_init(&en->list);
790 }
791 spin_unlock(&sbi->extent_lock);
792
793 down_read(&sbi->extent_tree_lock);
794 while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
795 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
796 unsigned i;
797
798 ino = treevec[found - 1]->ino + 1;
799 for (i = 0; i < found; i++) {
800 struct extent_tree *et = treevec[i];
801
802 atomic_inc(&et->refcount);
803 write_lock(&et->lock);
804 node_cnt += __free_extent_tree(sbi, et, false);
805 write_unlock(&et->lock);
806 atomic_dec(&et->refcount);
807 }
808 }
809 up_read(&sbi->extent_tree_lock);
810
811 down_write(&sbi->extent_tree_lock);
812 radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
813 F2FS_ROOT_INO(sbi)) {
814 struct extent_tree *et = (struct extent_tree *)*slot;
815
816 if (!atomic_read(&et->refcount) && !et->count) {
817 radix_tree_delete(&sbi->extent_tree_root, et->ino);
818 kmem_cache_free(extent_tree_slab, et);
819 sbi->total_ext_tree--;
820 tree_cnt++;
821 }
822 }
823 up_write(&sbi->extent_tree_lock);
824
825 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
826}
827
828void f2fs_destroy_extent_tree(struct inode *inode)
829{
830 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
831 struct extent_tree *et;
832 unsigned int node_cnt = 0;
833
834 if (!test_opt(sbi, EXTENT_CACHE))
835 return;
836
837 et = __find_extent_tree(sbi, inode->i_ino);
838 if (!et)
839 goto out;
840
841 /* free all extent info belong to this extent tree */
842 write_lock(&et->lock);
843 node_cnt = __free_extent_tree(sbi, et, true);
844 write_unlock(&et->lock);
845
846 atomic_dec(&et->refcount);
847
848 /* try to find and delete extent tree entry in radix tree */
849 down_write(&sbi->extent_tree_lock);
850 et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
851 if (!et) {
852 up_write(&sbi->extent_tree_lock);
853 goto out;
854 }
855 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
856 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
857 kmem_cache_free(extent_tree_slab, et);
858 sbi->total_ext_tree--;
859 up_write(&sbi->extent_tree_lock);
860out:
861 trace_f2fs_destroy_extent_tree(inode, node_cnt);
372 return; 862 return;
373} 863}
374 864
865void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
866{
867 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
868 f2fs_init_extent_tree(inode, i_ext);
869
870 write_lock(&F2FS_I(inode)->ext_lock);
871 get_extent_info(&F2FS_I(inode)->ext, *i_ext);
872 write_unlock(&F2FS_I(inode)->ext_lock);
873}
874
875static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
876 struct extent_info *ei)
877{
878 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
879 return false;
880
881 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
882 return f2fs_lookup_extent_tree(inode, pgofs, ei);
883
884 return lookup_extent_info(inode, pgofs, ei);
885}
886
887void f2fs_update_extent_cache(struct dnode_of_data *dn)
888{
889 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
890 pgoff_t fofs;
891
892 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
893
894 if (is_inode_flag_set(fi, FI_NO_EXTENT))
895 return;
896
897 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
898 dn->ofs_in_node;
899
900 if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
901 return f2fs_update_extent_tree(dn->inode, fofs,
902 dn->data_blkaddr);
903
904 if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
905 sync_inode_page(dn);
906}
907
375struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 908struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
376{ 909{
377 struct address_space *mapping = inode->i_mapping; 910 struct address_space *mapping = inode->i_mapping;
378 struct dnode_of_data dn; 911 struct dnode_of_data dn;
379 struct page *page; 912 struct page *page;
913 struct extent_info ei;
380 int err; 914 int err;
381 struct f2fs_io_info fio = { 915 struct f2fs_io_info fio = {
382 .type = DATA, 916 .type = DATA,
383 .rw = sync ? READ_SYNC : READA, 917 .rw = sync ? READ_SYNC : READA,
384 }; 918 };
385 919
920 /*
921 * If sync is false, it needs to check its block allocation.
922 * This is need and triggered by two flows:
923 * gc and truncate_partial_data_page.
924 */
925 if (!sync)
926 goto search;
927
386 page = find_get_page(mapping, index); 928 page = find_get_page(mapping, index);
387 if (page && PageUptodate(page)) 929 if (page && PageUptodate(page))
388 return page; 930 return page;
389 f2fs_put_page(page, 0); 931 f2fs_put_page(page, 0);
932search:
933 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
934 dn.data_blkaddr = ei.blk + index - ei.fofs;
935 goto got_it;
936 }
390 937
391 set_new_dnode(&dn, inode, NULL, NULL, 0); 938 set_new_dnode(&dn, inode, NULL, NULL, 0);
392 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 939 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
@@ -401,6 +948,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
401 if (unlikely(dn.data_blkaddr == NEW_ADDR)) 948 if (unlikely(dn.data_blkaddr == NEW_ADDR))
402 return ERR_PTR(-EINVAL); 949 return ERR_PTR(-EINVAL);
403 950
951got_it:
404 page = grab_cache_page(mapping, index); 952 page = grab_cache_page(mapping, index);
405 if (!page) 953 if (!page)
406 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
@@ -435,6 +983,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
435 struct address_space *mapping = inode->i_mapping; 983 struct address_space *mapping = inode->i_mapping;
436 struct dnode_of_data dn; 984 struct dnode_of_data dn;
437 struct page *page; 985 struct page *page;
986 struct extent_info ei;
438 int err; 987 int err;
439 struct f2fs_io_info fio = { 988 struct f2fs_io_info fio = {
440 .type = DATA, 989 .type = DATA,
@@ -445,6 +994,11 @@ repeat:
445 if (!page) 994 if (!page)
446 return ERR_PTR(-ENOMEM); 995 return ERR_PTR(-ENOMEM);
447 996
997 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
998 dn.data_blkaddr = ei.blk + index - ei.fofs;
999 goto got_it;
1000 }
1001
448 set_new_dnode(&dn, inode, NULL, NULL, 0); 1002 set_new_dnode(&dn, inode, NULL, NULL, 0);
449 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1003 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
450 if (err) { 1004 if (err) {
@@ -458,6 +1012,7 @@ repeat:
458 return ERR_PTR(-ENOENT); 1012 return ERR_PTR(-ENOENT);
459 } 1013 }
460 1014
1015got_it:
461 if (PageUptodate(page)) 1016 if (PageUptodate(page))
462 return page; 1017 return page;
463 1018
@@ -569,19 +1124,26 @@ static int __allocate_data_block(struct dnode_of_data *dn)
569 1124
570 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1125 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
571 return -EPERM; 1126 return -EPERM;
1127
1128 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
1129 if (dn->data_blkaddr == NEW_ADDR)
1130 goto alloc;
1131
572 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 1132 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
573 return -ENOSPC; 1133 return -ENOSPC;
574 1134
1135alloc:
575 get_node_info(sbi, dn->nid, &ni); 1136 get_node_info(sbi, dn->nid, &ni);
576 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1137 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
577 1138
578 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) 1139 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
579 seg = CURSEG_DIRECT_IO; 1140 seg = CURSEG_DIRECT_IO;
580 1141
581 allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); 1142 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
1143 &sum, seg);
582 1144
583 /* direct IO doesn't use extent cache to maximize the performance */ 1145 /* direct IO doesn't use extent cache to maximize the performance */
584 __set_data_blkaddr(dn); 1146 set_data_blkaddr(dn);
585 1147
586 /* update i_size */ 1148 /* update i_size */
587 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 1149 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -615,7 +1177,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
615 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1177 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
616 1178
617 while (dn.ofs_in_node < end_offset && len) { 1179 while (dn.ofs_in_node < end_offset && len) {
618 if (dn.data_blkaddr == NULL_ADDR) { 1180 block_t blkaddr;
1181
1182 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
1183 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
619 if (__allocate_data_block(&dn)) 1184 if (__allocate_data_block(&dn))
620 goto sync_out; 1185 goto sync_out;
621 allocated = true; 1186 allocated = true;
@@ -659,13 +1224,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
659 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 1224 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
660 pgoff_t pgofs, end_offset; 1225 pgoff_t pgofs, end_offset;
661 int err = 0, ofs = 1; 1226 int err = 0, ofs = 1;
1227 struct extent_info ei;
662 bool allocated = false; 1228 bool allocated = false;
663 1229
664 /* Get the page offset from the block offset(iblock) */ 1230 /* Get the page offset from the block offset(iblock) */
665 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 1231 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
666 1232
667 if (check_extent_cache(inode, pgofs, bh_result)) 1233 if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1234 f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result);
668 goto out; 1235 goto out;
1236 }
669 1237
670 if (create) 1238 if (create)
671 f2fs_lock_op(F2FS_I_SB(inode)); 1239 f2fs_lock_op(F2FS_I_SB(inode));
@@ -682,7 +1250,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
682 goto put_out; 1250 goto put_out;
683 1251
684 if (dn.data_blkaddr != NULL_ADDR) { 1252 if (dn.data_blkaddr != NULL_ADDR) {
685 set_buffer_new(bh_result); 1253 clear_buffer_new(bh_result);
686 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 1254 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
687 } else if (create) { 1255 } else if (create) {
688 err = __allocate_data_block(&dn); 1256 err = __allocate_data_block(&dn);
@@ -727,6 +1295,7 @@ get_next:
727 if (err) 1295 if (err)
728 goto sync_out; 1296 goto sync_out;
729 allocated = true; 1297 allocated = true;
1298 set_buffer_new(bh_result);
730 blkaddr = dn.data_blkaddr; 1299 blkaddr = dn.data_blkaddr;
731 } 1300 }
732 /* Give more consecutive addresses for the readahead */ 1301 /* Give more consecutive addresses for the readahead */
@@ -813,8 +1382,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
813 fio->blk_addr = dn.data_blkaddr; 1382 fio->blk_addr = dn.data_blkaddr;
814 1383
815 /* This page is already truncated */ 1384 /* This page is already truncated */
816 if (fio->blk_addr == NULL_ADDR) 1385 if (fio->blk_addr == NULL_ADDR) {
1386 ClearPageUptodate(page);
817 goto out_writepage; 1387 goto out_writepage;
1388 }
818 1389
819 set_page_writeback(page); 1390 set_page_writeback(page);
820 1391
@@ -827,10 +1398,15 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
827 need_inplace_update(inode))) { 1398 need_inplace_update(inode))) {
828 rewrite_data_page(page, fio); 1399 rewrite_data_page(page, fio);
829 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1400 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1401 trace_f2fs_do_write_data_page(page, IPU);
830 } else { 1402 } else {
831 write_data_page(page, &dn, fio); 1403 write_data_page(page, &dn, fio);
832 update_extent_cache(&dn); 1404 set_data_blkaddr(&dn);
1405 f2fs_update_extent_cache(&dn);
1406 trace_f2fs_do_write_data_page(page, OPU);
833 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 1407 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1408 if (page->index == 0)
1409 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
834 } 1410 }
835out_writepage: 1411out_writepage:
836 f2fs_put_dnode(&dn); 1412 f2fs_put_dnode(&dn);
@@ -909,6 +1485,8 @@ done:
909 clear_cold_data(page); 1485 clear_cold_data(page);
910out: 1486out:
911 inode_dec_dirty_pages(inode); 1487 inode_dec_dirty_pages(inode);
1488 if (err)
1489 ClearPageUptodate(page);
912 unlock_page(page); 1490 unlock_page(page);
913 if (need_balance_fs) 1491 if (need_balance_fs)
914 f2fs_balance_fs(sbi); 1492 f2fs_balance_fs(sbi);
@@ -935,7 +1513,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
935{ 1513{
936 struct inode *inode = mapping->host; 1514 struct inode *inode = mapping->host;
937 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
938 bool locked = false;
939 int ret; 1516 int ret;
940 long diff; 1517 long diff;
941 1518
@@ -950,15 +1527,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
950 available_free_memory(sbi, DIRTY_DENTS)) 1527 available_free_memory(sbi, DIRTY_DENTS))
951 goto skip_write; 1528 goto skip_write;
952 1529
1530 /* during POR, we don't need to trigger writepage at all. */
1531 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1532 goto skip_write;
1533
953 diff = nr_pages_to_write(sbi, DATA, wbc); 1534 diff = nr_pages_to_write(sbi, DATA, wbc);
954 1535
955 if (!S_ISDIR(inode->i_mode)) {
956 mutex_lock(&sbi->writepages);
957 locked = true;
958 }
959 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1536 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
960 if (locked)
961 mutex_unlock(&sbi->writepages);
962 1537
963 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1538 f2fs_submit_merged_bio(sbi, DATA, WRITE);
964 1539
@@ -1118,12 +1693,12 @@ static int f2fs_write_end(struct file *file,
1118 return copied; 1693 return copied;
1119} 1694}
1120 1695
1121static int check_direct_IO(struct inode *inode, int rw, 1696static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
1122 struct iov_iter *iter, loff_t offset) 1697 loff_t offset)
1123{ 1698{
1124 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; 1699 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1125 1700
1126 if (rw == READ) 1701 if (iov_iter_rw(iter) == READ)
1127 return 0; 1702 return 0;
1128 1703
1129 if (offset & blocksize_mask) 1704 if (offset & blocksize_mask)
@@ -1135,8 +1710,8 @@ static int check_direct_IO(struct inode *inode, int rw,
1135 return 0; 1710 return 0;
1136} 1711}
1137 1712
1138static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, 1713static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1139 struct iov_iter *iter, loff_t offset) 1714 loff_t offset)
1140{ 1715{
1141 struct file *file = iocb->ki_filp; 1716 struct file *file = iocb->ki_filp;
1142 struct address_space *mapping = file->f_mapping; 1717 struct address_space *mapping = file->f_mapping;
@@ -1151,19 +1726,19 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1151 return err; 1726 return err;
1152 } 1727 }
1153 1728
1154 if (check_direct_IO(inode, rw, iter, offset)) 1729 if (check_direct_IO(inode, iter, offset))
1155 return 0; 1730 return 0;
1156 1731
1157 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1732 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1158 1733
1159 if (rw & WRITE) 1734 if (iov_iter_rw(iter) == WRITE)
1160 __allocate_data_blocks(inode, offset, count); 1735 __allocate_data_blocks(inode, offset, count);
1161 1736
1162 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); 1737 err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
1163 if (err < 0 && (rw & WRITE)) 1738 if (err < 0 && iov_iter_rw(iter) == WRITE)
1164 f2fs_write_failed(mapping, offset + count); 1739 f2fs_write_failed(mapping, offset + count);
1165 1740
1166 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); 1741 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
1167 1742
1168 return err; 1743 return err;
1169} 1744}
@@ -1236,6 +1811,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1236 return generic_block_bmap(mapping, block, get_data_block); 1811 return generic_block_bmap(mapping, block, get_data_block);
1237} 1812}
1238 1813
1814void init_extent_cache_info(struct f2fs_sb_info *sbi)
1815{
1816 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
1817 init_rwsem(&sbi->extent_tree_lock);
1818 INIT_LIST_HEAD(&sbi->extent_list);
1819 spin_lock_init(&sbi->extent_lock);
1820 sbi->total_ext_tree = 0;
1821 atomic_set(&sbi->total_ext_node, 0);
1822}
1823
1824int __init create_extent_cache(void)
1825{
1826 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1827 sizeof(struct extent_tree));
1828 if (!extent_tree_slab)
1829 return -ENOMEM;
1830 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1831 sizeof(struct extent_node));
1832 if (!extent_node_slab) {
1833 kmem_cache_destroy(extent_tree_slab);
1834 return -ENOMEM;
1835 }
1836 return 0;
1837}
1838
1839void destroy_extent_cache(void)
1840{
1841 kmem_cache_destroy(extent_node_slab);
1842 kmem_cache_destroy(extent_tree_slab);
1843}
1844
1239const struct address_space_operations f2fs_dblock_aops = { 1845const struct address_space_operations f2fs_dblock_aops = {
1240 .readpage = f2fs_read_data_page, 1846 .readpage = f2fs_read_data_page,
1241 .readpages = f2fs_read_data_pages, 1847 .readpages = f2fs_read_data_pages,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index e671373cc8ab..f5388f37217e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
35 /* validation check of the segment numbers */ 35 /* validation check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext; 36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext; 37 si->total_ext = sbi->total_hit_ext;
38 si->ext_tree = sbi->total_ext_tree;
39 si->ext_node = atomic_read(&sbi->total_ext_node);
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 40 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
39 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); 41 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
40 si->ndirty_dirs = sbi->n_dirty_dirs; 42 si->ndirty_dirs = sbi->n_dirty_dirs;
@@ -185,6 +187,9 @@ get_cache:
185 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); 187 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
186 for (i = 0; i <= UPDATE_INO; i++) 188 for (i = 0; i <= UPDATE_INO; i++)
187 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 189 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
190 si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
191 si->cache_mem += atomic_read(&sbi->total_ext_node) *
192 sizeof(struct extent_node);
188 193
189 si->page_mem = 0; 194 si->page_mem = 0;
190 npages = NODE_MAPPING(sbi)->nrpages; 195 npages = NODE_MAPPING(sbi)->nrpages;
@@ -260,13 +265,20 @@ static int stat_show(struct seq_file *s, void *v)
260 seq_printf(s, "CP calls: %d\n", si->cp_count); 265 seq_printf(s, "CP calls: %d\n", si->cp_count);
261 seq_printf(s, "GC calls: %d (BG: %d)\n", 266 seq_printf(s, "GC calls: %d (BG: %d)\n",
262 si->call_count, si->bg_gc); 267 si->call_count, si->bg_gc);
263 seq_printf(s, " - data segments : %d\n", si->data_segs); 268 seq_printf(s, " - data segments : %d (%d)\n",
264 seq_printf(s, " - node segments : %d\n", si->node_segs); 269 si->data_segs, si->bg_data_segs);
265 seq_printf(s, "Try to move %d blocks\n", si->tot_blks); 270 seq_printf(s, " - node segments : %d (%d)\n",
266 seq_printf(s, " - data blocks : %d\n", si->data_blks); 271 si->node_segs, si->bg_node_segs);
267 seq_printf(s, " - node blocks : %d\n", si->node_blks); 272 seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
273 si->bg_data_blks + si->bg_node_blks);
274 seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks,
275 si->bg_data_blks);
276 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
277 si->bg_node_blks);
268 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 278 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
269 si->hit_ext, si->total_ext); 279 si->hit_ext, si->total_ext);
280 seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree);
281 seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node);
270 seq_puts(s, "\nBalancing F2FS Async:\n"); 282 seq_puts(s, "\nBalancing F2FS Async:\n");
271 seq_printf(s, " - inmem: %4d, wb: %4d\n", 283 seq_printf(s, " - inmem: %4d, wb: %4d\n",
272 si->inmem_pages, si->wb_pages); 284 si->inmem_pages, si->wb_pages);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b74097a7f6d9..3a3302ab7871 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -59,9 +59,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
59 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, 59 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
60}; 60};
61 61
62void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) 62void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
63{ 63{
64 umode_t mode = inode->i_mode;
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 65}
67 66
@@ -127,22 +126,19 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
127 *max_slots = 0; 126 *max_slots = 0;
128 while (bit_pos < d->max) { 127 while (bit_pos < d->max) {
129 if (!test_bit_le(bit_pos, d->bitmap)) { 128 if (!test_bit_le(bit_pos, d->bitmap)) {
130 if (bit_pos == 0)
131 max_len = 1;
132 else if (!test_bit_le(bit_pos - 1, d->bitmap))
133 max_len++;
134 bit_pos++; 129 bit_pos++;
130 max_len++;
135 continue; 131 continue;
136 } 132 }
133
137 de = &d->dentry[bit_pos]; 134 de = &d->dentry[bit_pos];
138 if (early_match_name(name->len, namehash, de) && 135 if (early_match_name(name->len, namehash, de) &&
139 !memcmp(d->filename[bit_pos], name->name, name->len)) 136 !memcmp(d->filename[bit_pos], name->name, name->len))
140 goto found; 137 goto found;
141 138
142 if (max_slots && *max_slots >= 0 && max_len > *max_slots) { 139 if (max_slots && max_len > *max_slots)
143 *max_slots = max_len; 140 *max_slots = max_len;
144 max_len = 0; 141 max_len = 0;
145 }
146 142
147 /* remain bug on condition */ 143 /* remain bug on condition */
148 if (unlikely(!de->name_len)) 144 if (unlikely(!de->name_len))
@@ -219,14 +215,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
219 unsigned int max_depth; 215 unsigned int max_depth;
220 unsigned int level; 216 unsigned int level;
221 217
218 *res_page = NULL;
219
222 if (f2fs_has_inline_dentry(dir)) 220 if (f2fs_has_inline_dentry(dir))
223 return find_in_inline_dir(dir, child, res_page); 221 return find_in_inline_dir(dir, child, res_page);
224 222
225 if (npages == 0) 223 if (npages == 0)
226 return NULL; 224 return NULL;
227 225
228 *res_page = NULL;
229
230 name_hash = f2fs_dentry_hash(child); 226 name_hash = f2fs_dentry_hash(child);
231 max_depth = F2FS_I(dir)->i_current_depth; 227 max_depth = F2FS_I(dir)->i_current_depth;
232 228
@@ -285,7 +281,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
285 lock_page(page); 281 lock_page(page);
286 f2fs_wait_on_page_writeback(page, type); 282 f2fs_wait_on_page_writeback(page, type);
287 de->ino = cpu_to_le32(inode->i_ino); 283 de->ino = cpu_to_le32(inode->i_ino);
288 set_de_type(de, inode); 284 set_de_type(de, inode->i_mode);
289 f2fs_dentry_kunmap(dir, page); 285 f2fs_dentry_kunmap(dir, page);
290 set_page_dirty(page); 286 set_page_dirty(page);
291 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 287 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
@@ -331,14 +327,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent,
331 de->hash_code = 0; 327 de->hash_code = 0;
332 de->ino = cpu_to_le32(inode->i_ino); 328 de->ino = cpu_to_le32(inode->i_ino);
333 memcpy(d->filename[0], ".", 1); 329 memcpy(d->filename[0], ".", 1);
334 set_de_type(de, inode); 330 set_de_type(de, inode->i_mode);
335 331
336 de = &d->dentry[1]; 332 de = &d->dentry[1];
337 de->hash_code = 0; 333 de->hash_code = 0;
338 de->name_len = cpu_to_le16(2); 334 de->name_len = cpu_to_le16(2);
339 de->ino = cpu_to_le32(parent->i_ino); 335 de->ino = cpu_to_le32(parent->i_ino);
340 memcpy(d->filename[1], "..", 2); 336 memcpy(d->filename[1], "..", 2);
341 set_de_type(de, inode); 337 set_de_type(de, parent->i_mode);
342 338
343 test_and_set_bit_le(0, (void *)d->bitmap); 339 test_and_set_bit_le(0, (void *)d->bitmap);
344 test_and_set_bit_le(1, (void *)d->bitmap); 340 test_and_set_bit_le(1, (void *)d->bitmap);
@@ -435,7 +431,7 @@ error:
435void update_parent_metadata(struct inode *dir, struct inode *inode, 431void update_parent_metadata(struct inode *dir, struct inode *inode,
436 unsigned int current_depth) 432 unsigned int current_depth)
437{ 433{
438 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 434 if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
439 if (S_ISDIR(inode->i_mode)) { 435 if (S_ISDIR(inode->i_mode)) {
440 inc_nlink(dir); 436 inc_nlink(dir);
441 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 437 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -450,7 +446,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
450 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 446 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
451 } 447 }
452 448
453 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 449 if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
454 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 450 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
455} 451}
456 452
@@ -474,30 +470,47 @@ next:
474 goto next; 470 goto next;
475} 471}
476 472
473void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
474 const struct qstr *name, f2fs_hash_t name_hash,
475 unsigned int bit_pos)
476{
477 struct f2fs_dir_entry *de;
478 int slots = GET_DENTRY_SLOTS(name->len);
479 int i;
480
481 de = &d->dentry[bit_pos];
482 de->hash_code = name_hash;
483 de->name_len = cpu_to_le16(name->len);
484 memcpy(d->filename[bit_pos], name->name, name->len);
485 de->ino = cpu_to_le32(ino);
486 set_de_type(de, mode);
487 for (i = 0; i < slots; i++)
488 test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
489}
490
477/* 491/*
478 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 492 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
479 * f2fs_unlock_op(). 493 * f2fs_unlock_op().
480 */ 494 */
481int __f2fs_add_link(struct inode *dir, const struct qstr *name, 495int __f2fs_add_link(struct inode *dir, const struct qstr *name,
482 struct inode *inode) 496 struct inode *inode, nid_t ino, umode_t mode)
483{ 497{
484 unsigned int bit_pos; 498 unsigned int bit_pos;
485 unsigned int level; 499 unsigned int level;
486 unsigned int current_depth; 500 unsigned int current_depth;
487 unsigned long bidx, block; 501 unsigned long bidx, block;
488 f2fs_hash_t dentry_hash; 502 f2fs_hash_t dentry_hash;
489 struct f2fs_dir_entry *de;
490 unsigned int nbucket, nblock; 503 unsigned int nbucket, nblock;
491 size_t namelen = name->len; 504 size_t namelen = name->len;
492 struct page *dentry_page = NULL; 505 struct page *dentry_page = NULL;
493 struct f2fs_dentry_block *dentry_blk = NULL; 506 struct f2fs_dentry_block *dentry_blk = NULL;
507 struct f2fs_dentry_ptr d;
494 int slots = GET_DENTRY_SLOTS(namelen); 508 int slots = GET_DENTRY_SLOTS(namelen);
495 struct page *page; 509 struct page *page = NULL;
496 int err = 0; 510 int err = 0;
497 int i;
498 511
499 if (f2fs_has_inline_dentry(dir)) { 512 if (f2fs_has_inline_dentry(dir)) {
500 err = f2fs_add_inline_entry(dir, name, inode); 513 err = f2fs_add_inline_entry(dir, name, inode, ino, mode);
501 if (!err || err != -EAGAIN) 514 if (!err || err != -EAGAIN)
502 return err; 515 return err;
503 else 516 else
@@ -547,30 +560,31 @@ start:
547add_dentry: 560add_dentry:
548 f2fs_wait_on_page_writeback(dentry_page, DATA); 561 f2fs_wait_on_page_writeback(dentry_page, DATA);
549 562
550 down_write(&F2FS_I(inode)->i_sem); 563 if (inode) {
551 page = init_inode_metadata(inode, dir, name, NULL); 564 down_write(&F2FS_I(inode)->i_sem);
552 if (IS_ERR(page)) { 565 page = init_inode_metadata(inode, dir, name, NULL);
553 err = PTR_ERR(page); 566 if (IS_ERR(page)) {
554 goto fail; 567 err = PTR_ERR(page);
568 goto fail;
569 }
555 } 570 }
556 de = &dentry_blk->dentry[bit_pos]; 571
557 de->hash_code = dentry_hash; 572 make_dentry_ptr(&d, (void *)dentry_blk, 1);
558 de->name_len = cpu_to_le16(namelen); 573 f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos);
559 memcpy(dentry_blk->filename[bit_pos], name->name, name->len); 574
560 de->ino = cpu_to_le32(inode->i_ino);
561 set_de_type(de, inode);
562 for (i = 0; i < slots; i++)
563 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
564 set_page_dirty(dentry_page); 575 set_page_dirty(dentry_page);
565 576
566 /* we don't need to mark_inode_dirty now */ 577 if (inode) {
567 F2FS_I(inode)->i_pino = dir->i_ino; 578 /* we don't need to mark_inode_dirty now */
568 update_inode(inode, page); 579 F2FS_I(inode)->i_pino = dir->i_ino;
569 f2fs_put_page(page, 1); 580 update_inode(inode, page);
581 f2fs_put_page(page, 1);
582 }
570 583
571 update_parent_metadata(dir, inode, current_depth); 584 update_parent_metadata(dir, inode, current_depth);
572fail: 585fail:
573 up_write(&F2FS_I(inode)->i_sem); 586 if (inode)
587 up_write(&F2FS_I(inode)->i_sem);
574 588
575 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 589 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
576 update_inode_page(dir); 590 update_inode_page(dir);
@@ -669,6 +683,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
669 if (bit_pos == NR_DENTRY_IN_BLOCK) { 683 if (bit_pos == NR_DENTRY_IN_BLOCK) {
670 truncate_hole(dir, page->index, page->index + 1); 684 truncate_hole(dir, page->index, page->index + 1);
671 clear_page_dirty_for_io(page); 685 clear_page_dirty_for_io(page);
686 ClearPagePrivate(page);
672 ClearPageUptodate(page); 687 ClearPageUptodate(page);
673 inode_dec_dirty_pages(dir); 688 inode_dec_dirty_pages(dir);
674 } 689 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7fa3313ab0e2..c06a25e5cec3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -50,6 +50,7 @@
50#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 50#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
51#define F2FS_MOUNT_NOBARRIER 0x00000800 51#define F2FS_MOUNT_NOBARRIER 0x00000800
52#define F2FS_MOUNT_FASTBOOT 0x00001000 52#define F2FS_MOUNT_FASTBOOT 0x00001000
53#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
53 54
54#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 55#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
55#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 56#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -102,6 +103,7 @@ enum {
102 CP_UMOUNT, 103 CP_UMOUNT,
103 CP_FASTBOOT, 104 CP_FASTBOOT,
104 CP_SYNC, 105 CP_SYNC,
106 CP_RECOVERY,
105 CP_DISCARD, 107 CP_DISCARD,
106}; 108};
107 109
@@ -216,6 +218,15 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
216#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) 218#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
217#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 219#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
218 220
221/*
222 * should be same as XFS_IOC_GOINGDOWN.
223 * Flags for going down operation used by FS_IOC_GOINGDOWN
224 */
225#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */
226#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */
227#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
228#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
229
219#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 230#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
220/* 231/*
221 * ioctl commands in 32 bit emulation 232 * ioctl commands in 32 bit emulation
@@ -273,14 +284,34 @@ enum {
273 284
274#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ 285#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
275 286
287/* vector size for gang look-up from extent cache that consists of radix tree */
288#define EXT_TREE_VEC_SIZE 64
289
276/* for in-memory extent cache entry */ 290/* for in-memory extent cache entry */
277#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ 291#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
292
293/* number of extent info in extent cache we try to shrink */
294#define EXTENT_CACHE_SHRINK_NUMBER 128
278 295
279struct extent_info { 296struct extent_info {
280 rwlock_t ext_lock; /* rwlock for consistency */ 297 unsigned int fofs; /* start offset in a file */
281 unsigned int fofs; /* start offset in a file */ 298 u32 blk; /* start block address of the extent */
282 u32 blk_addr; /* start block address of the extent */ 299 unsigned int len; /* length of the extent */
283 unsigned int len; /* length of the extent */ 300};
301
302struct extent_node {
303 struct rb_node rb_node; /* rb node located in rb-tree */
304 struct list_head list; /* node in global extent list of sbi */
305 struct extent_info ei; /* extent info */
306};
307
308struct extent_tree {
309 nid_t ino; /* inode number */
310 struct rb_root root; /* root of extent info rb-tree */
311 struct extent_node *cached_en; /* recently accessed extent node */
312 rwlock_t lock; /* protect extent info rb-tree */
313 atomic_t refcount; /* reference count of rb-tree */
314 unsigned int count; /* # of extent node in rb-tree*/
284}; 315};
285 316
286/* 317/*
@@ -309,6 +340,7 @@ struct f2fs_inode_info {
309 nid_t i_xattr_nid; /* node id that contains xattrs */ 340 nid_t i_xattr_nid; /* node id that contains xattrs */
310 unsigned long long xattr_ver; /* cp version of xattr modification */ 341 unsigned long long xattr_ver; /* cp version of xattr modification */
311 struct extent_info ext; /* in-memory extent cache entry */ 342 struct extent_info ext; /* in-memory extent cache entry */
343 rwlock_t ext_lock; /* rwlock for single extent cache */
312 struct inode_entry *dirty_dir; /* the pointer of dirty dir */ 344 struct inode_entry *dirty_dir; /* the pointer of dirty dir */
313 345
314 struct radix_tree_root inmem_root; /* radix tree for inmem pages */ 346 struct radix_tree_root inmem_root; /* radix tree for inmem pages */
@@ -319,21 +351,51 @@ struct f2fs_inode_info {
319static inline void get_extent_info(struct extent_info *ext, 351static inline void get_extent_info(struct extent_info *ext,
320 struct f2fs_extent i_ext) 352 struct f2fs_extent i_ext)
321{ 353{
322 write_lock(&ext->ext_lock);
323 ext->fofs = le32_to_cpu(i_ext.fofs); 354 ext->fofs = le32_to_cpu(i_ext.fofs);
324 ext->blk_addr = le32_to_cpu(i_ext.blk_addr); 355 ext->blk = le32_to_cpu(i_ext.blk);
325 ext->len = le32_to_cpu(i_ext.len); 356 ext->len = le32_to_cpu(i_ext.len);
326 write_unlock(&ext->ext_lock);
327} 357}
328 358
329static inline void set_raw_extent(struct extent_info *ext, 359static inline void set_raw_extent(struct extent_info *ext,
330 struct f2fs_extent *i_ext) 360 struct f2fs_extent *i_ext)
331{ 361{
332 read_lock(&ext->ext_lock);
333 i_ext->fofs = cpu_to_le32(ext->fofs); 362 i_ext->fofs = cpu_to_le32(ext->fofs);
334 i_ext->blk_addr = cpu_to_le32(ext->blk_addr); 363 i_ext->blk = cpu_to_le32(ext->blk);
335 i_ext->len = cpu_to_le32(ext->len); 364 i_ext->len = cpu_to_le32(ext->len);
336 read_unlock(&ext->ext_lock); 365}
366
367static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
368 u32 blk, unsigned int len)
369{
370 ei->fofs = fofs;
371 ei->blk = blk;
372 ei->len = len;
373}
374
375static inline bool __is_extent_same(struct extent_info *ei1,
376 struct extent_info *ei2)
377{
378 return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
379 ei1->len == ei2->len);
380}
381
382static inline bool __is_extent_mergeable(struct extent_info *back,
383 struct extent_info *front)
384{
385 return (back->fofs + back->len == front->fofs &&
386 back->blk + back->len == front->blk);
387}
388
389static inline bool __is_back_mergeable(struct extent_info *cur,
390 struct extent_info *back)
391{
392 return __is_extent_mergeable(back, cur);
393}
394
395static inline bool __is_front_mergeable(struct extent_info *cur,
396 struct extent_info *front)
397{
398 return __is_extent_mergeable(cur, front);
337} 399}
338 400
339struct f2fs_nm_info { 401struct f2fs_nm_info {
@@ -502,6 +564,10 @@ enum page_type {
502 META, 564 META,
503 NR_PAGE_TYPE, 565 NR_PAGE_TYPE,
504 META_FLUSH, 566 META_FLUSH,
567 INMEM, /* the below types are used by tracepoints only. */
568 INMEM_DROP,
569 IPU,
570 OPU,
505}; 571};
506 572
507struct f2fs_io_info { 573struct f2fs_io_info {
@@ -559,7 +625,6 @@ struct f2fs_sb_info {
559 struct mutex cp_mutex; /* checkpoint procedure lock */ 625 struct mutex cp_mutex; /* checkpoint procedure lock */
560 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 626 struct rw_semaphore cp_rwsem; /* blocking FS operations */
561 struct rw_semaphore node_write; /* locking node writes */ 627 struct rw_semaphore node_write; /* locking node writes */
562 struct mutex writepages; /* mutex for writepages() */
563 wait_queue_head_t cp_wait; 628 wait_queue_head_t cp_wait;
564 629
565 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 630 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@@ -571,6 +636,14 @@ struct f2fs_sb_info {
571 struct list_head dir_inode_list; /* dir inode list */ 636 struct list_head dir_inode_list; /* dir inode list */
572 spinlock_t dir_inode_lock; /* for dir inode list lock */ 637 spinlock_t dir_inode_lock; /* for dir inode list lock */
573 638
639 /* for extent tree cache */
640 struct radix_tree_root extent_tree_root;/* cache extent cache entries */
641 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
642 struct list_head extent_list; /* lru list for shrinker */
643 spinlock_t extent_lock; /* locking extent lru list */
644 int total_ext_tree; /* extent tree count */
645 atomic_t total_ext_node; /* extent info count */
646
574 /* basic filesystem units */ 647 /* basic filesystem units */
575 unsigned int log_sectors_per_block; /* log2 sectors per block */ 648 unsigned int log_sectors_per_block; /* log2 sectors per block */
576 unsigned int log_blocksize; /* log2 block size */ 649 unsigned int log_blocksize; /* log2 block size */
@@ -920,12 +993,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
920 return 0; 993 return 0;
921} 994}
922 995
996static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
997{
998 return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
999}
1000
923static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) 1001static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
924{ 1002{
925 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1003 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
926 int offset; 1004 int offset;
927 1005
928 if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { 1006 if (__cp_payload(sbi) > 0) {
929 if (flag == NAT_BITMAP) 1007 if (flag == NAT_BITMAP)
930 return &ckpt->sit_nat_version_bitmap; 1008 return &ckpt->sit_nat_version_bitmap;
931 else 1009 else
@@ -1166,8 +1244,10 @@ enum {
1166 FI_NEED_IPU, /* used for ipu per file */ 1244 FI_NEED_IPU, /* used for ipu per file */
1167 FI_ATOMIC_FILE, /* indicate atomic file */ 1245 FI_ATOMIC_FILE, /* indicate atomic file */
1168 FI_VOLATILE_FILE, /* indicate volatile file */ 1246 FI_VOLATILE_FILE, /* indicate volatile file */
1247 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
1169 FI_DROP_CACHE, /* drop dirty page cache */ 1248 FI_DROP_CACHE, /* drop dirty page cache */
1170 FI_DATA_EXIST, /* indicate data exists */ 1249 FI_DATA_EXIST, /* indicate data exists */
1250 FI_INLINE_DOTS, /* indicate inline dot dentries */
1171}; 1251};
1172 1252
1173static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1253static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1204,6 +1284,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
1204 set_inode_flag(fi, FI_INLINE_DENTRY); 1284 set_inode_flag(fi, FI_INLINE_DENTRY);
1205 if (ri->i_inline & F2FS_DATA_EXIST) 1285 if (ri->i_inline & F2FS_DATA_EXIST)
1206 set_inode_flag(fi, FI_DATA_EXIST); 1286 set_inode_flag(fi, FI_DATA_EXIST);
1287 if (ri->i_inline & F2FS_INLINE_DOTS)
1288 set_inode_flag(fi, FI_INLINE_DOTS);
1207} 1289}
1208 1290
1209static inline void set_raw_inline(struct f2fs_inode_info *fi, 1291static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1219,6 +1301,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
1219 ri->i_inline |= F2FS_INLINE_DENTRY; 1301 ri->i_inline |= F2FS_INLINE_DENTRY;
1220 if (is_inode_flag_set(fi, FI_DATA_EXIST)) 1302 if (is_inode_flag_set(fi, FI_DATA_EXIST))
1221 ri->i_inline |= F2FS_DATA_EXIST; 1303 ri->i_inline |= F2FS_DATA_EXIST;
1304 if (is_inode_flag_set(fi, FI_INLINE_DOTS))
1305 ri->i_inline |= F2FS_INLINE_DOTS;
1222} 1306}
1223 1307
1224static inline int f2fs_has_inline_xattr(struct inode *inode) 1308static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1264,6 +1348,11 @@ static inline int f2fs_exist_data(struct inode *inode)
1264 return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); 1348 return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
1265} 1349}
1266 1350
1351static inline int f2fs_has_inline_dots(struct inode *inode)
1352{
1353 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS);
1354}
1355
1267static inline bool f2fs_is_atomic_file(struct inode *inode) 1356static inline bool f2fs_is_atomic_file(struct inode *inode)
1268{ 1357{
1269 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); 1358 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1274,6 +1363,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode)
1274 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); 1363 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
1275} 1364}
1276 1365
1366static inline bool f2fs_is_first_block_written(struct inode *inode)
1367{
1368 return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1369}
1370
1277static inline bool f2fs_is_drop_cache(struct inode *inode) 1371static inline bool f2fs_is_drop_cache(struct inode *inode)
1278{ 1372{
1279 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); 1373 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
@@ -1290,12 +1384,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode)
1290 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); 1384 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
1291} 1385}
1292 1386
1293static inline void *inline_dentry_addr(struct page *page)
1294{
1295 struct f2fs_inode *ri = F2FS_INODE(page);
1296 return (void *)&(ri->i_addr[1]);
1297}
1298
1299static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) 1387static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
1300{ 1388{
1301 if (!f2fs_has_inline_dentry(dir)) 1389 if (!f2fs_has_inline_dentry(dir))
@@ -1363,7 +1451,7 @@ struct dentry *f2fs_get_parent(struct dentry *child);
1363 * dir.c 1451 * dir.c
1364 */ 1452 */
1365extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; 1453extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
1366void set_de_type(struct f2fs_dir_entry *, struct inode *); 1454void set_de_type(struct f2fs_dir_entry *, umode_t);
1367struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, 1455struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
1368 struct f2fs_dentry_ptr *); 1456 struct f2fs_dentry_ptr *);
1369bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, 1457bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
@@ -1382,7 +1470,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
1382void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 1470void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1383 struct page *, struct inode *); 1471 struct page *, struct inode *);
1384int update_dent_inode(struct inode *, const struct qstr *); 1472int update_dent_inode(struct inode *, const struct qstr *);
1385int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1473void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
1474 const struct qstr *, f2fs_hash_t , unsigned int);
1475int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
1476 umode_t);
1386void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, 1477void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
1387 struct inode *); 1478 struct inode *);
1388int f2fs_do_tmpfile(struct inode *, struct inode *); 1479int f2fs_do_tmpfile(struct inode *, struct inode *);
@@ -1392,7 +1483,7 @@ bool f2fs_empty_dir(struct inode *);
1392static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) 1483static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
1393{ 1484{
1394 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, 1485 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
1395 inode); 1486 inode, inode->i_ino, inode->i_mode);
1396} 1487}
1397 1488
1398/* 1489/*
@@ -1519,14 +1610,22 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
1519 struct f2fs_io_info *); 1610 struct f2fs_io_info *);
1520void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, 1611void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
1521 struct f2fs_io_info *); 1612 struct f2fs_io_info *);
1613void set_data_blkaddr(struct dnode_of_data *);
1522int reserve_new_block(struct dnode_of_data *); 1614int reserve_new_block(struct dnode_of_data *);
1523int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1615int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
1524void update_extent_cache(struct dnode_of_data *); 1616void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
1617void f2fs_destroy_extent_tree(struct inode *);
1618void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
1619void f2fs_update_extent_cache(struct dnode_of_data *);
1620void f2fs_preserve_extent_tree(struct inode *);
1525struct page *find_data_page(struct inode *, pgoff_t, bool); 1621struct page *find_data_page(struct inode *, pgoff_t, bool);
1526struct page *get_lock_data_page(struct inode *, pgoff_t); 1622struct page *get_lock_data_page(struct inode *, pgoff_t);
1527struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1623struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1528int do_write_data_page(struct page *, struct f2fs_io_info *); 1624int do_write_data_page(struct page *, struct f2fs_io_info *);
1529int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 1625int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1626void init_extent_cache_info(struct f2fs_sb_info *);
1627int __init create_extent_cache(void);
1628void destroy_extent_cache(void);
1530void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 1629void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
1531int f2fs_release_page(struct page *, gfp_t); 1630int f2fs_release_page(struct page *, gfp_t);
1532 1631
@@ -1554,7 +1653,7 @@ struct f2fs_stat_info {
1554 struct f2fs_sb_info *sbi; 1653 struct f2fs_sb_info *sbi;
1555 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1654 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1556 int main_area_segs, main_area_sections, main_area_zones; 1655 int main_area_segs, main_area_sections, main_area_zones;
1557 int hit_ext, total_ext; 1656 int hit_ext, total_ext, ext_tree, ext_node;
1558 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1657 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1559 int nats, dirty_nats, sits, dirty_sits, fnids; 1658 int nats, dirty_nats, sits, dirty_sits, fnids;
1560 int total_count, utilization; 1659 int total_count, utilization;
@@ -1566,7 +1665,9 @@ struct f2fs_stat_info {
1566 int dirty_count, node_pages, meta_pages; 1665 int dirty_count, node_pages, meta_pages;
1567 int prefree_count, call_count, cp_count; 1666 int prefree_count, call_count, cp_count;
1568 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1667 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1668 int bg_node_segs, bg_data_segs;
1569 int tot_blks, data_blks, node_blks; 1669 int tot_blks, data_blks, node_blks;
1670 int bg_data_blks, bg_node_blks;
1570 int curseg[NR_CURSEG_TYPE]; 1671 int curseg[NR_CURSEG_TYPE];
1571 int cursec[NR_CURSEG_TYPE]; 1672 int cursec[NR_CURSEG_TYPE];
1572 int curzone[NR_CURSEG_TYPE]; 1673 int curzone[NR_CURSEG_TYPE];
@@ -1615,31 +1716,36 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1615 ((sbi)->block_count[(curseg)->alloc_type]++) 1716 ((sbi)->block_count[(curseg)->alloc_type]++)
1616#define stat_inc_inplace_blocks(sbi) \ 1717#define stat_inc_inplace_blocks(sbi) \
1617 (atomic_inc(&(sbi)->inplace_count)) 1718 (atomic_inc(&(sbi)->inplace_count))
1618#define stat_inc_seg_count(sbi, type) \ 1719#define stat_inc_seg_count(sbi, type, gc_type) \
1619 do { \ 1720 do { \
1620 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1721 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1621 (si)->tot_segs++; \ 1722 (si)->tot_segs++; \
1622 if (type == SUM_TYPE_DATA) \ 1723 if (type == SUM_TYPE_DATA) { \
1623 si->data_segs++; \ 1724 si->data_segs++; \
1624 else \ 1725 si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \
1726 } else { \
1625 si->node_segs++; \ 1727 si->node_segs++; \
1728 si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \
1729 } \
1626 } while (0) 1730 } while (0)
1627 1731
1628#define stat_inc_tot_blk_count(si, blks) \ 1732#define stat_inc_tot_blk_count(si, blks) \
1629 (si->tot_blks += (blks)) 1733 (si->tot_blks += (blks))
1630 1734
1631#define stat_inc_data_blk_count(sbi, blks) \ 1735#define stat_inc_data_blk_count(sbi, blks, gc_type) \
1632 do { \ 1736 do { \
1633 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1737 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1634 stat_inc_tot_blk_count(si, blks); \ 1738 stat_inc_tot_blk_count(si, blks); \
1635 si->data_blks += (blks); \ 1739 si->data_blks += (blks); \
1740 si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \
1636 } while (0) 1741 } while (0)
1637 1742
1638#define stat_inc_node_blk_count(sbi, blks) \ 1743#define stat_inc_node_blk_count(sbi, blks, gc_type) \
1639 do { \ 1744 do { \
1640 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1745 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1641 stat_inc_tot_blk_count(si, blks); \ 1746 stat_inc_tot_blk_count(si, blks); \
1642 si->node_blks += (blks); \ 1747 si->node_blks += (blks); \
1748 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
1643 } while (0) 1749 } while (0)
1644 1750
1645int f2fs_build_stats(struct f2fs_sb_info *); 1751int f2fs_build_stats(struct f2fs_sb_info *);
@@ -1661,10 +1767,10 @@ void f2fs_destroy_root_stats(void);
1661#define stat_inc_seg_type(sbi, curseg) 1767#define stat_inc_seg_type(sbi, curseg)
1662#define stat_inc_block_count(sbi, curseg) 1768#define stat_inc_block_count(sbi, curseg)
1663#define stat_inc_inplace_blocks(sbi) 1769#define stat_inc_inplace_blocks(sbi)
1664#define stat_inc_seg_count(si, type) 1770#define stat_inc_seg_count(sbi, type, gc_type)
1665#define stat_inc_tot_blk_count(si, blks) 1771#define stat_inc_tot_blk_count(si, blks)
1666#define stat_inc_data_blk_count(si, blks) 1772#define stat_inc_data_blk_count(sbi, blks, gc_type)
1667#define stat_inc_node_blk_count(sbi, blks) 1773#define stat_inc_node_blk_count(sbi, blks, gc_type)
1668 1774
1669static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1775static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1670static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1776static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
@@ -1688,6 +1794,7 @@ extern struct kmem_cache *inode_entry_slab;
1688 */ 1794 */
1689bool f2fs_may_inline(struct inode *); 1795bool f2fs_may_inline(struct inode *);
1690void read_inline_data(struct page *, struct page *); 1796void read_inline_data(struct page *, struct page *);
1797bool truncate_inline_inode(struct page *, u64);
1691int f2fs_read_inline_data(struct inode *, struct page *); 1798int f2fs_read_inline_data(struct inode *, struct page *);
1692int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); 1799int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
1693int f2fs_convert_inline_inode(struct inode *); 1800int f2fs_convert_inline_inode(struct inode *);
@@ -1697,7 +1804,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
1697 struct page **); 1804 struct page **);
1698struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); 1805struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
1699int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); 1806int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
1700int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); 1807int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
1808 nid_t, umode_t);
1701void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, 1809void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
1702 struct inode *, struct inode *); 1810 struct inode *, struct inode *);
1703bool f2fs_empty_inline_dir(struct inode *); 1811bool f2fs_empty_inline_dir(struct inode *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 98dac27bc3f7..a6f3f6186588 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -241,6 +241,8 @@ go_write:
241 * will be used only for fsynced inodes after checkpoint. 241 * will be used only for fsynced inodes after checkpoint.
242 */ 242 */
243 try_to_fix_pino(inode); 243 try_to_fix_pino(inode);
244 clear_inode_flag(fi, FI_APPEND_WRITE);
245 clear_inode_flag(fi, FI_UPDATE_WRITE);
244 goto out; 246 goto out;
245 } 247 }
246sync_nodes: 248sync_nodes:
@@ -433,8 +435,12 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
433 continue; 435 continue;
434 436
435 dn->data_blkaddr = NULL_ADDR; 437 dn->data_blkaddr = NULL_ADDR;
436 update_extent_cache(dn); 438 set_data_blkaddr(dn);
439 f2fs_update_extent_cache(dn);
437 invalidate_blocks(sbi, blkaddr); 440 invalidate_blocks(sbi, blkaddr);
441 if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
442 clear_inode_flag(F2FS_I(dn->inode),
443 FI_FIRST_BLOCK_WRITTEN);
438 nr_free++; 444 nr_free++;
439 } 445 }
440 if (nr_free) { 446 if (nr_free) {
@@ -454,15 +460,16 @@ void truncate_data_blocks(struct dnode_of_data *dn)
454 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); 460 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
455} 461}
456 462
457static int truncate_partial_data_page(struct inode *inode, u64 from) 463static int truncate_partial_data_page(struct inode *inode, u64 from,
464 bool force)
458{ 465{
459 unsigned offset = from & (PAGE_CACHE_SIZE - 1); 466 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
460 struct page *page; 467 struct page *page;
461 468
462 if (!offset) 469 if (!offset && !force)
463 return 0; 470 return 0;
464 471
465 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); 472 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force);
466 if (IS_ERR(page)) 473 if (IS_ERR(page))
467 return 0; 474 return 0;
468 475
@@ -473,7 +480,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from)
473 480
474 f2fs_wait_on_page_writeback(page, DATA); 481 f2fs_wait_on_page_writeback(page, DATA);
475 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 482 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
476 set_page_dirty(page); 483 if (!force)
484 set_page_dirty(page);
477out: 485out:
478 f2fs_put_page(page, 1); 486 f2fs_put_page(page, 1);
479 return 0; 487 return 0;
@@ -487,6 +495,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
487 pgoff_t free_from; 495 pgoff_t free_from;
488 int count = 0, err = 0; 496 int count = 0, err = 0;
489 struct page *ipage; 497 struct page *ipage;
498 bool truncate_page = false;
490 499
491 trace_f2fs_truncate_blocks_enter(inode, from); 500 trace_f2fs_truncate_blocks_enter(inode, from);
492 501
@@ -502,7 +511,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
502 } 511 }
503 512
504 if (f2fs_has_inline_data(inode)) { 513 if (f2fs_has_inline_data(inode)) {
514 if (truncate_inline_inode(ipage, from))
515 set_page_dirty(ipage);
505 f2fs_put_page(ipage, 1); 516 f2fs_put_page(ipage, 1);
517 truncate_page = true;
506 goto out; 518 goto out;
507 } 519 }
508 520
@@ -533,7 +545,7 @@ out:
533 545
534 /* lastly zero out the first data page */ 546 /* lastly zero out the first data page */
535 if (!err) 547 if (!err)
536 err = truncate_partial_data_page(inode, from); 548 err = truncate_partial_data_page(inode, from, truncate_page);
537 549
538 trace_f2fs_truncate_blocks_exit(inode, err); 550 trace_f2fs_truncate_blocks_exit(inode, err);
539 return err; 551 return err;
@@ -997,6 +1009,9 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
997 if (!f2fs_is_volatile_file(inode)) 1009 if (!f2fs_is_volatile_file(inode))
998 return 0; 1010 return 0;
999 1011
1012 if (!f2fs_is_first_block_written(inode))
1013 return truncate_partial_data_page(inode, 0, true);
1014
1000 punch_hole(inode, 0, F2FS_BLKSIZE); 1015 punch_hole(inode, 0, F2FS_BLKSIZE);
1001 return 0; 1016 return 0;
1002} 1017}
@@ -1029,6 +1044,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
1029 return ret; 1044 return ret;
1030} 1045}
1031 1046
1047static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1048{
1049 struct inode *inode = file_inode(filp);
1050 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1051 struct super_block *sb = sbi->sb;
1052 __u32 in;
1053
1054 if (!capable(CAP_SYS_ADMIN))
1055 return -EPERM;
1056
1057 if (get_user(in, (__u32 __user *)arg))
1058 return -EFAULT;
1059
1060 switch (in) {
1061 case F2FS_GOING_DOWN_FULLSYNC:
1062 sb = freeze_bdev(sb->s_bdev);
1063 if (sb && !IS_ERR(sb)) {
1064 f2fs_stop_checkpoint(sbi);
1065 thaw_bdev(sb->s_bdev, sb);
1066 }
1067 break;
1068 case F2FS_GOING_DOWN_METASYNC:
1069 /* do checkpoint only */
1070 f2fs_sync_fs(sb, 1);
1071 f2fs_stop_checkpoint(sbi);
1072 break;
1073 case F2FS_GOING_DOWN_NOSYNC:
1074 f2fs_stop_checkpoint(sbi);
1075 break;
1076 default:
1077 return -EINVAL;
1078 }
1079 return 0;
1080}
1081
1032static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 1082static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1033{ 1083{
1034 struct inode *inode = file_inode(filp); 1084 struct inode *inode = file_inode(filp);
@@ -1078,6 +1128,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1078 return f2fs_ioc_release_volatile_write(filp); 1128 return f2fs_ioc_release_volatile_write(filp);
1079 case F2FS_IOC_ABORT_VOLATILE_WRITE: 1129 case F2FS_IOC_ABORT_VOLATILE_WRITE:
1080 return f2fs_ioc_abort_volatile_write(filp); 1130 return f2fs_ioc_abort_volatile_write(filp);
1131 case F2FS_IOC_SHUTDOWN:
1132 return f2fs_ioc_shutdown(filp, arg);
1081 case FITRIM: 1133 case FITRIM:
1082 return f2fs_ioc_fitrim(filp, arg); 1134 return f2fs_ioc_fitrim(filp, arg);
1083 default: 1135 default:
@@ -1104,8 +1156,6 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1104 1156
1105const struct file_operations f2fs_file_operations = { 1157const struct file_operations f2fs_file_operations = {
1106 .llseek = f2fs_llseek, 1158 .llseek = f2fs_llseek,
1107 .read = new_sync_read,
1108 .write = new_sync_write,
1109 .read_iter = generic_file_read_iter, 1159 .read_iter = generic_file_read_iter,
1110 .write_iter = generic_file_write_iter, 1160 .write_iter = generic_file_write_iter,
1111 .open = generic_file_open, 1161 .open = generic_file_open,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 76adbc3641f1..ed58211fe79b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -435,7 +435,7 @@ next_step:
435 set_page_dirty(node_page); 435 set_page_dirty(node_page);
436 } 436 }
437 f2fs_put_page(node_page, 1); 437 f2fs_put_page(node_page, 1);
438 stat_inc_node_blk_count(sbi, 1); 438 stat_inc_node_blk_count(sbi, 1, gc_type);
439 } 439 }
440 440
441 if (initial) { 441 if (initial) {
@@ -622,7 +622,7 @@ next_step:
622 if (IS_ERR(data_page)) 622 if (IS_ERR(data_page))
623 continue; 623 continue;
624 move_data_page(inode, data_page, gc_type); 624 move_data_page(inode, data_page, gc_type);
625 stat_inc_data_blk_count(sbi, 1); 625 stat_inc_data_blk_count(sbi, 1, gc_type);
626 } 626 }
627 } 627 }
628 628
@@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
680 } 680 }
681 blk_finish_plug(&plug); 681 blk_finish_plug(&plug);
682 682
683 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 683 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
684 stat_inc_call_count(sbi->stat_info); 684 stat_inc_call_count(sbi->stat_info);
685 685
686 f2fs_put_page(sum_page, 1); 686 f2fs_put_page(sum_page, 1);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1484c00133cd..8140e4f0e538 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode)
21 if (f2fs_is_atomic_file(inode)) 21 if (f2fs_is_atomic_file(inode))
22 return false; 22 return false;
23 23
24 if (!S_ISREG(inode->i_mode)) 24 if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
25 return false; 25 return false;
26 26
27 if (i_size_read(inode) > MAX_INLINE_DATA) 27 if (i_size_read(inode) > MAX_INLINE_DATA)
@@ -50,10 +50,19 @@ void read_inline_data(struct page *page, struct page *ipage)
50 SetPageUptodate(page); 50 SetPageUptodate(page);
51} 51}
52 52
53static void truncate_inline_data(struct page *ipage) 53bool truncate_inline_inode(struct page *ipage, u64 from)
54{ 54{
55 void *addr;
56
57 if (from >= MAX_INLINE_DATA)
58 return false;
59
60 addr = inline_data_addr(ipage);
61
55 f2fs_wait_on_page_writeback(ipage, NODE); 62 f2fs_wait_on_page_writeback(ipage, NODE);
56 memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); 63 memset(addr + from, 0, MAX_INLINE_DATA - from);
64
65 return true;
57} 66}
58 67
59int f2fs_read_inline_data(struct inode *inode, struct page *page) 68int f2fs_read_inline_data(struct inode *inode, struct page *page)
@@ -122,7 +131,8 @@ no_update:
122 set_page_writeback(page); 131 set_page_writeback(page);
123 fio.blk_addr = dn->data_blkaddr; 132 fio.blk_addr = dn->data_blkaddr;
124 write_data_page(page, dn, &fio); 133 write_data_page(page, dn, &fio);
125 update_extent_cache(dn); 134 set_data_blkaddr(dn);
135 f2fs_update_extent_cache(dn);
126 f2fs_wait_on_page_writeback(page, DATA); 136 f2fs_wait_on_page_writeback(page, DATA);
127 if (dirty) 137 if (dirty)
128 inode_dec_dirty_pages(dn->inode); 138 inode_dec_dirty_pages(dn->inode);
@@ -131,7 +141,7 @@ no_update:
131 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); 141 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
132 142
133 /* clear inline data and flag after data writeback */ 143 /* clear inline data and flag after data writeback */
134 truncate_inline_data(dn->inode_page); 144 truncate_inline_inode(dn->inode_page, 0);
135clear_out: 145clear_out:
136 stat_dec_inline_inode(dn->inode); 146 stat_dec_inline_inode(dn->inode);
137 f2fs_clear_inline_inode(dn->inode); 147 f2fs_clear_inline_inode(dn->inode);
@@ -245,7 +255,7 @@ process_inline:
245 if (f2fs_has_inline_data(inode)) { 255 if (f2fs_has_inline_data(inode)) {
246 ipage = get_node_page(sbi, inode->i_ino); 256 ipage = get_node_page(sbi, inode->i_ino);
247 f2fs_bug_on(sbi, IS_ERR(ipage)); 257 f2fs_bug_on(sbi, IS_ERR(ipage));
248 truncate_inline_data(ipage); 258 truncate_inline_inode(ipage, 0);
249 f2fs_clear_inline_inode(inode); 259 f2fs_clear_inline_inode(inode);
250 update_inode(inode, ipage); 260 update_inode(inode, ipage);
251 f2fs_put_page(ipage, 1); 261 f2fs_put_page(ipage, 1);
@@ -363,7 +373,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
363 set_page_dirty(page); 373 set_page_dirty(page);
364 374
365 /* clear inline dir and flag after data writeback */ 375 /* clear inline dir and flag after data writeback */
366 truncate_inline_data(ipage); 376 truncate_inline_inode(ipage, 0);
367 377
368 stat_dec_inline_dir(dir); 378 stat_dec_inline_dir(dir);
369 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 379 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
@@ -380,21 +390,18 @@ out:
380} 390}
381 391
382int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, 392int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
383 struct inode *inode) 393 struct inode *inode, nid_t ino, umode_t mode)
384{ 394{
385 struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 395 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
386 struct page *ipage; 396 struct page *ipage;
387 unsigned int bit_pos; 397 unsigned int bit_pos;
388 f2fs_hash_t name_hash; 398 f2fs_hash_t name_hash;
389 struct f2fs_dir_entry *de;
390 size_t namelen = name->len; 399 size_t namelen = name->len;
391 struct f2fs_inline_dentry *dentry_blk = NULL; 400 struct f2fs_inline_dentry *dentry_blk = NULL;
401 struct f2fs_dentry_ptr d;
392 int slots = GET_DENTRY_SLOTS(namelen); 402 int slots = GET_DENTRY_SLOTS(namelen);
393 struct page *page; 403 struct page *page = NULL;
394 int err = 0; 404 int err = 0;
395 int i;
396
397 name_hash = f2fs_dentry_hash(name);
398 405
399 ipage = get_node_page(sbi, dir->i_ino); 406 ipage = get_node_page(sbi, dir->i_ino);
400 if (IS_ERR(ipage)) 407 if (IS_ERR(ipage))
@@ -410,32 +417,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
410 goto out; 417 goto out;
411 } 418 }
412 419
413 down_write(&F2FS_I(inode)->i_sem); 420 if (inode) {
414 page = init_inode_metadata(inode, dir, name, ipage); 421 down_write(&F2FS_I(inode)->i_sem);
415 if (IS_ERR(page)) { 422 page = init_inode_metadata(inode, dir, name, ipage);
416 err = PTR_ERR(page); 423 if (IS_ERR(page)) {
417 goto fail; 424 err = PTR_ERR(page);
425 goto fail;
426 }
418 } 427 }
419 428
420 f2fs_wait_on_page_writeback(ipage, NODE); 429 f2fs_wait_on_page_writeback(ipage, NODE);
421 de = &dentry_blk->dentry[bit_pos]; 430
422 de->hash_code = name_hash; 431 name_hash = f2fs_dentry_hash(name);
423 de->name_len = cpu_to_le16(namelen); 432 make_dentry_ptr(&d, (void *)dentry_blk, 2);
424 memcpy(dentry_blk->filename[bit_pos], name->name, name->len); 433 f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
425 de->ino = cpu_to_le32(inode->i_ino); 434
426 set_de_type(de, inode);
427 for (i = 0; i < slots; i++)
428 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
429 set_page_dirty(ipage); 435 set_page_dirty(ipage);
430 436
431 /* we don't need to mark_inode_dirty now */ 437 /* we don't need to mark_inode_dirty now */
432 F2FS_I(inode)->i_pino = dir->i_ino; 438 if (inode) {
433 update_inode(inode, page); 439 F2FS_I(inode)->i_pino = dir->i_ino;
434 f2fs_put_page(page, 1); 440 update_inode(inode, page);
441 f2fs_put_page(page, 1);
442 }
435 443
436 update_parent_metadata(dir, inode, 0); 444 update_parent_metadata(dir, inode, 0);
437fail: 445fail:
438 up_write(&F2FS_I(inode)->i_sem); 446 if (inode)
447 up_write(&F2FS_I(inode)->i_sem);
439 448
440 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 449 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
441 update_inode(dir, ipage); 450 update_inode(dir, ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2d002e3738a7..e622ec95409e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -51,6 +51,15 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
51 } 51 }
52} 52}
53 53
54static bool __written_first_block(struct f2fs_inode *ri)
55{
56 block_t addr = le32_to_cpu(ri->i_addr[0]);
57
58 if (addr != NEW_ADDR && addr != NULL_ADDR)
59 return true;
60 return false;
61}
62
54static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 63static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
55{ 64{
56 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 65 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -130,7 +139,8 @@ static int do_read_inode(struct inode *inode)
130 fi->i_pino = le32_to_cpu(ri->i_pino); 139 fi->i_pino = le32_to_cpu(ri->i_pino);
131 fi->i_dir_level = ri->i_dir_level; 140 fi->i_dir_level = ri->i_dir_level;
132 141
133 get_extent_info(&fi->ext, ri->i_ext); 142 f2fs_init_extent_cache(inode, &ri->i_ext);
143
134 get_inline_info(fi, ri); 144 get_inline_info(fi, ri);
135 145
136 /* check data exist */ 146 /* check data exist */
@@ -140,6 +150,9 @@ static int do_read_inode(struct inode *inode)
140 /* get rdev by using inline_info */ 150 /* get rdev by using inline_info */
141 __get_inode_rdev(inode, ri); 151 __get_inode_rdev(inode, ri);
142 152
153 if (__written_first_block(ri))
154 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
155
143 f2fs_put_page(node_page, 1); 156 f2fs_put_page(node_page, 1);
144 157
145 stat_inc_inline_inode(inode); 158 stat_inc_inline_inode(inode);
@@ -220,7 +233,11 @@ void update_inode(struct inode *inode, struct page *node_page)
220 ri->i_links = cpu_to_le32(inode->i_nlink); 233 ri->i_links = cpu_to_le32(inode->i_nlink);
221 ri->i_size = cpu_to_le64(i_size_read(inode)); 234 ri->i_size = cpu_to_le64(i_size_read(inode));
222 ri->i_blocks = cpu_to_le64(inode->i_blocks); 235 ri->i_blocks = cpu_to_le64(inode->i_blocks);
236
237 read_lock(&F2FS_I(inode)->ext_lock);
223 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); 238 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
239 read_unlock(&F2FS_I(inode)->ext_lock);
240
224 set_raw_inline(F2FS_I(inode), ri); 241 set_raw_inline(F2FS_I(inode), ri);
225 242
226 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 243 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -328,6 +345,12 @@ void f2fs_evict_inode(struct inode *inode)
328no_delete: 345no_delete:
329 stat_dec_inline_dir(inode); 346 stat_dec_inline_dir(inode);
330 stat_dec_inline_inode(inode); 347 stat_dec_inline_inode(inode);
348
349 /* update extent info in inode */
350 if (inode->i_nlink)
351 f2fs_preserve_extent_tree(inode);
352 f2fs_destroy_extent_tree(inode);
353
331 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 354 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
332 if (xnid) 355 if (xnid)
333 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 356 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e79639a9787a..407dde3d7a92 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -14,6 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h> 16#include <linux/dcache.h>
17#include <linux/namei.h>
17 18
18#include "f2fs.h" 19#include "f2fs.h"
19#include "node.h" 20#include "node.h"
@@ -187,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child)
187 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); 188 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
188} 189}
189 190
191static int __recover_dot_dentries(struct inode *dir, nid_t pino)
192{
193 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
194 struct qstr dot = QSTR_INIT(".", 1);
195 struct qstr dotdot = QSTR_INIT("..", 2);
196 struct f2fs_dir_entry *de;
197 struct page *page;
198 int err = 0;
199
200 f2fs_lock_op(sbi);
201
202 de = f2fs_find_entry(dir, &dot, &page);
203 if (de) {
204 f2fs_dentry_kunmap(dir, page);
205 f2fs_put_page(page, 0);
206 } else {
207 err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
208 if (err)
209 goto out;
210 }
211
212 de = f2fs_find_entry(dir, &dotdot, &page);
213 if (de) {
214 f2fs_dentry_kunmap(dir, page);
215 f2fs_put_page(page, 0);
216 } else {
217 err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
218 }
219out:
220 if (!err) {
221 clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS);
222 mark_inode_dirty(dir);
223 }
224
225 f2fs_unlock_op(sbi);
226 return err;
227}
228
190static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, 229static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
191 unsigned int flags) 230 unsigned int flags)
192{ 231{
@@ -206,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
206 inode = f2fs_iget(dir->i_sb, ino); 245 inode = f2fs_iget(dir->i_sb, ino);
207 if (IS_ERR(inode)) 246 if (IS_ERR(inode))
208 return ERR_CAST(inode); 247 return ERR_CAST(inode);
248
249 if (f2fs_has_inline_dots(inode)) {
250 int err;
251
252 err = __recover_dot_dentries(inode, dir->i_ino);
253 if (err) {
254 iget_failed(inode);
255 return ERR_PTR(err);
256 }
257 }
209 } 258 }
210 259
211 return d_splice_alias(inode, dentry); 260 return d_splice_alias(inode, dentry);
@@ -247,6 +296,23 @@ fail:
247 return err; 296 return err;
248} 297}
249 298
299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
300{
301 struct page *page;
302
303 page = page_follow_link_light(dentry, nd);
304 if (IS_ERR(page))
305 return page;
306
307 /* this is broken symlink case */
308 if (*nd_get_link(nd) == 0) {
309 kunmap(page);
310 page_cache_release(page);
311 return ERR_PTR(-ENOENT);
312 }
313 return page;
314}
315
250static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 316static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
251 const char *symname) 317 const char *symname)
252{ 318{
@@ -276,6 +342,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
276 d_instantiate(dentry, inode); 342 d_instantiate(dentry, inode);
277 unlock_new_inode(inode); 343 unlock_new_inode(inode);
278 344
345 /*
346 * Let's flush symlink data in order to avoid broken symlink as much as
347 * possible. Nevertheless, fsyncing is the best way, but there is no
348 * way to get a file descriptor in order to flush that.
349 *
350 * Note that, it needs to do dir->fsync to make this recoverable.
351 * If the symlink path is stored into inline_data, there is no
352 * performance regression.
353 */
354 filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1);
355
279 if (IS_DIRSYNC(dir)) 356 if (IS_DIRSYNC(dir))
280 f2fs_sync_fs(sbi->sb, 1); 357 f2fs_sync_fs(sbi->sb, 1);
281 return err; 358 return err;
@@ -693,6 +770,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
693 f2fs_unlock_op(sbi); 770 f2fs_unlock_op(sbi);
694 771
695 alloc_nid_done(sbi, inode->i_ino); 772 alloc_nid_done(sbi, inode->i_ino);
773
774 stat_inc_inline_inode(inode);
696 d_tmpfile(dentry, inode); 775 d_tmpfile(dentry, inode);
697 unlock_new_inode(inode); 776 unlock_new_inode(inode);
698 return 0; 777 return 0;
@@ -729,7 +808,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
729 808
730const struct inode_operations f2fs_symlink_inode_operations = { 809const struct inode_operations f2fs_symlink_inode_operations = {
731 .readlink = generic_readlink, 810 .readlink = generic_readlink,
732 .follow_link = page_follow_link_light, 811 .follow_link = f2fs_follow_link,
733 .put_link = page_put_link, 812 .put_link = page_put_link,
734 .getattr = f2fs_getattr, 813 .getattr = f2fs_getattr,
735 .setattr = f2fs_setattr, 814 .setattr = f2fs_setattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 97bd9d3db882..8ab0cf1930bd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
41 /* only uses low memory */ 41 /* only uses low memory */
42 avail_ram = val.totalram - val.totalhigh; 42 avail_ram = val.totalram - val.totalhigh;
43 43
44 /* give 25%, 25%, 50%, 50% memory for each components respectively */ 44 /*
45 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
46 */
45 if (type == FREE_NIDS) { 47 if (type == FREE_NIDS) {
46 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 48 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
47 PAGE_CACHE_SHIFT; 49 PAGE_CACHE_SHIFT;
@@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
62 mem_size += (sbi->im[i].ino_num * 64 mem_size += (sbi->im[i].ino_num *
63 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; 65 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
64 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 66 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
67 } else if (type == EXTENT_CACHE) {
68 mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
69 atomic_read(&sbi->total_ext_node) *
70 sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
71 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
65 } else { 72 } else {
66 if (sbi->sb->s_bdi->dirty_exceeded) 73 if (sbi->sb->s_bdi->dirty_exceeded)
67 return false; 74 return false;
@@ -494,7 +501,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
494 501
495 /* if inline_data is set, should not report any block indices */ 502 /* if inline_data is set, should not report any block indices */
496 if (f2fs_has_inline_data(dn->inode) && index) { 503 if (f2fs_has_inline_data(dn->inode) && index) {
497 err = -EINVAL; 504 err = -ENOENT;
498 f2fs_put_page(npage[0], 1); 505 f2fs_put_page(npage[0], 1);
499 goto release_out; 506 goto release_out;
500 } 507 }
@@ -995,6 +1002,7 @@ static int read_node_page(struct page *page, int rw)
995 get_node_info(sbi, page->index, &ni); 1002 get_node_info(sbi, page->index, &ni);
996 1003
997 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1004 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1005 ClearPageUptodate(page);
998 f2fs_put_page(page, 1); 1006 f2fs_put_page(page, 1);
999 return -ENOENT; 1007 return -ENOENT;
1000 } 1008 }
@@ -1306,6 +1314,7 @@ static int f2fs_write_node_page(struct page *page,
1306 1314
1307 /* This page is already truncated */ 1315 /* This page is already truncated */
1308 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1316 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1317 ClearPageUptodate(page);
1309 dec_page_count(sbi, F2FS_DIRTY_NODES); 1318 dec_page_count(sbi, F2FS_DIRTY_NODES);
1310 unlock_page(page); 1319 unlock_page(page);
1311 return 0; 1320 return 0;
@@ -1821,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1821 struct f2fs_nat_block *nat_blk; 1830 struct f2fs_nat_block *nat_blk;
1822 struct nat_entry *ne, *cur; 1831 struct nat_entry *ne, *cur;
1823 struct page *page = NULL; 1832 struct page *page = NULL;
1833 struct f2fs_nm_info *nm_i = NM_I(sbi);
1824 1834
1825 /* 1835 /*
1826 * there are two steps to flush nat entries: 1836 * there are two steps to flush nat entries:
@@ -1874,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1874 1884
1875 f2fs_bug_on(sbi, set->entry_cnt); 1885 f2fs_bug_on(sbi, set->entry_cnt);
1876 1886
1887 down_write(&nm_i->nat_tree_lock);
1877 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 1888 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1889 up_write(&nm_i->nat_tree_lock);
1878 kmem_cache_free(nat_entry_set_slab, set); 1890 kmem_cache_free(nat_entry_set_slab, set);
1879} 1891}
1880 1892
@@ -1902,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1902 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 1914 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1903 remove_nats_in_journal(sbi); 1915 remove_nats_in_journal(sbi);
1904 1916
1917 down_write(&nm_i->nat_tree_lock);
1905 while ((found = __gang_lookup_nat_set(nm_i, 1918 while ((found = __gang_lookup_nat_set(nm_i,
1906 set_idx, SETVEC_SIZE, setvec))) { 1919 set_idx, SETVEC_SIZE, setvec))) {
1907 unsigned idx; 1920 unsigned idx;
@@ -1910,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1910 __adjust_nat_entry_set(setvec[idx], &sets, 1923 __adjust_nat_entry_set(setvec[idx], &sets,
1911 MAX_NAT_JENTRIES(sum)); 1924 MAX_NAT_JENTRIES(sum));
1912 } 1925 }
1926 up_write(&nm_i->nat_tree_lock);
1913 1927
1914 /* flush dirty nats in nat entry set */ 1928 /* flush dirty nats in nat entry set */
1915 list_for_each_entry_safe(set, tmp, &sets, set_list) 1929 list_for_each_entry_safe(set, tmp, &sets, set_list)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index f405bbf2435a..c56026f1725c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -120,6 +120,7 @@ enum mem_type {
120 NAT_ENTRIES, /* indicates the cached nat entry */ 120 NAT_ENTRIES, /* indicates the cached nat entry */
121 DIRTY_DENTS, /* indicates dirty dentry pages */ 121 DIRTY_DENTS, /* indicates dirty dentry pages */
122 INO_ENTRIES, /* indicates inode entries */ 122 INO_ENTRIES, /* indicates inode entries */
123 EXTENT_CACHE, /* indicates extent cache */
123 BASE_CHECK, /* check kernel status */ 124 BASE_CHECK, /* check kernel status */
124}; 125};
125 126
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 41afb9534bbd..8d8ea99f2156 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
93 } 93 }
94retry: 94retry:
95 de = f2fs_find_entry(dir, &name, &page); 95 de = f2fs_find_entry(dir, &name, &page);
96 if (de && inode->i_ino == le32_to_cpu(de->ino)) { 96 if (de && inode->i_ino == le32_to_cpu(de->ino))
97 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98 goto out_unmap_put; 97 goto out_unmap_put;
99 } 98
100 if (de) { 99 if (de) {
101 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 100 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
102 if (IS_ERR(einode)) { 101 if (IS_ERR(einode)) {
@@ -115,7 +114,7 @@ retry:
115 iput(einode); 114 iput(einode);
116 goto retry; 115 goto retry;
117 } 116 }
118 err = __f2fs_add_link(dir, &name, inode); 117 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
119 if (err) 118 if (err)
120 goto out_err; 119 goto out_err;
121 120
@@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
187 goto next; 186 goto next;
188 187
189 entry = get_fsync_inode(head, ino_of_node(page)); 188 entry = get_fsync_inode(head, ino_of_node(page));
190 if (entry) { 189 if (!entry) {
191 if (IS_INODE(page) && is_dent_dnode(page))
192 set_inode_flag(F2FS_I(entry->inode),
193 FI_INC_LINK);
194 } else {
195 if (IS_INODE(page) && is_dent_dnode(page)) { 190 if (IS_INODE(page) && is_dent_dnode(page)) {
196 err = recover_inode_page(sbi, page); 191 err = recover_inode_page(sbi, page);
197 if (err) 192 if (err)
@@ -212,8 +207,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
212 if (IS_ERR(entry->inode)) { 207 if (IS_ERR(entry->inode)) {
213 err = PTR_ERR(entry->inode); 208 err = PTR_ERR(entry->inode);
214 kmem_cache_free(fsync_entry_slab, entry); 209 kmem_cache_free(fsync_entry_slab, entry);
215 if (err == -ENOENT) 210 if (err == -ENOENT) {
211 err = 0;
216 goto next; 212 goto next;
213 }
217 break; 214 break;
218 } 215 }
219 list_add_tail(&entry->list, head); 216 list_add_tail(&entry->list, head);
@@ -256,6 +253,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
256 struct f2fs_summary_block *sum_node; 253 struct f2fs_summary_block *sum_node;
257 struct f2fs_summary sum; 254 struct f2fs_summary sum;
258 struct page *sum_page, *node_page; 255 struct page *sum_page, *node_page;
256 struct dnode_of_data tdn = *dn;
259 nid_t ino, nid; 257 nid_t ino, nid;
260 struct inode *inode; 258 struct inode *inode;
261 unsigned int offset; 259 unsigned int offset;
@@ -283,17 +281,15 @@ got_it:
283 /* Use the locked dnode page and inode */ 281 /* Use the locked dnode page and inode */
284 nid = le32_to_cpu(sum.nid); 282 nid = le32_to_cpu(sum.nid);
285 if (dn->inode->i_ino == nid) { 283 if (dn->inode->i_ino == nid) {
286 struct dnode_of_data tdn = *dn;
287 tdn.nid = nid; 284 tdn.nid = nid;
285 if (!dn->inode_page_locked)
286 lock_page(dn->inode_page);
288 tdn.node_page = dn->inode_page; 287 tdn.node_page = dn->inode_page;
289 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 288 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
290 truncate_data_blocks_range(&tdn, 1); 289 goto truncate_out;
291 return 0;
292 } else if (dn->nid == nid) { 290 } else if (dn->nid == nid) {
293 struct dnode_of_data tdn = *dn;
294 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 291 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
295 truncate_data_blocks_range(&tdn, 1); 292 goto truncate_out;
296 return 0;
297 } 293 }
298 294
299 /* Get the node page */ 295 /* Get the node page */
@@ -317,18 +313,33 @@ got_it:
317 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 313 bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
318 le16_to_cpu(sum.ofs_in_node); 314 le16_to_cpu(sum.ofs_in_node);
319 315
320 if (ino != dn->inode->i_ino) { 316 /*
321 truncate_hole(inode, bidx, bidx + 1); 317 * if inode page is locked, unlock temporarily, but its reference
318 * count keeps alive.
319 */
320 if (ino == dn->inode->i_ino && dn->inode_page_locked)
321 unlock_page(dn->inode_page);
322
323 set_new_dnode(&tdn, inode, NULL, NULL, 0);
324 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
325 goto out;
326
327 if (tdn.data_blkaddr == blkaddr)
328 truncate_data_blocks_range(&tdn, 1);
329
330 f2fs_put_dnode(&tdn);
331out:
332 if (ino != dn->inode->i_ino)
322 iput(inode); 333 iput(inode);
323 } else { 334 else if (dn->inode_page_locked)
324 struct dnode_of_data tdn; 335 lock_page(dn->inode_page);
325 set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); 336 return 0;
326 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) 337
327 return 0; 338truncate_out:
328 if (tdn.data_blkaddr != NULL_ADDR) 339 if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
329 truncate_data_blocks_range(&tdn, 1); 340 truncate_data_blocks_range(&tdn, 1);
330 f2fs_put_page(tdn.node_page, 1); 341 if (dn->inode->i_ino == nid && !dn->inode_page_locked)
331 } 342 unlock_page(dn->inode_page);
332 return 0; 343 return 0;
333} 344}
334 345
@@ -384,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
384 src = datablock_addr(dn.node_page, dn.ofs_in_node); 395 src = datablock_addr(dn.node_page, dn.ofs_in_node);
385 dest = datablock_addr(page, dn.ofs_in_node); 396 dest = datablock_addr(page, dn.ofs_in_node);
386 397
387 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { 398 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
399 dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) {
400
388 if (src == NULL_ADDR) { 401 if (src == NULL_ADDR) {
389 err = reserve_new_block(&dn); 402 err = reserve_new_block(&dn);
390 /* We should not get -ENOSPC */ 403 /* We should not get -ENOSPC */
@@ -401,14 +414,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
401 /* write dummy data page */ 414 /* write dummy data page */
402 recover_data_page(sbi, NULL, &sum, src, dest); 415 recover_data_page(sbi, NULL, &sum, src, dest);
403 dn.data_blkaddr = dest; 416 dn.data_blkaddr = dest;
404 update_extent_cache(&dn); 417 set_data_blkaddr(&dn);
418 f2fs_update_extent_cache(&dn);
405 recovered++; 419 recovered++;
406 } 420 }
407 dn.ofs_in_node++; 421 dn.ofs_in_node++;
408 } 422 }
409 423
410 /* write node page in place */
411 set_summary(&sum, dn.nid, 0, 0);
412 if (IS_INODE(dn.node_page)) 424 if (IS_INODE(dn.node_page))
413 sync_inode_page(&dn); 425 sync_inode_page(&dn);
414 426
@@ -552,7 +564,7 @@ out:
552 mutex_unlock(&sbi->cp_mutex); 564 mutex_unlock(&sbi->cp_mutex);
553 } else if (need_writecp) { 565 } else if (need_writecp) {
554 struct cp_control cpc = { 566 struct cp_control cpc = {
555 .reason = CP_SYNC, 567 .reason = CP_RECOVERY,
556 }; 568 };
557 mutex_unlock(&sbi->cp_mutex); 569 mutex_unlock(&sbi->cp_mutex);
558 write_checkpoint(sbi, &cpc); 570 write_checkpoint(sbi, &cpc);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index daee4ab913da..f939660941bb 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -205,6 +205,8 @@ retry:
205 list_add_tail(&new->list, &fi->inmem_pages); 205 list_add_tail(&new->list, &fi->inmem_pages);
206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
207 mutex_unlock(&fi->inmem_lock); 207 mutex_unlock(&fi->inmem_lock);
208
209 trace_f2fs_register_inmem_page(page, INMEM);
208} 210}
209 211
210void commit_inmem_pages(struct inode *inode, bool abort) 212void commit_inmem_pages(struct inode *inode, bool abort)
@@ -238,11 +240,13 @@ void commit_inmem_pages(struct inode *inode, bool abort)
238 f2fs_wait_on_page_writeback(cur->page, DATA); 240 f2fs_wait_on_page_writeback(cur->page, DATA);
239 if (clear_page_dirty_for_io(cur->page)) 241 if (clear_page_dirty_for_io(cur->page))
240 inode_dec_dirty_pages(inode); 242 inode_dec_dirty_pages(inode);
243 trace_f2fs_commit_inmem_page(cur->page, INMEM);
241 do_write_data_page(cur->page, &fio); 244 do_write_data_page(cur->page, &fio);
242 submit_bio = true; 245 submit_bio = true;
243 } 246 }
244 f2fs_put_page(cur->page, 1); 247 f2fs_put_page(cur->page, 1);
245 } else { 248 } else {
249 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
246 put_page(cur->page); 250 put_page(cur->page);
247 } 251 }
248 radix_tree_delete(&fi->inmem_root, cur->page->index); 252 radix_tree_delete(&fi->inmem_root, cur->page->index);
@@ -277,6 +281,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
277 281
278void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) 282void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
279{ 283{
284 /* try to shrink extent cache when there is no enough memory */
285 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
286
280 /* check the # of cached NAT entries and prefree segments */ 287 /* check the # of cached NAT entries and prefree segments */
281 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || 288 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
282 excess_prefree_segs(sbi) || 289 excess_prefree_segs(sbi) ||
@@ -549,7 +556,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
549 556
550 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 557 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
551 558
552 if (end - start < cpc->trim_minlen) 559 if (force && end - start < cpc->trim_minlen)
553 continue; 560 continue;
554 561
555 __add_discard_entry(sbi, cpc, start, end); 562 __add_discard_entry(sbi, cpc, start, end);
@@ -1164,6 +1171,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1164 curseg = CURSEG_I(sbi, type); 1171 curseg = CURSEG_I(sbi, type);
1165 1172
1166 mutex_lock(&curseg->curseg_mutex); 1173 mutex_lock(&curseg->curseg_mutex);
1174 mutex_lock(&sit_i->sentry_lock);
1167 1175
1168 /* direct_io'ed data is aligned to the segment for better performance */ 1176 /* direct_io'ed data is aligned to the segment for better performance */
1169 if (direct_io && curseg->next_blkoff) 1177 if (direct_io && curseg->next_blkoff)
@@ -1178,7 +1186,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1178 */ 1186 */
1179 __add_sum_entry(sbi, type, sum); 1187 __add_sum_entry(sbi, type, sum);
1180 1188
1181 mutex_lock(&sit_i->sentry_lock);
1182 __refresh_next_blkoff(sbi, curseg); 1189 __refresh_next_blkoff(sbi, curseg);
1183 1190
1184 stat_inc_block_count(sbi, curseg); 1191 stat_inc_block_count(sbi, curseg);
@@ -1730,6 +1737,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1730 mutex_lock(&curseg->curseg_mutex); 1737 mutex_lock(&curseg->curseg_mutex);
1731 mutex_lock(&sit_i->sentry_lock); 1738 mutex_lock(&sit_i->sentry_lock);
1732 1739
1740 if (!sit_i->dirty_sentries)
1741 goto out;
1742
1733 /* 1743 /*
1734 * add and account sit entries of dirty bitmap in sit entry 1744 * add and account sit entries of dirty bitmap in sit entry
1735 * set temporarily 1745 * set temporarily
@@ -1744,9 +1754,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1744 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) 1754 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1745 remove_sits_in_journal(sbi); 1755 remove_sits_in_journal(sbi);
1746 1756
1747 if (!sit_i->dirty_sentries)
1748 goto out;
1749
1750 /* 1757 /*
1751 * there are two steps to flush sit entries: 1758 * there are two steps to flush sit entries:
1752 * #1, flush sit entries to journal in current cold data summary block. 1759 * #1, flush sit entries to journal in current cold data summary block.
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7fd35111cf62..85d7fa7514b2 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
336 clear_bit(segno, free_i->free_segmap); 336 clear_bit(segno, free_i->free_segmap);
337 free_i->free_segments++; 337 free_i->free_segments++;
338 338
339 next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); 339 next = find_next_bit(free_i->free_segmap,
340 start_segno + sbi->segs_per_sec, start_segno);
340 if (next >= start_segno + sbi->segs_per_sec) { 341 if (next >= start_segno + sbi->segs_per_sec) {
341 clear_bit(secno, free_i->free_secmap); 342 clear_bit(secno, free_i->free_secmap);
342 free_i->free_sections++; 343 free_i->free_sections++;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2fe666a6ea9..160b88346b24 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -57,6 +57,8 @@ enum {
57 Opt_flush_merge, 57 Opt_flush_merge,
58 Opt_nobarrier, 58 Opt_nobarrier,
59 Opt_fastboot, 59 Opt_fastboot,
60 Opt_extent_cache,
61 Opt_noinline_data,
60 Opt_err, 62 Opt_err,
61}; 63};
62 64
@@ -78,6 +80,8 @@ static match_table_t f2fs_tokens = {
78 {Opt_flush_merge, "flush_merge"}, 80 {Opt_flush_merge, "flush_merge"},
79 {Opt_nobarrier, "nobarrier"}, 81 {Opt_nobarrier, "nobarrier"},
80 {Opt_fastboot, "fastboot"}, 82 {Opt_fastboot, "fastboot"},
83 {Opt_extent_cache, "extent_cache"},
84 {Opt_noinline_data, "noinline_data"},
81 {Opt_err, NULL}, 85 {Opt_err, NULL},
82}; 86};
83 87
@@ -367,6 +371,12 @@ static int parse_options(struct super_block *sb, char *options)
367 case Opt_fastboot: 371 case Opt_fastboot:
368 set_opt(sbi, FASTBOOT); 372 set_opt(sbi, FASTBOOT);
369 break; 373 break;
374 case Opt_extent_cache:
375 set_opt(sbi, EXTENT_CACHE);
376 break;
377 case Opt_noinline_data:
378 clear_opt(sbi, INLINE_DATA);
379 break;
370 default: 380 default:
371 f2fs_msg(sb, KERN_ERR, 381 f2fs_msg(sb, KERN_ERR,
372 "Unrecognized mount option \"%s\" or missing value", 382 "Unrecognized mount option \"%s\" or missing value",
@@ -392,7 +402,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
392 atomic_set(&fi->dirty_pages, 0); 402 atomic_set(&fi->dirty_pages, 0);
393 fi->i_current_depth = 1; 403 fi->i_current_depth = 1;
394 fi->i_advise = 0; 404 fi->i_advise = 0;
395 rwlock_init(&fi->ext.ext_lock); 405 rwlock_init(&fi->ext_lock);
396 init_rwsem(&fi->i_sem); 406 init_rwsem(&fi->i_sem);
397 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); 407 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
398 INIT_LIST_HEAD(&fi->inmem_pages); 408 INIT_LIST_HEAD(&fi->inmem_pages);
@@ -591,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
591 seq_puts(seq, ",disable_ext_identify"); 601 seq_puts(seq, ",disable_ext_identify");
592 if (test_opt(sbi, INLINE_DATA)) 602 if (test_opt(sbi, INLINE_DATA))
593 seq_puts(seq, ",inline_data"); 603 seq_puts(seq, ",inline_data");
604 else
605 seq_puts(seq, ",noinline_data");
594 if (test_opt(sbi, INLINE_DENTRY)) 606 if (test_opt(sbi, INLINE_DENTRY))
595 seq_puts(seq, ",inline_dentry"); 607 seq_puts(seq, ",inline_dentry");
596 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 608 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
@@ -599,6 +611,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
599 seq_puts(seq, ",nobarrier"); 611 seq_puts(seq, ",nobarrier");
600 if (test_opt(sbi, FASTBOOT)) 612 if (test_opt(sbi, FASTBOOT))
601 seq_puts(seq, ",fastboot"); 613 seq_puts(seq, ",fastboot");
614 if (test_opt(sbi, EXTENT_CACHE))
615 seq_puts(seq, ",extent_cache");
602 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 616 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
603 617
604 return 0; 618 return 0;
@@ -959,7 +973,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
959 struct buffer_head *raw_super_buf; 973 struct buffer_head *raw_super_buf;
960 struct inode *root; 974 struct inode *root;
961 long err = -EINVAL; 975 long err = -EINVAL;
962 bool retry = true; 976 bool retry = true, need_fsck = false;
963 char *options = NULL; 977 char *options = NULL;
964 int i; 978 int i;
965 979
@@ -984,6 +998,7 @@ try_onemore:
984 sbi->active_logs = NR_CURSEG_TYPE; 998 sbi->active_logs = NR_CURSEG_TYPE;
985 999
986 set_opt(sbi, BG_GC); 1000 set_opt(sbi, BG_GC);
1001 set_opt(sbi, INLINE_DATA);
987 1002
988#ifdef CONFIG_F2FS_FS_XATTR 1003#ifdef CONFIG_F2FS_FS_XATTR
989 set_opt(sbi, XATTR_USER); 1004 set_opt(sbi, XATTR_USER);
@@ -1020,7 +1035,6 @@ try_onemore:
1020 sbi->raw_super = raw_super; 1035 sbi->raw_super = raw_super;
1021 sbi->raw_super_buf = raw_super_buf; 1036 sbi->raw_super_buf = raw_super_buf;
1022 mutex_init(&sbi->gc_mutex); 1037 mutex_init(&sbi->gc_mutex);
1023 mutex_init(&sbi->writepages);
1024 mutex_init(&sbi->cp_mutex); 1038 mutex_init(&sbi->cp_mutex);
1025 init_rwsem(&sbi->node_write); 1039 init_rwsem(&sbi->node_write);
1026 clear_sbi_flag(sbi, SBI_POR_DOING); 1040 clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -1072,6 +1086,8 @@ try_onemore:
1072 INIT_LIST_HEAD(&sbi->dir_inode_list); 1086 INIT_LIST_HEAD(&sbi->dir_inode_list);
1073 spin_lock_init(&sbi->dir_inode_lock); 1087 spin_lock_init(&sbi->dir_inode_lock);
1074 1088
1089 init_extent_cache_info(sbi);
1090
1075 init_ino_entry_info(sbi); 1091 init_ino_entry_info(sbi);
1076 1092
1077 /* setup f2fs internal modules */ 1093 /* setup f2fs internal modules */
@@ -1146,9 +1162,6 @@ try_onemore:
1146 if (err) 1162 if (err)
1147 goto free_proc; 1163 goto free_proc;
1148 1164
1149 if (!retry)
1150 set_sbi_flag(sbi, SBI_NEED_FSCK);
1151
1152 /* recover fsynced data */ 1165 /* recover fsynced data */
1153 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1166 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1154 /* 1167 /*
@@ -1160,8 +1173,13 @@ try_onemore:
1160 err = -EROFS; 1173 err = -EROFS;
1161 goto free_kobj; 1174 goto free_kobj;
1162 } 1175 }
1176
1177 if (need_fsck)
1178 set_sbi_flag(sbi, SBI_NEED_FSCK);
1179
1163 err = recover_fsync_data(sbi); 1180 err = recover_fsync_data(sbi);
1164 if (err) { 1181 if (err) {
1182 need_fsck = true;
1165 f2fs_msg(sb, KERN_ERR, 1183 f2fs_msg(sb, KERN_ERR,
1166 "Cannot recover all fsync data errno=%ld", err); 1184 "Cannot recover all fsync data errno=%ld", err);
1167 goto free_kobj; 1185 goto free_kobj;
@@ -1212,7 +1230,7 @@ free_sbi:
1212 1230
1213 /* give only one another chance */ 1231 /* give only one another chance */
1214 if (retry) { 1232 if (retry) {
1215 retry = 0; 1233 retry = false;
1216 shrink_dcache_sb(sb); 1234 shrink_dcache_sb(sb);
1217 goto try_onemore; 1235 goto try_onemore;
1218 } 1236 }
@@ -1278,10 +1296,13 @@ static int __init init_f2fs_fs(void)
1278 err = create_checkpoint_caches(); 1296 err = create_checkpoint_caches();
1279 if (err) 1297 if (err)
1280 goto free_segment_manager_caches; 1298 goto free_segment_manager_caches;
1299 err = create_extent_cache();
1300 if (err)
1301 goto free_checkpoint_caches;
1281 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); 1302 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
1282 if (!f2fs_kset) { 1303 if (!f2fs_kset) {
1283 err = -ENOMEM; 1304 err = -ENOMEM;
1284 goto free_checkpoint_caches; 1305 goto free_extent_cache;
1285 } 1306 }
1286 err = register_filesystem(&f2fs_fs_type); 1307 err = register_filesystem(&f2fs_fs_type);
1287 if (err) 1308 if (err)
@@ -1292,6 +1313,8 @@ static int __init init_f2fs_fs(void)
1292 1313
1293free_kset: 1314free_kset:
1294 kset_unregister(f2fs_kset); 1315 kset_unregister(f2fs_kset);
1316free_extent_cache:
1317 destroy_extent_cache();
1295free_checkpoint_caches: 1318free_checkpoint_caches:
1296 destroy_checkpoint_caches(); 1319 destroy_checkpoint_caches();
1297free_segment_manager_caches: 1320free_segment_manager_caches:
@@ -1309,6 +1332,7 @@ static void __exit exit_f2fs_fs(void)
1309 remove_proc_entry("fs/f2fs", NULL); 1332 remove_proc_entry("fs/f2fs", NULL);
1310 f2fs_destroy_root_stats(); 1333 f2fs_destroy_root_stats();
1311 unregister_filesystem(&f2fs_fs_type); 1334 unregister_filesystem(&f2fs_fs_type);
1335 destroy_extent_cache();
1312 destroy_checkpoint_caches(); 1336 destroy_checkpoint_caches();
1313 destroy_segment_manager_caches(); 1337 destroy_segment_manager_caches();
1314 destroy_node_manager_caches(); 1338 destroy_node_manager_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 5072bf9ae0ef..b0fd2f2d0716 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -135,7 +135,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
135 if (strcmp(name, "") != 0) 135 if (strcmp(name, "") != 0)
136 return -EINVAL; 136 return -EINVAL;
137 137
138 *((char *)buffer) = F2FS_I(inode)->i_advise; 138 if (buffer)
139 *((char *)buffer) = F2FS_I(inode)->i_advise;
139 return sizeof(char); 140 return sizeof(char);
140} 141}
141 142
@@ -152,6 +153,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
152 return -EINVAL; 153 return -EINVAL;
153 154
154 F2FS_I(inode)->i_advise |= *(char *)value; 155 F2FS_I(inode)->i_advise |= *(char *)value;
156 mark_inode_dirty(inode);
155 return 0; 157 return 0;
156} 158}
157 159
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..93fc62232ec2 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -8,9 +8,7 @@
8 * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers. 8 * May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers.
9 */ 9 */
10 10
11#include <linux/fs.h>
12#include <linux/slab.h> 11#include <linux/slab.h>
13#include <linux/buffer_head.h>
14#include "fat.h" 12#include "fat.h"
15 13
16/* this must be > 0. */ 14/* this must be > 0. */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c5d6bb939d19..4afc4d9d2e41 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -13,13 +13,9 @@
13 * Short name translation 1999, 2001 by Wolfram Pienkoss <wp@bszh.de> 13 * Short name translation 1999, 2001 by Wolfram Pienkoss <wp@bszh.de>
14 */ 14 */
15 15
16#include <linux/module.h>
17#include <linux/slab.h> 16#include <linux/slab.h>
18#include <linux/time.h>
19#include <linux/buffer_head.h>
20#include <linux/compat.h> 17#include <linux/compat.h>
21#include <linux/uaccess.h> 18#include <linux/uaccess.h>
22#include <linux/kernel.h>
23#include "fat.h" 19#include "fat.h"
24 20
25/* 21/*
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 64e295e8ff38..be5e15323bab 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -2,11 +2,8 @@
2#define _FAT_H 2#define _FAT_H
3 3
4#include <linux/buffer_head.h> 4#include <linux/buffer_head.h>
5#include <linux/string.h>
6#include <linux/nls.h> 5#include <linux/nls.h>
7#include <linux/fs.h>
8#include <linux/hash.h> 6#include <linux/hash.h>
9#include <linux/mutex.h>
10#include <linux/ratelimit.h> 7#include <linux/ratelimit.h>
11#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
12 9
@@ -66,7 +63,7 @@ struct msdos_sb_info {
66 unsigned short sec_per_clus; /* sectors/cluster */ 63 unsigned short sec_per_clus; /* sectors/cluster */
67 unsigned short cluster_bits; /* log2(cluster_size) */ 64 unsigned short cluster_bits; /* log2(cluster_size) */
68 unsigned int cluster_size; /* cluster size */ 65 unsigned int cluster_size; /* cluster size */
69 unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */ 66 unsigned char fats, fat_bits; /* number of FATs, FAT bits (12,16 or 32) */
70 unsigned short fat_start; 67 unsigned short fat_start;
71 unsigned long fat_length; /* FAT start & length (sec.) */ 68 unsigned long fat_length; /* FAT start & length (sec.) */
72 unsigned long dir_start; 69 unsigned long dir_start;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 260705c58062..8226557130a2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -3,9 +3,6 @@
3 * Released under GPL v2. 3 * Released under GPL v2.
4 */ 4 */
5 5
6#include <linux/module.h>
7#include <linux/fs.h>
8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h> 6#include <linux/blkdev.h>
10#include "fat.h" 7#include "fat.h"
11 8
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8429c68e3057..cf50d93565a2 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -10,10 +10,6 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/compat.h> 11#include <linux/compat.h>
12#include <linux/mount.h> 12#include <linux/mount.h>
13#include <linux/time.h>
14#include <linux/buffer_head.h>
15#include <linux/writeback.h>
16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 13#include <linux/blkdev.h>
18#include <linux/fsnotify.h> 14#include <linux/fsnotify.h>
19#include <linux/security.h> 15#include <linux/security.h>
@@ -170,8 +166,6 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
170 166
171const struct file_operations fat_file_operations = { 167const struct file_operations fat_file_operations = {
172 .llseek = generic_file_llseek, 168 .llseek = generic_file_llseek,
173 .read = new_sync_read,
174 .write = new_sync_write,
175 .read_iter = generic_file_read_iter, 169 .read_iter = generic_file_read_iter,
176 .write_iter = generic_file_write_iter, 170 .write_iter = generic_file_write_iter,
177 .mmap = generic_file_mmap, 171 .mmap = generic_file_mmap,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5263c7..c06774658345 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -11,21 +11,12 @@
11 */ 11 */
12 12
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/time.h>
16#include <linux/slab.h>
17#include <linux/seq_file.h>
18#include <linux/pagemap.h> 14#include <linux/pagemap.h>
19#include <linux/mpage.h> 15#include <linux/mpage.h>
20#include <linux/buffer_head.h>
21#include <linux/mount.h>
22#include <linux/aio.h>
23#include <linux/vfs.h> 16#include <linux/vfs.h>
17#include <linux/seq_file.h>
24#include <linux/parser.h> 18#include <linux/parser.h>
25#include <linux/uio.h> 19#include <linux/uio.h>
26#include <linux/writeback.h>
27#include <linux/log2.h>
28#include <linux/hash.h>
29#include <linux/blkdev.h> 20#include <linux/blkdev.h>
30#include <asm/unaligned.h> 21#include <asm/unaligned.h>
31#include "fat.h" 22#include "fat.h"
@@ -246,8 +237,7 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
246 return err; 237 return err;
247} 238}
248 239
249static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, 240static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
250 struct iov_iter *iter,
251 loff_t offset) 241 loff_t offset)
252{ 242{
253 struct file *file = iocb->ki_filp; 243 struct file *file = iocb->ki_filp;
@@ -256,7 +246,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
256 size_t count = iov_iter_count(iter); 246 size_t count = iov_iter_count(iter);
257 ssize_t ret; 247 ssize_t ret;
258 248
259 if (rw == WRITE) { 249 if (iov_iter_rw(iter) == WRITE) {
260 /* 250 /*
261 * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), 251 * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
262 * so we need to update the ->mmu_private to block boundary. 252 * so we need to update the ->mmu_private to block boundary.
@@ -275,8 +265,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
275 * FAT need to use the DIO_LOCKING for avoiding the race 265 * FAT need to use the DIO_LOCKING for avoiding the race
276 * condition of fat_get_block() and ->truncate(). 266 * condition of fat_get_block() and ->truncate().
277 */ 267 */
278 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); 268 ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block);
279 if (ret < 0 && (rw & WRITE)) 269 if (ret < 0 && iov_iter_rw(iter) == WRITE)
280 fat_write_failed(mapping, offset + count); 270 fat_write_failed(mapping, offset + count);
281 271
282 return ret; 272 return ret;
@@ -1280,8 +1270,7 @@ out:
1280 1270
1281static int fat_read_root(struct inode *inode) 1271static int fat_read_root(struct inode *inode)
1282{ 1272{
1283 struct super_block *sb = inode->i_sb; 1273 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
1284 struct msdos_sb_info *sbi = MSDOS_SB(sb);
1285 int error; 1274 int error;
1286 1275
1287 MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO; 1276 MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index d8da2d2e30ae..c4589e981760 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -6,10 +6,6 @@
6 * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru) 6 * and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
7 */ 7 */
8 8
9#include <linux/module.h>
10#include <linux/fs.h>
11#include <linux/buffer_head.h>
12#include <linux/time.h>
13#include "fat.h" 9#include "fat.h"
14 10
15/* 11/*
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index a783b0e1272a..cc6a8541b668 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -7,8 +7,6 @@
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h>
11#include <linux/buffer_head.h>
12#include "fat.h" 10#include "fat.h"
13 11
14/* Characters that are undesirable in an MS-DOS file name */ 12/* Characters that are undesirable in an MS-DOS file name */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b8b92c2f9683..7e0974eebd8e 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -16,10 +16,8 @@
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/jiffies.h>
20#include <linux/ctype.h> 19#include <linux/ctype.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22#include <linux/buffer_head.h>
23#include <linux/namei.h> 21#include <linux/namei.h>
24#include "fat.h" 22#include "fat.h"
25 23
diff --git a/fs/file.c b/fs/file.c
index ee738ea028fa..93c5f89c248b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,8 +638,7 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
638 file = fcheck_files(files, fd); 638 file = fcheck_files(files, fd);
639 if (file) { 639 if (file) {
640 /* File object ref couldn't be taken */ 640 /* File object ref couldn't be taken */
641 if ((file->f_mode & mask) || 641 if ((file->f_mode & mask) || !get_file_rcu(file))
642 !atomic_long_inc_not_zero(&file->f_count))
643 file = NULL; 642 file = NULL;
644 } 643 }
645 rcu_read_unlock(); 644 rcu_read_unlock();
diff --git a/fs/file_table.c b/fs/file_table.c
index 3f85411b03ce..294174dcc226 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -168,10 +168,10 @@ struct file *alloc_file(struct path *path, fmode_t mode,
168 file->f_inode = path->dentry->d_inode; 168 file->f_inode = path->dentry->d_inode;
169 file->f_mapping = path->dentry->d_inode->i_mapping; 169 file->f_mapping = path->dentry->d_inode->i_mapping;
170 if ((mode & FMODE_READ) && 170 if ((mode & FMODE_READ) &&
171 likely(fop->read || fop->aio_read || fop->read_iter)) 171 likely(fop->read || fop->read_iter))
172 mode |= FMODE_CAN_READ; 172 mode |= FMODE_CAN_READ;
173 if ((mode & FMODE_WRITE) && 173 if ((mode & FMODE_WRITE) &&
174 likely(fop->write || fop->aio_write || fop->write_iter)) 174 likely(fop->write || fop->write_iter))
175 mode |= FMODE_CAN_WRITE; 175 mode |= FMODE_CAN_WRITE;
176 file->f_mode = mode; 176 file->f_mode = mode;
177 file->f_op = fop; 177 file->f_op = fop;
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index b06c98796afb..611b5408f6ec 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock);
9void pin_remove(struct fs_pin *pin) 9void pin_remove(struct fs_pin *pin)
10{ 10{
11 spin_lock(&pin_lock); 11 spin_lock(&pin_lock);
12 hlist_del(&pin->m_list); 12 hlist_del_init(&pin->m_list);
13 hlist_del(&pin->s_list); 13 hlist_del_init(&pin->s_list);
14 spin_unlock(&pin_lock); 14 spin_unlock(&pin_lock);
15 spin_lock_irq(&pin->wait.lock); 15 spin_lock_irq(&pin->wait.lock);
16 pin->done = 1; 16 pin->done = 1;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7abba1c..e5bbf748b698 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
38#include <linux/device.h> 38#include <linux/device.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#include <linux/fs.h> 40#include <linux/fs.h>
41#include <linux/aio.h>
42#include <linux/kdev_t.h> 41#include <linux/kdev_t.h>
43#include <linux/kthread.h> 42#include <linux/kthread.h>
44#include <linux/list.h> 43#include <linux/list.h>
@@ -48,6 +47,7 @@
48#include <linux/slab.h> 47#include <linux/slab.h>
49#include <linux/stat.h> 48#include <linux/stat.h>
50#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/uio.h>
51 51
52#include "fuse_i.h" 52#include "fuse_i.h"
53 53
@@ -88,32 +88,23 @@ static struct list_head *cuse_conntbl_head(dev_t devt)
88 * FUSE file. 88 * FUSE file.
89 */ 89 */
90 90
91static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, 91static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
92 loff_t *ppos)
93{ 92{
93 struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
94 loff_t pos = 0; 94 loff_t pos = 0;
95 struct iovec iov = { .iov_base = buf, .iov_len = count };
96 struct fuse_io_priv io = { .async = 0, .file = file };
97 struct iov_iter ii;
98 iov_iter_init(&ii, READ, &iov, 1, count);
99 95
100 return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE); 96 return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
101} 97}
102 98
103static ssize_t cuse_write(struct file *file, const char __user *buf, 99static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
104 size_t count, loff_t *ppos)
105{ 100{
101 struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
106 loff_t pos = 0; 102 loff_t pos = 0;
107 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
108 struct fuse_io_priv io = { .async = 0, .file = file };
109 struct iov_iter ii;
110 iov_iter_init(&ii, WRITE, &iov, 1, count);
111
112 /* 103 /*
113 * No locking or generic_write_checks(), the server is 104 * No locking or generic_write_checks(), the server is
114 * responsible for locking and sanity checks. 105 * responsible for locking and sanity checks.
115 */ 106 */
116 return fuse_direct_io(&io, &ii, &pos, 107 return fuse_direct_io(&io, from, &pos,
117 FUSE_DIO_WRITE | FUSE_DIO_CUSE); 108 FUSE_DIO_WRITE | FUSE_DIO_CUSE);
118} 109}
119 110
@@ -186,8 +177,8 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
186 177
187static const struct file_operations cuse_frontend_fops = { 178static const struct file_operations cuse_frontend_fops = {
188 .owner = THIS_MODULE, 179 .owner = THIS_MODULE,
189 .read = cuse_read, 180 .read_iter = cuse_read_iter,
190 .write = cuse_write, 181 .write_iter = cuse_write_iter,
191 .open = cuse_open, 182 .open = cuse_open,
192 .release = cuse_release, 183 .release = cuse_release,
193 .unlocked_ioctl = cuse_file_ioctl, 184 .unlocked_ioctl = cuse_file_ioctl,
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 39706c57ad3c..c8b68ab2e574 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
19#include <linux/pipe_fs_i.h> 19#include <linux/pipe_fs_i.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/splice.h> 21#include <linux/splice.h>
22#include <linux/aio.h>
23 22
24MODULE_ALIAS_MISCDEV(FUSE_MINOR); 23MODULE_ALIAS_MISCDEV(FUSE_MINOR);
25MODULE_ALIAS("devname:fuse"); 24MODULE_ALIAS("devname:fuse");
@@ -711,28 +710,26 @@ struct fuse_copy_state {
711 struct fuse_conn *fc; 710 struct fuse_conn *fc;
712 int write; 711 int write;
713 struct fuse_req *req; 712 struct fuse_req *req;
714 const struct iovec *iov; 713 struct iov_iter *iter;
715 struct pipe_buffer *pipebufs; 714 struct pipe_buffer *pipebufs;
716 struct pipe_buffer *currbuf; 715 struct pipe_buffer *currbuf;
717 struct pipe_inode_info *pipe; 716 struct pipe_inode_info *pipe;
718 unsigned long nr_segs; 717 unsigned long nr_segs;
719 unsigned long seglen;
720 unsigned long addr;
721 struct page *pg; 718 struct page *pg;
722 unsigned len; 719 unsigned len;
723 unsigned offset; 720 unsigned offset;
724 unsigned move_pages:1; 721 unsigned move_pages:1;
725}; 722};
726 723
727static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, 724static void fuse_copy_init(struct fuse_copy_state *cs,
725 struct fuse_conn *fc,
728 int write, 726 int write,
729 const struct iovec *iov, unsigned long nr_segs) 727 struct iov_iter *iter)
730{ 728{
731 memset(cs, 0, sizeof(*cs)); 729 memset(cs, 0, sizeof(*cs));
732 cs->fc = fc; 730 cs->fc = fc;
733 cs->write = write; 731 cs->write = write;
734 cs->iov = iov; 732 cs->iter = iter;
735 cs->nr_segs = nr_segs;
736} 733}
737 734
738/* Unmap and put previous page of userspace buffer */ 735/* Unmap and put previous page of userspace buffer */
@@ -800,22 +797,16 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
800 cs->nr_segs++; 797 cs->nr_segs++;
801 } 798 }
802 } else { 799 } else {
803 if (!cs->seglen) { 800 size_t off;
804 BUG_ON(!cs->nr_segs); 801 err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
805 cs->seglen = cs->iov[0].iov_len;
806 cs->addr = (unsigned long) cs->iov[0].iov_base;
807 cs->iov++;
808 cs->nr_segs--;
809 }
810 err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
811 if (err < 0) 802 if (err < 0)
812 return err; 803 return err;
813 BUG_ON(err != 1); 804 BUG_ON(!err);
805 cs->len = err;
806 cs->offset = off;
814 cs->pg = page; 807 cs->pg = page;
815 cs->offset = cs->addr % PAGE_SIZE; 808 cs->offset = off;
816 cs->len = min(PAGE_SIZE - cs->offset, cs->seglen); 809 iov_iter_advance(cs->iter, err);
817 cs->seglen -= cs->len;
818 cs->addr += cs->len;
819 } 810 }
820 811
821 return lock_request(cs->fc, cs->req); 812 return lock_request(cs->fc, cs->req);
@@ -1364,8 +1355,7 @@ static int fuse_dev_open(struct inode *inode, struct file *file)
1364 return 0; 1355 return 0;
1365} 1356}
1366 1357
1367static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, 1358static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
1368 unsigned long nr_segs, loff_t pos)
1369{ 1359{
1370 struct fuse_copy_state cs; 1360 struct fuse_copy_state cs;
1371 struct file *file = iocb->ki_filp; 1361 struct file *file = iocb->ki_filp;
@@ -1373,9 +1363,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1373 if (!fc) 1363 if (!fc)
1374 return -EPERM; 1364 return -EPERM;
1375 1365
1376 fuse_copy_init(&cs, fc, 1, iov, nr_segs); 1366 if (!iter_is_iovec(to))
1367 return -EINVAL;
1377 1368
1378 return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); 1369 fuse_copy_init(&cs, fc, 1, to);
1370
1371 return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to));
1379} 1372}
1380 1373
1381static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, 1374static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
@@ -1395,7 +1388,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1395 if (!bufs) 1388 if (!bufs)
1396 return -ENOMEM; 1389 return -ENOMEM;
1397 1390
1398 fuse_copy_init(&cs, fc, 1, NULL, 0); 1391 fuse_copy_init(&cs, fc, 1, NULL);
1399 cs.pipebufs = bufs; 1392 cs.pipebufs = bufs;
1400 cs.pipe = pipe; 1393 cs.pipe = pipe;
1401 ret = fuse_dev_do_read(fc, in, &cs, len); 1394 ret = fuse_dev_do_read(fc, in, &cs, len);
@@ -1971,17 +1964,19 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1971 return err; 1964 return err;
1972} 1965}
1973 1966
1974static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, 1967static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
1975 unsigned long nr_segs, loff_t pos)
1976{ 1968{
1977 struct fuse_copy_state cs; 1969 struct fuse_copy_state cs;
1978 struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); 1970 struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1979 if (!fc) 1971 if (!fc)
1980 return -EPERM; 1972 return -EPERM;
1981 1973
1982 fuse_copy_init(&cs, fc, 0, iov, nr_segs); 1974 if (!iter_is_iovec(from))
1975 return -EINVAL;
1976
1977 fuse_copy_init(&cs, fc, 0, from);
1983 1978
1984 return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs)); 1979 return fuse_dev_do_write(fc, &cs, iov_iter_count(from));
1985} 1980}
1986 1981
1987static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, 1982static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
@@ -2044,8 +2039,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2044 } 2039 }
2045 pipe_unlock(pipe); 2040 pipe_unlock(pipe);
2046 2041
2047 fuse_copy_init(&cs, fc, 0, NULL, nbuf); 2042 fuse_copy_init(&cs, fc, 0, NULL);
2048 cs.pipebufs = bufs; 2043 cs.pipebufs = bufs;
2044 cs.nr_segs = nbuf;
2049 cs.pipe = pipe; 2045 cs.pipe = pipe;
2050 2046
2051 if (flags & SPLICE_F_MOVE) 2047 if (flags & SPLICE_F_MOVE)
@@ -2233,11 +2229,9 @@ const struct file_operations fuse_dev_operations = {
2233 .owner = THIS_MODULE, 2229 .owner = THIS_MODULE,
2234 .open = fuse_dev_open, 2230 .open = fuse_dev_open,
2235 .llseek = no_llseek, 2231 .llseek = no_llseek,
2236 .read = do_sync_read, 2232 .read_iter = fuse_dev_read,
2237 .aio_read = fuse_dev_read,
2238 .splice_read = fuse_dev_splice_read, 2233 .splice_read = fuse_dev_splice_read,
2239 .write = do_sync_write, 2234 .write_iter = fuse_dev_write,
2240 .aio_write = fuse_dev_write,
2241 .splice_write = fuse_dev_splice_write, 2235 .splice_write = fuse_dev_splice_write,
2242 .poll = fuse_dev_poll, 2236 .poll = fuse_dev_poll,
2243 .release = fuse_dev_release, 2237 .release = fuse_dev_release,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3bdcfd8..5ef05b5c4cff 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/aio.h>
19#include <linux/falloc.h> 18#include <linux/falloc.h>
19#include <linux/uio.h>
20 20
21static const struct file_operations fuse_direct_io_file_operations; 21static const struct file_operations fuse_direct_io_file_operations;
22 22
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
528 } 528 }
529} 529}
530 530
531static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
532{
533 if (io->err)
534 return io->err;
535
536 if (io->bytes >= 0 && io->write)
537 return -EIO;
538
539 return io->bytes < 0 ? io->size : io->bytes;
540}
541
531/** 542/**
532 * In case of short read, the caller sets 'pos' to the position of 543 * In case of short read, the caller sets 'pos' to the position of
533 * actual end of fuse request in IO request. Otherwise, if bytes_requested 544 * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
546 */ 557 */
547static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos) 558static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
548{ 559{
560 bool is_sync = is_sync_kiocb(io->iocb);
549 int left; 561 int left;
550 562
551 spin_lock(&io->lock); 563 spin_lock(&io->lock);
@@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
555 io->bytes = pos; 567 io->bytes = pos;
556 568
557 left = --io->reqs; 569 left = --io->reqs;
570 if (!left && is_sync)
571 complete(io->done);
558 spin_unlock(&io->lock); 572 spin_unlock(&io->lock);
559 573
560 if (!left) { 574 if (!left && !is_sync) {
561 long res; 575 ssize_t res = fuse_get_res_by_io(io);
562 576
563 if (io->err) 577 if (res >= 0) {
564 res = io->err; 578 struct inode *inode = file_inode(io->iocb->ki_filp);
565 else if (io->bytes >= 0 && io->write) 579 struct fuse_conn *fc = get_fuse_conn(inode);
566 res = -EIO; 580 struct fuse_inode *fi = get_fuse_inode(inode);
567 else {
568 res = io->bytes < 0 ? io->size : io->bytes;
569
570 if (!is_sync_kiocb(io->iocb)) {
571 struct inode *inode = file_inode(io->iocb->ki_filp);
572 struct fuse_conn *fc = get_fuse_conn(inode);
573 struct fuse_inode *fi = get_fuse_inode(inode);
574 581
575 spin_lock(&fc->lock); 582 spin_lock(&fc->lock);
576 fi->attr_version = ++fc->attr_version; 583 fi->attr_version = ++fc->attr_version;
577 spin_unlock(&fc->lock); 584 spin_unlock(&fc->lock);
578 }
579 } 585 }
580 586
581 aio_complete(io->iocb, res, 0); 587 io->iocb->ki_complete(io->iocb, res, 0);
582 kfree(io); 588 kfree(io);
583 } 589 }
584} 590}
@@ -1139,13 +1145,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1139{ 1145{
1140 struct file *file = iocb->ki_filp; 1146 struct file *file = iocb->ki_filp;
1141 struct address_space *mapping = file->f_mapping; 1147 struct address_space *mapping = file->f_mapping;
1142 size_t count = iov_iter_count(from);
1143 ssize_t written = 0; 1148 ssize_t written = 0;
1144 ssize_t written_buffered = 0; 1149 ssize_t written_buffered = 0;
1145 struct inode *inode = mapping->host; 1150 struct inode *inode = mapping->host;
1146 ssize_t err; 1151 ssize_t err;
1147 loff_t endbyte = 0; 1152 loff_t endbyte = 0;
1148 loff_t pos = iocb->ki_pos;
1149 1153
1150 if (get_fuse_conn(inode)->writeback_cache) { 1154 if (get_fuse_conn(inode)->writeback_cache) {
1151 /* Update size (EOF optimization) and mode (SUID clearing) */ 1155 /* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1161,14 +1165,10 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1161 /* We can write back this queue in page reclaim */ 1165 /* We can write back this queue in page reclaim */
1162 current->backing_dev_info = inode_to_bdi(inode); 1166 current->backing_dev_info = inode_to_bdi(inode);
1163 1167
1164 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1168 err = generic_write_checks(iocb, from);
1165 if (err) 1169 if (err <= 0)
1166 goto out; 1170 goto out;
1167 1171
1168 if (count == 0)
1169 goto out;
1170
1171 iov_iter_truncate(from, count);
1172 err = file_remove_suid(file); 1172 err = file_remove_suid(file);
1173 if (err) 1173 if (err)
1174 goto out; 1174 goto out;
@@ -1177,7 +1177,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1177 if (err) 1177 if (err)
1178 goto out; 1178 goto out;
1179 1179
1180 if (file->f_flags & O_DIRECT) { 1180 if (iocb->ki_flags & IOCB_DIRECT) {
1181 loff_t pos = iocb->ki_pos;
1181 written = generic_file_direct_write(iocb, from, pos); 1182 written = generic_file_direct_write(iocb, from, pos);
1182 if (written < 0 || !iov_iter_count(from)) 1183 if (written < 0 || !iov_iter_count(from))
1183 goto out; 1184 goto out;
@@ -1203,9 +1204,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
1203 written += written_buffered; 1204 written += written_buffered;
1204 iocb->ki_pos = pos + written_buffered; 1205 iocb->ki_pos = pos + written_buffered;
1205 } else { 1206 } else {
1206 written = fuse_perform_write(file, mapping, from, pos); 1207 written = fuse_perform_write(file, mapping, from, iocb->ki_pos);
1207 if (written >= 0) 1208 if (written >= 0)
1208 iocb->ki_pos = pos + written; 1209 iocb->ki_pos += written;
1209 } 1210 }
1210out: 1211out:
1211 current->backing_dev_info = NULL; 1212 current->backing_dev_info = NULL;
@@ -1395,55 +1396,30 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
1395 return res; 1396 return res;
1396} 1397}
1397 1398
1398static ssize_t fuse_direct_read(struct file *file, char __user *buf, 1399static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
1399 size_t count, loff_t *ppos)
1400{
1401 struct fuse_io_priv io = { .async = 0, .file = file };
1402 struct iovec iov = { .iov_base = buf, .iov_len = count };
1403 struct iov_iter ii;
1404 iov_iter_init(&ii, READ, &iov, 1, count);
1405 return __fuse_direct_read(&io, &ii, ppos);
1406}
1407
1408static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
1409 struct iov_iter *iter,
1410 loff_t *ppos)
1411{ 1400{
1412 struct file *file = io->file; 1401 struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp };
1413 struct inode *inode = file_inode(file); 1402 return __fuse_direct_read(&io, to, &iocb->ki_pos);
1414 size_t count = iov_iter_count(iter);
1415 ssize_t res;
1416
1417
1418 res = generic_write_checks(file, ppos, &count, 0);
1419 if (!res) {
1420 iov_iter_truncate(iter, count);
1421 res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
1422 }
1423
1424 fuse_invalidate_attr(inode);
1425
1426 return res;
1427} 1403}
1428 1404
1429static ssize_t fuse_direct_write(struct file *file, const char __user *buf, 1405static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
1430 size_t count, loff_t *ppos)
1431{ 1406{
1432 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; 1407 struct file *file = iocb->ki_filp;
1433 struct inode *inode = file_inode(file); 1408 struct inode *inode = file_inode(file);
1434 ssize_t res;
1435 struct fuse_io_priv io = { .async = 0, .file = file }; 1409 struct fuse_io_priv io = { .async = 0, .file = file };
1436 struct iov_iter ii; 1410 ssize_t res;
1437 iov_iter_init(&ii, WRITE, &iov, 1, count);
1438 1411
1439 if (is_bad_inode(inode)) 1412 if (is_bad_inode(inode))
1440 return -EIO; 1413 return -EIO;
1441 1414
1442 /* Don't allow parallel writes to the same file */ 1415 /* Don't allow parallel writes to the same file */
1443 mutex_lock(&inode->i_mutex); 1416 mutex_lock(&inode->i_mutex);
1444 res = __fuse_direct_write(&io, &ii, ppos); 1417 res = generic_write_checks(iocb, from);
1445 if (res > 0) 1418 if (res > 0)
1446 fuse_write_update_size(inode, *ppos); 1419 res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
1420 fuse_invalidate_attr(inode);
1421 if (res > 0)
1422 fuse_write_update_size(inode, iocb->ki_pos);
1447 mutex_unlock(&inode->i_mutex); 1423 mutex_unlock(&inode->i_mutex);
1448 1424
1449 return res; 1425 return res;
@@ -2798,9 +2774,9 @@ static inline loff_t fuse_round_up(loff_t off)
2798} 2774}
2799 2775
2800static ssize_t 2776static ssize_t
2801fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 2777fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
2802 loff_t offset)
2803{ 2778{
2779 DECLARE_COMPLETION_ONSTACK(wait);
2804 ssize_t ret = 0; 2780 ssize_t ret = 0;
2805 struct file *file = iocb->ki_filp; 2781 struct file *file = iocb->ki_filp;
2806 struct fuse_file *ff = file->private_data; 2782 struct fuse_file *ff = file->private_data;
@@ -2815,15 +2791,15 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2815 inode = file->f_mapping->host; 2791 inode = file->f_mapping->host;
2816 i_size = i_size_read(inode); 2792 i_size = i_size_read(inode);
2817 2793
2818 if ((rw == READ) && (offset > i_size)) 2794 if ((iov_iter_rw(iter) == READ) && (offset > i_size))
2819 return 0; 2795 return 0;
2820 2796
2821 /* optimization for short read */ 2797 /* optimization for short read */
2822 if (async_dio && rw != WRITE && offset + count > i_size) { 2798 if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
2823 if (offset >= i_size) 2799 if (offset >= i_size)
2824 return 0; 2800 return 0;
2825 count = min_t(loff_t, count, fuse_round_up(i_size - offset)); 2801 iov_iter_truncate(iter, fuse_round_up(i_size - offset));
2826 iov_iter_truncate(iter, count); 2802 count = iov_iter_count(iter);
2827 } 2803 }
2828 2804
2829 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 2805 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2834,7 +2810,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2834 io->bytes = -1; 2810 io->bytes = -1;
2835 io->size = 0; 2811 io->size = 0;
2836 io->offset = offset; 2812 io->offset = offset;
2837 io->write = (rw == WRITE); 2813 io->write = (iov_iter_rw(iter) == WRITE);
2838 io->err = 0; 2814 io->err = 0;
2839 io->file = file; 2815 io->file = file;
2840 /* 2816 /*
@@ -2849,13 +2825,19 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2849 * to wait on real async I/O requests, so we must submit this request 2825 * to wait on real async I/O requests, so we must submit this request
2850 * synchronously. 2826 * synchronously.
2851 */ 2827 */
2852 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) 2828 if (!is_sync_kiocb(iocb) && (offset + count > i_size) &&
2829 iov_iter_rw(iter) == WRITE)
2853 io->async = false; 2830 io->async = false;
2854 2831
2855 if (rw == WRITE) 2832 if (io->async && is_sync_kiocb(iocb))
2856 ret = __fuse_direct_write(io, iter, &pos); 2833 io->done = &wait;
2857 else 2834
2835 if (iov_iter_rw(iter) == WRITE) {
2836 ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
2837 fuse_invalidate_attr(inode);
2838 } else {
2858 ret = __fuse_direct_read(io, iter, &pos); 2839 ret = __fuse_direct_read(io, iter, &pos);
2840 }
2859 2841
2860 if (io->async) { 2842 if (io->async) {
2861 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 2843 fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -2864,12 +2846,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
2864 if (!is_sync_kiocb(iocb)) 2846 if (!is_sync_kiocb(iocb))
2865 return -EIOCBQUEUED; 2847 return -EIOCBQUEUED;
2866 2848
2867 ret = wait_on_sync_kiocb(iocb); 2849 wait_for_completion(&wait);
2868 } else { 2850 ret = fuse_get_res_by_io(io);
2869 kfree(io);
2870 } 2851 }
2871 2852
2872 if (rw == WRITE) { 2853 kfree(io);
2854
2855 if (iov_iter_rw(iter) == WRITE) {
2873 if (ret > 0) 2856 if (ret > 0)
2874 fuse_write_update_size(inode, pos); 2857 fuse_write_update_size(inode, pos);
2875 else if (ret < 0 && offset + count > i_size) 2858 else if (ret < 0 && offset + count > i_size)
@@ -2957,9 +2940,7 @@ out:
2957 2940
2958static const struct file_operations fuse_file_operations = { 2941static const struct file_operations fuse_file_operations = {
2959 .llseek = fuse_file_llseek, 2942 .llseek = fuse_file_llseek,
2960 .read = new_sync_read,
2961 .read_iter = fuse_file_read_iter, 2943 .read_iter = fuse_file_read_iter,
2962 .write = new_sync_write,
2963 .write_iter = fuse_file_write_iter, 2944 .write_iter = fuse_file_write_iter,
2964 .mmap = fuse_file_mmap, 2945 .mmap = fuse_file_mmap,
2965 .open = fuse_open, 2946 .open = fuse_open,
@@ -2977,8 +2958,8 @@ static const struct file_operations fuse_file_operations = {
2977 2958
2978static const struct file_operations fuse_direct_io_file_operations = { 2959static const struct file_operations fuse_direct_io_file_operations = {
2979 .llseek = fuse_file_llseek, 2960 .llseek = fuse_file_llseek,
2980 .read = fuse_direct_read, 2961 .read_iter = fuse_direct_read_iter,
2981 .write = fuse_direct_write, 2962 .write_iter = fuse_direct_write_iter,
2982 .mmap = fuse_direct_mmap, 2963 .mmap = fuse_direct_mmap,
2983 .open = fuse_open, 2964 .open = fuse_open,
2984 .flush = fuse_flush, 2965 .flush = fuse_flush,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07c1376..7354dc142a50 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@ struct fuse_io_priv {
263 int err; 263 int err;
264 struct kiocb *iocb; 264 struct kiocb *iocb;
265 struct file *file; 265 struct file *file;
266 struct completion *done;
266}; 267};
267 268
268/** 269/**
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); 110 error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
111 if (error) 111 if (error)
112 goto out; 112 goto out;
113 113 set_cached_acl(inode, type, acl);
114 if (acl)
115 set_cached_acl(inode, type, acl);
116 else
117 forget_cached_acl(inode, type);
118out: 114out:
119 kfree(data); 115 kfree(data);
120 return error; 116 return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94edebe..5551fea0afd7 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/gfs2_ondisk.h> 21#include <linux/gfs2_ondisk.h>
22#include <linux/backing-dev.h> 22#include <linux/backing-dev.h>
23#include <linux/aio.h> 23#include <linux/uio.h>
24#include <trace/events/writeback.h> 24#include <trace/events/writeback.h>
25 25
26#include "gfs2.h" 26#include "gfs2.h"
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
671 671
672 if (alloc_required) { 672 if (alloc_required) {
673 struct gfs2_alloc_parms ap = { .aflags = 0, }; 673 struct gfs2_alloc_parms ap = { .aflags = 0, };
674 error = gfs2_quota_lock_check(ip); 674 requested = data_blocks + ind_blocks;
675 ap.target = requested;
676 error = gfs2_quota_lock_check(ip, &ap);
675 if (error) 677 if (error)
676 goto out_unlock; 678 goto out_unlock;
677 679
678 requested = data_blocks + ind_blocks;
679 ap.target = requested;
680 error = gfs2_inplace_reserve(ip, &ap); 680 error = gfs2_inplace_reserve(ip, &ap);
681 if (error) 681 if (error)
682 goto out_qunlock; 682 goto out_qunlock;
@@ -1016,13 +1016,12 @@ out:
1016/** 1016/**
1017 * gfs2_ok_for_dio - check that dio is valid on this file 1017 * gfs2_ok_for_dio - check that dio is valid on this file
1018 * @ip: The inode 1018 * @ip: The inode
1019 * @rw: READ or WRITE
1020 * @offset: The offset at which we are reading or writing 1019 * @offset: The offset at which we are reading or writing
1021 * 1020 *
1022 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) 1021 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
1023 * 1 (to accept the i/o request) 1022 * 1 (to accept the i/o request)
1024 */ 1023 */
1025static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) 1024static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
1026{ 1025{
1027 /* 1026 /*
1028 * Should we return an error here? I can't see that O_DIRECT for 1027 * Should we return an error here? I can't see that O_DIRECT for
@@ -1039,8 +1038,8 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
1039 1038
1040 1039
1041 1040
1042static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, 1041static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
1043 struct iov_iter *iter, loff_t offset) 1042 loff_t offset)
1044{ 1043{
1045 struct file *file = iocb->ki_filp; 1044 struct file *file = iocb->ki_filp;
1046 struct inode *inode = file->f_mapping->host; 1045 struct inode *inode = file->f_mapping->host;
@@ -1061,7 +1060,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1061 rv = gfs2_glock_nq(&gh); 1060 rv = gfs2_glock_nq(&gh);
1062 if (rv) 1061 if (rv)
1063 return rv; 1062 return rv;
1064 rv = gfs2_ok_for_dio(ip, rw, offset); 1063 rv = gfs2_ok_for_dio(ip, offset);
1065 if (rv != 1) 1064 if (rv != 1)
1066 goto out; /* dio not valid, fall back to buffered i/o */ 1065 goto out; /* dio not valid, fall back to buffered i/o */
1067 1066
@@ -1091,13 +1090,12 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1091 rv = filemap_write_and_wait_range(mapping, lstart, end); 1090 rv = filemap_write_and_wait_range(mapping, lstart, end);
1092 if (rv) 1091 if (rv)
1093 goto out; 1092 goto out;
1094 if (rw == WRITE) 1093 if (iov_iter_rw(iter) == WRITE)
1095 truncate_inode_pages_range(mapping, lstart, end); 1094 truncate_inode_pages_range(mapping, lstart, end);
1096 } 1095 }
1097 1096
1098 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 1097 rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
1099 iter, offset, 1098 offset, gfs2_get_block_direct, NULL, NULL, 0);
1100 gfs2_get_block_direct, NULL, NULL, 0);
1101out: 1099out:
1102 gfs2_glock_dq(&gh); 1100 gfs2_glock_dq(&gh);
1103 gfs2_holder_uninit(&gh); 1101 gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
1224 1224
1225 if (gfs2_is_stuffed(ip) && 1225 if (gfs2_is_stuffed(ip) &&
1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1226 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1227 error = gfs2_quota_lock_check(ip); 1227 error = gfs2_quota_lock_check(ip, &ap);
1228 if (error) 1228 if (error)
1229 return error; 1229 return error;
1230 1230
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8e2d7e..31892871ea87 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <linux/dlm.h> 26#include <linux/dlm.h>
27#include <linux/dlm_plock.h> 27#include <linux/dlm_plock.h>
28#include <linux/aio.h>
29#include <linux/delay.h> 28#include <linux/delay.h>
30 29
31#include "gfs2.h" 30#include "gfs2.h"
@@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
429 if (ret) 428 if (ret)
430 goto out_unlock; 429 goto out_unlock;
431 430
432 ret = gfs2_quota_lock_check(ip);
433 if (ret)
434 goto out_unlock;
435 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 431 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
436 ap.target = data_blocks + ind_blocks; 432 ap.target = data_blocks + ind_blocks;
433 ret = gfs2_quota_lock_check(ip, &ap);
434 if (ret)
435 goto out_unlock;
437 ret = gfs2_inplace_reserve(ip, &ap); 436 ret = gfs2_inplace_reserve(ip, &ap);
438 if (ret) 437 if (ret)
439 goto out_quota_unlock; 438 goto out_quota_unlock;
@@ -710,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
710 709
711 gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); 710 gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
712 711
713 if (file->f_flags & O_APPEND) { 712 if (iocb->ki_flags & IOCB_APPEND) {
714 struct gfs2_holder gh; 713 struct gfs2_holder gh;
715 714
716 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 715 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -765,22 +764,30 @@ out:
765 brelse(dibh); 764 brelse(dibh);
766 return error; 765 return error;
767} 766}
768 767/**
769static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, 768 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
770 unsigned int *data_blocks, unsigned int *ind_blocks) 769 * blocks, determine how many bytes can be written.
770 * @ip: The inode in question.
771 * @len: Max cap of bytes. What we return in *len must be <= this.
772 * @data_blocks: Compute and return the number of data blocks needed
773 * @ind_blocks: Compute and return the number of indirect blocks needed
774 * @max_blocks: The total blocks available to work with.
775 *
776 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
777 */
778static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
779 unsigned int *data_blocks, unsigned int *ind_blocks,
780 unsigned int max_blocks)
771{ 781{
782 loff_t max = *len;
772 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 783 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
773 unsigned int max_blocks = ip->i_rgd->rd_free_clone;
774 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); 784 unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
775 785
776 for (tmp = max_data; tmp > sdp->sd_diptrs;) { 786 for (tmp = max_data; tmp > sdp->sd_diptrs;) {
777 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); 787 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
778 max_data -= tmp; 788 max_data -= tmp;
779 } 789 }
780 /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, 790
781 so it might end up with fewer data blocks */
782 if (max_data <= *data_blocks)
783 return;
784 *data_blocks = max_data; 791 *data_blocks = max_data;
785 *ind_blocks = max_blocks - max_data; 792 *ind_blocks = max_blocks - max_data;
786 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; 793 *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
797 struct gfs2_inode *ip = GFS2_I(inode); 804 struct gfs2_inode *ip = GFS2_I(inode);
798 struct gfs2_alloc_parms ap = { .aflags = 0, }; 805 struct gfs2_alloc_parms ap = { .aflags = 0, };
799 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 806 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
800 loff_t bytes, max_bytes; 807 loff_t bytes, max_bytes, max_blks = UINT_MAX;
801 int error; 808 int error;
802 const loff_t pos = offset; 809 const loff_t pos = offset;
803 const loff_t count = len; 810 const loff_t count = len;
@@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
819 826
820 gfs2_size_hint(file, offset, len); 827 gfs2_size_hint(file, offset, len);
821 828
829 gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
830 ap.min_target = data_blocks + ind_blocks;
831
822 while (len > 0) { 832 while (len > 0) {
823 if (len < bytes) 833 if (len < bytes)
824 bytes = len; 834 bytes = len;
@@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
827 offset += bytes; 837 offset += bytes;
828 continue; 838 continue;
829 } 839 }
830 error = gfs2_quota_lock_check(ip); 840
841 /* We need to determine how many bytes we can actually
842 * fallocate without exceeding quota or going over the
843 * end of the fs. We start off optimistically by assuming
844 * we can write max_bytes */
845 max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
846
847 /* Since max_bytes is most likely a theoretical max, we
848 * calculate a more realistic 'bytes' to serve as a good
849 * starting point for the number of bytes we may be able
850 * to write */
851 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
852 ap.target = data_blocks + ind_blocks;
853
854 error = gfs2_quota_lock_check(ip, &ap);
831 if (error) 855 if (error)
832 return error; 856 return error;
833retry: 857 /* ap.allowed tells us how many blocks quota will allow
834 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 858 * us to write. Check if this reduces max_blks */
859 if (ap.allowed && ap.allowed < max_blks)
860 max_blks = ap.allowed;
835 861
836 ap.target = data_blocks + ind_blocks;
837 error = gfs2_inplace_reserve(ip, &ap); 862 error = gfs2_inplace_reserve(ip, &ap);
838 if (error) { 863 if (error)
839 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
840 bytes >>= 1;
841 bytes &= bsize_mask;
842 if (bytes == 0)
843 bytes = sdp->sd_sb.sb_bsize;
844 goto retry;
845 }
846 goto out_qunlock; 864 goto out_qunlock;
847 } 865
848 max_bytes = bytes; 866 /* check if the selected rgrp limits our max_blks further */
849 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, 867 if (ap.allowed && ap.allowed < max_blks)
850 &max_bytes, &data_blocks, &ind_blocks); 868 max_blks = ap.allowed;
869
870 /* Almost done. Calculate bytes that can be written using
871 * max_blks. We also recompute max_bytes, data_blocks and
872 * ind_blocks */
873 calc_max_reserv(ip, &max_bytes, &data_blocks,
874 &ind_blocks, max_blks);
851 875
852 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 876 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
853 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); 877 RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -931,6 +955,22 @@ out_uninit:
931 return ret; 955 return ret;
932} 956}
933 957
958static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
959 struct file *out, loff_t *ppos,
960 size_t len, unsigned int flags)
961{
962 int error;
963 struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
964
965 error = gfs2_rs_alloc(ip);
966 if (error)
967 return (ssize_t)error;
968
969 gfs2_size_hint(out, *ppos, len);
970
971 return iter_file_splice_write(pipe, out, ppos, len, flags);
972}
973
934#ifdef CONFIG_GFS2_FS_LOCKING_DLM 974#ifdef CONFIG_GFS2_FS_LOCKING_DLM
935 975
936/** 976/**
@@ -1065,9 +1105,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1065 1105
1066const struct file_operations gfs2_file_fops = { 1106const struct file_operations gfs2_file_fops = {
1067 .llseek = gfs2_llseek, 1107 .llseek = gfs2_llseek,
1068 .read = new_sync_read,
1069 .read_iter = generic_file_read_iter, 1108 .read_iter = generic_file_read_iter,
1070 .write = new_sync_write,
1071 .write_iter = gfs2_file_write_iter, 1109 .write_iter = gfs2_file_write_iter,
1072 .unlocked_ioctl = gfs2_ioctl, 1110 .unlocked_ioctl = gfs2_ioctl,
1073 .mmap = gfs2_mmap, 1111 .mmap = gfs2_mmap,
@@ -1077,7 +1115,7 @@ const struct file_operations gfs2_file_fops = {
1077 .lock = gfs2_lock, 1115 .lock = gfs2_lock,
1078 .flock = gfs2_flock, 1116 .flock = gfs2_flock,
1079 .splice_read = generic_file_splice_read, 1117 .splice_read = generic_file_splice_read,
1080 .splice_write = iter_file_splice_write, 1118 .splice_write = gfs2_file_splice_write,
1081 .setlease = simple_nosetlease, 1119 .setlease = simple_nosetlease,
1082 .fallocate = gfs2_fallocate, 1120 .fallocate = gfs2_fallocate,
1083}; 1121};
@@ -1097,9 +1135,7 @@ const struct file_operations gfs2_dir_fops = {
1097 1135
1098const struct file_operations gfs2_file_fops_nolock = { 1136const struct file_operations gfs2_file_fops_nolock = {
1099 .llseek = gfs2_llseek, 1137 .llseek = gfs2_llseek,
1100 .read = new_sync_read,
1101 .read_iter = generic_file_read_iter, 1138 .read_iter = generic_file_read_iter,
1102 .write = new_sync_write,
1103 .write_iter = gfs2_file_write_iter, 1139 .write_iter = gfs2_file_write_iter,
1104 .unlocked_ioctl = gfs2_ioctl, 1140 .unlocked_ioctl = gfs2_ioctl,
1105 .mmap = gfs2_mmap, 1141 .mmap = gfs2_mmap,
@@ -1107,7 +1143,7 @@ const struct file_operations gfs2_file_fops_nolock = {
1107 .release = gfs2_release, 1143 .release = gfs2_release,
1108 .fsync = gfs2_fsync, 1144 .fsync = gfs2_fsync,
1109 .splice_read = generic_file_splice_read, 1145 .splice_read = generic_file_splice_read,
1110 .splice_write = iter_file_splice_write, 1146 .splice_write = gfs2_file_splice_write,
1111 .setlease = generic_setlease, 1147 .setlease = generic_setlease,
1112 .fallocate = gfs2_fallocate, 1148 .fallocate = gfs2_fallocate,
1113}; 1149};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
2047 2047
2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) 2048int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2049{ 2049{
2050 sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); 2050 struct dentry *dent;
2051 if (!sdp->debugfs_dir) 2051
2052 return -ENOMEM; 2052 dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2053 sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", 2053 if (IS_ERR_OR_NULL(dent))
2054 S_IFREG | S_IRUGO, 2054 goto fail;
2055 sdp->debugfs_dir, sdp, 2055 sdp->debugfs_dir = dent;
2056 &gfs2_glocks_fops); 2056
2057 if (!sdp->debugfs_dentry_glocks) 2057 dent = debugfs_create_file("glocks",
2058 S_IFREG | S_IRUGO,
2059 sdp->debugfs_dir, sdp,
2060 &gfs2_glocks_fops);
2061 if (IS_ERR_OR_NULL(dent))
2058 goto fail; 2062 goto fail;
2063 sdp->debugfs_dentry_glocks = dent;
2059 2064
2060 sdp->debugfs_dentry_glstats = debugfs_create_file("glstats", 2065 dent = debugfs_create_file("glstats",
2061 S_IFREG | S_IRUGO, 2066 S_IFREG | S_IRUGO,
2062 sdp->debugfs_dir, sdp, 2067 sdp->debugfs_dir, sdp,
2063 &gfs2_glstats_fops); 2068 &gfs2_glstats_fops);
2064 if (!sdp->debugfs_dentry_glstats) 2069 if (IS_ERR_OR_NULL(dent))
2065 goto fail; 2070 goto fail;
2071 sdp->debugfs_dentry_glstats = dent;
2066 2072
2067 sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats", 2073 dent = debugfs_create_file("sbstats",
2068 S_IFREG | S_IRUGO, 2074 S_IFREG | S_IRUGO,
2069 sdp->debugfs_dir, sdp, 2075 sdp->debugfs_dir, sdp,
2070 &gfs2_sbstats_fops); 2076 &gfs2_sbstats_fops);
2071 if (!sdp->debugfs_dentry_sbstats) 2077 if (IS_ERR_OR_NULL(dent))
2072 goto fail; 2078 goto fail;
2079 sdp->debugfs_dentry_sbstats = dent;
2073 2080
2074 return 0; 2081 return 0;
2075fail: 2082fail:
2076 gfs2_delete_debugfs_file(sdp); 2083 gfs2_delete_debugfs_file(sdp);
2077 return -ENOMEM; 2084 return dent ? PTR_ERR(dent) : -ENOMEM;
2078} 2085}
2079 2086
2080void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) 2087void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2100int gfs2_register_debugfs(void) 2107int gfs2_register_debugfs(void)
2101{ 2108{
2102 gfs2_root = debugfs_create_dir("gfs2", NULL); 2109 gfs2_root = debugfs_create_dir("gfs2", NULL);
2110 if (IS_ERR(gfs2_root))
2111 return PTR_ERR(gfs2_root);
2103 return gfs2_root ? 0 : -ENOMEM; 2112 return gfs2_root ? 0 : -ENOMEM;
2104} 2113}
2105 2114
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
301 * to the allocation code. 301 * to the allocation code.
302 */ 302 */
303struct gfs2_alloc_parms { 303struct gfs2_alloc_parms {
304 u32 target; 304 u64 target;
305 u32 min_target;
305 u32 aflags; 306 u32 aflags;
307 u64 allowed;
306}; 308};
307 309
308enum { 310enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; 382 struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
383 int error; 383 int error;
384 384
385 error = gfs2_quota_lock_check(ip); 385 error = gfs2_quota_lock_check(ip, &ap);
386 if (error) 386 if (error)
387 goto out; 387 goto out;
388 388
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
525 int error; 525 int error;
526 526
527 if (da->nr_blocks) { 527 if (da->nr_blocks) {
528 error = gfs2_quota_lock_check(dip); 528 error = gfs2_quota_lock_check(dip, &ap);
529 if (error) 529 if (error)
530 goto fail_quota_locks; 530 goto fail_quota_locks;
531 531
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
953 953
954 if (da.nr_blocks) { 954 if (da.nr_blocks) {
955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 955 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
956 error = gfs2_quota_lock_check(dip); 956 error = gfs2_quota_lock_check(dip, &ap);
957 if (error) 957 if (error)
958 goto out_gunlock; 958 goto out_gunlock;
959 959
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1470 1470
1471 if (da.nr_blocks) { 1471 if (da.nr_blocks) {
1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; 1472 struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
1473 error = gfs2_quota_lock_check(ndip); 1473 error = gfs2_quota_lock_check(ndip, &ap);
1474 if (error) 1474 if (error)
1475 goto out_gunlock; 1475 goto out_gunlock;
1476 1476
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1669 kuid_t ouid, nuid; 1669 kuid_t ouid, nuid;
1670 kgid_t ogid, ngid; 1670 kgid_t ogid, ngid;
1671 int error; 1671 int error;
1672 struct gfs2_alloc_parms ap;
1672 1673
1673 ouid = inode->i_uid; 1674 ouid = inode->i_uid;
1674 ogid = inode->i_gid; 1675 ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1696 if (error) 1697 if (error)
1697 goto out; 1698 goto out;
1698 1699
1700 ap.target = gfs2_get_inode_blocks(&ip->i_inode);
1701
1699 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1702 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1700 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1703 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1701 error = gfs2_quota_check(ip, nuid, ngid); 1704 error = gfs2_quota_check(ip, nuid, ngid, &ap);
1702 if (error) 1705 if (error)
1703 goto out_gunlock_q; 1706 goto out_gunlock_q;
1704 } 1707 }
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1713 1716
1714 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || 1717 if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
1715 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { 1718 !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
1716 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1719 gfs2_quota_change(ip, -ap.target, ouid, ogid);
1717 gfs2_quota_change(ip, -blocks, ouid, ogid); 1720 gfs2_quota_change(ip, ap.target, nuid, ngid);
1718 gfs2_quota_change(ip, blocks, nuid, ngid);
1719 } 1721 }
1720 1722
1721out_end_trans: 1723out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..e3065cb9ab08 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
923 if (error) 923 if (error)
924 return error; 924 return error;
925 925
926 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
927 force_refresh = FORCE;
928
926 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; 929 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
927 930
928 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 931 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
974 sizeof(struct gfs2_quota_data *), sort_qd, NULL); 977 sizeof(struct gfs2_quota_data *), sort_qd, NULL);
975 978
976 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { 979 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
977 int force = NO_FORCE;
978 qd = ip->i_res->rs_qa_qd[x]; 980 qd = ip->i_res->rs_qa_qd[x];
979 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 981 error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
980 force = FORCE;
981 error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
982 if (error) 982 if (error)
983 break; 983 break;
984 } 984 }
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1094 return 0; 1094 return 0;
1095} 1095}
1096 1096
1097int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) 1097/**
1098 * gfs2_quota_check - check if allocating new blocks will exceed quota
1099 * @ip: The inode for which this check is being performed
1100 * @uid: The uid to check against
1101 * @gid: The gid to check against
1102 * @ap: The allocation parameters. ap->target contains the requested
1103 * blocks. ap->min_target, if set, contains the minimum blks
1104 * requested.
1105 *
1106 * Returns: 0 on success.
1107 * min_req = ap->min_target ? ap->min_target : ap->target;
1108 * quota must allow atleast min_req blks for success and
1109 * ap->allowed is set to the number of blocks allowed
1110 *
1111 * -EDQUOT otherwise, quota violation. ap->allowed is set to number
1112 * of blocks available.
1113 */
1114int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
1115 struct gfs2_alloc_parms *ap)
1098{ 1116{
1099 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1117 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1100 struct gfs2_quota_data *qd; 1118 struct gfs2_quota_data *qd;
1101 s64 value; 1119 s64 value, warn, limit;
1102 unsigned int x; 1120 unsigned int x;
1103 int error = 0; 1121 int error = 0;
1104 1122
1123 ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
1105 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) 1124 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
1106 return 0; 1125 return 0;
1107 1126
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
1115 qid_eq(qd->qd_id, make_kqid_gid(gid)))) 1134 qid_eq(qd->qd_id, make_kqid_gid(gid))))
1116 continue; 1135 continue;
1117 1136
1137 warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
1138 limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
1118 value = (s64)be64_to_cpu(qd->qd_qb.qb_value); 1139 value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
1119 spin_lock(&qd_lock); 1140 spin_lock(&qd_lock);
1120 value += qd->qd_change; 1141 value += qd->qd_change;
1121 spin_unlock(&qd_lock); 1142 spin_unlock(&qd_lock);
1122 1143
1123 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1144 if (limit > 0 && (limit - value) < ap->allowed)
1124 print_message(qd, "exceeded"); 1145 ap->allowed = limit - value;
1125 quota_send_warning(qd->qd_id, 1146 /* If we can't meet the target */
1126 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); 1147 if (limit && limit < (value + (s64)ap->target)) {
1127 1148 /* If no min_target specified or we don't meet
1128 error = -EDQUOT; 1149 * min_target, return -EDQUOT */
1129 break; 1150 if (!ap->min_target || ap->min_target > ap->allowed) {
1130 } else if (be64_to_cpu(qd->qd_qb.qb_warn) && 1151 print_message(qd, "exceeded");
1131 (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value && 1152 quota_send_warning(qd->qd_id,
1153 sdp->sd_vfs->s_dev,
1154 QUOTA_NL_BHARDWARN);
1155 error = -EDQUOT;
1156 break;
1157 }
1158 } else if (warn && warn < value &&
1132 time_after_eq(jiffies, qd->qd_last_warn + 1159 time_after_eq(jiffies, qd->qd_last_warn +
1133 gfs2_tune_get(sdp, 1160 gfs2_tune_get(sdp, gt_quota_warn_period)
1134 gt_quota_warn_period) * HZ)) { 1161 * HZ)) {
1135 quota_send_warning(qd->qd_id, 1162 quota_send_warning(qd->qd_id,
1136 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); 1163 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1137 error = print_message(qd, "warning"); 1164 error = print_message(qd, "warning");
1138 qd->qd_last_warn = jiffies; 1165 qd->qd_last_warn = jiffies;
1139 } 1166 }
1140 } 1167 }
1141
1142 return error; 1168 return error;
1143} 1169}
1144 1170
@@ -1468,32 +1494,34 @@ int gfs2_quotad(void *data)
1468 return 0; 1494 return 0;
1469} 1495}
1470 1496
1471static int gfs2_quota_get_xstate(struct super_block *sb, 1497static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
1472 struct fs_quota_stat *fqs)
1473{ 1498{
1474 struct gfs2_sbd *sdp = sb->s_fs_info; 1499 struct gfs2_sbd *sdp = sb->s_fs_info;
1475 1500
1476 memset(fqs, 0, sizeof(struct fs_quota_stat)); 1501 memset(state, 0, sizeof(*state));
1477 fqs->qs_version = FS_QSTAT_VERSION;
1478 1502
1479 switch (sdp->sd_args.ar_quota) { 1503 switch (sdp->sd_args.ar_quota) {
1480 case GFS2_QUOTA_ON: 1504 case GFS2_QUOTA_ON:
1481 fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD); 1505 state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
1506 state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
1482 /*FALLTHRU*/ 1507 /*FALLTHRU*/
1483 case GFS2_QUOTA_ACCOUNT: 1508 case GFS2_QUOTA_ACCOUNT:
1484 fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT); 1509 state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED |
1510 QCI_SYSFILE;
1511 state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED |
1512 QCI_SYSFILE;
1485 break; 1513 break;
1486 case GFS2_QUOTA_OFF: 1514 case GFS2_QUOTA_OFF:
1487 break; 1515 break;
1488 } 1516 }
1489
1490 if (sdp->sd_quota_inode) { 1517 if (sdp->sd_quota_inode) {
1491 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; 1518 state->s_state[USRQUOTA].ino =
1492 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; 1519 GFS2_I(sdp->sd_quota_inode)->i_no_addr;
1520 state->s_state[USRQUOTA].blocks = sdp->sd_quota_inode->i_blocks;
1493 } 1521 }
1494 fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ 1522 state->s_state[USRQUOTA].nextents = 1; /* unsupported */
1495 fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ 1523 state->s_state[GRPQUOTA] = state->s_state[USRQUOTA];
1496 fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru); 1524 state->s_incoredqs = list_lru_count(&gfs2_qd_lru);
1497 return 0; 1525 return 0;
1498} 1526}
1499 1527
@@ -1638,7 +1666,7 @@ out_put:
1638 1666
1639const struct quotactl_ops gfs2_quotactl_ops = { 1667const struct quotactl_ops gfs2_quotactl_ops = {
1640 .quota_sync = gfs2_quota_sync, 1668 .quota_sync = gfs2_quota_sync,
1641 .get_xstate = gfs2_quota_get_xstate, 1669 .get_state = gfs2_quota_get_state,
1642 .get_dqblk = gfs2_get_dqblk, 1670 .get_dqblk = gfs2_get_dqblk,
1643 .set_dqblk = gfs2_set_dqblk, 1671 .set_dqblk = gfs2_set_dqblk,
1644}; 1672};
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
24extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); 24extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
25extern void gfs2_quota_unlock(struct gfs2_inode *ip); 25extern void gfs2_quota_unlock(struct gfs2_inode *ip);
26 26
27extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); 27extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
28 struct gfs2_alloc_parms *ap);
28extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 29extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
29 kuid_t uid, kgid_t gid); 30 kuid_t uid, kgid_t gid);
30 31
@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);
37 38
38extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); 39extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
39 40
40static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) 41static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
42 struct gfs2_alloc_parms *ap)
41{ 43{
42 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 44 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
43 int ret; 45 int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
48 return ret; 50 return ret;
49 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 51 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
50 return 0; 52 return 0;
51 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); 53 ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
52 if (ret) 54 if (ret)
53 gfs2_quota_unlock(ip); 55 gfs2_quota_unlock(ip);
54 return ret; 56 return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
1946 * @ip: the inode to reserve space for 1946 * @ip: the inode to reserve space for
1947 * @ap: the allocation parameters 1947 * @ap: the allocation parameters
1948 * 1948 *
1949 * Returns: errno 1949 * We try our best to find an rgrp that has at least ap->target blocks
1950 * available. After a couple of passes (loops == 2), the prospects of finding
1951 * such an rgrp diminish. At this stage, we return the first rgrp that has
1952 * atleast ap->min_target blocks available. Either way, we set ap->allowed to
1953 * the number of blocks available in the chosen rgrp.
1954 *
1955 * Returns: 0 on success,
1956 * -ENOMEM if a suitable rgrp can't be found
1957 * errno otherwise
1950 */ 1958 */
1951 1959
1952int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) 1960int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
1953{ 1961{
1954 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1962 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1955 struct gfs2_rgrpd *begin = NULL; 1963 struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
2012 /* Skip unuseable resource groups */ 2020 /* Skip unuseable resource groups */
2013 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | 2021 if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
2014 GFS2_RDF_ERROR)) || 2022 GFS2_RDF_ERROR)) ||
2015 (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) 2023 (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
2016 goto skip_rgrp; 2024 goto skip_rgrp;
2017 2025
2018 if (sdp->sd_args.ar_rgrplvb) 2026 if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
2027 goto check_rgrp; 2035 goto check_rgrp;
2028 2036
2029 /* If rgrp has enough free space, use it */ 2037 /* If rgrp has enough free space, use it */
2030 if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { 2038 if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
2039 (loops == 2 && ap->min_target &&
2040 rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
2031 ip->i_rgd = rs->rs_rbm.rgd; 2041 ip->i_rgd = rs->rs_rbm.rgd;
2042 ap->allowed = ip->i_rgd->rd_free_clone;
2032 return 0; 2043 return 0;
2033 } 2044 }
2034
2035check_rgrp: 2045check_rgrp:
2036 /* Check for unlinked inodes which can be reclaimed */ 2046 /* Check for unlinked inodes which can be reclaimed */
2037 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) 2047 if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
41extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 41extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
42 42
43#define GFS2_AF_ORLOV 1 43#define GFS2_AF_ORLOV 1
44extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); 44extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
45 struct gfs2_alloc_parms *ap);
45extern void gfs2_inplace_release(struct gfs2_inode *ip); 46extern void gfs2_inplace_release(struct gfs2_inode *ip);
46 47
47extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 48extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
732 if (error) 732 if (error)
733 return error; 733 return error;
734 734
735 error = gfs2_quota_lock_check(ip); 735 error = gfs2_quota_lock_check(ip, &ap);
736 if (error) 736 if (error)
737 return error; 737 return error;
738 738
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 145566851e7a..36d1a6ae7655 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -197,7 +197,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
197 197
198 inode = hfs_new_inode(dir, &dentry->d_name, mode); 198 inode = hfs_new_inode(dir, &dentry->d_name, mode);
199 if (!inode) 199 if (!inode)
200 return -ENOSPC; 200 return -ENOMEM;
201 201
202 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 202 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
203 if (res) { 203 if (res) {
@@ -226,7 +226,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
226 226
227 inode = hfs_new_inode(dir, &dentry->d_name, S_IFDIR | mode); 227 inode = hfs_new_inode(dir, &dentry->d_name, S_IFDIR | mode);
228 if (!inode) 228 if (!inode)
229 return -ENOSPC; 229 return -ENOMEM;
230 230
231 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 231 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
232 if (res) { 232 if (res) {
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc81782..75fd5d873c19 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/aio.h> 17#include <linux/uio.h>
18 18
19#include "hfs_fs.h" 19#include "hfs_fs.h"
20#include "btree.h" 20#include "btree.h"
@@ -124,8 +124,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
124 return res ? try_to_free_buffers(page) : 0; 124 return res ? try_to_free_buffers(page) : 0;
125} 125}
126 126
127static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, 127static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
128 struct iov_iter *iter, loff_t offset) 128 loff_t offset)
129{ 129{
130 struct file *file = iocb->ki_filp; 130 struct file *file = iocb->ki_filp;
131 struct address_space *mapping = file->f_mapping; 131 struct address_space *mapping = file->f_mapping;
@@ -133,13 +133,13 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
133 size_t count = iov_iter_count(iter); 133 size_t count = iov_iter_count(iter);
134 ssize_t ret; 134 ssize_t ret;
135 135
136 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); 136 ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block);
137 137
138 /* 138 /*
139 * In case of error extending write may have instantiated a few 139 * In case of error extending write may have instantiated a few
140 * blocks outside i_size. Trim these off again. 140 * blocks outside i_size. Trim these off again.
141 */ 141 */
142 if (unlikely((rw & WRITE) && ret < 0)) { 142 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
143 loff_t isize = i_size_read(inode); 143 loff_t isize = i_size_read(inode);
144 loff_t end = offset + count; 144 loff_t end = offset + count;
145 145
@@ -674,9 +674,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
674 674
675static const struct file_operations hfs_file_operations = { 675static const struct file_operations hfs_file_operations = {
676 .llseek = generic_file_llseek, 676 .llseek = generic_file_llseek,
677 .read = new_sync_read,
678 .read_iter = generic_file_read_iter, 677 .read_iter = generic_file_read_iter,
679 .write = new_sync_write,
680 .write_iter = generic_file_write_iter, 678 .write_iter = generic_file_write_iter,
681 .mmap = generic_file_mmap, 679 .mmap = generic_file_mmap,
682 .splice_read = generic_file_splice_read, 680 .splice_read = generic_file_splice_read,
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index c1422d91cd36..528e38b5af7f 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -118,9 +118,7 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd,
118 int b, e; 118 int b, e;
119 int res; 119 int res;
120 120
121 if (!rec_found) 121 BUG_ON(!rec_found);
122 BUG();
123
124 b = 0; 122 b = 0;
125 e = bnode->num_recs - 1; 123 e = bnode->num_recs - 1;
126 res = -ENOENT; 124 res = -ENOENT;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 7892e6fddb66..022974ab6e3c 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -350,10 +350,11 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
350 &fd.search_key->cat.name.unicode, 350 &fd.search_key->cat.name.unicode,
351 off + 2, len); 351 off + 2, len);
352 fd.search_key->key_len = cpu_to_be16(6 + len); 352 fd.search_key->key_len = cpu_to_be16(6 + len);
353 } else 353 } else {
354 err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); 354 err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
355 if (unlikely(err)) 355 if (unlikely(err))
356 goto out; 356 goto out;
357 }
357 358
358 err = hfs_brec_find(&fd, hfs_find_rec_by_key); 359 err = hfs_brec_find(&fd, hfs_find_rec_by_key);
359 if (err) 360 if (err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f0235c1640af..3074609befc3 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -434,7 +434,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
434{ 434{
435 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); 435 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
436 struct inode *inode; 436 struct inode *inode;
437 int res = -ENOSPC; 437 int res = -ENOMEM;
438 438
439 mutex_lock(&sbi->vh_mutex); 439 mutex_lock(&sbi->vh_mutex);
440 inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); 440 inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO);
@@ -476,7 +476,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
476{ 476{
477 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); 477 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
478 struct inode *inode; 478 struct inode *inode;
479 int res = -ENOSPC; 479 int res = -ENOMEM;
480 480
481 mutex_lock(&sbi->vh_mutex); 481 mutex_lock(&sbi->vh_mutex);
482 inode = hfsplus_new_inode(dir->i_sb, mode); 482 inode = hfsplus_new_inode(dir->i_sb, mode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f2d046..b0afedbef12b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/aio.h> 17#include <linux/uio.h>
18 18
19#include "hfsplus_fs.h" 19#include "hfsplus_fs.h"
20#include "hfsplus_raw.h" 20#include "hfsplus_raw.h"
@@ -122,8 +122,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
122 return res ? try_to_free_buffers(page) : 0; 122 return res ? try_to_free_buffers(page) : 0;
123} 123}
124 124
125static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, 125static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
126 struct iov_iter *iter, loff_t offset) 126 loff_t offset)
127{ 127{
128 struct file *file = iocb->ki_filp; 128 struct file *file = iocb->ki_filp;
129 struct address_space *mapping = file->f_mapping; 129 struct address_space *mapping = file->f_mapping;
@@ -131,14 +131,13 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
131 size_t count = iov_iter_count(iter); 131 size_t count = iov_iter_count(iter);
132 ssize_t ret; 132 ssize_t ret;
133 133
134 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 134 ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block);
135 hfsplus_get_block);
136 135
137 /* 136 /*
138 * In case of error extending write may have instantiated a few 137 * In case of error extending write may have instantiated a few
139 * blocks outside i_size. Trim these off again. 138 * blocks outside i_size. Trim these off again.
140 */ 139 */
141 if (unlikely((rw & WRITE) && ret < 0)) { 140 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
142 loff_t isize = i_size_read(inode); 141 loff_t isize = i_size_read(inode);
143 loff_t end = offset + count; 142 loff_t end = offset + count;
144 143
@@ -254,6 +253,12 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
254 if ((attr->ia_valid & ATTR_SIZE) && 253 if ((attr->ia_valid & ATTR_SIZE) &&
255 attr->ia_size != i_size_read(inode)) { 254 attr->ia_size != i_size_read(inode)) {
256 inode_dio_wait(inode); 255 inode_dio_wait(inode);
256 if (attr->ia_size > inode->i_size) {
257 error = generic_cont_expand_simple(inode,
258 attr->ia_size);
259 if (error)
260 return error;
261 }
257 truncate_setsize(inode, attr->ia_size); 262 truncate_setsize(inode, attr->ia_size);
258 hfsplus_file_truncate(inode); 263 hfsplus_file_truncate(inode);
259 } 264 }
@@ -341,9 +346,7 @@ static const struct inode_operations hfsplus_file_inode_operations = {
341 346
342static const struct file_operations hfsplus_file_operations = { 347static const struct file_operations hfsplus_file_operations = {
343 .llseek = generic_file_llseek, 348 .llseek = generic_file_llseek,
344 .read = new_sync_read,
345 .read_iter = generic_file_read_iter, 349 .read_iter = generic_file_read_iter,
346 .write = new_sync_write,
347 .write_iter = generic_file_write_iter, 350 .write_iter = generic_file_write_iter,
348 .mmap = generic_file_mmap, 351 .mmap = generic_file_mmap,
349 .splice_read = generic_file_splice_read, 352 .splice_read = generic_file_splice_read,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index d3ff5cc317d7..8e98f5db6ad6 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -76,7 +76,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
76{ 76{
77 struct inode *inode = file_inode(file); 77 struct inode *inode = file_inode(file);
78 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 78 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
79 unsigned int flags; 79 unsigned int flags, new_fl = 0;
80 int err = 0; 80 int err = 0;
81 81
82 err = mnt_want_write_file(file); 82 err = mnt_want_write_file(file);
@@ -110,14 +110,12 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
110 } 110 }
111 111
112 if (flags & FS_IMMUTABLE_FL) 112 if (flags & FS_IMMUTABLE_FL)
113 inode->i_flags |= S_IMMUTABLE; 113 new_fl |= S_IMMUTABLE;
114 else
115 inode->i_flags &= ~S_IMMUTABLE;
116 114
117 if (flags & FS_APPEND_FL) 115 if (flags & FS_APPEND_FL)
118 inode->i_flags |= S_APPEND; 116 new_fl |= S_APPEND;
119 else 117
120 inode->i_flags &= ~S_APPEND; 118 inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND);
121 119
122 if (flags & FS_NODUMP_FL) 120 if (flags & FS_NODUMP_FL)
123 hip->userflags |= HFSPLUS_FLG_NODUMP; 121 hip->userflags |= HFSPLUS_FLG_NODUMP;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index d98094a9f476..89f262d8fcd8 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -44,7 +44,7 @@ static int strcmp_xattr_acl(const char *name)
44 return -1; 44 return -1;
45} 45}
46 46
47static inline int is_known_namespace(const char *name) 47static bool is_known_namespace(const char *name)
48{ 48{
49 if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && 49 if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
50 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && 50 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
@@ -424,6 +424,28 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len)
424 return len; 424 return len;
425} 425}
426 426
427int hfsplus_setxattr(struct dentry *dentry, const char *name,
428 const void *value, size_t size, int flags,
429 const char *prefix, size_t prefixlen)
430{
431 char *xattr_name;
432 int res;
433
434 if (!strcmp(name, ""))
435 return -EINVAL;
436
437 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
438 GFP_KERNEL);
439 if (!xattr_name)
440 return -ENOMEM;
441 strcpy(xattr_name, prefix);
442 strcpy(xattr_name + prefixlen, name);
443 res = __hfsplus_setxattr(dentry->d_inode, xattr_name, value, size,
444 flags);
445 kfree(xattr_name);
446 return res;
447}
448
427static ssize_t hfsplus_getxattr_finder_info(struct inode *inode, 449static ssize_t hfsplus_getxattr_finder_info(struct inode *inode,
428 void *value, size_t size) 450 void *value, size_t size)
429{ 451{
@@ -560,6 +582,30 @@ failed_getxattr_init:
560 return res; 582 return res;
561} 583}
562 584
585ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
586 void *value, size_t size,
587 const char *prefix, size_t prefixlen)
588{
589 int res;
590 char *xattr_name;
591
592 if (!strcmp(name, ""))
593 return -EINVAL;
594
595 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
596 GFP_KERNEL);
597 if (!xattr_name)
598 return -ENOMEM;
599
600 strcpy(xattr_name, prefix);
601 strcpy(xattr_name + prefixlen, name);
602
603 res = __hfsplus_getxattr(dentry->d_inode, xattr_name, value, size);
604 kfree(xattr_name);
605 return res;
606
607}
608
563static inline int can_list(const char *xattr_name) 609static inline int can_list(const char *xattr_name)
564{ 610{
565 if (!xattr_name) 611 if (!xattr_name)
@@ -806,9 +852,6 @@ end_removexattr:
806static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name, 852static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
807 void *buffer, size_t size, int type) 853 void *buffer, size_t size, int type)
808{ 854{
809 char *xattr_name;
810 int res;
811
812 if (!strcmp(name, "")) 855 if (!strcmp(name, ""))
813 return -EINVAL; 856 return -EINVAL;
814 857
@@ -818,24 +861,19 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
818 */ 861 */
819 if (is_known_namespace(name)) 862 if (is_known_namespace(name))
820 return -EOPNOTSUPP; 863 return -EOPNOTSUPP;
821 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
822 + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
823 if (!xattr_name)
824 return -ENOMEM;
825 strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
826 strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
827 864
828 res = hfsplus_getxattr(dentry, xattr_name, buffer, size); 865 /*
829 kfree(xattr_name); 866 * osx is the namespace we use to indicate an unprefixed
830 return res; 867 * attribute on the filesystem (like the ones that OS X
868 * creates), so we pass the name through unmodified (after
869 * ensuring it doesn't conflict with another namespace).
870 */
871 return __hfsplus_getxattr(dentry->d_inode, name, buffer, size);
831} 872}
832 873
833static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name, 874static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
834 const void *buffer, size_t size, int flags, int type) 875 const void *buffer, size_t size, int flags, int type)
835{ 876{
836 char *xattr_name;
837 int res;
838
839 if (!strcmp(name, "")) 877 if (!strcmp(name, ""))
840 return -EINVAL; 878 return -EINVAL;
841 879
@@ -845,16 +883,14 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
845 */ 883 */
846 if (is_known_namespace(name)) 884 if (is_known_namespace(name))
847 return -EOPNOTSUPP; 885 return -EOPNOTSUPP;
848 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
849 + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
850 if (!xattr_name)
851 return -ENOMEM;
852 strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
853 strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
854 886
855 res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags); 887 /*
856 kfree(xattr_name); 888 * osx is the namespace we use to indicate an unprefixed
857 return res; 889 * attribute on the filesystem (like the ones that OS X
890 * creates), so we pass the name through unmodified (after
891 * ensuring it doesn't conflict with another namespace).
892 */
893 return __hfsplus_setxattr(dentry->d_inode, name, buffer, size, flags);
858} 894}
859 895
860static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list, 896static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
index 288530cf80b5..f9b0955b3d28 100644
--- a/fs/hfsplus/xattr.h
+++ b/fs/hfsplus/xattr.h
@@ -21,22 +21,16 @@ extern const struct xattr_handler *hfsplus_xattr_handlers[];
21int __hfsplus_setxattr(struct inode *inode, const char *name, 21int __hfsplus_setxattr(struct inode *inode, const char *name,
22 const void *value, size_t size, int flags); 22 const void *value, size_t size, int flags);
23 23
24static inline int hfsplus_setxattr(struct dentry *dentry, const char *name, 24int hfsplus_setxattr(struct dentry *dentry, const char *name,
25 const void *value, size_t size, int flags) 25 const void *value, size_t size, int flags,
26{ 26 const char *prefix, size_t prefixlen);
27 return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
28}
29 27
30ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, 28ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
31 void *value, size_t size); 29 void *value, size_t size);
32 30
33static inline ssize_t hfsplus_getxattr(struct dentry *dentry, 31ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
34 const char *name, 32 void *value, size_t size,
35 void *value, 33 const char *prefix, size_t prefixlen);
36 size_t size)
37{
38 return __hfsplus_getxattr(dentry->d_inode, name, value, size);
39}
40 34
41ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size); 35ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
42 36
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index 6ec5e107691f..aacff00a9ff9 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -16,43 +16,17 @@
16static int hfsplus_security_getxattr(struct dentry *dentry, const char *name, 16static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
17 void *buffer, size_t size, int type) 17 void *buffer, size_t size, int type)
18{ 18{
19 char *xattr_name; 19 return hfsplus_getxattr(dentry, name, buffer, size,
20 int res; 20 XATTR_SECURITY_PREFIX,
21 21 XATTR_SECURITY_PREFIX_LEN);
22 if (!strcmp(name, ""))
23 return -EINVAL;
24
25 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
26 GFP_KERNEL);
27 if (!xattr_name)
28 return -ENOMEM;
29 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
30 strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
31
32 res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
33 kfree(xattr_name);
34 return res;
35} 22}
36 23
37static int hfsplus_security_setxattr(struct dentry *dentry, const char *name, 24static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
38 const void *buffer, size_t size, int flags, int type) 25 const void *buffer, size_t size, int flags, int type)
39{ 26{
40 char *xattr_name; 27 return hfsplus_setxattr(dentry, name, buffer, size, flags,
41 int res; 28 XATTR_SECURITY_PREFIX,
42 29 XATTR_SECURITY_PREFIX_LEN);
43 if (!strcmp(name, ""))
44 return -EINVAL;
45
46 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
47 GFP_KERNEL);
48 if (!xattr_name)
49 return -ENOMEM;
50 strcpy(xattr_name, XATTR_SECURITY_PREFIX);
51 strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
52
53 res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
54 kfree(xattr_name);
55 return res;
56} 30}
57 31
58static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list, 32static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
index 3c5f27e4746a..bcf65089b7f7 100644
--- a/fs/hfsplus/xattr_trusted.c
+++ b/fs/hfsplus/xattr_trusted.c
@@ -14,43 +14,16 @@
14static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name, 14static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
15 void *buffer, size_t size, int type) 15 void *buffer, size_t size, int type)
16{ 16{
17 char *xattr_name; 17 return hfsplus_getxattr(dentry, name, buffer, size,
18 int res; 18 XATTR_TRUSTED_PREFIX,
19 19 XATTR_TRUSTED_PREFIX_LEN);
20 if (!strcmp(name, ""))
21 return -EINVAL;
22
23 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
24 GFP_KERNEL);
25 if (!xattr_name)
26 return -ENOMEM;
27 strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
28 strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
29
30 res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
31 kfree(xattr_name);
32 return res;
33} 20}
34 21
35static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name, 22static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
36 const void *buffer, size_t size, int flags, int type) 23 const void *buffer, size_t size, int flags, int type)
37{ 24{
38 char *xattr_name; 25 return hfsplus_setxattr(dentry, name, buffer, size, flags,
39 int res; 26 XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
40
41 if (!strcmp(name, ""))
42 return -EINVAL;
43
44 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
45 GFP_KERNEL);
46 if (!xattr_name)
47 return -ENOMEM;
48 strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
49 strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
50
51 res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
52 kfree(xattr_name);
53 return res;
54} 27}
55 28
56static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list, 29static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
index 2b625a538b64..5aa0e6dc4a1e 100644
--- a/fs/hfsplus/xattr_user.c
+++ b/fs/hfsplus/xattr_user.c
@@ -14,43 +14,16 @@
14static int hfsplus_user_getxattr(struct dentry *dentry, const char *name, 14static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
15 void *buffer, size_t size, int type) 15 void *buffer, size_t size, int type)
16{ 16{
17 char *xattr_name;
18 int res;
19 17
20 if (!strcmp(name, "")) 18 return hfsplus_getxattr(dentry, name, buffer, size,
21 return -EINVAL; 19 XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
22
23 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
24 GFP_KERNEL);
25 if (!xattr_name)
26 return -ENOMEM;
27 strcpy(xattr_name, XATTR_USER_PREFIX);
28 strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
29
30 res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
31 kfree(xattr_name);
32 return res;
33} 20}
34 21
35static int hfsplus_user_setxattr(struct dentry *dentry, const char *name, 22static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
36 const void *buffer, size_t size, int flags, int type) 23 const void *buffer, size_t size, int flags, int type)
37{ 24{
38 char *xattr_name; 25 return hfsplus_setxattr(dentry, name, buffer, size, flags,
39 int res; 26 XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
40
41 if (!strcmp(name, ""))
42 return -EINVAL;
43
44 xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
45 GFP_KERNEL);
46 if (!xattr_name)
47 return -ENOMEM;
48 strcpy(xattr_name, XATTR_USER_PREFIX);
49 strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
50
51 res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
52 kfree(xattr_name);
53 return res;
54} 27}
55 28
56static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list, 29static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 4fcd40d6f308..91e19f9dffe5 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -66,7 +66,8 @@ extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
66extern int access_file(char *path, int r, int w, int x); 66extern int access_file(char *path, int r, int w, int x);
67extern int open_file(char *path, int r, int w, int append); 67extern int open_file(char *path, int r, int w, int append);
68extern void *open_dir(char *path, int *err_out); 68extern void *open_dir(char *path, int *err_out);
69extern char *read_dir(void *stream, unsigned long long *pos, 69extern void seek_dir(void *stream, unsigned long long pos);
70extern char *read_dir(void *stream, unsigned long long *pos_out,
70 unsigned long long *ino_out, int *len_out, 71 unsigned long long *ino_out, int *len_out,
71 unsigned int *type_out); 72 unsigned int *type_out);
72extern void close_file(void *stream); 73extern void close_file(void *stream);
@@ -77,8 +78,7 @@ extern int write_file(int fd, unsigned long long *offset, const char *buf,
77 int len); 78 int len);
78extern int lseek_file(int fd, long long offset, int whence); 79extern int lseek_file(int fd, long long offset, int whence);
79extern int fsync_file(int fd, int datasync); 80extern int fsync_file(int fd, int datasync);
80extern int file_create(char *name, int ur, int uw, int ux, int gr, 81extern int file_create(char *name, int mode);
81 int gw, int gx, int or, int ow, int ox);
82extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd); 82extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd);
83extern int make_symlink(const char *from, const char *to); 83extern int make_symlink(const char *from, const char *to);
84extern int unlink_file(const char *file); 84extern int unlink_file(const char *file);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fd62cae0fdcb..b83a0343378b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -24,6 +24,7 @@ struct hostfs_inode_info {
24 int fd; 24 int fd;
25 fmode_t mode; 25 fmode_t mode;
26 struct inode vfs_inode; 26 struct inode vfs_inode;
27 struct mutex open_mutex;
27}; 28};
28 29
29static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) 30static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
@@ -92,16 +93,22 @@ static char *__dentry_name(struct dentry *dentry, char *name)
92 __putname(name); 93 __putname(name);
93 return NULL; 94 return NULL;
94 } 95 }
96
97 /*
98 * This function relies on the fact that dentry_path_raw() will place
99 * the path name at the end of the provided buffer.
100 */
101 BUG_ON(p + strlen(p) + 1 != name + PATH_MAX);
102
95 strlcpy(name, root, PATH_MAX); 103 strlcpy(name, root, PATH_MAX);
96 if (len > p - name) { 104 if (len > p - name) {
97 __putname(name); 105 __putname(name);
98 return NULL; 106 return NULL;
99 } 107 }
100 if (p > name + len) { 108
101 char *s = name + len; 109 if (p > name + len)
102 while ((*s++ = *p++) != '\0') 110 strcpy(name + len, p);
103 ; 111
104 }
105 return name; 112 return name;
106} 113}
107 114
@@ -135,21 +142,19 @@ static char *follow_link(char *link)
135 int len, n; 142 int len, n;
136 char *name, *resolved, *end; 143 char *name, *resolved, *end;
137 144
138 len = 64; 145 name = __getname();
139 while (1) { 146 if (!name) {
140 n = -ENOMEM; 147 n = -ENOMEM;
141 name = kmalloc(len, GFP_KERNEL); 148 goto out_free;
142 if (name == NULL)
143 goto out;
144
145 n = hostfs_do_readlink(link, name, len);
146 if (n < len)
147 break;
148 len *= 2;
149 kfree(name);
150 } 149 }
150
151 n = hostfs_do_readlink(link, name, PATH_MAX);
151 if (n < 0) 152 if (n < 0)
152 goto out_free; 153 goto out_free;
154 else if (n == PATH_MAX) {
155 n = -E2BIG;
156 goto out_free;
157 }
153 158
154 if (*name == '/') 159 if (*name == '/')
155 return name; 160 return name;
@@ -168,13 +173,12 @@ static char *follow_link(char *link)
168 } 173 }
169 174
170 sprintf(resolved, "%s%s", link, name); 175 sprintf(resolved, "%s%s", link, name);
171 kfree(name); 176 __putname(name);
172 kfree(link); 177 kfree(link);
173 return resolved; 178 return resolved;
174 179
175 out_free: 180 out_free:
176 kfree(name); 181 __putname(name);
177 out:
178 return ERR_PTR(n); 182 return ERR_PTR(n);
179} 183}
180 184
@@ -225,6 +229,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
225 hi->fd = -1; 229 hi->fd = -1;
226 hi->mode = 0; 230 hi->mode = 0;
227 inode_init_once(&hi->vfs_inode); 231 inode_init_once(&hi->vfs_inode);
232 mutex_init(&hi->open_mutex);
228 return &hi->vfs_inode; 233 return &hi->vfs_inode;
229} 234}
230 235
@@ -257,6 +262,9 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
257 if (strlen(root_path) > offset) 262 if (strlen(root_path) > offset)
258 seq_printf(seq, ",%s", root_path + offset); 263 seq_printf(seq, ",%s", root_path + offset);
259 264
265 if (append)
266 seq_puts(seq, ",append");
267
260 return 0; 268 return 0;
261} 269}
262 270
@@ -284,6 +292,7 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx)
284 if (dir == NULL) 292 if (dir == NULL)
285 return -error; 293 return -error;
286 next = ctx->pos; 294 next = ctx->pos;
295 seek_dir(dir, next);
287 while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { 296 while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) {
288 if (!dir_emit(ctx, name, len, ino, type)) 297 if (!dir_emit(ctx, name, len, ino, type))
289 break; 298 break;
@@ -293,13 +302,12 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx)
293 return 0; 302 return 0;
294} 303}
295 304
296static int hostfs_file_open(struct inode *ino, struct file *file) 305static int hostfs_open(struct inode *ino, struct file *file)
297{ 306{
298 static DEFINE_MUTEX(open_mutex);
299 char *name; 307 char *name;
300 fmode_t mode = 0; 308 fmode_t mode;
301 int err; 309 int err;
302 int r = 0, w = 0, fd; 310 int r, w, fd;
303 311
304 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 312 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
305 if ((mode & HOSTFS_I(ino)->mode) == mode) 313 if ((mode & HOSTFS_I(ino)->mode) == mode)
@@ -308,12 +316,12 @@ static int hostfs_file_open(struct inode *ino, struct file *file)
308 mode |= HOSTFS_I(ino)->mode; 316 mode |= HOSTFS_I(ino)->mode;
309 317
310retry: 318retry:
319 r = w = 0;
320
311 if (mode & FMODE_READ) 321 if (mode & FMODE_READ)
312 r = 1; 322 r = 1;
313 if (mode & FMODE_WRITE) 323 if (mode & FMODE_WRITE)
314 w = 1; 324 r = w = 1;
315 if (w)
316 r = 1;
317 325
318 name = dentry_name(file->f_path.dentry); 326 name = dentry_name(file->f_path.dentry);
319 if (name == NULL) 327 if (name == NULL)
@@ -324,15 +332,16 @@ retry:
324 if (fd < 0) 332 if (fd < 0)
325 return fd; 333 return fd;
326 334
327 mutex_lock(&open_mutex); 335 mutex_lock(&HOSTFS_I(ino)->open_mutex);
328 /* somebody else had handled it first? */ 336 /* somebody else had handled it first? */
329 if ((mode & HOSTFS_I(ino)->mode) == mode) { 337 if ((mode & HOSTFS_I(ino)->mode) == mode) {
330 mutex_unlock(&open_mutex); 338 mutex_unlock(&HOSTFS_I(ino)->open_mutex);
339 close_file(&fd);
331 return 0; 340 return 0;
332 } 341 }
333 if ((mode | HOSTFS_I(ino)->mode) != mode) { 342 if ((mode | HOSTFS_I(ino)->mode) != mode) {
334 mode |= HOSTFS_I(ino)->mode; 343 mode |= HOSTFS_I(ino)->mode;
335 mutex_unlock(&open_mutex); 344 mutex_unlock(&HOSTFS_I(ino)->open_mutex);
336 close_file(&fd); 345 close_file(&fd);
337 goto retry; 346 goto retry;
338 } 347 }
@@ -342,12 +351,12 @@ retry:
342 err = replace_file(fd, HOSTFS_I(ino)->fd); 351 err = replace_file(fd, HOSTFS_I(ino)->fd);
343 close_file(&fd); 352 close_file(&fd);
344 if (err < 0) { 353 if (err < 0) {
345 mutex_unlock(&open_mutex); 354 mutex_unlock(&HOSTFS_I(ino)->open_mutex);
346 return err; 355 return err;
347 } 356 }
348 } 357 }
349 HOSTFS_I(ino)->mode = mode; 358 HOSTFS_I(ino)->mode = mode;
350 mutex_unlock(&open_mutex); 359 mutex_unlock(&HOSTFS_I(ino)->open_mutex);
351 360
352 return 0; 361 return 0;
353} 362}
@@ -378,13 +387,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
378 387
379static const struct file_operations hostfs_file_fops = { 388static const struct file_operations hostfs_file_fops = {
380 .llseek = generic_file_llseek, 389 .llseek = generic_file_llseek,
381 .read = new_sync_read,
382 .splice_read = generic_file_splice_read, 390 .splice_read = generic_file_splice_read,
383 .read_iter = generic_file_read_iter, 391 .read_iter = generic_file_read_iter,
384 .write_iter = generic_file_write_iter, 392 .write_iter = generic_file_write_iter,
385 .write = new_sync_write,
386 .mmap = generic_file_mmap, 393 .mmap = generic_file_mmap,
387 .open = hostfs_file_open, 394 .open = hostfs_open,
388 .release = hostfs_file_release, 395 .release = hostfs_file_release,
389 .fsync = hostfs_fsync, 396 .fsync = hostfs_fsync,
390}; 397};
@@ -393,6 +400,8 @@ static const struct file_operations hostfs_dir_fops = {
393 .llseek = generic_file_llseek, 400 .llseek = generic_file_llseek,
394 .iterate = hostfs_readdir, 401 .iterate = hostfs_readdir,
395 .read = generic_read_dir, 402 .read = generic_read_dir,
403 .open = hostfs_open,
404 .fsync = hostfs_fsync,
396}; 405};
397 406
398static int hostfs_writepage(struct page *page, struct writeback_control *wbc) 407static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -400,7 +409,7 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
400 struct address_space *mapping = page->mapping; 409 struct address_space *mapping = page->mapping;
401 struct inode *inode = mapping->host; 410 struct inode *inode = mapping->host;
402 char *buffer; 411 char *buffer;
403 unsigned long long base; 412 loff_t base = page_offset(page);
404 int count = PAGE_CACHE_SIZE; 413 int count = PAGE_CACHE_SIZE;
405 int end_index = inode->i_size >> PAGE_CACHE_SHIFT; 414 int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
406 int err; 415 int err;
@@ -409,7 +418,6 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
409 count = inode->i_size & (PAGE_CACHE_SIZE-1); 418 count = inode->i_size & (PAGE_CACHE_SIZE-1);
410 419
411 buffer = kmap(page); 420 buffer = kmap(page);
412 base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
413 421
414 err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); 422 err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
415 if (err != count) { 423 if (err != count) {
@@ -434,26 +442,29 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
434static int hostfs_readpage(struct file *file, struct page *page) 442static int hostfs_readpage(struct file *file, struct page *page)
435{ 443{
436 char *buffer; 444 char *buffer;
437 long long start; 445 loff_t start = page_offset(page);
438 int err = 0; 446 int bytes_read, ret = 0;
439 447
440 start = (long long) page->index << PAGE_CACHE_SHIFT;
441 buffer = kmap(page); 448 buffer = kmap(page);
442 err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, 449 bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
443 PAGE_CACHE_SIZE); 450 PAGE_CACHE_SIZE);
444 if (err < 0) 451 if (bytes_read < 0) {
452 ClearPageUptodate(page);
453 SetPageError(page);
454 ret = bytes_read;
445 goto out; 455 goto out;
456 }
446 457
447 memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); 458 memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read);
448 459
449 flush_dcache_page(page); 460 ClearPageError(page);
450 SetPageUptodate(page); 461 SetPageUptodate(page);
451 if (PageError(page)) ClearPageError(page); 462
452 err = 0;
453 out: 463 out:
464 flush_dcache_page(page);
454 kunmap(page); 465 kunmap(page);
455 unlock_page(page); 466 unlock_page(page);
456 return err; 467 return ret;
457} 468}
458 469
459static int hostfs_write_begin(struct file *file, struct address_space *mapping, 470static int hostfs_write_begin(struct file *file, struct address_space *mapping,
@@ -530,11 +541,13 @@ static int read_name(struct inode *ino, char *name)
530 init_special_inode(ino, st.mode & S_IFMT, rdev); 541 init_special_inode(ino, st.mode & S_IFMT, rdev);
531 ino->i_op = &hostfs_iops; 542 ino->i_op = &hostfs_iops;
532 break; 543 break;
533 544 case S_IFREG:
534 default:
535 ino->i_op = &hostfs_iops; 545 ino->i_op = &hostfs_iops;
536 ino->i_fop = &hostfs_file_fops; 546 ino->i_fop = &hostfs_file_fops;
537 ino->i_mapping->a_ops = &hostfs_aops; 547 ino->i_mapping->a_ops = &hostfs_aops;
548 break;
549 default:
550 return -EIO;
538 } 551 }
539 552
540 ino->i_ino = st.ino; 553 ino->i_ino = st.ino;
@@ -568,10 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
568 if (name == NULL) 581 if (name == NULL)
569 goto out_put; 582 goto out_put;
570 583
571 fd = file_create(name, 584 fd = file_create(name, mode & S_IFMT);
572 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
573 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
574 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
575 if (fd < 0) 585 if (fd < 0)
576 error = fd; 586 error = fd;
577 else 587 else
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 9765dab95cbd..9c1e0f019880 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -97,21 +97,27 @@ void *open_dir(char *path, int *err_out)
97 return dir; 97 return dir;
98} 98}
99 99
100char *read_dir(void *stream, unsigned long long *pos, 100void seek_dir(void *stream, unsigned long long pos)
101{
102 DIR *dir = stream;
103
104 seekdir(dir, pos);
105}
106
107char *read_dir(void *stream, unsigned long long *pos_out,
101 unsigned long long *ino_out, int *len_out, 108 unsigned long long *ino_out, int *len_out,
102 unsigned int *type_out) 109 unsigned int *type_out)
103{ 110{
104 DIR *dir = stream; 111 DIR *dir = stream;
105 struct dirent *ent; 112 struct dirent *ent;
106 113
107 seekdir(dir, *pos);
108 ent = readdir(dir); 114 ent = readdir(dir);
109 if (ent == NULL) 115 if (ent == NULL)
110 return NULL; 116 return NULL;
111 *len_out = strlen(ent->d_name); 117 *len_out = strlen(ent->d_name);
112 *ino_out = ent->d_ino; 118 *ino_out = ent->d_ino;
113 *type_out = ent->d_type; 119 *type_out = ent->d_type;
114 *pos = telldir(dir); 120 *pos_out = ent->d_off;
115 return ent->d_name; 121 return ent->d_name;
116} 122}
117 123
@@ -175,21 +181,10 @@ void close_dir(void *stream)
175 closedir(stream); 181 closedir(stream);
176} 182}
177 183
178int file_create(char *name, int ur, int uw, int ux, int gr, 184int file_create(char *name, int mode)
179 int gw, int gx, int or, int ow, int ox)
180{ 185{
181 int mode, fd; 186 int fd;
182 187
183 mode = 0;
184 mode |= ur ? S_IRUSR : 0;
185 mode |= uw ? S_IWUSR : 0;
186 mode |= ux ? S_IXUSR : 0;
187 mode |= gr ? S_IRGRP : 0;
188 mode |= gw ? S_IWGRP : 0;
189 mode |= gx ? S_IXGRP : 0;
190 mode |= or ? S_IROTH : 0;
191 mode |= ow ? S_IWOTH : 0;
192 mode |= ox ? S_IXOTH : 0;
193 fd = open64(name, O_CREAT | O_RDWR, mode); 188 fd = open64(name, O_CREAT | O_RDWR, mode);
194 if (fd < 0) 189 if (fd < 0)
195 return -errno; 190 return -errno;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 7f54e5f76cec..6d8cfe9b52d6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -197,9 +197,7 @@ const struct address_space_operations hpfs_aops = {
197const struct file_operations hpfs_file_ops = 197const struct file_operations hpfs_file_ops =
198{ 198{
199 .llseek = generic_file_llseek, 199 .llseek = generic_file_llseek,
200 .read = new_sync_read,
201 .read_iter = generic_file_read_iter, 200 .read_iter = generic_file_read_iter,
202 .write = new_sync_write,
203 .write_iter = generic_file_write_iter, 201 .write_iter = generic_file_write_iter,
204 .mmap = generic_file_mmap, 202 .mmap = generic_file_mmap,
205 .release = hpfs_file_release, 203 .release = hpfs_file_release,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca8e8dc..2640d88b0e63 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,6 +34,7 @@
34#include <linux/security.h> 34#include <linux/security.h>
35#include <linux/magic.h> 35#include <linux/magic.h>
36#include <linux/migrate.h> 36#include <linux/migrate.h>
37#include <linux/uio.h>
37 38
38#include <asm/uaccess.h> 39#include <asm/uaccess.h>
39 40
@@ -47,9 +48,10 @@ struct hugetlbfs_config {
47 kuid_t uid; 48 kuid_t uid;
48 kgid_t gid; 49 kgid_t gid;
49 umode_t mode; 50 umode_t mode;
50 long nr_blocks; 51 long max_hpages;
51 long nr_inodes; 52 long nr_inodes;
52 struct hstate *hstate; 53 struct hstate *hstate;
54 long min_hpages;
53}; 55};
54 56
55struct hugetlbfs_inode_info { 57struct hugetlbfs_inode_info {
@@ -67,7 +69,7 @@ int sysctl_hugetlb_shm_group;
67enum { 69enum {
68 Opt_size, Opt_nr_inodes, 70 Opt_size, Opt_nr_inodes,
69 Opt_mode, Opt_uid, Opt_gid, 71 Opt_mode, Opt_uid, Opt_gid,
70 Opt_pagesize, 72 Opt_pagesize, Opt_min_size,
71 Opt_err, 73 Opt_err,
72}; 74};
73 75
@@ -78,6 +80,7 @@ static const match_table_t tokens = {
78 {Opt_uid, "uid=%u"}, 80 {Opt_uid, "uid=%u"},
79 {Opt_gid, "gid=%u"}, 81 {Opt_gid, "gid=%u"},
80 {Opt_pagesize, "pagesize=%s"}, 82 {Opt_pagesize, "pagesize=%s"},
83 {Opt_min_size, "min_size=%s"},
81 {Opt_err, NULL}, 84 {Opt_err, NULL},
82}; 85};
83 86
@@ -179,42 +182,33 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
179} 182}
180#endif 183#endif
181 184
182static int 185static size_t
183hugetlbfs_read_actor(struct page *page, unsigned long offset, 186hugetlbfs_read_actor(struct page *page, unsigned long offset,
184 char __user *buf, unsigned long count, 187 struct iov_iter *to, unsigned long size)
185 unsigned long size)
186{ 188{
187 char *kaddr; 189 size_t copied = 0;
188 unsigned long left, copied = 0;
189 int i, chunksize; 190 int i, chunksize;
190 191
191 if (size > count)
192 size = count;
193
194 /* Find which 4k chunk and offset with in that chunk */ 192 /* Find which 4k chunk and offset with in that chunk */
195 i = offset >> PAGE_CACHE_SHIFT; 193 i = offset >> PAGE_CACHE_SHIFT;
196 offset = offset & ~PAGE_CACHE_MASK; 194 offset = offset & ~PAGE_CACHE_MASK;
197 195
198 while (size) { 196 while (size) {
197 size_t n;
199 chunksize = PAGE_CACHE_SIZE; 198 chunksize = PAGE_CACHE_SIZE;
200 if (offset) 199 if (offset)
201 chunksize -= offset; 200 chunksize -= offset;
202 if (chunksize > size) 201 if (chunksize > size)
203 chunksize = size; 202 chunksize = size;
204 kaddr = kmap(&page[i]); 203 n = copy_page_to_iter(&page[i], offset, chunksize, to);
205 left = __copy_to_user(buf, kaddr + offset, chunksize); 204 copied += n;
206 kunmap(&page[i]); 205 if (n != chunksize)
207 if (left) { 206 return copied;
208 copied += (chunksize - left);
209 break;
210 }
211 offset = 0; 207 offset = 0;
212 size -= chunksize; 208 size -= chunksize;
213 buf += chunksize;
214 copied += chunksize;
215 i++; 209 i++;
216 } 210 }
217 return copied ? copied : -EFAULT; 211 return copied;
218} 212}
219 213
220/* 214/*
@@ -222,39 +216,34 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
222 * data. Its *very* similar to do_generic_mapping_read(), we can't use that 216 * data. Its *very* similar to do_generic_mapping_read(), we can't use that
223 * since it has PAGE_CACHE_SIZE assumptions. 217 * since it has PAGE_CACHE_SIZE assumptions.
224 */ 218 */
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 219static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
226 size_t len, loff_t *ppos)
227{ 220{
228 struct hstate *h = hstate_file(filp); 221 struct file *file = iocb->ki_filp;
229 struct address_space *mapping = filp->f_mapping; 222 struct hstate *h = hstate_file(file);
223 struct address_space *mapping = file->f_mapping;
230 struct inode *inode = mapping->host; 224 struct inode *inode = mapping->host;
231 unsigned long index = *ppos >> huge_page_shift(h); 225 unsigned long index = iocb->ki_pos >> huge_page_shift(h);
232 unsigned long offset = *ppos & ~huge_page_mask(h); 226 unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
233 unsigned long end_index; 227 unsigned long end_index;
234 loff_t isize; 228 loff_t isize;
235 ssize_t retval = 0; 229 ssize_t retval = 0;
236 230
237 /* validate length */ 231 while (iov_iter_count(to)) {
238 if (len == 0)
239 goto out;
240
241 for (;;) {
242 struct page *page; 232 struct page *page;
243 unsigned long nr, ret; 233 size_t nr, copied;
244 int ra;
245 234
246 /* nr is the maximum number of bytes to copy from this page */ 235 /* nr is the maximum number of bytes to copy from this page */
247 nr = huge_page_size(h); 236 nr = huge_page_size(h);
248 isize = i_size_read(inode); 237 isize = i_size_read(inode);
249 if (!isize) 238 if (!isize)
250 goto out; 239 break;
251 end_index = (isize - 1) >> huge_page_shift(h); 240 end_index = (isize - 1) >> huge_page_shift(h);
252 if (index >= end_index) { 241 if (index > end_index)
253 if (index > end_index) 242 break;
254 goto out; 243 if (index == end_index) {
255 nr = ((isize - 1) & ~huge_page_mask(h)) + 1; 244 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
256 if (nr <= offset) 245 if (nr <= offset)
257 goto out; 246 break;
258 } 247 }
259 nr = nr - offset; 248 nr = nr - offset;
260 249
@@ -265,39 +254,27 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
265 * We have a HOLE, zero out the user-buffer for the 254 * We have a HOLE, zero out the user-buffer for the
266 * length of the hole or request. 255 * length of the hole or request.
267 */ 256 */
268 ret = len < nr ? len : nr; 257 copied = iov_iter_zero(nr, to);
269 if (clear_user(buf, ret))
270 ra = -EFAULT;
271 else
272 ra = 0;
273 } else { 258 } else {
274 unlock_page(page); 259 unlock_page(page);
275 260
276 /* 261 /*
277 * We have the page, copy it to user space buffer. 262 * We have the page, copy it to user space buffer.
278 */ 263 */
279 ra = hugetlbfs_read_actor(page, offset, buf, len, nr); 264 copied = hugetlbfs_read_actor(page, offset, to, nr);
280 ret = ra;
281 page_cache_release(page); 265 page_cache_release(page);
282 } 266 }
283 if (ra < 0) { 267 offset += copied;
284 if (retval == 0) 268 retval += copied;
285 retval = ra; 269 if (copied != nr && iov_iter_count(to)) {
286 goto out; 270 if (!retval)
271 retval = -EFAULT;
272 break;
287 } 273 }
288
289 offset += ret;
290 retval += ret;
291 len -= ret;
292 index += offset >> huge_page_shift(h); 274 index += offset >> huge_page_shift(h);
293 offset &= ~huge_page_mask(h); 275 offset &= ~huge_page_mask(h);
294
295 /* short read or no more work */
296 if ((ret != nr) || (len == 0))
297 break;
298 } 276 }
299out: 277 iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
300 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
301 return retval; 278 return retval;
302} 279}
303 280
@@ -319,7 +296,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
319 296
320static void truncate_huge_page(struct page *page) 297static void truncate_huge_page(struct page *page)
321{ 298{
322 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 299 ClearPageDirty(page);
323 ClearPageUptodate(page); 300 ClearPageUptodate(page);
324 delete_from_page_cache(page); 301 delete_from_page_cache(page);
325} 302}
@@ -721,7 +698,7 @@ static void init_once(void *foo)
721} 698}
722 699
723const struct file_operations hugetlbfs_file_operations = { 700const struct file_operations hugetlbfs_file_operations = {
724 .read = hugetlbfs_read, 701 .read_iter = hugetlbfs_read_iter,
725 .mmap = hugetlbfs_file_mmap, 702 .mmap = hugetlbfs_file_mmap,
726 .fsync = noop_fsync, 703 .fsync = noop_fsync,
727 .get_unmapped_area = hugetlb_get_unmapped_area, 704 .get_unmapped_area = hugetlb_get_unmapped_area,
@@ -754,14 +731,38 @@ static const struct super_operations hugetlbfs_ops = {
754 .show_options = generic_show_options, 731 .show_options = generic_show_options,
755}; 732};
756 733
734enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
735
736/*
737 * Convert size option passed from command line to number of huge pages
738 * in the pool specified by hstate. Size option could be in bytes
739 * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
740 */
741static long long
742hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
743 int val_type)
744{
745 if (val_type == NO_SIZE)
746 return -1;
747
748 if (val_type == SIZE_PERCENT) {
749 size_opt <<= huge_page_shift(h);
750 size_opt *= h->max_huge_pages;
751 do_div(size_opt, 100);
752 }
753
754 size_opt >>= huge_page_shift(h);
755 return size_opt;
756}
757
757static int 758static int
758hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 759hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
759{ 760{
760 char *p, *rest; 761 char *p, *rest;
761 substring_t args[MAX_OPT_ARGS]; 762 substring_t args[MAX_OPT_ARGS];
762 int option; 763 int option;
763 unsigned long long size = 0; 764 unsigned long long max_size_opt = 0, min_size_opt = 0;
764 enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; 765 int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
765 766
766 if (!options) 767 if (!options)
767 return 0; 768 return 0;
@@ -799,10 +800,10 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
799 /* memparse() will accept a K/M/G without a digit */ 800 /* memparse() will accept a K/M/G without a digit */
800 if (!isdigit(*args[0].from)) 801 if (!isdigit(*args[0].from))
801 goto bad_val; 802 goto bad_val;
802 size = memparse(args[0].from, &rest); 803 max_size_opt = memparse(args[0].from, &rest);
803 setsize = SIZE_STD; 804 max_val_type = SIZE_STD;
804 if (*rest == '%') 805 if (*rest == '%')
805 setsize = SIZE_PERCENT; 806 max_val_type = SIZE_PERCENT;
806 break; 807 break;
807 } 808 }
808 809
@@ -825,6 +826,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
825 break; 826 break;
826 } 827 }
827 828
829 case Opt_min_size: {
830 /* memparse() will accept a K/M/G without a digit */
831 if (!isdigit(*args[0].from))
832 goto bad_val;
833 min_size_opt = memparse(args[0].from, &rest);
834 min_val_type = SIZE_STD;
835 if (*rest == '%')
836 min_val_type = SIZE_PERCENT;
837 break;
838 }
839
828 default: 840 default:
829 pr_err("Bad mount option: \"%s\"\n", p); 841 pr_err("Bad mount option: \"%s\"\n", p);
830 return -EINVAL; 842 return -EINVAL;
@@ -832,15 +844,22 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
832 } 844 }
833 } 845 }
834 846
835 /* Do size after hstate is set up */ 847 /*
836 if (setsize > NO_SIZE) { 848 * Use huge page pool size (in hstate) to convert the size
837 struct hstate *h = pconfig->hstate; 849 * options to number of huge pages. If NO_SIZE, -1 is returned.
838 if (setsize == SIZE_PERCENT) { 850 */
839 size <<= huge_page_shift(h); 851 pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
840 size *= h->max_huge_pages; 852 max_size_opt, max_val_type);
841 do_div(size, 100); 853 pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
842 } 854 min_size_opt, min_val_type);
843 pconfig->nr_blocks = (size >> huge_page_shift(h)); 855
856 /*
857 * If max_size was specified, then min_size must be smaller
858 */
859 if (max_val_type > NO_SIZE &&
860 pconfig->min_hpages > pconfig->max_hpages) {
861 pr_err("minimum size can not be greater than maximum size\n");
862 return -EINVAL;
844 } 863 }
845 864
846 return 0; 865 return 0;
@@ -859,12 +878,13 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
859 878
860 save_mount_options(sb, data); 879 save_mount_options(sb, data);
861 880
862 config.nr_blocks = -1; /* No limit on size by default */ 881 config.max_hpages = -1; /* No limit on size by default */
863 config.nr_inodes = -1; /* No limit on number of inodes by default */ 882 config.nr_inodes = -1; /* No limit on number of inodes by default */
864 config.uid = current_fsuid(); 883 config.uid = current_fsuid();
865 config.gid = current_fsgid(); 884 config.gid = current_fsgid();
866 config.mode = 0755; 885 config.mode = 0755;
867 config.hstate = &default_hstate; 886 config.hstate = &default_hstate;
887 config.min_hpages = -1; /* No default minimum size */
868 ret = hugetlbfs_parse_options(data, &config); 888 ret = hugetlbfs_parse_options(data, &config);
869 if (ret) 889 if (ret)
870 return ret; 890 return ret;
@@ -878,8 +898,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
878 sbinfo->max_inodes = config.nr_inodes; 898 sbinfo->max_inodes = config.nr_inodes;
879 sbinfo->free_inodes = config.nr_inodes; 899 sbinfo->free_inodes = config.nr_inodes;
880 sbinfo->spool = NULL; 900 sbinfo->spool = NULL;
881 if (config.nr_blocks != -1) { 901 /*
882 sbinfo->spool = hugepage_new_subpool(config.nr_blocks); 902 * Allocate and initialize subpool if maximum or minimum size is
903 * specified. Any needed reservations (for minimim size) are taken
904 * taken when the subpool is created.
905 */
906 if (config.max_hpages != -1 || config.min_hpages != -1) {
907 sbinfo->spool = hugepage_new_subpool(config.hstate,
908 config.max_hpages,
909 config.min_hpages);
883 if (!sbinfo->spool) 910 if (!sbinfo->spool)
884 goto out_free; 911 goto out_free;
885 } 912 }
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 64989ca9ba90..f509f62e12f6 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -51,9 +51,7 @@ const struct file_operations jffs2_file_operations =
51{ 51{
52 .llseek = generic_file_llseek, 52 .llseek = generic_file_llseek,
53 .open = generic_file_open, 53 .open = generic_file_open,
54 .read = new_sync_read,
55 .read_iter = generic_file_read_iter, 54 .read_iter = generic_file_read_iter,
56 .write = new_sync_write,
57 .write_iter = generic_file_write_iter, 55 .write_iter = generic_file_write_iter,
58 .unlocked_ioctl=jffs2_ioctl, 56 .unlocked_ioctl=jffs2_ioctl,
59 .mmap = generic_file_readonly_mmap, 57 .mmap = generic_file_readonly_mmap,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817ac51f6..762c7a3cf43d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
195 /* unchecked xdatum is chained with c->xattr_unchecked */ 195 /* unchecked xdatum is chained with c->xattr_unchecked */
196 list_del_init(&xd->xindex); 196 list_del_init(&xd->xindex);
197 197
198 dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n", 198 dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
199 xd->xid, xd->version); 199 xd->xid, xd->version);
200 200
201 return 0; 201 return 0;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 10815f8dfd8b..ae46788b9723 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -151,8 +151,6 @@ const struct inode_operations jfs_file_inode_operations = {
151const struct file_operations jfs_file_operations = { 151const struct file_operations jfs_file_operations = {
152 .open = jfs_open, 152 .open = jfs_open,
153 .llseek = generic_file_llseek, 153 .llseek = generic_file_llseek,
154 .write = new_sync_write,
155 .read = new_sync_read,
156 .read_iter = generic_file_read_iter, 154 .read_iter = generic_file_read_iter,
157 .write_iter = generic_file_write_iter, 155 .write_iter = generic_file_write_iter,
158 .mmap = generic_file_mmap, 156 .mmap = generic_file_mmap,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1ca3c9b..070dc4b33544 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
22#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
23#include <linux/pagemap.h> 23#include <linux/pagemap.h>
24#include <linux/quotaops.h> 24#include <linux/quotaops.h>
25#include <linux/uio.h>
25#include <linux/writeback.h> 26#include <linux/writeback.h>
26#include <linux/aio.h>
27#include "jfs_incore.h" 27#include "jfs_incore.h"
28#include "jfs_inode.h" 28#include "jfs_inode.h"
29#include "jfs_filsys.h" 29#include "jfs_filsys.h"
@@ -330,8 +330,8 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
330 return generic_block_bmap(mapping, block, jfs_get_block); 330 return generic_block_bmap(mapping, block, jfs_get_block);
331} 331}
332 332
333static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, 333static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
334 struct iov_iter *iter, loff_t offset) 334 loff_t offset)
335{ 335{
336 struct file *file = iocb->ki_filp; 336 struct file *file = iocb->ki_filp;
337 struct address_space *mapping = file->f_mapping; 337 struct address_space *mapping = file->f_mapping;
@@ -339,13 +339,13 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
339 size_t count = iov_iter_count(iter); 339 size_t count = iov_iter_count(iter);
340 ssize_t ret; 340 ssize_t ret;
341 341
342 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); 342 ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block);
343 343
344 /* 344 /*
345 * In case of error extending write may have instantiated a few 345 * In case of error extending write may have instantiated a few
346 * blocks outside i_size. Trim these off again. 346 * blocks outside i_size. Trim these off again.
347 */ 347 */
348 if (unlikely((rw & WRITE) && ret < 0)) { 348 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
349 loff_t isize = i_size_read(inode); 349 loff_t isize = i_size_read(inode);
350 loff_t end = offset + count; 350 loff_t end = offset + count;
351 351
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 49ba7ff1bbb9..16a0922beb59 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
183 183
184#endif 184#endif
185 185
186static void init_once(void *foo)
187{
188 struct metapage *mp = (struct metapage *)foo;
189
190 mp->lid = 0;
191 mp->lsn = 0;
192 mp->flag = 0;
193 mp->data = NULL;
194 mp->clsn = 0;
195 mp->log = NULL;
196 set_bit(META_free, &mp->flag);
197 init_waitqueue_head(&mp->wait);
198}
199
200static inline struct metapage *alloc_metapage(gfp_t gfp_mask) 186static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
201{ 187{
202 return mempool_alloc(metapage_mempool, gfp_mask); 188 struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);
189
190 if (mp) {
191 mp->lid = 0;
192 mp->lsn = 0;
193 mp->data = NULL;
194 mp->clsn = 0;
195 mp->log = NULL;
196 init_waitqueue_head(&mp->wait);
197 }
198 return mp;
203} 199}
204 200
205static inline void free_metapage(struct metapage *mp) 201static inline void free_metapage(struct metapage *mp)
206{ 202{
207 mp->flag = 0;
208 set_bit(META_free, &mp->flag);
209
210 mempool_free(mp, metapage_mempool); 203 mempool_free(mp, metapage_mempool);
211} 204}
212 205
@@ -216,7 +209,7 @@ int __init metapage_init(void)
216 * Allocate the metapage structures 209 * Allocate the metapage structures
217 */ 210 */
218 metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), 211 metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
219 0, 0, init_once); 212 0, 0, NULL);
220 if (metapage_cache == NULL) 213 if (metapage_cache == NULL)
221 return -ENOMEM; 214 return -ENOMEM;
222 215
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index a78beda85f68..337e9e51ac06 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -48,7 +48,6 @@ struct metapage {
48 48
49/* metapage flag */ 49/* metapage flag */
50#define META_locked 0 50#define META_locked 0
51#define META_free 1
52#define META_dirty 2 51#define META_dirty 2
53#define META_sync 3 52#define META_sync 3
54#define META_discard 4 53#define META_discard 4
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
102 vaf.fmt = fmt; 102 vaf.fmt = fmt;
103 vaf.va = &args; 103 vaf.va = &args;
104 104
105 pr_err("ERROR: (device %s): %pf: %pV\n", 105 pr_err("ERROR: (device %s): %ps: %pV\n",
106 sb->s_id, __builtin_return_address(0), &vaf); 106 sb->s_id, __builtin_return_address(0), &vaf);
107 107
108 va_end(args); 108 va_end(args);
diff --git a/fs/locks.c b/fs/locks.c
index 40bc384728c0..653faabb07f4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -203,11 +203,11 @@ static struct kmem_cache *flctx_cache __read_mostly;
203static struct kmem_cache *filelock_cache __read_mostly; 203static struct kmem_cache *filelock_cache __read_mostly;
204 204
205static struct file_lock_context * 205static struct file_lock_context *
206locks_get_lock_context(struct inode *inode) 206locks_get_lock_context(struct inode *inode, int type)
207{ 207{
208 struct file_lock_context *new; 208 struct file_lock_context *new;
209 209
210 if (likely(inode->i_flctx)) 210 if (likely(inode->i_flctx) || type == F_UNLCK)
211 goto out; 211 goto out;
212 212
213 new = kmem_cache_alloc(flctx_cache, GFP_KERNEL); 213 new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
@@ -223,14 +223,7 @@ locks_get_lock_context(struct inode *inode)
223 * Assign the pointer if it's not already assigned. If it is, then 223 * Assign the pointer if it's not already assigned. If it is, then
224 * free the context we just allocated. 224 * free the context we just allocated.
225 */ 225 */
226 spin_lock(&inode->i_lock); 226 if (cmpxchg(&inode->i_flctx, NULL, new))
227 if (likely(!inode->i_flctx)) {
228 inode->i_flctx = new;
229 new = NULL;
230 }
231 spin_unlock(&inode->i_lock);
232
233 if (new)
234 kmem_cache_free(flctx_cache, new); 227 kmem_cache_free(flctx_cache, new);
235out: 228out:
236 return inode->i_flctx; 229 return inode->i_flctx;
@@ -276,8 +269,10 @@ void locks_release_private(struct file_lock *fl)
276 } 269 }
277 270
278 if (fl->fl_lmops) { 271 if (fl->fl_lmops) {
279 if (fl->fl_lmops->lm_put_owner) 272 if (fl->fl_lmops->lm_put_owner) {
280 fl->fl_lmops->lm_put_owner(fl); 273 fl->fl_lmops->lm_put_owner(fl->fl_owner);
274 fl->fl_owner = NULL;
275 }
281 fl->fl_lmops = NULL; 276 fl->fl_lmops = NULL;
282 } 277 }
283} 278}
@@ -333,7 +328,7 @@ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
333 328
334 if (fl->fl_lmops) { 329 if (fl->fl_lmops) {
335 if (fl->fl_lmops->lm_get_owner) 330 if (fl->fl_lmops->lm_get_owner)
336 fl->fl_lmops->lm_get_owner(new, fl); 331 fl->fl_lmops->lm_get_owner(fl->fl_owner);
337 } 332 }
338} 333}
339EXPORT_SYMBOL(locks_copy_conflock); 334EXPORT_SYMBOL(locks_copy_conflock);
@@ -592,11 +587,15 @@ posix_owner_key(struct file_lock *fl)
592 587
593static void locks_insert_global_blocked(struct file_lock *waiter) 588static void locks_insert_global_blocked(struct file_lock *waiter)
594{ 589{
590 lockdep_assert_held(&blocked_lock_lock);
591
595 hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); 592 hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
596} 593}
597 594
598static void locks_delete_global_blocked(struct file_lock *waiter) 595static void locks_delete_global_blocked(struct file_lock *waiter)
599{ 596{
597 lockdep_assert_held(&blocked_lock_lock);
598
600 hash_del(&waiter->fl_link); 599 hash_del(&waiter->fl_link);
601} 600}
602 601
@@ -730,7 +729,7 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
730 /* POSIX locks owned by the same process do not conflict with 729 /* POSIX locks owned by the same process do not conflict with
731 * each other. 730 * each other.
732 */ 731 */
733 if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl)) 732 if (posix_same_owner(caller_fl, sys_fl))
734 return (0); 733 return (0);
735 734
736 /* Check whether they overlap */ 735 /* Check whether they overlap */
@@ -748,7 +747,7 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
748 /* FLOCK locks referring to the same filp do not conflict with 747 /* FLOCK locks referring to the same filp do not conflict with
749 * each other. 748 * each other.
750 */ 749 */
751 if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file)) 750 if (caller_fl->fl_file == sys_fl->fl_file)
752 return (0); 751 return (0);
753 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) 752 if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
754 return 0; 753 return 0;
@@ -838,6 +837,8 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
838{ 837{
839 int i = 0; 838 int i = 0;
840 839
840 lockdep_assert_held(&blocked_lock_lock);
841
841 /* 842 /*
842 * This deadlock detector can't reasonably detect deadlocks with 843 * This deadlock detector can't reasonably detect deadlocks with
843 * FL_OFDLCK locks, since they aren't owned by a process, per-se. 844 * FL_OFDLCK locks, since they aren't owned by a process, per-se.
@@ -871,9 +872,12 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
871 bool found = false; 872 bool found = false;
872 LIST_HEAD(dispose); 873 LIST_HEAD(dispose);
873 874
874 ctx = locks_get_lock_context(inode); 875 ctx = locks_get_lock_context(inode, request->fl_type);
875 if (!ctx) 876 if (!ctx) {
876 return -ENOMEM; 877 if (request->fl_type != F_UNLCK)
878 return -ENOMEM;
879 return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
880 }
877 881
878 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 882 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
879 new_fl = locks_alloc_lock(); 883 new_fl = locks_alloc_lock();
@@ -939,9 +943,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
939 bool added = false; 943 bool added = false;
940 LIST_HEAD(dispose); 944 LIST_HEAD(dispose);
941 945
942 ctx = locks_get_lock_context(inode); 946 ctx = locks_get_lock_context(inode, request->fl_type);
943 if (!ctx) 947 if (!ctx)
944 return -ENOMEM; 948 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
945 949
946 /* 950 /*
947 * We may need two file_lock structures for this operation, 951 * We may need two file_lock structures for this operation,
@@ -964,8 +968,6 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
964 */ 968 */
965 if (request->fl_type != F_UNLCK) { 969 if (request->fl_type != F_UNLCK) {
966 list_for_each_entry(fl, &ctx->flc_posix, fl_list) { 970 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
967 if (!IS_POSIX(fl))
968 continue;
969 if (!posix_locks_conflict(request, fl)) 971 if (!posix_locks_conflict(request, fl))
970 continue; 972 continue;
971 if (conflock) 973 if (conflock)
@@ -1605,7 +1607,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
1605 lease = *flp; 1607 lease = *flp;
1606 trace_generic_add_lease(inode, lease); 1608 trace_generic_add_lease(inode, lease);
1607 1609
1608 ctx = locks_get_lock_context(inode); 1610 /* Note that arg is never F_UNLCK here */
1611 ctx = locks_get_lock_context(inode, arg);
1609 if (!ctx) 1612 if (!ctx)
1610 return -ENOMEM; 1613 return -ENOMEM;
1611 1614
@@ -2555,15 +2558,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2555 : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ "); 2558 : (fl->fl_type == F_WRLCK) ? "WRITE" : "READ ");
2556 } 2559 }
2557 if (inode) { 2560 if (inode) {
2558#ifdef WE_CAN_BREAK_LSLK_NOW 2561 /* userspace relies on this representation of dev_t */
2559 seq_printf(f, "%d %s:%ld ", fl_pid,
2560 inode->i_sb->s_id, inode->i_ino);
2561#else
2562 /* userspace relies on this representation of dev_t ;-( */
2563 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, 2562 seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
2564 MAJOR(inode->i_sb->s_dev), 2563 MAJOR(inode->i_sb->s_dev),
2565 MINOR(inode->i_sb->s_dev), inode->i_ino); 2564 MINOR(inode->i_sb->s_dev), inode->i_ino);
2566#endif
2567 } else { 2565 } else {
2568 seq_printf(f, "%d <none>:0 ", fl_pid); 2566 seq_printf(f, "%d <none>:0 ", fl_pid);
2569 } 2567 }
@@ -2592,6 +2590,44 @@ static int locks_show(struct seq_file *f, void *v)
2592 return 0; 2590 return 0;
2593} 2591}
2594 2592
2593static void __show_fd_locks(struct seq_file *f,
2594 struct list_head *head, int *id,
2595 struct file *filp, struct files_struct *files)
2596{
2597 struct file_lock *fl;
2598
2599 list_for_each_entry(fl, head, fl_list) {
2600
2601 if (filp != fl->fl_file)
2602 continue;
2603 if (fl->fl_owner != files &&
2604 fl->fl_owner != filp)
2605 continue;
2606
2607 (*id)++;
2608 seq_puts(f, "lock:\t");
2609 lock_get_status(f, fl, *id, "");
2610 }
2611}
2612
2613void show_fd_locks(struct seq_file *f,
2614 struct file *filp, struct files_struct *files)
2615{
2616 struct inode *inode = file_inode(filp);
2617 struct file_lock_context *ctx;
2618 int id = 0;
2619
2620 ctx = inode->i_flctx;
2621 if (!ctx)
2622 return;
2623
2624 spin_lock(&ctx->flc_lock);
2625 __show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
2626 __show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
2627 __show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
2628 spin_unlock(&ctx->flc_lock);
2629}
2630
2595static void *locks_start(struct seq_file *f, loff_t *pos) 2631static void *locks_start(struct seq_file *f, loff_t *pos)
2596 __acquires(&blocked_lock_lock) 2632 __acquires(&blocked_lock_lock)
2597{ 2633{
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 8538752df2f6..b2c13f739ffa 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -271,8 +271,6 @@ const struct file_operations logfs_reg_fops = {
271 .llseek = generic_file_llseek, 271 .llseek = generic_file_llseek,
272 .mmap = generic_file_readonly_mmap, 272 .mmap = generic_file_readonly_mmap,
273 .open = generic_file_open, 273 .open = generic_file_open,
274 .read = new_sync_read,
275 .write = new_sync_write,
276}; 274};
277 275
278const struct address_space_operations logfs_reg_aops = { 276const struct address_space_operations logfs_reg_aops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index a967de085ac0..6d63e27ec961 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -14,9 +14,7 @@
14 */ 14 */
15const struct file_operations minix_file_operations = { 15const struct file_operations minix_file_operations = {
16 .llseek = generic_file_llseek, 16 .llseek = generic_file_llseek,
17 .read = new_sync_read,
18 .read_iter = generic_file_read_iter, 17 .read_iter = generic_file_read_iter,
19 .write = new_sync_write,
20 .write_iter = generic_file_write_iter, 18 .write_iter = generic_file_write_iter,
21 .mmap = generic_file_mmap, 19 .mmap = generic_file_mmap,
22 .fsync = generic_file_fsync, 20 .fsync = generic_file_fsync,
diff --git a/fs/namei.c b/fs/namei.c
index c83145af4bfc..ffab2e06e147 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
119 * PATH_MAX includes the nul terminator --RR. 119 * PATH_MAX includes the nul terminator --RR.
120 */ 120 */
121 121
122#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) 122#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
123 123
124struct filename * 124struct filename *
125getname_flags(const char __user *filename, int flags, int *empty) 125getname_flags(const char __user *filename, int flags, int *empty)
126{ 126{
127 struct filename *result, *err; 127 struct filename *result;
128 int len;
129 long max;
130 char *kname; 128 char *kname;
129 int len;
131 130
132 result = audit_reusename(filename); 131 result = audit_reusename(filename);
133 if (result) 132 if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
136 result = __getname(); 135 result = __getname();
137 if (unlikely(!result)) 136 if (unlikely(!result))
138 return ERR_PTR(-ENOMEM); 137 return ERR_PTR(-ENOMEM);
139 result->refcnt = 1;
140 138
141 /* 139 /*
142 * First, try to embed the struct filename inside the names_cache 140 * First, try to embed the struct filename inside the names_cache
143 * allocation 141 * allocation
144 */ 142 */
145 kname = (char *)result + sizeof(*result); 143 kname = (char *)result->iname;
146 result->name = kname; 144 result->name = kname;
147 result->separate = false;
148 max = EMBEDDED_NAME_MAX;
149 145
150recopy: 146 len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
151 len = strncpy_from_user(kname, filename, max);
152 if (unlikely(len < 0)) { 147 if (unlikely(len < 0)) {
153 err = ERR_PTR(len); 148 __putname(result);
154 goto error; 149 return ERR_PTR(len);
155 } 150 }
156 151
157 /* 152 /*
@@ -160,43 +155,49 @@ recopy:
160 * names_cache allocation for the pathname, and re-do the copy from 155 * names_cache allocation for the pathname, and re-do the copy from
161 * userland. 156 * userland.
162 */ 157 */
163 if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) { 158 if (unlikely(len == EMBEDDED_NAME_MAX)) {
159 const size_t size = offsetof(struct filename, iname[1]);
164 kname = (char *)result; 160 kname = (char *)result;
165 161
166 result = kzalloc(sizeof(*result), GFP_KERNEL); 162 /*
167 if (!result) { 163 * size is chosen that way we to guarantee that
168 err = ERR_PTR(-ENOMEM); 164 * result->iname[0] is within the same object and that
169 result = (struct filename *)kname; 165 * kname can't be equal to result->iname, no matter what.
170 goto error; 166 */
167 result = kzalloc(size, GFP_KERNEL);
168 if (unlikely(!result)) {
169 __putname(kname);
170 return ERR_PTR(-ENOMEM);
171 } 171 }
172 result->name = kname; 172 result->name = kname;
173 result->separate = true; 173 len = strncpy_from_user(kname, filename, PATH_MAX);
174 result->refcnt = 1; 174 if (unlikely(len < 0)) {
175 max = PATH_MAX; 175 __putname(kname);
176 goto recopy; 176 kfree(result);
177 return ERR_PTR(len);
178 }
179 if (unlikely(len == PATH_MAX)) {
180 __putname(kname);
181 kfree(result);
182 return ERR_PTR(-ENAMETOOLONG);
183 }
177 } 184 }
178 185
186 result->refcnt = 1;
179 /* The empty path is special. */ 187 /* The empty path is special. */
180 if (unlikely(!len)) { 188 if (unlikely(!len)) {
181 if (empty) 189 if (empty)
182 *empty = 1; 190 *empty = 1;
183 err = ERR_PTR(-ENOENT); 191 if (!(flags & LOOKUP_EMPTY)) {
184 if (!(flags & LOOKUP_EMPTY)) 192 putname(result);
185 goto error; 193 return ERR_PTR(-ENOENT);
194 }
186 } 195 }
187 196
188 err = ERR_PTR(-ENAMETOOLONG);
189 if (unlikely(len >= PATH_MAX))
190 goto error;
191
192 result->uptr = filename; 197 result->uptr = filename;
193 result->aname = NULL; 198 result->aname = NULL;
194 audit_getname(result); 199 audit_getname(result);
195 return result; 200 return result;
196
197error:
198 putname(result);
199 return err;
200} 201}
201 202
202struct filename * 203struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
216 return ERR_PTR(-ENOMEM); 217 return ERR_PTR(-ENOMEM);
217 218
218 if (len <= EMBEDDED_NAME_MAX) { 219 if (len <= EMBEDDED_NAME_MAX) {
219 result->name = (char *)(result) + sizeof(*result); 220 result->name = (char *)result->iname;
220 result->separate = false;
221 } else if (len <= PATH_MAX) { 221 } else if (len <= PATH_MAX) {
222 struct filename *tmp; 222 struct filename *tmp;
223 223
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
227 return ERR_PTR(-ENOMEM); 227 return ERR_PTR(-ENOMEM);
228 } 228 }
229 tmp->name = (char *)result; 229 tmp->name = (char *)result;
230 tmp->separate = true;
231 result = tmp; 230 result = tmp;
232 } else { 231 } else {
233 __putname(result); 232 __putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
249 if (--name->refcnt > 0) 248 if (--name->refcnt > 0)
250 return; 249 return;
251 250
252 if (name->separate) { 251 if (name->name != name->iname) {
253 __putname(name->name); 252 __putname(name->name);
254 kfree(name); 253 kfree(name);
255 } else 254 } else
@@ -1586,7 +1585,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1586 inode = path->dentry->d_inode; 1585 inode = path->dentry->d_inode;
1587 } 1586 }
1588 err = -ENOENT; 1587 err = -ENOENT;
1589 if (!inode || d_is_negative(path->dentry)) 1588 if (d_is_negative(path->dentry))
1590 goto out_path_put; 1589 goto out_path_put;
1591 1590
1592 if (should_follow_link(path->dentry, follow)) { 1591 if (should_follow_link(path->dentry, follow)) {
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1851 return err; 1850 return err;
1852} 1851}
1853 1852
1854static int path_init(int dfd, const char *name, unsigned int flags, 1853static int path_init(int dfd, const struct filename *name, unsigned int flags,
1855 struct nameidata *nd) 1854 struct nameidata *nd)
1856{ 1855{
1857 int retval = 0; 1856 int retval = 0;
1857 const char *s = name->name;
1858 1858
1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1859 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; 1860 nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1863 if (flags & LOOKUP_ROOT) { 1863 if (flags & LOOKUP_ROOT) {
1864 struct dentry *root = nd->root.dentry; 1864 struct dentry *root = nd->root.dentry;
1865 struct inode *inode = root->d_inode; 1865 struct inode *inode = root->d_inode;
1866 if (*name) { 1866 if (*s) {
1867 if (!d_can_lookup(root)) 1867 if (!d_can_lookup(root))
1868 return -ENOTDIR; 1868 return -ENOTDIR;
1869 retval = inode_permission(inode, MAY_EXEC); 1869 retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1885 nd->root.mnt = NULL; 1885 nd->root.mnt = NULL;
1886 1886
1887 nd->m_seq = read_seqbegin(&mount_lock); 1887 nd->m_seq = read_seqbegin(&mount_lock);
1888 if (*name=='/') { 1888 if (*s == '/') {
1889 if (flags & LOOKUP_RCU) { 1889 if (flags & LOOKUP_RCU) {
1890 rcu_read_lock(); 1890 rcu_read_lock();
1891 nd->seq = set_root_rcu(nd); 1891 nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1919 1919
1920 dentry = f.file->f_path.dentry; 1920 dentry = f.file->f_path.dentry;
1921 1921
1922 if (*name) { 1922 if (*s) {
1923 if (!d_can_lookup(dentry)) { 1923 if (!d_can_lookup(dentry)) {
1924 fdput(f); 1924 fdput(f);
1925 return -ENOTDIR; 1925 return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1949 return -ECHILD; 1949 return -ECHILD;
1950done: 1950done:
1951 current->total_link_count = 0; 1951 current->total_link_count = 0;
1952 return link_path_walk(name, nd); 1952 return link_path_walk(s, nd);
1953} 1953}
1954 1954
1955static void path_cleanup(struct nameidata *nd) 1955static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
1972} 1972}
1973 1973
1974/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1974/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1975static int path_lookupat(int dfd, const char *name, 1975static int path_lookupat(int dfd, const struct filename *name,
1976 unsigned int flags, struct nameidata *nd) 1976 unsigned int flags, struct nameidata *nd)
1977{ 1977{
1978 struct path path; 1978 struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
2027static int filename_lookup(int dfd, struct filename *name, 2027static int filename_lookup(int dfd, struct filename *name,
2028 unsigned int flags, struct nameidata *nd) 2028 unsigned int flags, struct nameidata *nd)
2029{ 2029{
2030 int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd); 2030 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
2031 if (unlikely(retval == -ECHILD)) 2031 if (unlikely(retval == -ECHILD))
2032 retval = path_lookupat(dfd, name->name, flags, nd); 2032 retval = path_lookupat(dfd, name, flags, nd);
2033 if (unlikely(retval == -ESTALE)) 2033 if (unlikely(retval == -ESTALE))
2034 retval = path_lookupat(dfd, name->name, 2034 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
2035 flags | LOOKUP_REVAL, nd);
2036 2035
2037 if (likely(!retval)) 2036 if (likely(!retval))
2038 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); 2037 audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
2039 return retval; 2038 return retval;
2040} 2039}
2041 2040
2042static int do_path_lookup(int dfd, const char *name,
2043 unsigned int flags, struct nameidata *nd)
2044{
2045 struct filename *filename = getname_kernel(name);
2046 int retval = PTR_ERR(filename);
2047
2048 if (!IS_ERR(filename)) {
2049 retval = filename_lookup(dfd, filename, flags, nd);
2050 putname(filename);
2051 }
2052 return retval;
2053}
2054
2055/* does lookup, returns the object with parent locked */ 2041/* does lookup, returns the object with parent locked */
2056struct dentry *kern_path_locked(const char *name, struct path *path) 2042struct dentry *kern_path_locked(const char *name, struct path *path)
2057{ 2043{
@@ -2089,9 +2075,15 @@ out:
2089int kern_path(const char *name, unsigned int flags, struct path *path) 2075int kern_path(const char *name, unsigned int flags, struct path *path)
2090{ 2076{
2091 struct nameidata nd; 2077 struct nameidata nd;
2092 int res = do_path_lookup(AT_FDCWD, name, flags, &nd); 2078 struct filename *filename = getname_kernel(name);
2093 if (!res) 2079 int res = PTR_ERR(filename);
2094 *path = nd.path; 2080
2081 if (!IS_ERR(filename)) {
2082 res = filename_lookup(AT_FDCWD, filename, flags, &nd);
2083 putname(filename);
2084 if (!res)
2085 *path = nd.path;
2086 }
2095 return res; 2087 return res;
2096} 2088}
2097EXPORT_SYMBOL(kern_path); 2089EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2108 const char *name, unsigned int flags, 2100 const char *name, unsigned int flags,
2109 struct path *path) 2101 struct path *path)
2110{ 2102{
2111 struct nameidata nd; 2103 struct filename *filename = getname_kernel(name);
2112 int err; 2104 int err = PTR_ERR(filename);
2113 nd.root.dentry = dentry; 2105
2114 nd.root.mnt = mnt;
2115 BUG_ON(flags & LOOKUP_PARENT); 2106 BUG_ON(flags & LOOKUP_PARENT);
2116 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 2107
2117 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd); 2108 /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
2118 if (!err) 2109 if (!IS_ERR(filename)) {
2119 *path = nd.path; 2110 struct nameidata nd;
2111 nd.root.dentry = dentry;
2112 nd.root.mnt = mnt;
2113 err = filename_lookup(AT_FDCWD, filename,
2114 flags | LOOKUP_ROOT, &nd);
2115 if (!err)
2116 *path = nd.path;
2117 putname(filename);
2118 }
2120 return err; 2119 return err;
2121} 2120}
2122EXPORT_SYMBOL(vfs_path_lookup); 2121EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
2138 * @len: maximum length @len should be interpreted to 2137 * @len: maximum length @len should be interpreted to
2139 * 2138 *
2140 * Note that this routine is purely a helper for filesystem usage and should 2139 * Note that this routine is purely a helper for filesystem usage and should
2141 * not be called by generic code. Also note that by using this function the 2140 * not be called by generic code.
2142 * nameidata argument is passed to the filesystem methods and a filesystem
2143 * using this helper needs to be prepared for that.
2144 */ 2141 */
2145struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 2142struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2146{ 2143{
@@ -2313,7 +2310,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2313 mutex_unlock(&dir->d_inode->i_mutex); 2310 mutex_unlock(&dir->d_inode->i_mutex);
2314 2311
2315done: 2312done:
2316 if (!dentry->d_inode || d_is_negative(dentry)) { 2313 if (d_is_negative(dentry)) {
2317 error = -ENOENT; 2314 error = -ENOENT;
2318 dput(dentry); 2315 dput(dentry);
2319 goto out; 2316 goto out;
@@ -2341,7 +2338,8 @@ out:
2341 * Returns 0 and "path" will be valid on success; Returns error otherwise. 2338 * Returns 0 and "path" will be valid on success; Returns error otherwise.
2342 */ 2339 */
2343static int 2340static int
2344path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) 2341path_mountpoint(int dfd, const struct filename *name, struct path *path,
2342 unsigned int flags)
2345{ 2343{
2346 struct nameidata nd; 2344 struct nameidata nd;
2347 int err; 2345 int err;
@@ -2370,20 +2368,20 @@ out:
2370} 2368}
2371 2369
2372static int 2370static int
2373filename_mountpoint(int dfd, struct filename *s, struct path *path, 2371filename_mountpoint(int dfd, struct filename *name, struct path *path,
2374 unsigned int flags) 2372 unsigned int flags)
2375{ 2373{
2376 int error; 2374 int error;
2377 if (IS_ERR(s)) 2375 if (IS_ERR(name))
2378 return PTR_ERR(s); 2376 return PTR_ERR(name);
2379 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); 2377 error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
2380 if (unlikely(error == -ECHILD)) 2378 if (unlikely(error == -ECHILD))
2381 error = path_mountpoint(dfd, s->name, path, flags); 2379 error = path_mountpoint(dfd, name, path, flags);
2382 if (unlikely(error == -ESTALE)) 2380 if (unlikely(error == -ESTALE))
2383 error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); 2381 error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
2384 if (likely(!error)) 2382 if (likely(!error))
2385 audit_inode(s, path->dentry, 0); 2383 audit_inode(name, path->dentry, 0);
2386 putname(s); 2384 putname(name);
2387 return error; 2385 return error;
2388} 2386}
2389 2387
@@ -3040,7 +3038,7 @@ retry_lookup:
3040finish_lookup: 3038finish_lookup:
3041 /* we _can_ be in RCU mode here */ 3039 /* we _can_ be in RCU mode here */
3042 error = -ENOENT; 3040 error = -ENOENT;
3043 if (!inode || d_is_negative(path->dentry)) { 3041 if (d_is_negative(path->dentry)) {
3044 path_to_nameidata(path, nd); 3042 path_to_nameidata(path, nd);
3045 goto out; 3043 goto out;
3046 } 3044 }
@@ -3079,7 +3077,7 @@ finish_open:
3079 error = -ENOTDIR; 3077 error = -ENOTDIR;
3080 if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) 3078 if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
3081 goto out; 3079 goto out;
3082 if (!S_ISREG(nd->inode->i_mode)) 3080 if (!d_is_reg(nd->path.dentry))
3083 will_truncate = false; 3081 will_truncate = false;
3084 3082
3085 if (will_truncate) { 3083 if (will_truncate) {
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
3156 static const struct qstr name = QSTR_INIT("/", 1); 3154 static const struct qstr name = QSTR_INIT("/", 1);
3157 struct dentry *dentry, *child; 3155 struct dentry *dentry, *child;
3158 struct inode *dir; 3156 struct inode *dir;
3159 int error = path_lookupat(dfd, pathname->name, 3157 int error = path_lookupat(dfd, pathname,
3160 flags | LOOKUP_DIRECTORY, nd); 3158 flags | LOOKUP_DIRECTORY, nd);
3161 if (unlikely(error)) 3159 if (unlikely(error))
3162 return error; 3160 return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
3229 goto out; 3227 goto out;
3230 } 3228 }
3231 3229
3232 error = path_init(dfd, pathname->name, flags, nd); 3230 error = path_init(dfd, pathname, flags, nd);
3233 if (unlikely(error)) 3231 if (unlikely(error))
3234 goto out; 3232 goto out;
3235 3233
diff --git a/fs/namespace.c b/fs/namespace.c
index 82ef1405260e..1f4f9dac6e5a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
632 */ 632 */
633struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) 633struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
634{ 634{
635 struct mount *p, *res; 635 struct mount *p, *res = NULL;
636 res = p = __lookup_mnt(mnt, dentry); 636 p = __lookup_mnt(mnt, dentry);
637 if (!p) 637 if (!p)
638 goto out; 638 goto out;
639 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
640 res = p;
639 hlist_for_each_entry_continue(p, mnt_hash) { 641 hlist_for_each_entry_continue(p, mnt_hash) {
640 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) 642 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
641 break; 643 break;
642 res = p; 644 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
645 res = p;
643 } 646 }
644out: 647out:
645 return res; 648 return res;
@@ -795,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
795/* 798/*
796 * vfsmount lock must be held for write 799 * vfsmount lock must be held for write
797 */ 800 */
798static void detach_mnt(struct mount *mnt, struct path *old_path) 801static void unhash_mnt(struct mount *mnt)
799{ 802{
800 old_path->dentry = mnt->mnt_mountpoint;
801 old_path->mnt = &mnt->mnt_parent->mnt;
802 mnt->mnt_parent = mnt; 803 mnt->mnt_parent = mnt;
803 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 804 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
804 list_del_init(&mnt->mnt_child); 805 list_del_init(&mnt->mnt_child);
@@ -811,6 +812,26 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
811/* 812/*
812 * vfsmount lock must be held for write 813 * vfsmount lock must be held for write
813 */ 814 */
815static void detach_mnt(struct mount *mnt, struct path *old_path)
816{
817 old_path->dentry = mnt->mnt_mountpoint;
818 old_path->mnt = &mnt->mnt_parent->mnt;
819 unhash_mnt(mnt);
820}
821
822/*
823 * vfsmount lock must be held for write
824 */
825static void umount_mnt(struct mount *mnt)
826{
827 /* old mountpoint will be dropped when we can do that */
828 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
829 unhash_mnt(mnt);
830}
831
832/*
833 * vfsmount lock must be held for write
834 */
814void mnt_set_mountpoint(struct mount *mnt, 835void mnt_set_mountpoint(struct mount *mnt,
815 struct mountpoint *mp, 836 struct mountpoint *mp,
816 struct mount *child_mnt) 837 struct mount *child_mnt)
@@ -1078,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt)
1078 rcu_read_unlock(); 1099 rcu_read_unlock();
1079 1100
1080 list_del(&mnt->mnt_instance); 1101 list_del(&mnt->mnt_instance);
1102
1103 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1104 struct mount *p, *tmp;
1105 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1106 umount_mnt(p);
1107 }
1108 }
1081 unlock_mount_hash(); 1109 unlock_mount_hash();
1082 1110
1083 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { 1111 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
@@ -1298,17 +1326,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
1298 1326
1299static void namespace_unlock(void) 1327static void namespace_unlock(void)
1300{ 1328{
1301 struct hlist_head head = unmounted; 1329 struct hlist_head head;
1302 1330
1303 if (likely(hlist_empty(&head))) { 1331 hlist_move_list(&unmounted, &head);
1304 up_write(&namespace_sem);
1305 return;
1306 }
1307 1332
1308 head.first->pprev = &head.first;
1309 INIT_HLIST_HEAD(&unmounted);
1310 up_write(&namespace_sem); 1333 up_write(&namespace_sem);
1311 1334
1335 if (likely(hlist_empty(&head)))
1336 return;
1337
1312 synchronize_rcu(); 1338 synchronize_rcu();
1313 1339
1314 group_pin_kill(&head); 1340 group_pin_kill(&head);
@@ -1319,49 +1345,63 @@ static inline void namespace_lock(void)
1319 down_write(&namespace_sem); 1345 down_write(&namespace_sem);
1320} 1346}
1321 1347
1348enum umount_tree_flags {
1349 UMOUNT_SYNC = 1,
1350 UMOUNT_PROPAGATE = 2,
1351 UMOUNT_CONNECTED = 4,
1352};
1322/* 1353/*
1323 * mount_lock must be held 1354 * mount_lock must be held
1324 * namespace_sem must be held for write 1355 * namespace_sem must be held for write
1325 * how = 0 => just this tree, don't propagate
1326 * how = 1 => propagate; we know that nobody else has reference to any victims
1327 * how = 2 => lazy umount
1328 */ 1356 */
1329void umount_tree(struct mount *mnt, int how) 1357static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1330{ 1358{
1331 HLIST_HEAD(tmp_list); 1359 LIST_HEAD(tmp_list);
1332 struct mount *p; 1360 struct mount *p;
1333 1361
1362 if (how & UMOUNT_PROPAGATE)
1363 propagate_mount_unlock(mnt);
1364
1365 /* Gather the mounts to umount */
1334 for (p = mnt; p; p = next_mnt(p, mnt)) { 1366 for (p = mnt; p; p = next_mnt(p, mnt)) {
1335 hlist_del_init_rcu(&p->mnt_hash); 1367 p->mnt.mnt_flags |= MNT_UMOUNT;
1336 hlist_add_head(&p->mnt_hash, &tmp_list); 1368 list_move(&p->mnt_list, &tmp_list);
1337 } 1369 }
1338 1370
1339 hlist_for_each_entry(p, &tmp_list, mnt_hash) 1371 /* Hide the mounts from mnt_mounts */
1372 list_for_each_entry(p, &tmp_list, mnt_list) {
1340 list_del_init(&p->mnt_child); 1373 list_del_init(&p->mnt_child);
1374 }
1341 1375
1342 if (how) 1376 /* Add propogated mounts to the tmp_list */
1377 if (how & UMOUNT_PROPAGATE)
1343 propagate_umount(&tmp_list); 1378 propagate_umount(&tmp_list);
1344 1379
1345 while (!hlist_empty(&tmp_list)) { 1380 while (!list_empty(&tmp_list)) {
1346 p = hlist_entry(tmp_list.first, struct mount, mnt_hash); 1381 bool disconnect;
1347 hlist_del_init_rcu(&p->mnt_hash); 1382 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1348 list_del_init(&p->mnt_expire); 1383 list_del_init(&p->mnt_expire);
1349 list_del_init(&p->mnt_list); 1384 list_del_init(&p->mnt_list);
1350 __touch_mnt_namespace(p->mnt_ns); 1385 __touch_mnt_namespace(p->mnt_ns);
1351 p->mnt_ns = NULL; 1386 p->mnt_ns = NULL;
1352 if (how < 2) 1387 if (how & UMOUNT_SYNC)
1353 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1388 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1354 1389
1355 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted); 1390 disconnect = !(((how & UMOUNT_CONNECTED) &&
1391 mnt_has_parent(p) &&
1392 (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) ||
1393 IS_MNT_LOCKED_AND_LAZY(p));
1394
1395 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1396 disconnect ? &unmounted : NULL);
1356 if (mnt_has_parent(p)) { 1397 if (mnt_has_parent(p)) {
1357 hlist_del_init(&p->mnt_mp_list);
1358 put_mountpoint(p->mnt_mp);
1359 mnt_add_count(p->mnt_parent, -1); 1398 mnt_add_count(p->mnt_parent, -1);
1360 /* old mountpoint will be dropped when we can do that */ 1399 if (!disconnect) {
1361 p->mnt_ex_mountpoint = p->mnt_mountpoint; 1400 /* Don't forget about p */
1362 p->mnt_mountpoint = p->mnt.mnt_root; 1401 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1363 p->mnt_parent = p; 1402 } else {
1364 p->mnt_mp = NULL; 1403 umount_mnt(p);
1404 }
1365 } 1405 }
1366 change_mnt_propagation(p, MS_PRIVATE); 1406 change_mnt_propagation(p, MS_PRIVATE);
1367 } 1407 }
@@ -1447,14 +1487,14 @@ static int do_umount(struct mount *mnt, int flags)
1447 1487
1448 if (flags & MNT_DETACH) { 1488 if (flags & MNT_DETACH) {
1449 if (!list_empty(&mnt->mnt_list)) 1489 if (!list_empty(&mnt->mnt_list))
1450 umount_tree(mnt, 2); 1490 umount_tree(mnt, UMOUNT_PROPAGATE);
1451 retval = 0; 1491 retval = 0;
1452 } else { 1492 } else {
1453 shrink_submounts(mnt); 1493 shrink_submounts(mnt);
1454 retval = -EBUSY; 1494 retval = -EBUSY;
1455 if (!propagate_mount_busy(mnt, 2)) { 1495 if (!propagate_mount_busy(mnt, 2)) {
1456 if (!list_empty(&mnt->mnt_list)) 1496 if (!list_empty(&mnt->mnt_list))
1457 umount_tree(mnt, 1); 1497 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1458 retval = 0; 1498 retval = 0;
1459 } 1499 }
1460 } 1500 }
@@ -1480,13 +1520,20 @@ void __detach_mounts(struct dentry *dentry)
1480 1520
1481 namespace_lock(); 1521 namespace_lock();
1482 mp = lookup_mountpoint(dentry); 1522 mp = lookup_mountpoint(dentry);
1483 if (!mp) 1523 if (IS_ERR_OR_NULL(mp))
1484 goto out_unlock; 1524 goto out_unlock;
1485 1525
1486 lock_mount_hash(); 1526 lock_mount_hash();
1487 while (!hlist_empty(&mp->m_list)) { 1527 while (!hlist_empty(&mp->m_list)) {
1488 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); 1528 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1489 umount_tree(mnt, 2); 1529 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1530 struct mount *p, *tmp;
1531 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1532 hlist_add_head(&p->mnt_umount.s_list, &unmounted);
1533 umount_mnt(p);
1534 }
1535 }
1536 else umount_tree(mnt, UMOUNT_CONNECTED);
1490 } 1537 }
1491 unlock_mount_hash(); 1538 unlock_mount_hash();
1492 put_mountpoint(mp); 1539 put_mountpoint(mp);
@@ -1648,7 +1695,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1648out: 1695out:
1649 if (res) { 1696 if (res) {
1650 lock_mount_hash(); 1697 lock_mount_hash();
1651 umount_tree(res, 0); 1698 umount_tree(res, UMOUNT_SYNC);
1652 unlock_mount_hash(); 1699 unlock_mount_hash();
1653 } 1700 }
1654 return q; 1701 return q;
@@ -1660,8 +1707,11 @@ struct vfsmount *collect_mounts(struct path *path)
1660{ 1707{
1661 struct mount *tree; 1708 struct mount *tree;
1662 namespace_lock(); 1709 namespace_lock();
1663 tree = copy_tree(real_mount(path->mnt), path->dentry, 1710 if (!check_mnt(real_mount(path->mnt)))
1664 CL_COPY_ALL | CL_PRIVATE); 1711 tree = ERR_PTR(-EINVAL);
1712 else
1713 tree = copy_tree(real_mount(path->mnt), path->dentry,
1714 CL_COPY_ALL | CL_PRIVATE);
1665 namespace_unlock(); 1715 namespace_unlock();
1666 if (IS_ERR(tree)) 1716 if (IS_ERR(tree))
1667 return ERR_CAST(tree); 1717 return ERR_CAST(tree);
@@ -1672,7 +1722,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
1672{ 1722{
1673 namespace_lock(); 1723 namespace_lock();
1674 lock_mount_hash(); 1724 lock_mount_hash();
1675 umount_tree(real_mount(mnt), 0); 1725 umount_tree(real_mount(mnt), UMOUNT_SYNC);
1676 unlock_mount_hash(); 1726 unlock_mount_hash();
1677 namespace_unlock(); 1727 namespace_unlock();
1678} 1728}
@@ -1855,7 +1905,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1855 out_cleanup_ids: 1905 out_cleanup_ids:
1856 while (!hlist_empty(&tree_list)) { 1906 while (!hlist_empty(&tree_list)) {
1857 child = hlist_entry(tree_list.first, struct mount, mnt_hash); 1907 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
1858 umount_tree(child, 0); 1908 umount_tree(child, UMOUNT_SYNC);
1859 } 1909 }
1860 unlock_mount_hash(); 1910 unlock_mount_hash();
1861 cleanup_group_ids(source_mnt, NULL); 1911 cleanup_group_ids(source_mnt, NULL);
@@ -2035,7 +2085,7 @@ static int do_loopback(struct path *path, const char *old_name,
2035 err = graft_tree(mnt, parent, mp); 2085 err = graft_tree(mnt, parent, mp);
2036 if (err) { 2086 if (err) {
2037 lock_mount_hash(); 2087 lock_mount_hash();
2038 umount_tree(mnt, 0); 2088 umount_tree(mnt, UMOUNT_SYNC);
2039 unlock_mount_hash(); 2089 unlock_mount_hash();
2040 } 2090 }
2041out2: 2091out2:
@@ -2406,7 +2456,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2406 while (!list_empty(&graveyard)) { 2456 while (!list_empty(&graveyard)) {
2407 mnt = list_first_entry(&graveyard, struct mount, mnt_expire); 2457 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2408 touch_mnt_namespace(mnt->mnt_ns); 2458 touch_mnt_namespace(mnt->mnt_ns);
2409 umount_tree(mnt, 1); 2459 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2410 } 2460 }
2411 unlock_mount_hash(); 2461 unlock_mount_hash();
2412 namespace_unlock(); 2462 namespace_unlock();
@@ -2477,7 +2527,7 @@ static void shrink_submounts(struct mount *mnt)
2477 m = list_first_entry(&graveyard, struct mount, 2527 m = list_first_entry(&graveyard, struct mount,
2478 mnt_expire); 2528 mnt_expire);
2479 touch_mnt_namespace(m->mnt_ns); 2529 touch_mnt_namespace(m->mnt_ns);
2480 umount_tree(m, 1); 2530 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2481 } 2531 }
2482 } 2532 }
2483} 2533}
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 1dd7007f974d..011324ce9df2 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -98,30 +98,24 @@ out:
98} 98}
99 99
100static ssize_t 100static ssize_t
101ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 101ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
102{ 102{
103 struct file *file = iocb->ki_filp;
103 struct inode *inode = file_inode(file); 104 struct inode *inode = file_inode(file);
104 size_t already_read = 0; 105 size_t already_read = 0;
105 off_t pos; 106 off_t pos = iocb->ki_pos;
106 size_t bufsize; 107 size_t bufsize;
107 int error; 108 int error;
108 void* freepage; 109 void *freepage;
109 size_t freelen; 110 size_t freelen;
110 111
111 ncp_dbg(1, "enter %pD2\n", file); 112 ncp_dbg(1, "enter %pD2\n", file);
112 113
113 pos = *ppos; 114 if (!iov_iter_count(to))
114
115 if ((ssize_t) count < 0) {
116 return -EINVAL;
117 }
118 if (!count)
119 return 0; 115 return 0;
120 if (pos > inode->i_sb->s_maxbytes) 116 if (pos > inode->i_sb->s_maxbytes)
121 return 0; 117 return 0;
122 if (pos + count > inode->i_sb->s_maxbytes) { 118 iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos);
123 count = inode->i_sb->s_maxbytes - pos;
124 }
125 119
126 error = ncp_make_open(inode, O_RDONLY); 120 error = ncp_make_open(inode, O_RDONLY);
127 if (error) { 121 if (error) {
@@ -138,31 +132,29 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
138 goto outrel; 132 goto outrel;
139 error = 0; 133 error = 0;
140 /* First read in as much as possible for each bufsize. */ 134 /* First read in as much as possible for each bufsize. */
141 while (already_read < count) { 135 while (iov_iter_count(to)) {
142 int read_this_time; 136 int read_this_time;
143 size_t to_read = min_t(unsigned int, 137 size_t to_read = min_t(size_t,
144 bufsize - (pos % bufsize), 138 bufsize - (pos % bufsize),
145 count - already_read); 139 iov_iter_count(to));
146 140
147 error = ncp_read_bounce(NCP_SERVER(inode), 141 error = ncp_read_bounce(NCP_SERVER(inode),
148 NCP_FINFO(inode)->file_handle, 142 NCP_FINFO(inode)->file_handle,
149 pos, to_read, buf, &read_this_time, 143 pos, to_read, to, &read_this_time,
150 freepage, freelen); 144 freepage, freelen);
151 if (error) { 145 if (error) {
152 error = -EIO; /* NW errno -> Linux errno */ 146 error = -EIO; /* NW errno -> Linux errno */
153 break; 147 break;
154 } 148 }
155 pos += read_this_time; 149 pos += read_this_time;
156 buf += read_this_time;
157 already_read += read_this_time; 150 already_read += read_this_time;
158 151
159 if (read_this_time != to_read) { 152 if (read_this_time != to_read)
160 break; 153 break;
161 }
162 } 154 }
163 vfree(freepage); 155 vfree(freepage);
164 156
165 *ppos = pos; 157 iocb->ki_pos = pos;
166 158
167 file_accessed(file); 159 file_accessed(file);
168 160
@@ -173,42 +165,21 @@ outrel:
173} 165}
174 166
175static ssize_t 167static ssize_t
176ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 168ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
177{ 169{
170 struct file *file = iocb->ki_filp;
178 struct inode *inode = file_inode(file); 171 struct inode *inode = file_inode(file);
179 size_t already_written = 0; 172 size_t already_written = 0;
180 off_t pos;
181 size_t bufsize; 173 size_t bufsize;
182 int errno; 174 int errno;
183 void* bouncebuffer; 175 void *bouncebuffer;
176 off_t pos;
184 177
185 ncp_dbg(1, "enter %pD2\n", file); 178 ncp_dbg(1, "enter %pD2\n", file);
186 if ((ssize_t) count < 0) 179 errno = generic_write_checks(iocb, from);
187 return -EINVAL; 180 if (errno <= 0)
188 pos = *ppos; 181 return errno;
189 if (file->f_flags & O_APPEND) {
190 pos = i_size_read(inode);
191 }
192 182
193 if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
194 if (pos >= MAX_NON_LFS) {
195 return -EFBIG;
196 }
197 if (count > MAX_NON_LFS - (u32)pos) {
198 count = MAX_NON_LFS - (u32)pos;
199 }
200 }
201 if (pos >= inode->i_sb->s_maxbytes) {
202 if (count || pos > inode->i_sb->s_maxbytes) {
203 return -EFBIG;
204 }
205 }
206 if (pos + count > inode->i_sb->s_maxbytes) {
207 count = inode->i_sb->s_maxbytes - pos;
208 }
209
210 if (!count)
211 return 0;
212 errno = ncp_make_open(inode, O_WRONLY); 183 errno = ncp_make_open(inode, O_WRONLY);
213 if (errno) { 184 if (errno) {
214 ncp_dbg(1, "open failed, error=%d\n", errno); 185 ncp_dbg(1, "open failed, error=%d\n", errno);
@@ -216,8 +187,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
216 } 187 }
217 bufsize = NCP_SERVER(inode)->buffer_size; 188 bufsize = NCP_SERVER(inode)->buffer_size;
218 189
219 already_written = 0;
220
221 errno = file_update_time(file); 190 errno = file_update_time(file);
222 if (errno) 191 if (errno)
223 goto outrel; 192 goto outrel;
@@ -227,13 +196,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
227 errno = -EIO; /* -ENOMEM */ 196 errno = -EIO; /* -ENOMEM */
228 goto outrel; 197 goto outrel;
229 } 198 }
230 while (already_written < count) { 199 pos = iocb->ki_pos;
200 while (iov_iter_count(from)) {
231 int written_this_time; 201 int written_this_time;
232 size_t to_write = min_t(unsigned int, 202 size_t to_write = min_t(size_t,
233 bufsize - (pos % bufsize), 203 bufsize - (pos % bufsize),
234 count - already_written); 204 iov_iter_count(from));
235 205
236 if (copy_from_user(bouncebuffer, buf, to_write)) { 206 if (copy_from_iter(bouncebuffer, to_write, from) != to_write) {
237 errno = -EFAULT; 207 errno = -EFAULT;
238 break; 208 break;
239 } 209 }
@@ -244,16 +214,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
244 break; 214 break;
245 } 215 }
246 pos += written_this_time; 216 pos += written_this_time;
247 buf += written_this_time;
248 already_written += written_this_time; 217 already_written += written_this_time;
249 218
250 if (written_this_time != to_write) { 219 if (written_this_time != to_write)
251 break; 220 break;
252 }
253 } 221 }
254 vfree(bouncebuffer); 222 vfree(bouncebuffer);
255 223
256 *ppos = pos; 224 iocb->ki_pos = pos;
257 225
258 if (pos > i_size_read(inode)) { 226 if (pos > i_size_read(inode)) {
259 mutex_lock(&inode->i_mutex); 227 mutex_lock(&inode->i_mutex);
@@ -277,8 +245,8 @@ static int ncp_release(struct inode *inode, struct file *file) {
277const struct file_operations ncp_file_operations = 245const struct file_operations ncp_file_operations =
278{ 246{
279 .llseek = generic_file_llseek, 247 .llseek = generic_file_llseek,
280 .read = ncp_file_read, 248 .read_iter = ncp_file_read_iter,
281 .write = ncp_file_write, 249 .write_iter = ncp_file_write_iter,
282 .unlocked_ioctl = ncp_ioctl, 250 .unlocked_ioctl = ncp_ioctl,
283#ifdef CONFIG_COMPAT 251#ifdef CONFIG_COMPAT
284 .compat_ioctl = ncp_compat_ioctl, 252 .compat_ioctl = ncp_compat_ioctl,
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 482387532f54..2b502a0d7941 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -1001,8 +1001,8 @@ out:
1001 */ 1001 */
1002int 1002int
1003ncp_read_bounce(struct ncp_server *server, const char *file_id, 1003ncp_read_bounce(struct ncp_server *server, const char *file_id,
1004 __u32 offset, __u16 to_read, char __user *target, int *bytes_read, 1004 __u32 offset, __u16 to_read, struct iov_iter *to,
1005 void* bounce, __u32 bufsize) 1005 int *bytes_read, void *bounce, __u32 bufsize)
1006{ 1006{
1007 int result; 1007 int result;
1008 1008
@@ -1025,7 +1025,7 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
1025 (offset & 1); 1025 (offset & 1);
1026 *bytes_read = len; 1026 *bytes_read = len;
1027 result = 0; 1027 result = 0;
1028 if (copy_to_user(target, source, len)) 1028 if (copy_to_iter(source, len, to) != len)
1029 result = -EFAULT; 1029 result = -EFAULT;
1030 } 1030 }
1031 } 1031 }
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 250e443a07f3..5233fbc1747a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -53,7 +53,7 @@ static inline int ncp_read_bounce_size(__u32 size) {
53 return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8; 53 return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8;
54}; 54};
55int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16, 55int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16,
56 char __user *, int *, void* bounce, __u32 bouncelen); 56 struct iov_iter *, int *, void *bounce, __u32 bouncelen);
57int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16, 57int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16,
58 char *, int *); 58 char *, int *);
59int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16, 59int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16,
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index c7abc10279af..f31fd0dd92c6 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,6 +1,6 @@
1config NFS_FS 1config NFS_FS
2 tristate "NFS client support" 2 tristate "NFS client support"
3 depends on INET && FILE_LOCKING 3 depends on INET && FILE_LOCKING && MULTIUSER
4 select LOCKD 4 select LOCKD
5 select SUNRPC 5 select SUNRPC
6 select NFS_ACL_SUPPORT if NFS_V3_ACL 6 select NFS_ACL_SUPPORT if NFS_V3_ACL
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8cf732e..682f65fe09b5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -240,7 +240,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
240 240
241/** 241/**
242 * nfs_direct_IO - NFS address space operation for direct I/O 242 * nfs_direct_IO - NFS address space operation for direct I/O
243 * @rw: direction (read or write)
244 * @iocb: target I/O control block 243 * @iocb: target I/O control block
245 * @iov: array of vectors that define I/O buffer 244 * @iov: array of vectors that define I/O buffer
246 * @pos: offset in file to begin the operation 245 * @pos: offset in file to begin the operation
@@ -251,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
251 * shunt off direct read and write requests before the VFS gets them, 250 * shunt off direct read and write requests before the VFS gets them,
252 * so this method is only ever called for swap. 251 * so this method is only ever called for swap.
253 */ 252 */
254ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) 253ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
255{ 254{
256 struct inode *inode = iocb->ki_filp->f_mapping->host; 255 struct inode *inode = iocb->ki_filp->f_mapping->host;
257 256
@@ -265,11 +264,11 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
265 264
266 return -EINVAL; 265 return -EINVAL;
267#else 266#else
268 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); 267 VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
269 268
270 if (rw == READ) 269 if (iov_iter_rw(iter) == READ)
271 return nfs_file_direct_read(iocb, iter, pos); 270 return nfs_file_direct_read(iocb, iter, pos);
272 return nfs_file_direct_write(iocb, iter, pos); 271 return nfs_file_direct_write(iocb, iter);
273#endif /* CONFIG_NFS_SWAP */ 272#endif /* CONFIG_NFS_SWAP */
274} 273}
275 274
@@ -393,7 +392,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
393 long res = (long) dreq->error; 392 long res = (long) dreq->error;
394 if (!res) 393 if (!res)
395 res = (long) dreq->count; 394 res = (long) dreq->count;
396 aio_complete(dreq->iocb, res, 0); 395 dreq->iocb->ki_complete(dreq->iocb, res, 0);
397 } 396 }
398 397
399 complete_all(&dreq->completion); 398 complete_all(&dreq->completion);
@@ -960,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
960 * Note that O_APPEND is not supported for NFS direct writes, as there 959 * Note that O_APPEND is not supported for NFS direct writes, as there
961 * is no atomic O_APPEND write facility in the NFS protocol. 960 * is no atomic O_APPEND write facility in the NFS protocol.
962 */ 961 */
963ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, 962ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
964 loff_t pos)
965{ 963{
966 ssize_t result = -EINVAL; 964 ssize_t result = -EINVAL;
967 struct file *file = iocb->ki_filp; 965 struct file *file = iocb->ki_filp;
@@ -969,25 +967,16 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
969 struct inode *inode = mapping->host; 967 struct inode *inode = mapping->host;
970 struct nfs_direct_req *dreq; 968 struct nfs_direct_req *dreq;
971 struct nfs_lock_context *l_ctx; 969 struct nfs_lock_context *l_ctx;
972 loff_t end; 970 loff_t pos, end;
973 size_t count = iov_iter_count(iter);
974 end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
975
976 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
977 971
978 dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", 972 dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
979 file, count, (long long) pos); 973 file, iov_iter_count(iter), (long long) iocb->ki_pos);
980 974
981 result = generic_write_checks(file, &pos, &count, 0); 975 nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES,
982 if (result) 976 iov_iter_count(iter));
983 goto out;
984 977
985 result = -EINVAL; 978 pos = iocb->ki_pos;
986 if ((ssize_t) count < 0) 979 end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
987 goto out;
988 result = 0;
989 if (!count)
990 goto out;
991 980
992 mutex_lock(&inode->i_mutex); 981 mutex_lock(&inode->i_mutex);
993 982
@@ -1002,7 +991,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
1002 goto out_unlock; 991 goto out_unlock;
1003 } 992 }
1004 993
1005 task_io_account_write(count); 994 task_io_account_write(iov_iter_count(iter));
1006 995
1007 result = -ENOMEM; 996 result = -ENOMEM;
1008 dreq = nfs_direct_req_alloc(); 997 dreq = nfs_direct_req_alloc();
@@ -1010,7 +999,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
1010 goto out_unlock; 999 goto out_unlock;
1011 1000
1012 dreq->inode = inode; 1001 dreq->inode = inode;
1013 dreq->bytes_left = count; 1002 dreq->bytes_left = iov_iter_count(iter);
1014 dreq->io_start = pos; 1003 dreq->io_start = pos;
1015 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 1004 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
1016 l_ctx = nfs_get_lock_context(dreq->ctx); 1005 l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -1050,7 +1039,6 @@ out_release:
1050 nfs_direct_req_release(dreq); 1039 nfs_direct_req_release(dreq);
1051out_unlock: 1040out_unlock:
1052 mutex_unlock(&inode->i_mutex); 1041 mutex_unlock(&inode->i_mutex);
1053out:
1054 return result; 1042 return result;
1055} 1043}
1056 1044
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e679d24c39d3..c40e4363e746 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
26#include <linux/nfs_mount.h> 26#include <linux/nfs_mount.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/aio.h>
30#include <linux/gfp.h> 29#include <linux/gfp.h>
31#include <linux/swap.h> 30#include <linux/swap.h>
32 31
@@ -171,7 +170,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
171 struct inode *inode = file_inode(iocb->ki_filp); 170 struct inode *inode = file_inode(iocb->ki_filp);
172 ssize_t result; 171 ssize_t result;
173 172
174 if (iocb->ki_filp->f_flags & O_DIRECT) 173 if (iocb->ki_flags & IOCB_DIRECT)
175 return nfs_file_direct_read(iocb, to, iocb->ki_pos); 174 return nfs_file_direct_read(iocb, to, iocb->ki_pos);
176 175
177 dprintk("NFS: read(%pD2, %zu@%lu)\n", 176 dprintk("NFS: read(%pD2, %zu@%lu)\n",
@@ -675,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
675 unsigned long written = 0; 674 unsigned long written = 0;
676 ssize_t result; 675 ssize_t result;
677 size_t count = iov_iter_count(from); 676 size_t count = iov_iter_count(from);
678 loff_t pos = iocb->ki_pos;
679 677
680 result = nfs_key_timeout_notify(file, inode); 678 result = nfs_key_timeout_notify(file, inode);
681 if (result) 679 if (result)
682 return result; 680 return result;
683 681
684 if (file->f_flags & O_DIRECT) 682 if (iocb->ki_flags & IOCB_DIRECT) {
685 return nfs_file_direct_write(iocb, from, pos); 683 result = generic_write_checks(iocb, from);
684 if (result <= 0)
685 return result;
686 return nfs_file_direct_write(iocb, from);
687 }
686 688
687 dprintk("NFS: write(%pD2, %zu@%Ld)\n", 689 dprintk("NFS: write(%pD2, %zu@%Ld)\n",
688 file, count, (long long) pos); 690 file, count, (long long) iocb->ki_pos);
689 691
690 result = -EBUSY; 692 result = -EBUSY;
691 if (IS_SWAPFILE(inode)) 693 if (IS_SWAPFILE(inode))
@@ -693,7 +695,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
693 /* 695 /*
694 * O_APPEND implies that we must revalidate the file length. 696 * O_APPEND implies that we must revalidate the file length.
695 */ 697 */
696 if (file->f_flags & O_APPEND) { 698 if (iocb->ki_flags & IOCB_APPEND) {
697 result = nfs_revalidate_file_size(inode, file); 699 result = nfs_revalidate_file_size(inode, file);
698 if (result) 700 if (result)
699 goto out; 701 goto out;
@@ -927,8 +929,6 @@ EXPORT_SYMBOL_GPL(nfs_flock);
927 929
928const struct file_operations nfs_file_operations = { 930const struct file_operations nfs_file_operations = {
929 .llseek = nfs_file_llseek, 931 .llseek = nfs_file_llseek,
930 .read = new_sync_read,
931 .write = new_sync_write,
932 .read_iter = nfs_file_read, 932 .read_iter = nfs_file_read,
933 .write_iter = nfs_file_write, 933 .write_iter = nfs_file_write,
934 .mmap = nfs_file_mmap, 934 .mmap = nfs_file_mmap,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8b46389c4c5b..0181cde1d102 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -170,8 +170,6 @@ const struct file_operations nfs4_file_operations = {
170#else 170#else
171 .llseek = nfs_file_llseek, 171 .llseek = nfs_file_llseek,
172#endif 172#endif
173 .read = new_sync_read,
174 .write = new_sync_write,
175 .read_iter = nfs_file_read, 173 .read_iter = nfs_file_read,
176 .write_iter = nfs_file_write, 174 .write_iter = nfs_file_write,
177 .mmap = nfs_file_mmap, 175 .mmap = nfs_file_mmap,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 568ecf0a880f..b8f5c63f77b2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -117,15 +117,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
117 117
118static void nfs_readpage_release(struct nfs_page *req) 118static void nfs_readpage_release(struct nfs_page *req)
119{ 119{
120 struct inode *d_inode = req->wb_context->dentry->d_inode; 120 struct inode *inode = req->wb_context->dentry->d_inode;
121 121
122 dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, 122 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
123 (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, 123 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
124 (long long)req_offset(req)); 124 (long long)req_offset(req));
125 125
126 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { 126 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
127 if (PageUptodate(req->wb_page)) 127 if (PageUptodate(req->wb_page))
128 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 128 nfs_readpage_to_fscache(inode, req->wb_page, 0);
129 129
130 unlock_page(req->wb_page); 130 unlock_page(req->wb_page);
131 } 131 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 849ed784d6ac..759931088094 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1876 * request from the inode / page_private pointer and 1876 * request from the inode / page_private pointer and
1877 * release it */ 1877 * release it */
1878 nfs_inode_remove_request(req); 1878 nfs_inode_remove_request(req);
1879 /*
1880 * In case nfs_inode_remove_request has marked the
1881 * page as being dirty
1882 */
1883 cancel_dirty_page(page, PAGE_CACHE_SIZE);
1884 nfs_unlock_and_release_request(req); 1879 nfs_unlock_and_release_request(req);
1885 } 1880 }
1886 1881
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 683bf718aead..fc2d108f5272 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -6,6 +6,7 @@ config NFSD
6 select SUNRPC 6 select SUNRPC
7 select EXPORTFS 7 select EXPORTFS
8 select NFS_ACL_SUPPORT if NFSD_V2_ACL 8 select NFS_ACL_SUPPORT if NFSD_V2_ACL
9 depends on MULTIUSER
9 help 10 help
10 Choose Y here if you want to allow other computers to access 11 Choose Y here if you want to allow other computers to access
11 files residing on this system using Sun's Network File System 12 files residing on this system using Sun's Network File System
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8ba1d888f1e6..326a545ea7b2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4932,20 +4932,22 @@ nfs4_transform_lock_offset(struct file_lock *lock)
4932 lock->fl_end = OFFSET_MAX; 4932 lock->fl_end = OFFSET_MAX;
4933} 4933}
4934 4934
4935static void nfsd4_fl_get_owner(struct file_lock *dst, struct file_lock *src) 4935static fl_owner_t
4936nfsd4_fl_get_owner(fl_owner_t owner)
4936{ 4937{
4937 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)src->fl_owner; 4938 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
4938 dst->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lo->lo_owner)); 4939
4940 nfs4_get_stateowner(&lo->lo_owner);
4941 return owner;
4939} 4942}
4940 4943
4941static void nfsd4_fl_put_owner(struct file_lock *fl) 4944static void
4945nfsd4_fl_put_owner(fl_owner_t owner)
4942{ 4946{
4943 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; 4947 struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
4944 4948
4945 if (lo) { 4949 if (lo)
4946 nfs4_put_stateowner(&lo->lo_owner); 4950 nfs4_put_stateowner(&lo->lo_owner);
4947 fl->fl_owner = NULL;
4948 }
4949} 4951}
4950 4952
4951static const struct lock_manager_operations nfsd_posix_mng_ops = { 4953static const struct lock_manager_operations nfsd_posix_mng_ops = {
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 741fd02e0444..8df0f3b7839b 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -405,13 +405,14 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
405static int nilfs_palloc_count_desc_blocks(struct inode *inode, 405static int nilfs_palloc_count_desc_blocks(struct inode *inode,
406 unsigned long *desc_blocks) 406 unsigned long *desc_blocks)
407{ 407{
408 unsigned long blknum; 408 __u64 blknum;
409 int ret; 409 int ret;
410 410
411 ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum); 411 ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
412 if (likely(!ret)) 412 if (likely(!ret))
413 *desc_blocks = DIV_ROUND_UP( 413 *desc_blocks = DIV_ROUND_UP(
414 blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block); 414 (unsigned long)blknum,
415 NILFS_MDT(inode)->mi_blocks_per_desc_block);
415 return ret; 416 return ret;
416} 417}
417 418
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index aadbd0b5e3e8..27f75bcbeb30 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -152,9 +152,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
152 * 152 *
153 * %-EEXIST - A record associated with @key already exist. 153 * %-EEXIST - A record associated with @key already exist.
154 */ 154 */
155int nilfs_bmap_insert(struct nilfs_bmap *bmap, 155int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec)
156 unsigned long key,
157 unsigned long rec)
158{ 156{
159 int ret; 157 int ret;
160 158
@@ -191,19 +189,47 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
191 return bmap->b_ops->bop_delete(bmap, key); 189 return bmap->b_ops->bop_delete(bmap, key);
192} 190}
193 191
194int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) 192/**
193 * nilfs_bmap_seek_key - seek a valid entry and return its key
194 * @bmap: bmap struct
195 * @start: start key number
196 * @keyp: place to store valid key
197 *
198 * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap
199 * starting from @start, and stores it to @keyp if found.
200 *
201 * Return Value: On success, 0 is returned. On error, one of the following
202 * negative error codes is returned.
203 *
204 * %-EIO - I/O error.
205 *
206 * %-ENOMEM - Insufficient amount of memory available.
207 *
208 * %-ENOENT - No valid entry was found
209 */
210int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp)
195{ 211{
196 __u64 lastkey;
197 int ret; 212 int ret;
198 213
199 down_read(&bmap->b_sem); 214 down_read(&bmap->b_sem);
200 ret = bmap->b_ops->bop_last_key(bmap, &lastkey); 215 ret = bmap->b_ops->bop_seek_key(bmap, start, keyp);
216 up_read(&bmap->b_sem);
217
218 if (ret < 0)
219 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
220 return ret;
221}
222
223int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp)
224{
225 int ret;
226
227 down_read(&bmap->b_sem);
228 ret = bmap->b_ops->bop_last_key(bmap, keyp);
201 up_read(&bmap->b_sem); 229 up_read(&bmap->b_sem);
202 230
203 if (ret < 0) 231 if (ret < 0)
204 ret = nilfs_bmap_convert_error(bmap, __func__, ret); 232 ret = nilfs_bmap_convert_error(bmap, __func__, ret);
205 else
206 *key = lastkey;
207 return ret; 233 return ret;
208} 234}
209 235
@@ -224,7 +250,7 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
224 * 250 *
225 * %-ENOENT - A record associated with @key does not exist. 251 * %-ENOENT - A record associated with @key does not exist.
226 */ 252 */
227int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) 253int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key)
228{ 254{
229 int ret; 255 int ret;
230 256
@@ -235,7 +261,7 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
235 return nilfs_bmap_convert_error(bmap, __func__, ret); 261 return nilfs_bmap_convert_error(bmap, __func__, ret);
236} 262}
237 263
238static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) 264static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key)
239{ 265{
240 __u64 lastkey; 266 __u64 lastkey;
241 int ret; 267 int ret;
@@ -276,7 +302,7 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
276 * 302 *
277 * %-ENOMEM - Insufficient amount of memory available. 303 * %-ENOMEM - Insufficient amount of memory available.
278 */ 304 */
279int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) 305int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key)
280{ 306{
281 int ret; 307 int ret;
282 308
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b89e68076adc..bfa817ce40b3 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -76,8 +76,10 @@ struct nilfs_bmap_operations {
76 union nilfs_binfo *); 76 union nilfs_binfo *);
77 int (*bop_mark)(struct nilfs_bmap *, __u64, int); 77 int (*bop_mark)(struct nilfs_bmap *, __u64, int);
78 78
79 /* The following functions are internal use only. */ 79 int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *);
80 int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); 80 int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
81
82 /* The following functions are internal use only. */
81 int (*bop_check_insert)(const struct nilfs_bmap *, __u64); 83 int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
82 int (*bop_check_delete)(struct nilfs_bmap *, __u64); 84 int (*bop_check_delete)(struct nilfs_bmap *, __u64);
83 int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); 85 int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
@@ -153,10 +155,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
153int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); 155int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
154void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); 156void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
155int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); 157int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
156int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); 158int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec);
157int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); 159int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key);
158int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); 160int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp);
159int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long); 161int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp);
162int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key);
160void nilfs_bmap_clear(struct nilfs_bmap *); 163void nilfs_bmap_clear(struct nilfs_bmap *);
161int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *); 164int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *);
162void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *); 165void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index ecdbae19a766..059f37137f9a 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -633,6 +633,44 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
633 return 0; 633 return 0;
634} 634}
635 635
636/**
637 * nilfs_btree_get_next_key - get next valid key from btree path array
638 * @btree: bmap struct of btree
639 * @path: array of nilfs_btree_path struct
640 * @minlevel: start level
641 * @nextkey: place to store the next valid key
642 *
643 * Return Value: If a next key was found, 0 is returned. Otherwise,
644 * -ENOENT is returned.
645 */
646static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
647 const struct nilfs_btree_path *path,
648 int minlevel, __u64 *nextkey)
649{
650 struct nilfs_btree_node *node;
651 int maxlevel = nilfs_btree_height(btree) - 1;
652 int index, next_adj, level;
653
654 /* Next index is already set to bp_index for leaf nodes. */
655 next_adj = 0;
656 for (level = minlevel; level <= maxlevel; level++) {
657 if (level == maxlevel)
658 node = nilfs_btree_get_root(btree);
659 else
660 node = nilfs_btree_get_nonroot_node(path, level);
661
662 index = path[level].bp_index + next_adj;
663 if (index < nilfs_btree_node_get_nchildren(node)) {
664 /* Next key is in this node */
665 *nextkey = nilfs_btree_node_get_key(node, index);
666 return 0;
667 }
668 /* For non-leaf nodes, next index is stored at bp_index + 1. */
669 next_adj = 1;
670 }
671 return -ENOENT;
672}
673
636static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 674static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
637 __u64 key, int level, __u64 *ptrp) 675 __u64 key, int level, __u64 *ptrp)
638{ 676{
@@ -1563,6 +1601,27 @@ out:
1563 return ret; 1601 return ret;
1564} 1602}
1565 1603
1604static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1605 __u64 *keyp)
1606{
1607 struct nilfs_btree_path *path;
1608 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1609 int ret;
1610
1611 path = nilfs_btree_alloc_path();
1612 if (!path)
1613 return -ENOMEM;
1614
1615 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1616 if (!ret)
1617 *keyp = start;
1618 else if (ret == -ENOENT)
1619 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1620
1621 nilfs_btree_free_path(path);
1622 return ret;
1623}
1624
1566static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1625static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1567{ 1626{
1568 struct nilfs_btree_path *path; 1627 struct nilfs_btree_path *path;
@@ -2298,7 +2357,9 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = {
2298 .bop_assign = nilfs_btree_assign, 2357 .bop_assign = nilfs_btree_assign,
2299 .bop_mark = nilfs_btree_mark, 2358 .bop_mark = nilfs_btree_mark,
2300 2359
2360 .bop_seek_key = nilfs_btree_seek_key,
2301 .bop_last_key = nilfs_btree_last_key, 2361 .bop_last_key = nilfs_btree_last_key,
2362
2302 .bop_check_insert = NULL, 2363 .bop_check_insert = NULL,
2303 .bop_check_delete = nilfs_btree_check_delete, 2364 .bop_check_delete = nilfs_btree_check_delete,
2304 .bop_gather_data = nilfs_btree_gather_data, 2365 .bop_gather_data = nilfs_btree_gather_data,
@@ -2318,7 +2379,9 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2318 .bop_assign = nilfs_btree_assign_gc, 2379 .bop_assign = nilfs_btree_assign_gc,
2319 .bop_mark = NULL, 2380 .bop_mark = NULL,
2320 2381
2382 .bop_seek_key = NULL,
2321 .bop_last_key = NULL, 2383 .bop_last_key = NULL,
2384
2322 .bop_check_insert = NULL, 2385 .bop_check_insert = NULL,
2323 .bop_check_delete = NULL, 2386 .bop_check_delete = NULL,
2324 .bop_gather_data = NULL, 2387 .bop_gather_data = NULL,
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 0d58075f34e2..b6596cab9e99 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -53,6 +53,13 @@ nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
53 return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); 53 return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
54} 54}
55 55
56static __u64 nilfs_cpfile_first_checkpoint_in_block(const struct inode *cpfile,
57 unsigned long blkoff)
58{
59 return (__u64)nilfs_cpfile_checkpoints_per_block(cpfile) * blkoff
60 + 1 - NILFS_MDT(cpfile)->mi_first_entry_offset;
61}
62
56static unsigned long 63static unsigned long
57nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile, 64nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
58 __u64 curr, 65 __u64 curr,
@@ -146,6 +153,44 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
146 create, nilfs_cpfile_block_init, bhp); 153 create, nilfs_cpfile_block_init, bhp);
147} 154}
148 155
156/**
157 * nilfs_cpfile_find_checkpoint_block - find and get a buffer on cpfile
158 * @cpfile: inode of cpfile
159 * @start_cno: start checkpoint number (inclusive)
160 * @end_cno: end checkpoint number (inclusive)
161 * @cnop: place to store the next checkpoint number
162 * @bhp: place to store a pointer to buffer_head struct
163 *
164 * Return Value: On success, it returns 0. On error, the following negative
165 * error code is returned.
166 *
167 * %-ENOMEM - Insufficient memory available.
168 *
169 * %-EIO - I/O error
170 *
171 * %-ENOENT - no block exists in the range.
172 */
173static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile,
174 __u64 start_cno, __u64 end_cno,
175 __u64 *cnop,
176 struct buffer_head **bhp)
177{
178 unsigned long start, end, blkoff;
179 int ret;
180
181 if (unlikely(start_cno > end_cno))
182 return -ENOENT;
183
184 start = nilfs_cpfile_get_blkoff(cpfile, start_cno);
185 end = nilfs_cpfile_get_blkoff(cpfile, end_cno);
186
187 ret = nilfs_mdt_find_block(cpfile, start, end, &blkoff, bhp);
188 if (!ret)
189 *cnop = (blkoff == start) ? start_cno :
190 nilfs_cpfile_first_checkpoint_in_block(cpfile, blkoff);
191 return ret;
192}
193
149static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, 194static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
150 __u64 cno) 195 __u64 cno)
151{ 196{
@@ -403,14 +448,15 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
403 return -ENOENT; /* checkpoint number 0 is invalid */ 448 return -ENOENT; /* checkpoint number 0 is invalid */
404 down_read(&NILFS_MDT(cpfile)->mi_sem); 449 down_read(&NILFS_MDT(cpfile)->mi_sem);
405 450
406 for (n = 0; cno < cur_cno && n < nci; cno += ncps) { 451 for (n = 0; n < nci; cno += ncps) {
407 ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); 452 ret = nilfs_cpfile_find_checkpoint_block(
408 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 453 cpfile, cno, cur_cno - 1, &cno, &bh);
409 if (ret < 0) { 454 if (ret < 0) {
410 if (ret != -ENOENT) 455 if (likely(ret == -ENOENT))
411 goto out; 456 break;
412 continue; /* skip hole */ 457 goto out;
413 } 458 }
459 ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
414 460
415 kaddr = kmap_atomic(bh->b_page); 461 kaddr = kmap_atomic(bh->b_page);
416 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 462 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 82f4865e86dd..ebf89fd8ac1a 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -173,6 +173,21 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
173 return ret; 173 return ret;
174} 174}
175 175
176static int nilfs_direct_seek_key(const struct nilfs_bmap *direct, __u64 start,
177 __u64 *keyp)
178{
179 __u64 key;
180
181 for (key = start; key <= NILFS_DIRECT_KEY_MAX; key++) {
182 if (nilfs_direct_get_ptr(direct, key) !=
183 NILFS_BMAP_INVALID_PTR) {
184 *keyp = key;
185 return 0;
186 }
187 }
188 return -ENOENT;
189}
190
176static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) 191static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
177{ 192{
178 __u64 key, lastkey; 193 __u64 key, lastkey;
@@ -355,7 +370,9 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = {
355 .bop_assign = nilfs_direct_assign, 370 .bop_assign = nilfs_direct_assign,
356 .bop_mark = NULL, 371 .bop_mark = NULL,
357 372
373 .bop_seek_key = nilfs_direct_seek_key,
358 .bop_last_key = nilfs_direct_last_key, 374 .bop_last_key = nilfs_direct_last_key,
375
359 .bop_check_insert = nilfs_direct_check_insert, 376 .bop_check_insert = nilfs_direct_check_insert,
360 .bop_check_delete = NULL, 377 .bop_check_delete = NULL,
361 .bop_gather_data = nilfs_direct_gather_data, 378 .bop_gather_data = nilfs_direct_gather_data,
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a8c728acb7a8..54575e3cc1a2 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -143,8 +143,6 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
143 */ 143 */
144const struct file_operations nilfs_file_operations = { 144const struct file_operations nilfs_file_operations = {
145 .llseek = generic_file_llseek, 145 .llseek = generic_file_llseek,
146 .read = new_sync_read,
147 .write = new_sync_write,
148 .read_iter = generic_file_read_iter, 146 .read_iter = generic_file_read_iter,
149 .write_iter = generic_file_write_iter, 147 .write_iter = generic_file_write_iter,
150 .unlocked_ioctl = nilfs_ioctl, 148 .unlocked_ioctl = nilfs_ioctl,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b5969538f39..be936df4ba73 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/pagemap.h> 27#include <linux/pagemap.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/aio.h> 29#include <linux/uio.h>
30#include "nilfs.h" 30#include "nilfs.h"
31#include "btnode.h" 31#include "btnode.h"
32#include "segment.h" 32#include "segment.h"
@@ -106,7 +106,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
106 err = nilfs_transaction_begin(inode->i_sb, &ti, 1); 106 err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
107 if (unlikely(err)) 107 if (unlikely(err))
108 goto out; 108 goto out;
109 err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff, 109 err = nilfs_bmap_insert(ii->i_bmap, blkoff,
110 (unsigned long)bh_result); 110 (unsigned long)bh_result);
111 if (unlikely(err != 0)) { 111 if (unlikely(err != 0)) {
112 if (err == -EEXIST) { 112 if (err == -EEXIST) {
@@ -305,8 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
305} 305}
306 306
307static ssize_t 307static ssize_t
308nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, 308nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
309 loff_t offset)
310{ 309{
311 struct file *file = iocb->ki_filp; 310 struct file *file = iocb->ki_filp;
312 struct address_space *mapping = file->f_mapping; 311 struct address_space *mapping = file->f_mapping;
@@ -314,18 +313,17 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
314 size_t count = iov_iter_count(iter); 313 size_t count = iov_iter_count(iter);
315 ssize_t size; 314 ssize_t size;
316 315
317 if (rw == WRITE) 316 if (iov_iter_rw(iter) == WRITE)
318 return 0; 317 return 0;
319 318
320 /* Needs synchronization with the cleaner */ 319 /* Needs synchronization with the cleaner */
321 size = blockdev_direct_IO(rw, iocb, inode, iter, offset, 320 size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block);
322 nilfs_get_block);
323 321
324 /* 322 /*
325 * In case of error extending write may have instantiated a few 323 * In case of error extending write may have instantiated a few
326 * blocks outside i_size. Trim these off again. 324 * blocks outside i_size. Trim these off again.
327 */ 325 */
328 if (unlikely((rw & WRITE) && size < 0)) { 326 if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) {
329 loff_t isize = i_size_read(inode); 327 loff_t isize = i_size_read(inode);
330 loff_t end = offset + count; 328 loff_t end = offset + count;
331 329
@@ -443,21 +441,20 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
443void nilfs_set_inode_flags(struct inode *inode) 441void nilfs_set_inode_flags(struct inode *inode)
444{ 442{
445 unsigned int flags = NILFS_I(inode)->i_flags; 443 unsigned int flags = NILFS_I(inode)->i_flags;
444 unsigned int new_fl = 0;
446 445
447 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
448 S_DIRSYNC);
449 if (flags & FS_SYNC_FL) 446 if (flags & FS_SYNC_FL)
450 inode->i_flags |= S_SYNC; 447 new_fl |= S_SYNC;
451 if (flags & FS_APPEND_FL) 448 if (flags & FS_APPEND_FL)
452 inode->i_flags |= S_APPEND; 449 new_fl |= S_APPEND;
453 if (flags & FS_IMMUTABLE_FL) 450 if (flags & FS_IMMUTABLE_FL)
454 inode->i_flags |= S_IMMUTABLE; 451 new_fl |= S_IMMUTABLE;
455 if (flags & FS_NOATIME_FL) 452 if (flags & FS_NOATIME_FL)
456 inode->i_flags |= S_NOATIME; 453 new_fl |= S_NOATIME;
457 if (flags & FS_DIRSYNC_FL) 454 if (flags & FS_DIRSYNC_FL)
458 inode->i_flags |= S_DIRSYNC; 455 new_fl |= S_DIRSYNC;
459 mapping_set_gfp_mask(inode->i_mapping, 456 inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
460 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 457 S_NOATIME | S_DIRSYNC);
461} 458}
462 459
463int nilfs_read_inode_common(struct inode *inode, 460int nilfs_read_inode_common(struct inode *inode,
@@ -542,6 +539,8 @@ static int __nilfs_read_inode(struct super_block *sb,
542 brelse(bh); 539 brelse(bh);
543 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); 540 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
544 nilfs_set_inode_flags(inode); 541 nilfs_set_inode_flags(inode);
542 mapping_set_gfp_mask(inode->i_mapping,
543 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
545 return 0; 544 return 0;
546 545
547 failed_unmap: 546 failed_unmap:
@@ -714,7 +713,7 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
714static void nilfs_truncate_bmap(struct nilfs_inode_info *ii, 713static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
715 unsigned long from) 714 unsigned long from)
716{ 715{
717 unsigned long b; 716 __u64 b;
718 int ret; 717 int ret;
719 718
720 if (!test_bit(NILFS_I_BMAP, &ii->i_state)) 719 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
@@ -729,7 +728,7 @@ repeat:
729 if (b < from) 728 if (b < from)
730 return; 729 return;
731 730
732 b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from); 731 b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
733 ret = nilfs_bmap_truncate(ii->i_bmap, b); 732 ret = nilfs_bmap_truncate(ii->i_bmap, b);
734 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb); 733 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
735 if (!ret || (ret == -ENOMEM && 734 if (!ret || (ret == -ENOMEM &&
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 892cf5ffdb8e..dee34d990281 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -261,6 +261,60 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
261} 261}
262 262
263/** 263/**
264 * nilfs_mdt_find_block - find and get a buffer on meta data file.
265 * @inode: inode of the meta data file
266 * @start: start block offset (inclusive)
267 * @end: end block offset (inclusive)
268 * @blkoff: block offset
269 * @out_bh: place to store a pointer to buffer_head struct
270 *
271 * nilfs_mdt_find_block() looks up an existing block in range of
272 * [@start, @end] and stores pointer to a buffer head of the block to
273 * @out_bh, and block offset to @blkoff, respectively. @out_bh and
274 * @blkoff are substituted only when zero is returned.
275 *
276 * Return Value: On success, it returns 0. On error, the following negative
277 * error code is returned.
278 *
279 * %-ENOMEM - Insufficient memory available.
280 *
281 * %-EIO - I/O error
282 *
283 * %-ENOENT - no block was found in the range
284 */
285int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
286 unsigned long end, unsigned long *blkoff,
287 struct buffer_head **out_bh)
288{
289 __u64 next;
290 int ret;
291
292 if (unlikely(start > end))
293 return -ENOENT;
294
295 ret = nilfs_mdt_read_block(inode, start, true, out_bh);
296 if (!ret) {
297 *blkoff = start;
298 goto out;
299 }
300 if (unlikely(ret != -ENOENT || start == ULONG_MAX))
301 goto out;
302
303 ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
304 if (!ret) {
305 if (next <= end) {
306 ret = nilfs_mdt_read_block(inode, next, true, out_bh);
307 if (!ret)
308 *blkoff = next;
309 } else {
310 ret = -ENOENT;
311 }
312 }
313out:
314 return ret;
315}
316
317/**
264 * nilfs_mdt_delete_block - make a hole on the meta data file. 318 * nilfs_mdt_delete_block - make a hole on the meta data file.
265 * @inode: inode of the meta data file 319 * @inode: inode of the meta data file
266 * @block: block offset 320 * @block: block offset
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ab172e8549c5..fe529a87a208 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -78,6 +78,9 @@ int nilfs_mdt_get_block(struct inode *, unsigned long, int,
78 void (*init_block)(struct inode *, 78 void (*init_block)(struct inode *,
79 struct buffer_head *, void *), 79 struct buffer_head *, void *),
80 struct buffer_head **); 80 struct buffer_head **);
81int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
82 unsigned long end, unsigned long *blkoff,
83 struct buffer_head **out_bh);
81int nilfs_mdt_delete_block(struct inode *, unsigned long); 84int nilfs_mdt_delete_block(struct inode *, unsigned long);
82int nilfs_mdt_forget_block(struct inode *, unsigned long); 85int nilfs_mdt_forget_block(struct inode *, unsigned long);
83int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); 86int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
@@ -111,7 +114,10 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
111 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno; 114 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 115}
113 116
114#define nilfs_mdt_bgl_lock(inode, bg) \ 117static inline spinlock_t *
115 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) 118nilfs_mdt_bgl_lock(struct inode *inode, unsigned int block_group)
119{
120 return bgl_lock_ptr(NILFS_MDT(inode)->mi_bgl, block_group);
121}
116 122
117#endif /* _NILFS_MDT_H */ 123#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 700ecbcca55d..45d650addd56 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -89,18 +89,16 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
89void nilfs_forget_buffer(struct buffer_head *bh) 89void nilfs_forget_buffer(struct buffer_head *bh)
90{ 90{
91 struct page *page = bh->b_page; 91 struct page *page = bh->b_page;
92 const unsigned long clear_bits =
93 (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
94 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
95 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
92 96
93 lock_buffer(bh); 97 lock_buffer(bh);
94 clear_buffer_nilfs_volatile(bh); 98 set_mask_bits(&bh->b_state, clear_bits, 0);
95 clear_buffer_nilfs_checked(bh);
96 clear_buffer_nilfs_redirected(bh);
97 clear_buffer_async_write(bh);
98 clear_buffer_dirty(bh);
99 if (nilfs_page_buffers_clean(page)) 99 if (nilfs_page_buffers_clean(page))
100 __nilfs_clear_page_dirty(page); 100 __nilfs_clear_page_dirty(page);
101 101
102 clear_buffer_uptodate(bh);
103 clear_buffer_mapped(bh);
104 bh->b_blocknr = -1; 102 bh->b_blocknr = -1;
105 ClearPageUptodate(page); 103 ClearPageUptodate(page);
106 ClearPageMappedToDisk(page); 104 ClearPageMappedToDisk(page);
@@ -421,6 +419,10 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
421 419
422 if (page_has_buffers(page)) { 420 if (page_has_buffers(page)) {
423 struct buffer_head *bh, *head; 421 struct buffer_head *bh, *head;
422 const unsigned long clear_bits =
423 (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
424 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
425 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
424 426
425 bh = head = page_buffers(page); 427 bh = head = page_buffers(page);
426 do { 428 do {
@@ -430,13 +432,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
430 "discard block %llu, size %zu", 432 "discard block %llu, size %zu",
431 (u64)bh->b_blocknr, bh->b_size); 433 (u64)bh->b_blocknr, bh->b_size);
432 } 434 }
433 clear_buffer_async_write(bh); 435 set_mask_bits(&bh->b_state, clear_bits, 0);
434 clear_buffer_dirty(bh);
435 clear_buffer_nilfs_volatile(bh);
436 clear_buffer_nilfs_checked(bh);
437 clear_buffer_nilfs_redirected(bh);
438 clear_buffer_uptodate(bh);
439 clear_buffer_mapped(bh);
440 unlock_buffer(bh); 436 unlock_buffer(bh);
441 } while (bh = bh->b_this_page, bh != head); 437 } while (bh = bh->b_this_page, bh != head);
442 } 438 }
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0c3f303baf32..c6abbad9b8e3 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -24,6 +24,7 @@
24#include <linux/pagemap.h> 24#include <linux/pagemap.h>
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include <linux/writeback.h> 26#include <linux/writeback.h>
27#include <linux/bitops.h>
27#include <linux/bio.h> 28#include <linux/bio.h>
28#include <linux/completion.h> 29#include <linux/completion.h>
29#include <linux/blkdev.h> 30#include <linux/blkdev.h>
@@ -1588,7 +1589,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1588 1589
1589 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1590 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1590 b_assoc_buffers) { 1591 b_assoc_buffers) {
1591 set_buffer_async_write(bh);
1592 if (bh->b_page != bd_page) { 1592 if (bh->b_page != bd_page) {
1593 if (bd_page) { 1593 if (bd_page) {
1594 lock_page(bd_page); 1594 lock_page(bd_page);
@@ -1688,7 +1688,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
1688 list_for_each_entry(segbuf, logs, sb_list) { 1688 list_for_each_entry(segbuf, logs, sb_list) {
1689 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1689 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1690 b_assoc_buffers) { 1690 b_assoc_buffers) {
1691 clear_buffer_async_write(bh);
1692 if (bh->b_page != bd_page) { 1691 if (bh->b_page != bd_page) {
1693 if (bd_page) 1692 if (bd_page)
1694 end_page_writeback(bd_page); 1693 end_page_writeback(bd_page);
@@ -1768,7 +1767,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1768 b_assoc_buffers) { 1767 b_assoc_buffers) {
1769 set_buffer_uptodate(bh); 1768 set_buffer_uptodate(bh);
1770 clear_buffer_dirty(bh); 1769 clear_buffer_dirty(bh);
1771 clear_buffer_async_write(bh);
1772 if (bh->b_page != bd_page) { 1770 if (bh->b_page != bd_page) {
1773 if (bd_page) 1771 if (bd_page)
1774 end_page_writeback(bd_page); 1772 end_page_writeback(bd_page);
@@ -1788,12 +1786,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1788 */ 1786 */
1789 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1787 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1790 b_assoc_buffers) { 1788 b_assoc_buffers) {
1791 set_buffer_uptodate(bh); 1789 const unsigned long set_bits = (1 << BH_Uptodate);
1792 clear_buffer_dirty(bh); 1790 const unsigned long clear_bits =
1793 clear_buffer_async_write(bh); 1791 (1 << BH_Dirty | 1 << BH_Async_Write |
1794 clear_buffer_delay(bh); 1792 1 << BH_Delay | 1 << BH_NILFS_Volatile |
1795 clear_buffer_nilfs_volatile(bh); 1793 1 << BH_NILFS_Redirected);
1796 clear_buffer_nilfs_redirected(bh); 1794
1795 set_mask_bits(&bh->b_state, clear_bits, set_bits);
1797 if (bh == segbuf->sb_super_root) { 1796 if (bh == segbuf->sb_super_root) {
1798 if (bh->b_page != bd_page) { 1797 if (bh->b_page != bd_page) {
1799 end_page_writeback(bd_page); 1798 end_page_writeback(bd_page);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5bc2a1cf73c3..c1725f20a9d1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1020,7 +1020,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
1020 struct dentry *dentry; 1020 struct dentry *dentry;
1021 int ret; 1021 int ret;
1022 1022
1023 if (cno < 0 || cno > nilfs->ns_cno) 1023 if (cno > nilfs->ns_cno)
1024 return false; 1024 return false;
1025 1025
1026 if (cno >= nilfs_last_cno(nilfs)) 1026 if (cno >= nilfs_last_cno(nilfs))
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c4..2ff263e6d363 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
8 8
9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ccflags-y := -DNTFS_VERSION=\"2.1.31\" 11ccflags-y := -DNTFS_VERSION=\"2.1.32\"
12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d184dc..7bb487e663b4 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. 2 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. 4 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
5 * 5 *
6 * This program/include file is free software; you can redistribute it and/or 6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published 7 * modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
28#include <linux/swap.h> 28#include <linux/swap.h>
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/writeback.h> 30#include <linux/writeback.h>
31#include <linux/aio.h>
32 31
33#include <asm/page.h> 32#include <asm/page.h>
34#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -329,62 +328,166 @@ err_out:
329 return err; 328 return err;
330} 329}
331 330
332/** 331static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
333 * ntfs_fault_in_pages_readable - 332 struct iov_iter *from)
334 *
335 * Fault a number of userspace pages into pagetables.
336 *
337 * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
338 * with more than two userspace pages as well as handling the single page case
339 * elegantly.
340 *
341 * If you find this difficult to understand, then think of the while loop being
342 * the following code, except that we do without the integer variable ret:
343 *
344 * do {
345 * ret = __get_user(c, uaddr);
346 * uaddr += PAGE_SIZE;
347 * } while (!ret && uaddr < end);
348 *
349 * Note, the final __get_user() may well run out-of-bounds of the user buffer,
350 * but _not_ out-of-bounds of the page the user buffer belongs to, and since
351 * this is only a read and not a write, and since it is still in the same page,
352 * it should not matter and this makes the code much simpler.
353 */
354static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
355 int bytes)
356{
357 const char __user *end;
358 volatile char c;
359
360 /* Set @end to the first byte outside the last page we care about. */
361 end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
362
363 while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
364 ;
365}
366
367/**
368 * ntfs_fault_in_pages_readable_iovec -
369 *
370 * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
371 */
372static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
373 size_t iov_ofs, int bytes)
374{ 333{
375 do { 334 loff_t pos;
376 const char __user *buf; 335 s64 end, ll;
377 unsigned len; 336 ssize_t err;
337 unsigned long flags;
338 struct file *file = iocb->ki_filp;
339 struct inode *vi = file_inode(file);
340 ntfs_inode *base_ni, *ni = NTFS_I(vi);
341 ntfs_volume *vol = ni->vol;
378 342
379 buf = iov->iov_base + iov_ofs; 343 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
380 len = iov->iov_len - iov_ofs; 344 "0x%llx, count 0x%zx.", vi->i_ino,
381 if (len > bytes) 345 (unsigned)le32_to_cpu(ni->type),
382 len = bytes; 346 (unsigned long long)iocb->ki_pos,
383 ntfs_fault_in_pages_readable(buf, len); 347 iov_iter_count(from));
384 bytes -= len; 348 err = generic_write_checks(iocb, from);
385 iov++; 349 if (unlikely(err <= 0))
386 iov_ofs = 0; 350 goto out;
387 } while (bytes); 351 /*
352 * All checks have passed. Before we start doing any writing we want
353 * to abort any totally illegal writes.
354 */
355 BUG_ON(NInoMstProtected(ni));
356 BUG_ON(ni->type != AT_DATA);
357 /* If file is encrypted, deny access, just like NT4. */
358 if (NInoEncrypted(ni)) {
359 /* Only $DATA attributes can be encrypted. */
360 /*
361 * Reminder for later: Encrypted files are _always_
362 * non-resident so that the content can always be encrypted.
363 */
364 ntfs_debug("Denying write access to encrypted file.");
365 err = -EACCES;
366 goto out;
367 }
368 if (NInoCompressed(ni)) {
369 /* Only unnamed $DATA attribute can be compressed. */
370 BUG_ON(ni->name_len);
371 /*
372 * Reminder for later: If resident, the data is not actually
373 * compressed. Only on the switch to non-resident does
374 * compression kick in. This is in contrast to encrypted files
375 * (see above).
376 */
377 ntfs_error(vi->i_sb, "Writing to compressed files is not "
378 "implemented yet. Sorry.");
379 err = -EOPNOTSUPP;
380 goto out;
381 }
382 base_ni = ni;
383 if (NInoAttr(ni))
384 base_ni = ni->ext.base_ntfs_ino;
385 err = file_remove_suid(file);
386 if (unlikely(err))
387 goto out;
388 /*
389 * Our ->update_time method always succeeds thus file_update_time()
390 * cannot fail either so there is no need to check the return code.
391 */
392 file_update_time(file);
393 pos = iocb->ki_pos;
394 /* The first byte after the last cluster being written to. */
395 end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
396 ~(u64)vol->cluster_size_mask;
397 /*
398 * If the write goes beyond the allocated size, extend the allocation
399 * to cover the whole of the write, rounded up to the nearest cluster.
400 */
401 read_lock_irqsave(&ni->size_lock, flags);
402 ll = ni->allocated_size;
403 read_unlock_irqrestore(&ni->size_lock, flags);
404 if (end > ll) {
405 /*
406 * Extend the allocation without changing the data size.
407 *
408 * Note we ensure the allocation is big enough to at least
409 * write some data but we do not require the allocation to be
410 * complete, i.e. it may be partial.
411 */
412 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
413 if (likely(ll >= 0)) {
414 BUG_ON(pos >= ll);
415 /* If the extension was partial truncate the write. */
416 if (end > ll) {
417 ntfs_debug("Truncating write to inode 0x%lx, "
418 "attribute type 0x%x, because "
419 "the allocation was only "
420 "partially extended.",
421 vi->i_ino, (unsigned)
422 le32_to_cpu(ni->type));
423 iov_iter_truncate(from, ll - pos);
424 }
425 } else {
426 err = ll;
427 read_lock_irqsave(&ni->size_lock, flags);
428 ll = ni->allocated_size;
429 read_unlock_irqrestore(&ni->size_lock, flags);
430 /* Perform a partial write if possible or fail. */
431 if (pos < ll) {
432 ntfs_debug("Truncating write to inode 0x%lx "
433 "attribute type 0x%x, because "
434 "extending the allocation "
435 "failed (error %d).",
436 vi->i_ino, (unsigned)
437 le32_to_cpu(ni->type),
438 (int)-err);
439 iov_iter_truncate(from, ll - pos);
440 } else {
441 if (err != -ENOSPC)
442 ntfs_error(vi->i_sb, "Cannot perform "
443 "write to inode "
444 "0x%lx, attribute "
445 "type 0x%x, because "
446 "extending the "
447 "allocation failed "
448 "(error %ld).",
449 vi->i_ino, (unsigned)
450 le32_to_cpu(ni->type),
451 (long)-err);
452 else
453 ntfs_debug("Cannot perform write to "
454 "inode 0x%lx, "
455 "attribute type 0x%x, "
456 "because there is not "
457 "space left.",
458 vi->i_ino, (unsigned)
459 le32_to_cpu(ni->type));
460 goto out;
461 }
462 }
463 }
464 /*
465 * If the write starts beyond the initialized size, extend it up to the
466 * beginning of the write and initialize all non-sparse space between
467 * the old initialized size and the new one. This automatically also
468 * increments the vfs inode->i_size to keep it above or equal to the
469 * initialized_size.
470 */
471 read_lock_irqsave(&ni->size_lock, flags);
472 ll = ni->initialized_size;
473 read_unlock_irqrestore(&ni->size_lock, flags);
474 if (pos > ll) {
475 /*
476 * Wait for ongoing direct i/o to complete before proceeding.
477 * New direct i/o cannot start as we hold i_mutex.
478 */
479 inode_dio_wait(vi);
480 err = ntfs_attr_extend_initialized(ni, pos);
481 if (unlikely(err < 0))
482 ntfs_error(vi->i_sb, "Cannot perform write to inode "
483 "0x%lx, attribute type 0x%x, because "
484 "extending the initialized size "
485 "failed (error %d).", vi->i_ino,
486 (unsigned)le32_to_cpu(ni->type),
487 (int)-err);
488 }
489out:
490 return err;
388} 491}
389 492
390/** 493/**
@@ -421,8 +524,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
421 goto err_out; 524 goto err_out;
422 } 525 }
423 } 526 }
424 err = add_to_page_cache_lru(*cached_page, mapping, index, 527 err = add_to_page_cache_lru(*cached_page, mapping,
425 GFP_KERNEL); 528 index, GFP_KERNEL);
426 if (unlikely(err)) { 529 if (unlikely(err)) {
427 if (err == -EEXIST) 530 if (err == -EEXIST)
428 continue; 531 continue;
@@ -1268,180 +1371,6 @@ rl_not_mapped_enoent:
1268 return err; 1371 return err;
1269} 1372}
1270 1373
1271/*
1272 * Copy as much as we can into the pages and return the number of bytes which
1273 * were successfully copied. If a fault is encountered then clear the pages
1274 * out to (ofs + bytes) and return the number of bytes which were copied.
1275 */
1276static inline size_t ntfs_copy_from_user(struct page **pages,
1277 unsigned nr_pages, unsigned ofs, const char __user *buf,
1278 size_t bytes)
1279{
1280 struct page **last_page = pages + nr_pages;
1281 char *addr;
1282 size_t total = 0;
1283 unsigned len;
1284 int left;
1285
1286 do {
1287 len = PAGE_CACHE_SIZE - ofs;
1288 if (len > bytes)
1289 len = bytes;
1290 addr = kmap_atomic(*pages);
1291 left = __copy_from_user_inatomic(addr + ofs, buf, len);
1292 kunmap_atomic(addr);
1293 if (unlikely(left)) {
1294 /* Do it the slow way. */
1295 addr = kmap(*pages);
1296 left = __copy_from_user(addr + ofs, buf, len);
1297 kunmap(*pages);
1298 if (unlikely(left))
1299 goto err_out;
1300 }
1301 total += len;
1302 bytes -= len;
1303 if (!bytes)
1304 break;
1305 buf += len;
1306 ofs = 0;
1307 } while (++pages < last_page);
1308out:
1309 return total;
1310err_out:
1311 total += len - left;
1312 /* Zero the rest of the target like __copy_from_user(). */
1313 while (++pages < last_page) {
1314 bytes -= len;
1315 if (!bytes)
1316 break;
1317 len = PAGE_CACHE_SIZE;
1318 if (len > bytes)
1319 len = bytes;
1320 zero_user(*pages, 0, len);
1321 }
1322 goto out;
1323}
1324
1325static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
1326 const struct iovec *iov, size_t iov_ofs, size_t bytes)
1327{
1328 size_t total = 0;
1329
1330 while (1) {
1331 const char __user *buf = iov->iov_base + iov_ofs;
1332 unsigned len;
1333 size_t left;
1334
1335 len = iov->iov_len - iov_ofs;
1336 if (len > bytes)
1337 len = bytes;
1338 left = __copy_from_user_inatomic(vaddr, buf, len);
1339 total += len;
1340 bytes -= len;
1341 vaddr += len;
1342 if (unlikely(left)) {
1343 total -= left;
1344 break;
1345 }
1346 if (!bytes)
1347 break;
1348 iov++;
1349 iov_ofs = 0;
1350 }
1351 return total;
1352}
1353
1354static inline void ntfs_set_next_iovec(const struct iovec **iovp,
1355 size_t *iov_ofsp, size_t bytes)
1356{
1357 const struct iovec *iov = *iovp;
1358 size_t iov_ofs = *iov_ofsp;
1359
1360 while (bytes) {
1361 unsigned len;
1362
1363 len = iov->iov_len - iov_ofs;
1364 if (len > bytes)
1365 len = bytes;
1366 bytes -= len;
1367 iov_ofs += len;
1368 if (iov->iov_len == iov_ofs) {
1369 iov++;
1370 iov_ofs = 0;
1371 }
1372 }
1373 *iovp = iov;
1374 *iov_ofsp = iov_ofs;
1375}
1376
1377/*
1378 * This has the same side-effects and return value as ntfs_copy_from_user().
1379 * The difference is that on a fault we need to memset the remainder of the
1380 * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
1381 * single-segment behaviour.
1382 *
1383 * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
1384 * atomic and when not atomic. This is ok because it calls
1385 * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
1386 * fact, the only difference between __copy_from_user_inatomic() and
1387 * __copy_from_user() is that the latter calls might_sleep() and the former
1388 * should not zero the tail of the buffer on error. And on many architectures
1389 * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
1390 * makes no difference at all on those architectures.
1391 */
1392static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
1393 unsigned nr_pages, unsigned ofs, const struct iovec **iov,
1394 size_t *iov_ofs, size_t bytes)
1395{
1396 struct page **last_page = pages + nr_pages;
1397 char *addr;
1398 size_t copied, len, total = 0;
1399
1400 do {
1401 len = PAGE_CACHE_SIZE - ofs;
1402 if (len > bytes)
1403 len = bytes;
1404 addr = kmap_atomic(*pages);
1405 copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
1406 *iov, *iov_ofs, len);
1407 kunmap_atomic(addr);
1408 if (unlikely(copied != len)) {
1409 /* Do it the slow way. */
1410 addr = kmap(*pages);
1411 copied = __ntfs_copy_from_user_iovec_inatomic(addr +
1412 ofs, *iov, *iov_ofs, len);
1413 if (unlikely(copied != len))
1414 goto err_out;
1415 kunmap(*pages);
1416 }
1417 total += len;
1418 ntfs_set_next_iovec(iov, iov_ofs, len);
1419 bytes -= len;
1420 if (!bytes)
1421 break;
1422 ofs = 0;
1423 } while (++pages < last_page);
1424out:
1425 return total;
1426err_out:
1427 BUG_ON(copied > len);
1428 /* Zero the rest of the target like __copy_from_user(). */
1429 memset(addr + ofs + copied, 0, len - copied);
1430 kunmap(*pages);
1431 total += copied;
1432 ntfs_set_next_iovec(iov, iov_ofs, copied);
1433 while (++pages < last_page) {
1434 bytes -= len;
1435 if (!bytes)
1436 break;
1437 len = PAGE_CACHE_SIZE;
1438 if (len > bytes)
1439 len = bytes;
1440 zero_user(*pages, 0, len);
1441 }
1442 goto out;
1443}
1444
1445static inline void ntfs_flush_dcache_pages(struct page **pages, 1374static inline void ntfs_flush_dcache_pages(struct page **pages,
1446 unsigned nr_pages) 1375 unsigned nr_pages)
1447{ 1376{
@@ -1762,86 +1691,83 @@ err_out:
1762 return err; 1691 return err;
1763} 1692}
1764 1693
1765static void ntfs_write_failed(struct address_space *mapping, loff_t to) 1694/*
1695 * Copy as much as we can into the pages and return the number of bytes which
1696 * were successfully copied. If a fault is encountered then clear the pages
1697 * out to (ofs + bytes) and return the number of bytes which were copied.
1698 */
1699static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
1700 unsigned ofs, struct iov_iter *i, size_t bytes)
1766{ 1701{
1767 struct inode *inode = mapping->host; 1702 struct page **last_page = pages + nr_pages;
1703 size_t total = 0;
1704 struct iov_iter data = *i;
1705 unsigned len, copied;
1768 1706
1769 if (to > inode->i_size) { 1707 do {
1770 truncate_pagecache(inode, inode->i_size); 1708 len = PAGE_CACHE_SIZE - ofs;
1771 ntfs_truncate_vfs(inode); 1709 if (len > bytes)
1772 } 1710 len = bytes;
1711 copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
1712 len);
1713 total += copied;
1714 bytes -= copied;
1715 if (!bytes)
1716 break;
1717 iov_iter_advance(&data, copied);
1718 if (copied < len)
1719 goto err;
1720 ofs = 0;
1721 } while (++pages < last_page);
1722out:
1723 return total;
1724err:
1725 /* Zero the rest of the target like __copy_from_user(). */
1726 len = PAGE_CACHE_SIZE - copied;
1727 do {
1728 if (len > bytes)
1729 len = bytes;
1730 zero_user(*pages, copied, len);
1731 bytes -= len;
1732 copied = 0;
1733 len = PAGE_CACHE_SIZE;
1734 } while (++pages < last_page);
1735 goto out;
1773} 1736}
1774 1737
1775/** 1738/**
1776 * ntfs_file_buffered_write - 1739 * ntfs_perform_write - perform buffered write to a file
1777 * 1740 * @file: file to write to
1778 * Locking: The vfs is holding ->i_mutex on the inode. 1741 * @i: iov_iter with data to write
1742 * @pos: byte offset in file at which to begin writing to
1779 */ 1743 */
1780static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, 1744static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
1781 const struct iovec *iov, unsigned long nr_segs, 1745 loff_t pos)
1782 loff_t pos, loff_t *ppos, size_t count)
1783{ 1746{
1784 struct file *file = iocb->ki_filp;
1785 struct address_space *mapping = file->f_mapping; 1747 struct address_space *mapping = file->f_mapping;
1786 struct inode *vi = mapping->host; 1748 struct inode *vi = mapping->host;
1787 ntfs_inode *ni = NTFS_I(vi); 1749 ntfs_inode *ni = NTFS_I(vi);
1788 ntfs_volume *vol = ni->vol; 1750 ntfs_volume *vol = ni->vol;
1789 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1751 struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
1790 struct page *cached_page = NULL; 1752 struct page *cached_page = NULL;
1791 char __user *buf = NULL;
1792 s64 end, ll;
1793 VCN last_vcn; 1753 VCN last_vcn;
1794 LCN lcn; 1754 LCN lcn;
1795 unsigned long flags; 1755 size_t bytes;
1796 size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */ 1756 ssize_t status, written = 0;
1797 ssize_t status, written;
1798 unsigned nr_pages; 1757 unsigned nr_pages;
1799 int err;
1800 1758
1801 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, " 1759 ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
1802 "pos 0x%llx, count 0x%lx.", 1760 "0x%llx, count 0x%lx.", vi->i_ino,
1803 vi->i_ino, (unsigned)le32_to_cpu(ni->type), 1761 (unsigned)le32_to_cpu(ni->type),
1804 (unsigned long long)pos, (unsigned long)count); 1762 (unsigned long long)pos,
1805 if (unlikely(!count)) 1763 (unsigned long)iov_iter_count(i));
1806 return 0;
1807 BUG_ON(NInoMstProtected(ni));
1808 /*
1809 * If the attribute is not an index root and it is encrypted or
1810 * compressed, we cannot write to it yet. Note we need to check for
1811 * AT_INDEX_ALLOCATION since this is the type of both directory and
1812 * index inodes.
1813 */
1814 if (ni->type != AT_INDEX_ALLOCATION) {
1815 /* If file is encrypted, deny access, just like NT4. */
1816 if (NInoEncrypted(ni)) {
1817 /*
1818 * Reminder for later: Encrypted files are _always_
1819 * non-resident so that the content can always be
1820 * encrypted.
1821 */
1822 ntfs_debug("Denying write access to encrypted file.");
1823 return -EACCES;
1824 }
1825 if (NInoCompressed(ni)) {
1826 /* Only unnamed $DATA attribute can be compressed. */
1827 BUG_ON(ni->type != AT_DATA);
1828 BUG_ON(ni->name_len);
1829 /*
1830 * Reminder for later: If resident, the data is not
1831 * actually compressed. Only on the switch to non-
1832 * resident does compression kick in. This is in
1833 * contrast to encrypted files (see above).
1834 */
1835 ntfs_error(vi->i_sb, "Writing to compressed files is "
1836 "not implemented yet. Sorry.");
1837 return -EOPNOTSUPP;
1838 }
1839 }
1840 /* 1764 /*
1841 * If a previous ntfs_truncate() failed, repeat it and abort if it 1765 * If a previous ntfs_truncate() failed, repeat it and abort if it
1842 * fails again. 1766 * fails again.
1843 */ 1767 */
1844 if (unlikely(NInoTruncateFailed(ni))) { 1768 if (unlikely(NInoTruncateFailed(ni))) {
1769 int err;
1770
1845 inode_dio_wait(vi); 1771 inode_dio_wait(vi);
1846 err = ntfs_truncate(vi); 1772 err = ntfs_truncate(vi);
1847 if (err || NInoTruncateFailed(ni)) { 1773 if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1781,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1855 return err; 1781 return err;
1856 } 1782 }
1857 } 1783 }
1858 /* The first byte after the write. */
1859 end = pos + count;
1860 /*
1861 * If the write goes beyond the allocated size, extend the allocation
1862 * to cover the whole of the write, rounded up to the nearest cluster.
1863 */
1864 read_lock_irqsave(&ni->size_lock, flags);
1865 ll = ni->allocated_size;
1866 read_unlock_irqrestore(&ni->size_lock, flags);
1867 if (end > ll) {
1868 /* Extend the allocation without changing the data size. */
1869 ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
1870 if (likely(ll >= 0)) {
1871 BUG_ON(pos >= ll);
1872 /* If the extension was partial truncate the write. */
1873 if (end > ll) {
1874 ntfs_debug("Truncating write to inode 0x%lx, "
1875 "attribute type 0x%x, because "
1876 "the allocation was only "
1877 "partially extended.",
1878 vi->i_ino, (unsigned)
1879 le32_to_cpu(ni->type));
1880 end = ll;
1881 count = ll - pos;
1882 }
1883 } else {
1884 err = ll;
1885 read_lock_irqsave(&ni->size_lock, flags);
1886 ll = ni->allocated_size;
1887 read_unlock_irqrestore(&ni->size_lock, flags);
1888 /* Perform a partial write if possible or fail. */
1889 if (pos < ll) {
1890 ntfs_debug("Truncating write to inode 0x%lx, "
1891 "attribute type 0x%x, because "
1892 "extending the allocation "
1893 "failed (error code %i).",
1894 vi->i_ino, (unsigned)
1895 le32_to_cpu(ni->type), err);
1896 end = ll;
1897 count = ll - pos;
1898 } else {
1899 ntfs_error(vol->sb, "Cannot perform write to "
1900 "inode 0x%lx, attribute type "
1901 "0x%x, because extending the "
1902 "allocation failed (error "
1903 "code %i).", vi->i_ino,
1904 (unsigned)
1905 le32_to_cpu(ni->type), err);
1906 return err;
1907 }
1908 }
1909 }
1910 written = 0;
1911 /*
1912 * If the write starts beyond the initialized size, extend it up to the
1913 * beginning of the write and initialize all non-sparse space between
1914 * the old initialized size and the new one. This automatically also
1915 * increments the vfs inode->i_size to keep it above or equal to the
1916 * initialized_size.
1917 */
1918 read_lock_irqsave(&ni->size_lock, flags);
1919 ll = ni->initialized_size;
1920 read_unlock_irqrestore(&ni->size_lock, flags);
1921 if (pos > ll) {
1922 err = ntfs_attr_extend_initialized(ni, pos);
1923 if (err < 0) {
1924 ntfs_error(vol->sb, "Cannot perform write to inode "
1925 "0x%lx, attribute type 0x%x, because "
1926 "extending the initialized size "
1927 "failed (error code %i).", vi->i_ino,
1928 (unsigned)le32_to_cpu(ni->type), err);
1929 status = err;
1930 goto err_out;
1931 }
1932 }
1933 /* 1784 /*
1934 * Determine the number of pages per cluster for non-resident 1785 * Determine the number of pages per cluster for non-resident
1935 * attributes. 1786 * attributes.
@@ -1937,10 +1788,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1937 nr_pages = 1; 1788 nr_pages = 1;
1938 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni)) 1789 if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
1939 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT; 1790 nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
1940 /* Finally, perform the actual write. */
1941 last_vcn = -1; 1791 last_vcn = -1;
1942 if (likely(nr_segs == 1))
1943 buf = iov->iov_base;
1944 do { 1792 do {
1945 VCN vcn; 1793 VCN vcn;
1946 pgoff_t idx, start_idx; 1794 pgoff_t idx, start_idx;
@@ -1965,10 +1813,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1965 vol->cluster_size_bits, false); 1813 vol->cluster_size_bits, false);
1966 up_read(&ni->runlist.lock); 1814 up_read(&ni->runlist.lock);
1967 if (unlikely(lcn < LCN_HOLE)) { 1815 if (unlikely(lcn < LCN_HOLE)) {
1968 status = -EIO;
1969 if (lcn == LCN_ENOMEM) 1816 if (lcn == LCN_ENOMEM)
1970 status = -ENOMEM; 1817 status = -ENOMEM;
1971 else 1818 else {
1819 status = -EIO;
1972 ntfs_error(vol->sb, "Cannot " 1820 ntfs_error(vol->sb, "Cannot "
1973 "perform write to " 1821 "perform write to "
1974 "inode 0x%lx, " 1822 "inode 0x%lx, "
@@ -1977,6 +1825,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1977 "is corrupt.", 1825 "is corrupt.",
1978 vi->i_ino, (unsigned) 1826 vi->i_ino, (unsigned)
1979 le32_to_cpu(ni->type)); 1827 le32_to_cpu(ni->type));
1828 }
1980 break; 1829 break;
1981 } 1830 }
1982 if (lcn == LCN_HOLE) { 1831 if (lcn == LCN_HOLE) {
@@ -1989,8 +1838,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1989 } 1838 }
1990 } 1839 }
1991 } 1840 }
1992 if (bytes > count) 1841 if (bytes > iov_iter_count(i))
1993 bytes = count; 1842 bytes = iov_iter_count(i);
1843again:
1994 /* 1844 /*
1995 * Bring in the user page(s) that we will copy from _first_. 1845 * Bring in the user page(s) that we will copy from _first_.
1996 * Otherwise there is a nasty deadlock on copying from the same 1846 * Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1849,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1999 * pages being swapped out between us bringing them into memory 1849 * pages being swapped out between us bringing them into memory
2000 * and doing the actual copying. 1850 * and doing the actual copying.
2001 */ 1851 */
2002 if (likely(nr_segs == 1)) 1852 if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
2003 ntfs_fault_in_pages_readable(buf, bytes); 1853 status = -EFAULT;
2004 else 1854 break;
2005 ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes); 1855 }
2006 /* Get and lock @do_pages starting at index @start_idx. */ 1856 /* Get and lock @do_pages starting at index @start_idx. */
2007 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, 1857 status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
2008 pages, &cached_page); 1858 pages, &cached_page);
@@ -2018,56 +1868,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
2018 status = ntfs_prepare_pages_for_non_resident_write( 1868 status = ntfs_prepare_pages_for_non_resident_write(
2019 pages, do_pages, pos, bytes); 1869 pages, do_pages, pos, bytes);
2020 if (unlikely(status)) { 1870 if (unlikely(status)) {
2021 loff_t i_size;
2022
2023 do { 1871 do {
2024 unlock_page(pages[--do_pages]); 1872 unlock_page(pages[--do_pages]);
2025 page_cache_release(pages[do_pages]); 1873 page_cache_release(pages[do_pages]);
2026 } while (do_pages); 1874 } while (do_pages);
2027 /*
2028 * The write preparation may have instantiated
2029 * allocated space outside i_size. Trim this
2030 * off again. We can ignore any errors in this
2031 * case as we will just be waisting a bit of
2032 * allocated space, which is not a disaster.
2033 */
2034 i_size = i_size_read(vi);
2035 if (pos + bytes > i_size) {
2036 ntfs_write_failed(mapping, pos + bytes);
2037 }
2038 break; 1875 break;
2039 } 1876 }
2040 } 1877 }
2041 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index; 1878 u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
2042 if (likely(nr_segs == 1)) { 1879 copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
2043 copied = ntfs_copy_from_user(pages + u, do_pages - u, 1880 i, bytes);
2044 ofs, buf, bytes);
2045 buf += copied;
2046 } else
2047 copied = ntfs_copy_from_user_iovec(pages + u,
2048 do_pages - u, ofs, &iov, &iov_ofs,
2049 bytes);
2050 ntfs_flush_dcache_pages(pages + u, do_pages - u); 1881 ntfs_flush_dcache_pages(pages + u, do_pages - u);
2051 status = ntfs_commit_pages_after_write(pages, do_pages, pos, 1882 status = 0;
2052 bytes); 1883 if (likely(copied == bytes)) {
2053 if (likely(!status)) { 1884 status = ntfs_commit_pages_after_write(pages, do_pages,
2054 written += copied; 1885 pos, bytes);
2055 count -= copied; 1886 if (!status)
2056 pos += copied; 1887 status = bytes;
2057 if (unlikely(copied != bytes))
2058 status = -EFAULT;
2059 } 1888 }
2060 do { 1889 do {
2061 unlock_page(pages[--do_pages]); 1890 unlock_page(pages[--do_pages]);
2062 page_cache_release(pages[do_pages]); 1891 page_cache_release(pages[do_pages]);
2063 } while (do_pages); 1892 } while (do_pages);
2064 if (unlikely(status)) 1893 if (unlikely(status < 0))
2065 break; 1894 break;
2066 balance_dirty_pages_ratelimited(mapping); 1895 copied = status;
2067 cond_resched(); 1896 cond_resched();
2068 } while (count); 1897 if (unlikely(!copied)) {
2069err_out: 1898 size_t sc;
2070 *ppos = pos; 1899
1900 /*
1901 * We failed to copy anything. Fall back to single
1902 * segment length write.
1903 *
1904 * This is needed to avoid possible livelock in the
1905 * case that all segments in the iov cannot be copied
1906 * at once without a pagefault.
1907 */
1908 sc = iov_iter_single_seg_count(i);
1909 if (bytes > sc)
1910 bytes = sc;
1911 goto again;
1912 }
1913 iov_iter_advance(i, copied);
1914 pos += copied;
1915 written += copied;
1916 balance_dirty_pages_ratelimited(mapping);
1917 if (fatal_signal_pending(current)) {
1918 status = -EINTR;
1919 break;
1920 }
1921 } while (iov_iter_count(i));
2071 if (cached_page) 1922 if (cached_page)
2072 page_cache_release(cached_page); 1923 page_cache_release(cached_page);
2073 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 1924 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
@@ -2077,63 +1928,36 @@ err_out:
2077} 1928}
2078 1929
2079/** 1930/**
2080 * ntfs_file_aio_write_nolock - 1931 * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
1932 * @iocb: IO state structure
1933 * @from: iov_iter with data to write
1934 *
1935 * Basically the same as generic_file_write_iter() except that it ends up
1936 * up calling ntfs_perform_write() instead of generic_perform_write() and that
1937 * O_DIRECT is not implemented.
2081 */ 1938 */
2082static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, 1939static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2083 const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
2084{ 1940{
2085 struct file *file = iocb->ki_filp; 1941 struct file *file = iocb->ki_filp;
2086 struct address_space *mapping = file->f_mapping; 1942 struct inode *vi = file_inode(file);
2087 struct inode *inode = mapping->host; 1943 ssize_t written = 0;
2088 loff_t pos; 1944 ssize_t err;
2089 size_t count; /* after file limit checks */
2090 ssize_t written, err;
2091 1945
2092 count = iov_length(iov, nr_segs); 1946 mutex_lock(&vi->i_mutex);
2093 pos = *ppos;
2094 /* We can write back this queue in page reclaim. */ 1947 /* We can write back this queue in page reclaim. */
2095 current->backing_dev_info = inode_to_bdi(inode); 1948 current->backing_dev_info = inode_to_bdi(vi);
2096 written = 0; 1949 err = ntfs_prepare_file_for_write(iocb, from);
2097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 1950 if (iov_iter_count(from) && !err)
2098 if (err) 1951 written = ntfs_perform_write(file, from, iocb->ki_pos);
2099 goto out;
2100 if (!count)
2101 goto out;
2102 err = file_remove_suid(file);
2103 if (err)
2104 goto out;
2105 err = file_update_time(file);
2106 if (err)
2107 goto out;
2108 written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
2109 count);
2110out:
2111 current->backing_dev_info = NULL; 1952 current->backing_dev_info = NULL;
2112 return written ? written : err; 1953 mutex_unlock(&vi->i_mutex);
2113} 1954 if (likely(written > 0)) {
2114 1955 err = generic_write_sync(file, iocb->ki_pos, written);
2115/**
2116 * ntfs_file_aio_write -
2117 */
2118static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2119 unsigned long nr_segs, loff_t pos)
2120{
2121 struct file *file = iocb->ki_filp;
2122 struct address_space *mapping = file->f_mapping;
2123 struct inode *inode = mapping->host;
2124 ssize_t ret;
2125
2126 BUG_ON(iocb->ki_pos != pos);
2127
2128 mutex_lock(&inode->i_mutex);
2129 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2130 mutex_unlock(&inode->i_mutex);
2131 if (ret > 0) {
2132 int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
2133 if (err < 0) 1956 if (err < 0)
2134 ret = err; 1957 written = 0;
2135 } 1958 }
2136 return ret; 1959 iocb->ki_pos += written;
1960 return written ? written : err;
2137} 1961}
2138 1962
2139/** 1963/**
@@ -2197,37 +2021,15 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2197#endif /* NTFS_RW */ 2021#endif /* NTFS_RW */
2198 2022
2199const struct file_operations ntfs_file_ops = { 2023const struct file_operations ntfs_file_ops = {
2200 .llseek = generic_file_llseek, /* Seek inside file. */ 2024 .llseek = generic_file_llseek,
2201 .read = new_sync_read, /* Read from file. */ 2025 .read_iter = generic_file_read_iter,
2202 .read_iter = generic_file_read_iter, /* Async read from file. */
2203#ifdef NTFS_RW 2026#ifdef NTFS_RW
2204 .write = do_sync_write, /* Write to file. */ 2027 .write_iter = ntfs_file_write_iter,
2205 .aio_write = ntfs_file_aio_write, /* Async write to file. */ 2028 .fsync = ntfs_file_fsync,
2206 /*.release = ,*/ /* Last file is closed. See
2207 fs/ext2/file.c::
2208 ext2_release_file() for
2209 how to use this to discard
2210 preallocated space for
2211 write opened files. */
2212 .fsync = ntfs_file_fsync, /* Sync a file to disk. */
2213 /*.aio_fsync = ,*/ /* Sync all outstanding async
2214 i/o operations on a
2215 kiocb. */
2216#endif /* NTFS_RW */ 2029#endif /* NTFS_RW */
2217 /*.ioctl = ,*/ /* Perform function on the 2030 .mmap = generic_file_mmap,
2218 mounted filesystem. */ 2031 .open = ntfs_file_open,
2219 .mmap = generic_file_mmap, /* Mmap file. */ 2032 .splice_read = generic_file_splice_read,
2220 .open = ntfs_file_open, /* Open file. */
2221 .splice_read = generic_file_splice_read /* Zero-copy data send with
2222 the data source being on
2223 the ntfs partition. We do
2224 not need to care about the
2225 data destination. */
2226 /*.sendpage = ,*/ /* Zero-copy data send with
2227 the data destination being
2228 on the ntfs partition. We
2229 do not need to care about
2230 the data source. */
2231}; 2033};
2232 2034
2233const struct inode_operations ntfs_file_inode_ops = { 2035const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b9949d363..1d0c21df0d80 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
28#include <linux/quotaops.h> 28#include <linux/quotaops.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/log2.h> 30#include <linux/log2.h>
31#include <linux/aio.h>
32 31
33#include "aops.h" 32#include "aops.h"
34#include "attrib.h" 33#include "attrib.h"
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 044158bd22be..2d7f76e52c37 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3370 ret = ocfs2_get_right_path(et, left_path, &right_path); 3370 ret = ocfs2_get_right_path(et, left_path, &right_path);
3371 if (ret) { 3371 if (ret) {
3372 mlog_errno(ret); 3372 mlog_errno(ret);
3373 goto out; 3373 return ret;
3374 } 3374 }
3375 3375
3376 right_el = path_leaf_el(right_path); 3376 right_el = path_leaf_el(right_path);
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3453 subtree_index); 3453 subtree_index);
3454 } 3454 }
3455out: 3455out:
3456 if (right_path) 3456 ocfs2_free_path(right_path);
3457 ocfs2_free_path(right_path);
3458 return ret; 3457 return ret;
3459} 3458}
3460 3459
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3536 ret = ocfs2_get_left_path(et, right_path, &left_path); 3535 ret = ocfs2_get_left_path(et, right_path, &left_path);
3537 if (ret) { 3536 if (ret) {
3538 mlog_errno(ret); 3537 mlog_errno(ret);
3539 goto out; 3538 return ret;
3540 } 3539 }
3541 3540
3542 left_el = path_leaf_el(left_path); 3541 left_el = path_leaf_el(left_path);
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3647 right_path, subtree_index); 3646 right_path, subtree_index);
3648 } 3647 }
3649out: 3648out:
3650 if (left_path) 3649 ocfs2_free_path(left_path);
3651 ocfs2_free_path(left_path);
3652 return ret; 3650 return ret;
3653} 3651}
3654 3652
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4334 } else if (path->p_tree_depth > 0) { 4332 } else if (path->p_tree_depth > 0) {
4335 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos); 4333 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
4336 if (status) 4334 if (status)
4337 goto out; 4335 goto exit;
4338 4336
4339 if (left_cpos != 0) { 4337 if (left_cpos != 0) {
4340 left_path = ocfs2_new_path_from_path(path); 4338 left_path = ocfs2_new_path_from_path(path);
4341 if (!left_path) 4339 if (!left_path)
4342 goto out; 4340 goto exit;
4343 4341
4344 status = ocfs2_find_path(et->et_ci, left_path, 4342 status = ocfs2_find_path(et->et_ci, left_path,
4345 left_cpos); 4343 left_cpos);
4346 if (status) 4344 if (status)
4347 goto out; 4345 goto free_left_path;
4348 4346
4349 new_el = path_leaf_el(left_path); 4347 new_el = path_leaf_el(left_path);
4350 4348
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4361 le16_to_cpu(new_el->l_next_free_rec), 4359 le16_to_cpu(new_el->l_next_free_rec),
4362 le16_to_cpu(new_el->l_count)); 4360 le16_to_cpu(new_el->l_count));
4363 status = -EINVAL; 4361 status = -EINVAL;
4364 goto out; 4362 goto free_left_path;
4365 } 4363 }
4366 rec = &new_el->l_recs[ 4364 rec = &new_el->l_recs[
4367 le16_to_cpu(new_el->l_next_free_rec) - 1]; 4365 le16_to_cpu(new_el->l_next_free_rec) - 1];
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4388 path->p_tree_depth > 0) { 4386 path->p_tree_depth > 0) {
4389 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos); 4387 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
4390 if (status) 4388 if (status)
4391 goto out; 4389 goto free_left_path;
4392 4390
4393 if (right_cpos == 0) 4391 if (right_cpos == 0)
4394 goto out; 4392 goto free_left_path;
4395 4393
4396 right_path = ocfs2_new_path_from_path(path); 4394 right_path = ocfs2_new_path_from_path(path);
4397 if (!right_path) 4395 if (!right_path)
4398 goto out; 4396 goto free_left_path;
4399 4397
4400 status = ocfs2_find_path(et->et_ci, right_path, right_cpos); 4398 status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
4401 if (status) 4399 if (status)
4402 goto out; 4400 goto free_right_path;
4403 4401
4404 new_el = path_leaf_el(right_path); 4402 new_el = path_leaf_el(right_path);
4405 rec = &new_el->l_recs[0]; 4403 rec = &new_el->l_recs[0];
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4413 (unsigned long long)le64_to_cpu(eb->h_blkno), 4411 (unsigned long long)le64_to_cpu(eb->h_blkno),
4414 le16_to_cpu(new_el->l_next_free_rec)); 4412 le16_to_cpu(new_el->l_next_free_rec));
4415 status = -EINVAL; 4413 status = -EINVAL;
4416 goto out; 4414 goto free_right_path;
4417 } 4415 }
4418 rec = &new_el->l_recs[1]; 4416 rec = &new_el->l_recs[1];
4419 } 4417 }
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4430 ret = contig_type; 4428 ret = contig_type;
4431 } 4429 }
4432 4430
4433out: 4431free_right_path:
4434 if (left_path) 4432 ocfs2_free_path(right_path);
4435 ocfs2_free_path(left_path); 4433free_left_path:
4436 if (right_path) 4434 ocfs2_free_path(left_path);
4437 ocfs2_free_path(right_path); 4435exit:
4438
4439 return ret; 4436 return ret;
4440} 4437}
4441 4438
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6858 if (pages == NULL) { 6855 if (pages == NULL) {
6859 ret = -ENOMEM; 6856 ret = -ENOMEM;
6860 mlog_errno(ret); 6857 mlog_errno(ret);
6861 goto out; 6858 return ret;
6862 } 6859 }
6863 6860
6864 ret = ocfs2_reserve_clusters(osb, 1, &data_ac); 6861 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
6865 if (ret) { 6862 if (ret) {
6866 mlog_errno(ret); 6863 mlog_errno(ret);
6867 goto out; 6864 goto free_pages;
6868 } 6865 }
6869 } 6866 }
6870 6867
@@ -6996,9 +6993,8 @@ out_commit:
6996out: 6993out:
6997 if (data_ac) 6994 if (data_ac)
6998 ocfs2_free_alloc_context(data_ac); 6995 ocfs2_free_alloc_context(data_ac);
6999 if (pages) 6996free_pages:
7000 kfree(pages); 6997 kfree(pages);
7001
7002 return ret; 6998 return ret;
7003} 6999}
7004 7000
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 44db1808cdb5..f906a250da6a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/uio.h>
32 33
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
34 35
@@ -663,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb,
663 return 0; 664 return 0;
664} 665}
665 666
667static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
668 struct inode *inode, loff_t offset,
669 u64 zero_len, int cluster_align)
670{
671 u32 p_cpos = 0;
672 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
673 unsigned int num_clusters = 0;
674 unsigned int ext_flags = 0;
675 int ret = 0;
676
677 if (offset <= i_size_read(inode) || cluster_align)
678 return 0;
679
680 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
681 &ext_flags);
682 if (ret < 0) {
683 mlog_errno(ret);
684 return ret;
685 }
686
687 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
688 u64 s = i_size_read(inode);
689 sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
690 (do_div(s, osb->s_clustersize) >> 9);
691
692 ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
693 zero_len >> 9, GFP_NOFS, false);
694 if (ret < 0)
695 mlog_errno(ret);
696 }
697
698 return ret;
699}
700
701static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
702 struct inode *inode, loff_t offset)
703{
704 u64 zero_start, zero_len, total_zero_len;
705 u32 p_cpos = 0, clusters_to_add;
706 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
707 unsigned int num_clusters = 0;
708 unsigned int ext_flags = 0;
709 u32 size_div, offset_div;
710 int ret = 0;
711
712 {
713 u64 o = offset;
714 u64 s = i_size_read(inode);
715
716 offset_div = do_div(o, osb->s_clustersize);
717 size_div = do_div(s, osb->s_clustersize);
718 }
719
720 if (offset <= i_size_read(inode))
721 return 0;
722
723 clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
724 ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
725 total_zero_len = offset - i_size_read(inode);
726 if (clusters_to_add)
727 total_zero_len -= offset_div;
728
729 /* Allocate clusters to fill out holes, and this is only needed
730 * when we add more than one clusters. Otherwise the cluster will
731 * be allocated during direct IO */
732 if (clusters_to_add > 1) {
733 ret = ocfs2_extend_allocation(inode,
734 OCFS2_I(inode)->ip_clusters,
735 clusters_to_add - 1, 0);
736 if (ret) {
737 mlog_errno(ret);
738 goto out;
739 }
740 }
741
742 while (total_zero_len) {
743 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
744 &ext_flags);
745 if (ret < 0) {
746 mlog_errno(ret);
747 goto out;
748 }
749
750 zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
751 size_div;
752 zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
753 size_div;
754 zero_len = min(total_zero_len, zero_len);
755
756 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
757 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
758 zero_start >> 9, zero_len >> 9,
759 GFP_NOFS, false);
760 if (ret < 0) {
761 mlog_errno(ret);
762 goto out;
763 }
764 }
765
766 total_zero_len -= zero_len;
767 v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
768
769 /* Only at first iteration can be cluster not aligned.
770 * So set size_div to 0 for the rest */
771 size_div = 0;
772 }
773
774out:
775 return ret;
776}
777
666static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, 778static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
667 struct iov_iter *iter, 779 struct iov_iter *iter,
668 loff_t offset) 780 loff_t offset)
@@ -677,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
677 struct buffer_head *di_bh = NULL; 789 struct buffer_head *di_bh = NULL;
678 size_t count = iter->count; 790 size_t count = iter->count;
679 journal_t *journal = osb->journal->j_journal; 791 journal_t *journal = osb->journal->j_journal;
680 u32 zero_len; 792 u64 zero_len_head, zero_len_tail;
681 int cluster_align; 793 int cluster_align_head, cluster_align_tail;
682 loff_t final_size = offset + count; 794 loff_t final_size = offset + count;
683 int append_write = offset >= i_size_read(inode) ? 1 : 0; 795 int append_write = offset >= i_size_read(inode) ? 1 : 0;
684 unsigned int num_clusters = 0; 796 unsigned int num_clusters = 0;
@@ -686,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
686 798
687 { 799 {
688 u64 o = offset; 800 u64 o = offset;
801 u64 s = i_size_read(inode);
802
803 zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
804 cluster_align_head = !zero_len_head;
689 805
690 zero_len = do_div(o, 1 << osb->s_clustersize_bits); 806 zero_len_tail = osb->s_clustersize -
691 cluster_align = !zero_len; 807 do_div(s, osb->s_clustersize);
808 if ((offset - i_size_read(inode)) < zero_len_tail)
809 zero_len_tail = offset - i_size_read(inode);
810 cluster_align_tail = !zero_len_tail;
692 } 811 }
693 812
694 /* 813 /*
@@ -706,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
706 } 825 }
707 826
708 if (append_write) { 827 if (append_write) {
709 ret = ocfs2_inode_lock(inode, &di_bh, 1); 828 ret = ocfs2_inode_lock(inode, NULL, 1);
710 if (ret < 0) { 829 if (ret < 0) {
711 mlog_errno(ret); 830 mlog_errno(ret);
712 goto clean_orphan; 831 goto clean_orphan;
713 } 832 }
714 833
834 /* zeroing out the previously allocated cluster tail
835 * that but not zeroed */
715 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 836 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
716 ret = ocfs2_zero_extend(inode, di_bh, offset); 837 ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
838 zero_len_tail, cluster_align_tail);
717 else 839 else
718 ret = ocfs2_extend_no_holes(inode, di_bh, offset, 840 ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
719 offset); 841 offset);
720 if (ret < 0) { 842 if (ret < 0) {
721 mlog_errno(ret); 843 mlog_errno(ret);
722 ocfs2_inode_unlock(inode, 1); 844 ocfs2_inode_unlock(inode, 1);
723 brelse(di_bh);
724 goto clean_orphan; 845 goto clean_orphan;
725 } 846 }
726 847
@@ -728,19 +849,15 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
728 if (is_overwrite < 0) { 849 if (is_overwrite < 0) {
729 mlog_errno(is_overwrite); 850 mlog_errno(is_overwrite);
730 ocfs2_inode_unlock(inode, 1); 851 ocfs2_inode_unlock(inode, 1);
731 brelse(di_bh);
732 goto clean_orphan; 852 goto clean_orphan;
733 } 853 }
734 854
735 ocfs2_inode_unlock(inode, 1); 855 ocfs2_inode_unlock(inode, 1);
736 brelse(di_bh);
737 di_bh = NULL;
738 } 856 }
739 857
740 written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, 858 written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
741 iter, offset, 859 offset, ocfs2_direct_IO_get_blocks,
742 ocfs2_direct_IO_get_blocks, 860 ocfs2_dio_end_io, NULL, 0);
743 ocfs2_dio_end_io, NULL, 0);
744 if (unlikely(written < 0)) { 861 if (unlikely(written < 0)) {
745 loff_t i_size = i_size_read(inode); 862 loff_t i_size = i_size_read(inode);
746 863
@@ -771,15 +888,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
771 if (ret < 0) 888 if (ret < 0)
772 mlog_errno(ret); 889 mlog_errno(ret);
773 } 890 }
774 } else if (written < 0 && append_write && !is_overwrite && 891 } else if (written > 0 && append_write && !is_overwrite &&
775 !cluster_align) { 892 !cluster_align_head) {
893 /* zeroing out the allocated cluster head */
776 u32 p_cpos = 0; 894 u32 p_cpos = 0;
777 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); 895 u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
778 896
897 ret = ocfs2_inode_lock(inode, NULL, 0);
898 if (ret < 0) {
899 mlog_errno(ret);
900 goto clean_orphan;
901 }
902
779 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, 903 ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
780 &num_clusters, &ext_flags); 904 &num_clusters, &ext_flags);
781 if (ret < 0) { 905 if (ret < 0) {
782 mlog_errno(ret); 906 mlog_errno(ret);
907 ocfs2_inode_unlock(inode, 0);
783 goto clean_orphan; 908 goto clean_orphan;
784 } 909 }
785 910
@@ -787,9 +912,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
787 912
788 ret = blkdev_issue_zeroout(osb->sb->s_bdev, 913 ret = blkdev_issue_zeroout(osb->sb->s_bdev,
789 p_cpos << (osb->s_clustersize_bits - 9), 914 p_cpos << (osb->s_clustersize_bits - 9),
790 zero_len >> 9, GFP_KERNEL, false); 915 zero_len_head >> 9, GFP_NOFS, false);
791 if (ret < 0) 916 if (ret < 0)
792 mlog_errno(ret); 917 mlog_errno(ret);
918
919 ocfs2_inode_unlock(inode, 0);
793 } 920 }
794 921
795clean_orphan: 922clean_orphan:
@@ -818,9 +945,7 @@ out:
818 return ret; 945 return ret;
819} 946}
820 947
821static ssize_t ocfs2_direct_IO(int rw, 948static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
822 struct kiocb *iocb,
823 struct iov_iter *iter,
824 loff_t offset) 949 loff_t offset)
825{ 950{
826 struct file *file = iocb->ki_filp; 951 struct file *file = iocb->ki_filp;
@@ -842,12 +967,11 @@ static ssize_t ocfs2_direct_IO(int rw,
842 if (i_size_read(inode) <= offset && !full_coherency) 967 if (i_size_read(inode) <= offset && !full_coherency)
843 return 0; 968 return 0;
844 969
845 if (rw == READ) 970 if (iov_iter_rw(iter) == READ)
846 return __blockdev_direct_IO(rw, iocb, inode, 971 return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
847 inode->i_sb->s_bdev, 972 iter, offset,
848 iter, offset, 973 ocfs2_direct_IO_get_blocks,
849 ocfs2_direct_IO_get_blocks, 974 ocfs2_dio_end_io, NULL, 0);
850 ocfs2_dio_end_io, NULL, 0);
851 else 975 else
852 return ocfs2_direct_IO_write(iocb, iter, offset); 976 return ocfs2_direct_IO_write(iocb, iter, offset);
853} 977}
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155d54df..dd59599b022d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25#include <linux/aio.h> 25#include <linux/fs.h>
26 26
27handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 27handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
28 struct page *page, 28 struct page *page,
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9e6508..7fdc25a4d8c0 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
196 } \ 196 } \
197} while (0) 197} while (0)
198 198
199#define mlog_errno(st) do { \ 199#define mlog_errno(st) ({ \
200 int _st = (st); \ 200 int _st = (st); \
201 if (_st != -ERESTARTSYS && _st != -EINTR && \ 201 if (_st != -ERESTARTSYS && _st != -EINTR && \
202 _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ 202 _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
203 _st != -EDQUOT) \ 203 _st != -EDQUOT) \
204 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ 204 mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
205} while (0) 205 _st; \
206})
206 207
207#define mlog_bug_on_msg(cond, fmt, args...) do { \ 208#define mlog_bug_on_msg(cond, fmt, args...) do { \
208 if (cond) { \ 209 if (cond) { \
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b08050bd3f2e..ccd4dcfc3645 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -18,7 +18,7 @@
18 * 18 *
19 * linux/fs/minix/dir.c 19 * linux/fs/minix/dir.c
20 * 20 *
21 * Copyright (C) 1991, 1992 Linux Torvalds 21 * Copyright (C) 1991, 1992 Linus Torvalds
22 * 22 *
23 * This program is free software; you can redistribute it and/or 23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public 24 * modify it under the terms of the GNU General Public
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
2047 const char *name, 2047 const char *name,
2048 int namelen) 2048 int namelen)
2049{ 2049{
2050 int ret; 2050 int ret = 0;
2051 struct ocfs2_dir_lookup_result lookup = { NULL, }; 2051 struct ocfs2_dir_lookup_result lookup = { NULL, };
2052 2052
2053 trace_ocfs2_check_dir_for_entry( 2053 trace_ocfs2_check_dir_for_entry(
2054 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); 2054 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
2055 2055
2056 ret = -EEXIST; 2056 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
2057 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) 2057 ret = -EEXIST;
2058 goto bail; 2058 mlog_errno(ret);
2059 }
2059 2060
2060 ret = 0;
2061bail:
2062 ocfs2_free_dir_lookup_result(&lookup); 2061 ocfs2_free_dir_lookup_result(&lookup);
2063 2062
2064 if (ret)
2065 mlog_errno(ret);
2066 return ret; 2063 return ret;
2067} 2064}
2068 2065
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 11849a44dc5a..8b23aa2f52dd 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1391 int noqueue_attempted = 0; 1391 int noqueue_attempted = 0;
1392 int dlm_locked = 0; 1392 int dlm_locked = 0;
1393 1393
1394 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
1395 mlog_errno(-EINVAL);
1396 return -EINVAL;
1397 }
1398
1394 ocfs2_init_mask_waiter(&mw); 1399 ocfs2_init_mask_waiter(&mw);
1395 1400
1396 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) 1401 if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 29651167190d..540dc4bdd042 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
82 } 82 }
83 83
84 status = ocfs2_test_inode_bit(osb, blkno, &set); 84 status = ocfs2_test_inode_bit(osb, blkno, &set);
85 trace_ocfs2_get_dentry_test_bit(status, set);
86 if (status < 0) { 85 if (status < 0) {
87 if (status == -EINVAL) { 86 if (status == -EINVAL) {
88 /* 87 /*
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
96 goto unlock_nfs_sync; 95 goto unlock_nfs_sync;
97 } 96 }
98 97
98 trace_ocfs2_get_dentry_test_bit(status, set);
99 /* If the inode allocator bit is clear, this inode must be stale */ 99 /* If the inode allocator bit is clear, this inode must be stale */
100 if (!set) { 100 if (!set) {
101 status = -ESTALE; 101 status = -ESTALE;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ba1790e52ff2..913fc250d85a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2106,7 +2106,7 @@ out:
2106} 2106}
2107 2107
2108static int ocfs2_prepare_inode_for_write(struct file *file, 2108static int ocfs2_prepare_inode_for_write(struct file *file,
2109 loff_t *ppos, 2109 loff_t pos,
2110 size_t count, 2110 size_t count,
2111 int appending, 2111 int appending,
2112 int *direct_io, 2112 int *direct_io,
@@ -2115,7 +2115,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2115 int ret = 0, meta_level = 0; 2115 int ret = 0, meta_level = 0;
2116 struct dentry *dentry = file->f_path.dentry; 2116 struct dentry *dentry = file->f_path.dentry;
2117 struct inode *inode = dentry->d_inode; 2117 struct inode *inode = dentry->d_inode;
2118 loff_t saved_pos = 0, end; 2118 loff_t end;
2119 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2119 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2120 int full_coherency = !(osb->s_mount_opt & 2120 int full_coherency = !(osb->s_mount_opt &
2121 OCFS2_MOUNT_COHERENCY_BUFFERED); 2121 OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2155,23 +2155,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2155 } 2155 }
2156 } 2156 }
2157 2157
2158 /* work on a copy of ppos until we're sure that we won't have 2158 end = pos + count;
2159 * to recalculate it due to relocking. */
2160 if (appending)
2161 saved_pos = i_size_read(inode);
2162 else
2163 saved_pos = *ppos;
2164
2165 end = saved_pos + count;
2166 2159
2167 ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); 2160 ret = ocfs2_check_range_for_refcount(inode, pos, count);
2168 if (ret == 1) { 2161 if (ret == 1) {
2169 ocfs2_inode_unlock(inode, meta_level); 2162 ocfs2_inode_unlock(inode, meta_level);
2170 meta_level = -1; 2163 meta_level = -1;
2171 2164
2172 ret = ocfs2_prepare_inode_for_refcount(inode, 2165 ret = ocfs2_prepare_inode_for_refcount(inode,
2173 file, 2166 file,
2174 saved_pos, 2167 pos,
2175 count, 2168 count,
2176 &meta_level); 2169 &meta_level);
2177 if (has_refcount) 2170 if (has_refcount)
@@ -2227,7 +2220,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2227 * caller will have to retake some cluster 2220 * caller will have to retake some cluster
2228 * locks and initiate the io as buffered. 2221 * locks and initiate the io as buffered.
2229 */ 2222 */
2230 ret = ocfs2_check_range_for_holes(inode, saved_pos, count); 2223 ret = ocfs2_check_range_for_holes(inode, pos, count);
2231 if (ret == 1) { 2224 if (ret == 1) {
2232 /* 2225 /*
2233 * Fallback to old way if the feature bit is not set. 2226 * Fallback to old way if the feature bit is not set.
@@ -2242,12 +2235,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2242 break; 2235 break;
2243 } 2236 }
2244 2237
2245 if (appending)
2246 *ppos = saved_pos;
2247
2248out_unlock: 2238out_unlock:
2249 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, 2239 trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
2250 saved_pos, appending, count, 2240 pos, appending, count,
2251 direct_io, has_refcount); 2241 direct_io, has_refcount);
2252 2242
2253 if (meta_level >= 0) 2243 if (meta_level >= 0)
@@ -2260,19 +2250,20 @@ out:
2260static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, 2250static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2261 struct iov_iter *from) 2251 struct iov_iter *from)
2262{ 2252{
2263 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 2253 int direct_io, appending, rw_level, have_alloc_sem = 0;
2264 int can_do_direct, has_refcount = 0; 2254 int can_do_direct, has_refcount = 0;
2265 ssize_t written = 0; 2255 ssize_t written = 0;
2266 size_t count = iov_iter_count(from); 2256 ssize_t ret;
2267 loff_t old_size, *ppos = &iocb->ki_pos; 2257 size_t count = iov_iter_count(from), orig_count;
2258 loff_t old_size;
2268 u32 old_clusters; 2259 u32 old_clusters;
2269 struct file *file = iocb->ki_filp; 2260 struct file *file = iocb->ki_filp;
2270 struct inode *inode = file_inode(file); 2261 struct inode *inode = file_inode(file);
2271 struct address_space *mapping = file->f_mapping;
2272 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2262 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2273 int full_coherency = !(osb->s_mount_opt & 2263 int full_coherency = !(osb->s_mount_opt &
2274 OCFS2_MOUNT_COHERENCY_BUFFERED); 2264 OCFS2_MOUNT_COHERENCY_BUFFERED);
2275 int unaligned_dio = 0; 2265 int unaligned_dio = 0;
2266 int dropped_dio = 0;
2276 2267
2277 trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, 2268 trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
2278 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2269 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2280,11 +2271,11 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
2280 file->f_path.dentry->d_name.name, 2271 file->f_path.dentry->d_name.name,
2281 (unsigned int)from->nr_segs); /* GRRRRR */ 2272 (unsigned int)from->nr_segs); /* GRRRRR */
2282 2273
2283 if (iocb->ki_nbytes == 0) 2274 if (count == 0)
2284 return 0; 2275 return 0;
2285 2276
2286 appending = file->f_flags & O_APPEND ? 1 : 0; 2277 appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
2287 direct_io = file->f_flags & O_DIRECT ? 1 : 0; 2278 direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
2288 2279
2289 mutex_lock(&inode->i_mutex); 2280 mutex_lock(&inode->i_mutex);
2290 2281
@@ -2329,9 +2320,17 @@ relock:
2329 ocfs2_inode_unlock(inode, 1); 2320 ocfs2_inode_unlock(inode, 1);
2330 } 2321 }
2331 2322
2323 orig_count = iov_iter_count(from);
2324 ret = generic_write_checks(iocb, from);
2325 if (ret <= 0) {
2326 if (ret)
2327 mlog_errno(ret);
2328 goto out;
2329 }
2330 count = ret;
2331
2332 can_do_direct = direct_io; 2332 can_do_direct = direct_io;
2333 ret = ocfs2_prepare_inode_for_write(file, ppos, 2333 ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
2334 iocb->ki_nbytes, appending,
2335 &can_do_direct, &has_refcount); 2334 &can_do_direct, &has_refcount);
2336 if (ret < 0) { 2335 if (ret < 0) {
2337 mlog_errno(ret); 2336 mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
2339 } 2338 }
2340 2339
2341 if (direct_io && !is_sync_kiocb(iocb)) 2340 if (direct_io && !is_sync_kiocb(iocb))
2342 unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, 2341 unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
2343 *ppos);
2344 2342
2345 /* 2343 /*
2346 * We can't complete the direct I/O as requested, fall back to 2344 * We can't complete the direct I/O as requested, fall back to
@@ -2353,6 +2351,9 @@ relock:
2353 rw_level = -1; 2351 rw_level = -1;
2354 2352
2355 direct_io = 0; 2353 direct_io = 0;
2354 iocb->ki_flags &= ~IOCB_DIRECT;
2355 iov_iter_reexpand(from, orig_count);
2356 dropped_dio = 1;
2356 goto relock; 2357 goto relock;
2357 } 2358 }
2358 2359
@@ -2376,74 +2377,15 @@ relock:
2376 /* communicate with ocfs2_dio_end_io */ 2377 /* communicate with ocfs2_dio_end_io */
2377 ocfs2_iocb_set_rw_locked(iocb, rw_level); 2378 ocfs2_iocb_set_rw_locked(iocb, rw_level);
2378 2379
2379 ret = generic_write_checks(file, ppos, &count, 2380 written = __generic_file_write_iter(iocb, from);
2380 S_ISBLK(inode->i_mode));
2381 if (ret)
2382 goto out_dio;
2383
2384 iov_iter_truncate(from, count);
2385 if (direct_io) {
2386 loff_t endbyte;
2387 ssize_t written_buffered;
2388 written = generic_file_direct_write(iocb, from, *ppos);
2389 if (written < 0 || written == count) {
2390 ret = written;
2391 goto out_dio;
2392 }
2393
2394 /*
2395 * for completing the rest of the request.
2396 */
2397 count -= written;
2398 written_buffered = generic_perform_write(file, from, *ppos);
2399 /*
2400 * If generic_file_buffered_write() returned a synchronous error
2401 * then we want to return the number of bytes which were
2402 * direct-written, or the error code if that was zero. Note
2403 * that this differs from normal direct-io semantics, which
2404 * will return -EFOO even if some bytes were written.
2405 */
2406 if (written_buffered < 0) {
2407 ret = written_buffered;
2408 goto out_dio;
2409 }
2410
2411 /* We need to ensure that the page cache pages are written to
2412 * disk and invalidated to preserve the expected O_DIRECT
2413 * semantics.
2414 */
2415 endbyte = *ppos + written_buffered - 1;
2416 ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
2417 endbyte);
2418 if (ret == 0) {
2419 iocb->ki_pos = *ppos + written_buffered;
2420 written += written_buffered;
2421 invalidate_mapping_pages(mapping,
2422 *ppos >> PAGE_CACHE_SHIFT,
2423 endbyte >> PAGE_CACHE_SHIFT);
2424 } else {
2425 /*
2426 * We don't know how much we wrote, so just return
2427 * the number of bytes which were direct-written
2428 */
2429 }
2430 } else {
2431 current->backing_dev_info = inode_to_bdi(inode);
2432 written = generic_perform_write(file, from, *ppos);
2433 if (likely(written >= 0))
2434 iocb->ki_pos = *ppos + written;
2435 current->backing_dev_info = NULL;
2436 }
2437
2438out_dio:
2439 /* buffered aio wouldn't have proper lock coverage today */ 2381 /* buffered aio wouldn't have proper lock coverage today */
2440 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2382 BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
2441 2383
2442 if (unlikely(written <= 0)) 2384 if (unlikely(written <= 0))
2443 goto no_sync; 2385 goto no_sync;
2444 2386
2445 if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || 2387 if (((file->f_flags & O_DSYNC) && !direct_io) ||
2446 ((file->f_flags & O_DIRECT) && !direct_io)) { 2388 IS_SYNC(inode) || dropped_dio) {
2447 ret = filemap_fdatawrite_range(file->f_mapping, 2389 ret = filemap_fdatawrite_range(file->f_mapping,
2448 iocb->ki_pos - written, 2390 iocb->ki_pos - written,
2449 iocb->ki_pos - 1); 2391 iocb->ki_pos - 1);
@@ -2554,7 +2496,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2554 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2496 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2555 * need locks to protect pending reads from racing with truncate. 2497 * need locks to protect pending reads from racing with truncate.
2556 */ 2498 */
2557 if (filp->f_flags & O_DIRECT) { 2499 if (iocb->ki_flags & IOCB_DIRECT) {
2558 have_alloc_sem = 1; 2500 have_alloc_sem = 1;
2559 ocfs2_iocb_set_sem_locked(iocb); 2501 ocfs2_iocb_set_sem_locked(iocb);
2560 2502
@@ -2588,7 +2530,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
2588 trace_generic_file_aio_read_ret(ret); 2530 trace_generic_file_aio_read_ret(ret);
2589 2531
2590 /* buffered aio wouldn't have proper lock coverage today */ 2532 /* buffered aio wouldn't have proper lock coverage today */
2591 BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); 2533 BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
2592 2534
2593 /* see ocfs2_file_write_iter */ 2535 /* see ocfs2_file_write_iter */
2594 if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { 2536 if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
@@ -2683,8 +2625,6 @@ const struct inode_operations ocfs2_special_file_iops = {
2683 */ 2625 */
2684const struct file_operations ocfs2_fops = { 2626const struct file_operations ocfs2_fops = {
2685 .llseek = ocfs2_file_llseek, 2627 .llseek = ocfs2_file_llseek,
2686 .read = new_sync_read,
2687 .write = new_sync_write,
2688 .mmap = ocfs2_mmap, 2628 .mmap = ocfs2_mmap,
2689 .fsync = ocfs2_sync_file, 2629 .fsync = ocfs2_sync_file,
2690 .release = ocfs2_file_release, 2630 .release = ocfs2_file_release,
@@ -2731,8 +2671,6 @@ const struct file_operations ocfs2_dops = {
2731 */ 2671 */
2732const struct file_operations ocfs2_fops_no_plocks = { 2672const struct file_operations ocfs2_fops_no_plocks = {
2733 .llseek = ocfs2_file_llseek, 2673 .llseek = ocfs2_file_llseek,
2734 .read = new_sync_read,
2735 .write = new_sync_write,
2736 .mmap = ocfs2_mmap, 2674 .mmap = ocfs2_mmap,
2737 .fsync = ocfs2_sync_file, 2675 .fsync = ocfs2_sync_file,
2738 .release = ocfs2_file_release, 2676 .release = ocfs2_file_release,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3025c0da6b8a..be71ca0937f7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode,
624 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, 624 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
625 le16_to_cpu(di->i_suballoc_slot)); 625 le16_to_cpu(di->i_suballoc_slot));
626 if (!inode_alloc_inode) { 626 if (!inode_alloc_inode) {
627 status = -EEXIST; 627 status = -ENOENT;
628 mlog_errno(status); 628 mlog_errno(status);
629 goto bail; 629 goto bail;
630 } 630 }
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
742 ORPHAN_DIR_SYSTEM_INODE, 742 ORPHAN_DIR_SYSTEM_INODE,
743 orphaned_slot); 743 orphaned_slot);
744 if (!orphan_dir_inode) { 744 if (!orphan_dir_inode) {
745 status = -EEXIST; 745 status = -ENOENT;
746 mlog_errno(status); 746 mlog_errno(status);
747 goto bail; 747 goto bail;
748 } 748 }
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 044013455621..857bbbcd39f3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
666 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 666 if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
667 ocfs2_local_alloc_count_bits(alloc)) { 667 ocfs2_local_alloc_count_bits(alloc)) {
668 ocfs2_error(osb->sb, "local alloc inode %llu says it has " 668 ocfs2_error(osb->sb, "local alloc inode %llu says it has "
669 "%u free bits, but a count shows %u", 669 "%u used bits, but a count shows %u",
670 (unsigned long long)le64_to_cpu(alloc->i_blkno), 670 (unsigned long long)le64_to_cpu(alloc->i_blkno),
671 le32_to_cpu(alloc->id1.bitmap1.i_used), 671 le32_to_cpu(alloc->id1.bitmap1.i_used),
672 ocfs2_local_alloc_count_bits(alloc)); 672 ocfs2_local_alloc_count_bits(alloc));
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
839 u32 *numbits, 839 u32 *numbits,
840 struct ocfs2_alloc_reservation *resv) 840 struct ocfs2_alloc_reservation *resv)
841{ 841{
842 int numfound, bitoff, left, startoff, lastzero; 842 int numfound = 0, bitoff, left, startoff, lastzero;
843 int local_resv = 0; 843 int local_resv = 0;
844 struct ocfs2_alloc_reservation r; 844 struct ocfs2_alloc_reservation r;
845 void *bitmap = NULL; 845 void *bitmap = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b5c3a5ea3ee6..09f90cbf0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2322 2322
2323 trace_ocfs2_orphan_del( 2323 trace_ocfs2_orphan_del(
2324 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno, 2324 (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2325 name, namelen); 2325 name, strlen(name));
2326 2326
2327 /* find it's spot in the orphan directory */ 2327 /* find it's spot in the orphan directory */
2328 status = ocfs2_find_entry(name, namelen, orphan_dir_inode, 2328 status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
2329 &lookup); 2329 &lookup);
2330 if (status) { 2330 if (status) {
2331 mlog_errno(status); 2331 mlog_errno(status);
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2808 ORPHAN_DIR_SYSTEM_INODE, 2808 ORPHAN_DIR_SYSTEM_INODE,
2809 osb->slot_num); 2809 osb->slot_num);
2810 if (!orphan_dir_inode) { 2810 if (!orphan_dir_inode) {
2811 status = -EEXIST; 2811 status = -ENOENT;
2812 mlog_errno(status); 2812 mlog_errno(status);
2813 goto leave; 2813 goto leave;
2814 } 2814 }
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee541f92dab4..df3a500789c7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4276 error = posix_acl_create(dir, &mode, &default_acl, &acl); 4276 error = posix_acl_create(dir, &mode, &default_acl, &acl);
4277 if (error) { 4277 if (error) {
4278 mlog_errno(error); 4278 mlog_errno(error);
4279 goto out; 4279 return error;
4280 } 4280 }
4281 4281
4282 error = ocfs2_create_inode_in_orphan(dir, mode, 4282 error = ocfs2_create_inode_in_orphan(dir, mode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d5493e361a38..e78a203d44c8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
427 if (!si) { 427 if (!si) {
428 status = -ENOMEM; 428 status = -ENOMEM;
429 mlog_errno(status); 429 mlog_errno(status);
430 goto bail; 430 return status;
431 } 431 }
432 432
433 si->si_extended = ocfs2_uses_extended_slot_map(osb); 433 si->si_extended = ocfs2_uses_extended_slot_map(osb);
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
452 452
453 osb->slot_info = (struct ocfs2_slot_info *)si; 453 osb->slot_info = (struct ocfs2_slot_info *)si;
454bail: 454bail:
455 if (status < 0 && si) 455 if (status < 0)
456 __ocfs2_free_slot_info(si); 456 __ocfs2_free_slot_info(si);
457 457
458 return status; 458 return status;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 1724d43d3da1..220cae7bbdbc 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void)
295 set_bit(node_num, netmap); 295 set_bit(node_num, netmap);
296 if (!memcmp(hbmap, netmap, sizeof(hbmap))) 296 if (!memcmp(hbmap, netmap, sizeof(hbmap)))
297 return 0; 297 return 0;
298 if (i < O2CB_MAP_STABILIZE_COUNT) 298 if (i < O2CB_MAP_STABILIZE_COUNT - 1)
299 msleep(1000); 299 msleep(1000);
300 } 300 }
301 301
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 720aa389e0ea..2768eb1da2b8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
1004 BUG_ON(conn == NULL); 1004 BUG_ON(conn == NULL);
1005 1005
1006 lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); 1006 lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
1007 if (!lc) { 1007 if (!lc)
1008 rc = -ENOMEM; 1008 return -ENOMEM;
1009 goto out;
1010 }
1011 1009
1012 init_waitqueue_head(&lc->oc_wait); 1010 init_waitqueue_head(&lc->oc_wait);
1013 init_completion(&lc->oc_sync_wait); 1011 init_completion(&lc->oc_sync_wait);
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
1063 } 1061 }
1064 1062
1065out: 1063out:
1066 if (rc && lc) 1064 if (rc)
1067 kfree(lc); 1065 kfree(lc);
1068 return rc; 1066 return rc;
1069} 1067}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0cb889a17ae1..4479029630bb 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2499 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE); 2499 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
2500 if (status < 0) { 2500 if (status < 0) {
2501 mlog_errno(status); 2501 mlog_errno(status);
2502 ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
2503 start_bit, count);
2502 goto bail; 2504 goto bail;
2503 } 2505 }
2504 2506
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26675185b886..403c5660b306 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2069,6 +2069,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2069 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); 2069 cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
2070 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); 2070 bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
2071 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); 2071 sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
2072 memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
2073 sizeof(di->id2.i_super.s_uuid));
2072 2074
2073 osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; 2075 osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
2074 2076
@@ -2333,7 +2335,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
2333 mlog_errno(status); 2335 mlog_errno(status);
2334 goto bail; 2336 goto bail;
2335 } 2337 }
2336 cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); 2338 cleancache_init_shared_fs(sb);
2337 2339
2338bail: 2340bail:
2339 return status; 2341 return status;
@@ -2563,22 +2565,22 @@ static void ocfs2_handle_error(struct super_block *sb)
2563 ocfs2_set_ro_flag(osb, 0); 2565 ocfs2_set_ro_flag(osb, 0);
2564} 2566}
2565 2567
2566static char error_buf[1024]; 2568void __ocfs2_error(struct super_block *sb, const char *function,
2567 2569 const char *fmt, ...)
2568void __ocfs2_error(struct super_block *sb,
2569 const char *function,
2570 const char *fmt, ...)
2571{ 2570{
2571 struct va_format vaf;
2572 va_list args; 2572 va_list args;
2573 2573
2574 va_start(args, fmt); 2574 va_start(args, fmt);
2575 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2575 vaf.fmt = fmt;
2576 va_end(args); 2576 vaf.va = &args;
2577 2577
2578 /* Not using mlog here because we want to show the actual 2578 /* Not using mlog here because we want to show the actual
2579 * function the error came from. */ 2579 * function the error came from. */
2580 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n", 2580 printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
2581 sb->s_id, function, error_buf); 2581 sb->s_id, function, &vaf);
2582
2583 va_end(args);
2582 2584
2583 ocfs2_handle_error(sb); 2585 ocfs2_handle_error(sb);
2584} 2586}
@@ -2586,18 +2588,21 @@ void __ocfs2_error(struct super_block *sb,
2586/* Handle critical errors. This is intentionally more drastic than 2588/* Handle critical errors. This is intentionally more drastic than
2587 * ocfs2_handle_error, so we only use for things like journal errors, 2589 * ocfs2_handle_error, so we only use for things like journal errors,
2588 * etc. */ 2590 * etc. */
2589void __ocfs2_abort(struct super_block* sb, 2591void __ocfs2_abort(struct super_block *sb, const char *function,
2590 const char *function,
2591 const char *fmt, ...) 2592 const char *fmt, ...)
2592{ 2593{
2594 struct va_format vaf;
2593 va_list args; 2595 va_list args;
2594 2596
2595 va_start(args, fmt); 2597 va_start(args, fmt);
2596 vsnprintf(error_buf, sizeof(error_buf), fmt, args);
2597 va_end(args);
2598 2598
2599 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n", 2599 vaf.fmt = fmt;
2600 sb->s_id, function, error_buf); 2600 vaf.va = &args;
2601
2602 printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
2603 sb->s_id, function, &vaf);
2604
2605 va_end(args);
2601 2606
2602 /* We don't have the cluster support yet to go straight to 2607 /* We don't have the cluster support yet to go straight to
2603 * hard readonly in here. Until then, we want to keep 2608 * hard readonly in here. Until then, we want to keep
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 85b190dc132f..4ca7533be479 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1238 i, 1238 i,
1239 &block_off, 1239 &block_off,
1240 &name_offset); 1240 &name_offset);
1241 if (ret) {
1242 mlog_errno(ret);
1243 goto cleanup;
1244 }
1241 xs->base = bucket_block(xs->bucket, block_off); 1245 xs->base = bucket_block(xs->bucket, block_off);
1242 } 1246 }
1243 if (ocfs2_xattr_is_local(xs->here)) { 1247 if (ocfs2_xattr_is_local(xs->here)) {
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5665 5669
5666 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, 5670 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5667 i, &xv, NULL); 5671 i, &xv, NULL);
5672 if (ret) {
5673 mlog_errno(ret);
5674 break;
5675 }
5668 5676
5669 ret = ocfs2_lock_xattr_remove_allocators(inode, xv, 5677 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5670 args->ref_ci, 5678 args->ref_ci,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 902e88527fce..f993be7f2156 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -337,8 +337,6 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
337 337
338const struct file_operations omfs_file_operations = { 338const struct file_operations omfs_file_operations = {
339 .llseek = generic_file_llseek, 339 .llseek = generic_file_llseek,
340 .read = new_sync_read,
341 .write = new_sync_write,
342 .read_iter = generic_file_read_iter, 340 .read_iter = generic_file_read_iter,
343 .write_iter = generic_file_write_iter, 341 .write_iter = generic_file_write_iter,
344 .mmap = generic_file_mmap, 342 .mmap = generic_file_mmap,
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..6796f04d6032 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
570 uid = make_kuid(current_user_ns(), user); 570 uid = make_kuid(current_user_ns(), user);
571 gid = make_kgid(current_user_ns(), group); 571 gid = make_kgid(current_user_ns(), group);
572 572
573retry_deleg:
573 newattrs.ia_valid = ATTR_CTIME; 574 newattrs.ia_valid = ATTR_CTIME;
574 if (user != (uid_t) -1) { 575 if (user != (uid_t) -1) {
575 if (!uid_valid(uid)) 576 if (!uid_valid(uid))
@@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
586 if (!S_ISDIR(inode->i_mode)) 587 if (!S_ISDIR(inode->i_mode))
587 newattrs.ia_valid |= 588 newattrs.ia_valid |=
588 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 589 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
589retry_deleg:
590 mutex_lock(&inode->i_mutex); 590 mutex_lock(&inode->i_mutex);
591 error = security_path_chown(path, uid, gid); 591 error = security_path_chown(path, uid, gid);
592 if (!error) 592 if (!error)
@@ -734,10 +734,10 @@ static int do_dentry_open(struct file *f,
734 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 734 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
735 i_readcount_inc(inode); 735 i_readcount_inc(inode);
736 if ((f->f_mode & FMODE_READ) && 736 if ((f->f_mode & FMODE_READ) &&
737 likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) 737 likely(f->f_op->read || f->f_op->read_iter))
738 f->f_mode |= FMODE_CAN_READ; 738 f->f_mode |= FMODE_CAN_READ;
739 if ((f->f_mode & FMODE_WRITE) && 739 if ((f->f_mode & FMODE_WRITE) &&
740 likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) 740 likely(f->f_op->write || f->f_op->write_iter))
741 f->f_mode |= FMODE_CAN_WRITE; 741 f->f_mode |= FMODE_CAN_WRITE;
742 742
743 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 743 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
988 return ERR_PTR(err); 988 return ERR_PTR(err);
989 if (flags & O_CREAT) 989 if (flags & O_CREAT)
990 return ERR_PTR(-EINVAL); 990 return ERR_PTR(-EINVAL);
991 if (!filename && (flags & O_DIRECTORY))
992 if (!dentry->d_inode->i_op->lookup)
993 return ERR_PTR(-ENOTDIR);
994 return do_file_open_root(dentry, mnt, filename, &op); 991 return do_file_open_root(dentry, mnt, filename, &op);
995} 992}
996EXPORT_SYMBOL(file_open_root); 993EXPORT_SYMBOL(file_open_root);
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e58e2a6..822da5b7cff0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/syscalls.h> 22#include <linux/syscalls.h>
23#include <linux/fcntl.h> 23#include <linux/fcntl.h>
24#include <linux/aio.h>
25 24
26#include <asm/uaccess.h> 25#include <asm/uaccess.h>
27#include <asm/ioctls.h> 26#include <asm/ioctls.h>
@@ -947,9 +946,7 @@ err:
947const struct file_operations pipefifo_fops = { 946const struct file_operations pipefifo_fops = {
948 .open = fifo_open, 947 .open = fifo_open,
949 .llseek = no_llseek, 948 .llseek = no_llseek,
950 .read = new_sync_read,
951 .read_iter = pipe_read, 949 .read_iter = pipe_read,
952 .write = new_sync_write,
953 .write_iter = pipe_write, 950 .write_iter = pipe_write,
954 .poll = pipe_poll, 951 .poll = pipe_poll,
955 .unlocked_ioctl = pipe_ioctl, 952 .unlocked_ioctl = pipe_ioctl,
diff --git a/fs/pnode.c b/fs/pnode.c
index 260ac8f898a4..6367e1e435c6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -362,6 +362,46 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
362} 362}
363 363
364/* 364/*
365 * Clear MNT_LOCKED when it can be shown to be safe.
366 *
367 * mount_lock lock must be held for write
368 */
369void propagate_mount_unlock(struct mount *mnt)
370{
371 struct mount *parent = mnt->mnt_parent;
372 struct mount *m, *child;
373
374 BUG_ON(parent == mnt);
375
376 for (m = propagation_next(parent, parent); m;
377 m = propagation_next(m, parent)) {
378 child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
379 if (child)
380 child->mnt.mnt_flags &= ~MNT_LOCKED;
381 }
382}
383
384/*
385 * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
386 */
387static void mark_umount_candidates(struct mount *mnt)
388{
389 struct mount *parent = mnt->mnt_parent;
390 struct mount *m;
391
392 BUG_ON(parent == mnt);
393
394 for (m = propagation_next(parent, parent); m;
395 m = propagation_next(m, parent)) {
396 struct mount *child = __lookup_mnt_last(&m->mnt,
397 mnt->mnt_mountpoint);
398 if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
399 SET_MNT_MARK(child);
400 }
401 }
402}
403
404/*
365 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its 405 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
366 * parent propagates to. 406 * parent propagates to.
367 */ 407 */
@@ -378,13 +418,16 @@ static void __propagate_umount(struct mount *mnt)
378 struct mount *child = __lookup_mnt_last(&m->mnt, 418 struct mount *child = __lookup_mnt_last(&m->mnt,
379 mnt->mnt_mountpoint); 419 mnt->mnt_mountpoint);
380 /* 420 /*
381 * umount the child only if the child has no 421 * umount the child only if the child has no children
382 * other children 422 * and the child is marked safe to unmount.
383 */ 423 */
384 if (child && list_empty(&child->mnt_mounts)) { 424 if (!child || !IS_MNT_MARKED(child))
425 continue;
426 CLEAR_MNT_MARK(child);
427 if (list_empty(&child->mnt_mounts)) {
385 list_del_init(&child->mnt_child); 428 list_del_init(&child->mnt_child);
386 hlist_del_init_rcu(&child->mnt_hash); 429 child->mnt.mnt_flags |= MNT_UMOUNT;
387 hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); 430 list_move_tail(&child->mnt_list, &mnt->mnt_list);
388 } 431 }
389 } 432 }
390} 433}
@@ -396,11 +439,14 @@ static void __propagate_umount(struct mount *mnt)
396 * 439 *
397 * vfsmount lock must be held for write 440 * vfsmount lock must be held for write
398 */ 441 */
399int propagate_umount(struct hlist_head *list) 442int propagate_umount(struct list_head *list)
400{ 443{
401 struct mount *mnt; 444 struct mount *mnt;
402 445
403 hlist_for_each_entry(mnt, list, mnt_hash) 446 list_for_each_entry_reverse(mnt, list, mnt_list)
447 mark_umount_candidates(mnt);
448
449 list_for_each_entry(mnt, list, mnt_list)
404 __propagate_umount(mnt); 450 __propagate_umount(mnt);
405 return 0; 451 return 0;
406} 452}
diff --git a/fs/pnode.h b/fs/pnode.h
index 4a246358b031..7114ce6e6b9e 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -19,6 +19,9 @@
19#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) 19#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
20#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED) 20#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
21#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED) 21#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
22#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
23#define IS_MNT_LOCKED_AND_LAZY(m) \
24 (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED)
22 25
23#define CL_EXPIRE 0x01 26#define CL_EXPIRE 0x01
24#define CL_SLAVE 0x02 27#define CL_SLAVE 0x02
@@ -40,14 +43,14 @@ static inline void set_mnt_shared(struct mount *mnt)
40void change_mnt_propagation(struct mount *, int); 43void change_mnt_propagation(struct mount *, int);
41int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, 44int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
42 struct hlist_head *); 45 struct hlist_head *);
43int propagate_umount(struct hlist_head *); 46int propagate_umount(struct list_head *);
44int propagate_mount_busy(struct mount *, int); 47int propagate_mount_busy(struct mount *, int);
48void propagate_mount_unlock(struct mount *);
45void mnt_release_group_id(struct mount *); 49void mnt_release_group_id(struct mount *);
46int get_dominating_id(struct mount *mnt, const struct path *root); 50int get_dominating_id(struct mount *mnt, const struct path *root);
47unsigned int mnt_get_count(struct mount *mnt); 51unsigned int mnt_get_count(struct mount *mnt);
48void mnt_set_mountpoint(struct mount *, struct mountpoint *, 52void mnt_set_mountpoint(struct mount *, struct mountpoint *,
49 struct mount *); 53 struct mount *);
50void umount_tree(struct mount *, int);
51struct mount *copy_tree(struct mount *, struct dentry *, int); 54struct mount *copy_tree(struct mount *, struct dentry *, int);
52bool is_path_reachable(struct mount *, struct dentry *, 55bool is_path_reachable(struct mount *, struct dentry *,
53 const struct path *root); 56 const struct path *root);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1295a00ca316..fd02a9ebfc30 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -99,8 +99,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
99 buf = m->buf + m->count; 99 buf = m->buf + m->count;
100 100
101 /* Ignore error for now */ 101 /* Ignore error for now */
102 string_escape_str(tcomm, &buf, m->size - m->count, 102 buf += string_escape_str(tcomm, buf, m->size - m->count,
103 ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); 103 ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
104 104
105 m->count = buf - m->buf; 105 m->count = buf - m->buf;
106 seq_putc(m, '\n'); 106 seq_putc(m, '\n');
@@ -188,6 +188,24 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
188 from_kgid_munged(user_ns, GROUP_AT(group_info, g))); 188 from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
189 put_cred(cred); 189 put_cred(cred);
190 190
191#ifdef CONFIG_PID_NS
192 seq_puts(m, "\nNStgid:");
193 for (g = ns->level; g <= pid->level; g++)
194 seq_printf(m, "\t%d",
195 task_tgid_nr_ns(p, pid->numbers[g].ns));
196 seq_puts(m, "\nNSpid:");
197 for (g = ns->level; g <= pid->level; g++)
198 seq_printf(m, "\t%d",
199 task_pid_nr_ns(p, pid->numbers[g].ns));
200 seq_puts(m, "\nNSpgid:");
201 for (g = ns->level; g <= pid->level; g++)
202 seq_printf(m, "\t%d",
203 task_pgrp_nr_ns(p, pid->numbers[g].ns));
204 seq_puts(m, "\nNSsid:");
205 for (g = ns->level; g <= pid->level; g++)
206 seq_printf(m, "\t%d",
207 task_session_nr_ns(p, pid->numbers[g].ns));
208#endif
191 seq_putc(m, '\n'); 209 seq_putc(m, '\n');
192} 210}
193 211
@@ -614,7 +632,9 @@ static int children_seq_show(struct seq_file *seq, void *v)
614 pid_t pid; 632 pid_t pid;
615 633
616 pid = pid_nr_ns(v, inode->i_sb->s_fs_info); 634 pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
617 return seq_printf(seq, "%d ", pid); 635 seq_printf(seq, "%d ", pid);
636
637 return 0;
618} 638}
619 639
620static void *children_seq_start(struct seq_file *seq, loff_t *pos) 640static void *children_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3f3d7aeb0712..7a3b82f986dd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -238,13 +238,15 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
238 238
239 wchan = get_wchan(task); 239 wchan = get_wchan(task);
240 240
241 if (lookup_symbol_name(wchan, symname) < 0) 241 if (lookup_symbol_name(wchan, symname) < 0) {
242 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 242 if (!ptrace_may_access(task, PTRACE_MODE_READ))
243 return 0; 243 return 0;
244 else 244 seq_printf(m, "%lu", wchan);
245 return seq_printf(m, "%lu", wchan); 245 } else {
246 else 246 seq_printf(m, "%s", symname);
247 return seq_printf(m, "%s", symname); 247 }
248
249 return 0;
248} 250}
249#endif /* CONFIG_KALLSYMS */ 251#endif /* CONFIG_KALLSYMS */
250 252
@@ -309,10 +311,12 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
309static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 311static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
310 struct pid *pid, struct task_struct *task) 312 struct pid *pid, struct task_struct *task)
311{ 313{
312 return seq_printf(m, "%llu %llu %lu\n", 314 seq_printf(m, "%llu %llu %lu\n",
313 (unsigned long long)task->se.sum_exec_runtime, 315 (unsigned long long)task->se.sum_exec_runtime,
314 (unsigned long long)task->sched_info.run_delay, 316 (unsigned long long)task->sched_info.run_delay,
315 task->sched_info.pcount); 317 task->sched_info.pcount);
318
319 return 0;
316} 320}
317#endif 321#endif
318 322
@@ -387,7 +391,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
387 points = oom_badness(task, NULL, NULL, totalpages) * 391 points = oom_badness(task, NULL, NULL, totalpages) *
388 1000 / totalpages; 392 1000 / totalpages;
389 read_unlock(&tasklist_lock); 393 read_unlock(&tasklist_lock);
390 return seq_printf(m, "%lu\n", points); 394 seq_printf(m, "%lu\n", points);
395
396 return 0;
391} 397}
392 398
393struct limit_names { 399struct limit_names {
@@ -432,15 +438,15 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
432 * print the file header 438 * print the file header
433 */ 439 */
434 seq_printf(m, "%-25s %-20s %-20s %-10s\n", 440 seq_printf(m, "%-25s %-20s %-20s %-10s\n",
435 "Limit", "Soft Limit", "Hard Limit", "Units"); 441 "Limit", "Soft Limit", "Hard Limit", "Units");
436 442
437 for (i = 0; i < RLIM_NLIMITS; i++) { 443 for (i = 0; i < RLIM_NLIMITS; i++) {
438 if (rlim[i].rlim_cur == RLIM_INFINITY) 444 if (rlim[i].rlim_cur == RLIM_INFINITY)
439 seq_printf(m, "%-25s %-20s ", 445 seq_printf(m, "%-25s %-20s ",
440 lnames[i].name, "unlimited"); 446 lnames[i].name, "unlimited");
441 else 447 else
442 seq_printf(m, "%-25s %-20lu ", 448 seq_printf(m, "%-25s %-20lu ",
443 lnames[i].name, rlim[i].rlim_cur); 449 lnames[i].name, rlim[i].rlim_cur);
444 450
445 if (rlim[i].rlim_max == RLIM_INFINITY) 451 if (rlim[i].rlim_max == RLIM_INFINITY)
446 seq_printf(m, "%-20s ", "unlimited"); 452 seq_printf(m, "%-20s ", "unlimited");
@@ -462,7 +468,9 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
462{ 468{
463 long nr; 469 long nr;
464 unsigned long args[6], sp, pc; 470 unsigned long args[6], sp, pc;
465 int res = lock_trace(task); 471 int res;
472
473 res = lock_trace(task);
466 if (res) 474 if (res)
467 return res; 475 return res;
468 476
@@ -477,7 +485,8 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
477 args[0], args[1], args[2], args[3], args[4], args[5], 485 args[0], args[1], args[2], args[3], args[4], args[5],
478 sp, pc); 486 sp, pc);
479 unlock_trace(task); 487 unlock_trace(task);
480 return res; 488
489 return 0;
481} 490}
482#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 491#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
483 492
@@ -2002,12 +2011,13 @@ static int show_timer(struct seq_file *m, void *v)
2002 notify = timer->it_sigev_notify; 2011 notify = timer->it_sigev_notify;
2003 2012
2004 seq_printf(m, "ID: %d\n", timer->it_id); 2013 seq_printf(m, "ID: %d\n", timer->it_id);
2005 seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo, 2014 seq_printf(m, "signal: %d/%p\n",
2006 timer->sigq->info.si_value.sival_ptr); 2015 timer->sigq->info.si_signo,
2016 timer->sigq->info.si_value.sival_ptr);
2007 seq_printf(m, "notify: %s/%s.%d\n", 2017 seq_printf(m, "notify: %s/%s.%d\n",
2008 nstr[notify & ~SIGEV_THREAD_ID], 2018 nstr[notify & ~SIGEV_THREAD_ID],
2009 (notify & SIGEV_THREAD_ID) ? "tid" : "pid", 2019 (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
2010 pid_nr_ns(timer->it_pid, tp->ns)); 2020 pid_nr_ns(timer->it_pid, tp->ns));
2011 seq_printf(m, "ClockID: %d\n", timer->it_clock); 2021 seq_printf(m, "ClockID: %d\n", timer->it_clock);
2012 2022
2013 return 0; 2023 return 0;
@@ -2352,21 +2362,23 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
2352 2362
2353 unlock_task_sighand(task, &flags); 2363 unlock_task_sighand(task, &flags);
2354 } 2364 }
2355 result = seq_printf(m, 2365 seq_printf(m,
2356 "rchar: %llu\n" 2366 "rchar: %llu\n"
2357 "wchar: %llu\n" 2367 "wchar: %llu\n"
2358 "syscr: %llu\n" 2368 "syscr: %llu\n"
2359 "syscw: %llu\n" 2369 "syscw: %llu\n"
2360 "read_bytes: %llu\n" 2370 "read_bytes: %llu\n"
2361 "write_bytes: %llu\n" 2371 "write_bytes: %llu\n"
2362 "cancelled_write_bytes: %llu\n", 2372 "cancelled_write_bytes: %llu\n",
2363 (unsigned long long)acct.rchar, 2373 (unsigned long long)acct.rchar,
2364 (unsigned long long)acct.wchar, 2374 (unsigned long long)acct.wchar,
2365 (unsigned long long)acct.syscr, 2375 (unsigned long long)acct.syscr,
2366 (unsigned long long)acct.syscw, 2376 (unsigned long long)acct.syscw,
2367 (unsigned long long)acct.read_bytes, 2377 (unsigned long long)acct.read_bytes,
2368 (unsigned long long)acct.write_bytes, 2378 (unsigned long long)acct.write_bytes,
2369 (unsigned long long)acct.cancelled_write_bytes); 2379 (unsigned long long)acct.cancelled_write_bytes);
2380 result = 0;
2381
2370out_unlock: 2382out_unlock:
2371 mutex_unlock(&task->signal->cred_guard_mutex); 2383 mutex_unlock(&task->signal->cred_guard_mutex);
2372 return result; 2384 return result;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 8e5ad83b629a..af84ad04df77 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -8,6 +8,7 @@
8#include <linux/security.h> 8#include <linux/security.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/seq_file.h> 10#include <linux/seq_file.h>
11#include <linux/fs.h>
11 12
12#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
13 14
@@ -48,17 +49,23 @@ static int seq_show(struct seq_file *m, void *v)
48 put_files_struct(files); 49 put_files_struct(files);
49 } 50 }
50 51
51 if (!ret) { 52 if (ret)
52 seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n", 53 return ret;
53 (long long)file->f_pos, f_flags,
54 real_mount(file->f_path.mnt)->mnt_id);
55 if (file->f_op->show_fdinfo)
56 file->f_op->show_fdinfo(m, file);
57 ret = seq_has_overflowed(m);
58 fput(file);
59 }
60 54
61 return ret; 55 seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
56 (long long)file->f_pos, f_flags,
57 real_mount(file->f_path.mnt)->mnt_id);
58
59 show_fd_locks(m, file, files);
60 if (seq_has_overflowed(m))
61 goto out;
62
63 if (file->f_op->show_fdinfo)
64 file->f_op->show_fdinfo(m, file);
65
66out:
67 fput(file);
68 return 0;
62} 69}
63 70
64static int seq_fdinfo_open(struct inode *inode, struct file *file) 71static int seq_fdinfo_open(struct inode *inode, struct file *file)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b32ce53d24ee..56e1ffda4d89 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -364,6 +364,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
364 case PSTORE_TYPE_PMSG: 364 case PSTORE_TYPE_PMSG:
365 scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); 365 scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id);
366 break; 366 break;
367 case PSTORE_TYPE_PPC_OPAL:
368 sprintf(name, "powerpc-opal-%s-%lld", psname, id);
369 break;
367 case PSTORE_TYPE_UNKNOWN: 370 case PSTORE_TYPE_UNKNOWN:
368 scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); 371 scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id);
369 break; 372 break;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
539 mem_address = pdata->mem_address; 539 mem_address = pdata->mem_address;
540 record_size = pdata->record_size; 540 record_size = pdata->record_size;
541 dump_oops = pdata->dump_oops; 541 dump_oops = pdata->dump_oops;
542 ramoops_console_size = pdata->console_size;
543 ramoops_pmsg_size = pdata->pmsg_size;
544 ramoops_ftrace_size = pdata->ftrace_size;
542 545
543 pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n", 546 pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
544 cxt->size, (unsigned long long)cxt->phys_addr, 547 cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0ccd4ba3a246..ecc25cf0ee6e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -900,14 +900,17 @@ static inline struct dquot **i_dquot(struct inode *inode)
900 900
901static int dqinit_needed(struct inode *inode, int type) 901static int dqinit_needed(struct inode *inode, int type)
902{ 902{
903 struct dquot * const *dquots;
903 int cnt; 904 int cnt;
904 905
905 if (IS_NOQUOTA(inode)) 906 if (IS_NOQUOTA(inode))
906 return 0; 907 return 0;
908
909 dquots = i_dquot(inode);
907 if (type != -1) 910 if (type != -1)
908 return !i_dquot(inode)[type]; 911 return !dquots[type];
909 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 912 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
910 if (!i_dquot(inode)[cnt]) 913 if (!dquots[cnt])
911 return 1; 914 return 1;
912 return 0; 915 return 0;
913} 916}
@@ -970,12 +973,13 @@ static void add_dquot_ref(struct super_block *sb, int type)
970static void remove_inode_dquot_ref(struct inode *inode, int type, 973static void remove_inode_dquot_ref(struct inode *inode, int type,
971 struct list_head *tofree_head) 974 struct list_head *tofree_head)
972{ 975{
973 struct dquot *dquot = i_dquot(inode)[type]; 976 struct dquot **dquots = i_dquot(inode);
977 struct dquot *dquot = dquots[type];
974 978
975 i_dquot(inode)[type] = NULL;
976 if (!dquot) 979 if (!dquot)
977 return; 980 return;
978 981
982 dquots[type] = NULL;
979 if (list_empty(&dquot->dq_free)) { 983 if (list_empty(&dquot->dq_free)) {
980 /* 984 /*
981 * The inode still has reference to dquot so it can't be in the 985 * The inode still has reference to dquot so it can't be in the
@@ -1159,8 +1163,8 @@ static int need_print_warning(struct dquot_warn *warn)
1159 return uid_eq(current_fsuid(), warn->w_dq_id.uid); 1163 return uid_eq(current_fsuid(), warn->w_dq_id.uid);
1160 case GRPQUOTA: 1164 case GRPQUOTA:
1161 return in_group_p(warn->w_dq_id.gid); 1165 return in_group_p(warn->w_dq_id.gid);
1162 case PRJQUOTA: /* Never taken... Just make gcc happy */ 1166 case PRJQUOTA:
1163 return 0; 1167 return 1;
1164 } 1168 }
1165 return 0; 1169 return 0;
1166} 1170}
@@ -1389,16 +1393,21 @@ static int dquot_active(const struct inode *inode)
1389static void __dquot_initialize(struct inode *inode, int type) 1393static void __dquot_initialize(struct inode *inode, int type)
1390{ 1394{
1391 int cnt, init_needed = 0; 1395 int cnt, init_needed = 0;
1392 struct dquot *got[MAXQUOTAS]; 1396 struct dquot **dquots, *got[MAXQUOTAS];
1393 struct super_block *sb = inode->i_sb; 1397 struct super_block *sb = inode->i_sb;
1394 qsize_t rsv; 1398 qsize_t rsv;
1395 1399
1396 if (!dquot_active(inode)) 1400 if (!dquot_active(inode))
1397 return; 1401 return;
1398 1402
1403 dquots = i_dquot(inode);
1404
1399 /* First get references to structures we might need. */ 1405 /* First get references to structures we might need. */
1400 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1406 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1401 struct kqid qid; 1407 struct kqid qid;
1408 kprojid_t projid;
1409 int rc;
1410
1402 got[cnt] = NULL; 1411 got[cnt] = NULL;
1403 if (type != -1 && cnt != type) 1412 if (type != -1 && cnt != type)
1404 continue; 1413 continue;
@@ -1407,8 +1416,12 @@ static void __dquot_initialize(struct inode *inode, int type)
1407 * we check it without locking here to avoid unnecessary 1416 * we check it without locking here to avoid unnecessary
1408 * dqget()/dqput() calls. 1417 * dqget()/dqput() calls.
1409 */ 1418 */
1410 if (i_dquot(inode)[cnt]) 1419 if (dquots[cnt])
1420 continue;
1421
1422 if (!sb_has_quota_active(sb, cnt))
1411 continue; 1423 continue;
1424
1412 init_needed = 1; 1425 init_needed = 1;
1413 1426
1414 switch (cnt) { 1427 switch (cnt) {
@@ -1418,6 +1431,12 @@ static void __dquot_initialize(struct inode *inode, int type)
1418 case GRPQUOTA: 1431 case GRPQUOTA:
1419 qid = make_kqid_gid(inode->i_gid); 1432 qid = make_kqid_gid(inode->i_gid);
1420 break; 1433 break;
1434 case PRJQUOTA:
1435 rc = inode->i_sb->dq_op->get_projid(inode, &projid);
1436 if (rc)
1437 continue;
1438 qid = make_kqid_projid(projid);
1439 break;
1421 } 1440 }
1422 got[cnt] = dqget(sb, qid); 1441 got[cnt] = dqget(sb, qid);
1423 } 1442 }
@@ -1438,8 +1457,8 @@ static void __dquot_initialize(struct inode *inode, int type)
1438 /* We could race with quotaon or dqget() could have failed */ 1457 /* We could race with quotaon or dqget() could have failed */
1439 if (!got[cnt]) 1458 if (!got[cnt])
1440 continue; 1459 continue;
1441 if (!i_dquot(inode)[cnt]) { 1460 if (!dquots[cnt]) {
1442 i_dquot(inode)[cnt] = got[cnt]; 1461 dquots[cnt] = got[cnt];
1443 got[cnt] = NULL; 1462 got[cnt] = NULL;
1444 /* 1463 /*
1445 * Make quota reservation system happy if someone 1464 * Make quota reservation system happy if someone
@@ -1447,7 +1466,7 @@ static void __dquot_initialize(struct inode *inode, int type)
1447 */ 1466 */
1448 rsv = inode_get_rsv_space(inode); 1467 rsv = inode_get_rsv_space(inode);
1449 if (unlikely(rsv)) 1468 if (unlikely(rsv))
1450 dquot_resv_space(i_dquot(inode)[cnt], rsv); 1469 dquot_resv_space(dquots[cnt], rsv);
1451 } 1470 }
1452 } 1471 }
1453out_err: 1472out_err:
@@ -1473,12 +1492,13 @@ EXPORT_SYMBOL(dquot_initialize);
1473static void __dquot_drop(struct inode *inode) 1492static void __dquot_drop(struct inode *inode)
1474{ 1493{
1475 int cnt; 1494 int cnt;
1495 struct dquot **dquots = i_dquot(inode);
1476 struct dquot *put[MAXQUOTAS]; 1496 struct dquot *put[MAXQUOTAS];
1477 1497
1478 spin_lock(&dq_data_lock); 1498 spin_lock(&dq_data_lock);
1479 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1499 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1480 put[cnt] = i_dquot(inode)[cnt]; 1500 put[cnt] = dquots[cnt];
1481 i_dquot(inode)[cnt] = NULL; 1501 dquots[cnt] = NULL;
1482 } 1502 }
1483 spin_unlock(&dq_data_lock); 1503 spin_unlock(&dq_data_lock);
1484 dqput_all(put); 1504 dqput_all(put);
@@ -1486,6 +1506,7 @@ static void __dquot_drop(struct inode *inode)
1486 1506
1487void dquot_drop(struct inode *inode) 1507void dquot_drop(struct inode *inode)
1488{ 1508{
1509 struct dquot * const *dquots;
1489 int cnt; 1510 int cnt;
1490 1511
1491 if (IS_NOQUOTA(inode)) 1512 if (IS_NOQUOTA(inode))
@@ -1498,8 +1519,9 @@ void dquot_drop(struct inode *inode)
1498 * must assure that nobody can come after the DQUOT_DROP and 1519 * must assure that nobody can come after the DQUOT_DROP and
1499 * add quota pointers back anyway. 1520 * add quota pointers back anyway.
1500 */ 1521 */
1522 dquots = i_dquot(inode);
1501 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1523 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1502 if (i_dquot(inode)[cnt]) 1524 if (dquots[cnt])
1503 break; 1525 break;
1504 } 1526 }
1505 1527
@@ -1600,8 +1622,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1600{ 1622{
1601 int cnt, ret = 0, index; 1623 int cnt, ret = 0, index;
1602 struct dquot_warn warn[MAXQUOTAS]; 1624 struct dquot_warn warn[MAXQUOTAS];
1603 struct dquot **dquots = i_dquot(inode);
1604 int reserve = flags & DQUOT_SPACE_RESERVE; 1625 int reserve = flags & DQUOT_SPACE_RESERVE;
1626 struct dquot **dquots;
1605 1627
1606 if (!dquot_active(inode)) { 1628 if (!dquot_active(inode)) {
1607 inode_incr_space(inode, number, reserve); 1629 inode_incr_space(inode, number, reserve);
@@ -1611,6 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
1611 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1633 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1612 warn[cnt].w_type = QUOTA_NL_NOWARN; 1634 warn[cnt].w_type = QUOTA_NL_NOWARN;
1613 1635
1636 dquots = i_dquot(inode);
1614 index = srcu_read_lock(&dquot_srcu); 1637 index = srcu_read_lock(&dquot_srcu);
1615 spin_lock(&dq_data_lock); 1638 spin_lock(&dq_data_lock);
1616 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1639 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1652,13 +1675,14 @@ int dquot_alloc_inode(struct inode *inode)
1652{ 1675{
1653 int cnt, ret = 0, index; 1676 int cnt, ret = 0, index;
1654 struct dquot_warn warn[MAXQUOTAS]; 1677 struct dquot_warn warn[MAXQUOTAS];
1655 struct dquot * const *dquots = i_dquot(inode); 1678 struct dquot * const *dquots;
1656 1679
1657 if (!dquot_active(inode)) 1680 if (!dquot_active(inode))
1658 return 0; 1681 return 0;
1659 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1682 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1660 warn[cnt].w_type = QUOTA_NL_NOWARN; 1683 warn[cnt].w_type = QUOTA_NL_NOWARN;
1661 1684
1685 dquots = i_dquot(inode);
1662 index = srcu_read_lock(&dquot_srcu); 1686 index = srcu_read_lock(&dquot_srcu);
1663 spin_lock(&dq_data_lock); 1687 spin_lock(&dq_data_lock);
1664 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1688 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1690,6 +1714,7 @@ EXPORT_SYMBOL(dquot_alloc_inode);
1690 */ 1714 */
1691int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) 1715int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1692{ 1716{
1717 struct dquot **dquots;
1693 int cnt, index; 1718 int cnt, index;
1694 1719
1695 if (!dquot_active(inode)) { 1720 if (!dquot_active(inode)) {
@@ -1697,18 +1722,18 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
1697 return 0; 1722 return 0;
1698 } 1723 }
1699 1724
1725 dquots = i_dquot(inode);
1700 index = srcu_read_lock(&dquot_srcu); 1726 index = srcu_read_lock(&dquot_srcu);
1701 spin_lock(&dq_data_lock); 1727 spin_lock(&dq_data_lock);
1702 /* Claim reserved quotas to allocated quotas */ 1728 /* Claim reserved quotas to allocated quotas */
1703 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1729 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1704 if (i_dquot(inode)[cnt]) 1730 if (dquots[cnt])
1705 dquot_claim_reserved_space(i_dquot(inode)[cnt], 1731 dquot_claim_reserved_space(dquots[cnt], number);
1706 number);
1707 } 1732 }
1708 /* Update inode bytes */ 1733 /* Update inode bytes */
1709 inode_claim_rsv_space(inode, number); 1734 inode_claim_rsv_space(inode, number);
1710 spin_unlock(&dq_data_lock); 1735 spin_unlock(&dq_data_lock);
1711 mark_all_dquot_dirty(i_dquot(inode)); 1736 mark_all_dquot_dirty(dquots);
1712 srcu_read_unlock(&dquot_srcu, index); 1737 srcu_read_unlock(&dquot_srcu, index);
1713 return 0; 1738 return 0;
1714} 1739}
@@ -1719,6 +1744,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
1719 */ 1744 */
1720void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) 1745void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1721{ 1746{
1747 struct dquot **dquots;
1722 int cnt, index; 1748 int cnt, index;
1723 1749
1724 if (!dquot_active(inode)) { 1750 if (!dquot_active(inode)) {
@@ -1726,18 +1752,18 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
1726 return; 1752 return;
1727 } 1753 }
1728 1754
1755 dquots = i_dquot(inode);
1729 index = srcu_read_lock(&dquot_srcu); 1756 index = srcu_read_lock(&dquot_srcu);
1730 spin_lock(&dq_data_lock); 1757 spin_lock(&dq_data_lock);
1731 /* Claim reserved quotas to allocated quotas */ 1758 /* Claim reserved quotas to allocated quotas */
1732 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1759 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1733 if (i_dquot(inode)[cnt]) 1760 if (dquots[cnt])
1734 dquot_reclaim_reserved_space(i_dquot(inode)[cnt], 1761 dquot_reclaim_reserved_space(dquots[cnt], number);
1735 number);
1736 } 1762 }
1737 /* Update inode bytes */ 1763 /* Update inode bytes */
1738 inode_reclaim_rsv_space(inode, number); 1764 inode_reclaim_rsv_space(inode, number);
1739 spin_unlock(&dq_data_lock); 1765 spin_unlock(&dq_data_lock);
1740 mark_all_dquot_dirty(i_dquot(inode)); 1766 mark_all_dquot_dirty(dquots);
1741 srcu_read_unlock(&dquot_srcu, index); 1767 srcu_read_unlock(&dquot_srcu, index);
1742 return; 1768 return;
1743} 1769}
@@ -1750,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1750{ 1776{
1751 unsigned int cnt; 1777 unsigned int cnt;
1752 struct dquot_warn warn[MAXQUOTAS]; 1778 struct dquot_warn warn[MAXQUOTAS];
1753 struct dquot **dquots = i_dquot(inode); 1779 struct dquot **dquots;
1754 int reserve = flags & DQUOT_SPACE_RESERVE, index; 1780 int reserve = flags & DQUOT_SPACE_RESERVE, index;
1755 1781
1756 if (!dquot_active(inode)) { 1782 if (!dquot_active(inode)) {
@@ -1758,6 +1784,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
1758 return; 1784 return;
1759 } 1785 }
1760 1786
1787 dquots = i_dquot(inode);
1761 index = srcu_read_lock(&dquot_srcu); 1788 index = srcu_read_lock(&dquot_srcu);
1762 spin_lock(&dq_data_lock); 1789 spin_lock(&dq_data_lock);
1763 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1790 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1793,12 +1820,13 @@ void dquot_free_inode(struct inode *inode)
1793{ 1820{
1794 unsigned int cnt; 1821 unsigned int cnt;
1795 struct dquot_warn warn[MAXQUOTAS]; 1822 struct dquot_warn warn[MAXQUOTAS];
1796 struct dquot * const *dquots = i_dquot(inode); 1823 struct dquot * const *dquots;
1797 int index; 1824 int index;
1798 1825
1799 if (!dquot_active(inode)) 1826 if (!dquot_active(inode))
1800 return; 1827 return;
1801 1828
1829 dquots = i_dquot(inode);
1802 index = srcu_read_lock(&dquot_srcu); 1830 index = srcu_read_lock(&dquot_srcu);
1803 spin_lock(&dq_data_lock); 1831 spin_lock(&dq_data_lock);
1804 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1832 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -2161,7 +2189,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2161 error = -EROFS; 2189 error = -EROFS;
2162 goto out_fmt; 2190 goto out_fmt;
2163 } 2191 }
2164 if (!sb->s_op->quota_write || !sb->s_op->quota_read) { 2192 if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
2193 (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
2165 error = -EINVAL; 2194 error = -EINVAL;
2166 goto out_fmt; 2195 goto out_fmt;
2167 } 2196 }
@@ -2614,55 +2643,73 @@ out:
2614EXPORT_SYMBOL(dquot_set_dqblk); 2643EXPORT_SYMBOL(dquot_set_dqblk);
2615 2644
2616/* Generic routine for getting common part of quota file information */ 2645/* Generic routine for getting common part of quota file information */
2617int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) 2646int dquot_get_state(struct super_block *sb, struct qc_state *state)
2618{ 2647{
2619 struct mem_dqinfo *mi; 2648 struct mem_dqinfo *mi;
2649 struct qc_type_state *tstate;
2650 struct quota_info *dqopt = sb_dqopt(sb);
2651 int type;
2620 2652
2621 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 2653 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
2622 if (!sb_has_quota_active(sb, type)) { 2654 memset(state, 0, sizeof(*state));
2623 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 2655 for (type = 0; type < MAXQUOTAS; type++) {
2624 return -ESRCH; 2656 if (!sb_has_quota_active(sb, type))
2657 continue;
2658 tstate = state->s_state + type;
2659 mi = sb_dqopt(sb)->info + type;
2660 tstate->flags = QCI_ACCT_ENABLED;
2661 spin_lock(&dq_data_lock);
2662 if (mi->dqi_flags & DQF_SYS_FILE)
2663 tstate->flags |= QCI_SYSFILE;
2664 if (mi->dqi_flags & DQF_ROOT_SQUASH)
2665 tstate->flags |= QCI_ROOT_SQUASH;
2666 if (sb_has_quota_limits_enabled(sb, type))
2667 tstate->flags |= QCI_LIMITS_ENFORCED;
2668 tstate->spc_timelimit = mi->dqi_bgrace;
2669 tstate->ino_timelimit = mi->dqi_igrace;
2670 tstate->ino = dqopt->files[type]->i_ino;
2671 tstate->blocks = dqopt->files[type]->i_blocks;
2672 tstate->nextents = 1; /* We don't know... */
2673 spin_unlock(&dq_data_lock);
2625 } 2674 }
2626 mi = sb_dqopt(sb)->info + type;
2627 spin_lock(&dq_data_lock);
2628 ii->dqi_bgrace = mi->dqi_bgrace;
2629 ii->dqi_igrace = mi->dqi_igrace;
2630 ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK;
2631 ii->dqi_valid = IIF_ALL;
2632 spin_unlock(&dq_data_lock);
2633 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 2675 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
2634 return 0; 2676 return 0;
2635} 2677}
2636EXPORT_SYMBOL(dquot_get_dqinfo); 2678EXPORT_SYMBOL(dquot_get_state);
2637 2679
2638/* Generic routine for setting common part of quota file information */ 2680/* Generic routine for setting common part of quota file information */
2639int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) 2681int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
2640{ 2682{
2641 struct mem_dqinfo *mi; 2683 struct mem_dqinfo *mi;
2642 int err = 0; 2684 int err = 0;
2643 2685
2686 if ((ii->i_fieldmask & QC_WARNS_MASK) ||
2687 (ii->i_fieldmask & QC_RT_SPC_TIMER))
2688 return -EINVAL;
2644 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 2689 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
2645 if (!sb_has_quota_active(sb, type)) { 2690 if (!sb_has_quota_active(sb, type)) {
2646 err = -ESRCH; 2691 err = -ESRCH;
2647 goto out; 2692 goto out;
2648 } 2693 }
2649 mi = sb_dqopt(sb)->info + type; 2694 mi = sb_dqopt(sb)->info + type;
2650 if (ii->dqi_valid & IIF_FLAGS) { 2695 if (ii->i_fieldmask & QC_FLAGS) {
2651 if (ii->dqi_flags & ~DQF_SETINFO_MASK || 2696 if ((ii->i_flags & QCI_ROOT_SQUASH &&
2652 (ii->dqi_flags & DQF_ROOT_SQUASH &&
2653 mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) { 2697 mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) {
2654 err = -EINVAL; 2698 err = -EINVAL;
2655 goto out; 2699 goto out;
2656 } 2700 }
2657 } 2701 }
2658 spin_lock(&dq_data_lock); 2702 spin_lock(&dq_data_lock);
2659 if (ii->dqi_valid & IIF_BGRACE) 2703 if (ii->i_fieldmask & QC_SPC_TIMER)
2660 mi->dqi_bgrace = ii->dqi_bgrace; 2704 mi->dqi_bgrace = ii->i_spc_timelimit;
2661 if (ii->dqi_valid & IIF_IGRACE) 2705 if (ii->i_fieldmask & QC_INO_TIMER)
2662 mi->dqi_igrace = ii->dqi_igrace; 2706 mi->dqi_igrace = ii->i_ino_timelimit;
2663 if (ii->dqi_valid & IIF_FLAGS) 2707 if (ii->i_fieldmask & QC_FLAGS) {
2664 mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) | 2708 if (ii->i_flags & QCI_ROOT_SQUASH)
2665 (ii->dqi_flags & DQF_SETINFO_MASK); 2709 mi->dqi_flags |= DQF_ROOT_SQUASH;
2710 else
2711 mi->dqi_flags &= ~DQF_ROOT_SQUASH;
2712 }
2666 spin_unlock(&dq_data_lock); 2713 spin_unlock(&dq_data_lock);
2667 mark_info_dirty(sb, type); 2714 mark_info_dirty(sb, type);
2668 /* Force write to disk */ 2715 /* Force write to disk */
@@ -2677,7 +2724,7 @@ const struct quotactl_ops dquot_quotactl_ops = {
2677 .quota_on = dquot_quota_on, 2724 .quota_on = dquot_quota_on,
2678 .quota_off = dquot_quota_off, 2725 .quota_off = dquot_quota_off,
2679 .quota_sync = dquot_quota_sync, 2726 .quota_sync = dquot_quota_sync,
2680 .get_info = dquot_get_dqinfo, 2727 .get_state = dquot_get_state,
2681 .set_info = dquot_set_dqinfo, 2728 .set_info = dquot_set_dqinfo,
2682 .get_dqblk = dquot_get_dqblk, 2729 .get_dqblk = dquot_get_dqblk,
2683 .set_dqblk = dquot_set_dqblk 2730 .set_dqblk = dquot_set_dqblk
@@ -2688,7 +2735,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = {
2688 .quota_enable = dquot_quota_enable, 2735 .quota_enable = dquot_quota_enable,
2689 .quota_disable = dquot_quota_disable, 2736 .quota_disable = dquot_quota_disable,
2690 .quota_sync = dquot_quota_sync, 2737 .quota_sync = dquot_quota_sync,
2691 .get_info = dquot_get_dqinfo, 2738 .get_state = dquot_get_state,
2692 .set_info = dquot_set_dqinfo, 2739 .set_info = dquot_set_dqinfo,
2693 .get_dqblk = dquot_get_dqblk, 2740 .get_dqblk = dquot_get_dqblk,
2694 .set_dqblk = dquot_set_dqblk 2741 .set_dqblk = dquot_set_dqblk
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index d14a799c7785..86ded7375c21 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -118,13 +118,30 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
118 118
119static int quota_getinfo(struct super_block *sb, int type, void __user *addr) 119static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
120{ 120{
121 struct if_dqinfo info; 121 struct qc_state state;
122 struct qc_type_state *tstate;
123 struct if_dqinfo uinfo;
122 int ret; 124 int ret;
123 125
124 if (!sb->s_qcop->get_info) 126 /* This checks whether qc_state has enough entries... */
127 BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS);
128 if (!sb->s_qcop->get_state)
125 return -ENOSYS; 129 return -ENOSYS;
126 ret = sb->s_qcop->get_info(sb, type, &info); 130 ret = sb->s_qcop->get_state(sb, &state);
127 if (!ret && copy_to_user(addr, &info, sizeof(info))) 131 if (ret)
132 return ret;
133 tstate = state.s_state + type;
134 if (!(tstate->flags & QCI_ACCT_ENABLED))
135 return -ESRCH;
136 memset(&uinfo, 0, sizeof(uinfo));
137 uinfo.dqi_bgrace = tstate->spc_timelimit;
138 uinfo.dqi_igrace = tstate->ino_timelimit;
139 if (tstate->flags & QCI_SYSFILE)
140 uinfo.dqi_flags |= DQF_SYS_FILE;
141 if (tstate->flags & QCI_ROOT_SQUASH)
142 uinfo.dqi_flags |= DQF_ROOT_SQUASH;
143 uinfo.dqi_valid = IIF_ALL;
144 if (!ret && copy_to_user(addr, &uinfo, sizeof(uinfo)))
128 return -EFAULT; 145 return -EFAULT;
129 return ret; 146 return ret;
130} 147}
@@ -132,12 +149,31 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
132static int quota_setinfo(struct super_block *sb, int type, void __user *addr) 149static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
133{ 150{
134 struct if_dqinfo info; 151 struct if_dqinfo info;
152 struct qc_info qinfo;
135 153
136 if (copy_from_user(&info, addr, sizeof(info))) 154 if (copy_from_user(&info, addr, sizeof(info)))
137 return -EFAULT; 155 return -EFAULT;
138 if (!sb->s_qcop->set_info) 156 if (!sb->s_qcop->set_info)
139 return -ENOSYS; 157 return -ENOSYS;
140 return sb->s_qcop->set_info(sb, type, &info); 158 if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE))
159 return -EINVAL;
160 memset(&qinfo, 0, sizeof(qinfo));
161 if (info.dqi_valid & IIF_FLAGS) {
162 if (info.dqi_flags & ~DQF_SETINFO_MASK)
163 return -EINVAL;
164 if (info.dqi_flags & DQF_ROOT_SQUASH)
165 qinfo.i_flags |= QCI_ROOT_SQUASH;
166 qinfo.i_fieldmask |= QC_FLAGS;
167 }
168 if (info.dqi_valid & IIF_BGRACE) {
169 qinfo.i_spc_timelimit = info.dqi_bgrace;
170 qinfo.i_fieldmask |= QC_SPC_TIMER;
171 }
172 if (info.dqi_valid & IIF_IGRACE) {
173 qinfo.i_ino_timelimit = info.dqi_igrace;
174 qinfo.i_fieldmask |= QC_INO_TIMER;
175 }
176 return sb->s_qcop->set_info(sb, type, &qinfo);
141} 177}
142 178
143static inline qsize_t qbtos(qsize_t blocks) 179static inline qsize_t qbtos(qsize_t blocks)
@@ -252,25 +288,149 @@ static int quota_disable(struct super_block *sb, void __user *addr)
252 return sb->s_qcop->quota_disable(sb, flags); 288 return sb->s_qcop->quota_disable(sb, flags);
253} 289}
254 290
291static int quota_state_to_flags(struct qc_state *state)
292{
293 int flags = 0;
294
295 if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED)
296 flags |= FS_QUOTA_UDQ_ACCT;
297 if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED)
298 flags |= FS_QUOTA_UDQ_ENFD;
299 if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)
300 flags |= FS_QUOTA_GDQ_ACCT;
301 if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED)
302 flags |= FS_QUOTA_GDQ_ENFD;
303 if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED)
304 flags |= FS_QUOTA_PDQ_ACCT;
305 if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED)
306 flags |= FS_QUOTA_PDQ_ENFD;
307 return flags;
308}
309
310static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
311{
312 int type;
313 struct qc_state state;
314 int ret;
315
316 ret = sb->s_qcop->get_state(sb, &state);
317 if (ret < 0)
318 return ret;
319
320 memset(fqs, 0, sizeof(*fqs));
321 fqs->qs_version = FS_QSTAT_VERSION;
322 fqs->qs_flags = quota_state_to_flags(&state);
323 /* No quota enabled? */
324 if (!fqs->qs_flags)
325 return -ENOSYS;
326 fqs->qs_incoredqs = state.s_incoredqs;
327 /*
328 * GETXSTATE quotactl has space for just one set of time limits so
329 * report them for the first enabled quota type
330 */
331 for (type = 0; type < XQM_MAXQUOTAS; type++)
332 if (state.s_state[type].flags & QCI_ACCT_ENABLED)
333 break;
334 BUG_ON(type == XQM_MAXQUOTAS);
335 fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
336 fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
337 fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
338 fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
339 fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
340 if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
341 fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
342 fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
343 fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
344 }
345 if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
346 fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
347 fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
348 fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
349 }
350 if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
351 /*
352 * Q_XGETQSTAT doesn't have room for both group and project
353 * quotas. So, allow the project quota values to be copied out
354 * only if there is no group quota information available.
355 */
356 if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) {
357 fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino;
358 fqs->qs_gquota.qfs_nblks =
359 state.s_state[PRJQUOTA].blocks;
360 fqs->qs_gquota.qfs_nextents =
361 state.s_state[PRJQUOTA].nextents;
362 }
363 }
364 return 0;
365}
366
255static int quota_getxstate(struct super_block *sb, void __user *addr) 367static int quota_getxstate(struct super_block *sb, void __user *addr)
256{ 368{
257 struct fs_quota_stat fqs; 369 struct fs_quota_stat fqs;
258 int ret; 370 int ret;
259 371
260 if (!sb->s_qcop->get_xstate) 372 if (!sb->s_qcop->get_state)
261 return -ENOSYS; 373 return -ENOSYS;
262 ret = sb->s_qcop->get_xstate(sb, &fqs); 374 ret = quota_getstate(sb, &fqs);
263 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) 375 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
264 return -EFAULT; 376 return -EFAULT;
265 return ret; 377 return ret;
266} 378}
267 379
380static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
381{
382 int type;
383 struct qc_state state;
384 int ret;
385
386 ret = sb->s_qcop->get_state(sb, &state);
387 if (ret < 0)
388 return ret;
389
390 memset(fqs, 0, sizeof(*fqs));
391 fqs->qs_version = FS_QSTAT_VERSION;
392 fqs->qs_flags = quota_state_to_flags(&state);
393 /* No quota enabled? */
394 if (!fqs->qs_flags)
395 return -ENOSYS;
396 fqs->qs_incoredqs = state.s_incoredqs;
397 /*
398 * GETXSTATV quotactl has space for just one set of time limits so
399 * report them for the first enabled quota type
400 */
401 for (type = 0; type < XQM_MAXQUOTAS; type++)
402 if (state.s_state[type].flags & QCI_ACCT_ENABLED)
403 break;
404 BUG_ON(type == XQM_MAXQUOTAS);
405 fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
406 fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
407 fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
408 fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
409 fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
410 if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
411 fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
412 fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
413 fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
414 }
415 if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
416 fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
417 fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
418 fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
419 }
420 if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
421 fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino;
422 fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks;
423 fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents;
424 }
425 return 0;
426}
427
268static int quota_getxstatev(struct super_block *sb, void __user *addr) 428static int quota_getxstatev(struct super_block *sb, void __user *addr)
269{ 429{
270 struct fs_quota_statv fqs; 430 struct fs_quota_statv fqs;
271 int ret; 431 int ret;
272 432
273 if (!sb->s_qcop->get_xstatev) 433 if (!sb->s_qcop->get_state)
274 return -ENOSYS; 434 return -ENOSYS;
275 435
276 memset(&fqs, 0, sizeof(fqs)); 436 memset(&fqs, 0, sizeof(fqs));
@@ -284,7 +444,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
284 default: 444 default:
285 return -EINVAL; 445 return -EINVAL;
286 } 446 }
287 ret = sb->s_qcop->get_xstatev(sb, &fqs); 447 ret = quota_getstatev(sb, &fqs);
288 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) 448 if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
289 return -EFAULT; 449 return -EFAULT;
290 return ret; 450 return ret;
@@ -357,6 +517,30 @@ static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
357 dst->d_fieldmask |= QC_RT_SPACE; 517 dst->d_fieldmask |= QC_RT_SPACE;
358} 518}
359 519
520static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst,
521 struct fs_disk_quota *src)
522{
523 memset(dst, 0, sizeof(*dst));
524 dst->i_spc_timelimit = src->d_btimer;
525 dst->i_ino_timelimit = src->d_itimer;
526 dst->i_rt_spc_timelimit = src->d_rtbtimer;
527 dst->i_ino_warnlimit = src->d_iwarns;
528 dst->i_spc_warnlimit = src->d_bwarns;
529 dst->i_rt_spc_warnlimit = src->d_rtbwarns;
530 if (src->d_fieldmask & FS_DQ_BWARNS)
531 dst->i_fieldmask |= QC_SPC_WARNS;
532 if (src->d_fieldmask & FS_DQ_IWARNS)
533 dst->i_fieldmask |= QC_INO_WARNS;
534 if (src->d_fieldmask & FS_DQ_RTBWARNS)
535 dst->i_fieldmask |= QC_RT_SPC_WARNS;
536 if (src->d_fieldmask & FS_DQ_BTIMER)
537 dst->i_fieldmask |= QC_SPC_TIMER;
538 if (src->d_fieldmask & FS_DQ_ITIMER)
539 dst->i_fieldmask |= QC_INO_TIMER;
540 if (src->d_fieldmask & FS_DQ_RTBTIMER)
541 dst->i_fieldmask |= QC_RT_SPC_TIMER;
542}
543
360static int quota_setxquota(struct super_block *sb, int type, qid_t id, 544static int quota_setxquota(struct super_block *sb, int type, qid_t id,
361 void __user *addr) 545 void __user *addr)
362{ 546{
@@ -371,6 +555,21 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
371 qid = make_kqid(current_user_ns(), type, id); 555 qid = make_kqid(current_user_ns(), type, id);
372 if (!qid_valid(qid)) 556 if (!qid_valid(qid))
373 return -EINVAL; 557 return -EINVAL;
558 /* Are we actually setting timer / warning limits for all users? */
559 if (from_kqid(&init_user_ns, qid) == 0 &&
560 fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
561 struct qc_info qinfo;
562 int ret;
563
564 if (!sb->s_qcop->set_info)
565 return -EINVAL;
566 copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq);
567 ret = sb->s_qcop->set_info(sb, type, &qinfo);
568 if (ret)
569 return ret;
570 /* These are already done */
571 fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK);
572 }
374 copy_from_xfs_dqblk(&qdq, &fdq); 573 copy_from_xfs_dqblk(&qdq, &fdq);
375 return sb->s_qcop->set_dqblk(sb, qid, &qdq); 574 return sb->s_qcop->set_dqblk(sb, qid, &qdq);
376} 575}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index d65877fbe8f4..58efb83dec1c 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -349,6 +349,13 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
349 struct dquot *dquot) 349 struct dquot *dquot)
350{ 350{
351 int tmp = QT_TREEOFF; 351 int tmp = QT_TREEOFF;
352
353#ifdef __QUOTA_QT_PARANOIA
354 if (info->dqi_blocks <= QT_TREEOFF) {
355 quota_error(dquot->dq_sb, "Quota tree root isn't allocated!");
356 return -EIO;
357 }
358#endif
352 return do_insert_tree(info, dquot, &tmp, 0); 359 return do_insert_tree(info, dquot, &tmp, 0);
353} 360}
354 361
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 9cb10d7197f7..2aa012a68e90 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -117,12 +117,16 @@ static int v2_read_file_info(struct super_block *sb, int type)
117 qinfo = info->dqi_priv; 117 qinfo = info->dqi_priv;
118 if (version == 0) { 118 if (version == 0) {
119 /* limits are stored as unsigned 32-bit data */ 119 /* limits are stored as unsigned 32-bit data */
120 info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS; 120 info->dqi_max_spc_limit = 0xffffffffLL << QUOTABLOCK_BITS;
121 info->dqi_max_ino_limit = 0xffffffff; 121 info->dqi_max_ino_limit = 0xffffffff;
122 } else { 122 } else {
123 /* used space is stored as unsigned 64-bit value in bytes */ 123 /*
124 info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */ 124 * Used space is stored as unsigned 64-bit value in bytes but
125 info->dqi_max_ino_limit = 0xffffffffffffffffULL; 125 * quota core supports only signed 64-bit values so use that
126 * as a limit
127 */
128 info->dqi_max_spc_limit = 0x7fffffffffffffffLL; /* 2^63-1 */
129 info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
126 } 130 }
127 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 131 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
128 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 132 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index f1966b42c2fd..4e95430093d9 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -13,12 +13,14 @@
13 */ 13 */
14#define V2_INITQMAGICS {\ 14#define V2_INITQMAGICS {\
15 0xd9c01f11, /* USRQUOTA */\ 15 0xd9c01f11, /* USRQUOTA */\
16 0xd9c01927 /* GRPQUOTA */\ 16 0xd9c01927, /* GRPQUOTA */\
17 0xd9c03f14, /* PRJQUOTA */\
17} 18}
18 19
19#define V2_INITQVERSIONS {\ 20#define V2_INITQVERSIONS {\
20 1, /* USRQUOTA */\ 21 1, /* USRQUOTA */\
21 1 /* GRPQUOTA */\ 22 1, /* GRPQUOTA */\
23 1, /* PRJQUOTA */\
22} 24}
23 25
24/* First generic header */ 26/* First generic header */
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 4f56de822d2f..183a212694bf 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -31,9 +31,7 @@
31#include "internal.h" 31#include "internal.h"
32 32
33const struct file_operations ramfs_file_operations = { 33const struct file_operations ramfs_file_operations = {
34 .read = new_sync_read,
35 .read_iter = generic_file_read_iter, 34 .read_iter = generic_file_read_iter,
36 .write = new_sync_write,
37 .write_iter = generic_file_write_iter, 35 .write_iter = generic_file_write_iter,
38 .mmap = generic_file_mmap, 36 .mmap = generic_file_mmap,
39 .fsync = noop_fsync, 37 .fsync = noop_fsync,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index f6ab41b39612..0b38befa69f3 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -44,9 +44,7 @@ const struct file_operations ramfs_file_operations = {
44 .mmap_capabilities = ramfs_mmap_capabilities, 44 .mmap_capabilities = ramfs_mmap_capabilities,
45 .mmap = ramfs_nommu_mmap, 45 .mmap = ramfs_nommu_mmap,
46 .get_unmapped_area = ramfs_nommu_get_unmapped_area, 46 .get_unmapped_area = ramfs_nommu_get_unmapped_area,
47 .read = new_sync_read,
48 .read_iter = generic_file_read_iter, 47 .read_iter = generic_file_read_iter,
49 .write = new_sync_write,
50 .write_iter = generic_file_write_iter, 48 .write_iter = generic_file_write_iter,
51 .fsync = noop_fsync, 49 .fsync = noop_fsync,
52 .splice_read = generic_file_splice_read, 50 .splice_read = generic_file_splice_read,
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b68786d66..819ef3faf1bb 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
9#include <linux/fcntl.h> 9#include <linux/fcntl.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/aio.h>
13#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
14#include <linux/security.h> 13#include <linux/security.h>
15#include <linux/export.h> 14#include <linux/export.h>
@@ -23,13 +22,10 @@
23#include <asm/unistd.h> 22#include <asm/unistd.h>
24 23
25typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 24typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
26typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
27 unsigned long, loff_t);
28typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); 25typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
29 26
30const struct file_operations generic_ro_fops = { 27const struct file_operations generic_ro_fops = {
31 .llseek = generic_file_llseek, 28 .llseek = generic_file_llseek,
32 .read = new_sync_read,
33 .read_iter = generic_file_read_iter, 29 .read_iter = generic_file_read_iter,
34 .mmap = generic_file_readonly_mmap, 30 .mmap = generic_file_readonly_mmap,
35 .splice_read = generic_file_splice_read, 31 .splice_read = generic_file_splice_read,
@@ -343,13 +339,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
343 339
344 init_sync_kiocb(&kiocb, file); 340 init_sync_kiocb(&kiocb, file);
345 kiocb.ki_pos = *ppos; 341 kiocb.ki_pos = *ppos;
346 kiocb.ki_nbytes = iov_iter_count(iter);
347 342
348 iter->type |= READ; 343 iter->type |= READ;
349 ret = file->f_op->read_iter(&kiocb, iter); 344 ret = file->f_op->read_iter(&kiocb, iter);
350 if (ret == -EIOCBQUEUED) 345 BUG_ON(ret == -EIOCBQUEUED);
351 ret = wait_on_sync_kiocb(&kiocb);
352
353 if (ret > 0) 346 if (ret > 0)
354 *ppos = kiocb.ki_pos; 347 *ppos = kiocb.ki_pos;
355 return ret; 348 return ret;
@@ -366,13 +359,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
366 359
367 init_sync_kiocb(&kiocb, file); 360 init_sync_kiocb(&kiocb, file);
368 kiocb.ki_pos = *ppos; 361 kiocb.ki_pos = *ppos;
369 kiocb.ki_nbytes = iov_iter_count(iter);
370 362
371 iter->type |= WRITE; 363 iter->type |= WRITE;
372 ret = file->f_op->write_iter(&kiocb, iter); 364 ret = file->f_op->write_iter(&kiocb, iter);
373 if (ret == -EIOCBQUEUED) 365 BUG_ON(ret == -EIOCBQUEUED);
374 ret = wait_on_sync_kiocb(&kiocb);
375
376 if (ret > 0) 366 if (ret > 0)
377 *ppos = kiocb.ki_pos; 367 *ppos = kiocb.ki_pos;
378 return ret; 368 return ret;
@@ -418,26 +408,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
418 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 408 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
419} 409}
420 410
421ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 411static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
422{
423 struct iovec iov = { .iov_base = buf, .iov_len = len };
424 struct kiocb kiocb;
425 ssize_t ret;
426
427 init_sync_kiocb(&kiocb, filp);
428 kiocb.ki_pos = *ppos;
429 kiocb.ki_nbytes = len;
430
431 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
432 if (-EIOCBQUEUED == ret)
433 ret = wait_on_sync_kiocb(&kiocb);
434 *ppos = kiocb.ki_pos;
435 return ret;
436}
437
438EXPORT_SYMBOL(do_sync_read);
439
440ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
441{ 412{
442 struct iovec iov = { .iov_base = buf, .iov_len = len }; 413 struct iovec iov = { .iov_base = buf, .iov_len = len };
443 struct kiocb kiocb; 414 struct kiocb kiocb;
@@ -446,34 +417,25 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
446 417
447 init_sync_kiocb(&kiocb, filp); 418 init_sync_kiocb(&kiocb, filp);
448 kiocb.ki_pos = *ppos; 419 kiocb.ki_pos = *ppos;
449 kiocb.ki_nbytes = len;
450 iov_iter_init(&iter, READ, &iov, 1, len); 420 iov_iter_init(&iter, READ, &iov, 1, len);
451 421
452 ret = filp->f_op->read_iter(&kiocb, &iter); 422 ret = filp->f_op->read_iter(&kiocb, &iter);
453 if (-EIOCBQUEUED == ret) 423 BUG_ON(ret == -EIOCBQUEUED);
454 ret = wait_on_sync_kiocb(&kiocb);
455 *ppos = kiocb.ki_pos; 424 *ppos = kiocb.ki_pos;
456 return ret; 425 return ret;
457} 426}
458 427
459EXPORT_SYMBOL(new_sync_read);
460
461ssize_t __vfs_read(struct file *file, char __user *buf, size_t count, 428ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
462 loff_t *pos) 429 loff_t *pos)
463{ 430{
464 ssize_t ret;
465
466 if (file->f_op->read) 431 if (file->f_op->read)
467 ret = file->f_op->read(file, buf, count, pos); 432 return file->f_op->read(file, buf, count, pos);
468 else if (file->f_op->aio_read)
469 ret = do_sync_read(file, buf, count, pos);
470 else if (file->f_op->read_iter) 433 else if (file->f_op->read_iter)
471 ret = new_sync_read(file, buf, count, pos); 434 return new_sync_read(file, buf, count, pos);
472 else 435 else
473 ret = -EINVAL; 436 return -EINVAL;
474
475 return ret;
476} 437}
438EXPORT_SYMBOL(__vfs_read);
477 439
478ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 440ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
479{ 441{
@@ -502,26 +464,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
502 464
503EXPORT_SYMBOL(vfs_read); 465EXPORT_SYMBOL(vfs_read);
504 466
505ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 467static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
506{
507 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
508 struct kiocb kiocb;
509 ssize_t ret;
510
511 init_sync_kiocb(&kiocb, filp);
512 kiocb.ki_pos = *ppos;
513 kiocb.ki_nbytes = len;
514
515 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
516 if (-EIOCBQUEUED == ret)
517 ret = wait_on_sync_kiocb(&kiocb);
518 *ppos = kiocb.ki_pos;
519 return ret;
520}
521
522EXPORT_SYMBOL(do_sync_write);
523
524ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
525{ 468{
526 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 469 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
527 struct kiocb kiocb; 470 struct kiocb kiocb;
@@ -530,17 +473,26 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
530 473
531 init_sync_kiocb(&kiocb, filp); 474 init_sync_kiocb(&kiocb, filp);
532 kiocb.ki_pos = *ppos; 475 kiocb.ki_pos = *ppos;
533 kiocb.ki_nbytes = len;
534 iov_iter_init(&iter, WRITE, &iov, 1, len); 476 iov_iter_init(&iter, WRITE, &iov, 1, len);
535 477
536 ret = filp->f_op->write_iter(&kiocb, &iter); 478 ret = filp->f_op->write_iter(&kiocb, &iter);
537 if (-EIOCBQUEUED == ret) 479 BUG_ON(ret == -EIOCBQUEUED);
538 ret = wait_on_sync_kiocb(&kiocb); 480 if (ret > 0)
539 *ppos = kiocb.ki_pos; 481 *ppos = kiocb.ki_pos;
540 return ret; 482 return ret;
541} 483}
542 484
543EXPORT_SYMBOL(new_sync_write); 485ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
486 loff_t *pos)
487{
488 if (file->f_op->write)
489 return file->f_op->write(file, p, count, pos);
490 else if (file->f_op->write_iter)
491 return new_sync_write(file, p, count, pos);
492 else
493 return -EINVAL;
494}
495EXPORT_SYMBOL(__vfs_write);
544 496
545ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 497ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
546{ 498{
@@ -556,12 +508,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
556 p = (__force const char __user *)buf; 508 p = (__force const char __user *)buf;
557 if (count > MAX_RW_COUNT) 509 if (count > MAX_RW_COUNT)
558 count = MAX_RW_COUNT; 510 count = MAX_RW_COUNT;
559 if (file->f_op->write) 511 ret = __vfs_write(file, p, count, pos);
560 ret = file->f_op->write(file, p, count, pos);
561 else if (file->f_op->aio_write)
562 ret = do_sync_write(file, p, count, pos);
563 else
564 ret = new_sync_write(file, p, count, pos);
565 set_fs(old_fs); 512 set_fs(old_fs);
566 if (ret > 0) { 513 if (ret > 0) {
567 fsnotify_modify(file); 514 fsnotify_modify(file);
@@ -588,12 +535,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
588 if (ret >= 0) { 535 if (ret >= 0) {
589 count = ret; 536 count = ret;
590 file_start_write(file); 537 file_start_write(file);
591 if (file->f_op->write) 538 ret = __vfs_write(file, buf, count, pos);
592 ret = file->f_op->write(file, buf, count, pos);
593 else if (file->f_op->aio_write)
594 ret = do_sync_write(file, buf, count, pos);
595 else
596 ret = new_sync_write(file, buf, count, pos);
597 if (ret > 0) { 539 if (ret > 0) {
598 fsnotify_modify(file); 540 fsnotify_modify(file);
599 add_wchar(current, ret); 541 add_wchar(current, ret);
@@ -710,60 +652,32 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
710} 652}
711EXPORT_SYMBOL(iov_shorten); 653EXPORT_SYMBOL(iov_shorten);
712 654
713static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, 655static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
714 unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) 656 loff_t *ppos, iter_fn_t fn)
715{
716 struct kiocb kiocb;
717 struct iov_iter iter;
718 ssize_t ret;
719
720 init_sync_kiocb(&kiocb, filp);
721 kiocb.ki_pos = *ppos;
722 kiocb.ki_nbytes = len;
723
724 iov_iter_init(&iter, rw, iov, nr_segs, len);
725 ret = fn(&kiocb, &iter);
726 if (ret == -EIOCBQUEUED)
727 ret = wait_on_sync_kiocb(&kiocb);
728 *ppos = kiocb.ki_pos;
729 return ret;
730}
731
732static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
733 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
734{ 657{
735 struct kiocb kiocb; 658 struct kiocb kiocb;
736 ssize_t ret; 659 ssize_t ret;
737 660
738 init_sync_kiocb(&kiocb, filp); 661 init_sync_kiocb(&kiocb, filp);
739 kiocb.ki_pos = *ppos; 662 kiocb.ki_pos = *ppos;
740 kiocb.ki_nbytes = len;
741 663
742 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 664 ret = fn(&kiocb, iter);
743 if (ret == -EIOCBQUEUED) 665 BUG_ON(ret == -EIOCBQUEUED);
744 ret = wait_on_sync_kiocb(&kiocb);
745 *ppos = kiocb.ki_pos; 666 *ppos = kiocb.ki_pos;
746 return ret; 667 return ret;
747} 668}
748 669
749/* Do it by hand, with file-ops */ 670/* Do it by hand, with file-ops */
750static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 671static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
751 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 672 loff_t *ppos, io_fn_t fn)
752{ 673{
753 struct iovec *vector = iov;
754 ssize_t ret = 0; 674 ssize_t ret = 0;
755 675
756 while (nr_segs > 0) { 676 while (iov_iter_count(iter)) {
757 void __user *base; 677 struct iovec iovec = iov_iter_iovec(iter);
758 size_t len;
759 ssize_t nr; 678 ssize_t nr;
760 679
761 base = vector->iov_base; 680 nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
762 len = vector->iov_len;
763 vector++;
764 nr_segs--;
765
766 nr = fn(filp, base, len, ppos);
767 681
768 if (nr < 0) { 682 if (nr < 0) {
769 if (!ret) 683 if (!ret)
@@ -771,8 +685,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
771 break; 685 break;
772 } 686 }
773 ret += nr; 687 ret += nr;
774 if (nr != len) 688 if (nr != iovec.iov_len)
775 break; 689 break;
690 iov_iter_advance(iter, nr);
776 } 691 }
777 692
778 return ret; 693 return ret;
@@ -863,48 +778,42 @@ static ssize_t do_readv_writev(int type, struct file *file,
863 size_t tot_len; 778 size_t tot_len;
864 struct iovec iovstack[UIO_FASTIOV]; 779 struct iovec iovstack[UIO_FASTIOV];
865 struct iovec *iov = iovstack; 780 struct iovec *iov = iovstack;
781 struct iov_iter iter;
866 ssize_t ret; 782 ssize_t ret;
867 io_fn_t fn; 783 io_fn_t fn;
868 iov_fn_t fnv;
869 iter_fn_t iter_fn; 784 iter_fn_t iter_fn;
870 785
871 ret = rw_copy_check_uvector(type, uvector, nr_segs, 786 ret = import_iovec(type, uvector, nr_segs,
872 ARRAY_SIZE(iovstack), iovstack, &iov); 787 ARRAY_SIZE(iovstack), &iov, &iter);
873 if (ret <= 0) 788 if (ret < 0)
874 goto out; 789 return ret;
875 790
876 tot_len = ret; 791 tot_len = iov_iter_count(&iter);
792 if (!tot_len)
793 goto out;
877 ret = rw_verify_area(type, file, pos, tot_len); 794 ret = rw_verify_area(type, file, pos, tot_len);
878 if (ret < 0) 795 if (ret < 0)
879 goto out; 796 goto out;
880 797
881 fnv = NULL;
882 if (type == READ) { 798 if (type == READ) {
883 fn = file->f_op->read; 799 fn = file->f_op->read;
884 fnv = file->f_op->aio_read;
885 iter_fn = file->f_op->read_iter; 800 iter_fn = file->f_op->read_iter;
886 } else { 801 } else {
887 fn = (io_fn_t)file->f_op->write; 802 fn = (io_fn_t)file->f_op->write;
888 fnv = file->f_op->aio_write;
889 iter_fn = file->f_op->write_iter; 803 iter_fn = file->f_op->write_iter;
890 file_start_write(file); 804 file_start_write(file);
891 } 805 }
892 806
893 if (iter_fn) 807 if (iter_fn)
894 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 808 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
895 pos, iter_fn);
896 else if (fnv)
897 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
898 pos, fnv);
899 else 809 else
900 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 810 ret = do_loop_readv_writev(file, &iter, pos, fn);
901 811
902 if (type != READ) 812 if (type != READ)
903 file_end_write(file); 813 file_end_write(file);
904 814
905out: 815out:
906 if (iov != iovstack) 816 kfree(iov);
907 kfree(iov);
908 if ((ret + (type == READ)) > 0) { 817 if ((ret + (type == READ)) > 0) {
909 if (type == READ) 818 if (type == READ)
910 fsnotify_access(file); 819 fsnotify_access(file);
@@ -1043,48 +952,42 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1043 compat_ssize_t tot_len; 952 compat_ssize_t tot_len;
1044 struct iovec iovstack[UIO_FASTIOV]; 953 struct iovec iovstack[UIO_FASTIOV];
1045 struct iovec *iov = iovstack; 954 struct iovec *iov = iovstack;
955 struct iov_iter iter;
1046 ssize_t ret; 956 ssize_t ret;
1047 io_fn_t fn; 957 io_fn_t fn;
1048 iov_fn_t fnv;
1049 iter_fn_t iter_fn; 958 iter_fn_t iter_fn;
1050 959
1051 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 960 ret = compat_import_iovec(type, uvector, nr_segs,
1052 UIO_FASTIOV, iovstack, &iov); 961 UIO_FASTIOV, &iov, &iter);
1053 if (ret <= 0) 962 if (ret < 0)
1054 goto out; 963 return ret;
1055 964
1056 tot_len = ret; 965 tot_len = iov_iter_count(&iter);
966 if (!tot_len)
967 goto out;
1057 ret = rw_verify_area(type, file, pos, tot_len); 968 ret = rw_verify_area(type, file, pos, tot_len);
1058 if (ret < 0) 969 if (ret < 0)
1059 goto out; 970 goto out;
1060 971
1061 fnv = NULL;
1062 if (type == READ) { 972 if (type == READ) {
1063 fn = file->f_op->read; 973 fn = file->f_op->read;
1064 fnv = file->f_op->aio_read;
1065 iter_fn = file->f_op->read_iter; 974 iter_fn = file->f_op->read_iter;
1066 } else { 975 } else {
1067 fn = (io_fn_t)file->f_op->write; 976 fn = (io_fn_t)file->f_op->write;
1068 fnv = file->f_op->aio_write;
1069 iter_fn = file->f_op->write_iter; 977 iter_fn = file->f_op->write_iter;
1070 file_start_write(file); 978 file_start_write(file);
1071 } 979 }
1072 980
1073 if (iter_fn) 981 if (iter_fn)
1074 ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, 982 ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
1075 pos, iter_fn);
1076 else if (fnv)
1077 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
1078 pos, fnv);
1079 else 983 else
1080 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 984 ret = do_loop_readv_writev(file, &iter, pos, fn);
1081 985
1082 if (type != READ) 986 if (type != READ)
1083 file_end_write(file); 987 file_end_write(file);
1084 988
1085out: 989out:
1086 if (iov != iovstack) 990 kfree(iov);
1087 kfree(iov);
1088 if ((ret + (type == READ)) > 0) { 991 if ((ret + (type == READ)) > 0) {
1089 if (type == READ) 992 if (type == READ)
1090 fsnotify_access(file); 993 fsnotify_access(file);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 751dd3f4346b..96a1bcf33db4 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -243,8 +243,6 @@ drop_write_lock:
243} 243}
244 244
245const struct file_operations reiserfs_file_operations = { 245const struct file_operations reiserfs_file_operations = {
246 .read = new_sync_read,
247 .write = new_sync_write,
248 .unlocked_ioctl = reiserfs_ioctl, 246 .unlocked_ioctl = reiserfs_ioctl,
249#ifdef CONFIG_COMPAT 247#ifdef CONFIG_COMPAT
250 .compat_ioctl = reiserfs_compat_ioctl, 248 .compat_ioctl = reiserfs_compat_ioctl,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e1f995..742242b60972 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/swap.h> 20#include <linux/swap.h>
21#include <linux/aio.h> 21#include <linux/uio.h>
22 22
23int reiserfs_commit_write(struct file *f, struct page *page, 23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
@@ -3278,22 +3278,22 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3278 * We thank Mingming Cao for helping us understand in great detail what 3278 * We thank Mingming Cao for helping us understand in great detail what
3279 * to do in this section of the code. 3279 * to do in this section of the code.
3280 */ 3280 */
3281static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 3281static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3282 struct iov_iter *iter, loff_t offset) 3282 loff_t offset)
3283{ 3283{
3284 struct file *file = iocb->ki_filp; 3284 struct file *file = iocb->ki_filp;
3285 struct inode *inode = file->f_mapping->host; 3285 struct inode *inode = file->f_mapping->host;
3286 size_t count = iov_iter_count(iter); 3286 size_t count = iov_iter_count(iter);
3287 ssize_t ret; 3287 ssize_t ret;
3288 3288
3289 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 3289 ret = blockdev_direct_IO(iocb, inode, iter, offset,
3290 reiserfs_get_blocks_direct_io); 3290 reiserfs_get_blocks_direct_io);
3291 3291
3292 /* 3292 /*
3293 * In case of error extending write may have instantiated a few 3293 * In case of error extending write may have instantiated a few
3294 * blocks outside i_size. Trim these off again. 3294 * blocks outside i_size. Trim these off again.
3295 */ 3295 */
3296 if (unlikely((rw & WRITE) && ret < 0)) { 3296 if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
3297 loff_t isize = i_size_read(inode); 3297 loff_t isize = i_size_read(inode);
3298 loff_t end = offset + count; 3298 loff_t end = offset + count;
3299 3299
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index bb79cddf0a1f..2adcde137c3f 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -910,7 +910,6 @@ do { \
910 if (!(cond)) \ 910 if (!(cond)) \
911 reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ 911 reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
912 __FILE__ ":%i:%s: " format "\n", \ 912 __FILE__ ":%i:%s: " format "\n", \
913 in_interrupt() ? -1 : task_pid_nr(current), \
914 __LINE__, __func__ , ##args); \ 913 __LINE__, __func__ , ##args); \
915} while (0) 914} while (0)
916 915
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 71fbbe3e2dab..68b5f182984e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -805,7 +805,7 @@ static const struct quotactl_ops reiserfs_qctl_operations = {
805 .quota_on = reiserfs_quota_on, 805 .quota_on = reiserfs_quota_on,
806 .quota_off = dquot_quota_off, 806 .quota_off = dquot_quota_off,
807 .quota_sync = dquot_quota_sync, 807 .quota_sync = dquot_quota_sync,
808 .get_info = dquot_get_dqinfo, 808 .get_state = dquot_get_state,
809 .set_info = dquot_set_dqinfo, 809 .set_info = dquot_set_dqinfo,
810 .get_dqblk = dquot_get_dqblk, 810 .get_dqblk = dquot_get_dqblk,
811 .set_dqblk = dquot_set_dqblk, 811 .set_dqblk = dquot_set_dqblk,
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index 7da9e2153953..1118a0dc6b45 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -81,7 +81,6 @@ static unsigned romfs_mmap_capabilities(struct file *file)
81 81
82const struct file_operations romfs_ro_fops = { 82const struct file_operations romfs_ro_fops = {
83 .llseek = generic_file_llseek, 83 .llseek = generic_file_llseek,
84 .read = new_sync_read,
85 .read_iter = generic_file_read_iter, 84 .read_iter = generic_file_read_iter,
86 .splice_read = generic_file_splice_read, 85 .splice_read = generic_file_splice_read,
87 .mmap = romfs_mmap, 86 .mmap = romfs_mmap,
diff --git a/fs/splice.c b/fs/splice.c
index 7968da96bebb..476024bb6546 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
36#include "internal.h" 35#include "internal.h"
37 36
38/* 37/*
@@ -524,6 +523,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
524 loff_t isize, left; 523 loff_t isize, left;
525 int ret; 524 int ret;
526 525
526 if (IS_DAX(in->f_mapping->host))
527 return default_file_splice_read(in, ppos, pipe, len, flags);
528
527 isize = i_size_read(in->f_mapping->host); 529 isize = i_size_read(in->f_mapping->host);
528 if (unlikely(*ppos >= isize)) 530 if (unlikely(*ppos >= isize))
529 return 0; 531 return 0;
@@ -1534,34 +1536,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1534 struct iovec iovstack[UIO_FASTIOV]; 1536 struct iovec iovstack[UIO_FASTIOV];
1535 struct iovec *iov = iovstack; 1537 struct iovec *iov = iovstack;
1536 struct iov_iter iter; 1538 struct iov_iter iter;
1537 ssize_t count;
1538 1539
1539 pipe = get_pipe_info(file); 1540 pipe = get_pipe_info(file);
1540 if (!pipe) 1541 if (!pipe)
1541 return -EBADF; 1542 return -EBADF;
1542 1543
1543 ret = rw_copy_check_uvector(READ, uiov, nr_segs, 1544 ret = import_iovec(READ, uiov, nr_segs,
1544 ARRAY_SIZE(iovstack), iovstack, &iov); 1545 ARRAY_SIZE(iovstack), &iov, &iter);
1545 if (ret <= 0) 1546 if (ret < 0)
1546 goto out; 1547 return ret;
1547
1548 count = ret;
1549 iov_iter_init(&iter, READ, iov, nr_segs, count);
1550 1548
1549 sd.total_len = iov_iter_count(&iter);
1551 sd.len = 0; 1550 sd.len = 0;
1552 sd.total_len = count;
1553 sd.flags = flags; 1551 sd.flags = flags;
1554 sd.u.data = &iter; 1552 sd.u.data = &iter;
1555 sd.pos = 0; 1553 sd.pos = 0;
1556 1554
1557 pipe_lock(pipe); 1555 if (sd.total_len) {
1558 ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1556 pipe_lock(pipe);
1559 pipe_unlock(pipe); 1557 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
1560 1558 pipe_unlock(pipe);
1561out: 1559 }
1562 if (iov != iovstack)
1563 kfree(iov);
1564 1560
1561 kfree(iov);
1565 return ret; 1562 return ret;
1566} 1563}
1567 1564
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3cef9927..19636af5e75c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
66{ 66{
67 int retval; 67 int retval;
68 68
69 retval = security_inode_getattr(path->mnt, path->dentry); 69 retval = security_inode_getattr(path);
70 if (retval) 70 if (retval)
71 return retval; 71 return retval;
72 return vfs_getattr_nosec(path, stat); 72 return vfs_getattr_nosec(path, stat);
diff --git a/fs/super.c b/fs/super.c
index 2b7dc90ccdbb..928c20f47af9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
224 s->s_maxbytes = MAX_NON_LFS; 224 s->s_maxbytes = MAX_NON_LFS;
225 s->s_op = &default_op; 225 s->s_op = &default_op;
226 s->s_time_gran = 1000000000; 226 s->s_time_gran = 1000000000;
227 s->cleancache_poolid = -1; 227 s->cleancache_poolid = CLEANCACHE_NO_POOL;
228 228
229 s->s_shrink.seeks = DEFAULT_SEEKS; 229 s->s_shrink.seeks = DEFAULT_SEEKS;
230 s->s_shrink.scan_objects = super_cache_scan; 230 s->s_shrink.scan_objects = super_cache_scan;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2554d8835b48..b400c04371f0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
41 41
42 if (grp->attrs) { 42 if (grp->attrs) {
43 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { 43 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
44 umode_t mode = 0; 44 umode_t mode = (*attr)->mode;
45 45
46 /* 46 /*
47 * In update mode, we're changing the permissions or 47 * In update mode, we're changing the permissions or
@@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
55 if (!mode) 55 if (!mode)
56 continue; 56 continue;
57 } 57 }
58
59 WARN(mode & ~(SYSFS_PREALLOC | 0664),
60 "Attribute %s: Invalid permissions 0%o\n",
61 (*attr)->name, mode);
62
63 mode &= SYSFS_PREALLOC | 0664;
58 error = sysfs_add_file_mode_ns(parent, *attr, false, 64 error = sysfs_add_file_mode_ns(parent, *attr, false,
59 (*attr)->mode | mode, 65 mode, NULL);
60 NULL);
61 if (unlikely(error)) 66 if (unlikely(error))
62 break; 67 break;
63 } 68 }
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index b00811c75b24..a48e30410ad1 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -21,9 +21,7 @@
21 */ 21 */
22const struct file_operations sysv_file_operations = { 22const struct file_operations sysv_file_operations = {
23 .llseek = generic_file_llseek, 23 .llseek = generic_file_llseek,
24 .read = new_sync_read,
25 .read_iter = generic_file_read_iter, 24 .read_iter = generic_file_read_iter,
26 .write = new_sync_write,
27 .write_iter = generic_file_write_iter, 25 .write_iter = generic_file_write_iter,
28 .mmap = generic_file_mmap, 26 .mmap = generic_file_mmap,
29 .fsync = generic_file_fsync, 27 .fsync = generic_file_fsync,
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
1tracefs-objs := inode.o
2
3obj-$(CONFIG_TRACING) += tracefs.o
4
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
1/*
2 * inode.c - part of tracefs, a pseudo file system for activating tracing
3 *
4 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
5 *
6 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * tracefs is the file system that is used by the tracing infrastructure.
13 *
14 */
15
16#include <linux/module.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/kobject.h>
20#include <linux/namei.h>
21#include <linux/tracefs.h>
22#include <linux/fsnotify.h>
23#include <linux/seq_file.h>
24#include <linux/parser.h>
25#include <linux/magic.h>
26#include <linux/slab.h>
27
/* Permissions for the tracefs root when no mode= mount option is given. */
#define TRACEFS_DEFAULT_MODE	0700

/* Pinned mount + refcount shared by all creation helpers (simple_pin_fs). */
static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
/* Set once tracefs_init() has registered the filesystem type. */
static bool tracefs_registered;
33
34static ssize_t default_read_file(struct file *file, char __user *buf,
35 size_t count, loff_t *ppos)
36{
37 return 0;
38}
39
40static ssize_t default_write_file(struct file *file, const char __user *buf,
41 size_t count, loff_t *ppos)
42{
43 return count;
44}
45
/*
 * Default file_operations for tracefs files created with fops == NULL:
 * reads return EOF, writes are accepted and discarded.
 */
static const struct file_operations tracefs_file_operations = {
	.read =		default_read_file,
	.write =	default_write_file,
	.open =		simple_open,
	.llseek =	noop_llseek,
};
52
/*
 * Callbacks installed (at most once) by tracefs_create_instance_dir();
 * invoked when userspace performs mkdir/rmdir inside that directory.
 */
static struct tracefs_dir_ops {
	int (*mkdir)(const char *name);
	int (*rmdir)(const char *name);
} tracefs_ops;
57
58static char *get_dname(struct dentry *dentry)
59{
60 const char *dname;
61 char *name;
62 int len = dentry->d_name.len;
63
64 dname = dentry->d_name.name;
65 name = kmalloc(len + 1, GFP_KERNEL);
66 if (!name)
67 return NULL;
68 memcpy(name, dname, len);
69 name[len] = 0;
70 return name;
71}
72
/*
 * mkdir(2) handler for the instances directory.
 *
 * Copies the new directory's name, then drops the parent inode mutex
 * around the tracefs_ops.mkdir() callback: the callback creates files
 * inside tracefs itself and would otherwise deadlock on the same lock.
 * Returns 0 on success or a negative errno (including -ENOMEM if the
 * name copy fails).
 */
static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
{
	char *name;
	int ret;

	/* Snapshot the name; the dentry is not dereferenced while unlocked. */
	name = get_dname(dentry);
	if (!name)
		return -ENOMEM;

	/*
	 * The mkdir call can call the generic functions that create
	 * the files within the tracefs system. It is up to the individual
	 * mkdir routine to handle races.
	 */
	mutex_unlock(&inode->i_mutex);
	ret = tracefs_ops.mkdir(name);
	mutex_lock(&inode->i_mutex);

	kfree(name);

	return ret;
}
95
/*
 * rmdir(2) handler for the instances directory.
 *
 * Like the mkdir hook, but two locks are held on entry: the parent's
 * i_mutex and the victim directory's own i_mutex.  Both are dropped
 * around the tracefs_ops.rmdir() callback and re-taken afterwards in
 * parent-then-child order (parent with I_MUTEX_PARENT to keep lockdep's
 * nesting annotation intact).  Returns 0 or a negative errno.
 */
static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
{
	char *name;
	int ret;

	name = get_dname(dentry);
	if (!name)
		return -ENOMEM;

	/*
	 * The rmdir call can call the generic functions that create
	 * the files within the tracefs system. It is up to the individual
	 * rmdir routine to handle races.
	 * This time we need to unlock not only the parent (inode) but
	 * also the directory that is being deleted.
	 */
	mutex_unlock(&inode->i_mutex);
	mutex_unlock(&dentry->d_inode->i_mutex);

	ret = tracefs_ops.rmdir(name);

	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
	mutex_lock(&dentry->d_inode->i_mutex);

	kfree(name);

	return ret;
}
124
/*
 * Inode ops used only for the instances directory, where userspace
 * mkdir/rmdir are forwarded to the registered tracing callbacks.
 */
static const struct inode_operations tracefs_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = tracefs_syscall_mkdir,
	.rmdir = tracefs_syscall_rmdir,
};
130
131static struct inode *tracefs_get_inode(struct super_block *sb)
132{
133 struct inode *inode = new_inode(sb);
134 if (inode) {
135 inode->i_ino = get_next_ino();
136 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
137 }
138 return inode;
139}
140
/* Options parsed from the mount(2) data string. */
struct tracefs_mount_opts {
	kuid_t uid;
	kgid_t gid;
	umode_t mode;
};

enum {
	Opt_uid,
	Opt_gid,
	Opt_mode,
	Opt_err
};

/* Recognized mount options; anything else is silently ignored. */
static const match_table_t tokens = {
	{Opt_uid, "uid=%u"},
	{Opt_gid, "gid=%u"},
	{Opt_mode, "mode=%o"},
	{Opt_err, NULL}
};

/* Per-superblock private data (sb->s_fs_info). */
struct tracefs_fs_info {
	struct tracefs_mount_opts mount_opts;
};
164
165static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
166{
167 substring_t args[MAX_OPT_ARGS];
168 int option;
169 int token;
170 kuid_t uid;
171 kgid_t gid;
172 char *p;
173
174 opts->mode = TRACEFS_DEFAULT_MODE;
175
176 while ((p = strsep(&data, ",")) != NULL) {
177 if (!*p)
178 continue;
179
180 token = match_token(p, tokens, args);
181 switch (token) {
182 case Opt_uid:
183 if (match_int(&args[0], &option))
184 return -EINVAL;
185 uid = make_kuid(current_user_ns(), option);
186 if (!uid_valid(uid))
187 return -EINVAL;
188 opts->uid = uid;
189 break;
190 case Opt_gid:
191 if (match_int(&args[0], &option))
192 return -EINVAL;
193 gid = make_kgid(current_user_ns(), option);
194 if (!gid_valid(gid))
195 return -EINVAL;
196 opts->gid = gid;
197 break;
198 case Opt_mode:
199 if (match_octal(&args[0], &option))
200 return -EINVAL;
201 opts->mode = option & S_IALLUGO;
202 break;
203 /*
204 * We might like to report bad mount options here;
205 * but traditionally tracefs has ignored all mount options
206 */
207 }
208 }
209
210 return 0;
211}
212
213static int tracefs_apply_options(struct super_block *sb)
214{
215 struct tracefs_fs_info *fsi = sb->s_fs_info;
216 struct inode *inode = sb->s_root->d_inode;
217 struct tracefs_mount_opts *opts = &fsi->mount_opts;
218
219 inode->i_mode &= ~S_IALLUGO;
220 inode->i_mode |= opts->mode;
221
222 inode->i_uid = opts->uid;
223 inode->i_gid = opts->gid;
224
225 return 0;
226}
227
228static int tracefs_remount(struct super_block *sb, int *flags, char *data)
229{
230 int err;
231 struct tracefs_fs_info *fsi = sb->s_fs_info;
232
233 sync_filesystem(sb);
234 err = tracefs_parse_options(data, &fsi->mount_opts);
235 if (err)
236 goto fail;
237
238 tracefs_apply_options(sb);
239
240fail:
241 return err;
242}
243
/*
 * Emit the mount options for /proc/mounts; only options that differ
 * from their defaults (root uid/gid, TRACEFS_DEFAULT_MODE) are shown.
 */
static int tracefs_show_options(struct seq_file *m, struct dentry *root)
{
	struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
	struct tracefs_mount_opts *opts = &fsi->mount_opts;

	if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
		seq_printf(m, ",uid=%u",
			   from_kuid_munged(&init_user_ns, opts->uid));
	if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
		seq_printf(m, ",gid=%u",
			   from_kgid_munged(&init_user_ns, opts->gid));
	if (opts->mode != TRACEFS_DEFAULT_MODE)
		seq_printf(m, ",mode=%o", opts->mode);

	return 0;
}
260
/* Superblock ops: stock statfs plus option handling above. */
static const struct super_operations tracefs_super_operations = {
	.statfs		= simple_statfs,
	.remount_fs	= tracefs_remount,
	.show_options	= tracefs_show_options,
};
266
/*
 * Fill a new tracefs superblock: allocate the fs_info, parse mount
 * options, build an (initially empty) root via simple_fill_super(),
 * then install our super_operations and apply the options.
 * On any failure the fs_info is freed and s_fs_info cleared.
 */
static int trace_fill_super(struct super_block *sb, void *data, int silent)
{
	/* Empty file table: the tree is populated later by the creation API. */
	static struct tree_descr trace_files[] = {{""}};
	struct tracefs_fs_info *fsi;
	int err;

	save_mount_options(sb, data);

	fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
	sb->s_fs_info = fsi;
	if (!fsi) {
		err = -ENOMEM;
		goto fail;
	}

	err = tracefs_parse_options(data, &fsi->mount_opts);
	if (err)
		goto fail;

	err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
	if (err)
		goto fail;

	sb->s_op = &tracefs_super_operations;

	tracefs_apply_options(sb);

	return 0;

fail:
	kfree(fsi);
	sb->s_fs_info = NULL;
	return err;
}
301
/* Mount entry point: tracefs is a singleton (one superblock system-wide). */
static struct dentry *trace_mount(struct file_system_type *fs_type,
			int flags, const char *dev_name,
			void *data)
{
	return mount_single(fs_type, flags, data, trace_fill_super);
}
308
/* Filesystem type registered by tracefs_init(). */
static struct file_system_type trace_fs_type = {
	.owner =	THIS_MODULE,
	.name =		"tracefs",
	.mount =	trace_mount,
	.kill_sb =	kill_litter_super,
};
MODULE_ALIAS_FS("tracefs");
316
/*
 * Common prologue for creating a tracefs object: pin the internal
 * mount, resolve @name under @parent (the tracefs root if NULL), and
 * fail with -EEXIST if the name is already in use.
 *
 * On success the parent's i_mutex is held; it is released later by
 * end_creating() or failed_creating().  On error the mutex is dropped
 * here and an ERR_PTR is returned (note: the mount pin is not released
 * on the lookup-error paths).
 */
static struct dentry *start_creating(const char *name, struct dentry *parent)
{
	struct dentry *dentry;
	int error;

	pr_debug("tracefs: creating file '%s'\n",name);

	error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
			      &tracefs_mount_count);
	if (error)
		return ERR_PTR(error);

	/* If the parent is not specified, we create it in the root.
	 * We need the root dentry to do this, which is in the super
	 * block. A pointer to that is in the struct vfsmount that we
	 * have around.
	 */
	if (!parent)
		parent = tracefs_mount->mnt_root;

	mutex_lock(&parent->d_inode->i_mutex);
	dentry = lookup_one_len(name, parent, strlen(name));
	if (!IS_ERR(dentry) && dentry->d_inode) {
		/* Name already exists: report -EEXIST instead. */
		dput(dentry);
		dentry = ERR_PTR(-EEXIST);
	}
	if (IS_ERR(dentry))
		mutex_unlock(&parent->d_inode->i_mutex);
	return dentry;
}
347
/*
 * Error epilogue to start_creating(): drop the parent i_mutex taken
 * there, release the dentry and unpin the mount.  Always returns NULL
 * so callers can tail-return it.
 */
static struct dentry *failed_creating(struct dentry *dentry)
{
	mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
	dput(dentry);
	simple_release_fs(&tracefs_mount, &tracefs_mount_count);
	return NULL;
}
355
/* Success epilogue to start_creating(): drop the parent i_mutex. */
static struct dentry *end_creating(struct dentry *dentry)
{
	mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
	return dentry;
}
361
/**
 * tracefs_create_file - create a file in the tracefs filesystem
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have.
 * @parent: a pointer to the parent dentry for this file. This should be a
 *          directory dentry if set. If this parameter is NULL, then the
 *          file will be created in the root of the tracefs filesystem.
 * @data: a pointer to something that the caller will want to get to later
 *        on. The inode.i_private pointer will point to this value on
 *        the open() call.
 * @fops: a pointer to a struct file_operations that should be used for
 *        this file.
 *
 * This is the basic "create a file" function for tracefs. It allows for a
 * wide range of flexibility in creating a file, or a directory (if you want
 * to create a directory, the tracefs_create_dir() function is
 * recommended to be used instead.)
 *
 * This function will return a pointer to a dentry if it succeeds. This
 * pointer must be passed to the tracefs_remove() function when the file is
 * to be removed (no automatic cleanup happens if your module is unloaded,
 * you are responsible here.) If an error occurs, %NULL will be returned.
 *
 * If tracefs is not enabled in the kernel, the value -%ENODEV will be
 * returned.
 */
struct dentry *tracefs_create_file(const char *name, umode_t mode,
				   struct dentry *parent, void *data,
				   const struct file_operations *fops)
{
	struct dentry *dentry;
	struct inode *inode;

	/* Default to a regular file; anything else in @mode is a caller bug. */
	if (!(mode & S_IFMT))
		mode |= S_IFREG;
	BUG_ON(!S_ISREG(mode));
	dentry = start_creating(name, parent);

	if (IS_ERR(dentry))
		return NULL;

	inode = tracefs_get_inode(dentry->d_sb);
	if (unlikely(!inode))
		return failed_creating(dentry);

	inode->i_mode = mode;
	/* NULL @fops falls back to the EOF/discard stubs. */
	inode->i_fop = fops ? fops : &tracefs_file_operations;
	inode->i_private = data;
	d_instantiate(dentry, inode);
	fsnotify_create(dentry->d_parent->d_inode, dentry);
	return end_creating(dentry);
}
414
/*
 * Shared implementation for tracefs_create_dir() and the instances
 * directory: create a directory with the given inode operations.
 * Returns the new dentry, or NULL on any failure.
 */
static struct dentry *__create_dir(const char *name, struct dentry *parent,
				   const struct inode_operations *ops)
{
	struct dentry *dentry = start_creating(name, parent);
	struct inode *inode;

	if (IS_ERR(dentry))
		return NULL;

	inode = tracefs_get_inode(dentry->d_sb);
	if (unlikely(!inode))
		return failed_creating(dentry);

	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
	inode->i_op = ops;
	inode->i_fop = &simple_dir_operations;

	/* directory inodes start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	d_instantiate(dentry, inode);
	/* The new ".." entry adds a link to the parent as well. */
	inc_nlink(dentry->d_parent->d_inode);
	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
	return end_creating(dentry);
}
439
/**
 * tracefs_create_dir - create a directory in the tracefs filesystem
 * @name: a pointer to a string containing the name of the directory to
 *        create.
 * @parent: a pointer to the parent dentry for this file. This should be a
 *          directory dentry if set. If this parameter is NULL, then the
 *          directory will be created in the root of the tracefs filesystem.
 *
 * This function creates a directory in tracefs with the given name.
 *
 * This function will return a pointer to a dentry if it succeeds. This
 * pointer must be passed to the tracefs_remove() function when the file is
 * to be removed. If an error occurs, %NULL will be returned.
 *
 * If tracing is not enabled in the kernel, the value -%ENODEV will be
 * returned.
 */
struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
{
	return __create_dir(name, parent, &simple_dir_inode_operations);
}
461
/**
 * tracefs_create_instance_dir - create the tracing instances directory
 * @name: The name of the instances directory to create
 * @parent: The parent directory that the instances directory will exist
 * @mkdir: The function to call when a mkdir is performed.
 * @rmdir: The function to call when a rmdir is performed.
 *
 * Only one instances directory is allowed.
 *
 * The instances directory is special as it allows for mkdir and rmdir
 * to be done by userspace. When a mkdir or rmdir is performed, the inode
 * locks are released and the methods passed in (@mkdir and @rmdir) are
 * called without locks and with the name of the directory being created
 * within the instances directory.
 *
 * Returns the dentry of the instances directory, or NULL on failure.
 */
struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
					  int (*mkdir)(const char *name),
					  int (*rmdir)(const char *name))
{
	struct dentry *dentry;

	/* Only allow one instance of the instances directory. */
	if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
		return NULL;

	dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
	if (!dentry)
		return NULL;

	tracefs_ops.mkdir = mkdir;
	tracefs_ops.rmdir = rmdir;

	return dentry;
}
498
499static inline int tracefs_positive(struct dentry *dentry)
500{
501 return dentry->d_inode && !d_unhashed(dentry);
502}
503
504static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
505{
506 int ret = 0;
507
508 if (tracefs_positive(dentry)) {
509 if (dentry->d_inode) {
510 dget(dentry);
511 switch (dentry->d_inode->i_mode & S_IFMT) {
512 case S_IFDIR:
513 ret = simple_rmdir(parent->d_inode, dentry);
514 break;
515 default:
516 simple_unlink(parent->d_inode, dentry);
517 break;
518 }
519 if (!ret)
520 d_delete(dentry);
521 dput(dentry);
522 }
523 }
524 return ret;
525}
526
/**
 * tracefs_remove - removes a file or directory from the tracefs filesystem
 * @dentry: a pointer to the dentry of the file or directory to be
 *          removed.
 *
 * This function removes a file or directory in tracefs that was previously
 * created with a call to another tracefs function (like
 * tracefs_create_file() or variants thereof.)
 *
 * NULL and ERR_PTR dentries are ignored, so it is safe to pass the
 * return value of a failed creation call directly.
 */
void tracefs_remove(struct dentry *dentry)
{
	struct dentry *parent;
	int ret;

	if (IS_ERR_OR_NULL(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || !parent->d_inode)
		return;

	mutex_lock(&parent->d_inode->i_mutex);
	ret = __tracefs_remove(dentry, parent);
	mutex_unlock(&parent->d_inode->i_mutex);
	/* Drop the mount pin taken by start_creating() for this object. */
	if (!ret)
		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
554
/**
 * tracefs_remove_recursive - recursively removes a directory
 * @dentry: a pointer to the dentry of the directory to be removed.
 *
 * This function recursively removes a directory tree in tracefs that
 * was previously created with a call to another tracefs function
 * (like tracefs_create_file() or variants thereof.)
 *
 * Implementation: an iterative depth-first walk using goto labels.
 * "down" descends into a non-empty child (moving the held i_mutex to
 * it); "loop" rescans the current directory's children from the start,
 * which is required because d_subdirs can only be trusted while
 * parent->d_lock is held.
 */
void tracefs_remove_recursive(struct dentry *dentry)
{
	struct dentry *child, *parent;

	if (IS_ERR_OR_NULL(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || !parent->d_inode)
		return;

	parent = dentry;
 down:
	mutex_lock(&parent->d_inode->i_mutex);
 loop:
	/*
	 * The parent->d_subdirs is protected by the d_lock. Outside that
	 * lock, the child can be unlinked and set to be freed which can
	 * use the d_u.d_child as the rcu head and corrupt this list.
	 */
	spin_lock(&parent->d_lock);
	list_for_each_entry(child, &parent->d_subdirs, d_child) {
		if (!tracefs_positive(child))
			continue;

		/* perhaps simple_empty(child) makes more sense */
		if (!list_empty(&child->d_subdirs)) {
			/* Non-empty directory: descend into it first. */
			spin_unlock(&parent->d_lock);
			mutex_unlock(&parent->d_inode->i_mutex);
			parent = child;
			goto down;
		}

		spin_unlock(&parent->d_lock);

		if (!__tracefs_remove(child, parent))
			simple_release_fs(&tracefs_mount, &tracefs_mount_count);

		/*
		 * The parent->d_lock protects against child from unlinking
		 * from d_subdirs. When releasing the parent->d_lock we can
		 * no longer trust that the next pointer is valid.
		 * Restart the loop. We'll skip this one with the
		 * tracefs_positive() check.
		 */
		goto loop;
	}
	spin_unlock(&parent->d_lock);

	/* Directory emptied: pop back up to its parent and keep scanning. */
	mutex_unlock(&parent->d_inode->i_mutex);
	child = parent;
	parent = parent->d_parent;
	mutex_lock(&parent->d_inode->i_mutex);

	if (child != dentry)
		/* go up */
		goto loop;

	/* Finally remove the (now empty) root of the requested subtree. */
	if (!__tracefs_remove(child, parent))
		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
	mutex_unlock(&parent->d_inode->i_mutex);
}
625
/**
 * tracefs_initialized - Tells whether tracefs has been registered
 *
 * Returns true once tracefs_init() has successfully registered the
 * filesystem type.
 */
bool tracefs_initialized(void)
{
	return tracefs_registered;
}
633
634static struct kobject *trace_kobj;
635
636static int __init tracefs_init(void)
637{
638 int retval;
639
640 trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
641 if (!trace_kobj)
642 return -EINVAL;
643
644 retval = register_filesystem(&trace_fs_type);
645 if (!retval)
646 tracefs_registered = true;
647
648 return retval;
649}
650core_initcall(tracefs_init);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eb997e9c4ab0..11a11b32a2a9 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -509,7 +509,7 @@ again:
509 c->bi.nospace_rp = 1; 509 c->bi.nospace_rp = 1;
510 smp_wmb(); 510 smp_wmb();
511 } else 511 } else
512 ubifs_err("cannot budget space, error %d", err); 512 ubifs_err(c, "cannot budget space, error %d", err);
513 return err; 513 return err;
514} 514}
515 515
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 26b69b2d4a45..63f56619991d 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -225,7 +225,7 @@ out_cancel:
225out_up: 225out_up:
226 up_write(&c->commit_sem); 226 up_write(&c->commit_sem);
227out: 227out:
228 ubifs_err("commit failed, error %d", err); 228 ubifs_err(c, "commit failed, error %d", err);
229 spin_lock(&c->cs_lock); 229 spin_lock(&c->cs_lock);
230 c->cmt_state = COMMIT_BROKEN; 230 c->cmt_state = COMMIT_BROKEN;
231 wake_up(&c->cmt_wq); 231 wake_up(&c->cmt_wq);
@@ -289,7 +289,7 @@ int ubifs_bg_thread(void *info)
289 int err; 289 int err;
290 struct ubifs_info *c = info; 290 struct ubifs_info *c = info;
291 291
292 ubifs_msg("background thread \"%s\" started, PID %d", 292 ubifs_msg(c, "background thread \"%s\" started, PID %d",
293 c->bgt_name, current->pid); 293 c->bgt_name, current->pid);
294 set_freezable(); 294 set_freezable();
295 295
@@ -324,7 +324,7 @@ int ubifs_bg_thread(void *info)
324 cond_resched(); 324 cond_resched();
325 } 325 }
326 326
327 ubifs_msg("background thread \"%s\" stops", c->bgt_name); 327 ubifs_msg(c, "background thread \"%s\" stops", c->bgt_name);
328 return 0; 328 return 0;
329} 329}
330 330
@@ -712,13 +712,13 @@ out:
712 return 0; 712 return 0;
713 713
714out_dump: 714out_dump:
715 ubifs_err("dumping index node (iip=%d)", i->iip); 715 ubifs_err(c, "dumping index node (iip=%d)", i->iip);
716 ubifs_dump_node(c, idx); 716 ubifs_dump_node(c, idx);
717 list_del(&i->list); 717 list_del(&i->list);
718 kfree(i); 718 kfree(i);
719 if (!list_empty(&list)) { 719 if (!list_empty(&list)) {
720 i = list_entry(list.prev, struct idx_node, list); 720 i = list_entry(list.prev, struct idx_node, list);
721 ubifs_err("dumping parent index node"); 721 ubifs_err(c, "dumping parent index node");
722 ubifs_dump_node(c, &i->idx); 722 ubifs_dump_node(c, &i->idx);
723 } 723 }
724out_free: 724out_free:
@@ -727,7 +727,7 @@ out_free:
727 list_del(&i->list); 727 list_del(&i->list);
728 kfree(i); 728 kfree(i);
729 } 729 }
730 ubifs_err("failed, error %d", err); 730 ubifs_err(c, "failed, error %d", err);
731 if (err > 0) 731 if (err > 0)
732 err = -EINVAL; 732 err = -EINVAL;
733 return err; 733 return err;
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 2bfa0953335d..565cb56d7225 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -92,8 +92,8 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
92 * Note, if the input buffer was not compressed, it is copied to the output 92 * Note, if the input buffer was not compressed, it is copied to the output
93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. 93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
94 */ 94 */
95void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 95void ubifs_compress(const struct ubifs_info *c, const void *in_buf,
96 int *compr_type) 96 int in_len, void *out_buf, int *out_len, int *compr_type)
97{ 97{
98 int err; 98 int err;
99 struct ubifs_compressor *compr = ubifs_compressors[*compr_type]; 99 struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
@@ -112,9 +112,9 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
112 if (compr->comp_mutex) 112 if (compr->comp_mutex)
113 mutex_unlock(compr->comp_mutex); 113 mutex_unlock(compr->comp_mutex);
114 if (unlikely(err)) { 114 if (unlikely(err)) {
115 ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed", 115 ubifs_warn(c, "cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
116 in_len, compr->name, err); 116 in_len, compr->name, err);
117 goto no_compr; 117 goto no_compr;
118 } 118 }
119 119
120 /* 120 /*
@@ -144,21 +144,21 @@ no_compr:
144 * The length of the uncompressed data is returned in @out_len. This functions 144 * The length of the uncompressed data is returned in @out_len. This functions
145 * returns %0 on success or a negative error code on failure. 145 * returns %0 on success or a negative error code on failure.
146 */ 146 */
147int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, 147int ubifs_decompress(const struct ubifs_info *c, const void *in_buf,
148 int *out_len, int compr_type) 148 int in_len, void *out_buf, int *out_len, int compr_type)
149{ 149{
150 int err; 150 int err;
151 struct ubifs_compressor *compr; 151 struct ubifs_compressor *compr;
152 152
153 if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { 153 if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
154 ubifs_err("invalid compression type %d", compr_type); 154 ubifs_err(c, "invalid compression type %d", compr_type);
155 return -EINVAL; 155 return -EINVAL;
156 } 156 }
157 157
158 compr = ubifs_compressors[compr_type]; 158 compr = ubifs_compressors[compr_type];
159 159
160 if (unlikely(!compr->capi_name)) { 160 if (unlikely(!compr->capi_name)) {
161 ubifs_err("%s compression is not compiled in", compr->name); 161 ubifs_err(c, "%s compression is not compiled in", compr->name);
162 return -EINVAL; 162 return -EINVAL;
163 } 163 }
164 164
@@ -175,7 +175,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
175 if (compr->decomp_mutex) 175 if (compr->decomp_mutex)
176 mutex_unlock(compr->decomp_mutex); 176 mutex_unlock(compr->decomp_mutex);
177 if (err) 177 if (err)
178 ubifs_err("cannot decompress %d bytes, compressor %s, error %d", 178 ubifs_err(c, "cannot decompress %d bytes, compressor %s, error %d",
179 in_len, compr->name, err); 179 in_len, compr->name, err);
180 180
181 return err; 181 return err;
@@ -193,8 +193,8 @@ static int __init compr_init(struct ubifs_compressor *compr)
193 if (compr->capi_name) { 193 if (compr->capi_name) {
194 compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0); 194 compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
195 if (IS_ERR(compr->cc)) { 195 if (IS_ERR(compr->cc)) {
196 ubifs_err("cannot initialize compressor %s, error %ld", 196 pr_err("UBIFS error (pid %d): cannot initialize compressor %s, error %ld",
197 compr->name, PTR_ERR(compr->cc)); 197 current->pid, compr->name, PTR_ERR(compr->cc));
198 return PTR_ERR(compr->cc); 198 return PTR_ERR(compr->cc);
199 } 199 }
200 } 200 }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4cfb3e82c56f..4c46a9865fa7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -746,7 +746,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
746 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { 746 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
747 err = ubifs_read_one_lp(c, lnum, &lp); 747 err = ubifs_read_one_lp(c, lnum, &lp);
748 if (err) { 748 if (err) {
749 ubifs_err("cannot read lprops for LEB %d", lnum); 749 ubifs_err(c, "cannot read lprops for LEB %d", lnum);
750 continue; 750 continue;
751 } 751 }
752 752
@@ -819,13 +819,13 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
819 819
820 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 820 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
821 if (!buf) { 821 if (!buf) {
822 ubifs_err("cannot allocate memory for dumping LEB %d", lnum); 822 ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum);
823 return; 823 return;
824 } 824 }
825 825
826 sleb = ubifs_scan(c, lnum, 0, buf, 0); 826 sleb = ubifs_scan(c, lnum, 0, buf, 0);
827 if (IS_ERR(sleb)) { 827 if (IS_ERR(sleb)) {
828 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 828 ubifs_err(c, "scan error %d", (int)PTR_ERR(sleb));
829 goto out; 829 goto out;
830 } 830 }
831 831
@@ -1032,7 +1032,7 @@ int dbg_check_space_info(struct ubifs_info *c)
1032 spin_unlock(&c->space_lock); 1032 spin_unlock(&c->space_lock);
1033 1033
1034 if (free != d->saved_free) { 1034 if (free != d->saved_free) {
1035 ubifs_err("free space changed from %lld to %lld", 1035 ubifs_err(c, "free space changed from %lld to %lld",
1036 d->saved_free, free); 1036 d->saved_free, free);
1037 goto out; 1037 goto out;
1038 } 1038 }
@@ -1040,15 +1040,15 @@ int dbg_check_space_info(struct ubifs_info *c)
1040 return 0; 1040 return 0;
1041 1041
1042out: 1042out:
1043 ubifs_msg("saved lprops statistics dump"); 1043 ubifs_msg(c, "saved lprops statistics dump");
1044 ubifs_dump_lstats(&d->saved_lst); 1044 ubifs_dump_lstats(&d->saved_lst);
1045 ubifs_msg("saved budgeting info dump"); 1045 ubifs_msg(c, "saved budgeting info dump");
1046 ubifs_dump_budg(c, &d->saved_bi); 1046 ubifs_dump_budg(c, &d->saved_bi);
1047 ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); 1047 ubifs_msg(c, "saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
1048 ubifs_msg("current lprops statistics dump"); 1048 ubifs_msg(c, "current lprops statistics dump");
1049 ubifs_get_lp_stats(c, &lst); 1049 ubifs_get_lp_stats(c, &lst);
1050 ubifs_dump_lstats(&lst); 1050 ubifs_dump_lstats(&lst);
1051 ubifs_msg("current budgeting info dump"); 1051 ubifs_msg(c, "current budgeting info dump");
1052 ubifs_dump_budg(c, &c->bi); 1052 ubifs_dump_budg(c, &c->bi);
1053 dump_stack(); 1053 dump_stack();
1054 return -EINVAL; 1054 return -EINVAL;
@@ -1077,9 +1077,9 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
1077 mutex_lock(&ui->ui_mutex); 1077 mutex_lock(&ui->ui_mutex);
1078 spin_lock(&ui->ui_lock); 1078 spin_lock(&ui->ui_lock);
1079 if (ui->ui_size != ui->synced_i_size && !ui->dirty) { 1079 if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
1080 ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean", 1080 ubifs_err(c, "ui_size is %lld, synced_i_size is %lld, but inode is clean",
1081 ui->ui_size, ui->synced_i_size); 1081 ui->ui_size, ui->synced_i_size);
1082 ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino, 1082 ubifs_err(c, "i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
1083 inode->i_mode, i_size_read(inode)); 1083 inode->i_mode, i_size_read(inode));
1084 dump_stack(); 1084 dump_stack();
1085 err = -EINVAL; 1085 err = -EINVAL;
@@ -1140,7 +1140,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1140 kfree(pdent); 1140 kfree(pdent);
1141 1141
1142 if (i_size_read(dir) != size) { 1142 if (i_size_read(dir) != size) {
1143 ubifs_err("directory inode %lu has size %llu, but calculated size is %llu", 1143 ubifs_err(c, "directory inode %lu has size %llu, but calculated size is %llu",
1144 dir->i_ino, (unsigned long long)i_size_read(dir), 1144 dir->i_ino, (unsigned long long)i_size_read(dir),
1145 (unsigned long long)size); 1145 (unsigned long long)size);
1146 ubifs_dump_inode(c, dir); 1146 ubifs_dump_inode(c, dir);
@@ -1148,7 +1148,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1148 return -EINVAL; 1148 return -EINVAL;
1149 } 1149 }
1150 if (dir->i_nlink != nlink) { 1150 if (dir->i_nlink != nlink) {
1151 ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u", 1151 ubifs_err(c, "directory inode %lu has nlink %u, but calculated nlink is %u",
1152 dir->i_ino, dir->i_nlink, nlink); 1152 dir->i_ino, dir->i_nlink, nlink);
1153 ubifs_dump_inode(c, dir); 1153 ubifs_dump_inode(c, dir);
1154 dump_stack(); 1154 dump_stack();
@@ -1207,10 +1207,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1207 err = 1; 1207 err = 1;
1208 key_read(c, &dent1->key, &key); 1208 key_read(c, &dent1->key, &key);
1209 if (keys_cmp(c, &zbr1->key, &key)) { 1209 if (keys_cmp(c, &zbr1->key, &key)) {
1210 ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum, 1210 ubifs_err(c, "1st entry at %d:%d has key %s", zbr1->lnum,
1211 zbr1->offs, dbg_snprintf_key(c, &key, key_buf, 1211 zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
1212 DBG_KEY_BUF_LEN)); 1212 DBG_KEY_BUF_LEN));
1213 ubifs_err("but it should have key %s according to tnc", 1213 ubifs_err(c, "but it should have key %s according to tnc",
1214 dbg_snprintf_key(c, &zbr1->key, key_buf, 1214 dbg_snprintf_key(c, &zbr1->key, key_buf,
1215 DBG_KEY_BUF_LEN)); 1215 DBG_KEY_BUF_LEN));
1216 ubifs_dump_node(c, dent1); 1216 ubifs_dump_node(c, dent1);
@@ -1219,10 +1219,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1219 1219
1220 key_read(c, &dent2->key, &key); 1220 key_read(c, &dent2->key, &key);
1221 if (keys_cmp(c, &zbr2->key, &key)) { 1221 if (keys_cmp(c, &zbr2->key, &key)) {
1222 ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum, 1222 ubifs_err(c, "2nd entry at %d:%d has key %s", zbr1->lnum,
1223 zbr1->offs, dbg_snprintf_key(c, &key, key_buf, 1223 zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
1224 DBG_KEY_BUF_LEN)); 1224 DBG_KEY_BUF_LEN));
1225 ubifs_err("but it should have key %s according to tnc", 1225 ubifs_err(c, "but it should have key %s according to tnc",
1226 dbg_snprintf_key(c, &zbr2->key, key_buf, 1226 dbg_snprintf_key(c, &zbr2->key, key_buf,
1227 DBG_KEY_BUF_LEN)); 1227 DBG_KEY_BUF_LEN));
1228 ubifs_dump_node(c, dent2); 1228 ubifs_dump_node(c, dent2);
@@ -1238,14 +1238,14 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
1238 goto out_free; 1238 goto out_free;
1239 } 1239 }
1240 if (cmp == 0 && nlen1 == nlen2) 1240 if (cmp == 0 && nlen1 == nlen2)
1241 ubifs_err("2 xent/dent nodes with the same name"); 1241 ubifs_err(c, "2 xent/dent nodes with the same name");
1242 else 1242 else
1243 ubifs_err("bad order of colliding key %s", 1243 ubifs_err(c, "bad order of colliding key %s",
1244 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); 1244 dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
1245 1245
1246 ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); 1246 ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs);
1247 ubifs_dump_node(c, dent1); 1247 ubifs_dump_node(c, dent1);
1248 ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); 1248 ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs);
1249 ubifs_dump_node(c, dent2); 1249 ubifs_dump_node(c, dent2);
1250 1250
1251out_free: 1251out_free:
@@ -1447,11 +1447,11 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
1447 return 0; 1447 return 0;
1448 1448
1449out: 1449out:
1450 ubifs_err("failed, error %d", err); 1450 ubifs_err(c, "failed, error %d", err);
1451 ubifs_msg("dump of the znode"); 1451 ubifs_msg(c, "dump of the znode");
1452 ubifs_dump_znode(c, znode); 1452 ubifs_dump_znode(c, znode);
1453 if (zp) { 1453 if (zp) {
1454 ubifs_msg("dump of the parent znode"); 1454 ubifs_msg(c, "dump of the parent znode");
1455 ubifs_dump_znode(c, zp); 1455 ubifs_dump_znode(c, zp);
1456 } 1456 }
1457 dump_stack(); 1457 dump_stack();
@@ -1518,9 +1518,9 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1518 if (err < 0) 1518 if (err < 0)
1519 return err; 1519 return err;
1520 if (err) { 1520 if (err) {
1521 ubifs_msg("first znode"); 1521 ubifs_msg(c, "first znode");
1522 ubifs_dump_znode(c, prev); 1522 ubifs_dump_znode(c, prev);
1523 ubifs_msg("second znode"); 1523 ubifs_msg(c, "second znode");
1524 ubifs_dump_znode(c, znode); 1524 ubifs_dump_znode(c, znode);
1525 return -EINVAL; 1525 return -EINVAL;
1526 } 1526 }
@@ -1529,13 +1529,13 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1529 1529
1530 if (extra) { 1530 if (extra) {
1531 if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) { 1531 if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
1532 ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld", 1532 ubifs_err(c, "incorrect clean_zn_cnt %ld, calculated %ld",
1533 atomic_long_read(&c->clean_zn_cnt), 1533 atomic_long_read(&c->clean_zn_cnt),
1534 clean_cnt); 1534 clean_cnt);
1535 return -EINVAL; 1535 return -EINVAL;
1536 } 1536 }
1537 if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) { 1537 if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
1538 ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld", 1538 ubifs_err(c, "incorrect dirty_zn_cnt %ld, calculated %ld",
1539 atomic_long_read(&c->dirty_zn_cnt), 1539 atomic_long_read(&c->dirty_zn_cnt),
1540 dirty_cnt); 1540 dirty_cnt);
1541 return -EINVAL; 1541 return -EINVAL;
@@ -1608,7 +1608,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1608 if (znode_cb) { 1608 if (znode_cb) {
1609 err = znode_cb(c, znode, priv); 1609 err = znode_cb(c, znode, priv);
1610 if (err) { 1610 if (err) {
1611 ubifs_err("znode checking function returned error %d", 1611 ubifs_err(c, "znode checking function returned error %d",
1612 err); 1612 err);
1613 ubifs_dump_znode(c, znode); 1613 ubifs_dump_znode(c, znode);
1614 goto out_dump; 1614 goto out_dump;
@@ -1619,7 +1619,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1619 zbr = &znode->zbranch[idx]; 1619 zbr = &znode->zbranch[idx];
1620 err = leaf_cb(c, zbr, priv); 1620 err = leaf_cb(c, zbr, priv);
1621 if (err) { 1621 if (err) {
1622 ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d", 1622 ubifs_err(c, "leaf checking function returned error %d, for leaf at LEB %d:%d",
1623 err, zbr->lnum, zbr->offs); 1623 err, zbr->lnum, zbr->offs);
1624 goto out_dump; 1624 goto out_dump;
1625 } 1625 }
@@ -1675,7 +1675,7 @@ out_dump:
1675 zbr = &znode->parent->zbranch[znode->iip]; 1675 zbr = &znode->parent->zbranch[znode->iip];
1676 else 1676 else
1677 zbr = &c->zroot; 1677 zbr = &c->zroot;
1678 ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs); 1678 ubifs_msg(c, "dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
1679 ubifs_dump_znode(c, znode); 1679 ubifs_dump_znode(c, znode);
1680out_unlock: 1680out_unlock:
1681 mutex_unlock(&c->tnc_mutex); 1681 mutex_unlock(&c->tnc_mutex);
@@ -1722,12 +1722,12 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1722 1722
1723 err = dbg_walk_index(c, NULL, add_size, &calc); 1723 err = dbg_walk_index(c, NULL, add_size, &calc);
1724 if (err) { 1724 if (err) {
1725 ubifs_err("error %d while walking the index", err); 1725 ubifs_err(c, "error %d while walking the index", err);
1726 return err; 1726 return err;
1727 } 1727 }
1728 1728
1729 if (calc != idx_size) { 1729 if (calc != idx_size) {
1730 ubifs_err("index size check failed: calculated size is %lld, should be %lld", 1730 ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld",
1731 calc, idx_size); 1731 calc, idx_size);
1732 dump_stack(); 1732 dump_stack();
1733 return -EINVAL; 1733 return -EINVAL;
@@ -1814,7 +1814,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1814 } 1814 }
1815 1815
1816 if (inum > c->highest_inum) { 1816 if (inum > c->highest_inum) {
1817 ubifs_err("too high inode number, max. is %lu", 1817 ubifs_err(c, "too high inode number, max. is %lu",
1818 (unsigned long)c->highest_inum); 1818 (unsigned long)c->highest_inum);
1819 return ERR_PTR(-EINVAL); 1819 return ERR_PTR(-EINVAL);
1820 } 1820 }
@@ -1921,17 +1921,17 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1921 ino_key_init(c, &key, inum); 1921 ino_key_init(c, &key, inum);
1922 err = ubifs_lookup_level0(c, &key, &znode, &n); 1922 err = ubifs_lookup_level0(c, &key, &znode, &n);
1923 if (!err) { 1923 if (!err) {
1924 ubifs_err("inode %lu not found in index", (unsigned long)inum); 1924 ubifs_err(c, "inode %lu not found in index", (unsigned long)inum);
1925 return ERR_PTR(-ENOENT); 1925 return ERR_PTR(-ENOENT);
1926 } else if (err < 0) { 1926 } else if (err < 0) {
1927 ubifs_err("error %d while looking up inode %lu", 1927 ubifs_err(c, "error %d while looking up inode %lu",
1928 err, (unsigned long)inum); 1928 err, (unsigned long)inum);
1929 return ERR_PTR(err); 1929 return ERR_PTR(err);
1930 } 1930 }
1931 1931
1932 zbr = &znode->zbranch[n]; 1932 zbr = &znode->zbranch[n];
1933 if (zbr->len < UBIFS_INO_NODE_SZ) { 1933 if (zbr->len < UBIFS_INO_NODE_SZ) {
1934 ubifs_err("bad node %lu node length %d", 1934 ubifs_err(c, "bad node %lu node length %d",
1935 (unsigned long)inum, zbr->len); 1935 (unsigned long)inum, zbr->len);
1936 return ERR_PTR(-EINVAL); 1936 return ERR_PTR(-EINVAL);
1937 } 1937 }
@@ -1942,7 +1942,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1942 1942
1943 err = ubifs_tnc_read_node(c, zbr, ino); 1943 err = ubifs_tnc_read_node(c, zbr, ino);
1944 if (err) { 1944 if (err) {
1945 ubifs_err("cannot read inode node at LEB %d:%d, error %d", 1945 ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d",
1946 zbr->lnum, zbr->offs, err); 1946 zbr->lnum, zbr->offs, err);
1947 kfree(ino); 1947 kfree(ino);
1948 return ERR_PTR(err); 1948 return ERR_PTR(err);
@@ -1951,7 +1951,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1951 fscki = add_inode(c, fsckd, ino); 1951 fscki = add_inode(c, fsckd, ino);
1952 kfree(ino); 1952 kfree(ino);
1953 if (IS_ERR(fscki)) { 1953 if (IS_ERR(fscki)) {
1954 ubifs_err("error %ld while adding inode %lu node", 1954 ubifs_err(c, "error %ld while adding inode %lu node",
1955 PTR_ERR(fscki), (unsigned long)inum); 1955 PTR_ERR(fscki), (unsigned long)inum);
1956 return fscki; 1956 return fscki;
1957 } 1957 }
@@ -1985,7 +1985,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1985 struct fsck_inode *fscki; 1985 struct fsck_inode *fscki;
1986 1986
1987 if (zbr->len < UBIFS_CH_SZ) { 1987 if (zbr->len < UBIFS_CH_SZ) {
1988 ubifs_err("bad leaf length %d (LEB %d:%d)", 1988 ubifs_err(c, "bad leaf length %d (LEB %d:%d)",
1989 zbr->len, zbr->lnum, zbr->offs); 1989 zbr->len, zbr->lnum, zbr->offs);
1990 return -EINVAL; 1990 return -EINVAL;
1991 } 1991 }
@@ -1996,7 +1996,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1996 1996
1997 err = ubifs_tnc_read_node(c, zbr, node); 1997 err = ubifs_tnc_read_node(c, zbr, node);
1998 if (err) { 1998 if (err) {
1999 ubifs_err("cannot read leaf node at LEB %d:%d, error %d", 1999 ubifs_err(c, "cannot read leaf node at LEB %d:%d, error %d",
2000 zbr->lnum, zbr->offs, err); 2000 zbr->lnum, zbr->offs, err);
2001 goto out_free; 2001 goto out_free;
2002 } 2002 }
@@ -2006,7 +2006,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2006 fscki = add_inode(c, priv, node); 2006 fscki = add_inode(c, priv, node);
2007 if (IS_ERR(fscki)) { 2007 if (IS_ERR(fscki)) {
2008 err = PTR_ERR(fscki); 2008 err = PTR_ERR(fscki);
2009 ubifs_err("error %d while adding inode node", err); 2009 ubifs_err(c, "error %d while adding inode node", err);
2010 goto out_dump; 2010 goto out_dump;
2011 } 2011 }
2012 goto out; 2012 goto out;
@@ -2014,7 +2014,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2014 2014
2015 if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY && 2015 if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY &&
2016 type != UBIFS_DATA_KEY) { 2016 type != UBIFS_DATA_KEY) {
2017 ubifs_err("unexpected node type %d at LEB %d:%d", 2017 ubifs_err(c, "unexpected node type %d at LEB %d:%d",
2018 type, zbr->lnum, zbr->offs); 2018 type, zbr->lnum, zbr->offs);
2019 err = -EINVAL; 2019 err = -EINVAL;
2020 goto out_free; 2020 goto out_free;
@@ -2022,7 +2022,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2022 2022
2023 ch = node; 2023 ch = node;
2024 if (le64_to_cpu(ch->sqnum) > c->max_sqnum) { 2024 if (le64_to_cpu(ch->sqnum) > c->max_sqnum) {
2025 ubifs_err("too high sequence number, max. is %llu", 2025 ubifs_err(c, "too high sequence number, max. is %llu",
2026 c->max_sqnum); 2026 c->max_sqnum);
2027 err = -EINVAL; 2027 err = -EINVAL;
2028 goto out_dump; 2028 goto out_dump;
@@ -2042,7 +2042,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2042 fscki = read_add_inode(c, priv, inum); 2042 fscki = read_add_inode(c, priv, inum);
2043 if (IS_ERR(fscki)) { 2043 if (IS_ERR(fscki)) {
2044 err = PTR_ERR(fscki); 2044 err = PTR_ERR(fscki);
2045 ubifs_err("error %d while processing data node and trying to find inode node %lu", 2045 ubifs_err(c, "error %d while processing data node and trying to find inode node %lu",
2046 err, (unsigned long)inum); 2046 err, (unsigned long)inum);
2047 goto out_dump; 2047 goto out_dump;
2048 } 2048 }
@@ -2052,7 +2052,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2052 blk_offs <<= UBIFS_BLOCK_SHIFT; 2052 blk_offs <<= UBIFS_BLOCK_SHIFT;
2053 blk_offs += le32_to_cpu(dn->size); 2053 blk_offs += le32_to_cpu(dn->size);
2054 if (blk_offs > fscki->size) { 2054 if (blk_offs > fscki->size) {
2055 ubifs_err("data node at LEB %d:%d is not within inode size %lld", 2055 ubifs_err(c, "data node at LEB %d:%d is not within inode size %lld",
2056 zbr->lnum, zbr->offs, fscki->size); 2056 zbr->lnum, zbr->offs, fscki->size);
2057 err = -EINVAL; 2057 err = -EINVAL;
2058 goto out_dump; 2058 goto out_dump;
@@ -2076,7 +2076,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2076 fscki = read_add_inode(c, priv, inum); 2076 fscki = read_add_inode(c, priv, inum);
2077 if (IS_ERR(fscki)) { 2077 if (IS_ERR(fscki)) {
2078 err = PTR_ERR(fscki); 2078 err = PTR_ERR(fscki);
2079 ubifs_err("error %d while processing entry node and trying to find inode node %lu", 2079 ubifs_err(c, "error %d while processing entry node and trying to find inode node %lu",
2080 err, (unsigned long)inum); 2080 err, (unsigned long)inum);
2081 goto out_dump; 2081 goto out_dump;
2082 } 2082 }
@@ -2088,7 +2088,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2088 fscki1 = read_add_inode(c, priv, inum); 2088 fscki1 = read_add_inode(c, priv, inum);
2089 if (IS_ERR(fscki1)) { 2089 if (IS_ERR(fscki1)) {
2090 err = PTR_ERR(fscki1); 2090 err = PTR_ERR(fscki1);
2091 ubifs_err("error %d while processing entry node and trying to find parent inode node %lu", 2091 ubifs_err(c, "error %d while processing entry node and trying to find parent inode node %lu",
2092 err, (unsigned long)inum); 2092 err, (unsigned long)inum);
2093 goto out_dump; 2093 goto out_dump;
2094 } 2094 }
@@ -2111,7 +2111,7 @@ out:
2111 return 0; 2111 return 0;
2112 2112
2113out_dump: 2113out_dump:
2114 ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs); 2114 ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
2115 ubifs_dump_node(c, node); 2115 ubifs_dump_node(c, node);
2116out_free: 2116out_free:
2117 kfree(node); 2117 kfree(node);
@@ -2162,52 +2162,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
2162 */ 2162 */
2163 if (fscki->inum != UBIFS_ROOT_INO && 2163 if (fscki->inum != UBIFS_ROOT_INO &&
2164 fscki->references != 1) { 2164 fscki->references != 1) {
2165 ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1", 2165 ubifs_err(c, "directory inode %lu has %d direntries which refer it, but should be 1",
2166 (unsigned long)fscki->inum, 2166 (unsigned long)fscki->inum,
2167 fscki->references); 2167 fscki->references);
2168 goto out_dump; 2168 goto out_dump;
2169 } 2169 }
2170 if (fscki->inum == UBIFS_ROOT_INO && 2170 if (fscki->inum == UBIFS_ROOT_INO &&
2171 fscki->references != 0) { 2171 fscki->references != 0) {
2172 ubifs_err("root inode %lu has non-zero (%d) direntries which refer it", 2172 ubifs_err(c, "root inode %lu has non-zero (%d) direntries which refer it",
2173 (unsigned long)fscki->inum, 2173 (unsigned long)fscki->inum,
2174 fscki->references); 2174 fscki->references);
2175 goto out_dump; 2175 goto out_dump;
2176 } 2176 }
2177 if (fscki->calc_sz != fscki->size) { 2177 if (fscki->calc_sz != fscki->size) {
2178 ubifs_err("directory inode %lu size is %lld, but calculated size is %lld", 2178 ubifs_err(c, "directory inode %lu size is %lld, but calculated size is %lld",
2179 (unsigned long)fscki->inum, 2179 (unsigned long)fscki->inum,
2180 fscki->size, fscki->calc_sz); 2180 fscki->size, fscki->calc_sz);
2181 goto out_dump; 2181 goto out_dump;
2182 } 2182 }
2183 if (fscki->calc_cnt != fscki->nlink) { 2183 if (fscki->calc_cnt != fscki->nlink) {
2184 ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d", 2184 ubifs_err(c, "directory inode %lu nlink is %d, but calculated nlink is %d",
2185 (unsigned long)fscki->inum, 2185 (unsigned long)fscki->inum,
2186 fscki->nlink, fscki->calc_cnt); 2186 fscki->nlink, fscki->calc_cnt);
2187 goto out_dump; 2187 goto out_dump;
2188 } 2188 }
2189 } else { 2189 } else {
2190 if (fscki->references != fscki->nlink) { 2190 if (fscki->references != fscki->nlink) {
2191 ubifs_err("inode %lu nlink is %d, but calculated nlink is %d", 2191 ubifs_err(c, "inode %lu nlink is %d, but calculated nlink is %d",
2192 (unsigned long)fscki->inum, 2192 (unsigned long)fscki->inum,
2193 fscki->nlink, fscki->references); 2193 fscki->nlink, fscki->references);
2194 goto out_dump; 2194 goto out_dump;
2195 } 2195 }
2196 } 2196 }
2197 if (fscki->xattr_sz != fscki->calc_xsz) { 2197 if (fscki->xattr_sz != fscki->calc_xsz) {
2198 ubifs_err("inode %lu has xattr size %u, but calculated size is %lld", 2198 ubifs_err(c, "inode %lu has xattr size %u, but calculated size is %lld",
2199 (unsigned long)fscki->inum, fscki->xattr_sz, 2199 (unsigned long)fscki->inum, fscki->xattr_sz,
2200 fscki->calc_xsz); 2200 fscki->calc_xsz);
2201 goto out_dump; 2201 goto out_dump;
2202 } 2202 }
2203 if (fscki->xattr_cnt != fscki->calc_xcnt) { 2203 if (fscki->xattr_cnt != fscki->calc_xcnt) {
2204 ubifs_err("inode %lu has %u xattrs, but calculated count is %lld", 2204 ubifs_err(c, "inode %lu has %u xattrs, but calculated count is %lld",
2205 (unsigned long)fscki->inum, 2205 (unsigned long)fscki->inum,
2206 fscki->xattr_cnt, fscki->calc_xcnt); 2206 fscki->xattr_cnt, fscki->calc_xcnt);
2207 goto out_dump; 2207 goto out_dump;
2208 } 2208 }
2209 if (fscki->xattr_nms != fscki->calc_xnms) { 2209 if (fscki->xattr_nms != fscki->calc_xnms) {
2210 ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld", 2210 ubifs_err(c, "inode %lu has xattr names' size %u, but calculated names' size is %lld",
2211 (unsigned long)fscki->inum, fscki->xattr_nms, 2211 (unsigned long)fscki->inum, fscki->xattr_nms,
2212 fscki->calc_xnms); 2212 fscki->calc_xnms);
2213 goto out_dump; 2213 goto out_dump;
@@ -2221,11 +2221,11 @@ out_dump:
2221 ino_key_init(c, &key, fscki->inum); 2221 ino_key_init(c, &key, fscki->inum);
2222 err = ubifs_lookup_level0(c, &key, &znode, &n); 2222 err = ubifs_lookup_level0(c, &key, &znode, &n);
2223 if (!err) { 2223 if (!err) {
2224 ubifs_err("inode %lu not found in index", 2224 ubifs_err(c, "inode %lu not found in index",
2225 (unsigned long)fscki->inum); 2225 (unsigned long)fscki->inum);
2226 return -ENOENT; 2226 return -ENOENT;
2227 } else if (err < 0) { 2227 } else if (err < 0) {
2228 ubifs_err("error %d while looking up inode %lu", 2228 ubifs_err(c, "error %d while looking up inode %lu",
2229 err, (unsigned long)fscki->inum); 2229 err, (unsigned long)fscki->inum);
2230 return err; 2230 return err;
2231 } 2231 }
@@ -2237,13 +2237,13 @@ out_dump:
2237 2237
2238 err = ubifs_tnc_read_node(c, zbr, ino); 2238 err = ubifs_tnc_read_node(c, zbr, ino);
2239 if (err) { 2239 if (err) {
2240 ubifs_err("cannot read inode node at LEB %d:%d, error %d", 2240 ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d",
2241 zbr->lnum, zbr->offs, err); 2241 zbr->lnum, zbr->offs, err);
2242 kfree(ino); 2242 kfree(ino);
2243 return err; 2243 return err;
2244 } 2244 }
2245 2245
2246 ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", 2246 ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d",
2247 (unsigned long)fscki->inum, zbr->lnum, zbr->offs); 2247 (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
2248 ubifs_dump_node(c, ino); 2248 ubifs_dump_node(c, ino);
2249 kfree(ino); 2249 kfree(ino);
@@ -2284,7 +2284,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
2284 return 0; 2284 return 0;
2285 2285
2286out_free: 2286out_free:
2287 ubifs_err("file-system check failed with error %d", err); 2287 ubifs_err(c, "file-system check failed with error %d", err);
2288 dump_stack(); 2288 dump_stack();
2289 free_inodes(&fsckd); 2289 free_inodes(&fsckd);
2290 return err; 2290 return err;
@@ -2315,12 +2315,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
2315 sb = container_of(cur->next, struct ubifs_scan_node, list); 2315 sb = container_of(cur->next, struct ubifs_scan_node, list);
2316 2316
2317 if (sa->type != UBIFS_DATA_NODE) { 2317 if (sa->type != UBIFS_DATA_NODE) {
2318 ubifs_err("bad node type %d", sa->type); 2318 ubifs_err(c, "bad node type %d", sa->type);
2319 ubifs_dump_node(c, sa->node); 2319 ubifs_dump_node(c, sa->node);
2320 return -EINVAL; 2320 return -EINVAL;
2321 } 2321 }
2322 if (sb->type != UBIFS_DATA_NODE) { 2322 if (sb->type != UBIFS_DATA_NODE) {
2323 ubifs_err("bad node type %d", sb->type); 2323 ubifs_err(c, "bad node type %d", sb->type);
2324 ubifs_dump_node(c, sb->node); 2324 ubifs_dump_node(c, sb->node);
2325 return -EINVAL; 2325 return -EINVAL;
2326 } 2326 }
@@ -2331,7 +2331,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
2331 if (inuma < inumb) 2331 if (inuma < inumb)
2332 continue; 2332 continue;
2333 if (inuma > inumb) { 2333 if (inuma > inumb) {
2334 ubifs_err("larger inum %lu goes before inum %lu", 2334 ubifs_err(c, "larger inum %lu goes before inum %lu",
2335 (unsigned long)inuma, (unsigned long)inumb); 2335 (unsigned long)inuma, (unsigned long)inumb);
2336 goto error_dump; 2336 goto error_dump;
2337 } 2337 }
@@ -2340,11 +2340,11 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
2340 blkb = key_block(c, &sb->key); 2340 blkb = key_block(c, &sb->key);
2341 2341
2342 if (blka > blkb) { 2342 if (blka > blkb) {
2343 ubifs_err("larger block %u goes before %u", blka, blkb); 2343 ubifs_err(c, "larger block %u goes before %u", blka, blkb);
2344 goto error_dump; 2344 goto error_dump;
2345 } 2345 }
2346 if (blka == blkb) { 2346 if (blka == blkb) {
2347 ubifs_err("two data nodes for the same block"); 2347 ubifs_err(c, "two data nodes for the same block");
2348 goto error_dump; 2348 goto error_dump;
2349 } 2349 }
2350 } 2350 }
@@ -2383,19 +2383,19 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2383 2383
2384 if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && 2384 if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
2385 sa->type != UBIFS_XENT_NODE) { 2385 sa->type != UBIFS_XENT_NODE) {
2386 ubifs_err("bad node type %d", sa->type); 2386 ubifs_err(c, "bad node type %d", sa->type);
2387 ubifs_dump_node(c, sa->node); 2387 ubifs_dump_node(c, sa->node);
2388 return -EINVAL; 2388 return -EINVAL;
2389 } 2389 }
2390 if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && 2390 if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
2391 sa->type != UBIFS_XENT_NODE) { 2391 sa->type != UBIFS_XENT_NODE) {
2392 ubifs_err("bad node type %d", sb->type); 2392 ubifs_err(c, "bad node type %d", sb->type);
2393 ubifs_dump_node(c, sb->node); 2393 ubifs_dump_node(c, sb->node);
2394 return -EINVAL; 2394 return -EINVAL;
2395 } 2395 }
2396 2396
2397 if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { 2397 if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
2398 ubifs_err("non-inode node goes before inode node"); 2398 ubifs_err(c, "non-inode node goes before inode node");
2399 goto error_dump; 2399 goto error_dump;
2400 } 2400 }
2401 2401
@@ -2405,7 +2405,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2405 if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { 2405 if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
2406 /* Inode nodes are sorted in descending size order */ 2406 /* Inode nodes are sorted in descending size order */
2407 if (sa->len < sb->len) { 2407 if (sa->len < sb->len) {
2408 ubifs_err("smaller inode node goes first"); 2408 ubifs_err(c, "smaller inode node goes first");
2409 goto error_dump; 2409 goto error_dump;
2410 } 2410 }
2411 continue; 2411 continue;
@@ -2421,7 +2421,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2421 if (inuma < inumb) 2421 if (inuma < inumb)
2422 continue; 2422 continue;
2423 if (inuma > inumb) { 2423 if (inuma > inumb) {
2424 ubifs_err("larger inum %lu goes before inum %lu", 2424 ubifs_err(c, "larger inum %lu goes before inum %lu",
2425 (unsigned long)inuma, (unsigned long)inumb); 2425 (unsigned long)inuma, (unsigned long)inumb);
2426 goto error_dump; 2426 goto error_dump;
2427 } 2427 }
@@ -2430,7 +2430,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2430 hashb = key_block(c, &sb->key); 2430 hashb = key_block(c, &sb->key);
2431 2431
2432 if (hasha > hashb) { 2432 if (hasha > hashb) {
2433 ubifs_err("larger hash %u goes before %u", 2433 ubifs_err(c, "larger hash %u goes before %u",
2434 hasha, hashb); 2434 hasha, hashb);
2435 goto error_dump; 2435 goto error_dump;
2436 } 2436 }
@@ -2439,9 +2439,9 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2439 return 0; 2439 return 0;
2440 2440
2441error_dump: 2441error_dump:
2442 ubifs_msg("dumping first node"); 2442 ubifs_msg(c, "dumping first node");
2443 ubifs_dump_node(c, sa->node); 2443 ubifs_dump_node(c, sa->node);
2444 ubifs_msg("dumping second node"); 2444 ubifs_msg(c, "dumping second node");
2445 ubifs_dump_node(c, sb->node); 2445 ubifs_dump_node(c, sb->node);
2446 return -EINVAL; 2446 return -EINVAL;
2447 return 0; 2447 return 0;
@@ -2470,13 +2470,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2470 delay = prandom_u32() % 60000; 2470 delay = prandom_u32() % 60000;
2471 d->pc_timeout = jiffies; 2471 d->pc_timeout = jiffies;
2472 d->pc_timeout += msecs_to_jiffies(delay); 2472 d->pc_timeout += msecs_to_jiffies(delay);
2473 ubifs_warn("failing after %lums", delay); 2473 ubifs_warn(c, "failing after %lums", delay);
2474 } else { 2474 } else {
2475 d->pc_delay = 2; 2475 d->pc_delay = 2;
2476 delay = prandom_u32() % 10000; 2476 delay = prandom_u32() % 10000;
2477 /* Fail within 10000 operations */ 2477 /* Fail within 10000 operations */
2478 d->pc_cnt_max = delay; 2478 d->pc_cnt_max = delay;
2479 ubifs_warn("failing after %lu calls", delay); 2479 ubifs_warn(c, "failing after %lu calls", delay);
2480 } 2480 }
2481 } 2481 }
2482 2482
@@ -2494,55 +2494,55 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2494 return 0; 2494 return 0;
2495 if (chance(19, 20)) 2495 if (chance(19, 20))
2496 return 0; 2496 return 0;
2497 ubifs_warn("failing in super block LEB %d", lnum); 2497 ubifs_warn(c, "failing in super block LEB %d", lnum);
2498 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { 2498 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
2499 if (chance(19, 20)) 2499 if (chance(19, 20))
2500 return 0; 2500 return 0;
2501 ubifs_warn("failing in master LEB %d", lnum); 2501 ubifs_warn(c, "failing in master LEB %d", lnum);
2502 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { 2502 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
2503 if (write && chance(99, 100)) 2503 if (write && chance(99, 100))
2504 return 0; 2504 return 0;
2505 if (chance(399, 400)) 2505 if (chance(399, 400))
2506 return 0; 2506 return 0;
2507 ubifs_warn("failing in log LEB %d", lnum); 2507 ubifs_warn(c, "failing in log LEB %d", lnum);
2508 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { 2508 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
2509 if (write && chance(7, 8)) 2509 if (write && chance(7, 8))
2510 return 0; 2510 return 0;
2511 if (chance(19, 20)) 2511 if (chance(19, 20))
2512 return 0; 2512 return 0;
2513 ubifs_warn("failing in LPT LEB %d", lnum); 2513 ubifs_warn(c, "failing in LPT LEB %d", lnum);
2514 } else if (lnum >= c->orph_first && lnum <= c->orph_last) { 2514 } else if (lnum >= c->orph_first && lnum <= c->orph_last) {
2515 if (write && chance(1, 2)) 2515 if (write && chance(1, 2))
2516 return 0; 2516 return 0;
2517 if (chance(9, 10)) 2517 if (chance(9, 10))
2518 return 0; 2518 return 0;
2519 ubifs_warn("failing in orphan LEB %d", lnum); 2519 ubifs_warn(c, "failing in orphan LEB %d", lnum);
2520 } else if (lnum == c->ihead_lnum) { 2520 } else if (lnum == c->ihead_lnum) {
2521 if (chance(99, 100)) 2521 if (chance(99, 100))
2522 return 0; 2522 return 0;
2523 ubifs_warn("failing in index head LEB %d", lnum); 2523 ubifs_warn(c, "failing in index head LEB %d", lnum);
2524 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { 2524 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
2525 if (chance(9, 10)) 2525 if (chance(9, 10))
2526 return 0; 2526 return 0;
2527 ubifs_warn("failing in GC head LEB %d", lnum); 2527 ubifs_warn(c, "failing in GC head LEB %d", lnum);
2528 } else if (write && !RB_EMPTY_ROOT(&c->buds) && 2528 } else if (write && !RB_EMPTY_ROOT(&c->buds) &&
2529 !ubifs_search_bud(c, lnum)) { 2529 !ubifs_search_bud(c, lnum)) {
2530 if (chance(19, 20)) 2530 if (chance(19, 20))
2531 return 0; 2531 return 0;
2532 ubifs_warn("failing in non-bud LEB %d", lnum); 2532 ubifs_warn(c, "failing in non-bud LEB %d", lnum);
2533 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || 2533 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
2534 c->cmt_state == COMMIT_RUNNING_REQUIRED) { 2534 c->cmt_state == COMMIT_RUNNING_REQUIRED) {
2535 if (chance(999, 1000)) 2535 if (chance(999, 1000))
2536 return 0; 2536 return 0;
2537 ubifs_warn("failing in bud LEB %d commit running", lnum); 2537 ubifs_warn(c, "failing in bud LEB %d commit running", lnum);
2538 } else { 2538 } else {
2539 if (chance(9999, 10000)) 2539 if (chance(9999, 10000))
2540 return 0; 2540 return 0;
2541 ubifs_warn("failing in bud LEB %d commit not running", lnum); 2541 ubifs_warn(c, "failing in bud LEB %d commit not running", lnum);
2542 } 2542 }
2543 2543
2544 d->pc_happened = 1; 2544 d->pc_happened = 1;
2545 ubifs_warn("========== Power cut emulated =========="); 2545 ubifs_warn(c, "========== Power cut emulated ==========");
2546 dump_stack(); 2546 dump_stack();
2547 return 1; 2547 return 1;
2548} 2548}
@@ -2557,7 +2557,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
2557 /* Corruption span max to end of write unit */ 2557 /* Corruption span max to end of write unit */
2558 to = min(len, ALIGN(from + 1, c->max_write_size)); 2558 to = min(len, ALIGN(from + 1, c->max_write_size));
2559 2559
2560 ubifs_warn("filled bytes %u-%u with %s", from, to - 1, 2560 ubifs_warn(c, "filled bytes %u-%u with %s", from, to - 1,
2561 ffs ? "0xFFs" : "random data"); 2561 ffs ? "0xFFs" : "random data");
2562 2562
2563 if (ffs) 2563 if (ffs)
@@ -2579,7 +2579,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
2579 failing = power_cut_emulated(c, lnum, 1); 2579 failing = power_cut_emulated(c, lnum, 1);
2580 if (failing) { 2580 if (failing) {
2581 len = corrupt_data(c, buf, len); 2581 len = corrupt_data(c, buf, len);
2582 ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)", 2582 ubifs_warn(c, "actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
2583 len, lnum, offs); 2583 len, lnum, offs);
2584 } 2584 }
2585 err = ubi_leb_write(c->ubi, lnum, buf, offs, len); 2585 err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
@@ -2909,7 +2909,7 @@ out_remove:
2909 debugfs_remove_recursive(d->dfs_dir); 2909 debugfs_remove_recursive(d->dfs_dir);
2910out: 2910out:
2911 err = dent ? PTR_ERR(dent) : -ENODEV; 2911 err = dent ? PTR_ERR(dent) : -ENODEV;
2912 ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", 2912 ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n",
2913 fname, err); 2913 fname, err);
2914 return err; 2914 return err;
2915} 2915}
@@ -3063,8 +3063,8 @@ out_remove:
3063 debugfs_remove_recursive(dfs_rootdir); 3063 debugfs_remove_recursive(dfs_rootdir);
3064out: 3064out:
3065 err = dent ? PTR_ERR(dent) : -ENODEV; 3065 err = dent ? PTR_ERR(dent) : -ENODEV;
3066 ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", 3066 pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n",
3067 fname, err); 3067 current->pid, fname, err);
3068 return err; 3068 return err;
3069} 3069}
3070 3070
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0fa6c803992e..02d1ee778df0 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -146,12 +146,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
146 if (c->highest_inum >= INUM_WARN_WATERMARK) { 146 if (c->highest_inum >= INUM_WARN_WATERMARK) {
147 if (c->highest_inum >= INUM_WATERMARK) { 147 if (c->highest_inum >= INUM_WATERMARK) {
148 spin_unlock(&c->cnt_lock); 148 spin_unlock(&c->cnt_lock);
149 ubifs_err("out of inode numbers"); 149 ubifs_err(c, "out of inode numbers");
150 make_bad_inode(inode); 150 make_bad_inode(inode);
151 iput(inode); 151 iput(inode);
152 return ERR_PTR(-EINVAL); 152 return ERR_PTR(-EINVAL);
153 } 153 }
154 ubifs_warn("running out of inode numbers (current %lu, max %d)", 154 ubifs_warn(c, "running out of inode numbers (current %lu, max %u)",
155 (unsigned long)c->highest_inum, INUM_WATERMARK); 155 (unsigned long)c->highest_inum, INUM_WATERMARK);
156 } 156 }
157 157
@@ -222,7 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
222 * checking. 222 * checking.
223 */ 223 */
224 err = PTR_ERR(inode); 224 err = PTR_ERR(inode);
225 ubifs_err("dead directory entry '%pd', error %d", 225 ubifs_err(c, "dead directory entry '%pd', error %d",
226 dentry, err); 226 dentry, err);
227 ubifs_ro_mode(c, err); 227 ubifs_ro_mode(c, err);
228 goto out; 228 goto out;
@@ -272,7 +272,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
272 272
273 err = ubifs_init_security(dir, inode, &dentry->d_name); 273 err = ubifs_init_security(dir, inode, &dentry->d_name);
274 if (err) 274 if (err)
275 goto out_cancel; 275 goto out_inode;
276 276
277 mutex_lock(&dir_ui->ui_mutex); 277 mutex_lock(&dir_ui->ui_mutex);
278 dir->i_size += sz_change; 278 dir->i_size += sz_change;
@@ -292,11 +292,12 @@ out_cancel:
292 dir->i_size -= sz_change; 292 dir->i_size -= sz_change;
293 dir_ui->ui_size = dir->i_size; 293 dir_ui->ui_size = dir->i_size;
294 mutex_unlock(&dir_ui->ui_mutex); 294 mutex_unlock(&dir_ui->ui_mutex);
295out_inode:
295 make_bad_inode(inode); 296 make_bad_inode(inode);
296 iput(inode); 297 iput(inode);
297out_budg: 298out_budg:
298 ubifs_release_budget(c, &req); 299 ubifs_release_budget(c, &req);
299 ubifs_err("cannot create regular file, error %d", err); 300 ubifs_err(c, "cannot create regular file, error %d", err);
300 return err; 301 return err;
301} 302}
302 303
@@ -449,7 +450,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
449 450
450out: 451out:
451 if (err != -ENOENT) { 452 if (err != -ENOENT) {
452 ubifs_err("cannot find next direntry, error %d", err); 453 ubifs_err(c, "cannot find next direntry, error %d", err);
453 return err; 454 return err;
454 } 455 }
455 456
@@ -732,7 +733,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
732 733
733 err = ubifs_init_security(dir, inode, &dentry->d_name); 734 err = ubifs_init_security(dir, inode, &dentry->d_name);
734 if (err) 735 if (err)
735 goto out_cancel; 736 goto out_inode;
736 737
737 mutex_lock(&dir_ui->ui_mutex); 738 mutex_lock(&dir_ui->ui_mutex);
738 insert_inode_hash(inode); 739 insert_inode_hash(inode);
@@ -743,7 +744,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
743 dir->i_mtime = dir->i_ctime = inode->i_ctime; 744 dir->i_mtime = dir->i_ctime = inode->i_ctime;
744 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0); 745 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
745 if (err) { 746 if (err) {
746 ubifs_err("cannot create directory, error %d", err); 747 ubifs_err(c, "cannot create directory, error %d", err);
747 goto out_cancel; 748 goto out_cancel;
748 } 749 }
749 mutex_unlock(&dir_ui->ui_mutex); 750 mutex_unlock(&dir_ui->ui_mutex);
@@ -757,6 +758,7 @@ out_cancel:
757 dir_ui->ui_size = dir->i_size; 758 dir_ui->ui_size = dir->i_size;
758 drop_nlink(dir); 759 drop_nlink(dir);
759 mutex_unlock(&dir_ui->ui_mutex); 760 mutex_unlock(&dir_ui->ui_mutex);
761out_inode:
760 make_bad_inode(inode); 762 make_bad_inode(inode);
761 iput(inode); 763 iput(inode);
762out_budg: 764out_budg:
@@ -816,7 +818,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
816 818
817 err = ubifs_init_security(dir, inode, &dentry->d_name); 819 err = ubifs_init_security(dir, inode, &dentry->d_name);
818 if (err) 820 if (err)
819 goto out_cancel; 821 goto out_inode;
820 822
821 mutex_lock(&dir_ui->ui_mutex); 823 mutex_lock(&dir_ui->ui_mutex);
822 dir->i_size += sz_change; 824 dir->i_size += sz_change;
@@ -836,6 +838,7 @@ out_cancel:
836 dir->i_size -= sz_change; 838 dir->i_size -= sz_change;
837 dir_ui->ui_size = dir->i_size; 839 dir_ui->ui_size = dir->i_size;
838 mutex_unlock(&dir_ui->ui_mutex); 840 mutex_unlock(&dir_ui->ui_mutex);
841out_inode:
839 make_bad_inode(inode); 842 make_bad_inode(inode);
840 iput(inode); 843 iput(inode);
841out_budg: 844out_budg:
@@ -896,7 +899,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
896 899
897 err = ubifs_init_security(dir, inode, &dentry->d_name); 900 err = ubifs_init_security(dir, inode, &dentry->d_name);
898 if (err) 901 if (err)
899 goto out_cancel; 902 goto out_inode;
900 903
901 mutex_lock(&dir_ui->ui_mutex); 904 mutex_lock(&dir_ui->ui_mutex);
902 dir->i_size += sz_change; 905 dir->i_size += sz_change;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0acf626..3ba3fef64e9e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
50 */ 50 */
51 51
52#include "ubifs.h" 52#include "ubifs.h"
53#include <linux/aio.h>
54#include <linux/mount.h> 53#include <linux/mount.h>
55#include <linux/namei.h> 54#include <linux/namei.h>
56#include <linux/slab.h> 55#include <linux/slab.h>
@@ -80,7 +79,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
80 79
81 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; 80 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
82 out_len = UBIFS_BLOCK_SIZE; 81 out_len = UBIFS_BLOCK_SIZE;
83 err = ubifs_decompress(&dn->data, dlen, addr, &out_len, 82 err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
84 le16_to_cpu(dn->compr_type)); 83 le16_to_cpu(dn->compr_type));
85 if (err || len != out_len) 84 if (err || len != out_len)
86 goto dump; 85 goto dump;
@@ -96,7 +95,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
96 return 0; 95 return 0;
97 96
98dump: 97dump:
99 ubifs_err("bad data node (block %u, inode %lu)", 98 ubifs_err(c, "bad data node (block %u, inode %lu)",
100 block, inode->i_ino); 99 block, inode->i_ino);
101 ubifs_dump_node(c, dn); 100 ubifs_dump_node(c, dn);
102 return -EINVAL; 101 return -EINVAL;
@@ -161,13 +160,14 @@ static int do_readpage(struct page *page)
161 addr += UBIFS_BLOCK_SIZE; 160 addr += UBIFS_BLOCK_SIZE;
162 } 161 }
163 if (err) { 162 if (err) {
163 struct ubifs_info *c = inode->i_sb->s_fs_info;
164 if (err == -ENOENT) { 164 if (err == -ENOENT) {
165 /* Not found, so it must be a hole */ 165 /* Not found, so it must be a hole */
166 SetPageChecked(page); 166 SetPageChecked(page);
167 dbg_gen("hole"); 167 dbg_gen("hole");
168 goto out_free; 168 goto out_free;
169 } 169 }
170 ubifs_err("cannot read page %lu of inode %lu, error %d", 170 ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
171 page->index, inode->i_ino, err); 171 page->index, inode->i_ino, err);
172 goto error; 172 goto error;
173 } 173 }
@@ -650,7 +650,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
650 650
651 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; 651 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
652 out_len = UBIFS_BLOCK_SIZE; 652 out_len = UBIFS_BLOCK_SIZE;
653 err = ubifs_decompress(&dn->data, dlen, addr, &out_len, 653 err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
654 le16_to_cpu(dn->compr_type)); 654 le16_to_cpu(dn->compr_type));
655 if (err || len != out_len) 655 if (err || len != out_len)
656 goto out_err; 656 goto out_err;
@@ -698,7 +698,7 @@ out_err:
698 SetPageError(page); 698 SetPageError(page);
699 flush_dcache_page(page); 699 flush_dcache_page(page);
700 kunmap(page); 700 kunmap(page);
701 ubifs_err("bad data node (block %u, inode %lu)", 701 ubifs_err(c, "bad data node (block %u, inode %lu)",
702 page_block, inode->i_ino); 702 page_block, inode->i_ino);
703 return -EINVAL; 703 return -EINVAL;
704} 704}
@@ -802,7 +802,7 @@ out_free:
802 return ret; 802 return ret;
803 803
804out_warn: 804out_warn:
805 ubifs_warn("ignoring error %d and skipping bulk-read", err); 805 ubifs_warn(c, "ignoring error %d and skipping bulk-read", err);
806 goto out_free; 806 goto out_free;
807 807
808out_bu_off: 808out_bu_off:
@@ -930,7 +930,7 @@ static int do_writepage(struct page *page, int len)
930 } 930 }
931 if (err) { 931 if (err) {
932 SetPageError(page); 932 SetPageError(page);
933 ubifs_err("cannot write page %lu of inode %lu, error %d", 933 ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
934 page->index, inode->i_ino, err); 934 page->index, inode->i_ino, err);
935 ubifs_ro_mode(c, err); 935 ubifs_ro_mode(c, err);
936 } 936 }
@@ -1485,7 +1485,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1485 err = ubifs_budget_space(c, &req); 1485 err = ubifs_budget_space(c, &req);
1486 if (unlikely(err)) { 1486 if (unlikely(err)) {
1487 if (err == -ENOSPC) 1487 if (err == -ENOSPC)
1488 ubifs_warn("out of space for mmapped file (inode number %lu)", 1488 ubifs_warn(c, "out of space for mmapped file (inode number %lu)",
1489 inode->i_ino); 1489 inode->i_ino);
1490 return VM_FAULT_SIGBUS; 1490 return VM_FAULT_SIGBUS;
1491 } 1491 }
@@ -1581,8 +1581,6 @@ const struct inode_operations ubifs_symlink_inode_operations = {
1581 1581
1582const struct file_operations ubifs_file_operations = { 1582const struct file_operations ubifs_file_operations = {
1583 .llseek = generic_file_llseek, 1583 .llseek = generic_file_llseek,
1584 .read = new_sync_read,
1585 .write = new_sync_write,
1586 .read_iter = generic_file_read_iter, 1584 .read_iter = generic_file_read_iter,
1587 .write_iter = ubifs_write_iter, 1585 .write_iter = ubifs_write_iter,
1588 .mmap = ubifs_file_mmap, 1586 .mmap = ubifs_file_mmap,
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index fb08b0c514b6..97be41215332 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -85,7 +85,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
85 c->ro_error = 1; 85 c->ro_error = 1;
86 c->no_chk_data_crc = 0; 86 c->no_chk_data_crc = 0;
87 c->vfs_sb->s_flags |= MS_RDONLY; 87 c->vfs_sb->s_flags |= MS_RDONLY;
88 ubifs_warn("switched to read-only mode, error %d", err); 88 ubifs_warn(c, "switched to read-only mode, error %d", err);
89 dump_stack(); 89 dump_stack();
90 } 90 }
91} 91}
@@ -107,7 +107,7 @@ int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
107 * @even_ebadmsg is true. 107 * @even_ebadmsg is true.
108 */ 108 */
109 if (err && (err != -EBADMSG || even_ebadmsg)) { 109 if (err && (err != -EBADMSG || even_ebadmsg)) {
110 ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", 110 ubifs_err(c, "reading %d bytes from LEB %d:%d failed, error %d",
111 len, lnum, offs, err); 111 len, lnum, offs, err);
112 dump_stack(); 112 dump_stack();
113 } 113 }
@@ -127,7 +127,7 @@ int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
127 else 127 else
128 err = dbg_leb_write(c, lnum, buf, offs, len); 128 err = dbg_leb_write(c, lnum, buf, offs, len);
129 if (err) { 129 if (err) {
130 ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", 130 ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d",
131 len, lnum, offs, err); 131 len, lnum, offs, err);
132 ubifs_ro_mode(c, err); 132 ubifs_ro_mode(c, err);
133 dump_stack(); 133 dump_stack();
@@ -147,7 +147,7 @@ int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
147 else 147 else
148 err = dbg_leb_change(c, lnum, buf, len); 148 err = dbg_leb_change(c, lnum, buf, len);
149 if (err) { 149 if (err) {
150 ubifs_err("changing %d bytes in LEB %d failed, error %d", 150 ubifs_err(c, "changing %d bytes in LEB %d failed, error %d",
151 len, lnum, err); 151 len, lnum, err);
152 ubifs_ro_mode(c, err); 152 ubifs_ro_mode(c, err);
153 dump_stack(); 153 dump_stack();
@@ -167,7 +167,7 @@ int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
167 else 167 else
168 err = dbg_leb_unmap(c, lnum); 168 err = dbg_leb_unmap(c, lnum);
169 if (err) { 169 if (err) {
170 ubifs_err("unmap LEB %d failed, error %d", lnum, err); 170 ubifs_err(c, "unmap LEB %d failed, error %d", lnum, err);
171 ubifs_ro_mode(c, err); 171 ubifs_ro_mode(c, err);
172 dump_stack(); 172 dump_stack();
173 } 173 }
@@ -186,7 +186,7 @@ int ubifs_leb_map(struct ubifs_info *c, int lnum)
186 else 186 else
187 err = dbg_leb_map(c, lnum); 187 err = dbg_leb_map(c, lnum);
188 if (err) { 188 if (err) {
189 ubifs_err("mapping LEB %d failed, error %d", lnum, err); 189 ubifs_err(c, "mapping LEB %d failed, error %d", lnum, err);
190 ubifs_ro_mode(c, err); 190 ubifs_ro_mode(c, err);
191 dump_stack(); 191 dump_stack();
192 } 192 }
@@ -199,7 +199,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
199 199
200 err = ubi_is_mapped(c->ubi, lnum); 200 err = ubi_is_mapped(c->ubi, lnum);
201 if (err < 0) { 201 if (err < 0) {
202 ubifs_err("ubi_is_mapped failed for LEB %d, error %d", 202 ubifs_err(c, "ubi_is_mapped failed for LEB %d, error %d",
203 lnum, err); 203 lnum, err);
204 dump_stack(); 204 dump_stack();
205 } 205 }
@@ -247,7 +247,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
247 magic = le32_to_cpu(ch->magic); 247 magic = le32_to_cpu(ch->magic);
248 if (magic != UBIFS_NODE_MAGIC) { 248 if (magic != UBIFS_NODE_MAGIC) {
249 if (!quiet) 249 if (!quiet)
250 ubifs_err("bad magic %#08x, expected %#08x", 250 ubifs_err(c, "bad magic %#08x, expected %#08x",
251 magic, UBIFS_NODE_MAGIC); 251 magic, UBIFS_NODE_MAGIC);
252 err = -EUCLEAN; 252 err = -EUCLEAN;
253 goto out; 253 goto out;
@@ -256,7 +256,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
256 type = ch->node_type; 256 type = ch->node_type;
257 if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { 257 if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
258 if (!quiet) 258 if (!quiet)
259 ubifs_err("bad node type %d", type); 259 ubifs_err(c, "bad node type %d", type);
260 goto out; 260 goto out;
261 } 261 }
262 262
@@ -279,7 +279,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
279 node_crc = le32_to_cpu(ch->crc); 279 node_crc = le32_to_cpu(ch->crc);
280 if (crc != node_crc) { 280 if (crc != node_crc) {
281 if (!quiet) 281 if (!quiet)
282 ubifs_err("bad CRC: calculated %#08x, read %#08x", 282 ubifs_err(c, "bad CRC: calculated %#08x, read %#08x",
283 crc, node_crc); 283 crc, node_crc);
284 err = -EUCLEAN; 284 err = -EUCLEAN;
285 goto out; 285 goto out;
@@ -289,10 +289,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
289 289
290out_len: 290out_len:
291 if (!quiet) 291 if (!quiet)
292 ubifs_err("bad node length %d", node_len); 292 ubifs_err(c, "bad node length %d", node_len);
293out: 293out:
294 if (!quiet) { 294 if (!quiet) {
295 ubifs_err("bad node at LEB %d:%d", lnum, offs); 295 ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
296 ubifs_dump_node(c, buf); 296 ubifs_dump_node(c, buf);
297 dump_stack(); 297 dump_stack();
298 } 298 }
@@ -355,11 +355,11 @@ static unsigned long long next_sqnum(struct ubifs_info *c)
355 355
356 if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { 356 if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
357 if (sqnum >= SQNUM_WATERMARK) { 357 if (sqnum >= SQNUM_WATERMARK) {
358 ubifs_err("sequence number overflow %llu, end of life", 358 ubifs_err(c, "sequence number overflow %llu, end of life",
359 sqnum); 359 sqnum);
360 ubifs_ro_mode(c, -EINVAL); 360 ubifs_ro_mode(c, -EINVAL);
361 } 361 }
362 ubifs_warn("running out of sequence numbers, end of life soon"); 362 ubifs_warn(c, "running out of sequence numbers, end of life soon");
363 } 363 }
364 364
365 return sqnum; 365 return sqnum;
@@ -636,7 +636,7 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
636 err = ubifs_wbuf_sync_nolock(wbuf); 636 err = ubifs_wbuf_sync_nolock(wbuf);
637 mutex_unlock(&wbuf->io_mutex); 637 mutex_unlock(&wbuf->io_mutex);
638 if (err) { 638 if (err) {
639 ubifs_err("cannot sync write-buffer, error %d", err); 639 ubifs_err(c, "cannot sync write-buffer, error %d", err);
640 ubifs_ro_mode(c, err); 640 ubifs_ro_mode(c, err);
641 goto out_timers; 641 goto out_timers;
642 } 642 }
@@ -833,7 +833,7 @@ exit:
833 return 0; 833 return 0;
834 834
835out: 835out:
836 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", 836 ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d",
837 len, wbuf->lnum, wbuf->offs, err); 837 len, wbuf->lnum, wbuf->offs, err);
838 ubifs_dump_node(c, buf); 838 ubifs_dump_node(c, buf);
839 dump_stack(); 839 dump_stack();
@@ -932,27 +932,27 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
932 } 932 }
933 933
934 if (type != ch->node_type) { 934 if (type != ch->node_type) {
935 ubifs_err("bad node type (%d but expected %d)", 935 ubifs_err(c, "bad node type (%d but expected %d)",
936 ch->node_type, type); 936 ch->node_type, type);
937 goto out; 937 goto out;
938 } 938 }
939 939
940 err = ubifs_check_node(c, buf, lnum, offs, 0, 0); 940 err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
941 if (err) { 941 if (err) {
942 ubifs_err("expected node type %d", type); 942 ubifs_err(c, "expected node type %d", type);
943 return err; 943 return err;
944 } 944 }
945 945
946 rlen = le32_to_cpu(ch->len); 946 rlen = le32_to_cpu(ch->len);
947 if (rlen != len) { 947 if (rlen != len) {
948 ubifs_err("bad node length %d, expected %d", rlen, len); 948 ubifs_err(c, "bad node length %d, expected %d", rlen, len);
949 goto out; 949 goto out;
950 } 950 }
951 951
952 return 0; 952 return 0;
953 953
954out: 954out:
955 ubifs_err("bad node at LEB %d:%d", lnum, offs); 955 ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
956 ubifs_dump_node(c, buf); 956 ubifs_dump_node(c, buf);
957 dump_stack(); 957 dump_stack();
958 return -EINVAL; 958 return -EINVAL;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 648b143606cc..3c7b29de0ca7 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -138,7 +138,7 @@ static int setflags(struct inode *inode, int flags)
138 return err; 138 return err;
139 139
140out_unlock: 140out_unlock:
141 ubifs_err("can't modify inode %lu attributes", inode->i_ino); 141 ubifs_err(c, "can't modify inode %lu attributes", inode->i_ino);
142 mutex_unlock(&ui->ui_mutex); 142 mutex_unlock(&ui->ui_mutex);
143 ubifs_release_budget(c, &req); 143 ubifs_release_budget(c, &req);
144 return err; 144 return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f6ac3f29323c..90ae1a8439d9 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -363,11 +363,11 @@ again:
363 * This should not happen unless the journal size limitations 363 * This should not happen unless the journal size limitations
364 * are too tough. 364 * are too tough.
365 */ 365 */
366 ubifs_err("stuck in space allocation"); 366 ubifs_err(c, "stuck in space allocation");
367 err = -ENOSPC; 367 err = -ENOSPC;
368 goto out; 368 goto out;
369 } else if (cmt_retries > 32) 369 } else if (cmt_retries > 32)
370 ubifs_warn("too many space allocation re-tries (%d)", 370 ubifs_warn(c, "too many space allocation re-tries (%d)",
371 cmt_retries); 371 cmt_retries);
372 372
373 dbg_jnl("-EAGAIN, commit and retry (retried %d times)", 373 dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
@@ -380,7 +380,7 @@ again:
380 goto again; 380 goto again;
381 381
382out: 382out:
383 ubifs_err("cannot reserve %d bytes in jhead %d, error %d", 383 ubifs_err(c, "cannot reserve %d bytes in jhead %d, error %d",
384 len, jhead, err); 384 len, jhead, err);
385 if (err == -ENOSPC) { 385 if (err == -ENOSPC) {
386 /* This are some budgeting problems, print useful information */ 386 /* This are some budgeting problems, print useful information */
@@ -731,7 +731,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
731 compr_type = ui->compr_type; 731 compr_type = ui->compr_type;
732 732
733 out_len = dlen - UBIFS_DATA_NODE_SZ; 733 out_len = dlen - UBIFS_DATA_NODE_SZ;
734 ubifs_compress(buf, len, &data->data, &out_len, &compr_type); 734 ubifs_compress(c, buf, len, &data->data, &out_len, &compr_type);
735 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); 735 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
736 736
737 dlen = UBIFS_DATA_NODE_SZ + out_len; 737 dlen = UBIFS_DATA_NODE_SZ + out_len;
@@ -1100,7 +1100,8 @@ out_free:
1100 * This function is used when an inode is truncated and the last data node of 1100 * This function is used when an inode is truncated and the last data node of
1101 * the inode has to be re-compressed and re-written. 1101 * the inode has to be re-compressed and re-written.
1102 */ 1102 */
1103static int recomp_data_node(struct ubifs_data_node *dn, int *new_len) 1103static int recomp_data_node(const struct ubifs_info *c,
1104 struct ubifs_data_node *dn, int *new_len)
1104{ 1105{
1105 void *buf; 1106 void *buf;
1106 int err, len, compr_type, out_len; 1107 int err, len, compr_type, out_len;
@@ -1112,11 +1113,11 @@ static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
1112 1113
1113 len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; 1114 len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
1114 compr_type = le16_to_cpu(dn->compr_type); 1115 compr_type = le16_to_cpu(dn->compr_type);
1115 err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type); 1116 err = ubifs_decompress(c, &dn->data, len, buf, &out_len, compr_type);
1116 if (err) 1117 if (err)
1117 goto out; 1118 goto out;
1118 1119
1119 ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type); 1120 ubifs_compress(c, buf, *new_len, &dn->data, &out_len, &compr_type);
1120 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE); 1121 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
1121 dn->compr_type = cpu_to_le16(compr_type); 1122 dn->compr_type = cpu_to_le16(compr_type);
1122 dn->size = cpu_to_le32(*new_len); 1123 dn->size = cpu_to_le32(*new_len);
@@ -1191,7 +1192,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1191 int compr_type = le16_to_cpu(dn->compr_type); 1192 int compr_type = le16_to_cpu(dn->compr_type);
1192 1193
1193 if (compr_type != UBIFS_COMPR_NONE) { 1194 if (compr_type != UBIFS_COMPR_NONE) {
1194 err = recomp_data_node(dn, &dlen); 1195 err = recomp_data_node(c, dn, &dlen);
1195 if (err) 1196 if (err)
1196 goto out_free; 1197 goto out_free;
1197 } else { 1198 } else {
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index c14628fbeee2..8c795e6392b1 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -696,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
696 destroy_done_tree(&done_tree); 696 destroy_done_tree(&done_tree);
697 vfree(buf); 697 vfree(buf);
698 if (write_lnum == c->lhead_lnum) { 698 if (write_lnum == c->lhead_lnum) {
699 ubifs_err("log is too full"); 699 ubifs_err(c, "log is too full");
700 return -EINVAL; 700 return -EINVAL;
701 } 701 }
702 /* Unmap remaining LEBs */ 702 /* Unmap remaining LEBs */
@@ -743,7 +743,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
743 bud_bytes += c->leb_size - bud->start; 743 bud_bytes += c->leb_size - bud->start;
744 744
745 if (c->bud_bytes != bud_bytes) { 745 if (c->bud_bytes != bud_bytes) {
746 ubifs_err("bad bud_bytes %lld, calculated %lld", 746 ubifs_err(c, "bad bud_bytes %lld, calculated %lld",
747 c->bud_bytes, bud_bytes); 747 c->bud_bytes, bud_bytes);
748 err = -EINVAL; 748 err = -EINVAL;
749 } 749 }
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 46190a7c42a6..a0011aa3a779 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -682,7 +682,7 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
682out: 682out:
683 ubifs_release_lprops(c); 683 ubifs_release_lprops(c);
684 if (err) 684 if (err)
685 ubifs_err("cannot change properties of LEB %d, error %d", 685 ubifs_err(c, "cannot change properties of LEB %d, error %d",
686 lnum, err); 686 lnum, err);
687 return err; 687 return err;
688} 688}
@@ -721,7 +721,7 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
721out: 721out:
722 ubifs_release_lprops(c); 722 ubifs_release_lprops(c);
723 if (err) 723 if (err)
724 ubifs_err("cannot update properties of LEB %d, error %d", 724 ubifs_err(c, "cannot update properties of LEB %d, error %d",
725 lnum, err); 725 lnum, err);
726 return err; 726 return err;
727} 727}
@@ -746,7 +746,7 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
746 lpp = ubifs_lpt_lookup(c, lnum); 746 lpp = ubifs_lpt_lookup(c, lnum);
747 if (IS_ERR(lpp)) { 747 if (IS_ERR(lpp)) {
748 err = PTR_ERR(lpp); 748 err = PTR_ERR(lpp);
749 ubifs_err("cannot read properties of LEB %d, error %d", 749 ubifs_err(c, "cannot read properties of LEB %d, error %d",
750 lnum, err); 750 lnum, err);
751 goto out; 751 goto out;
752 } 752 }
@@ -873,13 +873,13 @@ int dbg_check_cats(struct ubifs_info *c)
873 873
874 list_for_each_entry(lprops, &c->empty_list, list) { 874 list_for_each_entry(lprops, &c->empty_list, list) {
875 if (lprops->free != c->leb_size) { 875 if (lprops->free != c->leb_size) {
876 ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)", 876 ubifs_err(c, "non-empty LEB %d on empty list (free %d dirty %d flags %d)",
877 lprops->lnum, lprops->free, lprops->dirty, 877 lprops->lnum, lprops->free, lprops->dirty,
878 lprops->flags); 878 lprops->flags);
879 return -EINVAL; 879 return -EINVAL;
880 } 880 }
881 if (lprops->flags & LPROPS_TAKEN) { 881 if (lprops->flags & LPROPS_TAKEN) {
882 ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)", 882 ubifs_err(c, "taken LEB %d on empty list (free %d dirty %d flags %d)",
883 lprops->lnum, lprops->free, lprops->dirty, 883 lprops->lnum, lprops->free, lprops->dirty,
884 lprops->flags); 884 lprops->flags);
885 return -EINVAL; 885 return -EINVAL;
@@ -889,13 +889,13 @@ int dbg_check_cats(struct ubifs_info *c)
889 i = 0; 889 i = 0;
890 list_for_each_entry(lprops, &c->freeable_list, list) { 890 list_for_each_entry(lprops, &c->freeable_list, list) {
891 if (lprops->free + lprops->dirty != c->leb_size) { 891 if (lprops->free + lprops->dirty != c->leb_size) {
892 ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)", 892 ubifs_err(c, "non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
893 lprops->lnum, lprops->free, lprops->dirty, 893 lprops->lnum, lprops->free, lprops->dirty,
894 lprops->flags); 894 lprops->flags);
895 return -EINVAL; 895 return -EINVAL;
896 } 896 }
897 if (lprops->flags & LPROPS_TAKEN) { 897 if (lprops->flags & LPROPS_TAKEN) {
898 ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)", 898 ubifs_err(c, "taken LEB %d on freeable list (free %d dirty %d flags %d)",
899 lprops->lnum, lprops->free, lprops->dirty, 899 lprops->lnum, lprops->free, lprops->dirty,
900 lprops->flags); 900 lprops->flags);
901 return -EINVAL; 901 return -EINVAL;
@@ -903,7 +903,7 @@ int dbg_check_cats(struct ubifs_info *c)
903 i += 1; 903 i += 1;
904 } 904 }
905 if (i != c->freeable_cnt) { 905 if (i != c->freeable_cnt) {
906 ubifs_err("freeable list count %d expected %d", i, 906 ubifs_err(c, "freeable list count %d expected %d", i,
907 c->freeable_cnt); 907 c->freeable_cnt);
908 return -EINVAL; 908 return -EINVAL;
909 } 909 }
@@ -912,26 +912,26 @@ int dbg_check_cats(struct ubifs_info *c)
912 list_for_each(pos, &c->idx_gc) 912 list_for_each(pos, &c->idx_gc)
913 i += 1; 913 i += 1;
914 if (i != c->idx_gc_cnt) { 914 if (i != c->idx_gc_cnt) {
915 ubifs_err("idx_gc list count %d expected %d", i, 915 ubifs_err(c, "idx_gc list count %d expected %d", i,
916 c->idx_gc_cnt); 916 c->idx_gc_cnt);
917 return -EINVAL; 917 return -EINVAL;
918 } 918 }
919 919
920 list_for_each_entry(lprops, &c->frdi_idx_list, list) { 920 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
921 if (lprops->free + lprops->dirty != c->leb_size) { 921 if (lprops->free + lprops->dirty != c->leb_size) {
922 ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)", 922 ubifs_err(c, "non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
923 lprops->lnum, lprops->free, lprops->dirty, 923 lprops->lnum, lprops->free, lprops->dirty,
924 lprops->flags); 924 lprops->flags);
925 return -EINVAL; 925 return -EINVAL;
926 } 926 }
927 if (lprops->flags & LPROPS_TAKEN) { 927 if (lprops->flags & LPROPS_TAKEN) {
928 ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)", 928 ubifs_err(c, "taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
929 lprops->lnum, lprops->free, lprops->dirty, 929 lprops->lnum, lprops->free, lprops->dirty,
930 lprops->flags); 930 lprops->flags);
931 return -EINVAL; 931 return -EINVAL;
932 } 932 }
933 if (!(lprops->flags & LPROPS_INDEX)) { 933 if (!(lprops->flags & LPROPS_INDEX)) {
934 ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)", 934 ubifs_err(c, "non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
935 lprops->lnum, lprops->free, lprops->dirty, 935 lprops->lnum, lprops->free, lprops->dirty,
936 lprops->flags); 936 lprops->flags);
937 return -EINVAL; 937 return -EINVAL;
@@ -944,15 +944,15 @@ int dbg_check_cats(struct ubifs_info *c)
944 for (i = 0; i < heap->cnt; i++) { 944 for (i = 0; i < heap->cnt; i++) {
945 lprops = heap->arr[i]; 945 lprops = heap->arr[i];
946 if (!lprops) { 946 if (!lprops) {
947 ubifs_err("null ptr in LPT heap cat %d", cat); 947 ubifs_err(c, "null ptr in LPT heap cat %d", cat);
948 return -EINVAL; 948 return -EINVAL;
949 } 949 }
950 if (lprops->hpos != i) { 950 if (lprops->hpos != i) {
951 ubifs_err("bad ptr in LPT heap cat %d", cat); 951 ubifs_err(c, "bad ptr in LPT heap cat %d", cat);
952 return -EINVAL; 952 return -EINVAL;
953 } 953 }
954 if (lprops->flags & LPROPS_TAKEN) { 954 if (lprops->flags & LPROPS_TAKEN) {
955 ubifs_err("taken LEB in LPT heap cat %d", cat); 955 ubifs_err(c, "taken LEB in LPT heap cat %d", cat);
956 return -EINVAL; 956 return -EINVAL;
957 } 957 }
958 } 958 }
@@ -988,7 +988,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
988 goto out; 988 goto out;
989 } 989 }
990 if (lprops != lp) { 990 if (lprops != lp) {
991 ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d", 991 ubifs_err(c, "lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
992 (size_t)lprops, (size_t)lp, lprops->lnum, 992 (size_t)lprops, (size_t)lp, lprops->lnum,
993 lp->lnum); 993 lp->lnum);
994 err = 4; 994 err = 4;
@@ -1008,7 +1008,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
1008 } 1008 }
1009out: 1009out:
1010 if (err) { 1010 if (err) {
1011 ubifs_err("failed cat %d hpos %d err %d", cat, i, err); 1011 ubifs_err(c, "failed cat %d hpos %d err %d", cat, i, err);
1012 dump_stack(); 1012 dump_stack();
1013 ubifs_dump_heap(c, heap, cat); 1013 ubifs_dump_heap(c, heap, cat);
1014 } 1014 }
@@ -1039,7 +1039,7 @@ static int scan_check_cb(struct ubifs_info *c,
1039 if (cat != LPROPS_UNCAT) { 1039 if (cat != LPROPS_UNCAT) {
1040 cat = ubifs_categorize_lprops(c, lp); 1040 cat = ubifs_categorize_lprops(c, lp);
1041 if (cat != (lp->flags & LPROPS_CAT_MASK)) { 1041 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1042 ubifs_err("bad LEB category %d expected %d", 1042 ubifs_err(c, "bad LEB category %d expected %d",
1043 (lp->flags & LPROPS_CAT_MASK), cat); 1043 (lp->flags & LPROPS_CAT_MASK), cat);
1044 return -EINVAL; 1044 return -EINVAL;
1045 } 1045 }
@@ -1074,7 +1074,7 @@ static int scan_check_cb(struct ubifs_info *c,
1074 } 1074 }
1075 } 1075 }
1076 if (!found) { 1076 if (!found) {
1077 ubifs_err("bad LPT list (category %d)", cat); 1077 ubifs_err(c, "bad LPT list (category %d)", cat);
1078 return -EINVAL; 1078 return -EINVAL;
1079 } 1079 }
1080 } 1080 }
@@ -1086,7 +1086,7 @@ static int scan_check_cb(struct ubifs_info *c,
1086 1086
1087 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || 1087 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1088 lp != heap->arr[lp->hpos]) { 1088 lp != heap->arr[lp->hpos]) {
1089 ubifs_err("bad LPT heap (category %d)", cat); 1089 ubifs_err(c, "bad LPT heap (category %d)", cat);
1090 return -EINVAL; 1090 return -EINVAL;
1091 } 1091 }
1092 } 1092 }
@@ -1133,7 +1133,7 @@ static int scan_check_cb(struct ubifs_info *c,
1133 is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0; 1133 is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;
1134 1134
1135 if (is_idx && snod->type != UBIFS_IDX_NODE) { 1135 if (is_idx && snod->type != UBIFS_IDX_NODE) {
1136 ubifs_err("indexing node in data LEB %d:%d", 1136 ubifs_err(c, "indexing node in data LEB %d:%d",
1137 lnum, snod->offs); 1137 lnum, snod->offs);
1138 goto out_destroy; 1138 goto out_destroy;
1139 } 1139 }
@@ -1159,7 +1159,7 @@ static int scan_check_cb(struct ubifs_info *c,
1159 1159
1160 if (free > c->leb_size || free < 0 || dirty > c->leb_size || 1160 if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
1161 dirty < 0) { 1161 dirty < 0) {
1162 ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d", 1162 ubifs_err(c, "bad calculated accounting for LEB %d: free %d, dirty %d",
1163 lnum, free, dirty); 1163 lnum, free, dirty);
1164 goto out_destroy; 1164 goto out_destroy;
1165 } 1165 }
@@ -1206,13 +1206,13 @@ static int scan_check_cb(struct ubifs_info *c,
1206 /* Free but not unmapped LEB, it's fine */ 1206 /* Free but not unmapped LEB, it's fine */
1207 is_idx = 0; 1207 is_idx = 0;
1208 else { 1208 else {
1209 ubifs_err("indexing node without indexing flag"); 1209 ubifs_err(c, "indexing node without indexing flag");
1210 goto out_print; 1210 goto out_print;
1211 } 1211 }
1212 } 1212 }
1213 1213
1214 if (!is_idx && (lp->flags & LPROPS_INDEX)) { 1214 if (!is_idx && (lp->flags & LPROPS_INDEX)) {
1215 ubifs_err("data node with indexing flag"); 1215 ubifs_err(c, "data node with indexing flag");
1216 goto out_print; 1216 goto out_print;
1217 } 1217 }
1218 1218
@@ -1241,7 +1241,7 @@ static int scan_check_cb(struct ubifs_info *c,
1241 return LPT_SCAN_CONTINUE; 1241 return LPT_SCAN_CONTINUE;
1242 1242
1243out_print: 1243out_print:
1244 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d", 1244 ubifs_err(c, "bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
1245 lnum, lp->free, lp->dirty, lp->flags, free, dirty); 1245 lnum, lp->free, lp->dirty, lp->flags, free, dirty);
1246 ubifs_dump_leb(c, lnum); 1246 ubifs_dump_leb(c, lnum);
1247out_destroy: 1247out_destroy:
@@ -1293,11 +1293,11 @@ int dbg_check_lprops(struct ubifs_info *c)
1293 lst.total_free != c->lst.total_free || 1293 lst.total_free != c->lst.total_free ||
1294 lst.total_dirty != c->lst.total_dirty || 1294 lst.total_dirty != c->lst.total_dirty ||
1295 lst.total_used != c->lst.total_used) { 1295 lst.total_used != c->lst.total_used) {
1296 ubifs_err("bad overall accounting"); 1296 ubifs_err(c, "bad overall accounting");
1297 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", 1297 ubifs_err(c, "calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
1298 lst.empty_lebs, lst.idx_lebs, lst.total_free, 1298 lst.empty_lebs, lst.idx_lebs, lst.total_free,
1299 lst.total_dirty, lst.total_used); 1299 lst.total_dirty, lst.total_used);
1300 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld", 1300 ubifs_err(c, "read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
1301 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, 1301 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
1302 c->lst.total_dirty, c->lst.total_used); 1302 c->lst.total_dirty, c->lst.total_used);
1303 err = -EINVAL; 1303 err = -EINVAL;
@@ -1306,10 +1306,10 @@ int dbg_check_lprops(struct ubifs_info *c)
1306 1306
1307 if (lst.total_dead != c->lst.total_dead || 1307 if (lst.total_dead != c->lst.total_dead ||
1308 lst.total_dark != c->lst.total_dark) { 1308 lst.total_dark != c->lst.total_dark) {
1309 ubifs_err("bad dead/dark space accounting"); 1309 ubifs_err(c, "bad dead/dark space accounting");
1310 ubifs_err("calculated: total_dead %lld, total_dark %lld", 1310 ubifs_err(c, "calculated: total_dead %lld, total_dark %lld",
1311 lst.total_dead, lst.total_dark); 1311 lst.total_dead, lst.total_dark);
1312 ubifs_err("read from lprops: total_dead %lld, total_dark %lld", 1312 ubifs_err(c, "read from lprops: total_dead %lld, total_dark %lld",
1313 c->lst.total_dead, c->lst.total_dark); 1313 c->lst.total_dead, c->lst.total_dark);
1314 err = -EINVAL; 1314 err = -EINVAL;
1315 goto out; 1315 goto out;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 421bd0a80424..dc9f27e9d61b 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -145,13 +145,13 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
145 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ 145 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
146 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); 146 lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
147 if (lebs_needed > c->lpt_lebs) { 147 if (lebs_needed > c->lpt_lebs) {
148 ubifs_err("too few LPT LEBs"); 148 ubifs_err(c, "too few LPT LEBs");
149 return -EINVAL; 149 return -EINVAL;
150 } 150 }
151 151
152 /* Verify that ltab fits in a single LEB (since ltab is a single node */ 152 /* Verify that ltab fits in a single LEB (since ltab is a single node */
153 if (c->ltab_sz > c->leb_size) { 153 if (c->ltab_sz > c->leb_size) {
154 ubifs_err("LPT ltab too big"); 154 ubifs_err(c, "LPT ltab too big");
155 return -EINVAL; 155 return -EINVAL;
156 } 156 }
157 157
@@ -213,7 +213,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
213 continue; 213 continue;
214 } 214 }
215 if (c->ltab_sz > c->leb_size) { 215 if (c->ltab_sz > c->leb_size) {
216 ubifs_err("LPT ltab too big"); 216 ubifs_err(c, "LPT ltab too big");
217 return -EINVAL; 217 return -EINVAL;
218 } 218 }
219 *main_lebs = c->main_lebs; 219 *main_lebs = c->main_lebs;
@@ -911,7 +911,7 @@ static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode,
911 * 911 *
912 * This function returns %0 on success and a negative error code on failure. 912 * This function returns %0 on success and a negative error code on failure.
913 */ 913 */
914static int check_lpt_crc(void *buf, int len) 914static int check_lpt_crc(const struct ubifs_info *c, void *buf, int len)
915{ 915{
916 int pos = 0; 916 int pos = 0;
917 uint8_t *addr = buf; 917 uint8_t *addr = buf;
@@ -921,8 +921,8 @@ static int check_lpt_crc(void *buf, int len)
921 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, 921 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
922 len - UBIFS_LPT_CRC_BYTES); 922 len - UBIFS_LPT_CRC_BYTES);
923 if (crc != calc_crc) { 923 if (crc != calc_crc) {
924 ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, 924 ubifs_err(c, "invalid crc in LPT node: crc %hx calc %hx",
925 calc_crc); 925 crc, calc_crc);
926 dump_stack(); 926 dump_stack();
927 return -EINVAL; 927 return -EINVAL;
928 } 928 }
@@ -938,14 +938,15 @@ static int check_lpt_crc(void *buf, int len)
938 * 938 *
939 * This function returns %0 on success and a negative error code on failure. 939 * This function returns %0 on success and a negative error code on failure.
940 */ 940 */
941static int check_lpt_type(uint8_t **addr, int *pos, int type) 941static int check_lpt_type(const struct ubifs_info *c, uint8_t **addr,
942 int *pos, int type)
942{ 943{
943 int node_type; 944 int node_type;
944 945
945 node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); 946 node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
946 if (node_type != type) { 947 if (node_type != type) {
947 ubifs_err("invalid type (%d) in LPT node type %d", node_type, 948 ubifs_err(c, "invalid type (%d) in LPT node type %d",
948 type); 949 node_type, type);
949 dump_stack(); 950 dump_stack();
950 return -EINVAL; 951 return -EINVAL;
951 } 952 }
@@ -966,7 +967,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
966 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 967 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
967 int i, pos = 0, err; 968 int i, pos = 0, err;
968 969
969 err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); 970 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_PNODE);
970 if (err) 971 if (err)
971 return err; 972 return err;
972 if (c->big_lpt) 973 if (c->big_lpt)
@@ -985,7 +986,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
985 lprops->flags = 0; 986 lprops->flags = 0;
986 lprops->flags |= ubifs_categorize_lprops(c, lprops); 987 lprops->flags |= ubifs_categorize_lprops(c, lprops);
987 } 988 }
988 err = check_lpt_crc(buf, c->pnode_sz); 989 err = check_lpt_crc(c, buf, c->pnode_sz);
989 return err; 990 return err;
990} 991}
991 992
@@ -1003,7 +1004,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1003 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1004 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1004 int i, pos = 0, err; 1005 int i, pos = 0, err;
1005 1006
1006 err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); 1007 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_NNODE);
1007 if (err) 1008 if (err)
1008 return err; 1009 return err;
1009 if (c->big_lpt) 1010 if (c->big_lpt)
@@ -1019,7 +1020,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
1019 nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, 1020 nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos,
1020 c->lpt_offs_bits); 1021 c->lpt_offs_bits);
1021 } 1022 }
1022 err = check_lpt_crc(buf, c->nnode_sz); 1023 err = check_lpt_crc(c, buf, c->nnode_sz);
1023 return err; 1024 return err;
1024} 1025}
1025 1026
@@ -1035,7 +1036,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
1035 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1036 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1036 int i, pos = 0, err; 1037 int i, pos = 0, err;
1037 1038
1038 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); 1039 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LTAB);
1039 if (err) 1040 if (err)
1040 return err; 1041 return err;
1041 for (i = 0; i < c->lpt_lebs; i++) { 1042 for (i = 0; i < c->lpt_lebs; i++) {
@@ -1051,7 +1052,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
1051 c->ltab[i].tgc = 0; 1052 c->ltab[i].tgc = 0;
1052 c->ltab[i].cmt = 0; 1053 c->ltab[i].cmt = 0;
1053 } 1054 }
1054 err = check_lpt_crc(buf, c->ltab_sz); 1055 err = check_lpt_crc(c, buf, c->ltab_sz);
1055 return err; 1056 return err;
1056} 1057}
1057 1058
@@ -1067,7 +1068,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
1067 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; 1068 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1068 int i, pos = 0, err; 1069 int i, pos = 0, err;
1069 1070
1070 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE); 1071 err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LSAVE);
1071 if (err) 1072 if (err)
1072 return err; 1073 return err;
1073 for (i = 0; i < c->lsave_cnt; i++) { 1074 for (i = 0; i < c->lsave_cnt; i++) {
@@ -1077,7 +1078,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
1077 return -EINVAL; 1078 return -EINVAL;
1078 c->lsave[i] = lnum; 1079 c->lsave[i] = lnum;
1079 } 1080 }
1080 err = check_lpt_crc(buf, c->lsave_sz); 1081 err = check_lpt_crc(c, buf, c->lsave_sz);
1081 return err; 1082 return err;
1082} 1083}
1083 1084
@@ -1243,7 +1244,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1243 return 0; 1244 return 0;
1244 1245
1245out: 1246out:
1246 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); 1247 ubifs_err(c, "error %d reading nnode at %d:%d", err, lnum, offs);
1247 dump_stack(); 1248 dump_stack();
1248 kfree(nnode); 1249 kfree(nnode);
1249 return err; 1250 return err;
@@ -1308,10 +1309,10 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1308 return 0; 1309 return 0;
1309 1310
1310out: 1311out:
1311 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); 1312 ubifs_err(c, "error %d reading pnode at %d:%d", err, lnum, offs);
1312 ubifs_dump_pnode(c, pnode, parent, iip); 1313 ubifs_dump_pnode(c, pnode, parent, iip);
1313 dump_stack(); 1314 dump_stack();
1314 ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); 1315 ubifs_err(c, "calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
1315 kfree(pnode); 1316 kfree(pnode);
1316 return err; 1317 return err;
1317} 1318}
@@ -2095,7 +2096,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2095 int i; 2096 int i;
2096 2097
2097 if (pnode->num != col) { 2098 if (pnode->num != col) {
2098 ubifs_err("pnode num %d expected %d parent num %d iip %d", 2099 ubifs_err(c, "pnode num %d expected %d parent num %d iip %d",
2099 pnode->num, col, pnode->parent->num, pnode->iip); 2100 pnode->num, col, pnode->parent->num, pnode->iip);
2100 return -EINVAL; 2101 return -EINVAL;
2101 } 2102 }
@@ -2110,13 +2111,13 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2110 if (lnum >= c->leb_cnt) 2111 if (lnum >= c->leb_cnt)
2111 continue; 2112 continue;
2112 if (lprops->lnum != lnum) { 2113 if (lprops->lnum != lnum) {
2113 ubifs_err("bad LEB number %d expected %d", 2114 ubifs_err(c, "bad LEB number %d expected %d",
2114 lprops->lnum, lnum); 2115 lprops->lnum, lnum);
2115 return -EINVAL; 2116 return -EINVAL;
2116 } 2117 }
2117 if (lprops->flags & LPROPS_TAKEN) { 2118 if (lprops->flags & LPROPS_TAKEN) {
2118 if (cat != LPROPS_UNCAT) { 2119 if (cat != LPROPS_UNCAT) {
2119 ubifs_err("LEB %d taken but not uncat %d", 2120 ubifs_err(c, "LEB %d taken but not uncat %d",
2120 lprops->lnum, cat); 2121 lprops->lnum, cat);
2121 return -EINVAL; 2122 return -EINVAL;
2122 } 2123 }
@@ -2129,7 +2130,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2129 case LPROPS_FRDI_IDX: 2130 case LPROPS_FRDI_IDX:
2130 break; 2131 break;
2131 default: 2132 default:
2132 ubifs_err("LEB %d index but cat %d", 2133 ubifs_err(c, "LEB %d index but cat %d",
2133 lprops->lnum, cat); 2134 lprops->lnum, cat);
2134 return -EINVAL; 2135 return -EINVAL;
2135 } 2136 }
@@ -2142,7 +2143,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2142 case LPROPS_FREEABLE: 2143 case LPROPS_FREEABLE:
2143 break; 2144 break;
2144 default: 2145 default:
2145 ubifs_err("LEB %d not index but cat %d", 2146 ubifs_err(c, "LEB %d not index but cat %d",
2146 lprops->lnum, cat); 2147 lprops->lnum, cat);
2147 return -EINVAL; 2148 return -EINVAL;
2148 } 2149 }
@@ -2183,14 +2184,14 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2183 break; 2184 break;
2184 } 2185 }
2185 if (!found) { 2186 if (!found) {
2186 ubifs_err("LEB %d cat %d not found in cat heap/list", 2187 ubifs_err(c, "LEB %d cat %d not found in cat heap/list",
2187 lprops->lnum, cat); 2188 lprops->lnum, cat);
2188 return -EINVAL; 2189 return -EINVAL;
2189 } 2190 }
2190 switch (cat) { 2191 switch (cat) {
2191 case LPROPS_EMPTY: 2192 case LPROPS_EMPTY:
2192 if (lprops->free != c->leb_size) { 2193 if (lprops->free != c->leb_size) {
2193 ubifs_err("LEB %d cat %d free %d dirty %d", 2194 ubifs_err(c, "LEB %d cat %d free %d dirty %d",
2194 lprops->lnum, cat, lprops->free, 2195 lprops->lnum, cat, lprops->free,
2195 lprops->dirty); 2196 lprops->dirty);
2196 return -EINVAL; 2197 return -EINVAL;
@@ -2199,7 +2200,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2199 case LPROPS_FREEABLE: 2200 case LPROPS_FREEABLE:
2200 case LPROPS_FRDI_IDX: 2201 case LPROPS_FRDI_IDX:
2201 if (lprops->free + lprops->dirty != c->leb_size) { 2202 if (lprops->free + lprops->dirty != c->leb_size) {
2202 ubifs_err("LEB %d cat %d free %d dirty %d", 2203 ubifs_err(c, "LEB %d cat %d free %d dirty %d",
2203 lprops->lnum, cat, lprops->free, 2204 lprops->lnum, cat, lprops->free,
2204 lprops->dirty); 2205 lprops->dirty);
2205 return -EINVAL; 2206 return -EINVAL;
@@ -2236,7 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
2236 /* cnode is a nnode */ 2237 /* cnode is a nnode */
2237 num = calc_nnode_num(row, col); 2238 num = calc_nnode_num(row, col);
2238 if (cnode->num != num) { 2239 if (cnode->num != num) {
2239 ubifs_err("nnode num %d expected %d parent num %d iip %d", 2240 ubifs_err(c, "nnode num %d expected %d parent num %d iip %d",
2240 cnode->num, num, 2241 cnode->num, num,
2241 (nnode ? nnode->num : 0), cnode->iip); 2242 (nnode ? nnode->num : 0), cnode->iip);
2242 return -EINVAL; 2243 return -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index d9c02928e992..ce89bdc3eb02 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -319,7 +319,7 @@ static int layout_cnodes(struct ubifs_info *c)
319 return 0; 319 return 0;
320 320
321no_space: 321no_space:
322 ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", 322 ubifs_err(c, "LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
323 lnum, offs, len, done_ltab, done_lsave); 323 lnum, offs, len, done_ltab, done_lsave);
324 ubifs_dump_lpt_info(c); 324 ubifs_dump_lpt_info(c);
325 ubifs_dump_lpt_lebs(c); 325 ubifs_dump_lpt_lebs(c);
@@ -543,7 +543,7 @@ static int write_cnodes(struct ubifs_info *c)
543 return 0; 543 return 0;
544 544
545no_space: 545no_space:
546 ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d", 546 ubifs_err(c, "LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
547 lnum, offs, len, done_ltab, done_lsave); 547 lnum, offs, len, done_ltab, done_lsave);
548 ubifs_dump_lpt_info(c); 548 ubifs_dump_lpt_info(c);
549 ubifs_dump_lpt_lebs(c); 549 ubifs_dump_lpt_lebs(c);
@@ -1638,7 +1638,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1638 1638
1639 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1639 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1640 if (!buf) { 1640 if (!buf) {
1641 ubifs_err("cannot allocate memory for ltab checking"); 1641 ubifs_err(c, "cannot allocate memory for ltab checking");
1642 return 0; 1642 return 0;
1643 } 1643 }
1644 1644
@@ -1660,18 +1660,18 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1660 continue; 1660 continue;
1661 } 1661 }
1662 if (!dbg_is_all_ff(p, len)) { 1662 if (!dbg_is_all_ff(p, len)) {
1663 ubifs_err("invalid empty space in LEB %d at %d", 1663 ubifs_err(c, "invalid empty space in LEB %d at %d",
1664 lnum, c->leb_size - len); 1664 lnum, c->leb_size - len);
1665 err = -EINVAL; 1665 err = -EINVAL;
1666 } 1666 }
1667 i = lnum - c->lpt_first; 1667 i = lnum - c->lpt_first;
1668 if (len != c->ltab[i].free) { 1668 if (len != c->ltab[i].free) {
1669 ubifs_err("invalid free space in LEB %d (free %d, expected %d)", 1669 ubifs_err(c, "invalid free space in LEB %d (free %d, expected %d)",
1670 lnum, len, c->ltab[i].free); 1670 lnum, len, c->ltab[i].free);
1671 err = -EINVAL; 1671 err = -EINVAL;
1672 } 1672 }
1673 if (dirty != c->ltab[i].dirty) { 1673 if (dirty != c->ltab[i].dirty) {
1674 ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)", 1674 ubifs_err(c, "invalid dirty space in LEB %d (dirty %d, expected %d)",
1675 lnum, dirty, c->ltab[i].dirty); 1675 lnum, dirty, c->ltab[i].dirty);
1676 err = -EINVAL; 1676 err = -EINVAL;
1677 } 1677 }
@@ -1725,7 +1725,7 @@ int dbg_check_ltab(struct ubifs_info *c)
1725 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { 1725 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
1726 err = dbg_check_ltab_lnum(c, lnum); 1726 err = dbg_check_ltab_lnum(c, lnum);
1727 if (err) { 1727 if (err) {
1728 ubifs_err("failed at LEB %d", lnum); 1728 ubifs_err(c, "failed at LEB %d", lnum);
1729 return err; 1729 return err;
1730 } 1730 }
1731 } 1731 }
@@ -1757,7 +1757,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1757 free += c->leb_size; 1757 free += c->leb_size;
1758 } 1758 }
1759 if (free < c->lpt_sz) { 1759 if (free < c->lpt_sz) {
1760 ubifs_err("LPT space error: free %lld lpt_sz %lld", 1760 ubifs_err(c, "LPT space error: free %lld lpt_sz %lld",
1761 free, c->lpt_sz); 1761 free, c->lpt_sz);
1762 ubifs_dump_lpt_info(c); 1762 ubifs_dump_lpt_info(c);
1763 ubifs_dump_lpt_lebs(c); 1763 ubifs_dump_lpt_lebs(c);
@@ -1797,12 +1797,12 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1797 d->chk_lpt_lebs = 0; 1797 d->chk_lpt_lebs = 0;
1798 d->chk_lpt_wastage = 0; 1798 d->chk_lpt_wastage = 0;
1799 if (c->dirty_pn_cnt > c->pnode_cnt) { 1799 if (c->dirty_pn_cnt > c->pnode_cnt) {
1800 ubifs_err("dirty pnodes %d exceed max %d", 1800 ubifs_err(c, "dirty pnodes %d exceed max %d",
1801 c->dirty_pn_cnt, c->pnode_cnt); 1801 c->dirty_pn_cnt, c->pnode_cnt);
1802 err = -EINVAL; 1802 err = -EINVAL;
1803 } 1803 }
1804 if (c->dirty_nn_cnt > c->nnode_cnt) { 1804 if (c->dirty_nn_cnt > c->nnode_cnt) {
1805 ubifs_err("dirty nnodes %d exceed max %d", 1805 ubifs_err(c, "dirty nnodes %d exceed max %d",
1806 c->dirty_nn_cnt, c->nnode_cnt); 1806 c->dirty_nn_cnt, c->nnode_cnt);
1807 err = -EINVAL; 1807 err = -EINVAL;
1808 } 1808 }
@@ -1820,22 +1820,22 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1820 chk_lpt_sz *= d->chk_lpt_lebs; 1820 chk_lpt_sz *= d->chk_lpt_lebs;
1821 chk_lpt_sz += len - c->nhead_offs; 1821 chk_lpt_sz += len - c->nhead_offs;
1822 if (d->chk_lpt_sz != chk_lpt_sz) { 1822 if (d->chk_lpt_sz != chk_lpt_sz) {
1823 ubifs_err("LPT wrote %lld but space used was %lld", 1823 ubifs_err(c, "LPT wrote %lld but space used was %lld",
1824 d->chk_lpt_sz, chk_lpt_sz); 1824 d->chk_lpt_sz, chk_lpt_sz);
1825 err = -EINVAL; 1825 err = -EINVAL;
1826 } 1826 }
1827 if (d->chk_lpt_sz > c->lpt_sz) { 1827 if (d->chk_lpt_sz > c->lpt_sz) {
1828 ubifs_err("LPT wrote %lld but lpt_sz is %lld", 1828 ubifs_err(c, "LPT wrote %lld but lpt_sz is %lld",
1829 d->chk_lpt_sz, c->lpt_sz); 1829 d->chk_lpt_sz, c->lpt_sz);
1830 err = -EINVAL; 1830 err = -EINVAL;
1831 } 1831 }
1832 if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { 1832 if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
1833 ubifs_err("LPT layout size %lld but wrote %lld", 1833 ubifs_err(c, "LPT layout size %lld but wrote %lld",
1834 d->chk_lpt_sz, d->chk_lpt_sz2); 1834 d->chk_lpt_sz, d->chk_lpt_sz2);
1835 err = -EINVAL; 1835 err = -EINVAL;
1836 } 1836 }
1837 if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { 1837 if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
1838 ubifs_err("LPT new nhead offs: expected %d was %d", 1838 ubifs_err(c, "LPT new nhead offs: expected %d was %d",
1839 d->new_nhead_offs, len); 1839 d->new_nhead_offs, len);
1840 err = -EINVAL; 1840 err = -EINVAL;
1841 } 1841 }
@@ -1845,7 +1845,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1845 if (c->big_lpt) 1845 if (c->big_lpt)
1846 lpt_sz += c->lsave_sz; 1846 lpt_sz += c->lsave_sz;
1847 if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { 1847 if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
1848 ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", 1848 ubifs_err(c, "LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
1849 d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); 1849 d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
1850 err = -EINVAL; 1850 err = -EINVAL;
1851 } 1851 }
@@ -1887,7 +1887,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1887 pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); 1887 pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
1888 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1888 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1889 if (!buf) { 1889 if (!buf) {
1890 ubifs_err("cannot allocate memory to dump LPT"); 1890 ubifs_err(c, "cannot allocate memory to dump LPT");
1891 return; 1891 return;
1892 } 1892 }
1893 1893
@@ -1962,7 +1962,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1962 pr_err("LEB %d:%d, lsave len\n", lnum, offs); 1962 pr_err("LEB %d:%d, lsave len\n", lnum, offs);
1963 break; 1963 break;
1964 default: 1964 default:
1965 ubifs_err("LPT node type %d not recognized", node_type); 1965 ubifs_err(c, "LPT node type %d not recognized", node_type);
1966 goto out; 1966 goto out;
1967 } 1967 }
1968 1968
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 1a4bb9e8b3b8..c6a5e39e2ba5 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -82,7 +82,7 @@ out:
82 return -EUCLEAN; 82 return -EUCLEAN;
83 83
84out_dump: 84out_dump:
85 ubifs_err("unexpected node type %d master LEB %d:%d", 85 ubifs_err(c, "unexpected node type %d master LEB %d:%d",
86 snod->type, lnum, snod->offs); 86 snod->type, lnum, snod->offs);
87 ubifs_scan_destroy(sleb); 87 ubifs_scan_destroy(sleb);
88 return -EINVAL; 88 return -EINVAL;
@@ -240,7 +240,7 @@ static int validate_master(const struct ubifs_info *c)
240 return 0; 240 return 0;
241 241
242out: 242out:
243 ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); 243 ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err);
244 ubifs_dump_node(c, c->mst_node); 244 ubifs_dump_node(c, c->mst_node);
245 return -EINVAL; 245 return -EINVAL;
246} 246}
@@ -316,7 +316,7 @@ int ubifs_read_master(struct ubifs_info *c)
316 316
317 if (c->leb_cnt < old_leb_cnt || 317 if (c->leb_cnt < old_leb_cnt ||
318 c->leb_cnt < UBIFS_MIN_LEB_CNT) { 318 c->leb_cnt < UBIFS_MIN_LEB_CNT) {
319 ubifs_err("bad leb_cnt on master node"); 319 ubifs_err(c, "bad leb_cnt on master node");
320 ubifs_dump_node(c, c->mst_node); 320 ubifs_dump_node(c, c->mst_node);
321 return -EINVAL; 321 return -EINVAL;
322 } 322 }
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 4409f486ecef..caf2d123e9ee 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -88,7 +88,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
88 else if (inum > o->inum) 88 else if (inum > o->inum)
89 p = &(*p)->rb_right; 89 p = &(*p)->rb_right;
90 else { 90 else {
91 ubifs_err("orphaned twice"); 91 ubifs_err(c, "orphaned twice");
92 spin_unlock(&c->orphan_lock); 92 spin_unlock(&c->orphan_lock);
93 kfree(orphan); 93 kfree(orphan);
94 return 0; 94 return 0;
@@ -155,7 +155,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
155 } 155 }
156 } 156 }
157 spin_unlock(&c->orphan_lock); 157 spin_unlock(&c->orphan_lock);
158 ubifs_err("missing orphan ino %lu", (unsigned long)inum); 158 ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
159 dump_stack(); 159 dump_stack();
160} 160}
161 161
@@ -287,7 +287,7 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
287 * We limit the number of orphans so that this should 287 * We limit the number of orphans so that this should
288 * never happen. 288 * never happen.
289 */ 289 */
290 ubifs_err("out of space in orphan area"); 290 ubifs_err(c, "out of space in orphan area");
291 return -EINVAL; 291 return -EINVAL;
292 } 292 }
293 } 293 }
@@ -397,7 +397,7 @@ static int consolidate(struct ubifs_info *c)
397 * We limit the number of orphans so that this should 397 * We limit the number of orphans so that this should
398 * never happen. 398 * never happen.
399 */ 399 */
400 ubifs_err("out of space in orphan area"); 400 ubifs_err(c, "out of space in orphan area");
401 err = -EINVAL; 401 err = -EINVAL;
402 } 402 }
403 spin_unlock(&c->orphan_lock); 403 spin_unlock(&c->orphan_lock);
@@ -569,7 +569,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
569 569
570 list_for_each_entry(snod, &sleb->nodes, list) { 570 list_for_each_entry(snod, &sleb->nodes, list) {
571 if (snod->type != UBIFS_ORPH_NODE) { 571 if (snod->type != UBIFS_ORPH_NODE) {
572 ubifs_err("invalid node type %d in orphan area at %d:%d", 572 ubifs_err(c, "invalid node type %d in orphan area at %d:%d",
573 snod->type, sleb->lnum, snod->offs); 573 snod->type, sleb->lnum, snod->offs);
574 ubifs_dump_node(c, snod->node); 574 ubifs_dump_node(c, snod->node);
575 return -EINVAL; 575 return -EINVAL;
@@ -596,7 +596,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
596 * number. That makes this orphan node, out of date. 596 * number. That makes this orphan node, out of date.
597 */ 597 */
598 if (!first) { 598 if (!first) {
599 ubifs_err("out of order commit number %llu in orphan node at %d:%d", 599 ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d",
600 cmt_no, sleb->lnum, snod->offs); 600 cmt_no, sleb->lnum, snod->offs);
601 ubifs_dump_node(c, snod->node); 601 ubifs_dump_node(c, snod->node);
602 return -EINVAL; 602 return -EINVAL;
@@ -831,20 +831,20 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
831 if (inum != ci->last_ino) { 831 if (inum != ci->last_ino) {
832 /* Lowest node type is the inode node, so it comes first */ 832 /* Lowest node type is the inode node, so it comes first */
833 if (key_type(c, &zbr->key) != UBIFS_INO_KEY) 833 if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
834 ubifs_err("found orphan node ino %lu, type %d", 834 ubifs_err(c, "found orphan node ino %lu, type %d",
835 (unsigned long)inum, key_type(c, &zbr->key)); 835 (unsigned long)inum, key_type(c, &zbr->key));
836 ci->last_ino = inum; 836 ci->last_ino = inum;
837 ci->tot_inos += 1; 837 ci->tot_inos += 1;
838 err = ubifs_tnc_read_node(c, zbr, ci->node); 838 err = ubifs_tnc_read_node(c, zbr, ci->node);
839 if (err) { 839 if (err) {
840 ubifs_err("node read failed, error %d", err); 840 ubifs_err(c, "node read failed, error %d", err);
841 return err; 841 return err;
842 } 842 }
843 if (ci->node->nlink == 0) 843 if (ci->node->nlink == 0)
844 /* Must be recorded as an orphan */ 844 /* Must be recorded as an orphan */
845 if (!dbg_find_check_orphan(&ci->root, inum) && 845 if (!dbg_find_check_orphan(&ci->root, inum) &&
846 !dbg_find_orphan(c, inum)) { 846 !dbg_find_orphan(c, inum)) {
847 ubifs_err("missing orphan, ino %lu", 847 ubifs_err(c, "missing orphan, ino %lu",
848 (unsigned long)inum); 848 (unsigned long)inum);
849 ci->missing += 1; 849 ci->missing += 1;
850 } 850 }
@@ -887,7 +887,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
887 887
888 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 888 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
889 if (!buf) { 889 if (!buf) {
890 ubifs_err("cannot allocate memory to check orphans"); 890 ubifs_err(c, "cannot allocate memory to check orphans");
891 return 0; 891 return 0;
892 } 892 }
893 893
@@ -925,7 +925,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
925 ci.root = RB_ROOT; 925 ci.root = RB_ROOT;
926 ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); 926 ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
927 if (!ci.node) { 927 if (!ci.node) {
928 ubifs_err("out of memory"); 928 ubifs_err(c, "out of memory");
929 return -ENOMEM; 929 return -ENOMEM;
930 } 930 }
931 931
@@ -935,12 +935,12 @@ static int dbg_check_orphans(struct ubifs_info *c)
935 935
936 err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci); 936 err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
937 if (err) { 937 if (err) {
938 ubifs_err("cannot scan TNC, error %d", err); 938 ubifs_err(c, "cannot scan TNC, error %d", err);
939 goto out; 939 goto out;
940 } 940 }
941 941
942 if (ci.missing) { 942 if (ci.missing) {
943 ubifs_err("%lu missing orphan(s)", ci.missing); 943 ubifs_err(c, "%lu missing orphan(s)", ci.missing);
944 err = -EINVAL; 944 err = -EINVAL;
945 goto out; 945 goto out;
946 } 946 }
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c640938f62f0..695fc71d5244 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -305,7 +305,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
305 mst = mst2; 305 mst = mst2;
306 } 306 }
307 307
308 ubifs_msg("recovered master node from LEB %d", 308 ubifs_msg(c, "recovered master node from LEB %d",
309 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); 309 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
310 310
311 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); 311 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
@@ -360,13 +360,13 @@ int ubifs_recover_master_node(struct ubifs_info *c)
360out_err: 360out_err:
361 err = -EINVAL; 361 err = -EINVAL;
362out_free: 362out_free:
363 ubifs_err("failed to recover master node"); 363 ubifs_err(c, "failed to recover master node");
364 if (mst1) { 364 if (mst1) {
365 ubifs_err("dumping first master node"); 365 ubifs_err(c, "dumping first master node");
366 ubifs_dump_node(c, mst1); 366 ubifs_dump_node(c, mst1);
367 } 367 }
368 if (mst2) { 368 if (mst2) {
369 ubifs_err("dumping second master node"); 369 ubifs_err(c, "dumping second master node");
370 ubifs_dump_node(c, mst2); 370 ubifs_dump_node(c, mst2);
371 } 371 }
372 vfree(buf2); 372 vfree(buf2);
@@ -682,7 +682,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
682 ret, lnum, offs); 682 ret, lnum, offs);
683 break; 683 break;
684 } else { 684 } else {
685 ubifs_err("unexpected return value %d", ret); 685 ubifs_err(c, "unexpected return value %d", ret);
686 err = -EINVAL; 686 err = -EINVAL;
687 goto error; 687 goto error;
688 } 688 }
@@ -702,7 +702,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
702 * See header comment for this file for more 702 * See header comment for this file for more
703 * explanations about the reasons we have this check. 703 * explanations about the reasons we have this check.
704 */ 704 */
705 ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d", 705 ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
706 lnum, offs, corruption); 706 lnum, offs, corruption);
707 /* Make sure we dump interesting non-0xFF data */ 707 /* Make sure we dump interesting non-0xFF data */
708 offs += corruption; 708 offs += corruption;
@@ -788,13 +788,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
788 788
789corrupted_rescan: 789corrupted_rescan:
790 /* Re-scan the corrupted data with verbose messages */ 790 /* Re-scan the corrupted data with verbose messages */
791 ubifs_err("corruption %d", ret); 791 ubifs_err(c, "corruption %d", ret);
792 ubifs_scan_a_node(c, buf, len, lnum, offs, 1); 792 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
793corrupted: 793corrupted:
794 ubifs_scanned_corruption(c, lnum, offs, buf); 794 ubifs_scanned_corruption(c, lnum, offs, buf);
795 err = -EUCLEAN; 795 err = -EUCLEAN;
796error: 796error:
797 ubifs_err("LEB %d scanning failed", lnum); 797 ubifs_err(c, "LEB %d scanning failed", lnum);
798 ubifs_scan_destroy(sleb); 798 ubifs_scan_destroy(sleb);
799 return ERR_PTR(err); 799 return ERR_PTR(err);
800} 800}
@@ -826,15 +826,15 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
826 goto out_free; 826 goto out_free;
827 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); 827 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
828 if (ret != SCANNED_A_NODE) { 828 if (ret != SCANNED_A_NODE) {
829 ubifs_err("Not a valid node"); 829 ubifs_err(c, "Not a valid node");
830 goto out_err; 830 goto out_err;
831 } 831 }
832 if (cs_node->ch.node_type != UBIFS_CS_NODE) { 832 if (cs_node->ch.node_type != UBIFS_CS_NODE) {
833 ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type); 833 ubifs_err(c, "Node a CS node, type is %d", cs_node->ch.node_type);
834 goto out_err; 834 goto out_err;
835 } 835 }
836 if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { 836 if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
837 ubifs_err("CS node cmt_no %llu != current cmt_no %llu", 837 ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu",
838 (unsigned long long)le64_to_cpu(cs_node->cmt_no), 838 (unsigned long long)le64_to_cpu(cs_node->cmt_no),
839 c->cmt_no); 839 c->cmt_no);
840 goto out_err; 840 goto out_err;
@@ -847,7 +847,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
847out_err: 847out_err:
848 err = -EINVAL; 848 err = -EINVAL;
849out_free: 849out_free:
850 ubifs_err("failed to get CS sqnum"); 850 ubifs_err(c, "failed to get CS sqnum");
851 kfree(cs_node); 851 kfree(cs_node);
852 return err; 852 return err;
853} 853}
@@ -899,7 +899,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
899 } 899 }
900 } 900 }
901 if (snod->sqnum > cs_sqnum) { 901 if (snod->sqnum > cs_sqnum) {
902 ubifs_err("unrecoverable log corruption in LEB %d", 902 ubifs_err(c, "unrecoverable log corruption in LEB %d",
903 lnum); 903 lnum);
904 ubifs_scan_destroy(sleb); 904 ubifs_scan_destroy(sleb);
905 return ERR_PTR(-EUCLEAN); 905 return ERR_PTR(-EUCLEAN);
@@ -975,11 +975,8 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
975 return err; 975 return err;
976 976
977 dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); 977 dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
978 err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
979 if (err)
980 return err;
981 978
982 return 0; 979 return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
983} 980}
984 981
985/** 982/**
@@ -1004,10 +1001,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
1004 1001
1005 if (len == 0) { 1002 if (len == 0) {
1006 /* Nothing to read, just unmap it */ 1003 /* Nothing to read, just unmap it */
1007 err = ubifs_leb_unmap(c, lnum); 1004 return ubifs_leb_unmap(c, lnum);
1008 if (err)
1009 return err;
1010 return 0;
1011 } 1005 }
1012 1006
1013 err = ubifs_leb_read(c, lnum, buf, offs, len, 0); 1007 err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
@@ -1043,7 +1037,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
1043 } 1037 }
1044 1038
1045 if (ret == SCANNED_EMPTY_SPACE) { 1039 if (ret == SCANNED_EMPTY_SPACE) {
1046 ubifs_err("unexpected empty space at %d:%d", 1040 ubifs_err(c, "unexpected empty space at %d:%d",
1047 lnum, offs); 1041 lnum, offs);
1048 return -EUCLEAN; 1042 return -EUCLEAN;
1049 } 1043 }
@@ -1137,7 +1131,7 @@ static int grab_empty_leb(struct ubifs_info *c)
1137 */ 1131 */
1138 lnum = ubifs_find_free_leb_for_idx(c); 1132 lnum = ubifs_find_free_leb_for_idx(c);
1139 if (lnum < 0) { 1133 if (lnum < 0) {
1140 ubifs_err("could not find an empty LEB"); 1134 ubifs_err(c, "could not find an empty LEB");
1141 ubifs_dump_lprops(c); 1135 ubifs_dump_lprops(c);
1142 ubifs_dump_budg(c, &c->bi); 1136 ubifs_dump_budg(c, &c->bi);
1143 return lnum; 1137 return lnum;
@@ -1217,7 +1211,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1217 } 1211 }
1218 mutex_unlock(&wbuf->io_mutex); 1212 mutex_unlock(&wbuf->io_mutex);
1219 if (err < 0) { 1213 if (err < 0) {
1220 ubifs_err("GC failed, error %d", err); 1214 ubifs_err(c, "GC failed, error %d", err);
1221 if (err == -EAGAIN) 1215 if (err == -EAGAIN)
1222 err = -EINVAL; 1216 err = -EINVAL;
1223 return err; 1217 return err;
@@ -1464,7 +1458,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1464 return 0; 1458 return 0;
1465 1459
1466out: 1460out:
1467 ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d", 1461 ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d",
1468 (unsigned long)e->inum, e->i_size, e->d_size, err); 1462 (unsigned long)e->inum, e->i_size, e->d_size, err);
1469 return err; 1463 return err;
1470} 1464}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 9b40a1c5e160..3ca4540130b5 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -458,13 +458,13 @@ int ubifs_validate_entry(struct ubifs_info *c,
458 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 458 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
459 strnlen(dent->name, nlen) != nlen || 459 strnlen(dent->name, nlen) != nlen ||
460 le64_to_cpu(dent->inum) > MAX_INUM) { 460 le64_to_cpu(dent->inum) > MAX_INUM) {
461 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? 461 ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ?
462 "directory entry" : "extended attribute entry"); 462 "directory entry" : "extended attribute entry");
463 return -EINVAL; 463 return -EINVAL;
464 } 464 }
465 465
466 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 466 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
467 ubifs_err("bad key type %d", key_type); 467 ubifs_err(c, "bad key type %d", key_type);
468 return -EINVAL; 468 return -EINVAL;
469 } 469 }
470 470
@@ -589,7 +589,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
589 cond_resched(); 589 cond_resched();
590 590
591 if (snod->sqnum >= SQNUM_WATERMARK) { 591 if (snod->sqnum >= SQNUM_WATERMARK) {
592 ubifs_err("file system's life ended"); 592 ubifs_err(c, "file system's life ended");
593 goto out_dump; 593 goto out_dump;
594 } 594 }
595 595
@@ -647,7 +647,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
647 if (old_size < 0 || old_size > c->max_inode_sz || 647 if (old_size < 0 || old_size > c->max_inode_sz ||
648 new_size < 0 || new_size > c->max_inode_sz || 648 new_size < 0 || new_size > c->max_inode_sz ||
649 old_size <= new_size) { 649 old_size <= new_size) {
650 ubifs_err("bad truncation node"); 650 ubifs_err(c, "bad truncation node");
651 goto out_dump; 651 goto out_dump;
652 } 652 }
653 653
@@ -662,7 +662,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
662 break; 662 break;
663 } 663 }
664 default: 664 default:
665 ubifs_err("unexpected node type %d in bud LEB %d:%d", 665 ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
666 snod->type, lnum, snod->offs); 666 snod->type, lnum, snod->offs);
667 err = -EINVAL; 667 err = -EINVAL;
668 goto out_dump; 668 goto out_dump;
@@ -685,7 +685,7 @@ out:
685 return err; 685 return err;
686 686
687out_dump: 687out_dump:
688 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); 688 ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
689 ubifs_dump_node(c, snod->node); 689 ubifs_dump_node(c, snod->node);
690 ubifs_scan_destroy(sleb); 690 ubifs_scan_destroy(sleb);
691 return -EINVAL; 691 return -EINVAL;
@@ -805,7 +805,7 @@ static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
805 if (bud) { 805 if (bud) {
806 if (bud->jhead == jhead && bud->start <= offs) 806 if (bud->jhead == jhead && bud->start <= offs)
807 return 1; 807 return 1;
808 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); 808 ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs);
809 return -EINVAL; 809 return -EINVAL;
810 } 810 }
811 811
@@ -861,12 +861,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
861 * numbers. 861 * numbers.
862 */ 862 */
863 if (snod->type != UBIFS_CS_NODE) { 863 if (snod->type != UBIFS_CS_NODE) {
864 ubifs_err("first log node at LEB %d:%d is not CS node", 864 ubifs_err(c, "first log node at LEB %d:%d is not CS node",
865 lnum, offs); 865 lnum, offs);
866 goto out_dump; 866 goto out_dump;
867 } 867 }
868 if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 868 if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
869 ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu", 869 ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
870 lnum, offs, 870 lnum, offs,
871 (unsigned long long)le64_to_cpu(node->cmt_no), 871 (unsigned long long)le64_to_cpu(node->cmt_no),
872 c->cmt_no); 872 c->cmt_no);
@@ -891,7 +891,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
891 891
892 /* Make sure the first node sits at offset zero of the LEB */ 892 /* Make sure the first node sits at offset zero of the LEB */
893 if (snod->offs != 0) { 893 if (snod->offs != 0) {
894 ubifs_err("first node is not at zero offset"); 894 ubifs_err(c, "first node is not at zero offset");
895 goto out_dump; 895 goto out_dump;
896 } 896 }
897 897
@@ -899,12 +899,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
899 cond_resched(); 899 cond_resched();
900 900
901 if (snod->sqnum >= SQNUM_WATERMARK) { 901 if (snod->sqnum >= SQNUM_WATERMARK) {
902 ubifs_err("file system's life ended"); 902 ubifs_err(c, "file system's life ended");
903 goto out_dump; 903 goto out_dump;
904 } 904 }
905 905
906 if (snod->sqnum < c->cs_sqnum) { 906 if (snod->sqnum < c->cs_sqnum) {
907 ubifs_err("bad sqnum %llu, commit sqnum %llu", 907 ubifs_err(c, "bad sqnum %llu, commit sqnum %llu",
908 snod->sqnum, c->cs_sqnum); 908 snod->sqnum, c->cs_sqnum);
909 goto out_dump; 909 goto out_dump;
910 } 910 }
@@ -934,12 +934,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
934 case UBIFS_CS_NODE: 934 case UBIFS_CS_NODE:
935 /* Make sure it sits at the beginning of LEB */ 935 /* Make sure it sits at the beginning of LEB */
936 if (snod->offs != 0) { 936 if (snod->offs != 0) {
937 ubifs_err("unexpected node in log"); 937 ubifs_err(c, "unexpected node in log");
938 goto out_dump; 938 goto out_dump;
939 } 939 }
940 break; 940 break;
941 default: 941 default:
942 ubifs_err("unexpected node in log"); 942 ubifs_err(c, "unexpected node in log");
943 goto out_dump; 943 goto out_dump;
944 } 944 }
945 } 945 }
@@ -955,7 +955,7 @@ out:
955 return err; 955 return err;
956 956
957out_dump: 957out_dump:
958 ubifs_err("log error detected while replaying the log at LEB %d:%d", 958 ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
959 lnum, offs + snod->offs); 959 lnum, offs + snod->offs);
960 ubifs_dump_node(c, snod->node); 960 ubifs_dump_node(c, snod->node);
961 ubifs_scan_destroy(sleb); 961 ubifs_scan_destroy(sleb);
@@ -1017,7 +1017,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
1017 return free; /* Error code */ 1017 return free; /* Error code */
1018 1018
1019 if (c->ihead_offs != c->leb_size - free) { 1019 if (c->ihead_offs != c->leb_size - free) {
1020 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, 1020 ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum,
1021 c->ihead_offs); 1021 c->ihead_offs);
1022 return -EINVAL; 1022 return -EINVAL;
1023 } 1023 }
@@ -1040,7 +1040,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
1040 * someting went wrong and we cannot proceed mounting 1040 * someting went wrong and we cannot proceed mounting
1041 * the file-system. 1041 * the file-system.
1042 */ 1042 */
1043 ubifs_err("no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted", 1043 ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
1044 lnum, 0); 1044 lnum, 0);
1045 err = -EINVAL; 1045 err = -EINVAL;
1046 } 1046 }
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 79c6dbbc0e04..f4fbc7b6b794 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -335,7 +335,7 @@ static int create_default_filesystem(struct ubifs_info *c)
335 if (err) 335 if (err)
336 return err; 336 return err;
337 337
338 ubifs_msg("default file-system created"); 338 ubifs_msg(c, "default file-system created");
339 return 0; 339 return 0;
340} 340}
341 341
@@ -365,13 +365,13 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
365 } 365 }
366 366
367 if (le32_to_cpu(sup->min_io_size) != c->min_io_size) { 367 if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
368 ubifs_err("min. I/O unit mismatch: %d in superblock, %d real", 368 ubifs_err(c, "min. I/O unit mismatch: %d in superblock, %d real",
369 le32_to_cpu(sup->min_io_size), c->min_io_size); 369 le32_to_cpu(sup->min_io_size), c->min_io_size);
370 goto failed; 370 goto failed;
371 } 371 }
372 372
373 if (le32_to_cpu(sup->leb_size) != c->leb_size) { 373 if (le32_to_cpu(sup->leb_size) != c->leb_size) {
374 ubifs_err("LEB size mismatch: %d in superblock, %d real", 374 ubifs_err(c, "LEB size mismatch: %d in superblock, %d real",
375 le32_to_cpu(sup->leb_size), c->leb_size); 375 le32_to_cpu(sup->leb_size), c->leb_size);
376 goto failed; 376 goto failed;
377 } 377 }
@@ -393,33 +393,33 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
393 min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6; 393 min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
394 394
395 if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) { 395 if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
396 ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required", 396 ubifs_err(c, "bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
397 c->leb_cnt, c->vi.size, min_leb_cnt); 397 c->leb_cnt, c->vi.size, min_leb_cnt);
398 goto failed; 398 goto failed;
399 } 399 }
400 400
401 if (c->max_leb_cnt < c->leb_cnt) { 401 if (c->max_leb_cnt < c->leb_cnt) {
402 ubifs_err("max. LEB count %d less than LEB count %d", 402 ubifs_err(c, "max. LEB count %d less than LEB count %d",
403 c->max_leb_cnt, c->leb_cnt); 403 c->max_leb_cnt, c->leb_cnt);
404 goto failed; 404 goto failed;
405 } 405 }
406 406
407 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { 407 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
408 ubifs_err("too few main LEBs count %d, must be at least %d", 408 ubifs_err(c, "too few main LEBs count %d, must be at least %d",
409 c->main_lebs, UBIFS_MIN_MAIN_LEBS); 409 c->main_lebs, UBIFS_MIN_MAIN_LEBS);
410 goto failed; 410 goto failed;
411 } 411 }
412 412
413 max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; 413 max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
414 if (c->max_bud_bytes < max_bytes) { 414 if (c->max_bud_bytes < max_bytes) {
415 ubifs_err("too small journal (%lld bytes), must be at least %lld bytes", 415 ubifs_err(c, "too small journal (%lld bytes), must be at least %lld bytes",
416 c->max_bud_bytes, max_bytes); 416 c->max_bud_bytes, max_bytes);
417 goto failed; 417 goto failed;
418 } 418 }
419 419
420 max_bytes = (long long)c->leb_size * c->main_lebs; 420 max_bytes = (long long)c->leb_size * c->main_lebs;
421 if (c->max_bud_bytes > max_bytes) { 421 if (c->max_bud_bytes > max_bytes) {
422 ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area", 422 ubifs_err(c, "too large journal size (%lld bytes), only %lld bytes available in the main area",
423 c->max_bud_bytes, max_bytes); 423 c->max_bud_bytes, max_bytes);
424 goto failed; 424 goto failed;
425 } 425 }
@@ -468,7 +468,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
468 return 0; 468 return 0;
469 469
470failed: 470failed:
471 ubifs_err("bad superblock, error %d", err); 471 ubifs_err(c, "bad superblock, error %d", err);
472 ubifs_dump_node(c, sup); 472 ubifs_dump_node(c, sup);
473 return -EINVAL; 473 return -EINVAL;
474} 474}
@@ -549,12 +549,12 @@ int ubifs_read_superblock(struct ubifs_info *c)
549 ubifs_assert(!c->ro_media || c->ro_mount); 549 ubifs_assert(!c->ro_media || c->ro_mount);
550 if (!c->ro_mount || 550 if (!c->ro_mount ||
551 c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { 551 c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
552 ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", 552 ubifs_err(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
553 c->fmt_version, c->ro_compat_version, 553 c->fmt_version, c->ro_compat_version,
554 UBIFS_FORMAT_VERSION, 554 UBIFS_FORMAT_VERSION,
555 UBIFS_RO_COMPAT_VERSION); 555 UBIFS_RO_COMPAT_VERSION);
556 if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { 556 if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
557 ubifs_msg("only R/O mounting is possible"); 557 ubifs_msg(c, "only R/O mounting is possible");
558 err = -EROFS; 558 err = -EROFS;
559 } else 559 } else
560 err = -EINVAL; 560 err = -EINVAL;
@@ -570,7 +570,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
570 } 570 }
571 571
572 if (c->fmt_version < 3) { 572 if (c->fmt_version < 3) {
573 ubifs_err("on-flash format version %d is not supported", 573 ubifs_err(c, "on-flash format version %d is not supported",
574 c->fmt_version); 574 c->fmt_version);
575 err = -EINVAL; 575 err = -EINVAL;
576 goto out; 576 goto out;
@@ -595,7 +595,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
595 c->key_len = UBIFS_SK_LEN; 595 c->key_len = UBIFS_SK_LEN;
596 break; 596 break;
597 default: 597 default:
598 ubifs_err("unsupported key format"); 598 ubifs_err(c, "unsupported key format");
599 err = -EINVAL; 599 err = -EINVAL;
600 goto out; 600 goto out;
601 } 601 }
@@ -785,7 +785,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
785 ubifs_assert(c->space_fixup); 785 ubifs_assert(c->space_fixup);
786 ubifs_assert(!c->ro_mount); 786 ubifs_assert(!c->ro_mount);
787 787
788 ubifs_msg("start fixing up free space"); 788 ubifs_msg(c, "start fixing up free space");
789 789
790 err = fixup_free_space(c); 790 err = fixup_free_space(c);
791 if (err) 791 if (err)
@@ -804,6 +804,6 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
804 if (err) 804 if (err)
805 return err; 805 return err;
806 806
807 ubifs_msg("free space fixup complete"); 807 ubifs_msg(c, "free space fixup complete");
808 return err; 808 return err;
809} 809}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 89adbc4d08ac..aab87340d3de 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -100,7 +100,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
100 if (pad_len < 0 || 100 if (pad_len < 0 ||
101 offs + node_len + pad_len > c->leb_size) { 101 offs + node_len + pad_len > c->leb_size) {
102 if (!quiet) { 102 if (!quiet) {
103 ubifs_err("bad pad node at LEB %d:%d", 103 ubifs_err(c, "bad pad node at LEB %d:%d",
104 lnum, offs); 104 lnum, offs);
105 ubifs_dump_node(c, pad); 105 ubifs_dump_node(c, pad);
106 } 106 }
@@ -110,7 +110,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
110 /* Make the node pads to 8-byte boundary */ 110 /* Make the node pads to 8-byte boundary */
111 if ((node_len + pad_len) & 7) { 111 if ((node_len + pad_len) & 7) {
112 if (!quiet) 112 if (!quiet)
113 ubifs_err("bad padding length %d - %d", 113 ubifs_err(c, "bad padding length %d - %d",
114 offs, offs + node_len + pad_len); 114 offs, offs + node_len + pad_len);
115 return SCANNED_A_BAD_PAD_NODE; 115 return SCANNED_A_BAD_PAD_NODE;
116 } 116 }
@@ -152,7 +152,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
152 152
153 err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); 153 err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
154 if (err && err != -EBADMSG) { 154 if (err && err != -EBADMSG) {
155 ubifs_err("cannot read %d bytes from LEB %d:%d, error %d", 155 ubifs_err(c, "cannot read %d bytes from LEB %d:%d, error %d",
156 c->leb_size - offs, lnum, offs, err); 156 c->leb_size - offs, lnum, offs, err);
157 kfree(sleb); 157 kfree(sleb);
158 return ERR_PTR(err); 158 return ERR_PTR(err);
@@ -240,11 +240,11 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
240{ 240{
241 int len; 241 int len;
242 242
243 ubifs_err("corruption at LEB %d:%d", lnum, offs); 243 ubifs_err(c, "corruption at LEB %d:%d", lnum, offs);
244 len = c->leb_size - offs; 244 len = c->leb_size - offs;
245 if (len > 8192) 245 if (len > 8192)
246 len = 8192; 246 len = 8192;
247 ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs); 247 ubifs_err(c, "first %d bytes from LEB %d:%d", len, lnum, offs);
248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); 248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
249} 249}
250 250
@@ -299,16 +299,16 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
299 299
300 switch (ret) { 300 switch (ret) {
301 case SCANNED_GARBAGE: 301 case SCANNED_GARBAGE:
302 ubifs_err("garbage"); 302 ubifs_err(c, "garbage");
303 goto corrupted; 303 goto corrupted;
304 case SCANNED_A_NODE: 304 case SCANNED_A_NODE:
305 break; 305 break;
306 case SCANNED_A_CORRUPT_NODE: 306 case SCANNED_A_CORRUPT_NODE:
307 case SCANNED_A_BAD_PAD_NODE: 307 case SCANNED_A_BAD_PAD_NODE:
308 ubifs_err("bad node"); 308 ubifs_err(c, "bad node");
309 goto corrupted; 309 goto corrupted;
310 default: 310 default:
311 ubifs_err("unknown"); 311 ubifs_err(c, "unknown");
312 err = -EINVAL; 312 err = -EINVAL;
313 goto error; 313 goto error;
314 } 314 }
@@ -325,7 +325,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
325 325
326 if (offs % c->min_io_size) { 326 if (offs % c->min_io_size) {
327 if (!quiet) 327 if (!quiet)
328 ubifs_err("empty space starts at non-aligned offset %d", 328 ubifs_err(c, "empty space starts at non-aligned offset %d",
329 offs); 329 offs);
330 goto corrupted; 330 goto corrupted;
331 } 331 }
@@ -338,7 +338,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
338 for (; len; offs++, buf++, len--) 338 for (; len; offs++, buf++, len--)
339 if (*(uint8_t *)buf != 0xff) { 339 if (*(uint8_t *)buf != 0xff) {
340 if (!quiet) 340 if (!quiet)
341 ubifs_err("corrupt empty space at LEB %d:%d", 341 ubifs_err(c, "corrupt empty space at LEB %d:%d",
342 lnum, offs); 342 lnum, offs);
343 goto corrupted; 343 goto corrupted;
344 } 344 }
@@ -348,14 +348,14 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
348corrupted: 348corrupted:
349 if (!quiet) { 349 if (!quiet) {
350 ubifs_scanned_corruption(c, lnum, offs, buf); 350 ubifs_scanned_corruption(c, lnum, offs, buf);
351 ubifs_err("LEB %d scanning failed", lnum); 351 ubifs_err(c, "LEB %d scanning failed", lnum);
352 } 352 }
353 err = -EUCLEAN; 353 err = -EUCLEAN;
354 ubifs_scan_destroy(sleb); 354 ubifs_scan_destroy(sleb);
355 return ERR_PTR(err); 355 return ERR_PTR(err);
356 356
357error: 357error:
358 ubifs_err("LEB %d scanning failed, error %d", lnum, err); 358 ubifs_err(c, "LEB %d scanning failed, error %d", lnum, err);
359 ubifs_scan_destroy(sleb); 359 ubifs_scan_destroy(sleb);
360 return ERR_PTR(err); 360 return ERR_PTR(err);
361} 361}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 93e946561c5c..75e6f04bb795 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -70,13 +70,13 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
70 const struct ubifs_inode *ui = ubifs_inode(inode); 70 const struct ubifs_inode *ui = ubifs_inode(inode);
71 71
72 if (inode->i_size > c->max_inode_sz) { 72 if (inode->i_size > c->max_inode_sz) {
73 ubifs_err("inode is too large (%lld)", 73 ubifs_err(c, "inode is too large (%lld)",
74 (long long)inode->i_size); 74 (long long)inode->i_size);
75 return 1; 75 return 1;
76 } 76 }
77 77
78 if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { 78 if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
79 ubifs_err("unknown compression type %d", ui->compr_type); 79 ubifs_err(c, "unknown compression type %d", ui->compr_type);
80 return 2; 80 return 2;
81 } 81 }
82 82
@@ -90,7 +90,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
90 return 5; 90 return 5;
91 91
92 if (!ubifs_compr_present(ui->compr_type)) { 92 if (!ubifs_compr_present(ui->compr_type)) {
93 ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in", 93 ubifs_warn(c, "inode %lu uses '%s' compression, but it was not compiled in",
94 inode->i_ino, ubifs_compr_name(ui->compr_type)); 94 inode->i_ino, ubifs_compr_name(ui->compr_type));
95 } 95 }
96 96
@@ -242,14 +242,14 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
242 return inode; 242 return inode;
243 243
244out_invalid: 244out_invalid:
245 ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); 245 ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err);
246 ubifs_dump_node(c, ino); 246 ubifs_dump_node(c, ino);
247 ubifs_dump_inode(c, inode); 247 ubifs_dump_inode(c, inode);
248 err = -EINVAL; 248 err = -EINVAL;
249out_ino: 249out_ino:
250 kfree(ino); 250 kfree(ino);
251out: 251out:
252 ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); 252 ubifs_err(c, "failed to read inode %lu, error %d", inode->i_ino, err);
253 iget_failed(inode); 253 iget_failed(inode);
254 return ERR_PTR(err); 254 return ERR_PTR(err);
255} 255}
@@ -319,7 +319,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
319 if (inode->i_nlink) { 319 if (inode->i_nlink) {
320 err = ubifs_jnl_write_inode(c, inode); 320 err = ubifs_jnl_write_inode(c, inode);
321 if (err) 321 if (err)
322 ubifs_err("can't write inode %lu, error %d", 322 ubifs_err(c, "can't write inode %lu, error %d",
323 inode->i_ino, err); 323 inode->i_ino, err);
324 else 324 else
325 err = dbg_check_inode_size(c, inode, ui->ui_size); 325 err = dbg_check_inode_size(c, inode, ui->ui_size);
@@ -363,7 +363,7 @@ static void ubifs_evict_inode(struct inode *inode)
363 * Worst case we have a lost orphan inode wasting space, so a 363 * Worst case we have a lost orphan inode wasting space, so a
364 * simple error message is OK here. 364 * simple error message is OK here.
365 */ 365 */
366 ubifs_err("can't delete inode %lu, error %d", 366 ubifs_err(c, "can't delete inode %lu, error %d",
367 inode->i_ino, err); 367 inode->i_ino, err);
368 368
369out: 369out:
@@ -492,17 +492,17 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
492static int init_constants_early(struct ubifs_info *c) 492static int init_constants_early(struct ubifs_info *c)
493{ 493{
494 if (c->vi.corrupted) { 494 if (c->vi.corrupted) {
495 ubifs_warn("UBI volume is corrupted - read-only mode"); 495 ubifs_warn(c, "UBI volume is corrupted - read-only mode");
496 c->ro_media = 1; 496 c->ro_media = 1;
497 } 497 }
498 498
499 if (c->di.ro_mode) { 499 if (c->di.ro_mode) {
500 ubifs_msg("read-only UBI device"); 500 ubifs_msg(c, "read-only UBI device");
501 c->ro_media = 1; 501 c->ro_media = 1;
502 } 502 }
503 503
504 if (c->vi.vol_type == UBI_STATIC_VOLUME) { 504 if (c->vi.vol_type == UBI_STATIC_VOLUME) {
505 ubifs_msg("static UBI volume - read-only mode"); 505 ubifs_msg(c, "static UBI volume - read-only mode");
506 c->ro_media = 1; 506 c->ro_media = 1;
507 } 507 }
508 508
@@ -516,19 +516,19 @@ static int init_constants_early(struct ubifs_info *c)
516 c->max_write_shift = fls(c->max_write_size) - 1; 516 c->max_write_shift = fls(c->max_write_size) - 1;
517 517
518 if (c->leb_size < UBIFS_MIN_LEB_SZ) { 518 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
519 ubifs_err("too small LEBs (%d bytes), min. is %d bytes", 519 ubifs_err(c, "too small LEBs (%d bytes), min. is %d bytes",
520 c->leb_size, UBIFS_MIN_LEB_SZ); 520 c->leb_size, UBIFS_MIN_LEB_SZ);
521 return -EINVAL; 521 return -EINVAL;
522 } 522 }
523 523
524 if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { 524 if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
525 ubifs_err("too few LEBs (%d), min. is %d", 525 ubifs_err(c, "too few LEBs (%d), min. is %d",
526 c->leb_cnt, UBIFS_MIN_LEB_CNT); 526 c->leb_cnt, UBIFS_MIN_LEB_CNT);
527 return -EINVAL; 527 return -EINVAL;
528 } 528 }
529 529
530 if (!is_power_of_2(c->min_io_size)) { 530 if (!is_power_of_2(c->min_io_size)) {
531 ubifs_err("bad min. I/O size %d", c->min_io_size); 531 ubifs_err(c, "bad min. I/O size %d", c->min_io_size);
532 return -EINVAL; 532 return -EINVAL;
533 } 533 }
534 534
@@ -539,7 +539,7 @@ static int init_constants_early(struct ubifs_info *c)
539 if (c->max_write_size < c->min_io_size || 539 if (c->max_write_size < c->min_io_size ||
540 c->max_write_size % c->min_io_size || 540 c->max_write_size % c->min_io_size ||
541 !is_power_of_2(c->max_write_size)) { 541 !is_power_of_2(c->max_write_size)) {
542 ubifs_err("bad write buffer size %d for %d min. I/O unit", 542 ubifs_err(c, "bad write buffer size %d for %d min. I/O unit",
543 c->max_write_size, c->min_io_size); 543 c->max_write_size, c->min_io_size);
544 return -EINVAL; 544 return -EINVAL;
545 } 545 }
@@ -665,7 +665,7 @@ static int init_constants_sb(struct ubifs_info *c)
665 tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; 665 tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
666 tmp = ALIGN(tmp, c->min_io_size); 666 tmp = ALIGN(tmp, c->min_io_size);
667 if (tmp > c->leb_size) { 667 if (tmp > c->leb_size) {
668 ubifs_err("too small LEB size %d, at least %d needed", 668 ubifs_err(c, "too small LEB size %d, at least %d needed",
669 c->leb_size, tmp); 669 c->leb_size, tmp);
670 return -EINVAL; 670 return -EINVAL;
671 } 671 }
@@ -680,7 +680,7 @@ static int init_constants_sb(struct ubifs_info *c)
680 tmp /= c->leb_size; 680 tmp /= c->leb_size;
681 tmp += 1; 681 tmp += 1;
682 if (c->log_lebs < tmp) { 682 if (c->log_lebs < tmp) {
683 ubifs_err("too small log %d LEBs, required min. %d LEBs", 683 ubifs_err(c, "too small log %d LEBs, required min. %d LEBs",
684 c->log_lebs, tmp); 684 c->log_lebs, tmp);
685 return -EINVAL; 685 return -EINVAL;
686 } 686 }
@@ -772,7 +772,7 @@ static int take_gc_lnum(struct ubifs_info *c)
772 int err; 772 int err;
773 773
774 if (c->gc_lnum == -1) { 774 if (c->gc_lnum == -1) {
775 ubifs_err("no LEB for GC"); 775 ubifs_err(c, "no LEB for GC");
776 return -EINVAL; 776 return -EINVAL;
777 } 777 }
778 778
@@ -857,7 +857,7 @@ static void free_orphans(struct ubifs_info *c)
857 orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); 857 orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
858 list_del(&orph->list); 858 list_del(&orph->list);
859 kfree(orph); 859 kfree(orph);
860 ubifs_err("orphan list not empty at unmount"); 860 ubifs_err(c, "orphan list not empty at unmount");
861 } 861 }
862 862
863 vfree(c->orph_buf); 863 vfree(c->orph_buf);
@@ -954,7 +954,8 @@ static const match_table_t tokens = {
954 */ 954 */
955static int parse_standard_option(const char *option) 955static int parse_standard_option(const char *option)
956{ 956{
957 ubifs_msg("parse %s", option); 957
958 pr_notice("UBIFS: parse %s\n", option);
958 if (!strcmp(option, "sync")) 959 if (!strcmp(option, "sync"))
959 return MS_SYNCHRONOUS; 960 return MS_SYNCHRONOUS;
960 return 0; 961 return 0;
@@ -1026,7 +1027,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
1026 else if (!strcmp(name, "zlib")) 1027 else if (!strcmp(name, "zlib"))
1027 c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; 1028 c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
1028 else { 1029 else {
1029 ubifs_err("unknown compressor \"%s\"", name); 1030 ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready?
1030 kfree(name); 1031 kfree(name);
1031 return -EINVAL; 1032 return -EINVAL;
1032 } 1033 }
@@ -1042,7 +1043,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
1042 1043
1043 flag = parse_standard_option(p); 1044 flag = parse_standard_option(p);
1044 if (!flag) { 1045 if (!flag) {
1045 ubifs_err("unrecognized mount option \"%s\" or missing value", 1046 ubifs_err(c, "unrecognized mount option \"%s\" or missing value",
1046 p); 1047 p);
1047 return -EINVAL; 1048 return -EINVAL;
1048 } 1049 }
@@ -1105,7 +1106,7 @@ again:
1105 } 1106 }
1106 1107
1107 /* Just disable bulk-read */ 1108 /* Just disable bulk-read */
1108 ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it", 1109 ubifs_warn(c, "cannot allocate %d bytes of memory for bulk-read, disabling it",
1109 c->max_bu_buf_len); 1110 c->max_bu_buf_len);
1110 c->mount_opts.bulk_read = 1; 1111 c->mount_opts.bulk_read = 1;
1111 c->bulk_read = 0; 1112 c->bulk_read = 0;
@@ -1124,7 +1125,7 @@ static int check_free_space(struct ubifs_info *c)
1124{ 1125{
1125 ubifs_assert(c->dark_wm > 0); 1126 ubifs_assert(c->dark_wm > 0);
1126 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { 1127 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1127 ubifs_err("insufficient free space to mount in R/W mode"); 1128 ubifs_err(c, "insufficient free space to mount in R/W mode");
1128 ubifs_dump_budg(c, &c->bi); 1129 ubifs_dump_budg(c, &c->bi);
1129 ubifs_dump_lprops(c); 1130 ubifs_dump_lprops(c);
1130 return -ENOSPC; 1131 return -ENOSPC;
@@ -1166,14 +1167,14 @@ static int mount_ubifs(struct ubifs_info *c)
1166 * This UBI volume is empty, and read-only, or the file system 1167 * This UBI volume is empty, and read-only, or the file system
1167 * is mounted read-only - we cannot format it. 1168 * is mounted read-only - we cannot format it.
1168 */ 1169 */
1169 ubifs_err("can't format empty UBI volume: read-only %s", 1170 ubifs_err(c, "can't format empty UBI volume: read-only %s",
1170 c->ro_media ? "UBI volume" : "mount"); 1171 c->ro_media ? "UBI volume" : "mount");
1171 err = -EROFS; 1172 err = -EROFS;
1172 goto out_free; 1173 goto out_free;
1173 } 1174 }
1174 1175
1175 if (c->ro_media && !c->ro_mount) { 1176 if (c->ro_media && !c->ro_mount) {
1176 ubifs_err("cannot mount read-write - read-only media"); 1177 ubifs_err(c, "cannot mount read-write - read-only media");
1177 err = -EROFS; 1178 err = -EROFS;
1178 goto out_free; 1179 goto out_free;
1179 } 1180 }
@@ -1221,7 +1222,7 @@ static int mount_ubifs(struct ubifs_info *c)
1221 * or overridden by mount options is actually compiled in. 1222 * or overridden by mount options is actually compiled in.
1222 */ 1223 */
1223 if (!ubifs_compr_present(c->default_compr)) { 1224 if (!ubifs_compr_present(c->default_compr)) {
1224 ubifs_err("'compressor \"%s\" is not compiled in", 1225 ubifs_err(c, "'compressor \"%s\" is not compiled in",
1225 ubifs_compr_name(c->default_compr)); 1226 ubifs_compr_name(c->default_compr));
1226 err = -ENOTSUPP; 1227 err = -ENOTSUPP;
1227 goto out_free; 1228 goto out_free;
@@ -1250,7 +1251,7 @@ static int mount_ubifs(struct ubifs_info *c)
1250 if (IS_ERR(c->bgt)) { 1251 if (IS_ERR(c->bgt)) {
1251 err = PTR_ERR(c->bgt); 1252 err = PTR_ERR(c->bgt);
1252 c->bgt = NULL; 1253 c->bgt = NULL;
1253 ubifs_err("cannot spawn \"%s\", error %d", 1254 ubifs_err(c, "cannot spawn \"%s\", error %d",
1254 c->bgt_name, err); 1255 c->bgt_name, err);
1255 goto out_wbufs; 1256 goto out_wbufs;
1256 } 1257 }
@@ -1264,7 +1265,7 @@ static int mount_ubifs(struct ubifs_info *c)
1264 init_constants_master(c); 1265 init_constants_master(c);
1265 1266
1266 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1267 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1267 ubifs_msg("recovery needed"); 1268 ubifs_msg(c, "recovery needed");
1268 c->need_recovery = 1; 1269 c->need_recovery = 1;
1269 } 1270 }
1270 1271
@@ -1284,7 +1285,7 @@ static int mount_ubifs(struct ubifs_info *c)
1284 goto out_lpt; 1285 goto out_lpt;
1285 } 1286 }
1286 1287
1287 if (!c->ro_mount) { 1288 if (!c->ro_mount && !c->need_recovery) {
1288 /* 1289 /*
1289 * Set the "dirty" flag so that if we reboot uncleanly we 1290 * Set the "dirty" flag so that if we reboot uncleanly we
1290 * will notice this immediately on the next mount. 1291 * will notice this immediately on the next mount.
@@ -1373,10 +1374,10 @@ static int mount_ubifs(struct ubifs_info *c)
1373 1374
1374 if (c->need_recovery) { 1375 if (c->need_recovery) {
1375 if (c->ro_mount) 1376 if (c->ro_mount)
1376 ubifs_msg("recovery deferred"); 1377 ubifs_msg(c, "recovery deferred");
1377 else { 1378 else {
1378 c->need_recovery = 0; 1379 c->need_recovery = 0;
1379 ubifs_msg("recovery completed"); 1380 ubifs_msg(c, "recovery completed");
1380 /* 1381 /*
1381 * GC LEB has to be empty and taken at this point. But 1382 * GC LEB has to be empty and taken at this point. But
1382 * the journal head LEBs may also be accounted as 1383 * the journal head LEBs may also be accounted as
@@ -1397,20 +1398,20 @@ static int mount_ubifs(struct ubifs_info *c)
1397 1398
1398 c->mounting = 0; 1399 c->mounting = 0;
1399 1400
1400 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", 1401 ubifs_msg(c, "UBIFS: mounted UBI device %d, volume %d, name \"%s\"%s",
1401 c->vi.ubi_num, c->vi.vol_id, c->vi.name, 1402 c->vi.ubi_num, c->vi.vol_id, c->vi.name,
1402 c->ro_mount ? ", R/O mode" : ""); 1403 c->ro_mount ? ", R/O mode" : "");
1403 x = (long long)c->main_lebs * c->leb_size; 1404 x = (long long)c->main_lebs * c->leb_size;
1404 y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; 1405 y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
1405 ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", 1406 ubifs_msg(c, "LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
1406 c->leb_size, c->leb_size >> 10, c->min_io_size, 1407 c->leb_size, c->leb_size >> 10, c->min_io_size,
1407 c->max_write_size); 1408 c->max_write_size);
1408 ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)", 1409 ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
1409 x, x >> 20, c->main_lebs, 1410 x, x >> 20, c->main_lebs,
1410 y, y >> 20, c->log_lebs + c->max_bud_cnt); 1411 y, y >> 20, c->log_lebs + c->max_bud_cnt);
1411 ubifs_msg("reserved for root: %llu bytes (%llu KiB)", 1412 ubifs_msg(c, "reserved for root: %llu bytes (%llu KiB)",
1412 c->report_rp_size, c->report_rp_size >> 10); 1413 c->report_rp_size, c->report_rp_size >> 10);
1413 ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s", 1414 ubifs_msg(c, "media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
1414 c->fmt_version, c->ro_compat_version, 1415 c->fmt_version, c->ro_compat_version,
1415 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid, 1416 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
1416 c->big_lpt ? ", big LPT model" : ", small LPT model"); 1417 c->big_lpt ? ", big LPT model" : ", small LPT model");
@@ -1543,8 +1544,8 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1543 int err, lnum; 1544 int err, lnum;
1544 1545
1545 if (c->rw_incompat) { 1546 if (c->rw_incompat) {
1546 ubifs_err("the file-system is not R/W-compatible"); 1547 ubifs_err(c, "the file-system is not R/W-compatible");
1547 ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d", 1548 ubifs_msg(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
1548 c->fmt_version, c->ro_compat_version, 1549 c->fmt_version, c->ro_compat_version,
1549 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); 1550 UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
1550 return -EROFS; 1551 return -EROFS;
@@ -1581,7 +1582,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1581 } 1582 }
1582 1583
1583 if (c->need_recovery) { 1584 if (c->need_recovery) {
1584 ubifs_msg("completing deferred recovery"); 1585 ubifs_msg(c, "completing deferred recovery");
1585 err = ubifs_write_rcvrd_mst_node(c); 1586 err = ubifs_write_rcvrd_mst_node(c);
1586 if (err) 1587 if (err)
1587 goto out; 1588 goto out;
@@ -1630,7 +1631,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1630 if (IS_ERR(c->bgt)) { 1631 if (IS_ERR(c->bgt)) {
1631 err = PTR_ERR(c->bgt); 1632 err = PTR_ERR(c->bgt);
1632 c->bgt = NULL; 1633 c->bgt = NULL;
1633 ubifs_err("cannot spawn \"%s\", error %d", 1634 ubifs_err(c, "cannot spawn \"%s\", error %d",
1634 c->bgt_name, err); 1635 c->bgt_name, err);
1635 goto out; 1636 goto out;
1636 } 1637 }
@@ -1664,7 +1665,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1664 1665
1665 if (c->need_recovery) { 1666 if (c->need_recovery) {
1666 c->need_recovery = 0; 1667 c->need_recovery = 0;
1667 ubifs_msg("deferred recovery completed"); 1668 ubifs_msg(c, "deferred recovery completed");
1668 } else { 1669 } else {
1669 /* 1670 /*
1670 * Do not run the debugging space check if the were doing 1671 * Do not run the debugging space check if the were doing
@@ -1752,8 +1753,7 @@ static void ubifs_put_super(struct super_block *sb)
1752 int i; 1753 int i;
1753 struct ubifs_info *c = sb->s_fs_info; 1754 struct ubifs_info *c = sb->s_fs_info;
1754 1755
1755 ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, 1756 ubifs_msg(c, "un-mount UBI device %d", c->vi.ubi_num);
1756 c->vi.vol_id);
1757 1757
1758 /* 1758 /*
1759 * The following asserts are only valid if there has not been a failure 1759 * The following asserts are only valid if there has not been a failure
@@ -1809,7 +1809,7 @@ static void ubifs_put_super(struct super_block *sb)
1809 * next mount, so we just print a message and 1809 * next mount, so we just print a message and
1810 * continue to unmount normally. 1810 * continue to unmount normally.
1811 */ 1811 */
1812 ubifs_err("failed to write master node, error %d", 1812 ubifs_err(c, "failed to write master node, error %d",
1813 err); 1813 err);
1814 } else { 1814 } else {
1815 for (i = 0; i < c->jhead_cnt; i++) 1815 for (i = 0; i < c->jhead_cnt; i++)
@@ -1834,17 +1834,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1834 1834
1835 err = ubifs_parse_options(c, data, 1); 1835 err = ubifs_parse_options(c, data, 1);
1836 if (err) { 1836 if (err) {
1837 ubifs_err("invalid or unknown remount parameter"); 1837 ubifs_err(c, "invalid or unknown remount parameter");
1838 return err; 1838 return err;
1839 } 1839 }
1840 1840
1841 if (c->ro_mount && !(*flags & MS_RDONLY)) { 1841 if (c->ro_mount && !(*flags & MS_RDONLY)) {
1842 if (c->ro_error) { 1842 if (c->ro_error) {
1843 ubifs_msg("cannot re-mount R/W due to prior errors"); 1843 ubifs_msg(c, "cannot re-mount R/W due to prior errors");
1844 return -EROFS; 1844 return -EROFS;
1845 } 1845 }
1846 if (c->ro_media) { 1846 if (c->ro_media) {
1847 ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); 1847 ubifs_msg(c, "cannot re-mount R/W - UBI volume is R/O");
1848 return -EROFS; 1848 return -EROFS;
1849 } 1849 }
1850 err = ubifs_remount_rw(c); 1850 err = ubifs_remount_rw(c);
@@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1852 return err; 1852 return err;
1853 } else if (!c->ro_mount && (*flags & MS_RDONLY)) { 1853 } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
1854 if (c->ro_error) { 1854 if (c->ro_error) {
1855 ubifs_msg("cannot re-mount R/O due to prior errors"); 1855 ubifs_msg(c, "cannot re-mount R/O due to prior errors");
1856 return -EROFS; 1856 return -EROFS;
1857 } 1857 }
1858 ubifs_remount_ro(c); 1858 ubifs_remount_ro(c);
@@ -2104,8 +2104,8 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2104 */ 2104 */
2105 ubi = open_ubi(name, UBI_READONLY); 2105 ubi = open_ubi(name, UBI_READONLY);
2106 if (IS_ERR(ubi)) { 2106 if (IS_ERR(ubi)) {
2107 ubifs_err("cannot open \"%s\", error %d", 2107 pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
2108 name, (int)PTR_ERR(ubi)); 2108 current->pid, name, (int)PTR_ERR(ubi));
2109 return ERR_CAST(ubi); 2109 return ERR_CAST(ubi);
2110 } 2110 }
2111 2111
@@ -2233,8 +2233,8 @@ static int __init ubifs_init(void)
2233 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. 2233 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
2234 */ 2234 */
2235 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { 2235 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
2236 ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes", 2236 pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
2237 (unsigned int)PAGE_CACHE_SIZE); 2237 current->pid, (unsigned int)PAGE_CACHE_SIZE);
2238 return -EINVAL; 2238 return -EINVAL;
2239 } 2239 }
2240 2240
@@ -2257,7 +2257,8 @@ static int __init ubifs_init(void)
2257 2257
2258 err = register_filesystem(&ubifs_fs_type); 2258 err = register_filesystem(&ubifs_fs_type);
2259 if (err) { 2259 if (err) {
2260 ubifs_err("cannot register file system, error %d", err); 2260 pr_err("UBIFS error (pid %d): cannot register file system, error %d",
2261 current->pid, err);
2261 goto out_dbg; 2262 goto out_dbg;
2262 } 2263 }
2263 return 0; 2264 return 0;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 6793db0754f6..957f5757f374 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -98,7 +98,7 @@ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
98 else if (offs > o->offs) 98 else if (offs > o->offs)
99 p = &(*p)->rb_right; 99 p = &(*p)->rb_right;
100 else { 100 else {
101 ubifs_err("old idx added twice!"); 101 ubifs_err(c, "old idx added twice!");
102 kfree(old_idx); 102 kfree(old_idx);
103 return 0; 103 return 0;
104 } 104 }
@@ -447,7 +447,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
447 447
448 err = ubifs_leb_read(c, lnum, buf, offs, len, 1); 448 err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
449 if (err) { 449 if (err) {
450 ubifs_err("cannot read node type %d from LEB %d:%d, error %d", 450 ubifs_err(c, "cannot read node type %d from LEB %d:%d, error %d",
451 type, lnum, offs, err); 451 type, lnum, offs, err);
452 return err; 452 return err;
453 } 453 }
@@ -1684,27 +1684,27 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
1684 int err, len; 1684 int err, len;
1685 1685
1686 if (ch->node_type != UBIFS_DATA_NODE) { 1686 if (ch->node_type != UBIFS_DATA_NODE) {
1687 ubifs_err("bad node type (%d but expected %d)", 1687 ubifs_err(c, "bad node type (%d but expected %d)",
1688 ch->node_type, UBIFS_DATA_NODE); 1688 ch->node_type, UBIFS_DATA_NODE);
1689 goto out_err; 1689 goto out_err;
1690 } 1690 }
1691 1691
1692 err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); 1692 err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
1693 if (err) { 1693 if (err) {
1694 ubifs_err("expected node type %d", UBIFS_DATA_NODE); 1694 ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE);
1695 goto out; 1695 goto out;
1696 } 1696 }
1697 1697
1698 len = le32_to_cpu(ch->len); 1698 len = le32_to_cpu(ch->len);
1699 if (len != zbr->len) { 1699 if (len != zbr->len) {
1700 ubifs_err("bad node length %d, expected %d", len, zbr->len); 1700 ubifs_err(c, "bad node length %d, expected %d", len, zbr->len);
1701 goto out_err; 1701 goto out_err;
1702 } 1702 }
1703 1703
1704 /* Make sure the key of the read node is correct */ 1704 /* Make sure the key of the read node is correct */
1705 key_read(c, buf + UBIFS_KEY_OFFSET, &key1); 1705 key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
1706 if (!keys_eq(c, &zbr->key, &key1)) { 1706 if (!keys_eq(c, &zbr->key, &key1)) {
1707 ubifs_err("bad key in node at LEB %d:%d", 1707 ubifs_err(c, "bad key in node at LEB %d:%d",
1708 zbr->lnum, zbr->offs); 1708 zbr->lnum, zbr->offs);
1709 dbg_tnck(&zbr->key, "looked for key "); 1709 dbg_tnck(&zbr->key, "looked for key ");
1710 dbg_tnck(&key1, "found node's key "); 1710 dbg_tnck(&key1, "found node's key ");
@@ -1716,7 +1716,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
1716out_err: 1716out_err:
1717 err = -EINVAL; 1717 err = -EINVAL;
1718out: 1718out:
1719 ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); 1719 ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs);
1720 ubifs_dump_node(c, buf); 1720 ubifs_dump_node(c, buf);
1721 dump_stack(); 1721 dump_stack();
1722 return err; 1722 return err;
@@ -1741,7 +1741,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1741 len = bu->zbranch[bu->cnt - 1].offs; 1741 len = bu->zbranch[bu->cnt - 1].offs;
1742 len += bu->zbranch[bu->cnt - 1].len - offs; 1742 len += bu->zbranch[bu->cnt - 1].len - offs;
1743 if (len > bu->buf_len) { 1743 if (len > bu->buf_len) {
1744 ubifs_err("buffer too small %d vs %d", bu->buf_len, len); 1744 ubifs_err(c, "buffer too small %d vs %d", bu->buf_len, len);
1745 return -EINVAL; 1745 return -EINVAL;
1746 } 1746 }
1747 1747
@@ -1757,7 +1757,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1757 return -EAGAIN; 1757 return -EAGAIN;
1758 1758
1759 if (err && err != -EBADMSG) { 1759 if (err && err != -EBADMSG) {
1760 ubifs_err("failed to read from LEB %d:%d, error %d", 1760 ubifs_err(c, "failed to read from LEB %d:%d, error %d",
1761 lnum, offs, err); 1761 lnum, offs, err);
1762 dump_stack(); 1762 dump_stack();
1763 dbg_tnck(&bu->key, "key "); 1763 dbg_tnck(&bu->key, "key ");
@@ -3313,7 +3313,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3313 3313
3314out_dump: 3314out_dump:
3315 block = key_block(c, key); 3315 block = key_block(c, key);
3316 ubifs_err("inode %lu has size %lld, but there are data at offset %lld", 3316 ubifs_err(c, "inode %lu has size %lld, but there are data at offset %lld",
3317 (unsigned long)inode->i_ino, size, 3317 (unsigned long)inode->i_ino, size,
3318 ((loff_t)block) << UBIFS_BLOCK_SHIFT); 3318 ((loff_t)block) << UBIFS_BLOCK_SHIFT);
3319 mutex_unlock(&c->tnc_mutex); 3319 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 7a205e046776..b45345d701e7 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -53,7 +53,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
53 br->offs = cpu_to_le32(zbr->offs); 53 br->offs = cpu_to_le32(zbr->offs);
54 br->len = cpu_to_le32(zbr->len); 54 br->len = cpu_to_le32(zbr->len);
55 if (!zbr->lnum || !zbr->len) { 55 if (!zbr->lnum || !zbr->len) {
56 ubifs_err("bad ref in znode"); 56 ubifs_err(c, "bad ref in znode");
57 ubifs_dump_znode(c, znode); 57 ubifs_dump_znode(c, znode);
58 if (zbr->znode) 58 if (zbr->znode)
59 ubifs_dump_znode(c, zbr->znode); 59 ubifs_dump_znode(c, zbr->znode);
@@ -384,7 +384,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
384 * Do not print scary warnings if the debugging 384 * Do not print scary warnings if the debugging
385 * option which forces in-the-gaps is enabled. 385 * option which forces in-the-gaps is enabled.
386 */ 386 */
387 ubifs_warn("out of space"); 387 ubifs_warn(c, "out of space");
388 ubifs_dump_budg(c, &c->bi); 388 ubifs_dump_budg(c, &c->bi);
389 ubifs_dump_lprops(c); 389 ubifs_dump_lprops(c);
390 } 390 }
@@ -441,7 +441,7 @@ static int layout_in_empty_space(struct ubifs_info *c)
441 /* Determine the index node position */ 441 /* Determine the index node position */
442 if (lnum == -1) { 442 if (lnum == -1) {
443 if (c->ileb_nxt >= c->ileb_cnt) { 443 if (c->ileb_nxt >= c->ileb_cnt) {
444 ubifs_err("out of space"); 444 ubifs_err(c, "out of space");
445 return -ENOSPC; 445 return -ENOSPC;
446 } 446 }
447 lnum = c->ilebs[c->ileb_nxt++]; 447 lnum = c->ilebs[c->ileb_nxt++];
@@ -855,7 +855,7 @@ static int write_index(struct ubifs_info *c)
855 br->offs = cpu_to_le32(zbr->offs); 855 br->offs = cpu_to_le32(zbr->offs);
856 br->len = cpu_to_le32(zbr->len); 856 br->len = cpu_to_le32(zbr->len);
857 if (!zbr->lnum || !zbr->len) { 857 if (!zbr->lnum || !zbr->len) {
858 ubifs_err("bad ref in znode"); 858 ubifs_err(c, "bad ref in znode");
859 ubifs_dump_znode(c, znode); 859 ubifs_dump_znode(c, znode);
860 if (zbr->znode) 860 if (zbr->znode)
861 ubifs_dump_znode(c, zbr->znode); 861 ubifs_dump_znode(c, zbr->znode);
@@ -875,7 +875,7 @@ static int write_index(struct ubifs_info *c)
875 875
876 if (lnum != znode->lnum || offs != znode->offs || 876 if (lnum != znode->lnum || offs != znode->offs ||
877 len != znode->len) { 877 len != znode->len) {
878 ubifs_err("inconsistent znode posn"); 878 ubifs_err(c, "inconsistent znode posn");
879 return -EINVAL; 879 return -EINVAL;
880 } 880 }
881 881
@@ -973,7 +973,7 @@ static int write_index(struct ubifs_info *c)
973 973
974 if (lnum != c->dbg->new_ihead_lnum || 974 if (lnum != c->dbg->new_ihead_lnum ||
975 buf_offs != c->dbg->new_ihead_offs) { 975 buf_offs != c->dbg->new_ihead_offs) {
976 ubifs_err("inconsistent ihead"); 976 ubifs_err(c, "inconsistent ihead");
977 return -EINVAL; 977 return -EINVAL;
978 } 978 }
979 979
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index f6bf8995c7b1..93f5b7859e6f 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -293,9 +293,9 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
293 lnum, offs, znode->level, znode->child_cnt); 293 lnum, offs, znode->level, znode->child_cnt);
294 294
295 if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { 295 if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
296 ubifs_err("current fanout %d, branch count %d", 296 ubifs_err(c, "current fanout %d, branch count %d",
297 c->fanout, znode->child_cnt); 297 c->fanout, znode->child_cnt);
298 ubifs_err("max levels %d, znode level %d", 298 ubifs_err(c, "max levels %d, znode level %d",
299 UBIFS_MAX_LEVELS, znode->level); 299 UBIFS_MAX_LEVELS, znode->level);
300 err = 1; 300 err = 1;
301 goto out_dump; 301 goto out_dump;
@@ -316,7 +316,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
316 if (zbr->lnum < c->main_first || 316 if (zbr->lnum < c->main_first ||
317 zbr->lnum >= c->leb_cnt || zbr->offs < 0 || 317 zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
318 zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { 318 zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
319 ubifs_err("bad branch %d", i); 319 ubifs_err(c, "bad branch %d", i);
320 err = 2; 320 err = 2;
321 goto out_dump; 321 goto out_dump;
322 } 322 }
@@ -328,7 +328,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
328 case UBIFS_XENT_KEY: 328 case UBIFS_XENT_KEY:
329 break; 329 break;
330 default: 330 default:
331 ubifs_err("bad key type at slot %d: %d", 331 ubifs_err(c, "bad key type at slot %d: %d",
332 i, key_type(c, &zbr->key)); 332 i, key_type(c, &zbr->key));
333 err = 3; 333 err = 3;
334 goto out_dump; 334 goto out_dump;
@@ -340,17 +340,17 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
340 type = key_type(c, &zbr->key); 340 type = key_type(c, &zbr->key);
341 if (c->ranges[type].max_len == 0) { 341 if (c->ranges[type].max_len == 0) {
342 if (zbr->len != c->ranges[type].len) { 342 if (zbr->len != c->ranges[type].len) {
343 ubifs_err("bad target node (type %d) length (%d)", 343 ubifs_err(c, "bad target node (type %d) length (%d)",
344 type, zbr->len); 344 type, zbr->len);
345 ubifs_err("have to be %d", c->ranges[type].len); 345 ubifs_err(c, "have to be %d", c->ranges[type].len);
346 err = 4; 346 err = 4;
347 goto out_dump; 347 goto out_dump;
348 } 348 }
349 } else if (zbr->len < c->ranges[type].min_len || 349 } else if (zbr->len < c->ranges[type].min_len ||
350 zbr->len > c->ranges[type].max_len) { 350 zbr->len > c->ranges[type].max_len) {
351 ubifs_err("bad target node (type %d) length (%d)", 351 ubifs_err(c, "bad target node (type %d) length (%d)",
352 type, zbr->len); 352 type, zbr->len);
353 ubifs_err("have to be in range of %d-%d", 353 ubifs_err(c, "have to be in range of %d-%d",
354 c->ranges[type].min_len, 354 c->ranges[type].min_len,
355 c->ranges[type].max_len); 355 c->ranges[type].max_len);
356 err = 5; 356 err = 5;
@@ -370,12 +370,12 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
370 370
371 cmp = keys_cmp(c, key1, key2); 371 cmp = keys_cmp(c, key1, key2);
372 if (cmp > 0) { 372 if (cmp > 0) {
373 ubifs_err("bad key order (keys %d and %d)", i, i + 1); 373 ubifs_err(c, "bad key order (keys %d and %d)", i, i + 1);
374 err = 6; 374 err = 6;
375 goto out_dump; 375 goto out_dump;
376 } else if (cmp == 0 && !is_hash_key(c, key1)) { 376 } else if (cmp == 0 && !is_hash_key(c, key1)) {
377 /* These can only be keys with colliding hash */ 377 /* These can only be keys with colliding hash */
378 ubifs_err("keys %d and %d are not hashed but equivalent", 378 ubifs_err(c, "keys %d and %d are not hashed but equivalent",
379 i, i + 1); 379 i, i + 1);
380 err = 7; 380 err = 7;
381 goto out_dump; 381 goto out_dump;
@@ -386,7 +386,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
386 return 0; 386 return 0;
387 387
388out_dump: 388out_dump:
389 ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); 389 ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
390 ubifs_dump_node(c, idx); 390 ubifs_dump_node(c, idx);
391 kfree(idx); 391 kfree(idx);
392 return -EINVAL; 392 return -EINVAL;
@@ -482,7 +482,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
482 /* Make sure the key of the read node is correct */ 482 /* Make sure the key of the read node is correct */
483 key_read(c, node + UBIFS_KEY_OFFSET, &key1); 483 key_read(c, node + UBIFS_KEY_OFFSET, &key1);
484 if (!keys_eq(c, key, &key1)) { 484 if (!keys_eq(c, key, &key1)) {
485 ubifs_err("bad key in node at LEB %d:%d", 485 ubifs_err(c, "bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs); 486 zbr->lnum, zbr->offs);
487 dbg_tnck(key, "looked for key "); 487 dbg_tnck(key, "looked for key ");
488 dbg_tnck(&key1, "but found node's key "); 488 dbg_tnck(&key1, "but found node's key ");
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index bc04b9c69891..de759022f3d6 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -43,15 +43,19 @@
43#define UBIFS_VERSION 1 43#define UBIFS_VERSION 1
44 44
45/* Normal UBIFS messages */ 45/* Normal UBIFS messages */
46#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__) 46#define ubifs_msg(c, fmt, ...) \
47 pr_notice("UBIFS (ubi%d:%d): " fmt "\n", \
48 (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
47/* UBIFS error messages */ 49/* UBIFS error messages */
48#define ubifs_err(fmt, ...) \ 50#define ubifs_err(c, fmt, ...) \
49 pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \ 51 pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n", \
52 (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
50 __func__, ##__VA_ARGS__) 53 __func__, ##__VA_ARGS__)
51/* UBIFS warning messages */ 54/* UBIFS warning messages */
52#define ubifs_warn(fmt, ...) \ 55#define ubifs_warn(c, fmt, ...) \
53 pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \ 56 pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n", \
54 current->pid, __func__, ##__VA_ARGS__) 57 (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
58 __func__, ##__VA_ARGS__)
55/* 59/*
56 * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description 60 * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
57 * object as an argument. 61 * object as an argument.
@@ -59,7 +63,7 @@
59#define ubifs_errc(c, fmt, ...) \ 63#define ubifs_errc(c, fmt, ...) \
60 do { \ 64 do { \
61 if (!(c)->probing) \ 65 if (!(c)->probing) \
62 ubifs_err(fmt, ##__VA_ARGS__); \ 66 ubifs_err(c, fmt, ##__VA_ARGS__); \
63 } while (0) 67 } while (0)
64 68
65/* UBIFS file system VFS magic number */ 69/* UBIFS file system VFS magic number */
@@ -158,7 +162,7 @@
158#define WORST_COMPR_FACTOR 2 162#define WORST_COMPR_FACTOR 2
159 163
160/* 164/*
161 * How much memory is needed for a buffer where we comress a data node. 165 * How much memory is needed for a buffer where we compress a data node.
162 */ 166 */
163#define COMPRESSED_DATA_NODE_BUF_SZ \ 167#define COMPRESSED_DATA_NODE_BUF_SZ \
164 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) 168 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
@@ -664,7 +668,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
664 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes 668 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
665 * fields 669 * fields
666 * @softlimit: soft write-buffer timeout interval 670 * @softlimit: soft write-buffer timeout interval
667 * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit 671 * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit
668 * and @softlimit + @delta) 672 * and @softlimit + @delta)
669 * @timer: write-buffer timer 673 * @timer: write-buffer timer
670 * @no_timer: non-zero if this write-buffer does not have a timer 674 * @no_timer: non-zero if this write-buffer does not have a timer
@@ -930,9 +934,9 @@ struct ubifs_orphan {
930/** 934/**
931 * struct ubifs_mount_opts - UBIFS-specific mount options information. 935 * struct ubifs_mount_opts - UBIFS-specific mount options information.
932 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) 936 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
933 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) 937 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable)
934 * @chk_data_crc: enable/disable CRC data checking when reading data nodes 938 * @chk_data_crc: enable/disable CRC data checking when reading data nodes
935 * (%0 default, %1 disabe, %2 enable) 939 * (%0 default, %1 disable, %2 enable)
936 * @override_compr: override default compressor (%0 - do not override and use 940 * @override_compr: override default compressor (%0 - do not override and use
937 * superblock compressor, %1 - override and use compressor 941 * superblock compressor, %1 - override and use compressor
938 * specified in @compr_type) 942 * specified in @compr_type)
@@ -962,9 +966,9 @@ struct ubifs_mount_opts {
962 * optimization) 966 * optimization)
963 * @nospace_rp: the same as @nospace, but additionally means that even reserved 967 * @nospace_rp: the same as @nospace, but additionally means that even reserved
964 * pool is full 968 * pool is full
965 * @page_budget: budget for a page (constant, nenver changed after mount) 969 * @page_budget: budget for a page (constant, never changed after mount)
966 * @inode_budget: budget for an inode (constant, nenver changed after mount) 970 * @inode_budget: budget for an inode (constant, never changed after mount)
967 * @dent_budget: budget for a directory entry (constant, nenver changed after 971 * @dent_budget: budget for a directory entry (constant, never changed after
968 * mount) 972 * mount)
969 */ 973 */
970struct ubifs_budg_info { 974struct ubifs_budg_info {
@@ -1787,10 +1791,10 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1787/* compressor.c */ 1791/* compressor.c */
1788int __init ubifs_compressors_init(void); 1792int __init ubifs_compressors_init(void);
1789void ubifs_compressors_exit(void); 1793void ubifs_compressors_exit(void);
1790void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, 1794void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len,
1791 int *compr_type); 1795 void *out_buf, int *out_len, int *compr_type);
1792int ubifs_decompress(const void *buf, int len, void *out, int *out_len, 1796int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
1793 int compr_type); 1797 void *out, int *out_len, int compr_type);
1794 1798
1795#include "debug.h" 1799#include "debug.h"
1796#include "misc.h" 1800#include "misc.h"
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index a92be244a6fb..3659b1934500 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -108,7 +108,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
108 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; 108 .dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
109 109
110 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) { 110 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) {
111 ubifs_err("inode %lu already has too many xattrs (%d), cannot create more", 111 ubifs_err(c, "inode %lu already has too many xattrs (%d), cannot create more",
112 host->i_ino, host_ui->xattr_cnt); 112 host->i_ino, host_ui->xattr_cnt);
113 return -ENOSPC; 113 return -ENOSPC;
114 } 114 }
@@ -120,7 +120,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
120 */ 120 */
121 names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1; 121 names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1;
122 if (names_len > XATTR_LIST_MAX) { 122 if (names_len > XATTR_LIST_MAX) {
123 ubifs_err("cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d", 123 ubifs_err(c, "cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d",
124 host->i_ino, names_len, XATTR_LIST_MAX); 124 host->i_ino, names_len, XATTR_LIST_MAX);
125 return -ENOSPC; 125 return -ENOSPC;
126 } 126 }
@@ -288,13 +288,13 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
288 288
289 inode = ubifs_iget(c->vfs_sb, inum); 289 inode = ubifs_iget(c->vfs_sb, inum);
290 if (IS_ERR(inode)) { 290 if (IS_ERR(inode)) {
291 ubifs_err("dead extended attribute entry, error %d", 291 ubifs_err(c, "dead extended attribute entry, error %d",
292 (int)PTR_ERR(inode)); 292 (int)PTR_ERR(inode));
293 return inode; 293 return inode;
294 } 294 }
295 if (ubifs_inode(inode)->xattr) 295 if (ubifs_inode(inode)->xattr)
296 return inode; 296 return inode;
297 ubifs_err("corrupt extended attribute entry"); 297 ubifs_err(c, "corrupt extended attribute entry");
298 iput(inode); 298 iput(inode);
299 return ERR_PTR(-EINVAL); 299 return ERR_PTR(-EINVAL);
300} 300}
@@ -412,7 +412,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
412 if (buf) { 412 if (buf) {
413 /* If @buf is %NULL we are supposed to return the length */ 413 /* If @buf is %NULL we are supposed to return the length */
414 if (ui->data_len > size) { 414 if (ui->data_len > size) {
415 ubifs_err("buffer size %zd, xattr len %d", 415 ubifs_err(c, "buffer size %zd, xattr len %d",
416 size, ui->data_len); 416 size, ui->data_len);
417 err = -ERANGE; 417 err = -ERANGE;
418 goto out_iput; 418 goto out_iput;
@@ -485,7 +485,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
485 485
486 kfree(pxent); 486 kfree(pxent);
487 if (err != -ENOENT) { 487 if (err != -ENOENT) {
488 ubifs_err("cannot find next direntry, error %d", err); 488 ubifs_err(c, "cannot find next direntry, error %d", err);
489 return err; 489 return err;
490 } 490 }
491 491
@@ -657,8 +657,10 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
657 &init_xattrs, 0); 657 &init_xattrs, 0);
658 mutex_unlock(&inode->i_mutex); 658 mutex_unlock(&inode->i_mutex);
659 659
660 if (err) 660 if (err) {
661 ubifs_err("cannot initialize security for inode %lu, error %d", 661 struct ubifs_info *c = dentry->i_sb->s_fs_info;
662 ubifs_err(c, "cannot initialize security for inode %lu, error %d",
662 inode->i_ino, err); 663 inode->i_ino, err);
664 }
663 return err; 665 return err;
664} 666}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1ba2baaf4367..6d6a96b4e73f 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -21,7 +21,6 @@
21 21
22#include "udfdecl.h" 22#include "udfdecl.h"
23 23
24#include <linux/buffer_head.h>
25#include <linux/bitops.h> 24#include <linux/bitops.h>
26 25
27#include "udf_i.h" 26#include "udf_i.h"
@@ -63,15 +62,14 @@ static int __load_block_bitmap(struct super_block *sb,
63 block_group, nr_groups); 62 block_group, nr_groups);
64 } 63 }
65 64
66 if (bitmap->s_block_bitmap[block_group]) { 65 if (bitmap->s_block_bitmap[block_group])
67 return block_group; 66 return block_group;
68 } else { 67
69 retval = read_block_bitmap(sb, bitmap, block_group, 68 retval = read_block_bitmap(sb, bitmap, block_group, block_group);
70 block_group); 69 if (retval < 0)
71 if (retval < 0) 70 return retval;
72 return retval; 71
73 return block_group; 72 return block_group;
74 }
75} 73}
76 74
77static inline int load_block_bitmap(struct super_block *sb, 75static inline int load_block_bitmap(struct super_block *sb,
@@ -358,7 +356,6 @@ static void udf_table_free_blocks(struct super_block *sb,
358 struct kernel_lb_addr eloc; 356 struct kernel_lb_addr eloc;
359 struct extent_position oepos, epos; 357 struct extent_position oepos, epos;
360 int8_t etype; 358 int8_t etype;
361 int i;
362 struct udf_inode_info *iinfo; 359 struct udf_inode_info *iinfo;
363 360
364 mutex_lock(&sbi->s_alloc_mutex); 361 mutex_lock(&sbi->s_alloc_mutex);
@@ -425,7 +422,6 @@ static void udf_table_free_blocks(struct super_block *sb,
425 } 422 }
426 423
427 if (epos.bh != oepos.bh) { 424 if (epos.bh != oepos.bh) {
428 i = -1;
429 oepos.block = epos.block; 425 oepos.block = epos.block;
430 brelse(oepos.bh); 426 brelse(oepos.bh);
431 get_bh(epos.bh); 427 get_bh(epos.bh);
@@ -762,7 +758,7 @@ inline int udf_prealloc_blocks(struct super_block *sb,
762 uint32_t block_count) 758 uint32_t block_count)
763{ 759{
764 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition]; 760 struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
765 sector_t allocated; 761 int allocated;
766 762
767 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) 763 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
768 allocated = udf_bitmap_prealloc_blocks(sb, 764 allocated = udf_bitmap_prealloc_blocks(sb,
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 05e90edd1992..541a12b5792d 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
30#include <linux/errno.h> 30#include <linux/errno.h>
31#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/buffer_head.h>
34 33
35#include "udf_i.h" 34#include "udf_i.h"
36#include "udf_sb.h" 35#include "udf_sb.h"
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 3e44f575fb9c..c763fda257bf 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -16,7 +16,6 @@
16 16
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/buffer_head.h>
20 19
21struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, 20struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
22 struct udf_fileident_bh *fibh, 21 struct udf_fileident_bh *fibh,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555fbeac..5dadad9960b9 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -33,8 +33,7 @@
33#include <linux/capability.h> 33#include <linux/capability.h>
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/buffer_head.h> 36#include <linux/uio.h>
37#include <linux/aio.h>
38 37
39#include "udf_i.h" 38#include "udf_i.h"
40#include "udf_sb.h" 39#include "udf_sb.h"
@@ -100,8 +99,7 @@ static int udf_adinicb_write_begin(struct file *file,
100 return 0; 99 return 0;
101} 100}
102 101
103static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, 102static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
104 struct iov_iter *iter,
105 loff_t offset) 103 loff_t offset)
106{ 104{
107 /* Fallback to buffered I/O. */ 105 /* Fallback to buffered I/O. */
@@ -121,21 +119,21 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
121 ssize_t retval; 119 ssize_t retval;
122 struct file *file = iocb->ki_filp; 120 struct file *file = iocb->ki_filp;
123 struct inode *inode = file_inode(file); 121 struct inode *inode = file_inode(file);
124 int err, pos;
125 size_t count = iocb->ki_nbytes;
126 struct udf_inode_info *iinfo = UDF_I(inode); 122 struct udf_inode_info *iinfo = UDF_I(inode);
123 int err;
127 124
128 mutex_lock(&inode->i_mutex); 125 mutex_lock(&inode->i_mutex);
126
127 retval = generic_write_checks(iocb, from);
128 if (retval <= 0)
129 goto out;
130
129 down_write(&iinfo->i_data_sem); 131 down_write(&iinfo->i_data_sem);
130 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 132 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
131 if (file->f_flags & O_APPEND) 133 loff_t end = iocb->ki_pos + iov_iter_count(from);
132 pos = inode->i_size;
133 else
134 pos = iocb->ki_pos;
135 134
136 if (inode->i_sb->s_blocksize < 135 if (inode->i_sb->s_blocksize <
137 (udf_file_entry_alloc_offset(inode) + 136 (udf_file_entry_alloc_offset(inode) + end)) {
138 pos + count)) {
139 err = udf_expand_file_adinicb(inode); 137 err = udf_expand_file_adinicb(inode);
140 if (err) { 138 if (err) {
141 mutex_unlock(&inode->i_mutex); 139 mutex_unlock(&inode->i_mutex);
@@ -143,16 +141,14 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
143 return err; 141 return err;
144 } 142 }
145 } else { 143 } else {
146 if (pos + count > inode->i_size) 144 iinfo->i_lenAlloc = max(end, inode->i_size);
147 iinfo->i_lenAlloc = pos + count;
148 else
149 iinfo->i_lenAlloc = inode->i_size;
150 up_write(&iinfo->i_data_sem); 145 up_write(&iinfo->i_data_sem);
151 } 146 }
152 } else 147 } else
153 up_write(&iinfo->i_data_sem); 148 up_write(&iinfo->i_data_sem);
154 149
155 retval = __generic_file_write_iter(iocb, from); 150 retval = __generic_file_write_iter(iocb, from);
151out:
156 mutex_unlock(&inode->i_mutex); 152 mutex_unlock(&inode->i_mutex);
157 153
158 if (retval > 0) { 154 if (retval > 0) {
@@ -240,12 +236,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
240} 236}
241 237
242const struct file_operations udf_file_operations = { 238const struct file_operations udf_file_operations = {
243 .read = new_sync_read,
244 .read_iter = generic_file_read_iter, 239 .read_iter = generic_file_read_iter,
245 .unlocked_ioctl = udf_ioctl, 240 .unlocked_ioctl = udf_ioctl,
246 .open = generic_file_open, 241 .open = generic_file_open,
247 .mmap = generic_file_mmap, 242 .mmap = generic_file_mmap,
248 .write = new_sync_write,
249 .write_iter = udf_file_write_iter, 243 .write_iter = udf_file_write_iter,
250 .release = udf_release_file, 244 .release = udf_release_file,
251 .fsync = generic_file_fsync, 245 .fsync = generic_file_fsync,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d599098d..6afac3d561ac 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -33,12 +33,11 @@
33#include <linux/mm.h> 33#include <linux/mm.h>
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/buffer_head.h>
37#include <linux/writeback.h> 36#include <linux/writeback.h>
38#include <linux/slab.h> 37#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 38#include <linux/crc-itu-t.h>
40#include <linux/mpage.h> 39#include <linux/mpage.h>
41#include <linux/aio.h> 40#include <linux/uio.h>
42 41
43#include "udf_i.h" 42#include "udf_i.h"
44#include "udf_sb.h" 43#include "udf_sb.h"
@@ -215,8 +214,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
215 return ret; 214 return ret;
216} 215}
217 216
218static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, 217static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
219 struct iov_iter *iter,
220 loff_t offset) 218 loff_t offset)
221{ 219{
222 struct file *file = iocb->ki_filp; 220 struct file *file = iocb->ki_filp;
@@ -225,8 +223,8 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
225 size_t count = iov_iter_count(iter); 223 size_t count = iov_iter_count(iter);
226 ssize_t ret; 224 ssize_t ret;
227 225
228 ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); 226 ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block);
229 if (unlikely(ret < 0 && (rw & WRITE))) 227 if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE))
230 udf_write_failed(mapping, offset + count); 228 udf_write_failed(mapping, offset + count);
231 return ret; 229 return ret;
232} 230}
@@ -1637,7 +1635,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
1637 udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0)); 1635 udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0));
1638 if (!bh) { 1636 if (!bh) {
1639 udf_debug("getblk failure\n"); 1637 udf_debug("getblk failure\n");
1640 return -ENOMEM; 1638 return -EIO;
1641 } 1639 }
1642 1640
1643 lock_buffer(bh); 1641 lock_buffer(bh);
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index c175b4dabc14..71d1c25f360d 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -23,7 +23,6 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/string.h> 25#include <linux/string.h>
26#include <linux/buffer_head.h>
27#include <linux/crc-itu-t.h> 26#include <linux/crc-itu-t.h>
28 27
29#include "udf_i.h" 28#include "udf_i.h"
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 33b246b82c98..39661977c89c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
27#include <linux/errno.h> 27#include <linux/errno.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/buffer_head.h>
31#include <linux/sched.h> 30#include <linux/sched.h>
32#include <linux/crc-itu-t.h> 31#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 32#include <linux/exportfs.h>
@@ -569,8 +568,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
569 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 568 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
570 cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); 569 cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
571 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 570 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
572 if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) 571 dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
573 mark_inode_dirty(dir); 572 mark_inode_dirty(dir);
574 if (fibh.sbh != fibh.ebh) 573 if (fibh.sbh != fibh.ebh)
575 brelse(fibh.ebh); 574 brelse(fibh.ebh);
576 brelse(fibh.sbh); 575 brelse(fibh.sbh);
@@ -683,6 +682,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
683 cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY; 682 cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY;
684 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); 683 udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
685 inc_nlink(dir); 684 inc_nlink(dir);
685 dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
686 mark_inode_dirty(dir); 686 mark_inode_dirty(dir);
687 unlock_new_inode(inode); 687 unlock_new_inode(inode);
688 d_instantiate(dentry, inode); 688 d_instantiate(dentry, inode);
@@ -1024,6 +1024,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1024 inc_nlink(inode); 1024 inc_nlink(inode);
1025 inode->i_ctime = current_fs_time(inode->i_sb); 1025 inode->i_ctime = current_fs_time(inode->i_sb);
1026 mark_inode_dirty(inode); 1026 mark_inode_dirty(inode);
1027 dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
1028 mark_inode_dirty(dir);
1027 ihold(inode); 1029 ihold(inode);
1028 d_instantiate(dentry, inode); 1030 d_instantiate(dentry, inode);
1029 1031
@@ -1127,7 +1129,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1127 inode_dec_link_count(new_inode); 1129 inode_dec_link_count(new_inode);
1128 } 1130 }
1129 old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb); 1131 old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb);
1132 new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb);
1130 mark_inode_dirty(old_dir); 1133 mark_inode_dirty(old_dir);
1134 mark_inode_dirty(new_dir);
1131 1135
1132 if (dir_fi) { 1136 if (dir_fi) {
1133 dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location); 1137 dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index d6caf01a2097..5f861ed287c3 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -24,7 +24,6 @@
24 24
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h>
28#include <linux/mutex.h> 27#include <linux/mutex.h>
29 28
30uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, 29uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f169411c4ea0..6299f341967b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
48#include <linux/stat.h> 48#include <linux/stat.h>
49#include <linux/cdrom.h> 49#include <linux/cdrom.h>
50#include <linux/nls.h> 50#include <linux/nls.h>
51#include <linux/buffer_head.h>
52#include <linux/vfs.h> 51#include <linux/vfs.h>
53#include <linux/vmalloc.h> 52#include <linux/vmalloc.h>
54#include <linux/errno.h> 53#include <linux/errno.h>
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index ac10ca939f26..8dfbc4025e2f 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/stat.h> 28#include <linux/stat.h>
29#include <linux/pagemap.h> 29#include <linux/pagemap.h>
30#include <linux/buffer_head.h>
31#include "udf_i.h" 30#include "udf_i.h"
32 31
33static int udf_pc_to_char(struct super_block *sb, unsigned char *from, 32static int udf_pc_to_char(struct super_block *sb, unsigned char *from,
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8a9657d7f7c6..42b8c57795cb 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -22,7 +22,6 @@
22#include "udfdecl.h" 22#include "udfdecl.h"
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/mm.h> 24#include <linux/mm.h>
25#include <linux/buffer_head.h>
26 25
27#include "udf_i.h" 26#include "udf_i.h"
28#include "udf_sb.h" 27#include "udf_sb.h"
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index c84ec010a676..042ddbf110cc 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -35,9 +35,7 @@
35 35
36const struct file_operations ufs_file_operations = { 36const struct file_operations ufs_file_operations = {
37 .llseek = generic_file_llseek, 37 .llseek = generic_file_llseek,
38 .read = new_sync_read,
39 .read_iter = generic_file_read_iter, 38 .read_iter = generic_file_read_iter,
40 .write = new_sync_write,
41 .write_iter = generic_file_write_iter, 39 .write_iter = generic_file_write_iter,
42 .mmap = generic_file_mmap, 40 .mmap = generic_file_mmap,
43 .open = generic_file_open, 41 .open = generic_file_open,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..1d8eef9cf0f5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
31#include "xfs_bmap.h" 31#include "xfs_bmap.h"
32#include "xfs_bmap_util.h" 32#include "xfs_bmap_util.h"
33#include "xfs_bmap_btree.h" 33#include "xfs_bmap_btree.h"
34#include <linux/aio.h>
35#include <linux/gfp.h> 34#include <linux/gfp.h>
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/pagevec.h> 36#include <linux/pagevec.h>
@@ -1496,7 +1495,6 @@ xfs_end_io_direct_write(
1496 1495
1497STATIC ssize_t 1496STATIC ssize_t
1498xfs_vm_direct_IO( 1497xfs_vm_direct_IO(
1499 int rw,
1500 struct kiocb *iocb, 1498 struct kiocb *iocb,
1501 struct iov_iter *iter, 1499 struct iov_iter *iter,
1502 loff_t offset) 1500 loff_t offset)
@@ -1504,15 +1502,14 @@ xfs_vm_direct_IO(
1504 struct inode *inode = iocb->ki_filp->f_mapping->host; 1502 struct inode *inode = iocb->ki_filp->f_mapping->host;
1505 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1503 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1506 1504
1507 if (rw & WRITE) { 1505 if (iov_iter_rw(iter) == WRITE) {
1508 return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, 1506 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
1509 offset, xfs_get_blocks_direct, 1507 xfs_get_blocks_direct,
1510 xfs_end_io_direct_write, NULL, 1508 xfs_end_io_direct_write, NULL,
1511 DIO_ASYNC_EXTEND); 1509 DIO_ASYNC_EXTEND);
1512 } 1510 }
1513 return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, 1511 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
1514 offset, xfs_get_blocks_direct, 1512 xfs_get_blocks_direct, NULL, NULL, 0);
1515 NULL, NULL, 0);
1516} 1513}
1517 1514
1518/* 1515/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a2e1cb8a568b..1f12ad0a8585 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
38#include "xfs_icache.h" 38#include "xfs_icache.h"
39#include "xfs_pnfs.h" 39#include "xfs_pnfs.h"
40 40
41#include <linux/aio.h>
42#include <linux/dcache.h> 41#include <linux/dcache.h>
43#include <linux/falloc.h> 42#include <linux/falloc.h>
44#include <linux/pagevec.h> 43#include <linux/pagevec.h>
@@ -280,7 +279,7 @@ xfs_file_read_iter(
280 279
281 XFS_STATS_INC(xs_read_calls); 280 XFS_STATS_INC(xs_read_calls);
282 281
283 if (unlikely(file->f_flags & O_DIRECT)) 282 if (unlikely(iocb->ki_flags & IOCB_DIRECT))
284 ioflags |= XFS_IO_ISDIRECT; 283 ioflags |= XFS_IO_ISDIRECT;
285 if (file->f_mode & FMODE_NOCMTIME) 284 if (file->f_mode & FMODE_NOCMTIME)
286 ioflags |= XFS_IO_INVIS; 285 ioflags |= XFS_IO_INVIS;
@@ -545,18 +544,19 @@ xfs_zero_eof(
545 */ 544 */
546STATIC ssize_t 545STATIC ssize_t
547xfs_file_aio_write_checks( 546xfs_file_aio_write_checks(
548 struct file *file, 547 struct kiocb *iocb,
549 loff_t *pos, 548 struct iov_iter *from,
550 size_t *count,
551 int *iolock) 549 int *iolock)
552{ 550{
551 struct file *file = iocb->ki_filp;
553 struct inode *inode = file->f_mapping->host; 552 struct inode *inode = file->f_mapping->host;
554 struct xfs_inode *ip = XFS_I(inode); 553 struct xfs_inode *ip = XFS_I(inode);
555 int error = 0; 554 ssize_t error = 0;
555 size_t count = iov_iter_count(from);
556 556
557restart: 557restart:
558 error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); 558 error = generic_write_checks(iocb, from);
559 if (error) 559 if (error <= 0)
560 return error; 560 return error;
561 561
562 error = xfs_break_layouts(inode, iolock); 562 error = xfs_break_layouts(inode, iolock);
@@ -570,16 +570,17 @@ restart:
570 * iolock shared, we need to update it to exclusive which implies 570 * iolock shared, we need to update it to exclusive which implies
571 * having to redo all checks before. 571 * having to redo all checks before.
572 */ 572 */
573 if (*pos > i_size_read(inode)) { 573 if (iocb->ki_pos > i_size_read(inode)) {
574 bool zero = false; 574 bool zero = false;
575 575
576 if (*iolock == XFS_IOLOCK_SHARED) { 576 if (*iolock == XFS_IOLOCK_SHARED) {
577 xfs_rw_iunlock(ip, *iolock); 577 xfs_rw_iunlock(ip, *iolock);
578 *iolock = XFS_IOLOCK_EXCL; 578 *iolock = XFS_IOLOCK_EXCL;
579 xfs_rw_ilock(ip, *iolock); 579 xfs_rw_ilock(ip, *iolock);
580 iov_iter_reexpand(from, count);
580 goto restart; 581 goto restart;
581 } 582 }
582 error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); 583 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
583 if (error) 584 if (error)
584 return error; 585 return error;
585 } 586 }
@@ -679,10 +680,11 @@ xfs_file_dio_aio_write(
679 xfs_rw_ilock(ip, iolock); 680 xfs_rw_ilock(ip, iolock);
680 } 681 }
681 682
682 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); 683 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
683 if (ret) 684 if (ret)
684 goto out; 685 goto out;
685 iov_iter_truncate(from, count); 686 count = iov_iter_count(from);
687 pos = iocb->ki_pos;
686 688
687 if (mapping->nrpages) { 689 if (mapping->nrpages) {
688 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 690 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -735,24 +737,22 @@ xfs_file_buffered_aio_write(
735 ssize_t ret; 737 ssize_t ret;
736 int enospc = 0; 738 int enospc = 0;
737 int iolock = XFS_IOLOCK_EXCL; 739 int iolock = XFS_IOLOCK_EXCL;
738 loff_t pos = iocb->ki_pos;
739 size_t count = iov_iter_count(from);
740 740
741 xfs_rw_ilock(ip, iolock); 741 xfs_rw_ilock(ip, iolock);
742 742
743 ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); 743 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
744 if (ret) 744 if (ret)
745 goto out; 745 goto out;
746 746
747 iov_iter_truncate(from, count);
748 /* We can write back this queue in page reclaim */ 747 /* We can write back this queue in page reclaim */
749 current->backing_dev_info = inode_to_bdi(inode); 748 current->backing_dev_info = inode_to_bdi(inode);
750 749
751write_retry: 750write_retry:
752 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); 751 trace_xfs_file_buffered_write(ip, iov_iter_count(from),
753 ret = generic_perform_write(file, from, pos); 752 iocb->ki_pos, 0);
753 ret = generic_perform_write(file, from, iocb->ki_pos);
754 if (likely(ret >= 0)) 754 if (likely(ret >= 0))
755 iocb->ki_pos = pos + ret; 755 iocb->ki_pos += ret;
756 756
757 /* 757 /*
758 * If we hit a space limit, try to free up some lingering preallocated 758 * If we hit a space limit, try to free up some lingering preallocated
@@ -804,7 +804,7 @@ xfs_file_write_iter(
804 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 804 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
805 return -EIO; 805 return -EIO;
806 806
807 if (unlikely(file->f_flags & O_DIRECT)) 807 if (unlikely(iocb->ki_flags & IOCB_DIRECT))
808 ret = xfs_file_dio_aio_write(iocb, from); 808 ret = xfs_file_dio_aio_write(iocb, from);
809 else 809 else
810 ret = xfs_file_buffered_aio_write(iocb, from); 810 ret = xfs_file_buffered_aio_write(iocb, from);
@@ -1387,8 +1387,6 @@ xfs_file_llseek(
1387 1387
1388const struct file_operations xfs_file_operations = { 1388const struct file_operations xfs_file_operations = {
1389 .llseek = xfs_file_llseek, 1389 .llseek = xfs_file_llseek,
1390 .read = new_sync_read,
1391 .write = new_sync_write,
1392 .read_iter = xfs_file_read_iter, 1390 .read_iter = xfs_file_read_iter,
1393 .write_iter = xfs_file_write_iter, 1391 .write_iter = xfs_file_write_iter,
1394 .splice_read = xfs_file_splice_read, 1392 .splice_read = xfs_file_splice_read,
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 0d4d3590cf85..996a04064894 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -168,10 +168,6 @@ extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
168 uint, struct qc_dqblk *); 168 uint, struct qc_dqblk *);
169extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, 169extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
170 struct qc_dqblk *); 170 struct qc_dqblk *);
171extern int xfs_qm_scall_getqstat(struct xfs_mount *,
172 struct fs_quota_stat *);
173extern int xfs_qm_scall_getqstatv(struct xfs_mount *,
174 struct fs_quota_statv *);
175extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); 171extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint);
176extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); 172extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
177 173
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 9b965db45800..9a25c9275fb3 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -38,7 +38,6 @@
38STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 38STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
39STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 39STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
40 uint); 40 uint);
41STATIC uint xfs_qm_export_flags(uint);
42 41
43/* 42/*
44 * Turn off quota accounting and/or enforcement for all udquots and/or 43 * Turn off quota accounting and/or enforcement for all udquots and/or
@@ -389,159 +388,6 @@ xfs_qm_scall_quotaon(
389 return 0; 388 return 0;
390} 389}
391 390
392
393/*
394 * Return quota status information, such as uquota-off, enforcements, etc.
395 * for Q_XGETQSTAT command.
396 */
397int
398xfs_qm_scall_getqstat(
399 struct xfs_mount *mp,
400 struct fs_quota_stat *out)
401{
402 struct xfs_quotainfo *q = mp->m_quotainfo;
403 struct xfs_inode *uip = NULL;
404 struct xfs_inode *gip = NULL;
405 struct xfs_inode *pip = NULL;
406 bool tempuqip = false;
407 bool tempgqip = false;
408 bool temppqip = false;
409
410 memset(out, 0, sizeof(fs_quota_stat_t));
411
412 out->qs_version = FS_QSTAT_VERSION;
413 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
414 (XFS_ALL_QUOTA_ACCT|
415 XFS_ALL_QUOTA_ENFD));
416 uip = q->qi_uquotaip;
417 gip = q->qi_gquotaip;
418 pip = q->qi_pquotaip;
419 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
420 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
421 0, 0, &uip) == 0)
422 tempuqip = true;
423 }
424 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
425 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
426 0, 0, &gip) == 0)
427 tempgqip = true;
428 }
429 /*
430 * Q_XGETQSTAT doesn't have room for both group and project quotas.
431 * So, allow the project quota values to be copied out only if
432 * there is no group quota information available.
433 */
434 if (!gip) {
435 if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
436 if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
437 0, 0, &pip) == 0)
438 temppqip = true;
439 }
440 } else
441 pip = NULL;
442 if (uip) {
443 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
444 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
445 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
446 if (tempuqip)
447 IRELE(uip);
448 }
449
450 if (gip) {
451 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
452 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
453 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
454 if (tempgqip)
455 IRELE(gip);
456 }
457 if (pip) {
458 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
459 out->qs_gquota.qfs_nblks = pip->i_d.di_nblocks;
460 out->qs_gquota.qfs_nextents = pip->i_d.di_nextents;
461 if (temppqip)
462 IRELE(pip);
463 }
464 out->qs_incoredqs = q->qi_dquots;
465 out->qs_btimelimit = q->qi_btimelimit;
466 out->qs_itimelimit = q->qi_itimelimit;
467 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
468 out->qs_bwarnlimit = q->qi_bwarnlimit;
469 out->qs_iwarnlimit = q->qi_iwarnlimit;
470
471 return 0;
472}
473
474/*
475 * Return quota status information, such as uquota-off, enforcements, etc.
476 * for Q_XGETQSTATV command, to support separate project quota field.
477 */
478int
479xfs_qm_scall_getqstatv(
480 struct xfs_mount *mp,
481 struct fs_quota_statv *out)
482{
483 struct xfs_quotainfo *q = mp->m_quotainfo;
484 struct xfs_inode *uip = NULL;
485 struct xfs_inode *gip = NULL;
486 struct xfs_inode *pip = NULL;
487 bool tempuqip = false;
488 bool tempgqip = false;
489 bool temppqip = false;
490
491 out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
492 (XFS_ALL_QUOTA_ACCT|
493 XFS_ALL_QUOTA_ENFD));
494 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
495 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
496 out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino;
497
498 uip = q->qi_uquotaip;
499 gip = q->qi_gquotaip;
500 pip = q->qi_pquotaip;
501 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
502 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
503 0, 0, &uip) == 0)
504 tempuqip = true;
505 }
506 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
507 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
508 0, 0, &gip) == 0)
509 tempgqip = true;
510 }
511 if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
512 if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
513 0, 0, &pip) == 0)
514 temppqip = true;
515 }
516 if (uip) {
517 out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
518 out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
519 if (tempuqip)
520 IRELE(uip);
521 }
522
523 if (gip) {
524 out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
525 out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
526 if (tempgqip)
527 IRELE(gip);
528 }
529 if (pip) {
530 out->qs_pquota.qfs_nblks = pip->i_d.di_nblocks;
531 out->qs_pquota.qfs_nextents = pip->i_d.di_nextents;
532 if (temppqip)
533 IRELE(pip);
534 }
535 out->qs_incoredqs = q->qi_dquots;
536 out->qs_btimelimit = q->qi_btimelimit;
537 out->qs_itimelimit = q->qi_itimelimit;
538 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
539 out->qs_bwarnlimit = q->qi_bwarnlimit;
540 out->qs_iwarnlimit = q->qi_iwarnlimit;
541
542 return 0;
543}
544
545#define XFS_QC_MASK \ 391#define XFS_QC_MASK \
546 (QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK) 392 (QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK)
547 393
@@ -873,28 +719,6 @@ out_put:
873 return error; 719 return error;
874} 720}
875 721
876STATIC uint
877xfs_qm_export_flags(
878 uint flags)
879{
880 uint uflags;
881
882 uflags = 0;
883 if (flags & XFS_UQUOTA_ACCT)
884 uflags |= FS_QUOTA_UDQ_ACCT;
885 if (flags & XFS_GQUOTA_ACCT)
886 uflags |= FS_QUOTA_GDQ_ACCT;
887 if (flags & XFS_PQUOTA_ACCT)
888 uflags |= FS_QUOTA_PDQ_ACCT;
889 if (flags & XFS_UQUOTA_ENFD)
890 uflags |= FS_QUOTA_UDQ_ENFD;
891 if (flags & XFS_GQUOTA_ENFD)
892 uflags |= FS_QUOTA_GDQ_ENFD;
893 if (flags & XFS_PQUOTA_ENFD)
894 uflags |= FS_QUOTA_PDQ_ENFD;
895 return uflags;
896}
897
898 722
899STATIC int 723STATIC int
900xfs_dqrele_inode( 724xfs_dqrele_inode(
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 6923905ab33d..7795e0d01382 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -23,10 +23,81 @@
23#include "xfs_inode.h" 23#include "xfs_inode.h"
24#include "xfs_quota.h" 24#include "xfs_quota.h"
25#include "xfs_trans.h" 25#include "xfs_trans.h"
26#include "xfs_trace.h"
27#include "xfs_icache.h"
26#include "xfs_qm.h" 28#include "xfs_qm.h"
27#include <linux/quota.h> 29#include <linux/quota.h>
28 30
29 31
32static void
33xfs_qm_fill_state(
34 struct qc_type_state *tstate,
35 struct xfs_mount *mp,
36 struct xfs_inode *ip,
37 xfs_ino_t ino)
38{
39 struct xfs_quotainfo *q = mp->m_quotainfo;
40 bool tempqip = false;
41
42 tstate->ino = ino;
43 if (!ip && ino == NULLFSINO)
44 return;
45 if (!ip) {
46 if (xfs_iget(mp, NULL, ino, 0, 0, &ip))
47 return;
48 tempqip = true;
49 }
50 tstate->flags |= QCI_SYSFILE;
51 tstate->blocks = ip->i_d.di_nblocks;
52 tstate->nextents = ip->i_d.di_nextents;
53 tstate->spc_timelimit = q->qi_btimelimit;
54 tstate->ino_timelimit = q->qi_itimelimit;
55 tstate->rt_spc_timelimit = q->qi_rtbtimelimit;
56 tstate->spc_warnlimit = q->qi_bwarnlimit;
57 tstate->ino_warnlimit = q->qi_iwarnlimit;
58 tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
59 if (tempqip)
60 IRELE(ip);
61}
62
63/*
64 * Return quota status information, such as enforcements, quota file inode
65 * numbers etc.
66 */
67static int
68xfs_fs_get_quota_state(
69 struct super_block *sb,
70 struct qc_state *state)
71{
72 struct xfs_mount *mp = XFS_M(sb);
73 struct xfs_quotainfo *q = mp->m_quotainfo;
74
75 memset(state, 0, sizeof(*state));
76 if (!XFS_IS_QUOTA_RUNNING(mp))
77 return 0;
78 state->s_incoredqs = q->qi_dquots;
79 if (XFS_IS_UQUOTA_RUNNING(mp))
80 state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED;
81 if (XFS_IS_UQUOTA_ENFORCED(mp))
82 state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
83 if (XFS_IS_GQUOTA_RUNNING(mp))
84 state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED;
85 if (XFS_IS_GQUOTA_ENFORCED(mp))
86 state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
87 if (XFS_IS_PQUOTA_RUNNING(mp))
88 state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED;
89 if (XFS_IS_PQUOTA_ENFORCED(mp))
90 state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED;
91
92 xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip,
93 mp->m_sb.sb_uquotino);
94 xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip,
95 mp->m_sb.sb_gquotino);
96 xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip,
97 mp->m_sb.sb_pquotino);
98 return 0;
99}
100
30STATIC int 101STATIC int
31xfs_quota_type(int type) 102xfs_quota_type(int type)
32{ 103{
@@ -40,28 +111,40 @@ xfs_quota_type(int type)
40 } 111 }
41} 112}
42 113
43STATIC int 114#define XFS_QC_SETINFO_MASK (QC_TIMER_MASK | QC_WARNS_MASK)
44xfs_fs_get_xstate( 115
116/*
117 * Adjust quota timers & warnings
118 */
119static int
120xfs_fs_set_info(
45 struct super_block *sb, 121 struct super_block *sb,
46 struct fs_quota_stat *fqs) 122 int type,
123 struct qc_info *info)
47{ 124{
48 struct xfs_mount *mp = XFS_M(sb); 125 struct xfs_mount *mp = XFS_M(sb);
126 struct qc_dqblk newlim;
49 127
128 if (sb->s_flags & MS_RDONLY)
129 return -EROFS;
50 if (!XFS_IS_QUOTA_RUNNING(mp)) 130 if (!XFS_IS_QUOTA_RUNNING(mp))
51 return -ENOSYS; 131 return -ENOSYS;
52 return xfs_qm_scall_getqstat(mp, fqs); 132 if (!XFS_IS_QUOTA_ON(mp))
53} 133 return -ESRCH;
134 if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK)
135 return -EINVAL;
136 if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0)
137 return 0;
54 138
55STATIC int 139 newlim.d_fieldmask = info->i_fieldmask;
56xfs_fs_get_xstatev( 140 newlim.d_spc_timer = info->i_spc_timelimit;
57 struct super_block *sb, 141 newlim.d_ino_timer = info->i_ino_timelimit;
58 struct fs_quota_statv *fqs) 142 newlim.d_rt_spc_timer = info->i_rt_spc_timelimit;
59{ 143 newlim.d_ino_warns = info->i_ino_warnlimit;
60 struct xfs_mount *mp = XFS_M(sb); 144 newlim.d_spc_warns = info->i_spc_warnlimit;
145 newlim.d_rt_spc_warns = info->i_rt_spc_warnlimit;
61 146
62 if (!XFS_IS_QUOTA_RUNNING(mp)) 147 return xfs_qm_scall_setqlim(mp, 0, xfs_quota_type(type), &newlim);
63 return -ENOSYS;
64 return xfs_qm_scall_getqstatv(mp, fqs);
65} 148}
66 149
67static unsigned int 150static unsigned int
@@ -178,8 +261,8 @@ xfs_fs_set_dqblk(
178} 261}
179 262
180const struct quotactl_ops xfs_quotactl_operations = { 263const struct quotactl_ops xfs_quotactl_operations = {
181 .get_xstatev = xfs_fs_get_xstatev, 264 .get_state = xfs_fs_get_quota_state,
182 .get_xstate = xfs_fs_get_xstate, 265 .set_info = xfs_fs_set_info,
183 .quota_enable = xfs_quota_enable, 266 .quota_enable = xfs_quota_enable,
184 .quota_disable = xfs_quota_disable, 267 .quota_disable = xfs_quota_disable,
185 .rm_xquota = xfs_fs_rm_xquota, 268 .rm_xquota = xfs_fs_rm_xquota,